xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision e46b3c0d011eab9933c183d5b47569db8e377281)
1e86d1aa8SWill Deacon // SPDX-License-Identifier: GPL-2.0
2e86d1aa8SWill Deacon /*
3e86d1aa8SWill Deacon  * IOMMU API for ARM architected SMMUv3 implementations.
4e86d1aa8SWill Deacon  *
5e86d1aa8SWill Deacon  * Copyright (C) 2015 ARM Limited
6e86d1aa8SWill Deacon  *
7e86d1aa8SWill Deacon  * Author: Will Deacon <will.deacon@arm.com>
8e86d1aa8SWill Deacon  *
9e86d1aa8SWill Deacon  * This driver is powered by bad coffee and bombay mix.
10e86d1aa8SWill Deacon  */
11e86d1aa8SWill Deacon 
12e86d1aa8SWill Deacon #include <linux/acpi.h>
13e86d1aa8SWill Deacon #include <linux/acpi_iort.h>
14e86d1aa8SWill Deacon #include <linux/bitfield.h>
15e86d1aa8SWill Deacon #include <linux/bitops.h>
16e86d1aa8SWill Deacon #include <linux/crash_dump.h>
17e86d1aa8SWill Deacon #include <linux/delay.h>
18e86d1aa8SWill Deacon #include <linux/dma-iommu.h>
19e86d1aa8SWill Deacon #include <linux/err.h>
20e86d1aa8SWill Deacon #include <linux/interrupt.h>
21e86d1aa8SWill Deacon #include <linux/io-pgtable.h>
22e86d1aa8SWill Deacon #include <linux/iommu.h>
23e86d1aa8SWill Deacon #include <linux/iopoll.h>
24e86d1aa8SWill Deacon #include <linux/module.h>
25e86d1aa8SWill Deacon #include <linux/msi.h>
26e86d1aa8SWill Deacon #include <linux/of.h>
27e86d1aa8SWill Deacon #include <linux/of_address.h>
28e86d1aa8SWill Deacon #include <linux/of_iommu.h>
29e86d1aa8SWill Deacon #include <linux/of_platform.h>
30e86d1aa8SWill Deacon #include <linux/pci.h>
31e86d1aa8SWill Deacon #include <linux/pci-ats.h>
32e86d1aa8SWill Deacon #include <linux/platform_device.h>
33e86d1aa8SWill Deacon 
34e86d1aa8SWill Deacon #include <linux/amba/bus.h>
35e86d1aa8SWill Deacon 
/* MMIO registers */

/* IDR0: top-level feature identification (translation stages, TT format, ...) */
#define ARM_SMMU_IDR0			0x0
#define IDR0_ST_LVL			GENMASK(28, 27)
#define IDR0_ST_LVL_2LVL		1
#define IDR0_STALL_MODEL		GENMASK(25, 24)
#define IDR0_STALL_MODEL_STALL		0
#define IDR0_STALL_MODEL_FORCE		2
#define IDR0_TTENDIAN			GENMASK(22, 21)
#define IDR0_TTENDIAN_MIXED		0
#define IDR0_TTENDIAN_LE		2
#define IDR0_TTENDIAN_BE		3
#define IDR0_CD2L			(1 << 19)
#define IDR0_VMID16			(1 << 18)
#define IDR0_PRI			(1 << 16)
#define IDR0_SEV			(1 << 14)
#define IDR0_MSI			(1 << 13)
#define IDR0_ASID16			(1 << 12)
#define IDR0_ATS			(1 << 10)
#define IDR0_HYP			(1 << 9)
#define IDR0_COHACC			(1 << 4)
#define IDR0_TTF			GENMASK(3, 2)
#define IDR0_TTF_AARCH64		2
#define IDR0_TTF_AARCH32_64		3
#define IDR0_S1P			(1 << 1)
#define IDR0_S2P			(1 << 0)

/* IDR1: sizing information (SID/SSID widths, queue depth fields) */
#define ARM_SMMU_IDR1			0x4
#define IDR1_TABLES_PRESET		(1 << 30)
#define IDR1_QUEUES_PRESET		(1 << 29)
#define IDR1_REL			(1 << 28)
#define IDR1_CMDQS			GENMASK(25, 21)
#define IDR1_EVTQS			GENMASK(20, 16)
#define IDR1_PRIQS			GENMASK(15, 11)
#define IDR1_SSIDSIZE			GENMASK(10, 6)
#define IDR1_SIDSIZE			GENMASK(5, 0)

/* IDR3: RIL = range-invalidation-by-level support */
#define ARM_SMMU_IDR3			0xc
#define IDR3_RIL			(1 << 10)

/* IDR5: output address size, supported translation granules, VA extension */
#define ARM_SMMU_IDR5			0x14
#define IDR5_STALL_MAX			GENMASK(31, 16)
#define IDR5_GRAN64K			(1 << 6)
#define IDR5_GRAN16K			(1 << 5)
#define IDR5_GRAN4K			(1 << 4)
#define IDR5_OAS			GENMASK(2, 0)
#define IDR5_OAS_32_BIT			0
#define IDR5_OAS_36_BIT			1
#define IDR5_OAS_40_BIT			2
#define IDR5_OAS_42_BIT			3
#define IDR5_OAS_44_BIT			4
#define IDR5_OAS_48_BIT			5
#define IDR5_OAS_52_BIT			6
#define IDR5_VAX			GENMASK(11, 10)
#define IDR5_VAX_52_BIT			1
90e86d1aa8SWill Deacon 
/* CR0: global enables; each bit is acknowledged via CR0ACK */
#define ARM_SMMU_CR0			0x20
#define CR0_ATSCHK			(1 << 4)
#define CR0_CMDQEN			(1 << 3)
#define CR0_EVTQEN			(1 << 2)
#define CR0_PRIQEN			(1 << 1)
#define CR0_SMMUEN			(1 << 0)

#define ARM_SMMU_CR0ACK			0x24

/* CR1: memory attributes/shareability for table walks and queue accesses */
#define ARM_SMMU_CR1			0x28
#define CR1_TABLE_SH			GENMASK(11, 10)
#define CR1_TABLE_OC			GENMASK(9, 8)
#define CR1_TABLE_IC			GENMASK(7, 6)
#define CR1_QUEUE_SH			GENMASK(5, 4)
#define CR1_QUEUE_OC			GENMASK(3, 2)
#define CR1_QUEUE_IC			GENMASK(1, 0)
/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
#define CR1_CACHE_NC			0
#define CR1_CACHE_WB			1
#define CR1_CACHE_WT			2

#define ARM_SMMU_CR2			0x2c
#define CR2_PTM				(1 << 2)
#define CR2_RECINVSID			(1 << 1)
#define CR2_E2H				(1 << 0)

/* GBPA: global bypass attributes, used before the stream table is live */
#define ARM_SMMU_GBPA			0x44
#define GBPA_UPDATE			(1 << 31)
#define GBPA_ABORT			(1 << 20)

#define ARM_SMMU_IRQ_CTRL		0x50
#define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
#define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
#define IRQ_CTRL_GERROR_IRQEN		(1 << 0)

#define ARM_SMMU_IRQ_CTRLACK		0x54

/* GERROR: sticky global error bits, toggled against GERRORN to acknowledge */
#define ARM_SMMU_GERROR			0x60
#define GERROR_SFM_ERR			(1 << 8)
#define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
#define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
#define GERROR_MSI_EVTQ_ABT_ERR		(1 << 5)
#define GERROR_MSI_CMDQ_ABT_ERR		(1 << 4)
#define GERROR_PRIQ_ABT_ERR		(1 << 3)
#define GERROR_EVTQ_ABT_ERR		(1 << 2)
#define GERROR_CMDQ_ERR			(1 << 0)
/*
 * NOTE(review): 0xfd covers bits 0,2-7 but NOT GERROR_SFM_ERR (bit 8) —
 * confirm whether SFM errors are intentionally excluded from this mask.
 */
#define GERROR_ERR_MASK			0xfd

#define ARM_SMMU_GERRORN		0x64

#define ARM_SMMU_GERROR_IRQ_CFG0	0x68
#define ARM_SMMU_GERROR_IRQ_CFG1	0x70
#define ARM_SMMU_GERROR_IRQ_CFG2	0x74
144e86d1aa8SWill Deacon 
/* Stream table base address and configuration */
#define ARM_SMMU_STRTAB_BASE		0x80
#define STRTAB_BASE_RA			(1UL << 62)
#define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)

#define ARM_SMMU_STRTAB_BASE_CFG	0x88
#define STRTAB_BASE_CFG_FMT		GENMASK(17, 16)
#define STRTAB_BASE_CFG_FMT_LINEAR	0
#define STRTAB_BASE_CFG_FMT_2LVL	1
#define STRTAB_BASE_CFG_SPLIT		GENMASK(10, 6)
#define STRTAB_BASE_CFG_LOG2SIZE	GENMASK(5, 0)

#define ARM_SMMU_CMDQ_BASE		0x90
#define ARM_SMMU_CMDQ_PROD		0x98
#define ARM_SMMU_CMDQ_CONS		0x9c

/*
 * NOTE(review): the extra 0x10000 in the EVTQ/PRIQ PROD/CONS offsets appears
 * to select SMMU register page 1 (cf. the separate page1 mapping and
 * ARM_SMMU_OPT_PAGE0_REGS_ONLY) — confirm against the accessor code.
 */
#define ARM_SMMU_EVTQ_BASE		0xa0
#define ARM_SMMU_EVTQ_PROD		0x100a8
#define ARM_SMMU_EVTQ_CONS		0x100ac
#define ARM_SMMU_EVTQ_IRQ_CFG0		0xb0
#define ARM_SMMU_EVTQ_IRQ_CFG1		0xb8
#define ARM_SMMU_EVTQ_IRQ_CFG2		0xbc

#define ARM_SMMU_PRIQ_BASE		0xc0
#define ARM_SMMU_PRIQ_PROD		0x100c8
#define ARM_SMMU_PRIQ_CONS		0x100cc
#define ARM_SMMU_PRIQ_IRQ_CFG0		0xd0
#define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc

/* Size of one register page mapping */
#define ARM_SMMU_REG_SZ			0xe00
175e86d1aa8SWill Deacon 
/* Common MSI config fields */
#define MSI_CFG0_ADDR_MASK		GENMASK_ULL(51, 2)
#define MSI_CFG2_SH			GENMASK(5, 4)
#define MSI_CFG2_MEMATTR		GENMASK(3, 0)

/* Common memory attribute values */
#define ARM_SMMU_SH_NSH			0
#define ARM_SMMU_SH_OSH			2
#define ARM_SMMU_SH_ISH			3
#define ARM_SMMU_MEMATTR_DEVICE_nGnRE	0x1
#define ARM_SMMU_MEMATTR_OIWB		0xf

/*
 * Queue index helpers: prod/cons values carry an index in the low
 * max_n_shift bits, a wrap bit just above, and the overflow flag at bit 31.
 */
#define Q_IDX(llq, p)			((p) & ((1 << (llq)->max_n_shift) - 1))
#define Q_WRP(llq, p)			((p) & (1 << (llq)->max_n_shift))
#define Q_OVERFLOW_FLAG			(1U << 31)
#define Q_OVF(p)			((p) & Q_OVERFLOW_FLAG)
#define Q_ENT(q, p)			((q)->base +			\
					 Q_IDX(&((q)->llq), p) *	\
					 (q)->ent_dwords)

#define Q_BASE_RWA			(1UL << 62)
#define Q_BASE_ADDR_MASK		GENMASK_ULL(51, 5)
#define Q_BASE_LOG2SIZE			GENMASK(4, 0)

/* Ensure DMA allocations are naturally aligned */
#ifdef CONFIG_CMA_ALIGNMENT
#define Q_MAX_SZ_SHIFT			(PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
#else
#define Q_MAX_SZ_SHIFT			(PAGE_SHIFT + MAX_ORDER - 1)
#endif
206e86d1aa8SWill Deacon 
/*
 * Stream table.
 *
 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
 * 2lvl: 128k L1 entries,
 *       256 lazy entries per table (each table covers a PCI bus)
 */
#define STRTAB_L1_SZ_SHIFT		20
#define STRTAB_SPLIT			8

/* Level-1 stream table descriptor: span + pointer to the L2 table */
#define STRTAB_L1_DESC_DWORDS		1
#define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
#define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)

/* Stream Table Entry (STE): 8 dwords; dword 0 selects the config */
#define STRTAB_STE_DWORDS		8
#define STRTAB_STE_0_V			(1UL << 0)
#define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
#define STRTAB_STE_0_CFG_ABORT		0
#define STRTAB_STE_0_CFG_BYPASS		4
#define STRTAB_STE_0_CFG_S1_TRANS	5
#define STRTAB_STE_0_CFG_S2_TRANS	6

#define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
#define STRTAB_STE_0_S1FMT_LINEAR	0
#define STRTAB_STE_0_S1FMT_64K_L2	2
#define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
#define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)

/* STE dword 1: stage-1 control (SSID default behaviour, cacheability, ATS) */
#define STRTAB_STE_1_S1DSS		GENMASK_ULL(1, 0)
#define STRTAB_STE_1_S1DSS_TERMINATE	0x0
#define STRTAB_STE_1_S1DSS_BYPASS	0x1
#define STRTAB_STE_1_S1DSS_SSID0	0x2

#define STRTAB_STE_1_S1C_CACHE_NC	0UL
#define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
#define STRTAB_STE_1_S1C_CACHE_WT	2UL
#define STRTAB_STE_1_S1C_CACHE_WB	3UL
#define STRTAB_STE_1_S1CIR		GENMASK_ULL(3, 2)
#define STRTAB_STE_1_S1COR		GENMASK_ULL(5, 4)
#define STRTAB_STE_1_S1CSH		GENMASK_ULL(7, 6)

#define STRTAB_STE_1_S1STALLD		(1UL << 27)

#define STRTAB_STE_1_EATS		GENMASK_ULL(29, 28)
#define STRTAB_STE_1_EATS_ABT		0UL
#define STRTAB_STE_1_EATS_TRANS		1UL
#define STRTAB_STE_1_EATS_S1CHK		2UL

#define STRTAB_STE_1_STRW		GENMASK_ULL(31, 30)
#define STRTAB_STE_1_STRW_NSEL1		0UL
#define STRTAB_STE_1_STRW_EL2		2UL

#define STRTAB_STE_1_SHCFG		GENMASK_ULL(45, 44)
#define STRTAB_STE_1_SHCFG_INCOMING	1UL

/* STE dword 2: stage-2 VMID and VTCR fields */
#define STRTAB_STE_2_S2VMID		GENMASK_ULL(15, 0)
#define STRTAB_STE_2_VTCR		GENMASK_ULL(50, 32)
#define STRTAB_STE_2_VTCR_S2T0SZ	GENMASK_ULL(5, 0)
#define STRTAB_STE_2_VTCR_S2SL0		GENMASK_ULL(7, 6)
#define STRTAB_STE_2_VTCR_S2IR0		GENMASK_ULL(9, 8)
#define STRTAB_STE_2_VTCR_S2OR0		GENMASK_ULL(11, 10)
#define STRTAB_STE_2_VTCR_S2SH0		GENMASK_ULL(13, 12)
#define STRTAB_STE_2_VTCR_S2TG		GENMASK_ULL(15, 14)
#define STRTAB_STE_2_VTCR_S2PS		GENMASK_ULL(18, 16)
#define STRTAB_STE_2_S2AA64		(1UL << 51)
#define STRTAB_STE_2_S2ENDI		(1UL << 52)
#define STRTAB_STE_2_S2PTW		(1UL << 54)
#define STRTAB_STE_2_S2R		(1UL << 58)

/* STE dword 3: stage-2 translation table base */
#define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
277e86d1aa8SWill Deacon 
/*
 * Context descriptors.
 *
 * Linear: when less than 1024 SSIDs are supported
 * 2lvl: at most 1024 L1 entries,
 *       1024 lazy entries per table.
 */
#define CTXDESC_SPLIT			10
#define CTXDESC_L2_ENTRIES		(1 << CTXDESC_SPLIT)

/* Level-1 CD table descriptor: valid bit + pointer to the L2 table */
#define CTXDESC_L1_DESC_DWORDS		1
#define CTXDESC_L1_DESC_V		(1UL << 0)
#define CTXDESC_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 12)

/* Context Descriptor (CD): 8 dwords; dword 0 carries TCR-like fields + ASID */
#define CTXDESC_CD_DWORDS		8
#define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
#define CTXDESC_CD_0_TCR_TG0		GENMASK_ULL(7, 6)
#define CTXDESC_CD_0_TCR_IRGN0		GENMASK_ULL(9, 8)
#define CTXDESC_CD_0_TCR_ORGN0		GENMASK_ULL(11, 10)
#define CTXDESC_CD_0_TCR_SH0		GENMASK_ULL(13, 12)
#define CTXDESC_CD_0_TCR_EPD0		(1ULL << 14)
#define CTXDESC_CD_0_TCR_EPD1		(1ULL << 30)

#define CTXDESC_CD_0_ENDI		(1UL << 15)
#define CTXDESC_CD_0_V			(1UL << 31)

#define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
#define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)

#define CTXDESC_CD_0_AA64		(1UL << 41)
#define CTXDESC_CD_0_S			(1UL << 44)
#define CTXDESC_CD_0_R			(1UL << 45)
#define CTXDESC_CD_0_A			(1UL << 46)
#define CTXDESC_CD_0_ASET		(1UL << 47)
#define CTXDESC_CD_0_ASID		GENMASK_ULL(63, 48)

/* CD dword 1: TTB0 translation table base */
#define CTXDESC_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)

/*
 * When the SMMU only supports linear context descriptor tables, pick a
 * reasonable size limit (64kB).
 */
#define CTXDESC_LINEAR_CDMAX		ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
321e86d1aa8SWill Deacon 
/* Command queue */
#define CMDQ_ENT_SZ_SHIFT		4
#define CMDQ_ENT_DWORDS			((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
#define CMDQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)

/* CMDQ_CONS error field and the CERROR index values reported in it */
#define CMDQ_CONS_ERR			GENMASK(30, 24)
#define CMDQ_ERR_CERROR_NONE_IDX	0
#define CMDQ_ERR_CERROR_ILL_IDX		1
#define CMDQ_ERR_CERROR_ABT_IDX		2
#define CMDQ_ERR_CERROR_ATC_INV_IDX	3

/* Repurpose the prod overflow bit as the batching "owner" flag */
#define CMDQ_PROD_OWNED_FLAG		Q_OVERFLOW_FLAG

/*
 * This is used to size the command queue and therefore must be at least
 * BITS_PER_LONG so that the valid_map works correctly (it relies on the
 * total number of queue entries being a multiple of BITS_PER_LONG).
 */
#define CMDQ_BATCH_ENTRIES		BITS_PER_LONG

/* Fields common to all commands (dword 0) */
#define CMDQ_0_OP			GENMASK_ULL(7, 0)
#define CMDQ_0_SSV			(1UL << 11)

#define CMDQ_PREFETCH_0_SID		GENMASK_ULL(63, 32)
#define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
#define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)

#define CMDQ_CFGI_0_SSID		GENMASK_ULL(31, 12)
#define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
#define CMDQ_CFGI_1_LEAF		(1UL << 0)
#define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)

#define CMDQ_TLBI_0_NUM			GENMASK_ULL(16, 12)
#define CMDQ_TLBI_RANGE_NUM_MAX		31
#define CMDQ_TLBI_0_SCALE		GENMASK_ULL(24, 20)
#define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
#define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
#define CMDQ_TLBI_1_LEAF		(1UL << 0)
#define CMDQ_TLBI_1_TTL			GENMASK_ULL(9, 8)
#define CMDQ_TLBI_1_TG			GENMASK_ULL(11, 10)
#define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
#define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)

#define CMDQ_ATC_0_SSID			GENMASK_ULL(31, 12)
#define CMDQ_ATC_0_SID			GENMASK_ULL(63, 32)
#define CMDQ_ATC_0_GLOBAL		(1UL << 9)
#define CMDQ_ATC_1_SIZE			GENMASK_ULL(5, 0)
#define CMDQ_ATC_1_ADDR_MASK		GENMASK_ULL(63, 12)

#define CMDQ_PRI_0_SSID			GENMASK_ULL(31, 12)
#define CMDQ_PRI_0_SID			GENMASK_ULL(63, 32)
#define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
#define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)

/* CMD_SYNC completion signalling: none, MSI write, or SEV wakeup */
#define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
#define CMDQ_SYNC_0_CS_NONE		0
#define CMDQ_SYNC_0_CS_IRQ		1
#define CMDQ_SYNC_0_CS_SEV		2
#define CMDQ_SYNC_0_MSH			GENMASK_ULL(23, 22)
#define CMDQ_SYNC_0_MSIATTR		GENMASK_ULL(27, 24)
#define CMDQ_SYNC_0_MSIDATA		GENMASK_ULL(63, 32)
#define CMDQ_SYNC_1_MSIADDR_MASK	GENMASK_ULL(51, 2)
384e86d1aa8SWill Deacon 
/* Event queue */
#define EVTQ_ENT_SZ_SHIFT		5
#define EVTQ_ENT_DWORDS			((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
#define EVTQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)

#define EVTQ_0_ID			GENMASK_ULL(7, 0)

/* PRI queue */
#define PRIQ_ENT_SZ_SHIFT		4
#define PRIQ_ENT_DWORDS			((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
#define PRIQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)

#define PRIQ_0_SID			GENMASK_ULL(31, 0)
#define PRIQ_0_SSID			GENMASK_ULL(51, 32)
#define PRIQ_0_PERM_PRIV		(1UL << 58)
#define PRIQ_0_PERM_EXEC		(1UL << 59)
#define PRIQ_0_PERM_READ		(1UL << 60)
#define PRIQ_0_PERM_WRITE		(1UL << 61)
#define PRIQ_0_PRG_LAST			(1UL << 62)
#define PRIQ_0_SSID_V			(1UL << 63)

#define PRIQ_1_PRG_IDX			GENMASK_ULL(8, 0)
#define PRIQ_1_ADDR_MASK		GENMASK_ULL(63, 12)

/* High-level queue structures */
#define ARM_SMMU_POLL_TIMEOUT_US	1000000 /* 1s! */
#define ARM_SMMU_POLL_SPIN_COUNT	10

/* Software-reserved IOVA window for mapping MSI doorbells */
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
415e86d1aa8SWill Deacon 
416e86d1aa8SWill Deacon static bool disable_bypass = 1;
417e86d1aa8SWill Deacon module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
418e86d1aa8SWill Deacon MODULE_PARM_DESC(disable_bypass,
419e86d1aa8SWill Deacon 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
420e86d1aa8SWill Deacon 
/*
 * Response codes for a PRI (Page Request Interface) request, written into
 * the CMDQ_PRI_1_RESP field of a PRI_RESP command. Values are fixed by the
 * command encoding — do not renumber.
 */
enum pri_resp {
	PRI_RESP_DENY = 0,	/* request denied; device must not retry */
	PRI_RESP_FAIL = 1,	/* catastrophic failure */
	PRI_RESP_SUCC = 2,	/* page(s) made resident; device may proceed */
};
426e86d1aa8SWill Deacon 
/*
 * Indices into arm_smmu_msi_cfg[] below, one per SMMU-generated MSI source.
 * ARM_SMMU_MAX_MSIS doubles as the array length.
 */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};
433e86d1aa8SWill Deacon 
/*
 * Per-MSI-source register offsets: { doorbell address (CFG0), payload data
 * (CFG1), memory attributes (CFG2) }, indexed by enum arm_smmu_msi_index.
 */
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};
451e86d1aa8SWill Deacon 
/*
 * In-memory (not wire-format) representation of one command queue entry.
 * The opcode selects which member of the union is meaningful; the
 * CMDQ_OP_* macros are kept next to the struct they parameterise.
 */
struct arm_smmu_cmdq_ent {
	/* Common fields */
	u8				opcode;
	bool				substream_valid;

	/* Command-specific fields */
	union {
		#define CMDQ_OP_PREFETCH_CFG	0x1
		struct {
			u32			sid;
			u8			size;
			u64			addr;
		} prefetch;

		#define CMDQ_OP_CFGI_STE	0x3
		#define CMDQ_OP_CFGI_ALL	0x4
		#define CMDQ_OP_CFGI_CD		0x5
		#define CMDQ_OP_CFGI_CD_ALL	0x6
		struct {
			u32			sid;
			u32			ssid;
			union {
				bool		leaf;	/* single-entry invalidation */
				u8		span;	/* range invalidation (CFGI_ALL) */
			};
		} cfgi;

		#define CMDQ_OP_TLBI_NH_ASID	0x11
		#define CMDQ_OP_TLBI_NH_VA	0x12
		#define CMDQ_OP_TLBI_EL2_ALL	0x20
		#define CMDQ_OP_TLBI_S12_VMALL	0x28
		#define CMDQ_OP_TLBI_S2_IPA	0x2a
		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
		struct {
			u8			num;
			u8			scale;
			u16			asid;
			u16			vmid;
			bool			leaf;
			u8			ttl;
			u8			tg;
			u64			addr;
		} tlbi;

		#define CMDQ_OP_ATC_INV		0x40
		#define ATC_INV_SIZE_ALL	52
		struct {
			u32			sid;
			u32			ssid;
			u64			addr;
			u8			size;	/* log2 size of the range */
			bool			global;
		} atc;

		#define CMDQ_OP_PRI_RESP	0x41
		struct {
			u32			sid;
			u32			ssid;
			u16			grpid;
			enum pri_resp		resp;
		} pri;

		#define CMDQ_OP_CMD_SYNC	0x46
		struct {
			u64			msiaddr;
		} sync;
	};
};
520e86d1aa8SWill Deacon 
/*
 * Lockless producer/consumer state for a queue. The three union views alias
 * the same 64 bits: a single u64 snapshot, plain {prod, cons} words, and
 * atomic_t overlays for concurrent command submission. The padding forces
 * the hot prod/cons pair onto its own cache line, away from max_n_shift.
 */
struct arm_smmu_ll_queue {
	union {
		u64			val;
		struct {
			u32		prod;
			u32		cons;
		};
		struct {
			atomic_t	prod;
			atomic_t	cons;
		} atomic;
		u8			__pad[SMP_CACHE_BYTES];
	} ____cacheline_aligned_in_smp;
	u32				max_n_shift;	/* log2 of the queue depth */
};
536e86d1aa8SWill Deacon 
/* One hardware circular queue (cmdq/evtq/priq) and its MMIO pointers. */
struct arm_smmu_queue {
	struct arm_smmu_ll_queue	llq;
	int				irq; /* Wired interrupt */

	__le64				*base;		/* CPU address of the ring */
	dma_addr_t			base_dma;	/* DMA address of the ring */
	u64				q_base;		/* value programmed into *_BASE */

	size_t				ent_dwords;	/* entry size in 64-bit words */

	u32 __iomem			*prod_reg;
	u32 __iomem			*cons_reg;
};
550e86d1aa8SWill Deacon 
/* State for spinning/backing off while waiting on a queue condition. */
struct arm_smmu_queue_poll {
	ktime_t				timeout;	/* absolute deadline */
	unsigned int			delay;		/* current backoff in us */
	unsigned int			spin_cnt;	/* busy spins before backing off */
	bool				wfe;		/* use WFE instead of spinning */
};
557e86d1aa8SWill Deacon 
/* The command queue plus the bookkeeping for lock-free batched insertion. */
struct arm_smmu_cmdq {
	struct arm_smmu_queue		q;
	atomic_long_t			*valid_map;	/* one bit per entry: published? */
	atomic_t			owner_prod;
	atomic_t			lock;
};
564e86d1aa8SWill Deacon 
/* A stack buffer of pre-built commands submitted to the cmdq in one go. */
struct arm_smmu_cmdq_batch {
	u64				cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
	int				num;	/* commands currently in cmds[] */
};
569e86d1aa8SWill Deacon 
/* Event queue state; max_stalls bounds outstanding stalled transactions. */
struct arm_smmu_evtq {
	struct arm_smmu_queue		q;
	u32				max_stalls;
};
574e86d1aa8SWill Deacon 
/* PRI (Page Request Interface) queue state. */
struct arm_smmu_priq {
	struct arm_smmu_queue		q;
};
578e86d1aa8SWill Deacon 
/* High-level stream table and context descriptor structures */

/* Software shadow of one level-1 stream table descriptor. */
struct arm_smmu_strtab_l1_desc {
	u8				span;		/* STRTAB_L1_DESC_SPAN value */

	__le64				*l2ptr;		/* CPU address of the L2 table */
	dma_addr_t			l2ptr_dma;	/* DMA address of the L2 table */
};
586e86d1aa8SWill Deacon 
/* Software view of one stage-1 context descriptor (one address space). */
struct arm_smmu_ctx_desc {
	u16				asid;
	u64				ttbr;
	u64				tcr;
	u64				mair;
};
593e86d1aa8SWill Deacon 
/* Software shadow of one level-1 CD table descriptor. */
struct arm_smmu_l1_ctx_desc {
	__le64				*l2ptr;		/* CPU address of the L2 CD table */
	dma_addr_t			l2ptr_dma;	/* DMA address of the L2 CD table */
};
598e86d1aa8SWill Deacon 
/*
 * A context descriptor table: either a linear array of CDs (l1_desc == NULL)
 * or a 2-level table with num_l1_ents lazily-allocated leaves.
 */
struct arm_smmu_ctx_desc_cfg {
	__le64				*cdtab;
	dma_addr_t			cdtab_dma;
	struct arm_smmu_l1_ctx_desc	*l1_desc;
	unsigned int			num_l1_ents;
};
605e86d1aa8SWill Deacon 
/* Stage-1 translation config: CD table plus the default (SSID 0) context. */
struct arm_smmu_s1_cfg {
	struct arm_smmu_ctx_desc_cfg	cdcfg;
	struct arm_smmu_ctx_desc	cd;
	u8				s1fmt;		/* STRTAB_STE_0_S1FMT_* */
	u8				s1cdmax;	/* log2 of supported SSIDs */
};
612e86d1aa8SWill Deacon 
/* Stage-2 translation config, written into STE dwords 2-3. */
struct arm_smmu_s2_cfg {
	u16				vmid;
	u64				vttbr;
	u64				vtcr;
};
618e86d1aa8SWill Deacon 
/*
 * The stream table itself: the (L1 or linear) table, its per-leaf shadow
 * descriptors, and the cached values for the STRTAB_BASE{,_CFG} registers.
 */
struct arm_smmu_strtab_cfg {
	__le64				*strtab;
	dma_addr_t			strtab_dma;
	struct arm_smmu_strtab_l1_desc	*l1_desc;
	unsigned int			num_l1_ents;

	u64				strtab_base;		/* ARM_SMMU_STRTAB_BASE value */
	u32				strtab_base_cfg;	/* ARM_SMMU_STRTAB_BASE_CFG value */
};
628e86d1aa8SWill Deacon 
629e86d1aa8SWill Deacon /* An SMMUv3 instance */
630e86d1aa8SWill Deacon struct arm_smmu_device {
631e86d1aa8SWill Deacon 	struct device			*dev;
632e86d1aa8SWill Deacon 	void __iomem			*base;
633e86d1aa8SWill Deacon 	void __iomem			*page1;
634e86d1aa8SWill Deacon 
635e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_2_LVL_STRTAB	(1 << 0)
636e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_2_LVL_CDTAB	(1 << 1)
637e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_TT_LE		(1 << 2)
638e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_TT_BE		(1 << 3)
639e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_PRI		(1 << 4)
640e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_ATS		(1 << 5)
641e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_SEV		(1 << 6)
642e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_MSI		(1 << 7)
643e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_COHERENCY		(1 << 8)
644e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_TRANS_S1		(1 << 9)
645e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_TRANS_S2		(1 << 10)
646e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_STALLS		(1 << 11)
647e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_HYP		(1 << 12)
648e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
649e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_VAX		(1 << 14)
650e86d1aa8SWill Deacon #define ARM_SMMU_FEAT_RANGE_INV		(1 << 15)
651e86d1aa8SWill Deacon 	u32				features;
652e86d1aa8SWill Deacon 
653e86d1aa8SWill Deacon #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
654e86d1aa8SWill Deacon #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
655e86d1aa8SWill Deacon 	u32				options;
656e86d1aa8SWill Deacon 
657e86d1aa8SWill Deacon 	struct arm_smmu_cmdq		cmdq;
658e86d1aa8SWill Deacon 	struct arm_smmu_evtq		evtq;
659e86d1aa8SWill Deacon 	struct arm_smmu_priq		priq;
660e86d1aa8SWill Deacon 
661e86d1aa8SWill Deacon 	int				gerr_irq;
662e86d1aa8SWill Deacon 	int				combined_irq;
663e86d1aa8SWill Deacon 
664e86d1aa8SWill Deacon 	unsigned long			ias; /* IPA */
665e86d1aa8SWill Deacon 	unsigned long			oas; /* PA */
666e86d1aa8SWill Deacon 	unsigned long			pgsize_bitmap;
667e86d1aa8SWill Deacon 
668e86d1aa8SWill Deacon #define ARM_SMMU_MAX_ASIDS		(1 << 16)
669e86d1aa8SWill Deacon 	unsigned int			asid_bits;
670e86d1aa8SWill Deacon 
671e86d1aa8SWill Deacon #define ARM_SMMU_MAX_VMIDS		(1 << 16)
672e86d1aa8SWill Deacon 	unsigned int			vmid_bits;
673e86d1aa8SWill Deacon 	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
674e86d1aa8SWill Deacon 
675e86d1aa8SWill Deacon 	unsigned int			ssid_bits;
676e86d1aa8SWill Deacon 	unsigned int			sid_bits;
677e86d1aa8SWill Deacon 
678e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg	strtab_cfg;
679e86d1aa8SWill Deacon 
680e86d1aa8SWill Deacon 	/* IOMMU core code handle */
681e86d1aa8SWill Deacon 	struct iommu_device		iommu;
682e86d1aa8SWill Deacon };
683e86d1aa8SWill Deacon 
684e86d1aa8SWill Deacon /* SMMU private data for each master */
685e86d1aa8SWill Deacon struct arm_smmu_master {
686e86d1aa8SWill Deacon 	struct arm_smmu_device		*smmu;
687e86d1aa8SWill Deacon 	struct device			*dev;
688e86d1aa8SWill Deacon 	struct arm_smmu_domain		*domain;
689e86d1aa8SWill Deacon 	struct list_head		domain_head;
690e86d1aa8SWill Deacon 	u32				*sids;
691e86d1aa8SWill Deacon 	unsigned int			num_sids;
692e86d1aa8SWill Deacon 	bool				ats_enabled;
693e86d1aa8SWill Deacon 	unsigned int			ssid_bits;
694e86d1aa8SWill Deacon };
695e86d1aa8SWill Deacon 
696e86d1aa8SWill Deacon /* SMMU private data for an IOMMU domain */
697e86d1aa8SWill Deacon enum arm_smmu_domain_stage {
698e86d1aa8SWill Deacon 	ARM_SMMU_DOMAIN_S1 = 0,
699e86d1aa8SWill Deacon 	ARM_SMMU_DOMAIN_S2,
700e86d1aa8SWill Deacon 	ARM_SMMU_DOMAIN_NESTED,
701e86d1aa8SWill Deacon 	ARM_SMMU_DOMAIN_BYPASS,
702e86d1aa8SWill Deacon };
703e86d1aa8SWill Deacon 
704e86d1aa8SWill Deacon struct arm_smmu_domain {
705e86d1aa8SWill Deacon 	struct arm_smmu_device		*smmu;
706e86d1aa8SWill Deacon 	struct mutex			init_mutex; /* Protects smmu pointer */
707e86d1aa8SWill Deacon 
708e86d1aa8SWill Deacon 	struct io_pgtable_ops		*pgtbl_ops;
709e86d1aa8SWill Deacon 	bool				non_strict;
710e86d1aa8SWill Deacon 	atomic_t			nr_ats_masters;
711e86d1aa8SWill Deacon 
712e86d1aa8SWill Deacon 	enum arm_smmu_domain_stage	stage;
713e86d1aa8SWill Deacon 	union {
714e86d1aa8SWill Deacon 		struct arm_smmu_s1_cfg	s1_cfg;
715e86d1aa8SWill Deacon 		struct arm_smmu_s2_cfg	s2_cfg;
716e86d1aa8SWill Deacon 	};
717e86d1aa8SWill Deacon 
718e86d1aa8SWill Deacon 	struct iommu_domain		domain;
719e86d1aa8SWill Deacon 
720e86d1aa8SWill Deacon 	struct list_head		devices;
721e86d1aa8SWill Deacon 	spinlock_t			devices_lock;
722e86d1aa8SWill Deacon };
723e86d1aa8SWill Deacon 
724e86d1aa8SWill Deacon struct arm_smmu_option_prop {
725e86d1aa8SWill Deacon 	u32 opt;
726e86d1aa8SWill Deacon 	const char *prop;
727e86d1aa8SWill Deacon };
728e86d1aa8SWill Deacon 
729e86d1aa8SWill Deacon static DEFINE_XARRAY_ALLOC1(asid_xa);
730e86d1aa8SWill Deacon 
731e86d1aa8SWill Deacon static struct arm_smmu_option_prop arm_smmu_options[] = {
732e86d1aa8SWill Deacon 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
733e86d1aa8SWill Deacon 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
734e86d1aa8SWill Deacon 	{ 0, NULL},
735e86d1aa8SWill Deacon };
736e86d1aa8SWill Deacon 
737e86d1aa8SWill Deacon static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
738e86d1aa8SWill Deacon 						 struct arm_smmu_device *smmu)
739e86d1aa8SWill Deacon {
740e86d1aa8SWill Deacon 	if (offset > SZ_64K)
741e86d1aa8SWill Deacon 		return smmu->page1 + offset - SZ_64K;
742e86d1aa8SWill Deacon 
743e86d1aa8SWill Deacon 	return smmu->base + offset;
744e86d1aa8SWill Deacon }
745e86d1aa8SWill Deacon 
746e86d1aa8SWill Deacon static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
747e86d1aa8SWill Deacon {
748e86d1aa8SWill Deacon 	return container_of(dom, struct arm_smmu_domain, domain);
749e86d1aa8SWill Deacon }
750e86d1aa8SWill Deacon 
751e86d1aa8SWill Deacon static void parse_driver_options(struct arm_smmu_device *smmu)
752e86d1aa8SWill Deacon {
753e86d1aa8SWill Deacon 	int i = 0;
754e86d1aa8SWill Deacon 
755e86d1aa8SWill Deacon 	do {
756e86d1aa8SWill Deacon 		if (of_property_read_bool(smmu->dev->of_node,
757e86d1aa8SWill Deacon 						arm_smmu_options[i].prop)) {
758e86d1aa8SWill Deacon 			smmu->options |= arm_smmu_options[i].opt;
759e86d1aa8SWill Deacon 			dev_notice(smmu->dev, "option %s\n",
760e86d1aa8SWill Deacon 				arm_smmu_options[i].prop);
761e86d1aa8SWill Deacon 		}
762e86d1aa8SWill Deacon 	} while (arm_smmu_options[++i].opt);
763e86d1aa8SWill Deacon }
764e86d1aa8SWill Deacon 
765e86d1aa8SWill Deacon /* Low-level queue manipulation functions */
766e86d1aa8SWill Deacon static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
767e86d1aa8SWill Deacon {
768e86d1aa8SWill Deacon 	u32 space, prod, cons;
769e86d1aa8SWill Deacon 
770e86d1aa8SWill Deacon 	prod = Q_IDX(q, q->prod);
771e86d1aa8SWill Deacon 	cons = Q_IDX(q, q->cons);
772e86d1aa8SWill Deacon 
773e86d1aa8SWill Deacon 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
774e86d1aa8SWill Deacon 		space = (1 << q->max_n_shift) - (prod - cons);
775e86d1aa8SWill Deacon 	else
776e86d1aa8SWill Deacon 		space = cons - prod;
777e86d1aa8SWill Deacon 
778e86d1aa8SWill Deacon 	return space >= n;
779e86d1aa8SWill Deacon }
780e86d1aa8SWill Deacon 
781e86d1aa8SWill Deacon static bool queue_full(struct arm_smmu_ll_queue *q)
782e86d1aa8SWill Deacon {
783e86d1aa8SWill Deacon 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
784e86d1aa8SWill Deacon 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
785e86d1aa8SWill Deacon }
786e86d1aa8SWill Deacon 
787e86d1aa8SWill Deacon static bool queue_empty(struct arm_smmu_ll_queue *q)
788e86d1aa8SWill Deacon {
789e86d1aa8SWill Deacon 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
790e86d1aa8SWill Deacon 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
791e86d1aa8SWill Deacon }
792e86d1aa8SWill Deacon 
793e86d1aa8SWill Deacon static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
794e86d1aa8SWill Deacon {
795e86d1aa8SWill Deacon 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
796e86d1aa8SWill Deacon 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
797e86d1aa8SWill Deacon 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
798e86d1aa8SWill Deacon 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
799e86d1aa8SWill Deacon }
800e86d1aa8SWill Deacon 
801e86d1aa8SWill Deacon static void queue_sync_cons_out(struct arm_smmu_queue *q)
802e86d1aa8SWill Deacon {
803e86d1aa8SWill Deacon 	/*
804e86d1aa8SWill Deacon 	 * Ensure that all CPU accesses (reads and writes) to the queue
805e86d1aa8SWill Deacon 	 * are complete before we update the cons pointer.
806e86d1aa8SWill Deacon 	 */
807e86d1aa8SWill Deacon 	mb();
808e86d1aa8SWill Deacon 	writel_relaxed(q->llq.cons, q->cons_reg);
809e86d1aa8SWill Deacon }
810e86d1aa8SWill Deacon 
811e86d1aa8SWill Deacon static void queue_inc_cons(struct arm_smmu_ll_queue *q)
812e86d1aa8SWill Deacon {
813e86d1aa8SWill Deacon 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
814e86d1aa8SWill Deacon 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
815e86d1aa8SWill Deacon }
816e86d1aa8SWill Deacon 
817e86d1aa8SWill Deacon static int queue_sync_prod_in(struct arm_smmu_queue *q)
818e86d1aa8SWill Deacon {
819e86d1aa8SWill Deacon 	int ret = 0;
820e86d1aa8SWill Deacon 	u32 prod = readl_relaxed(q->prod_reg);
821e86d1aa8SWill Deacon 
822e86d1aa8SWill Deacon 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
823e86d1aa8SWill Deacon 		ret = -EOVERFLOW;
824e86d1aa8SWill Deacon 
825e86d1aa8SWill Deacon 	q->llq.prod = prod;
826e86d1aa8SWill Deacon 	return ret;
827e86d1aa8SWill Deacon }
828e86d1aa8SWill Deacon 
829e86d1aa8SWill Deacon static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
830e86d1aa8SWill Deacon {
831e86d1aa8SWill Deacon 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
832e86d1aa8SWill Deacon 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
833e86d1aa8SWill Deacon }
834e86d1aa8SWill Deacon 
835e86d1aa8SWill Deacon static void queue_poll_init(struct arm_smmu_device *smmu,
836e86d1aa8SWill Deacon 			    struct arm_smmu_queue_poll *qp)
837e86d1aa8SWill Deacon {
838e86d1aa8SWill Deacon 	qp->delay = 1;
839e86d1aa8SWill Deacon 	qp->spin_cnt = 0;
840e86d1aa8SWill Deacon 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
841e86d1aa8SWill Deacon 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
842e86d1aa8SWill Deacon }
843e86d1aa8SWill Deacon 
844e86d1aa8SWill Deacon static int queue_poll(struct arm_smmu_queue_poll *qp)
845e86d1aa8SWill Deacon {
846e86d1aa8SWill Deacon 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
847e86d1aa8SWill Deacon 		return -ETIMEDOUT;
848e86d1aa8SWill Deacon 
849e86d1aa8SWill Deacon 	if (qp->wfe) {
850e86d1aa8SWill Deacon 		wfe();
851e86d1aa8SWill Deacon 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
852e86d1aa8SWill Deacon 		cpu_relax();
853e86d1aa8SWill Deacon 	} else {
854e86d1aa8SWill Deacon 		udelay(qp->delay);
855e86d1aa8SWill Deacon 		qp->delay *= 2;
856e86d1aa8SWill Deacon 		qp->spin_cnt = 0;
857e86d1aa8SWill Deacon 	}
858e86d1aa8SWill Deacon 
859e86d1aa8SWill Deacon 	return 0;
860e86d1aa8SWill Deacon }
861e86d1aa8SWill Deacon 
862e86d1aa8SWill Deacon static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
863e86d1aa8SWill Deacon {
864e86d1aa8SWill Deacon 	int i;
865e86d1aa8SWill Deacon 
866e86d1aa8SWill Deacon 	for (i = 0; i < n_dwords; ++i)
867e86d1aa8SWill Deacon 		*dst++ = cpu_to_le64(*src++);
868e86d1aa8SWill Deacon }
869e86d1aa8SWill Deacon 
870e86d1aa8SWill Deacon static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
871e86d1aa8SWill Deacon {
872e86d1aa8SWill Deacon 	int i;
873e86d1aa8SWill Deacon 
874e86d1aa8SWill Deacon 	for (i = 0; i < n_dwords; ++i)
875e86d1aa8SWill Deacon 		*dst++ = le64_to_cpu(*src++);
876e86d1aa8SWill Deacon }
877e86d1aa8SWill Deacon 
878e86d1aa8SWill Deacon static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
879e86d1aa8SWill Deacon {
880e86d1aa8SWill Deacon 	if (queue_empty(&q->llq))
881e86d1aa8SWill Deacon 		return -EAGAIN;
882e86d1aa8SWill Deacon 
883e86d1aa8SWill Deacon 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
884e86d1aa8SWill Deacon 	queue_inc_cons(&q->llq);
885e86d1aa8SWill Deacon 	queue_sync_cons_out(q);
886e86d1aa8SWill Deacon 	return 0;
887e86d1aa8SWill Deacon }
888e86d1aa8SWill Deacon 
889e86d1aa8SWill Deacon /* High-level queue accessors */
890e86d1aa8SWill Deacon static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
891e86d1aa8SWill Deacon {
892e86d1aa8SWill Deacon 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
893e86d1aa8SWill Deacon 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
894e86d1aa8SWill Deacon 
895e86d1aa8SWill Deacon 	switch (ent->opcode) {
896e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_EL2_ALL:
897e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_NSNH_ALL:
898e86d1aa8SWill Deacon 		break;
899e86d1aa8SWill Deacon 	case CMDQ_OP_PREFETCH_CFG:
900e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
901e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
902e86d1aa8SWill Deacon 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
903e86d1aa8SWill Deacon 		break;
904e86d1aa8SWill Deacon 	case CMDQ_OP_CFGI_CD:
905e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
906e86d1aa8SWill Deacon 		/* Fallthrough */
907e86d1aa8SWill Deacon 	case CMDQ_OP_CFGI_STE:
908e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
909e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
910e86d1aa8SWill Deacon 		break;
911e86d1aa8SWill Deacon 	case CMDQ_OP_CFGI_CD_ALL:
912e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
913e86d1aa8SWill Deacon 		break;
914e86d1aa8SWill Deacon 	case CMDQ_OP_CFGI_ALL:
915e86d1aa8SWill Deacon 		/* Cover the entire SID range */
916e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
917e86d1aa8SWill Deacon 		break;
918e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_NH_VA:
919e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
920e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
921e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
922e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
923e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
924e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
925e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
926e86d1aa8SWill Deacon 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
927e86d1aa8SWill Deacon 		break;
928e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_S2_IPA:
929e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
930e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
931e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
932e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
933e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
934e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
935e86d1aa8SWill Deacon 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
936e86d1aa8SWill Deacon 		break;
937e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_NH_ASID:
938e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
939e86d1aa8SWill Deacon 		/* Fallthrough */
940e86d1aa8SWill Deacon 	case CMDQ_OP_TLBI_S12_VMALL:
941e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
942e86d1aa8SWill Deacon 		break;
943e86d1aa8SWill Deacon 	case CMDQ_OP_ATC_INV:
944e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
945e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
946e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
947e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
948e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
949e86d1aa8SWill Deacon 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
950e86d1aa8SWill Deacon 		break;
951e86d1aa8SWill Deacon 	case CMDQ_OP_PRI_RESP:
952e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
953e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
954e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
955e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
956e86d1aa8SWill Deacon 		switch (ent->pri.resp) {
957e86d1aa8SWill Deacon 		case PRI_RESP_DENY:
958e86d1aa8SWill Deacon 		case PRI_RESP_FAIL:
959e86d1aa8SWill Deacon 		case PRI_RESP_SUCC:
960e86d1aa8SWill Deacon 			break;
961e86d1aa8SWill Deacon 		default:
962e86d1aa8SWill Deacon 			return -EINVAL;
963e86d1aa8SWill Deacon 		}
964e86d1aa8SWill Deacon 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
965e86d1aa8SWill Deacon 		break;
966e86d1aa8SWill Deacon 	case CMDQ_OP_CMD_SYNC:
967e86d1aa8SWill Deacon 		if (ent->sync.msiaddr) {
968e86d1aa8SWill Deacon 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
969e86d1aa8SWill Deacon 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
970e86d1aa8SWill Deacon 		} else {
971e86d1aa8SWill Deacon 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
972e86d1aa8SWill Deacon 		}
973e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
974e86d1aa8SWill Deacon 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
975e86d1aa8SWill Deacon 		break;
976e86d1aa8SWill Deacon 	default:
977e86d1aa8SWill Deacon 		return -ENOENT;
978e86d1aa8SWill Deacon 	}
979e86d1aa8SWill Deacon 
980e86d1aa8SWill Deacon 	return 0;
981e86d1aa8SWill Deacon }
982e86d1aa8SWill Deacon 
983e86d1aa8SWill Deacon static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
984e86d1aa8SWill Deacon 					 u32 prod)
985e86d1aa8SWill Deacon {
986e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->cmdq.q;
987e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent ent = {
988e86d1aa8SWill Deacon 		.opcode = CMDQ_OP_CMD_SYNC,
989e86d1aa8SWill Deacon 	};
990e86d1aa8SWill Deacon 
991e86d1aa8SWill Deacon 	/*
992e86d1aa8SWill Deacon 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
993e86d1aa8SWill Deacon 	 * payload, so the write will zero the entire command on that platform.
994e86d1aa8SWill Deacon 	 */
995e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_MSI &&
996e86d1aa8SWill Deacon 	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
997e86d1aa8SWill Deacon 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
998e86d1aa8SWill Deacon 				   q->ent_dwords * 8;
999e86d1aa8SWill Deacon 	}
1000e86d1aa8SWill Deacon 
1001e86d1aa8SWill Deacon 	arm_smmu_cmdq_build_cmd(cmd, &ent);
1002e86d1aa8SWill Deacon }
1003e86d1aa8SWill Deacon 
1004e86d1aa8SWill Deacon static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
1005e86d1aa8SWill Deacon {
1006e86d1aa8SWill Deacon 	static const char *cerror_str[] = {
1007e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
1008e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
1009e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
1010e86d1aa8SWill Deacon 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
1011e86d1aa8SWill Deacon 	};
1012e86d1aa8SWill Deacon 
1013e86d1aa8SWill Deacon 	int i;
1014e86d1aa8SWill Deacon 	u64 cmd[CMDQ_ENT_DWORDS];
1015e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->cmdq.q;
1016e86d1aa8SWill Deacon 	u32 cons = readl_relaxed(q->cons_reg);
1017e86d1aa8SWill Deacon 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
1018e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd_sync = {
1019e86d1aa8SWill Deacon 		.opcode = CMDQ_OP_CMD_SYNC,
1020e86d1aa8SWill Deacon 	};
1021e86d1aa8SWill Deacon 
1022e86d1aa8SWill Deacon 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
1023e86d1aa8SWill Deacon 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
1024e86d1aa8SWill Deacon 
1025e86d1aa8SWill Deacon 	switch (idx) {
1026e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_ABT_IDX:
1027e86d1aa8SWill Deacon 		dev_err(smmu->dev, "retrying command fetch\n");
1028e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_NONE_IDX:
1029e86d1aa8SWill Deacon 		return;
1030e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
1031e86d1aa8SWill Deacon 		/*
1032e86d1aa8SWill Deacon 		 * ATC Invalidation Completion timeout. CONS is still pointing
1033e86d1aa8SWill Deacon 		 * at the CMD_SYNC. Attempt to complete other pending commands
1034e86d1aa8SWill Deacon 		 * by repeating the CMD_SYNC, though we might well end up back
1035e86d1aa8SWill Deacon 		 * here since the ATC invalidation may still be pending.
1036e86d1aa8SWill Deacon 		 */
1037e86d1aa8SWill Deacon 		return;
1038e86d1aa8SWill Deacon 	case CMDQ_ERR_CERROR_ILL_IDX:
1039e86d1aa8SWill Deacon 		/* Fallthrough */
1040e86d1aa8SWill Deacon 	default:
1041e86d1aa8SWill Deacon 		break;
1042e86d1aa8SWill Deacon 	}
1043e86d1aa8SWill Deacon 
1044e86d1aa8SWill Deacon 	/*
1045e86d1aa8SWill Deacon 	 * We may have concurrent producers, so we need to be careful
1046e86d1aa8SWill Deacon 	 * not to touch any of the shadow cmdq state.
1047e86d1aa8SWill Deacon 	 */
1048e86d1aa8SWill Deacon 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
1049e86d1aa8SWill Deacon 	dev_err(smmu->dev, "skipping command in error state:\n");
1050e86d1aa8SWill Deacon 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
1051e86d1aa8SWill Deacon 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
1052e86d1aa8SWill Deacon 
1053e86d1aa8SWill Deacon 	/* Convert the erroneous command into a CMD_SYNC */
1054e86d1aa8SWill Deacon 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
1055e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
1056e86d1aa8SWill Deacon 		return;
1057e86d1aa8SWill Deacon 	}
1058e86d1aa8SWill Deacon 
1059e86d1aa8SWill Deacon 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
1060e86d1aa8SWill Deacon }
1061e86d1aa8SWill Deacon 
1062e86d1aa8SWill Deacon /*
1063e86d1aa8SWill Deacon  * Command queue locking.
1064e86d1aa8SWill Deacon  * This is a form of bastardised rwlock with the following major changes:
1065e86d1aa8SWill Deacon  *
1066e86d1aa8SWill Deacon  * - The only LOCK routines are exclusive_trylock() and shared_lock().
1067e86d1aa8SWill Deacon  *   Neither have barrier semantics, and instead provide only a control
1068e86d1aa8SWill Deacon  *   dependency.
1069e86d1aa8SWill Deacon  *
1070e86d1aa8SWill Deacon  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
1071e86d1aa8SWill Deacon  *   fails if the caller appears to be the last lock holder (yes, this is
1072e86d1aa8SWill Deacon  *   racy). All successful UNLOCK routines have RELEASE semantics.
1073e86d1aa8SWill Deacon  */
1074e86d1aa8SWill Deacon static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
1075e86d1aa8SWill Deacon {
1076e86d1aa8SWill Deacon 	int val;
1077e86d1aa8SWill Deacon 
1078e86d1aa8SWill Deacon 	/*
1079e86d1aa8SWill Deacon 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
1080e86d1aa8SWill Deacon 	 * lock counter. When held in exclusive state, the lock counter is set
1081e86d1aa8SWill Deacon 	 * to INT_MIN so these increments won't hurt as the value will remain
1082e86d1aa8SWill Deacon 	 * negative.
1083e86d1aa8SWill Deacon 	 */
1084e86d1aa8SWill Deacon 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
1085e86d1aa8SWill Deacon 		return;
1086e86d1aa8SWill Deacon 
1087e86d1aa8SWill Deacon 	do {
1088e86d1aa8SWill Deacon 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
1089e86d1aa8SWill Deacon 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
1090e86d1aa8SWill Deacon }
1091e86d1aa8SWill Deacon 
1092e86d1aa8SWill Deacon static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
1093e86d1aa8SWill Deacon {
1094e86d1aa8SWill Deacon 	(void)atomic_dec_return_release(&cmdq->lock);
1095e86d1aa8SWill Deacon }
1096e86d1aa8SWill Deacon 
1097e86d1aa8SWill Deacon static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
1098e86d1aa8SWill Deacon {
1099e86d1aa8SWill Deacon 	if (atomic_read(&cmdq->lock) == 1)
1100e86d1aa8SWill Deacon 		return false;
1101e86d1aa8SWill Deacon 
1102e86d1aa8SWill Deacon 	arm_smmu_cmdq_shared_unlock(cmdq);
1103e86d1aa8SWill Deacon 	return true;
1104e86d1aa8SWill Deacon }
1105e86d1aa8SWill Deacon 
1106e86d1aa8SWill Deacon #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
1107e86d1aa8SWill Deacon ({									\
1108e86d1aa8SWill Deacon 	bool __ret;							\
1109e86d1aa8SWill Deacon 	local_irq_save(flags);						\
1110e86d1aa8SWill Deacon 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
1111e86d1aa8SWill Deacon 	if (!__ret)							\
1112e86d1aa8SWill Deacon 		local_irq_restore(flags);				\
1113e86d1aa8SWill Deacon 	__ret;								\
1114e86d1aa8SWill Deacon })
1115e86d1aa8SWill Deacon 
1116e86d1aa8SWill Deacon #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
1117e86d1aa8SWill Deacon ({									\
1118e86d1aa8SWill Deacon 	atomic_set_release(&cmdq->lock, 0);				\
1119e86d1aa8SWill Deacon 	local_irq_restore(flags);					\
1120e86d1aa8SWill Deacon })
1121e86d1aa8SWill Deacon 
1122e86d1aa8SWill Deacon 
1123e86d1aa8SWill Deacon /*
1124e86d1aa8SWill Deacon  * Command queue insertion.
1125e86d1aa8SWill Deacon  * This is made fiddly by our attempts to achieve some sort of scalability
1126e86d1aa8SWill Deacon  * since there is one queue shared amongst all of the CPUs in the system.  If
1127e86d1aa8SWill Deacon  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
1128e86d1aa8SWill Deacon  * then you'll *love* this monstrosity.
1129e86d1aa8SWill Deacon  *
1130e86d1aa8SWill Deacon  * The basic idea is to split the queue up into ranges of commands that are
1131e86d1aa8SWill Deacon  * owned by a given CPU; the owner may not have written all of the commands
1132e86d1aa8SWill Deacon  * itself, but is responsible for advancing the hardware prod pointer when
1133e86d1aa8SWill Deacon  * the time comes. The algorithm is roughly:
1134e86d1aa8SWill Deacon  *
1135e86d1aa8SWill Deacon  * 	1. Allocate some space in the queue. At this point we also discover
1136e86d1aa8SWill Deacon  *	   whether the head of the queue is currently owned by another CPU,
1137e86d1aa8SWill Deacon  *	   or whether we are the owner.
1138e86d1aa8SWill Deacon  *
1139e86d1aa8SWill Deacon  *	2. Write our commands into our allocated slots in the queue.
1140e86d1aa8SWill Deacon  *
1141e86d1aa8SWill Deacon  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
1142e86d1aa8SWill Deacon  *
1143e86d1aa8SWill Deacon  *	4. If we are an owner:
1144e86d1aa8SWill Deacon  *		a. Wait for the previous owner to finish.
1145e86d1aa8SWill Deacon  *		b. Mark the queue head as unowned, which tells us the range
1146e86d1aa8SWill Deacon  *		   that we are responsible for publishing.
1147e86d1aa8SWill Deacon  *		c. Wait for all commands in our owned range to become valid.
1148e86d1aa8SWill Deacon  *		d. Advance the hardware prod pointer.
1149e86d1aa8SWill Deacon  *		e. Tell the next owner we've finished.
1150e86d1aa8SWill Deacon  *
1151e86d1aa8SWill Deacon  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
1152e86d1aa8SWill Deacon  *	   owner), then we need to stick around until it has completed:
1153e86d1aa8SWill Deacon  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
1154e86d1aa8SWill Deacon  *		   to clear the first 4 bytes.
1155e86d1aa8SWill Deacon  *		b. Otherwise, we spin waiting for the hardware cons pointer to
1156e86d1aa8SWill Deacon  *		   advance past our command.
1157e86d1aa8SWill Deacon  *
1158e86d1aa8SWill Deacon  * The devil is in the details, particularly the use of locking for handling
1159e86d1aa8SWill Deacon  * SYNC completion and freeing up space in the queue before we think that it is
1160e86d1aa8SWill Deacon  * full.
1161e86d1aa8SWill Deacon  */
1162e86d1aa8SWill Deacon static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
1163e86d1aa8SWill Deacon 					       u32 sprod, u32 eprod, bool set)
1164e86d1aa8SWill Deacon {
1165e86d1aa8SWill Deacon 	u32 swidx, sbidx, ewidx, ebidx;
1166e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue llq = {
1167e86d1aa8SWill Deacon 		.max_n_shift	= cmdq->q.llq.max_n_shift,
1168e86d1aa8SWill Deacon 		.prod		= sprod,
1169e86d1aa8SWill Deacon 	};
1170e86d1aa8SWill Deacon 
1171e86d1aa8SWill Deacon 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
1172e86d1aa8SWill Deacon 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
1173e86d1aa8SWill Deacon 
1174e86d1aa8SWill Deacon 	while (llq.prod != eprod) {
1175e86d1aa8SWill Deacon 		unsigned long mask;
1176e86d1aa8SWill Deacon 		atomic_long_t *ptr;
1177e86d1aa8SWill Deacon 		u32 limit = BITS_PER_LONG;
1178e86d1aa8SWill Deacon 
1179e86d1aa8SWill Deacon 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
1180e86d1aa8SWill Deacon 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
1181e86d1aa8SWill Deacon 
1182e86d1aa8SWill Deacon 		ptr = &cmdq->valid_map[swidx];
1183e86d1aa8SWill Deacon 
1184e86d1aa8SWill Deacon 		if ((swidx == ewidx) && (sbidx < ebidx))
1185e86d1aa8SWill Deacon 			limit = ebidx;
1186e86d1aa8SWill Deacon 
1187e86d1aa8SWill Deacon 		mask = GENMASK(limit - 1, sbidx);
1188e86d1aa8SWill Deacon 
1189e86d1aa8SWill Deacon 		/*
1190e86d1aa8SWill Deacon 		 * The valid bit is the inverse of the wrap bit. This means
1191e86d1aa8SWill Deacon 		 * that a zero-initialised queue is invalid and, after marking
1192e86d1aa8SWill Deacon 		 * all entries as valid, they become invalid again when we
1193e86d1aa8SWill Deacon 		 * wrap.
1194e86d1aa8SWill Deacon 		 */
1195e86d1aa8SWill Deacon 		if (set) {
1196e86d1aa8SWill Deacon 			atomic_long_xor(mask, ptr);
1197e86d1aa8SWill Deacon 		} else { /* Poll */
1198e86d1aa8SWill Deacon 			unsigned long valid;
1199e86d1aa8SWill Deacon 
1200e86d1aa8SWill Deacon 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
1201e86d1aa8SWill Deacon 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
1202e86d1aa8SWill Deacon 		}
1203e86d1aa8SWill Deacon 
1204e86d1aa8SWill Deacon 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
1205e86d1aa8SWill Deacon 	}
1206e86d1aa8SWill Deacon }
1207e86d1aa8SWill Deacon 
1208e86d1aa8SWill Deacon /* Mark all entries in the range [sprod, eprod) as valid */
1209e86d1aa8SWill Deacon static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
1210e86d1aa8SWill Deacon 					u32 sprod, u32 eprod)
1211e86d1aa8SWill Deacon {
1212e86d1aa8SWill Deacon 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
1213e86d1aa8SWill Deacon }
1214e86d1aa8SWill Deacon 
1215e86d1aa8SWill Deacon /* Wait for all entries in the range [sprod, eprod) to become valid */
1216e86d1aa8SWill Deacon static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
1217e86d1aa8SWill Deacon 					 u32 sprod, u32 eprod)
1218e86d1aa8SWill Deacon {
1219e86d1aa8SWill Deacon 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
1220e86d1aa8SWill Deacon }
1221e86d1aa8SWill Deacon 
1222e86d1aa8SWill Deacon /* Wait for the command queue to become non-full */
1223e86d1aa8SWill Deacon static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
1224e86d1aa8SWill Deacon 					     struct arm_smmu_ll_queue *llq)
1225e86d1aa8SWill Deacon {
1226e86d1aa8SWill Deacon 	unsigned long flags;
1227e86d1aa8SWill Deacon 	struct arm_smmu_queue_poll qp;
1228e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1229e86d1aa8SWill Deacon 	int ret = 0;
1230e86d1aa8SWill Deacon 
1231e86d1aa8SWill Deacon 	/*
1232e86d1aa8SWill Deacon 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
1233e86d1aa8SWill Deacon 	 * that fails, spin until somebody else updates it for us.
1234e86d1aa8SWill Deacon 	 */
1235e86d1aa8SWill Deacon 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
1236e86d1aa8SWill Deacon 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
1237e86d1aa8SWill Deacon 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
1238e86d1aa8SWill Deacon 		llq->val = READ_ONCE(cmdq->q.llq.val);
1239e86d1aa8SWill Deacon 		return 0;
1240e86d1aa8SWill Deacon 	}
1241e86d1aa8SWill Deacon 
1242e86d1aa8SWill Deacon 	queue_poll_init(smmu, &qp);
1243e86d1aa8SWill Deacon 	do {
1244e86d1aa8SWill Deacon 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1245e86d1aa8SWill Deacon 		if (!queue_full(llq))
1246e86d1aa8SWill Deacon 			break;
1247e86d1aa8SWill Deacon 
1248e86d1aa8SWill Deacon 		ret = queue_poll(&qp);
1249e86d1aa8SWill Deacon 	} while (!ret);
1250e86d1aa8SWill Deacon 
1251e86d1aa8SWill Deacon 	return ret;
1252e86d1aa8SWill Deacon }
1253e86d1aa8SWill Deacon 
1254e86d1aa8SWill Deacon /*
1255e86d1aa8SWill Deacon  * Wait until the SMMU signals a CMD_SYNC completion MSI.
1256e86d1aa8SWill Deacon  * Must be called with the cmdq lock held in some capacity.
1257e86d1aa8SWill Deacon  */
1258e86d1aa8SWill Deacon static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
1259e86d1aa8SWill Deacon 					  struct arm_smmu_ll_queue *llq)
1260e86d1aa8SWill Deacon {
1261e86d1aa8SWill Deacon 	int ret = 0;
1262e86d1aa8SWill Deacon 	struct arm_smmu_queue_poll qp;
1263e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1264e86d1aa8SWill Deacon 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
1265e86d1aa8SWill Deacon 
1266e86d1aa8SWill Deacon 	queue_poll_init(smmu, &qp);
1267e86d1aa8SWill Deacon 
1268e86d1aa8SWill Deacon 	/*
1269e86d1aa8SWill Deacon 	 * The MSI won't generate an event, since it's being written back
1270e86d1aa8SWill Deacon 	 * into the command queue.
1271e86d1aa8SWill Deacon 	 */
1272e86d1aa8SWill Deacon 	qp.wfe = false;
1273e86d1aa8SWill Deacon 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
1274e86d1aa8SWill Deacon 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
1275e86d1aa8SWill Deacon 	return ret;
1276e86d1aa8SWill Deacon }
1277e86d1aa8SWill Deacon 
1278e86d1aa8SWill Deacon /*
1279e86d1aa8SWill Deacon  * Wait until the SMMU cons index passes llq->prod.
1280e86d1aa8SWill Deacon  * Must be called with the cmdq lock held in some capacity.
1281e86d1aa8SWill Deacon  */
1282e86d1aa8SWill Deacon static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
1283e86d1aa8SWill Deacon 					       struct arm_smmu_ll_queue *llq)
1284e86d1aa8SWill Deacon {
1285e86d1aa8SWill Deacon 	struct arm_smmu_queue_poll qp;
1286e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1287e86d1aa8SWill Deacon 	u32 prod = llq->prod;
1288e86d1aa8SWill Deacon 	int ret = 0;
1289e86d1aa8SWill Deacon 
1290e86d1aa8SWill Deacon 	queue_poll_init(smmu, &qp);
1291e86d1aa8SWill Deacon 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1292e86d1aa8SWill Deacon 	do {
1293e86d1aa8SWill Deacon 		if (queue_consumed(llq, prod))
1294e86d1aa8SWill Deacon 			break;
1295e86d1aa8SWill Deacon 
1296e86d1aa8SWill Deacon 		ret = queue_poll(&qp);
1297e86d1aa8SWill Deacon 
1298e86d1aa8SWill Deacon 		/*
1299e86d1aa8SWill Deacon 		 * This needs to be a readl() so that our subsequent call
1300e86d1aa8SWill Deacon 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
1301e86d1aa8SWill Deacon 		 *
1302e86d1aa8SWill Deacon 		 * Specifically, we need to ensure that we observe all
1303e86d1aa8SWill Deacon 		 * shared_lock()s by other CMD_SYNCs that share our owner,
1304e86d1aa8SWill Deacon 		 * so that a failing call to tryunlock() means that we're
1305e86d1aa8SWill Deacon 		 * the last one out and therefore we can safely advance
1306e86d1aa8SWill Deacon 		 * cmdq->q.llq.cons. Roughly speaking:
1307e86d1aa8SWill Deacon 		 *
1308e86d1aa8SWill Deacon 		 * CPU 0		CPU1			CPU2 (us)
1309e86d1aa8SWill Deacon 		 *
1310e86d1aa8SWill Deacon 		 * if (sync)
1311e86d1aa8SWill Deacon 		 * 	shared_lock();
1312e86d1aa8SWill Deacon 		 *
1313e86d1aa8SWill Deacon 		 * dma_wmb();
1314e86d1aa8SWill Deacon 		 * set_valid_map();
1315e86d1aa8SWill Deacon 		 *
1316e86d1aa8SWill Deacon 		 * 			if (owner) {
1317e86d1aa8SWill Deacon 		 *				poll_valid_map();
1318e86d1aa8SWill Deacon 		 *				<control dependency>
1319e86d1aa8SWill Deacon 		 *				writel(prod_reg);
1320e86d1aa8SWill Deacon 		 *
1321e86d1aa8SWill Deacon 		 *						readl(cons_reg);
1322e86d1aa8SWill Deacon 		 *						tryunlock();
1323e86d1aa8SWill Deacon 		 *
1324e86d1aa8SWill Deacon 		 * Requires us to see CPU 0's shared_lock() acquisition.
1325e86d1aa8SWill Deacon 		 */
1326e86d1aa8SWill Deacon 		llq->cons = readl(cmdq->q.cons_reg);
1327e86d1aa8SWill Deacon 	} while (!ret);
1328e86d1aa8SWill Deacon 
1329e86d1aa8SWill Deacon 	return ret;
1330e86d1aa8SWill Deacon }
1331e86d1aa8SWill Deacon 
1332e86d1aa8SWill Deacon static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
1333e86d1aa8SWill Deacon 					 struct arm_smmu_ll_queue *llq)
1334e86d1aa8SWill Deacon {
1335e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_MSI &&
1336e86d1aa8SWill Deacon 	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
1337e86d1aa8SWill Deacon 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
1338e86d1aa8SWill Deacon 
1339e86d1aa8SWill Deacon 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
1340e86d1aa8SWill Deacon }
1341e86d1aa8SWill Deacon 
1342e86d1aa8SWill Deacon static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
1343e86d1aa8SWill Deacon 					u32 prod, int n)
1344e86d1aa8SWill Deacon {
1345e86d1aa8SWill Deacon 	int i;
1346e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue llq = {
1347e86d1aa8SWill Deacon 		.max_n_shift	= cmdq->q.llq.max_n_shift,
1348e86d1aa8SWill Deacon 		.prod		= prod,
1349e86d1aa8SWill Deacon 	};
1350e86d1aa8SWill Deacon 
1351e86d1aa8SWill Deacon 	for (i = 0; i < n; ++i) {
1352e86d1aa8SWill Deacon 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
1353e86d1aa8SWill Deacon 
1354e86d1aa8SWill Deacon 		prod = queue_inc_prod_n(&llq, i);
1355e86d1aa8SWill Deacon 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
1356e86d1aa8SWill Deacon 	}
1357e86d1aa8SWill Deacon }
1358e86d1aa8SWill Deacon 
1359e86d1aa8SWill Deacon /*
1360e86d1aa8SWill Deacon  * This is the actual insertion function, and provides the following
1361e86d1aa8SWill Deacon  * ordering guarantees to callers:
1362e86d1aa8SWill Deacon  *
1363e86d1aa8SWill Deacon  * - There is a dma_wmb() before publishing any commands to the queue.
1364e86d1aa8SWill Deacon  *   This can be relied upon to order prior writes to data structures
1365e86d1aa8SWill Deacon  *   in memory (such as a CD or an STE) before the command.
1366e86d1aa8SWill Deacon  *
1367e86d1aa8SWill Deacon  * - On completion of a CMD_SYNC, there is a control dependency.
1368e86d1aa8SWill Deacon  *   This can be relied upon to order subsequent writes to memory (e.g.
1369e86d1aa8SWill Deacon  *   freeing an IOVA) after completion of the CMD_SYNC.
1370e86d1aa8SWill Deacon  *
1371e86d1aa8SWill Deacon  * - Command insertion is totally ordered, so if two CPUs each race to
1372e86d1aa8SWill Deacon  *   insert their own list of commands then all of the commands from one
1373e86d1aa8SWill Deacon  *   CPU will appear before any of the commands from the other CPU.
1374e86d1aa8SWill Deacon  */
1375e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
1376e86d1aa8SWill Deacon 				       u64 *cmds, int n, bool sync)
1377e86d1aa8SWill Deacon {
1378e86d1aa8SWill Deacon 	u64 cmd_sync[CMDQ_ENT_DWORDS];
1379e86d1aa8SWill Deacon 	u32 prod;
1380e86d1aa8SWill Deacon 	unsigned long flags;
1381e86d1aa8SWill Deacon 	bool owner;
1382e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1383e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue llq = {
1384e86d1aa8SWill Deacon 		.max_n_shift = cmdq->q.llq.max_n_shift,
1385e86d1aa8SWill Deacon 	}, head = llq;
1386e86d1aa8SWill Deacon 	int ret = 0;
1387e86d1aa8SWill Deacon 
1388e86d1aa8SWill Deacon 	/* 1. Allocate some space in the queue */
1389e86d1aa8SWill Deacon 	local_irq_save(flags);
1390e86d1aa8SWill Deacon 	llq.val = READ_ONCE(cmdq->q.llq.val);
1391e86d1aa8SWill Deacon 	do {
1392e86d1aa8SWill Deacon 		u64 old;
1393e86d1aa8SWill Deacon 
1394e86d1aa8SWill Deacon 		while (!queue_has_space(&llq, n + sync)) {
1395e86d1aa8SWill Deacon 			local_irq_restore(flags);
1396e86d1aa8SWill Deacon 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
1397e86d1aa8SWill Deacon 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
1398e86d1aa8SWill Deacon 			local_irq_save(flags);
1399e86d1aa8SWill Deacon 		}
1400e86d1aa8SWill Deacon 
1401e86d1aa8SWill Deacon 		head.cons = llq.cons;
1402e86d1aa8SWill Deacon 		head.prod = queue_inc_prod_n(&llq, n + sync) |
1403e86d1aa8SWill Deacon 					     CMDQ_PROD_OWNED_FLAG;
1404e86d1aa8SWill Deacon 
1405e86d1aa8SWill Deacon 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
1406e86d1aa8SWill Deacon 		if (old == llq.val)
1407e86d1aa8SWill Deacon 			break;
1408e86d1aa8SWill Deacon 
1409e86d1aa8SWill Deacon 		llq.val = old;
1410e86d1aa8SWill Deacon 	} while (1);
1411e86d1aa8SWill Deacon 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
1412e86d1aa8SWill Deacon 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
1413e86d1aa8SWill Deacon 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
1414e86d1aa8SWill Deacon 
1415e86d1aa8SWill Deacon 	/*
1416e86d1aa8SWill Deacon 	 * 2. Write our commands into the queue
1417e86d1aa8SWill Deacon 	 * Dependency ordering from the cmpxchg() loop above.
1418e86d1aa8SWill Deacon 	 */
1419e86d1aa8SWill Deacon 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
1420e86d1aa8SWill Deacon 	if (sync) {
1421e86d1aa8SWill Deacon 		prod = queue_inc_prod_n(&llq, n);
1422e86d1aa8SWill Deacon 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
1423e86d1aa8SWill Deacon 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
1424e86d1aa8SWill Deacon 
1425e86d1aa8SWill Deacon 		/*
1426e86d1aa8SWill Deacon 		 * In order to determine completion of our CMD_SYNC, we must
1427e86d1aa8SWill Deacon 		 * ensure that the queue can't wrap twice without us noticing.
1428e86d1aa8SWill Deacon 		 * We achieve that by taking the cmdq lock as shared before
1429e86d1aa8SWill Deacon 		 * marking our slot as valid.
1430e86d1aa8SWill Deacon 		 */
1431e86d1aa8SWill Deacon 		arm_smmu_cmdq_shared_lock(cmdq);
1432e86d1aa8SWill Deacon 	}
1433e86d1aa8SWill Deacon 
1434e86d1aa8SWill Deacon 	/* 3. Mark our slots as valid, ensuring commands are visible first */
1435e86d1aa8SWill Deacon 	dma_wmb();
1436e86d1aa8SWill Deacon 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
1437e86d1aa8SWill Deacon 
1438e86d1aa8SWill Deacon 	/* 4. If we are the owner, take control of the SMMU hardware */
1439e86d1aa8SWill Deacon 	if (owner) {
1440e86d1aa8SWill Deacon 		/* a. Wait for previous owner to finish */
1441e86d1aa8SWill Deacon 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
1442e86d1aa8SWill Deacon 
1443e86d1aa8SWill Deacon 		/* b. Stop gathering work by clearing the owned flag */
1444e86d1aa8SWill Deacon 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
1445e86d1aa8SWill Deacon 						   &cmdq->q.llq.atomic.prod);
1446e86d1aa8SWill Deacon 		prod &= ~CMDQ_PROD_OWNED_FLAG;
1447e86d1aa8SWill Deacon 
1448e86d1aa8SWill Deacon 		/*
1449e86d1aa8SWill Deacon 		 * c. Wait for any gathered work to be written to the queue.
1450e86d1aa8SWill Deacon 		 * Note that we read our own entries so that we have the control
1451e86d1aa8SWill Deacon 		 * dependency required by (d).
1452e86d1aa8SWill Deacon 		 */
1453e86d1aa8SWill Deacon 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
1454e86d1aa8SWill Deacon 
1455e86d1aa8SWill Deacon 		/*
1456e86d1aa8SWill Deacon 		 * d. Advance the hardware prod pointer
1457e86d1aa8SWill Deacon 		 * Control dependency ordering from the entries becoming valid.
1458e86d1aa8SWill Deacon 		 */
1459e86d1aa8SWill Deacon 		writel_relaxed(prod, cmdq->q.prod_reg);
1460e86d1aa8SWill Deacon 
1461e86d1aa8SWill Deacon 		/*
1462e86d1aa8SWill Deacon 		 * e. Tell the next owner we're done
1463e86d1aa8SWill Deacon 		 * Make sure we've updated the hardware first, so that we don't
1464e86d1aa8SWill Deacon 		 * race to update prod and potentially move it backwards.
1465e86d1aa8SWill Deacon 		 */
1466e86d1aa8SWill Deacon 		atomic_set_release(&cmdq->owner_prod, prod);
1467e86d1aa8SWill Deacon 	}
1468e86d1aa8SWill Deacon 
1469e86d1aa8SWill Deacon 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
1470e86d1aa8SWill Deacon 	if (sync) {
1471e86d1aa8SWill Deacon 		llq.prod = queue_inc_prod_n(&llq, n);
1472e86d1aa8SWill Deacon 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
1473e86d1aa8SWill Deacon 		if (ret) {
1474e86d1aa8SWill Deacon 			dev_err_ratelimited(smmu->dev,
1475e86d1aa8SWill Deacon 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
1476e86d1aa8SWill Deacon 					    llq.prod,
1477e86d1aa8SWill Deacon 					    readl_relaxed(cmdq->q.prod_reg),
1478e86d1aa8SWill Deacon 					    readl_relaxed(cmdq->q.cons_reg));
1479e86d1aa8SWill Deacon 		}
1480e86d1aa8SWill Deacon 
1481e86d1aa8SWill Deacon 		/*
1482e86d1aa8SWill Deacon 		 * Try to unlock the cmdq lock. This will fail if we're the last
1483e86d1aa8SWill Deacon 		 * reader, in which case we can safely update cmdq->q.llq.cons
1484e86d1aa8SWill Deacon 		 */
1485e86d1aa8SWill Deacon 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
1486e86d1aa8SWill Deacon 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
1487e86d1aa8SWill Deacon 			arm_smmu_cmdq_shared_unlock(cmdq);
1488e86d1aa8SWill Deacon 		}
1489e86d1aa8SWill Deacon 	}
1490e86d1aa8SWill Deacon 
1491e86d1aa8SWill Deacon 	local_irq_restore(flags);
1492e86d1aa8SWill Deacon 	return ret;
1493e86d1aa8SWill Deacon }
1494e86d1aa8SWill Deacon 
1495e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
1496e86d1aa8SWill Deacon 				   struct arm_smmu_cmdq_ent *ent)
1497e86d1aa8SWill Deacon {
1498e86d1aa8SWill Deacon 	u64 cmd[CMDQ_ENT_DWORDS];
1499e86d1aa8SWill Deacon 
1500e86d1aa8SWill Deacon 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
1501e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
1502e86d1aa8SWill Deacon 			 ent->opcode);
1503e86d1aa8SWill Deacon 		return -EINVAL;
1504e86d1aa8SWill Deacon 	}
1505e86d1aa8SWill Deacon 
1506e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
1507e86d1aa8SWill Deacon }
1508e86d1aa8SWill Deacon 
1509e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1510e86d1aa8SWill Deacon {
1511e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
1512e86d1aa8SWill Deacon }
1513e86d1aa8SWill Deacon 
1514e86d1aa8SWill Deacon static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
1515e86d1aa8SWill Deacon 				    struct arm_smmu_cmdq_batch *cmds,
1516e86d1aa8SWill Deacon 				    struct arm_smmu_cmdq_ent *cmd)
1517e86d1aa8SWill Deacon {
1518e86d1aa8SWill Deacon 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
1519e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
1520e86d1aa8SWill Deacon 		cmds->num = 0;
1521e86d1aa8SWill Deacon 	}
1522e86d1aa8SWill Deacon 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
1523e86d1aa8SWill Deacon 	cmds->num++;
1524e86d1aa8SWill Deacon }
1525e86d1aa8SWill Deacon 
1526e86d1aa8SWill Deacon static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
1527e86d1aa8SWill Deacon 				      struct arm_smmu_cmdq_batch *cmds)
1528e86d1aa8SWill Deacon {
1529e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
1530e86d1aa8SWill Deacon }
1531e86d1aa8SWill Deacon 
1532e86d1aa8SWill Deacon /* Context descriptor manipulation functions */
1533e86d1aa8SWill Deacon static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
1534e86d1aa8SWill Deacon 			     int ssid, bool leaf)
1535e86d1aa8SWill Deacon {
1536e86d1aa8SWill Deacon 	size_t i;
1537e86d1aa8SWill Deacon 	unsigned long flags;
1538e86d1aa8SWill Deacon 	struct arm_smmu_master *master;
1539e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_batch cmds = {};
1540e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1541e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd = {
1542e86d1aa8SWill Deacon 		.opcode	= CMDQ_OP_CFGI_CD,
1543e86d1aa8SWill Deacon 		.cfgi	= {
1544e86d1aa8SWill Deacon 			.ssid	= ssid,
1545e86d1aa8SWill Deacon 			.leaf	= leaf,
1546e86d1aa8SWill Deacon 		},
1547e86d1aa8SWill Deacon 	};
1548e86d1aa8SWill Deacon 
1549e86d1aa8SWill Deacon 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1550e86d1aa8SWill Deacon 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1551e86d1aa8SWill Deacon 		for (i = 0; i < master->num_sids; i++) {
1552e86d1aa8SWill Deacon 			cmd.cfgi.sid = master->sids[i];
1553e86d1aa8SWill Deacon 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1554e86d1aa8SWill Deacon 		}
1555e86d1aa8SWill Deacon 	}
1556e86d1aa8SWill Deacon 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1557e86d1aa8SWill Deacon 
1558e86d1aa8SWill Deacon 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1559e86d1aa8SWill Deacon }
1560e86d1aa8SWill Deacon 
1561e86d1aa8SWill Deacon static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1562e86d1aa8SWill Deacon 					struct arm_smmu_l1_ctx_desc *l1_desc)
1563e86d1aa8SWill Deacon {
1564e86d1aa8SWill Deacon 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1565e86d1aa8SWill Deacon 
1566e86d1aa8SWill Deacon 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1567e86d1aa8SWill Deacon 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1568e86d1aa8SWill Deacon 	if (!l1_desc->l2ptr) {
1569e86d1aa8SWill Deacon 		dev_warn(smmu->dev,
1570e86d1aa8SWill Deacon 			 "failed to allocate context descriptor table\n");
1571e86d1aa8SWill Deacon 		return -ENOMEM;
1572e86d1aa8SWill Deacon 	}
1573e86d1aa8SWill Deacon 	return 0;
1574e86d1aa8SWill Deacon }
1575e86d1aa8SWill Deacon 
1576e86d1aa8SWill Deacon static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1577e86d1aa8SWill Deacon 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1578e86d1aa8SWill Deacon {
1579e86d1aa8SWill Deacon 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1580e86d1aa8SWill Deacon 		  CTXDESC_L1_DESC_V;
1581e86d1aa8SWill Deacon 
1582e86d1aa8SWill Deacon 	/* See comment in arm_smmu_write_ctx_desc() */
1583e86d1aa8SWill Deacon 	WRITE_ONCE(*dst, cpu_to_le64(val));
1584e86d1aa8SWill Deacon }
1585e86d1aa8SWill Deacon 
1586e86d1aa8SWill Deacon static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1587e86d1aa8SWill Deacon 				   u32 ssid)
1588e86d1aa8SWill Deacon {
1589e86d1aa8SWill Deacon 	__le64 *l1ptr;
1590e86d1aa8SWill Deacon 	unsigned int idx;
1591e86d1aa8SWill Deacon 	struct arm_smmu_l1_ctx_desc *l1_desc;
1592e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1593e86d1aa8SWill Deacon 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1594e86d1aa8SWill Deacon 
1595e86d1aa8SWill Deacon 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1596e86d1aa8SWill Deacon 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1597e86d1aa8SWill Deacon 
1598e86d1aa8SWill Deacon 	idx = ssid >> CTXDESC_SPLIT;
1599e86d1aa8SWill Deacon 	l1_desc = &cdcfg->l1_desc[idx];
1600e86d1aa8SWill Deacon 	if (!l1_desc->l2ptr) {
1601e86d1aa8SWill Deacon 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1602e86d1aa8SWill Deacon 			return NULL;
1603e86d1aa8SWill Deacon 
1604e86d1aa8SWill Deacon 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1605e86d1aa8SWill Deacon 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1606e86d1aa8SWill Deacon 		/* An invalid L1CD can be cached */
1607e86d1aa8SWill Deacon 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1608e86d1aa8SWill Deacon 	}
1609e86d1aa8SWill Deacon 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1610e86d1aa8SWill Deacon 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1611e86d1aa8SWill Deacon }
1612e86d1aa8SWill Deacon 
1613e86d1aa8SWill Deacon static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
1614e86d1aa8SWill Deacon 				   int ssid, struct arm_smmu_ctx_desc *cd)
1615e86d1aa8SWill Deacon {
1616e86d1aa8SWill Deacon 	/*
1617e86d1aa8SWill Deacon 	 * This function handles the following cases:
1618e86d1aa8SWill Deacon 	 *
1619e86d1aa8SWill Deacon 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1620e86d1aa8SWill Deacon 	 * (2) Install a secondary CD, for SID+SSID traffic.
1621e86d1aa8SWill Deacon 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1622e86d1aa8SWill Deacon 	 *     CD, then invalidate the old entry and mappings.
1623e86d1aa8SWill Deacon 	 * (4) Remove a secondary CD.
1624e86d1aa8SWill Deacon 	 */
1625e86d1aa8SWill Deacon 	u64 val;
1626e86d1aa8SWill Deacon 	bool cd_live;
1627e86d1aa8SWill Deacon 	__le64 *cdptr;
1628e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1629e86d1aa8SWill Deacon 
1630e86d1aa8SWill Deacon 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1631e86d1aa8SWill Deacon 		return -E2BIG;
1632e86d1aa8SWill Deacon 
1633e86d1aa8SWill Deacon 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1634e86d1aa8SWill Deacon 	if (!cdptr)
1635e86d1aa8SWill Deacon 		return -ENOMEM;
1636e86d1aa8SWill Deacon 
1637e86d1aa8SWill Deacon 	val = le64_to_cpu(cdptr[0]);
1638e86d1aa8SWill Deacon 	cd_live = !!(val & CTXDESC_CD_0_V);
1639e86d1aa8SWill Deacon 
1640e86d1aa8SWill Deacon 	if (!cd) { /* (4) */
1641e86d1aa8SWill Deacon 		val = 0;
1642e86d1aa8SWill Deacon 	} else if (cd_live) { /* (3) */
1643e86d1aa8SWill Deacon 		val &= ~CTXDESC_CD_0_ASID;
1644e86d1aa8SWill Deacon 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1645e86d1aa8SWill Deacon 		/*
1646e86d1aa8SWill Deacon 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1647e86d1aa8SWill Deacon 		 * this substream's traffic
1648e86d1aa8SWill Deacon 		 */
1649e86d1aa8SWill Deacon 	} else { /* (1) and (2) */
1650e86d1aa8SWill Deacon 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1651e86d1aa8SWill Deacon 		cdptr[2] = 0;
1652e86d1aa8SWill Deacon 		cdptr[3] = cpu_to_le64(cd->mair);
1653e86d1aa8SWill Deacon 
1654e86d1aa8SWill Deacon 		/*
1655e86d1aa8SWill Deacon 		 * STE is live, and the SMMU might read dwords of this CD in any
1656e86d1aa8SWill Deacon 		 * order. Ensure that it observes valid values before reading
1657e86d1aa8SWill Deacon 		 * V=1.
1658e86d1aa8SWill Deacon 		 */
1659e86d1aa8SWill Deacon 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1660e86d1aa8SWill Deacon 
1661e86d1aa8SWill Deacon 		val = cd->tcr |
1662e86d1aa8SWill Deacon #ifdef __BIG_ENDIAN
1663e86d1aa8SWill Deacon 			CTXDESC_CD_0_ENDI |
1664e86d1aa8SWill Deacon #endif
1665e86d1aa8SWill Deacon 			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1666e86d1aa8SWill Deacon 			CTXDESC_CD_0_AA64 |
1667e86d1aa8SWill Deacon 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1668e86d1aa8SWill Deacon 			CTXDESC_CD_0_V;
1669e86d1aa8SWill Deacon 
1670e86d1aa8SWill Deacon 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1671e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1672e86d1aa8SWill Deacon 			val |= CTXDESC_CD_0_S;
1673e86d1aa8SWill Deacon 	}
1674e86d1aa8SWill Deacon 
1675e86d1aa8SWill Deacon 	/*
1676e86d1aa8SWill Deacon 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1677e86d1aa8SWill Deacon 	 * "Configuration structures and configuration invalidation completion"
1678e86d1aa8SWill Deacon 	 *
1679e86d1aa8SWill Deacon 	 *   The size of single-copy atomic reads made by the SMMU is
1680e86d1aa8SWill Deacon 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1681e86d1aa8SWill Deacon 	 *   field within an aligned 64-bit span of a structure can be altered
1682e86d1aa8SWill Deacon 	 *   without first making the structure invalid.
1683e86d1aa8SWill Deacon 	 */
1684e86d1aa8SWill Deacon 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1685e86d1aa8SWill Deacon 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1686e86d1aa8SWill Deacon 	return 0;
1687e86d1aa8SWill Deacon }
1688e86d1aa8SWill Deacon 
1689e86d1aa8SWill Deacon static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1690e86d1aa8SWill Deacon {
1691e86d1aa8SWill Deacon 	int ret;
1692e86d1aa8SWill Deacon 	size_t l1size;
1693e86d1aa8SWill Deacon 	size_t max_contexts;
1694e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1695e86d1aa8SWill Deacon 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1696e86d1aa8SWill Deacon 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1697e86d1aa8SWill Deacon 
1698e86d1aa8SWill Deacon 	max_contexts = 1 << cfg->s1cdmax;
1699e86d1aa8SWill Deacon 
1700e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1701e86d1aa8SWill Deacon 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1702e86d1aa8SWill Deacon 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1703e86d1aa8SWill Deacon 		cdcfg->num_l1_ents = max_contexts;
1704e86d1aa8SWill Deacon 
1705e86d1aa8SWill Deacon 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1706e86d1aa8SWill Deacon 	} else {
1707e86d1aa8SWill Deacon 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1708e86d1aa8SWill Deacon 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1709e86d1aa8SWill Deacon 						  CTXDESC_L2_ENTRIES);
1710e86d1aa8SWill Deacon 
1711e86d1aa8SWill Deacon 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1712e86d1aa8SWill Deacon 					      sizeof(*cdcfg->l1_desc),
1713e86d1aa8SWill Deacon 					      GFP_KERNEL);
1714e86d1aa8SWill Deacon 		if (!cdcfg->l1_desc)
1715e86d1aa8SWill Deacon 			return -ENOMEM;
1716e86d1aa8SWill Deacon 
1717e86d1aa8SWill Deacon 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1718e86d1aa8SWill Deacon 	}
1719e86d1aa8SWill Deacon 
1720e86d1aa8SWill Deacon 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1721e86d1aa8SWill Deacon 					   GFP_KERNEL);
1722e86d1aa8SWill Deacon 	if (!cdcfg->cdtab) {
1723e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1724e86d1aa8SWill Deacon 		ret = -ENOMEM;
1725e86d1aa8SWill Deacon 		goto err_free_l1;
1726e86d1aa8SWill Deacon 	}
1727e86d1aa8SWill Deacon 
1728e86d1aa8SWill Deacon 	return 0;
1729e86d1aa8SWill Deacon 
1730e86d1aa8SWill Deacon err_free_l1:
1731e86d1aa8SWill Deacon 	if (cdcfg->l1_desc) {
1732e86d1aa8SWill Deacon 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1733e86d1aa8SWill Deacon 		cdcfg->l1_desc = NULL;
1734e86d1aa8SWill Deacon 	}
1735e86d1aa8SWill Deacon 	return ret;
1736e86d1aa8SWill Deacon }
1737e86d1aa8SWill Deacon 
1738e86d1aa8SWill Deacon static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1739e86d1aa8SWill Deacon {
1740e86d1aa8SWill Deacon 	int i;
1741e86d1aa8SWill Deacon 	size_t size, l1size;
1742e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1743e86d1aa8SWill Deacon 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1744e86d1aa8SWill Deacon 
1745e86d1aa8SWill Deacon 	if (cdcfg->l1_desc) {
1746e86d1aa8SWill Deacon 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1747e86d1aa8SWill Deacon 
1748e86d1aa8SWill Deacon 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1749e86d1aa8SWill Deacon 			if (!cdcfg->l1_desc[i].l2ptr)
1750e86d1aa8SWill Deacon 				continue;
1751e86d1aa8SWill Deacon 
1752e86d1aa8SWill Deacon 			dmam_free_coherent(smmu->dev, size,
1753e86d1aa8SWill Deacon 					   cdcfg->l1_desc[i].l2ptr,
1754e86d1aa8SWill Deacon 					   cdcfg->l1_desc[i].l2ptr_dma);
1755e86d1aa8SWill Deacon 		}
1756e86d1aa8SWill Deacon 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1757e86d1aa8SWill Deacon 		cdcfg->l1_desc = NULL;
1758e86d1aa8SWill Deacon 
1759e86d1aa8SWill Deacon 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1760e86d1aa8SWill Deacon 	} else {
1761e86d1aa8SWill Deacon 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1762e86d1aa8SWill Deacon 	}
1763e86d1aa8SWill Deacon 
1764e86d1aa8SWill Deacon 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1765e86d1aa8SWill Deacon 	cdcfg->cdtab_dma = 0;
1766e86d1aa8SWill Deacon 	cdcfg->cdtab = NULL;
1767e86d1aa8SWill Deacon }
1768e86d1aa8SWill Deacon 
1769e86d1aa8SWill Deacon static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1770e86d1aa8SWill Deacon {
1771e86d1aa8SWill Deacon 	if (!cd->asid)
1772e86d1aa8SWill Deacon 		return;
1773e86d1aa8SWill Deacon 
1774e86d1aa8SWill Deacon 	xa_erase(&asid_xa, cd->asid);
1775e86d1aa8SWill Deacon }
1776e86d1aa8SWill Deacon 
1777e86d1aa8SWill Deacon /* Stream table manipulation functions */
1778e86d1aa8SWill Deacon static void
1779e86d1aa8SWill Deacon arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1780e86d1aa8SWill Deacon {
1781e86d1aa8SWill Deacon 	u64 val = 0;
1782e86d1aa8SWill Deacon 
1783e86d1aa8SWill Deacon 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1784e86d1aa8SWill Deacon 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1785e86d1aa8SWill Deacon 
1786e86d1aa8SWill Deacon 	/* See comment in arm_smmu_write_ctx_desc() */
1787e86d1aa8SWill Deacon 	WRITE_ONCE(*dst, cpu_to_le64(val));
1788e86d1aa8SWill Deacon }
1789e86d1aa8SWill Deacon 
1790e86d1aa8SWill Deacon static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1791e86d1aa8SWill Deacon {
1792e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd = {
1793e86d1aa8SWill Deacon 		.opcode	= CMDQ_OP_CFGI_STE,
1794e86d1aa8SWill Deacon 		.cfgi	= {
1795e86d1aa8SWill Deacon 			.sid	= sid,
1796e86d1aa8SWill Deacon 			.leaf	= true,
1797e86d1aa8SWill Deacon 		},
1798e86d1aa8SWill Deacon 	};
1799e86d1aa8SWill Deacon 
1800e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1801e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_sync(smmu);
1802e86d1aa8SWill Deacon }
1803e86d1aa8SWill Deacon 
1804e86d1aa8SWill Deacon static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1805e86d1aa8SWill Deacon 				      __le64 *dst)
1806e86d1aa8SWill Deacon {
1807e86d1aa8SWill Deacon 	/*
1808e86d1aa8SWill Deacon 	 * This is hideously complicated, but we only really care about
1809e86d1aa8SWill Deacon 	 * three cases at the moment:
1810e86d1aa8SWill Deacon 	 *
1811e86d1aa8SWill Deacon 	 * 1. Invalid (all zero) -> bypass/fault (init)
1812e86d1aa8SWill Deacon 	 * 2. Bypass/fault -> translation/bypass (attach)
1813e86d1aa8SWill Deacon 	 * 3. Translation/bypass -> bypass/fault (detach)
1814e86d1aa8SWill Deacon 	 *
1815e86d1aa8SWill Deacon 	 * Given that we can't update the STE atomically and the SMMU
1816e86d1aa8SWill Deacon 	 * doesn't read the thing in a defined order, that leaves us
1817e86d1aa8SWill Deacon 	 * with the following maintenance requirements:
1818e86d1aa8SWill Deacon 	 *
1819e86d1aa8SWill Deacon 	 * 1. Update Config, return (init time STEs aren't live)
1820e86d1aa8SWill Deacon 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1821e86d1aa8SWill Deacon 	 * 3. Update Config, sync
1822e86d1aa8SWill Deacon 	 */
1823e86d1aa8SWill Deacon 	u64 val = le64_to_cpu(dst[0]);
1824e86d1aa8SWill Deacon 	bool ste_live = false;
1825e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = NULL;
1826e86d1aa8SWill Deacon 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1827e86d1aa8SWill Deacon 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1828e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = NULL;
1829e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1830e86d1aa8SWill Deacon 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1831e86d1aa8SWill Deacon 		.prefetch	= {
1832e86d1aa8SWill Deacon 			.sid	= sid,
1833e86d1aa8SWill Deacon 		},
1834e86d1aa8SWill Deacon 	};
1835e86d1aa8SWill Deacon 
1836e86d1aa8SWill Deacon 	if (master) {
1837e86d1aa8SWill Deacon 		smmu_domain = master->domain;
1838e86d1aa8SWill Deacon 		smmu = master->smmu;
1839e86d1aa8SWill Deacon 	}
1840e86d1aa8SWill Deacon 
1841e86d1aa8SWill Deacon 	if (smmu_domain) {
1842e86d1aa8SWill Deacon 		switch (smmu_domain->stage) {
1843e86d1aa8SWill Deacon 		case ARM_SMMU_DOMAIN_S1:
1844e86d1aa8SWill Deacon 			s1_cfg = &smmu_domain->s1_cfg;
1845e86d1aa8SWill Deacon 			break;
1846e86d1aa8SWill Deacon 		case ARM_SMMU_DOMAIN_S2:
1847e86d1aa8SWill Deacon 		case ARM_SMMU_DOMAIN_NESTED:
1848e86d1aa8SWill Deacon 			s2_cfg = &smmu_domain->s2_cfg;
1849e86d1aa8SWill Deacon 			break;
1850e86d1aa8SWill Deacon 		default:
1851e86d1aa8SWill Deacon 			break;
1852e86d1aa8SWill Deacon 		}
1853e86d1aa8SWill Deacon 	}
1854e86d1aa8SWill Deacon 
1855e86d1aa8SWill Deacon 	if (val & STRTAB_STE_0_V) {
1856e86d1aa8SWill Deacon 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1857e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_BYPASS:
1858e86d1aa8SWill Deacon 			break;
1859e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_S1_TRANS:
1860e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_S2_TRANS:
1861e86d1aa8SWill Deacon 			ste_live = true;
1862e86d1aa8SWill Deacon 			break;
1863e86d1aa8SWill Deacon 		case STRTAB_STE_0_CFG_ABORT:
1864e86d1aa8SWill Deacon 			BUG_ON(!disable_bypass);
1865e86d1aa8SWill Deacon 			break;
1866e86d1aa8SWill Deacon 		default:
1867e86d1aa8SWill Deacon 			BUG(); /* STE corruption */
1868e86d1aa8SWill Deacon 		}
1869e86d1aa8SWill Deacon 	}
1870e86d1aa8SWill Deacon 
1871e86d1aa8SWill Deacon 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1872e86d1aa8SWill Deacon 	val = STRTAB_STE_0_V;
1873e86d1aa8SWill Deacon 
1874e86d1aa8SWill Deacon 	/* Bypass/fault */
1875e86d1aa8SWill Deacon 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1876e86d1aa8SWill Deacon 		if (!smmu_domain && disable_bypass)
1877e86d1aa8SWill Deacon 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1878e86d1aa8SWill Deacon 		else
1879e86d1aa8SWill Deacon 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1880e86d1aa8SWill Deacon 
1881e86d1aa8SWill Deacon 		dst[0] = cpu_to_le64(val);
1882e86d1aa8SWill Deacon 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1883e86d1aa8SWill Deacon 						STRTAB_STE_1_SHCFG_INCOMING));
1884e86d1aa8SWill Deacon 		dst[2] = 0; /* Nuke the VMID */
1885e86d1aa8SWill Deacon 		/*
1886e86d1aa8SWill Deacon 		 * The SMMU can perform negative caching, so we must sync
1887e86d1aa8SWill Deacon 		 * the STE regardless of whether the old value was live.
1888e86d1aa8SWill Deacon 		 */
1889e86d1aa8SWill Deacon 		if (smmu)
1890e86d1aa8SWill Deacon 			arm_smmu_sync_ste_for_sid(smmu, sid);
1891e86d1aa8SWill Deacon 		return;
1892e86d1aa8SWill Deacon 	}
1893e86d1aa8SWill Deacon 
1894e86d1aa8SWill Deacon 	if (s1_cfg) {
1895e86d1aa8SWill Deacon 		BUG_ON(ste_live);
1896e86d1aa8SWill Deacon 		dst[1] = cpu_to_le64(
1897e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1898e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1899e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1900e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1901e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1902e86d1aa8SWill Deacon 
1903e86d1aa8SWill Deacon 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1904e86d1aa8SWill Deacon 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1905e86d1aa8SWill Deacon 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1906e86d1aa8SWill Deacon 
1907e86d1aa8SWill Deacon 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1908e86d1aa8SWill Deacon 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1909e86d1aa8SWill Deacon 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1910e86d1aa8SWill Deacon 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1911e86d1aa8SWill Deacon 	}
1912e86d1aa8SWill Deacon 
1913e86d1aa8SWill Deacon 	if (s2_cfg) {
1914e86d1aa8SWill Deacon 		BUG_ON(ste_live);
1915e86d1aa8SWill Deacon 		dst[2] = cpu_to_le64(
1916e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1917e86d1aa8SWill Deacon 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1918e86d1aa8SWill Deacon #ifdef __BIG_ENDIAN
1919e86d1aa8SWill Deacon 			 STRTAB_STE_2_S2ENDI |
1920e86d1aa8SWill Deacon #endif
1921e86d1aa8SWill Deacon 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1922e86d1aa8SWill Deacon 			 STRTAB_STE_2_S2R);
1923e86d1aa8SWill Deacon 
1924e86d1aa8SWill Deacon 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1925e86d1aa8SWill Deacon 
1926e86d1aa8SWill Deacon 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1927e86d1aa8SWill Deacon 	}
1928e86d1aa8SWill Deacon 
1929e86d1aa8SWill Deacon 	if (master->ats_enabled)
1930e86d1aa8SWill Deacon 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1931e86d1aa8SWill Deacon 						 STRTAB_STE_1_EATS_TRANS));
1932e86d1aa8SWill Deacon 
1933e86d1aa8SWill Deacon 	arm_smmu_sync_ste_for_sid(smmu, sid);
1934e86d1aa8SWill Deacon 	/* See comment in arm_smmu_write_ctx_desc() */
1935e86d1aa8SWill Deacon 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1936e86d1aa8SWill Deacon 	arm_smmu_sync_ste_for_sid(smmu, sid);
1937e86d1aa8SWill Deacon 
1938e86d1aa8SWill Deacon 	/* It's likely that we'll want to use the new STE soon */
1939e86d1aa8SWill Deacon 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1940e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1941e86d1aa8SWill Deacon }
1942e86d1aa8SWill Deacon 
1943e86d1aa8SWill Deacon static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1944e86d1aa8SWill Deacon {
1945e86d1aa8SWill Deacon 	unsigned int i;
1946e86d1aa8SWill Deacon 
1947e86d1aa8SWill Deacon 	for (i = 0; i < nent; ++i) {
1948e86d1aa8SWill Deacon 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1949e86d1aa8SWill Deacon 		strtab += STRTAB_STE_DWORDS;
1950e86d1aa8SWill Deacon 	}
1951e86d1aa8SWill Deacon }
1952e86d1aa8SWill Deacon 
1953e86d1aa8SWill Deacon static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1954e86d1aa8SWill Deacon {
1955e86d1aa8SWill Deacon 	size_t size;
1956e86d1aa8SWill Deacon 	void *strtab;
1957e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1958e86d1aa8SWill Deacon 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1959e86d1aa8SWill Deacon 
1960e86d1aa8SWill Deacon 	if (desc->l2ptr)
1961e86d1aa8SWill Deacon 		return 0;
1962e86d1aa8SWill Deacon 
1963e86d1aa8SWill Deacon 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1964e86d1aa8SWill Deacon 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1965e86d1aa8SWill Deacon 
1966e86d1aa8SWill Deacon 	desc->span = STRTAB_SPLIT + 1;
1967e86d1aa8SWill Deacon 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1968e86d1aa8SWill Deacon 					  GFP_KERNEL);
1969e86d1aa8SWill Deacon 	if (!desc->l2ptr) {
1970e86d1aa8SWill Deacon 		dev_err(smmu->dev,
1971e86d1aa8SWill Deacon 			"failed to allocate l2 stream table for SID %u\n",
1972e86d1aa8SWill Deacon 			sid);
1973e86d1aa8SWill Deacon 		return -ENOMEM;
1974e86d1aa8SWill Deacon 	}
1975e86d1aa8SWill Deacon 
1976e86d1aa8SWill Deacon 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1977e86d1aa8SWill Deacon 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1978e86d1aa8SWill Deacon 	return 0;
1979e86d1aa8SWill Deacon }
1980e86d1aa8SWill Deacon 
1981e86d1aa8SWill Deacon /* IRQ and event handlers */
1982e86d1aa8SWill Deacon static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1983e86d1aa8SWill Deacon {
1984e86d1aa8SWill Deacon 	int i;
1985e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
1986e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->evtq.q;
1987e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue *llq = &q->llq;
1988e86d1aa8SWill Deacon 	u64 evt[EVTQ_ENT_DWORDS];
1989e86d1aa8SWill Deacon 
1990e86d1aa8SWill Deacon 	do {
1991e86d1aa8SWill Deacon 		while (!queue_remove_raw(q, evt)) {
1992e86d1aa8SWill Deacon 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1993e86d1aa8SWill Deacon 
1994e86d1aa8SWill Deacon 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1995e86d1aa8SWill Deacon 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1996e86d1aa8SWill Deacon 				dev_info(smmu->dev, "\t0x%016llx\n",
1997e86d1aa8SWill Deacon 					 (unsigned long long)evt[i]);
1998e86d1aa8SWill Deacon 
1999e86d1aa8SWill Deacon 		}
2000e86d1aa8SWill Deacon 
2001e86d1aa8SWill Deacon 		/*
2002e86d1aa8SWill Deacon 		 * Not much we can do on overflow, so scream and pretend we're
2003e86d1aa8SWill Deacon 		 * trying harder.
2004e86d1aa8SWill Deacon 		 */
2005e86d1aa8SWill Deacon 		if (queue_sync_prod_in(q) == -EOVERFLOW)
2006e86d1aa8SWill Deacon 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
2007e86d1aa8SWill Deacon 	} while (!queue_empty(llq));
2008e86d1aa8SWill Deacon 
2009e86d1aa8SWill Deacon 	/* Sync our overflow flag, as we believe we're up to speed */
2010e86d1aa8SWill Deacon 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2011e86d1aa8SWill Deacon 		    Q_IDX(llq, llq->cons);
2012e86d1aa8SWill Deacon 	return IRQ_HANDLED;
2013e86d1aa8SWill Deacon }
2014e86d1aa8SWill Deacon 
2015e86d1aa8SWill Deacon static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
2016e86d1aa8SWill Deacon {
2017e86d1aa8SWill Deacon 	u32 sid, ssid;
2018e86d1aa8SWill Deacon 	u16 grpid;
2019e86d1aa8SWill Deacon 	bool ssv, last;
2020e86d1aa8SWill Deacon 
2021e86d1aa8SWill Deacon 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
2022e86d1aa8SWill Deacon 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
2023e86d1aa8SWill Deacon 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
2024e86d1aa8SWill Deacon 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
2025e86d1aa8SWill Deacon 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
2026e86d1aa8SWill Deacon 
2027e86d1aa8SWill Deacon 	dev_info(smmu->dev, "unexpected PRI request received:\n");
2028e86d1aa8SWill Deacon 	dev_info(smmu->dev,
2029e86d1aa8SWill Deacon 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
2030e86d1aa8SWill Deacon 		 sid, ssid, grpid, last ? "L" : "",
2031e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
2032e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
2033e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
2034e86d1aa8SWill Deacon 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
2035e86d1aa8SWill Deacon 		 evt[1] & PRIQ_1_ADDR_MASK);
2036e86d1aa8SWill Deacon 
2037e86d1aa8SWill Deacon 	if (last) {
2038e86d1aa8SWill Deacon 		struct arm_smmu_cmdq_ent cmd = {
2039e86d1aa8SWill Deacon 			.opcode			= CMDQ_OP_PRI_RESP,
2040e86d1aa8SWill Deacon 			.substream_valid	= ssv,
2041e86d1aa8SWill Deacon 			.pri			= {
2042e86d1aa8SWill Deacon 				.sid	= sid,
2043e86d1aa8SWill Deacon 				.ssid	= ssid,
2044e86d1aa8SWill Deacon 				.grpid	= grpid,
2045e86d1aa8SWill Deacon 				.resp	= PRI_RESP_DENY,
2046e86d1aa8SWill Deacon 			},
2047e86d1aa8SWill Deacon 		};
2048e86d1aa8SWill Deacon 
2049e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2050e86d1aa8SWill Deacon 	}
2051e86d1aa8SWill Deacon }
2052e86d1aa8SWill Deacon 
2053e86d1aa8SWill Deacon static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
2054e86d1aa8SWill Deacon {
2055e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev;
2056e86d1aa8SWill Deacon 	struct arm_smmu_queue *q = &smmu->priq.q;
2057e86d1aa8SWill Deacon 	struct arm_smmu_ll_queue *llq = &q->llq;
2058e86d1aa8SWill Deacon 	u64 evt[PRIQ_ENT_DWORDS];
2059e86d1aa8SWill Deacon 
2060e86d1aa8SWill Deacon 	do {
2061e86d1aa8SWill Deacon 		while (!queue_remove_raw(q, evt))
2062e86d1aa8SWill Deacon 			arm_smmu_handle_ppr(smmu, evt);
2063e86d1aa8SWill Deacon 
2064e86d1aa8SWill Deacon 		if (queue_sync_prod_in(q) == -EOVERFLOW)
2065e86d1aa8SWill Deacon 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
2066e86d1aa8SWill Deacon 	} while (!queue_empty(llq));
2067e86d1aa8SWill Deacon 
2068e86d1aa8SWill Deacon 	/* Sync our overflow flag, as we believe we're up to speed */
2069e86d1aa8SWill Deacon 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2070e86d1aa8SWill Deacon 		      Q_IDX(llq, llq->cons);
2071e86d1aa8SWill Deacon 	queue_sync_cons_out(q);
2072e86d1aa8SWill Deacon 	return IRQ_HANDLED;
2073e86d1aa8SWill Deacon }
2074e86d1aa8SWill Deacon 
2075e86d1aa8SWill Deacon static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
2076e86d1aa8SWill Deacon 
/*
 * Global error (GERROR) interrupt handler.
 *
 * An error condition is "active" when its bit in GERROR differs from the
 * corresponding bit in GERRORN.  Report every active condition, then write
 * the GERROR value to GERRORN to acknowledge the whole batch at once.
 */
static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
{
	u32 gerror, gerrorn, active;
	struct arm_smmu_device *smmu = dev;

	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);

	/* A condition is pending iff its GERROR and GERRORN bits differ */
	active = gerror ^ gerrorn;
	if (!(active & GERROR_ERR_MASK))
		return IRQ_NONE; /* No errors pending */

	dev_warn(smmu->dev,
		 "unexpected global error reported (0x%08x), this could be serious\n",
		 active);

	/* Service Failure Mode: give up and take the SMMU offline */
	if (active & GERROR_SFM_ERR) {
		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
		arm_smmu_device_disable(smmu);
	}

	if (active & GERROR_MSI_GERROR_ABT_ERR)
		dev_warn(smmu->dev, "GERROR MSI write aborted\n");

	if (active & GERROR_MSI_PRIQ_ABT_ERR)
		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");

	if (active & GERROR_MSI_EVTQ_ABT_ERR)
		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");

	if (active & GERROR_MSI_CMDQ_ABT_ERR)
		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");

	if (active & GERROR_PRIQ_ABT_ERR)
		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");

	if (active & GERROR_EVTQ_ABT_ERR)
		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");

	/* Command queue errors are recovered by skipping the bad command */
	if (active & GERROR_CMDQ_ERR)
		arm_smmu_cmdq_skip_err(smmu);

	/* Acknowledge everything we observed in this pass */
	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
	return IRQ_HANDLED;
}
2122e86d1aa8SWill Deacon 
/*
 * Threaded half for platforms with a single combined IRQ line: drain the
 * event queue and, when the SMMU supports PRI, the PRI queue as well.
 */
static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;

	arm_smmu_evtq_thread(irq, dev);
	if (smmu->features & ARM_SMMU_FEAT_PRI)
		arm_smmu_priq_thread(irq, dev);

	return IRQ_HANDLED;
}
2133e86d1aa8SWill Deacon 
/*
 * Hard-IRQ half for the combined IRQ line: service global errors
 * immediately, then wake the thread to drain the queues.
 */
static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
{
	arm_smmu_gerror_handler(irq, dev);
	return IRQ_WAKE_THREAD;
}
2139e86d1aa8SWill Deacon 
2140e86d1aa8SWill Deacon static void
2141e86d1aa8SWill Deacon arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
2142e86d1aa8SWill Deacon 			struct arm_smmu_cmdq_ent *cmd)
2143e86d1aa8SWill Deacon {
2144e86d1aa8SWill Deacon 	size_t log2_span;
2145e86d1aa8SWill Deacon 	size_t span_mask;
2146e86d1aa8SWill Deacon 	/* ATC invalidates are always on 4096-bytes pages */
2147e86d1aa8SWill Deacon 	size_t inval_grain_shift = 12;
2148e86d1aa8SWill Deacon 	unsigned long page_start, page_end;
2149e86d1aa8SWill Deacon 
2150e86d1aa8SWill Deacon 	*cmd = (struct arm_smmu_cmdq_ent) {
2151e86d1aa8SWill Deacon 		.opcode			= CMDQ_OP_ATC_INV,
2152e86d1aa8SWill Deacon 		.substream_valid	= !!ssid,
2153e86d1aa8SWill Deacon 		.atc.ssid		= ssid,
2154e86d1aa8SWill Deacon 	};
2155e86d1aa8SWill Deacon 
2156e86d1aa8SWill Deacon 	if (!size) {
2157e86d1aa8SWill Deacon 		cmd->atc.size = ATC_INV_SIZE_ALL;
2158e86d1aa8SWill Deacon 		return;
2159e86d1aa8SWill Deacon 	}
2160e86d1aa8SWill Deacon 
2161e86d1aa8SWill Deacon 	page_start	= iova >> inval_grain_shift;
2162e86d1aa8SWill Deacon 	page_end	= (iova + size - 1) >> inval_grain_shift;
2163e86d1aa8SWill Deacon 
2164e86d1aa8SWill Deacon 	/*
2165e86d1aa8SWill Deacon 	 * In an ATS Invalidate Request, the address must be aligned on the
2166e86d1aa8SWill Deacon 	 * range size, which must be a power of two number of page sizes. We
2167e86d1aa8SWill Deacon 	 * thus have to choose between grossly over-invalidating the region, or
2168e86d1aa8SWill Deacon 	 * splitting the invalidation into multiple commands. For simplicity
2169e86d1aa8SWill Deacon 	 * we'll go with the first solution, but should refine it in the future
2170e86d1aa8SWill Deacon 	 * if multiple commands are shown to be more efficient.
2171e86d1aa8SWill Deacon 	 *
2172e86d1aa8SWill Deacon 	 * Find the smallest power of two that covers the range. The most
2173e86d1aa8SWill Deacon 	 * significant differing bit between the start and end addresses,
2174e86d1aa8SWill Deacon 	 * fls(start ^ end), indicates the required span. For example:
2175e86d1aa8SWill Deacon 	 *
2176e86d1aa8SWill Deacon 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
2177e86d1aa8SWill Deacon 	 *		x = 0b1000 ^ 0b1011 = 0b11
2178e86d1aa8SWill Deacon 	 *		span = 1 << fls(x) = 4
2179e86d1aa8SWill Deacon 	 *
2180e86d1aa8SWill Deacon 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2181e86d1aa8SWill Deacon 	 *		x = 0b0111 ^ 0b1010 = 0b1101
2182e86d1aa8SWill Deacon 	 *		span = 1 << fls(x) = 16
2183e86d1aa8SWill Deacon 	 */
2184e86d1aa8SWill Deacon 	log2_span	= fls_long(page_start ^ page_end);
2185e86d1aa8SWill Deacon 	span_mask	= (1ULL << log2_span) - 1;
2186e86d1aa8SWill Deacon 
2187e86d1aa8SWill Deacon 	page_start	&= ~span_mask;
2188e86d1aa8SWill Deacon 
2189e86d1aa8SWill Deacon 	cmd->atc.addr	= page_start << inval_grain_shift;
2190e86d1aa8SWill Deacon 	cmd->atc.size	= log2_span;
2191e86d1aa8SWill Deacon }
2192e86d1aa8SWill Deacon 
2193e86d1aa8SWill Deacon static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
2194e86d1aa8SWill Deacon {
2195e86d1aa8SWill Deacon 	int i;
2196e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd;
2197e86d1aa8SWill Deacon 
2198e86d1aa8SWill Deacon 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
2199e86d1aa8SWill Deacon 
2200e86d1aa8SWill Deacon 	for (i = 0; i < master->num_sids; i++) {
2201e86d1aa8SWill Deacon 		cmd.atc.sid = master->sids[i];
2202e86d1aa8SWill Deacon 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
2203e86d1aa8SWill Deacon 	}
2204e86d1aa8SWill Deacon 
2205e86d1aa8SWill Deacon 	return arm_smmu_cmdq_issue_sync(master->smmu);
2206e86d1aa8SWill Deacon }
2207e86d1aa8SWill Deacon 
/*
 * Invalidate the ATCs of every ATS-enabled master attached to
 * @smmu_domain for [@iova, @iova + @size) on substream @ssid (0 selects
 * the non-substream context; @size == 0 invalidates everything).
 *
 * Returns 0 when there is nothing to do (no ATS feature, or no
 * ATS-enabled master), otherwise the result of submitting the batch.
 */
static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
				   int ssid, unsigned long iova, size_t size)
{
	int i;
	unsigned long flags;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds = {};

	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
		return 0;

	/*
	 * Ensure that we've completed prior invalidation of the main TLBs
	 * before we read 'nr_ats_masters' in case of a concurrent call to
	 * arm_smmu_enable_ats():
	 *
	 *	// unmap()			// arm_smmu_enable_ats()
	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
	 *	smp_mb();			[...]
	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
	 *
	 * Ensures that we always see the incremented 'nr_ats_masters' count if
	 * ATS was enabled at the PCI device before completion of the TLBI.
	 */
	smp_mb();
	if (!atomic_read(&smmu_domain->nr_ats_masters))
		return 0;

	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);

	/* Queue one ATC_INV per SID of every ATS-enabled master */
	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		if (!master->ats_enabled)
			continue;

		for (i = 0; i < master->num_sids; i++) {
			cmd.atc.sid = master->sids[i];
			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
}
2253e86d1aa8SWill Deacon 
2254e86d1aa8SWill Deacon /* IO_PGTABLE API */
2255e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_context(void *cookie)
2256e86d1aa8SWill Deacon {
2257e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = cookie;
2258e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2259e86d1aa8SWill Deacon 	struct arm_smmu_cmdq_ent cmd;
2260e86d1aa8SWill Deacon 
2261e86d1aa8SWill Deacon 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2262e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_NH_ASID;
2263e86d1aa8SWill Deacon 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
2264e86d1aa8SWill Deacon 		cmd.tlbi.vmid	= 0;
2265e86d1aa8SWill Deacon 	} else {
2266e86d1aa8SWill Deacon 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2267e86d1aa8SWill Deacon 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2268e86d1aa8SWill Deacon 	}
2269e86d1aa8SWill Deacon 
2270e86d1aa8SWill Deacon 	/*
2271e86d1aa8SWill Deacon 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2272e86d1aa8SWill Deacon 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2273e86d1aa8SWill Deacon 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2274e86d1aa8SWill Deacon 	 * insertion to guarantee those are observed before the TLBI. Do be
2275e86d1aa8SWill Deacon 	 * careful, 007.
2276e86d1aa8SWill Deacon 	 */
2277e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2278e86d1aa8SWill Deacon 	arm_smmu_cmdq_issue_sync(smmu);
2279e86d1aa8SWill Deacon 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2280e86d1aa8SWill Deacon }
2281e86d1aa8SWill Deacon 
/*
 * Invalidate TLB entries for [@iova, @iova + @size) at @granule
 * granularity, then perform the matching ATC invalidation.  When the SMMU
 * supports range invalidation (ARM_SMMU_FEAT_RANGE_INV), the range is
 * compressed into fewer TLBI commands using the TG/TTL/NUM/SCALE fields;
 * otherwise one command is emitted per granule.
 */
static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
				   size_t granule, bool leaf,
				   struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
	size_t inv_range = granule;
	struct arm_smmu_cmdq_batch cmds = {};
	struct arm_smmu_cmdq_ent cmd = {
		.tlbi = {
			.leaf	= leaf,
		},
	};

	if (!size)
		return;

	/* Invalidate by ASID at stage 1, by IPA/VMID otherwise */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
	} else {
		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
	}

	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
		/* Get the leaf page size */
		tg = __ffs(smmu_domain->domain.pgsize_bitmap);

		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
		cmd.tlbi.tg = (tg - 10) / 2;

		/* Determine what level the granule is at */
		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));

		num_pages = size >> tg;
	}

	while (iova < end) {
		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
			/*
			 * On each iteration of the loop, the range is 5 bits
			 * worth of the aligned size remaining.
			 * The range in pages is:
			 *
			 * range = (num_pages & (0x1f << __ffs(num_pages)))
			 */
			unsigned long scale, num;

			/* Determine the power of 2 multiple number of pages */
			scale = __ffs(num_pages);
			cmd.tlbi.scale = scale;

			/* Determine how many chunks of 2^scale size we have */
			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
			cmd.tlbi.num = num - 1;

			/* range is num * 2^scale * pgsize */
			inv_range = num << (scale + tg);

			/* Clear out the lower order bits for the next iteration */
			num_pages -= num << scale;
		}

		cmd.tlbi.addr = iova;
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
		iova += inv_range;
	}
	arm_smmu_cmdq_batch_submit(smmu, &cmds);

	/*
	 * Unfortunately, this can't be leaf-only since we may have
	 * zapped an entire table.
	 */
	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
}
2358e86d1aa8SWill Deacon 
2359e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2360e86d1aa8SWill Deacon 					 unsigned long iova, size_t granule,
2361e86d1aa8SWill Deacon 					 void *cookie)
2362e86d1aa8SWill Deacon {
2363e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = cookie;
2364e86d1aa8SWill Deacon 	struct iommu_domain *domain = &smmu_domain->domain;
2365e86d1aa8SWill Deacon 
2366e86d1aa8SWill Deacon 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2367e86d1aa8SWill Deacon }
2368e86d1aa8SWill Deacon 
/* io-pgtable hook: invalidate the range with the 'leaf' hint cleared */
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
}
2374e86d1aa8SWill Deacon 
/* io-pgtable hook: invalidate the range with the 'leaf' hint set */
static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
}
2380e86d1aa8SWill Deacon 
/* TLB maintenance callbacks registered with the io-pgtable layer */
static const struct iommu_flush_ops arm_smmu_flush_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
};
2387e86d1aa8SWill Deacon 
2388e86d1aa8SWill Deacon /* IOMMU API */
2389e86d1aa8SWill Deacon static bool arm_smmu_capable(enum iommu_cap cap)
2390e86d1aa8SWill Deacon {
2391e86d1aa8SWill Deacon 	switch (cap) {
2392e86d1aa8SWill Deacon 	case IOMMU_CAP_CACHE_COHERENCY:
2393e86d1aa8SWill Deacon 		return true;
2394e86d1aa8SWill Deacon 	case IOMMU_CAP_NOEXEC:
2395e86d1aa8SWill Deacon 		return true;
2396e86d1aa8SWill Deacon 	default:
2397e86d1aa8SWill Deacon 		return false;
2398e86d1aa8SWill Deacon 	}
2399e86d1aa8SWill Deacon }
2400e86d1aa8SWill Deacon 
2401e86d1aa8SWill Deacon static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2402e86d1aa8SWill Deacon {
2403e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain;
2404e86d1aa8SWill Deacon 
2405e86d1aa8SWill Deacon 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2406e86d1aa8SWill Deacon 	    type != IOMMU_DOMAIN_DMA &&
2407e86d1aa8SWill Deacon 	    type != IOMMU_DOMAIN_IDENTITY)
2408e86d1aa8SWill Deacon 		return NULL;
2409e86d1aa8SWill Deacon 
2410e86d1aa8SWill Deacon 	/*
2411e86d1aa8SWill Deacon 	 * Allocate the domain and initialise some of its data structures.
2412e86d1aa8SWill Deacon 	 * We can't really do anything meaningful until we've added a
2413e86d1aa8SWill Deacon 	 * master.
2414e86d1aa8SWill Deacon 	 */
2415e86d1aa8SWill Deacon 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2416e86d1aa8SWill Deacon 	if (!smmu_domain)
2417e86d1aa8SWill Deacon 		return NULL;
2418e86d1aa8SWill Deacon 
2419e86d1aa8SWill Deacon 	if (type == IOMMU_DOMAIN_DMA &&
2420e86d1aa8SWill Deacon 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
2421e86d1aa8SWill Deacon 		kfree(smmu_domain);
2422e86d1aa8SWill Deacon 		return NULL;
2423e86d1aa8SWill Deacon 	}
2424e86d1aa8SWill Deacon 
2425e86d1aa8SWill Deacon 	mutex_init(&smmu_domain->init_mutex);
2426e86d1aa8SWill Deacon 	INIT_LIST_HEAD(&smmu_domain->devices);
2427e86d1aa8SWill Deacon 	spin_lock_init(&smmu_domain->devices_lock);
2428e86d1aa8SWill Deacon 
2429e86d1aa8SWill Deacon 	return &smmu_domain->domain;
2430e86d1aa8SWill Deacon }
2431e86d1aa8SWill Deacon 
2432e86d1aa8SWill Deacon static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2433e86d1aa8SWill Deacon {
2434e86d1aa8SWill Deacon 	int idx, size = 1 << span;
2435e86d1aa8SWill Deacon 
2436e86d1aa8SWill Deacon 	do {
2437e86d1aa8SWill Deacon 		idx = find_first_zero_bit(map, size);
2438e86d1aa8SWill Deacon 		if (idx == size)
2439e86d1aa8SWill Deacon 			return -ENOSPC;
2440e86d1aa8SWill Deacon 	} while (test_and_set_bit(idx, map));
2441e86d1aa8SWill Deacon 
2442e86d1aa8SWill Deacon 	return idx;
2443e86d1aa8SWill Deacon }
2444e86d1aa8SWill Deacon 
/* Return an ID previously claimed via arm_smmu_bitmap_alloc() */
static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
2449e86d1aa8SWill Deacon 
/*
 * Free an SMMU domain: drop the DMA API cookie and the io-pgtable
 * instance first, then the stage-specific resources (CD table and ASID
 * for stage 1, VMID for stage 2), and finally the domain itself.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	iommu_put_dma_cookie(domain);
	free_io_pgtable_ops(smmu_domain->pgtbl_ops);

	/* Free the CD and ASID, if we allocated them */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

		/* The CD table may never have been allocated */
		if (cfg->cdcfg.cdtab)
			arm_smmu_free_cd_tables(smmu_domain);
		arm_smmu_free_asid(&cfg->cd);
	} else {
		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
		if (cfg->vmid)
			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
	}

	kfree(smmu_domain);
}
2473e86d1aa8SWill Deacon 
/*
 * Finalise a stage-1 domain: allocate an ASID, size the CD table for
 * @master's SSID range, translate the io-pgtable config into context
 * descriptor 0 and install it.
 *
 * Returns 0 on success or a negative errno; on failure any resources
 * allocated here are released again.
 */
static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
				       struct arm_smmu_master *master,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	int ret;
	u32 asid;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;

	/* ASID 0 is excluded; stay within the SMMU's supported ASID width */
	ret = xa_alloc(&asid_xa, &asid, &cfg->cd,
		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
	if (ret)
		return ret;

	/* CD table must be big enough for the master's SSID space */
	cfg->s1cdmax = master->ssid_bits;

	ret = arm_smmu_alloc_cd_tables(smmu_domain);
	if (ret)
		goto out_free_asid;

	/* Mirror the io-pgtable TCR into CD[0]; EPD1 disables TTBR1 walks */
	cfg->cd.asid	= (u16)asid;
	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;

	/*
	 * Note that this will end up calling arm_smmu_sync_cd() before
	 * the master has been added to the devices list for this domain.
	 * This isn't an issue because the STE hasn't been installed yet.
	 */
	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
	if (ret)
		goto out_free_cd_tables;

	return 0;

out_free_cd_tables:
	arm_smmu_free_cd_tables(smmu_domain);
out_free_asid:
	arm_smmu_free_asid(&cfg->cd);
	return ret;
}
2523e86d1aa8SWill Deacon 
/*
 * Finalise a stage-2 domain: allocate a VMID and capture the VTTBR/VTCR
 * from the io-pgtable configuration for later installation in the STE.
 *
 * Returns 0 on success or a negative errno if no VMID is available.
 */
static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
				       struct arm_smmu_master *master,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	int vmid;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;

	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
	if (vmid < 0)
		return vmid;

	/* Pack the io-pgtable VTCR into the STE's VTCR field layout */
	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	cfg->vmid	= (u16)vmid;
	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
	return 0;
}
2549e86d1aa8SWill Deacon 
/*
 * One-time domain setup, performed under the domain's init_mutex on first
 * attach: pick a translation stage the hardware supports, allocate the
 * io-pgtable, fill in the domain geometry and run the stage-specific
 * finaliser (ASID/CD for stage 1, VMID/VTCR for stage 2).
 *
 * Returns 0 on success or a negative errno; on failure the io-pgtable is
 * freed again so the domain can be retried against another SMMU.
 */
static int arm_smmu_domain_finalise(struct iommu_domain *domain,
				    struct arm_smmu_master *master)
{
	int ret;
	unsigned long ias, oas;
	enum io_pgtable_fmt fmt;
	struct io_pgtable_cfg pgtbl_cfg;
	struct io_pgtable_ops *pgtbl_ops;
	int (*finalise_stage_fn)(struct arm_smmu_domain *,
				 struct arm_smmu_master *,
				 struct io_pgtable_cfg *);
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	/* Identity domains need no page tables at all */
	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		return 0;
	}

	/* Restrict the stage to what we can actually support */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		/* Stage-1 input is a VA: 52-bit needs the VAX feature */
		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
		ias = min_t(unsigned long, ias, VA_BITS);
		oas = smmu->ias;
		fmt = ARM_64_LPAE_S1;
		finalise_stage_fn = arm_smmu_domain_finalise_s1;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
	case ARM_SMMU_DOMAIN_S2:
		ias = smmu->ias;
		oas = smmu->oas;
		fmt = ARM_64_LPAE_S2;
		finalise_stage_fn = arm_smmu_domain_finalise_s2;
		break;
	default:
		return -EINVAL;
	}

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
		.tlb		= &arm_smmu_flush_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops)
		return -ENOMEM;

	/* io-pgtable may have shrunk the page sizes / IAS it can honour */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
	domain->geometry.force_aperture = true;

	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
	if (ret < 0) {
		free_io_pgtable_ops(pgtbl_ops);
		return ret;
	}

	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;
}
2623e86d1aa8SWill Deacon 
2624e86d1aa8SWill Deacon static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2625e86d1aa8SWill Deacon {
2626e86d1aa8SWill Deacon 	__le64 *step;
2627e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2628e86d1aa8SWill Deacon 
2629e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2630e86d1aa8SWill Deacon 		struct arm_smmu_strtab_l1_desc *l1_desc;
2631e86d1aa8SWill Deacon 		int idx;
2632e86d1aa8SWill Deacon 
2633e86d1aa8SWill Deacon 		/* Two-level walk */
2634e86d1aa8SWill Deacon 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2635e86d1aa8SWill Deacon 		l1_desc = &cfg->l1_desc[idx];
2636e86d1aa8SWill Deacon 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2637e86d1aa8SWill Deacon 		step = &l1_desc->l2ptr[idx];
2638e86d1aa8SWill Deacon 	} else {
2639e86d1aa8SWill Deacon 		/* Simple linear lookup */
2640e86d1aa8SWill Deacon 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2641e86d1aa8SWill Deacon 	}
2642e86d1aa8SWill Deacon 
2643e86d1aa8SWill Deacon 	return step;
2644e86d1aa8SWill Deacon }
2645e86d1aa8SWill Deacon 
2646e86d1aa8SWill Deacon static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2647e86d1aa8SWill Deacon {
2648e86d1aa8SWill Deacon 	int i, j;
2649e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = master->smmu;
2650e86d1aa8SWill Deacon 
2651e86d1aa8SWill Deacon 	for (i = 0; i < master->num_sids; ++i) {
2652e86d1aa8SWill Deacon 		u32 sid = master->sids[i];
2653e86d1aa8SWill Deacon 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2654e86d1aa8SWill Deacon 
2655e86d1aa8SWill Deacon 		/* Bridged PCI devices may end up with duplicated IDs */
2656e86d1aa8SWill Deacon 		for (j = 0; j < i; j++)
2657e86d1aa8SWill Deacon 			if (master->sids[j] == sid)
2658e86d1aa8SWill Deacon 				break;
2659e86d1aa8SWill Deacon 		if (j < i)
2660e86d1aa8SWill Deacon 			continue;
2661e86d1aa8SWill Deacon 
2662e86d1aa8SWill Deacon 		arm_smmu_write_strtab_ent(master, sid, step);
2663e86d1aa8SWill Deacon 	}
2664e86d1aa8SWill Deacon }
2665e86d1aa8SWill Deacon 
2666e86d1aa8SWill Deacon static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2667e86d1aa8SWill Deacon {
2668e86d1aa8SWill Deacon 	struct device *dev = master->dev;
2669e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = master->smmu;
2670e86d1aa8SWill Deacon 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2671e86d1aa8SWill Deacon 
2672e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2673e86d1aa8SWill Deacon 		return false;
2674e86d1aa8SWill Deacon 
2675e86d1aa8SWill Deacon 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2676e86d1aa8SWill Deacon 		return false;
2677e86d1aa8SWill Deacon 
2678e86d1aa8SWill Deacon 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2679e86d1aa8SWill Deacon }
2680e86d1aa8SWill Deacon 
/*
 * Enable ATS at the PCI endpoint, after registering the master with the
 * domain's ATC-invalidation accounting and flushing its ATC.
 */
static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
	size_t stu;
	struct pci_dev *pdev;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_domain *smmu_domain = master->domain;

	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
	if (!master->ats_enabled)
		return;

	/* Smallest Translation Unit: log2 of the smallest supported granule */
	stu = __ffs(smmu->pgsize_bitmap);
	pdev = to_pci_dev(master->dev);

	/*
	 * Bump nr_ats_masters and invalidate the ATC before the endpoint can
	 * start issuing ATS requests.
	 */
	atomic_inc(&smmu_domain->nr_ats_masters);
	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
	if (pci_enable_ats(pdev, stu))
		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}
2701e86d1aa8SWill Deacon 
/*
 * Disable ATS at the PCI endpoint, then flush its ATC and drop it from
 * the domain's ATS-master count.
 */
static void arm_smmu_disable_ats(struct arm_smmu_master *master)
{
	struct arm_smmu_domain *smmu_domain = master->domain;

	if (!master->ats_enabled)
		return;

	pci_disable_ats(to_pci_dev(master->dev));
	/*
	 * Ensure ATS is disabled at the endpoint before we issue the
	 * ATC invalidation via the SMMU.
	 */
	wmb();
	arm_smmu_atc_inv_master(master);
	atomic_dec(&smmu_domain->nr_ats_masters);
}
2718e86d1aa8SWill Deacon 
2719e86d1aa8SWill Deacon static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2720e86d1aa8SWill Deacon {
2721e86d1aa8SWill Deacon 	int ret;
2722e86d1aa8SWill Deacon 	int features;
2723e86d1aa8SWill Deacon 	int num_pasids;
2724e86d1aa8SWill Deacon 	struct pci_dev *pdev;
2725e86d1aa8SWill Deacon 
2726e86d1aa8SWill Deacon 	if (!dev_is_pci(master->dev))
2727e86d1aa8SWill Deacon 		return -ENODEV;
2728e86d1aa8SWill Deacon 
2729e86d1aa8SWill Deacon 	pdev = to_pci_dev(master->dev);
2730e86d1aa8SWill Deacon 
2731e86d1aa8SWill Deacon 	features = pci_pasid_features(pdev);
2732e86d1aa8SWill Deacon 	if (features < 0)
2733e86d1aa8SWill Deacon 		return features;
2734e86d1aa8SWill Deacon 
2735e86d1aa8SWill Deacon 	num_pasids = pci_max_pasids(pdev);
2736e86d1aa8SWill Deacon 	if (num_pasids <= 0)
2737e86d1aa8SWill Deacon 		return num_pasids;
2738e86d1aa8SWill Deacon 
2739e86d1aa8SWill Deacon 	ret = pci_enable_pasid(pdev, features);
2740e86d1aa8SWill Deacon 	if (ret) {
2741e86d1aa8SWill Deacon 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2742e86d1aa8SWill Deacon 		return ret;
2743e86d1aa8SWill Deacon 	}
2744e86d1aa8SWill Deacon 
2745e86d1aa8SWill Deacon 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2746e86d1aa8SWill Deacon 				  master->smmu->ssid_bits);
2747e86d1aa8SWill Deacon 	return 0;
2748e86d1aa8SWill Deacon }
2749e86d1aa8SWill Deacon 
2750e86d1aa8SWill Deacon static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2751e86d1aa8SWill Deacon {
2752e86d1aa8SWill Deacon 	struct pci_dev *pdev;
2753e86d1aa8SWill Deacon 
2754e86d1aa8SWill Deacon 	if (!dev_is_pci(master->dev))
2755e86d1aa8SWill Deacon 		return;
2756e86d1aa8SWill Deacon 
2757e86d1aa8SWill Deacon 	pdev = to_pci_dev(master->dev);
2758e86d1aa8SWill Deacon 
2759e86d1aa8SWill Deacon 	if (!pdev->pasid_enabled)
2760e86d1aa8SWill Deacon 		return;
2761e86d1aa8SWill Deacon 
2762e86d1aa8SWill Deacon 	master->ssid_bits = 0;
2763e86d1aa8SWill Deacon 	pci_disable_pasid(pdev);
2764e86d1aa8SWill Deacon }
2765e86d1aa8SWill Deacon 
/*
 * Detach @master from its current domain: disable ATS, drop it from the
 * domain's device list and rewrite its STEs without a translation context.
 * No-op if the master isn't attached to anything.
 */
static void arm_smmu_detach_dev(struct arm_smmu_master *master)
{
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = master->domain;

	if (!smmu_domain)
		return;

	/* Quiesce ATS before tearing down the translation */
	arm_smmu_disable_ats(master);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_del(&master->domain_head);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	/* With domain cleared, the STE write below installs a bare STE */
	master->domain = NULL;
	master->ats_enabled = false;
	arm_smmu_install_ste_for_dev(master);
}
2784e86d1aa8SWill Deacon 
/*
 * Attach @dev to @domain. The first attach finalises the domain against
 * this device's SMMU; subsequent attaches must come from the same SMMU
 * and (for stage 1) use a compatible SSID width.
 *
 * Returns 0 on success, -ENOENT if the device has no fwspec, -ENXIO for
 * a cross-SMMU attach, -EINVAL for an SSID mismatch, or the error from
 * domain finalisation.
 */
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret = 0;
	unsigned long flags;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master *master;

	if (!fwspec)
		return -ENOENT;

	master = dev_iommu_priv_get(dev);
	smmu = master->smmu;

	/* Leave any previously-attached domain first */
	arm_smmu_detach_dev(master);

	mutex_lock(&smmu_domain->init_mutex);

	if (!smmu_domain->smmu) {
		/* First attach: bind the domain to this SMMU and finalise */
		smmu_domain->smmu = smmu;
		ret = arm_smmu_domain_finalise(domain, master);
		if (ret) {
			smmu_domain->smmu = NULL;
			goto out_unlock;
		}
	} else if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s (upstream of %s)\n",
			dev_name(smmu_domain->smmu->dev),
			dev_name(smmu->dev));
		ret = -ENXIO;
		goto out_unlock;
	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
		dev_err(dev,
			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
		ret = -EINVAL;
		goto out_unlock;
	}

	master->domain = smmu_domain;

	/* ATS makes no sense for a bypass domain */
	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
		master->ats_enabled = arm_smmu_ats_supported(master);

	arm_smmu_install_ste_for_dev(master);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_add(&master->domain_head, &smmu_domain->devices);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_enable_ats(master);

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
2844e86d1aa8SWill Deacon 
2845e86d1aa8SWill Deacon static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2846e86d1aa8SWill Deacon 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2847e86d1aa8SWill Deacon {
2848e86d1aa8SWill Deacon 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2849e86d1aa8SWill Deacon 
2850e86d1aa8SWill Deacon 	if (!ops)
2851e86d1aa8SWill Deacon 		return -ENODEV;
2852e86d1aa8SWill Deacon 
2853*e46b3c0dSJoerg Roedel 	return ops->map(ops, iova, paddr, size, prot, gfp);
2854e86d1aa8SWill Deacon }
2855e86d1aa8SWill Deacon 
2856e86d1aa8SWill Deacon static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2857e86d1aa8SWill Deacon 			     size_t size, struct iommu_iotlb_gather *gather)
2858e86d1aa8SWill Deacon {
2859e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2860e86d1aa8SWill Deacon 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2861e86d1aa8SWill Deacon 
2862e86d1aa8SWill Deacon 	if (!ops)
2863e86d1aa8SWill Deacon 		return 0;
2864e86d1aa8SWill Deacon 
2865e86d1aa8SWill Deacon 	return ops->unmap(ops, iova, size, gather);
2866e86d1aa8SWill Deacon }
2867e86d1aa8SWill Deacon 
/* Invalidate the whole translation context for this domain */
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/* A domain that was never finalised has nothing to flush */
	if (smmu_domain->smmu)
		arm_smmu_tlb_inv_context(smmu_domain);
}
2875e86d1aa8SWill Deacon 
2876e86d1aa8SWill Deacon static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2877e86d1aa8SWill Deacon 				struct iommu_iotlb_gather *gather)
2878e86d1aa8SWill Deacon {
2879e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2880e86d1aa8SWill Deacon 
2881e86d1aa8SWill Deacon 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2882e86d1aa8SWill Deacon 			       gather->pgsize, true, smmu_domain);
2883e86d1aa8SWill Deacon }
2884e86d1aa8SWill Deacon 
2885e86d1aa8SWill Deacon static phys_addr_t
2886e86d1aa8SWill Deacon arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2887e86d1aa8SWill Deacon {
2888e86d1aa8SWill Deacon 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2889e86d1aa8SWill Deacon 
2890e86d1aa8SWill Deacon 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2891e86d1aa8SWill Deacon 		return iova;
2892e86d1aa8SWill Deacon 
2893e86d1aa8SWill Deacon 	if (!ops)
2894e86d1aa8SWill Deacon 		return 0;
2895e86d1aa8SWill Deacon 
2896e86d1aa8SWill Deacon 	return ops->iova_to_phys(ops, iova);
2897e86d1aa8SWill Deacon }
2898e86d1aa8SWill Deacon 
2899e86d1aa8SWill Deacon static struct platform_driver arm_smmu_driver;
2900e86d1aa8SWill Deacon 
2901e86d1aa8SWill Deacon static
2902e86d1aa8SWill Deacon struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2903e86d1aa8SWill Deacon {
2904e86d1aa8SWill Deacon 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2905e86d1aa8SWill Deacon 							  fwnode);
2906e86d1aa8SWill Deacon 	put_device(dev);
2907e86d1aa8SWill Deacon 	return dev ? dev_get_drvdata(dev) : NULL;
2908e86d1aa8SWill Deacon }
2909e86d1aa8SWill Deacon 
2910e86d1aa8SWill Deacon static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2911e86d1aa8SWill Deacon {
2912e86d1aa8SWill Deacon 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2913e86d1aa8SWill Deacon 
2914e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2915e86d1aa8SWill Deacon 		limit *= 1UL << STRTAB_SPLIT;
2916e86d1aa8SWill Deacon 
2917e86d1aa8SWill Deacon 	return sid < limit;
2918e86d1aa8SWill Deacon }
2919e86d1aa8SWill Deacon 
2920e86d1aa8SWill Deacon static struct iommu_ops arm_smmu_ops;
2921e86d1aa8SWill Deacon 
/*
 * IOMMU core callback: claim @dev for this driver. Allocates the
 * per-device master structure, validates all of the device's stream IDs,
 * ensures the relevant L2 stream table chunks exist, and works out the
 * usable SSID width (possibly enabling PASID on a PCI endpoint).
 *
 * Returns the iommu_device on success, or an ERR_PTR on failure.
 */
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
	int i, ret;
	struct arm_smmu_device *smmu;
	struct arm_smmu_master *master;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	/* Only handle devices the firmware routed to one of our SMMUs */
	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return ERR_PTR(-ENODEV);

	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
		return ERR_PTR(-EBUSY);

	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	if (!smmu)
		return ERR_PTR(-ENODEV);

	master = kzalloc(sizeof(*master), GFP_KERNEL);
	if (!master)
		return ERR_PTR(-ENOMEM);

	master->dev = dev;
	master->smmu = smmu;
	master->sids = fwspec->ids;
	master->num_sids = fwspec->num_ids;
	dev_iommu_priv_set(dev, master);

	/* Check the SIDs are in range of the SMMU and our stream table */
	for (i = 0; i < master->num_sids; i++) {
		u32 sid = master->sids[i];

		if (!arm_smmu_sid_in_range(smmu, sid)) {
			ret = -ERANGE;
			goto err_free_master;
		}

		/* Ensure l2 strtab is initialised */
		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
			ret = arm_smmu_init_l2_strtab(smmu, sid);
			if (ret)
				goto err_free_master;
		}
	}

	/* Start from the smaller of the firmware-described and SMMU widths */
	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);

	/*
	 * Note that PASID must be enabled before, and disabled after ATS:
	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
	 *
	 *   Behavior is undefined if this bit is Set and the value of the PASID
	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
	 *   are changed.
	 */
	arm_smmu_enable_pasid(master);

	/* A linear CD table caps the SSID width we can actually use */
	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
		master->ssid_bits = min_t(u8, master->ssid_bits,
					  CTXDESC_LINEAR_CDMAX);

	return &smmu->iommu;

err_free_master:
	kfree(master);
	dev_iommu_priv_set(dev, NULL);
	return ERR_PTR(ret);
}
2989e86d1aa8SWill Deacon 
/*
 * IOMMU core callback: release everything arm_smmu_probe_device() set up.
 * Detach first (which also disables ATS), then disable PASID — matching
 * the ordering requirement noted in arm_smmu_probe_device().
 */
static void arm_smmu_release_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master *master;

	/* Ignore devices we never claimed */
	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	master = dev_iommu_priv_get(dev);
	arm_smmu_detach_dev(master);
	arm_smmu_disable_pasid(master);
	kfree(master);
	iommu_fwspec_free(dev);
}
3004e86d1aa8SWill Deacon 
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	if (dev_is_pci(dev))
		return pci_device_group(dev);

	return generic_device_group(dev);
}
3021e86d1aa8SWill Deacon 
3022e86d1aa8SWill Deacon static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
3023e86d1aa8SWill Deacon 				    enum iommu_attr attr, void *data)
3024e86d1aa8SWill Deacon {
3025e86d1aa8SWill Deacon 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3026e86d1aa8SWill Deacon 
3027e86d1aa8SWill Deacon 	switch (domain->type) {
3028e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_UNMANAGED:
3029e86d1aa8SWill Deacon 		switch (attr) {
3030e86d1aa8SWill Deacon 		case DOMAIN_ATTR_NESTING:
3031e86d1aa8SWill Deacon 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
3032e86d1aa8SWill Deacon 			return 0;
3033e86d1aa8SWill Deacon 		default:
3034e86d1aa8SWill Deacon 			return -ENODEV;
3035e86d1aa8SWill Deacon 		}
3036e86d1aa8SWill Deacon 		break;
3037e86d1aa8SWill Deacon 	case IOMMU_DOMAIN_DMA:
3038e86d1aa8SWill Deacon 		switch (attr) {
3039e86d1aa8SWill Deacon 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
3040e86d1aa8SWill Deacon 			*(int *)data = smmu_domain->non_strict;
3041e86d1aa8SWill Deacon 			return 0;
3042e86d1aa8SWill Deacon 		default:
3043e86d1aa8SWill Deacon 			return -ENODEV;
3044e86d1aa8SWill Deacon 		}
3045e86d1aa8SWill Deacon 		break;
3046e86d1aa8SWill Deacon 	default:
3047e86d1aa8SWill Deacon 		return -EINVAL;
3048e86d1aa8SWill Deacon 	}
3049e86d1aa8SWill Deacon }
3050e86d1aa8SWill Deacon 
/*
 * Write a domain attribute under the domain's init_mutex. NESTING may
 * only change before the domain is finalised against an SMMU (-EPERM
 * otherwise); the DMA flush-queue flag can be toggled at any time.
 * Returns -ENODEV for an unknown attribute, -EINVAL for an unsupported
 * domain type.
 */
static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch (domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			/* Too late once arm_smmu_domain_finalise() has run */
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch(attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
3094e86d1aa8SWill Deacon 
3095e86d1aa8SWill Deacon static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
3096e86d1aa8SWill Deacon {
3097e86d1aa8SWill Deacon 	return iommu_fwspec_add_ids(dev, args->args, 1);
3098e86d1aa8SWill Deacon }
3099e86d1aa8SWill Deacon 
3100e86d1aa8SWill Deacon static void arm_smmu_get_resv_regions(struct device *dev,
3101e86d1aa8SWill Deacon 				      struct list_head *head)
3102e86d1aa8SWill Deacon {
3103e86d1aa8SWill Deacon 	struct iommu_resv_region *region;
3104e86d1aa8SWill Deacon 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3105e86d1aa8SWill Deacon 
3106e86d1aa8SWill Deacon 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3107e86d1aa8SWill Deacon 					 prot, IOMMU_RESV_SW_MSI);
3108e86d1aa8SWill Deacon 	if (!region)
3109e86d1aa8SWill Deacon 		return;
3110e86d1aa8SWill Deacon 
3111e86d1aa8SWill Deacon 	list_add_tail(&region->list, head);
3112e86d1aa8SWill Deacon 
3113e86d1aa8SWill Deacon 	iommu_dma_get_resv_regions(dev, head);
3114e86d1aa8SWill Deacon }
3115e86d1aa8SWill Deacon 
/* IOMMU API operations implemented by this driver */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
3136e86d1aa8SWill Deacon 
3137e86d1aa8SWill Deacon /* Probing and initialisation functions */
3138e86d1aa8SWill Deacon static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3139e86d1aa8SWill Deacon 				   struct arm_smmu_queue *q,
3140e86d1aa8SWill Deacon 				   unsigned long prod_off,
3141e86d1aa8SWill Deacon 				   unsigned long cons_off,
3142e86d1aa8SWill Deacon 				   size_t dwords, const char *name)
3143e86d1aa8SWill Deacon {
3144e86d1aa8SWill Deacon 	size_t qsz;
3145e86d1aa8SWill Deacon 
3146e86d1aa8SWill Deacon 	do {
3147e86d1aa8SWill Deacon 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3148e86d1aa8SWill Deacon 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3149e86d1aa8SWill Deacon 					      GFP_KERNEL);
3150e86d1aa8SWill Deacon 		if (q->base || qsz < PAGE_SIZE)
3151e86d1aa8SWill Deacon 			break;
3152e86d1aa8SWill Deacon 
3153e86d1aa8SWill Deacon 		q->llq.max_n_shift--;
3154e86d1aa8SWill Deacon 	} while (1);
3155e86d1aa8SWill Deacon 
3156e86d1aa8SWill Deacon 	if (!q->base) {
3157e86d1aa8SWill Deacon 		dev_err(smmu->dev,
3158e86d1aa8SWill Deacon 			"failed to allocate queue (0x%zx bytes) for %s\n",
3159e86d1aa8SWill Deacon 			qsz, name);
3160e86d1aa8SWill Deacon 		return -ENOMEM;
3161e86d1aa8SWill Deacon 	}
3162e86d1aa8SWill Deacon 
3163e86d1aa8SWill Deacon 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3164e86d1aa8SWill Deacon 		dev_info(smmu->dev, "allocated %u entries for %s\n",
3165e86d1aa8SWill Deacon 			 1 << q->llq.max_n_shift, name);
3166e86d1aa8SWill Deacon 	}
3167e86d1aa8SWill Deacon 
3168e86d1aa8SWill Deacon 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
3169e86d1aa8SWill Deacon 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
3170e86d1aa8SWill Deacon 	q->ent_dwords	= dwords;
3171e86d1aa8SWill Deacon 
3172e86d1aa8SWill Deacon 	q->q_base  = Q_BASE_RWA;
3173e86d1aa8SWill Deacon 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3174e86d1aa8SWill Deacon 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3175e86d1aa8SWill Deacon 
3176e86d1aa8SWill Deacon 	q->llq.prod = q->llq.cons = 0;
3177e86d1aa8SWill Deacon 	return 0;
3178e86d1aa8SWill Deacon }
3179e86d1aa8SWill Deacon 
/* devm action: release the cmdq valid map allocated by arm_smmu_cmdq_init() */
static void arm_smmu_cmdq_free_bitmap(void *data)
{
	bitmap_free((unsigned long *)data);
}
3185e86d1aa8SWill Deacon 
3186e86d1aa8SWill Deacon static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3187e86d1aa8SWill Deacon {
3188e86d1aa8SWill Deacon 	int ret = 0;
3189e86d1aa8SWill Deacon 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3190e86d1aa8SWill Deacon 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3191e86d1aa8SWill Deacon 	atomic_long_t *bitmap;
3192e86d1aa8SWill Deacon 
3193e86d1aa8SWill Deacon 	atomic_set(&cmdq->owner_prod, 0);
3194e86d1aa8SWill Deacon 	atomic_set(&cmdq->lock, 0);
3195e86d1aa8SWill Deacon 
3196e86d1aa8SWill Deacon 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
3197e86d1aa8SWill Deacon 	if (!bitmap) {
3198e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
3199e86d1aa8SWill Deacon 		ret = -ENOMEM;
3200e86d1aa8SWill Deacon 	} else {
3201e86d1aa8SWill Deacon 		cmdq->valid_map = bitmap;
3202e86d1aa8SWill Deacon 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
3203e86d1aa8SWill Deacon 	}
3204e86d1aa8SWill Deacon 
3205e86d1aa8SWill Deacon 	return ret;
3206e86d1aa8SWill Deacon }
3207e86d1aa8SWill Deacon 
3208e86d1aa8SWill Deacon static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3209e86d1aa8SWill Deacon {
3210e86d1aa8SWill Deacon 	int ret;
3211e86d1aa8SWill Deacon 
3212e86d1aa8SWill Deacon 	/* cmdq */
3213e86d1aa8SWill Deacon 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
3214e86d1aa8SWill Deacon 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
3215e86d1aa8SWill Deacon 				      "cmdq");
3216e86d1aa8SWill Deacon 	if (ret)
3217e86d1aa8SWill Deacon 		return ret;
3218e86d1aa8SWill Deacon 
3219e86d1aa8SWill Deacon 	ret = arm_smmu_cmdq_init(smmu);
3220e86d1aa8SWill Deacon 	if (ret)
3221e86d1aa8SWill Deacon 		return ret;
3222e86d1aa8SWill Deacon 
3223e86d1aa8SWill Deacon 	/* evtq */
3224e86d1aa8SWill Deacon 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
3225e86d1aa8SWill Deacon 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
3226e86d1aa8SWill Deacon 				      "evtq");
3227e86d1aa8SWill Deacon 	if (ret)
3228e86d1aa8SWill Deacon 		return ret;
3229e86d1aa8SWill Deacon 
3230e86d1aa8SWill Deacon 	/* priq */
3231e86d1aa8SWill Deacon 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3232e86d1aa8SWill Deacon 		return 0;
3233e86d1aa8SWill Deacon 
3234e86d1aa8SWill Deacon 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
3235e86d1aa8SWill Deacon 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
3236e86d1aa8SWill Deacon 				       "priq");
3237e86d1aa8SWill Deacon }
3238e86d1aa8SWill Deacon 
3239e86d1aa8SWill Deacon static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3240e86d1aa8SWill Deacon {
3241e86d1aa8SWill Deacon 	unsigned int i;
3242e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3243e86d1aa8SWill Deacon 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
3244e86d1aa8SWill Deacon 	void *strtab = smmu->strtab_cfg.strtab;
3245e86d1aa8SWill Deacon 
3246e86d1aa8SWill Deacon 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
3247e86d1aa8SWill Deacon 	if (!cfg->l1_desc) {
3248e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
3249e86d1aa8SWill Deacon 		return -ENOMEM;
3250e86d1aa8SWill Deacon 	}
3251e86d1aa8SWill Deacon 
3252e86d1aa8SWill Deacon 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3253e86d1aa8SWill Deacon 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3254e86d1aa8SWill Deacon 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3255e86d1aa8SWill Deacon 	}
3256e86d1aa8SWill Deacon 
3257e86d1aa8SWill Deacon 	return 0;
3258e86d1aa8SWill Deacon }
3259e86d1aa8SWill Deacon 
3260e86d1aa8SWill Deacon static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3261e86d1aa8SWill Deacon {
3262e86d1aa8SWill Deacon 	void *strtab;
3263e86d1aa8SWill Deacon 	u64 reg;
3264e86d1aa8SWill Deacon 	u32 size, l1size;
3265e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3266e86d1aa8SWill Deacon 
3267e86d1aa8SWill Deacon 	/* Calculate the L1 size, capped to the SIDSIZE. */
3268e86d1aa8SWill Deacon 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3269e86d1aa8SWill Deacon 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3270e86d1aa8SWill Deacon 	cfg->num_l1_ents = 1 << size;
3271e86d1aa8SWill Deacon 
3272e86d1aa8SWill Deacon 	size += STRTAB_SPLIT;
3273e86d1aa8SWill Deacon 	if (size < smmu->sid_bits)
3274e86d1aa8SWill Deacon 		dev_warn(smmu->dev,
3275e86d1aa8SWill Deacon 			 "2-level strtab only covers %u/%u bits of SID\n",
3276e86d1aa8SWill Deacon 			 size, smmu->sid_bits);
3277e86d1aa8SWill Deacon 
3278e86d1aa8SWill Deacon 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3279e86d1aa8SWill Deacon 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3280e86d1aa8SWill Deacon 				     GFP_KERNEL);
3281e86d1aa8SWill Deacon 	if (!strtab) {
3282e86d1aa8SWill Deacon 		dev_err(smmu->dev,
3283e86d1aa8SWill Deacon 			"failed to allocate l1 stream table (%u bytes)\n",
3284e86d1aa8SWill Deacon 			size);
3285e86d1aa8SWill Deacon 		return -ENOMEM;
3286e86d1aa8SWill Deacon 	}
3287e86d1aa8SWill Deacon 	cfg->strtab = strtab;
3288e86d1aa8SWill Deacon 
3289e86d1aa8SWill Deacon 	/* Configure strtab_base_cfg for 2 levels */
3290e86d1aa8SWill Deacon 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3291e86d1aa8SWill Deacon 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3292e86d1aa8SWill Deacon 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3293e86d1aa8SWill Deacon 	cfg->strtab_base_cfg = reg;
3294e86d1aa8SWill Deacon 
3295e86d1aa8SWill Deacon 	return arm_smmu_init_l1_strtab(smmu);
3296e86d1aa8SWill Deacon }
3297e86d1aa8SWill Deacon 
3298e86d1aa8SWill Deacon static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3299e86d1aa8SWill Deacon {
3300e86d1aa8SWill Deacon 	void *strtab;
3301e86d1aa8SWill Deacon 	u64 reg;
3302e86d1aa8SWill Deacon 	u32 size;
3303e86d1aa8SWill Deacon 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3304e86d1aa8SWill Deacon 
3305e86d1aa8SWill Deacon 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3306e86d1aa8SWill Deacon 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3307e86d1aa8SWill Deacon 				     GFP_KERNEL);
3308e86d1aa8SWill Deacon 	if (!strtab) {
3309e86d1aa8SWill Deacon 		dev_err(smmu->dev,
3310e86d1aa8SWill Deacon 			"failed to allocate linear stream table (%u bytes)\n",
3311e86d1aa8SWill Deacon 			size);
3312e86d1aa8SWill Deacon 		return -ENOMEM;
3313e86d1aa8SWill Deacon 	}
3314e86d1aa8SWill Deacon 	cfg->strtab = strtab;
3315e86d1aa8SWill Deacon 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3316e86d1aa8SWill Deacon 
3317e86d1aa8SWill Deacon 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3318e86d1aa8SWill Deacon 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3319e86d1aa8SWill Deacon 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3320e86d1aa8SWill Deacon 	cfg->strtab_base_cfg = reg;
3321e86d1aa8SWill Deacon 
3322e86d1aa8SWill Deacon 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3323e86d1aa8SWill Deacon 	return 0;
3324e86d1aa8SWill Deacon }
3325e86d1aa8SWill Deacon 
3326e86d1aa8SWill Deacon static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3327e86d1aa8SWill Deacon {
3328e86d1aa8SWill Deacon 	u64 reg;
3329e86d1aa8SWill Deacon 	int ret;
3330e86d1aa8SWill Deacon 
3331e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3332e86d1aa8SWill Deacon 		ret = arm_smmu_init_strtab_2lvl(smmu);
3333e86d1aa8SWill Deacon 	else
3334e86d1aa8SWill Deacon 		ret = arm_smmu_init_strtab_linear(smmu);
3335e86d1aa8SWill Deacon 
3336e86d1aa8SWill Deacon 	if (ret)
3337e86d1aa8SWill Deacon 		return ret;
3338e86d1aa8SWill Deacon 
3339e86d1aa8SWill Deacon 	/* Set the strtab base address */
3340e86d1aa8SWill Deacon 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3341e86d1aa8SWill Deacon 	reg |= STRTAB_BASE_RA;
3342e86d1aa8SWill Deacon 	smmu->strtab_cfg.strtab_base = reg;
3343e86d1aa8SWill Deacon 
3344e86d1aa8SWill Deacon 	/* Allocate the first VMID for stage-2 bypass STEs */
3345e86d1aa8SWill Deacon 	set_bit(0, smmu->vmid_map);
3346e86d1aa8SWill Deacon 	return 0;
3347e86d1aa8SWill Deacon }
3348e86d1aa8SWill Deacon 
/* Allocate all in-memory data structures: hardware queues then stream table. */
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
	int ret = arm_smmu_init_queues(smmu);

	return ret ? ret : arm_smmu_init_strtab(smmu);
}
3359e86d1aa8SWill Deacon 
/*
 * Write @val to the register at @reg_off, then poll the register at
 * @ack_off (1us interval, up to ARM_SMMU_POLL_TIMEOUT_US) until it reads
 * back equal to @val. Returns 0 on success or the poll helper's timeout
 * error otherwise.
 */
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
				   unsigned int reg_off, unsigned int ack_off)
{
	u32 reg;

	writel_relaxed(val, smmu->base + reg_off);
	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
					  1, ARM_SMMU_POLL_TIMEOUT_US);
}
3369e86d1aa8SWill Deacon 
/* GBPA is "special": updates only take effect via the GBPA_UPDATE handshake */
/*
 * Read-modify-write the GBPA register: clear the @clr bits, set the @set
 * bits and latch the result by writing it back with GBPA_UPDATE. The
 * register is polled for GBPA_UPDATE to clear both before the write (so
 * we read a stable value) and after (so we know the update completed).
 */
static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
{
	int ret;
	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;

	/* Wait for any in-flight update to finish before reading */
	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);
	if (ret)
		return ret;

	reg &= ~clr;
	reg |= set;
	writel_relaxed(reg | GBPA_UPDATE, gbpa);
	/* Hardware clears GBPA_UPDATE once the new value has taken effect */
	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);

	if (ret)
		dev_err(smmu->dev, "GBPA not responding to update\n");
	return ret;
}
3391e86d1aa8SWill Deacon 
/* devm action: free the platform MSIs allocated in arm_smmu_setup_msis() */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs(data);
}
3397e86d1aa8SWill Deacon 
3398e86d1aa8SWill Deacon static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3399e86d1aa8SWill Deacon {
3400e86d1aa8SWill Deacon 	phys_addr_t doorbell;
3401e86d1aa8SWill Deacon 	struct device *dev = msi_desc_to_dev(desc);
3402e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3403e86d1aa8SWill Deacon 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3404e86d1aa8SWill Deacon 
3405e86d1aa8SWill Deacon 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3406e86d1aa8SWill Deacon 	doorbell &= MSI_CFG0_ADDR_MASK;
3407e86d1aa8SWill Deacon 
3408e86d1aa8SWill Deacon 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3409e86d1aa8SWill Deacon 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3410e86d1aa8SWill Deacon 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3411e86d1aa8SWill Deacon }
3412e86d1aa8SWill Deacon 
/*
 * Allocate MSIs for the evtq, gerror and (if supported) priq interrupt
 * sources and record the resulting Linux irq numbers. On any failure we
 * return silently and fall back to wired interrupts. The doorbell
 * registers themselves are programmed by arm_smmu_write_msi_msg().
 */
static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
{
	struct msi_desc *desc;
	int ret, nvec = ARM_SMMU_MAX_MSIS;
	struct device *dev = smmu->dev;

	/* Clear the MSI address regs */
	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);

	/* Only request a PRIQ vector when the SMMU implements PRI */
	if (smmu->features & ARM_SMMU_FEAT_PRI)
		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
	else
		nvec--;

	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
		return;

	if (!dev->msi_domain) {
		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
		return;
	}

	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
	if (ret) {
		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
		return;
	}

	/* Stash each vector's irq number by its well-known MSI index */
	for_each_msi_entry(desc, dev) {
		switch (desc->platform.msi_index) {
		case EVTQ_MSI_INDEX:
			smmu->evtq.q.irq = desc->irq;
			break;
		case GERROR_MSI_INDEX:
			smmu->gerr_irq = desc->irq;
			break;
		case PRIQ_MSI_INDEX:
			smmu->priq.q.irq = desc->irq;
			break;
		default:	/* Unknown */
			continue;
		}
	}

	/* Add callback to free MSIs on teardown */
	devm_add_action(dev, arm_smmu_free_msis, dev);
}
3462e86d1aa8SWill Deacon 
3463e86d1aa8SWill Deacon static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3464e86d1aa8SWill Deacon {
3465e86d1aa8SWill Deacon 	int irq, ret;
3466e86d1aa8SWill Deacon 
3467e86d1aa8SWill Deacon 	arm_smmu_setup_msis(smmu);
3468e86d1aa8SWill Deacon 
3469e86d1aa8SWill Deacon 	/* Request interrupt lines */
3470e86d1aa8SWill Deacon 	irq = smmu->evtq.q.irq;
3471e86d1aa8SWill Deacon 	if (irq) {
3472e86d1aa8SWill Deacon 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3473e86d1aa8SWill Deacon 						arm_smmu_evtq_thread,
3474e86d1aa8SWill Deacon 						IRQF_ONESHOT,
3475e86d1aa8SWill Deacon 						"arm-smmu-v3-evtq", smmu);
3476e86d1aa8SWill Deacon 		if (ret < 0)
3477e86d1aa8SWill Deacon 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3478e86d1aa8SWill Deacon 	} else {
3479e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3480e86d1aa8SWill Deacon 	}
3481e86d1aa8SWill Deacon 
3482e86d1aa8SWill Deacon 	irq = smmu->gerr_irq;
3483e86d1aa8SWill Deacon 	if (irq) {
3484e86d1aa8SWill Deacon 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3485e86d1aa8SWill Deacon 				       0, "arm-smmu-v3-gerror", smmu);
3486e86d1aa8SWill Deacon 		if (ret < 0)
3487e86d1aa8SWill Deacon 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3488e86d1aa8SWill Deacon 	} else {
3489e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3490e86d1aa8SWill Deacon 	}
3491e86d1aa8SWill Deacon 
3492e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3493e86d1aa8SWill Deacon 		irq = smmu->priq.q.irq;
3494e86d1aa8SWill Deacon 		if (irq) {
3495e86d1aa8SWill Deacon 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3496e86d1aa8SWill Deacon 							arm_smmu_priq_thread,
3497e86d1aa8SWill Deacon 							IRQF_ONESHOT,
3498e86d1aa8SWill Deacon 							"arm-smmu-v3-priq",
3499e86d1aa8SWill Deacon 							smmu);
3500e86d1aa8SWill Deacon 			if (ret < 0)
3501e86d1aa8SWill Deacon 				dev_warn(smmu->dev,
3502e86d1aa8SWill Deacon 					 "failed to enable priq irq\n");
3503e86d1aa8SWill Deacon 		} else {
3504e86d1aa8SWill Deacon 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3505e86d1aa8SWill Deacon 		}
3506e86d1aa8SWill Deacon 	}
3507e86d1aa8SWill Deacon }
3508e86d1aa8SWill Deacon 
3509e86d1aa8SWill Deacon static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3510e86d1aa8SWill Deacon {
3511e86d1aa8SWill Deacon 	int ret, irq;
3512e86d1aa8SWill Deacon 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3513e86d1aa8SWill Deacon 
3514e86d1aa8SWill Deacon 	/* Disable IRQs first */
3515e86d1aa8SWill Deacon 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3516e86d1aa8SWill Deacon 				      ARM_SMMU_IRQ_CTRLACK);
3517e86d1aa8SWill Deacon 	if (ret) {
3518e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to disable irqs\n");
3519e86d1aa8SWill Deacon 		return ret;
3520e86d1aa8SWill Deacon 	}
3521e86d1aa8SWill Deacon 
3522e86d1aa8SWill Deacon 	irq = smmu->combined_irq;
3523e86d1aa8SWill Deacon 	if (irq) {
3524e86d1aa8SWill Deacon 		/*
3525e86d1aa8SWill Deacon 		 * Cavium ThunderX2 implementation doesn't support unique irq
3526e86d1aa8SWill Deacon 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3527e86d1aa8SWill Deacon 		 */
3528e86d1aa8SWill Deacon 		ret = devm_request_threaded_irq(smmu->dev, irq,
3529e86d1aa8SWill Deacon 					arm_smmu_combined_irq_handler,
3530e86d1aa8SWill Deacon 					arm_smmu_combined_irq_thread,
3531e86d1aa8SWill Deacon 					IRQF_ONESHOT,
3532e86d1aa8SWill Deacon 					"arm-smmu-v3-combined-irq", smmu);
3533e86d1aa8SWill Deacon 		if (ret < 0)
3534e86d1aa8SWill Deacon 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3535e86d1aa8SWill Deacon 	} else
3536e86d1aa8SWill Deacon 		arm_smmu_setup_unique_irqs(smmu);
3537e86d1aa8SWill Deacon 
3538e86d1aa8SWill Deacon 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3539e86d1aa8SWill Deacon 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3540e86d1aa8SWill Deacon 
3541e86d1aa8SWill Deacon 	/* Enable interrupt generation on the SMMU */
3542e86d1aa8SWill Deacon 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3543e86d1aa8SWill Deacon 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3544e86d1aa8SWill Deacon 	if (ret)
3545e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "failed to enable irqs\n");
3546e86d1aa8SWill Deacon 
3547e86d1aa8SWill Deacon 	return 0;
3548e86d1aa8SWill Deacon }
3549e86d1aa8SWill Deacon 
3550e86d1aa8SWill Deacon static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3551e86d1aa8SWill Deacon {
3552e86d1aa8SWill Deacon 	int ret;
3553e86d1aa8SWill Deacon 
3554e86d1aa8SWill Deacon 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3555e86d1aa8SWill Deacon 	if (ret)
3556e86d1aa8SWill Deacon 		dev_err(smmu->dev, "failed to clear cr0\n");
3557e86d1aa8SWill Deacon 
3558e86d1aa8SWill Deacon 	return ret;
3559e86d1aa8SWill Deacon }
3560e86d1aa8SWill Deacon 
/*
 * Bring the SMMU up from scratch: disable it, program the memory
 * attributes, stream table and queues, invalidate cached configuration
 * and TLBs, set up interrupts, and finally enable translation (or ensure
 * bypass/abort, per @bypass and the disable_bypass parameter).
 *
 * The order is significant: each queue is programmed before its CR0
 * enable bit is set, and the 'enables' mask accumulates CR0 bits so that
 * every write_reg_sync preserves the features enabled so far.
 */
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
	int ret;
	u32 reg, enables;
	struct arm_smmu_cmdq_ent cmd;

	/* Clear CR0 and sync (disables SMMU and queue processing) */
	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
	if (reg & CR0_SMMUEN) {
		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		WARN_ON(is_kdump_kernel() && !disable_bypass);
		/* Abort incoming transactions while we reprogram the device */
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret)
		return ret;

	/* CR1 (table and queue memory attributes) */
	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);

	/* CR2 (random crap) */
	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);

	/* Stream table */
	writeq_relaxed(smmu->strtab_cfg.strtab_base,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/* Invalidate any cached configuration */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Event queue */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
	writel_relaxed(smmu->evtq.q.llq.cons,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
		writel_relaxed(smmu->priq.q.llq.cons,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

	/* Keep event/PRI queues quiet in a kdump kernel */
	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface, or ensure bypass */
	if (!bypass || disable_bypass) {
		enables |= CR0_SMMUEN;
	} else {
		/* Clear the reset-time abort so traffic passes through */
		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
		if (ret)
			return ret;
	}
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	return 0;
}
3695e86d1aa8SWill Deacon 
3696e86d1aa8SWill Deacon static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3697e86d1aa8SWill Deacon {
3698e86d1aa8SWill Deacon 	u32 reg;
3699e86d1aa8SWill Deacon 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3700e86d1aa8SWill Deacon 
3701e86d1aa8SWill Deacon 	/* IDR0 */
3702e86d1aa8SWill Deacon 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3703e86d1aa8SWill Deacon 
3704e86d1aa8SWill Deacon 	/* 2-level structures */
3705e86d1aa8SWill Deacon 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3706e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3707e86d1aa8SWill Deacon 
3708e86d1aa8SWill Deacon 	if (reg & IDR0_CD2L)
3709e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3710e86d1aa8SWill Deacon 
3711e86d1aa8SWill Deacon 	/*
3712e86d1aa8SWill Deacon 	 * Translation table endianness.
3713e86d1aa8SWill Deacon 	 * We currently require the same endianness as the CPU, but this
3714e86d1aa8SWill Deacon 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3715e86d1aa8SWill Deacon 	 */
3716e86d1aa8SWill Deacon 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3717e86d1aa8SWill Deacon 	case IDR0_TTENDIAN_MIXED:
3718e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3719e86d1aa8SWill Deacon 		break;
3720e86d1aa8SWill Deacon #ifdef __BIG_ENDIAN
3721e86d1aa8SWill Deacon 	case IDR0_TTENDIAN_BE:
3722e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3723e86d1aa8SWill Deacon 		break;
3724e86d1aa8SWill Deacon #else
3725e86d1aa8SWill Deacon 	case IDR0_TTENDIAN_LE:
3726e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3727e86d1aa8SWill Deacon 		break;
3728e86d1aa8SWill Deacon #endif
3729e86d1aa8SWill Deacon 	default:
3730e86d1aa8SWill Deacon 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3731e86d1aa8SWill Deacon 		return -ENXIO;
3732e86d1aa8SWill Deacon 	}
3733e86d1aa8SWill Deacon 
3734e86d1aa8SWill Deacon 	/* Boolean feature flags */
3735e86d1aa8SWill Deacon 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3736e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_PRI;
3737e86d1aa8SWill Deacon 
3738e86d1aa8SWill Deacon 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3739e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_ATS;
3740e86d1aa8SWill Deacon 
3741e86d1aa8SWill Deacon 	if (reg & IDR0_SEV)
3742e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_SEV;
3743e86d1aa8SWill Deacon 
3744e86d1aa8SWill Deacon 	if (reg & IDR0_MSI)
3745e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_MSI;
3746e86d1aa8SWill Deacon 
3747e86d1aa8SWill Deacon 	if (reg & IDR0_HYP)
3748e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_HYP;
3749e86d1aa8SWill Deacon 
3750e86d1aa8SWill Deacon 	/*
3751e86d1aa8SWill Deacon 	 * The coherency feature as set by FW is used in preference to the ID
3752e86d1aa8SWill Deacon 	 * register, but warn on mismatch.
3753e86d1aa8SWill Deacon 	 */
3754e86d1aa8SWill Deacon 	if (!!(reg & IDR0_COHACC) != coherent)
3755e86d1aa8SWill Deacon 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3756e86d1aa8SWill Deacon 			 coherent ? "true" : "false");
3757e86d1aa8SWill Deacon 
3758e86d1aa8SWill Deacon 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3759e86d1aa8SWill Deacon 	case IDR0_STALL_MODEL_FORCE:
3760e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3761e86d1aa8SWill Deacon 		/* Fallthrough */
3762e86d1aa8SWill Deacon 	case IDR0_STALL_MODEL_STALL:
3763e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3764e86d1aa8SWill Deacon 	}
3765e86d1aa8SWill Deacon 
3766e86d1aa8SWill Deacon 	if (reg & IDR0_S1P)
3767e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3768e86d1aa8SWill Deacon 
3769e86d1aa8SWill Deacon 	if (reg & IDR0_S2P)
3770e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3771e86d1aa8SWill Deacon 
3772e86d1aa8SWill Deacon 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3773e86d1aa8SWill Deacon 		dev_err(smmu->dev, "no translation support!\n");
3774e86d1aa8SWill Deacon 		return -ENXIO;
3775e86d1aa8SWill Deacon 	}
3776e86d1aa8SWill Deacon 
3777e86d1aa8SWill Deacon 	/* We only support the AArch64 table format at present */
3778e86d1aa8SWill Deacon 	switch (FIELD_GET(IDR0_TTF, reg)) {
3779e86d1aa8SWill Deacon 	case IDR0_TTF_AARCH32_64:
3780e86d1aa8SWill Deacon 		smmu->ias = 40;
3781e86d1aa8SWill Deacon 		/* Fallthrough */
3782e86d1aa8SWill Deacon 	case IDR0_TTF_AARCH64:
3783e86d1aa8SWill Deacon 		break;
3784e86d1aa8SWill Deacon 	default:
3785e86d1aa8SWill Deacon 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3786e86d1aa8SWill Deacon 		return -ENXIO;
3787e86d1aa8SWill Deacon 	}
3788e86d1aa8SWill Deacon 
3789e86d1aa8SWill Deacon 	/* ASID/VMID sizes */
3790e86d1aa8SWill Deacon 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3791e86d1aa8SWill Deacon 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3792e86d1aa8SWill Deacon 
3793e86d1aa8SWill Deacon 	/* IDR1 */
3794e86d1aa8SWill Deacon 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3795e86d1aa8SWill Deacon 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3796e86d1aa8SWill Deacon 		dev_err(smmu->dev, "embedded implementation not supported\n");
3797e86d1aa8SWill Deacon 		return -ENXIO;
3798e86d1aa8SWill Deacon 	}
3799e86d1aa8SWill Deacon 
3800e86d1aa8SWill Deacon 	/* Queue sizes, capped to ensure natural alignment */
3801e86d1aa8SWill Deacon 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3802e86d1aa8SWill Deacon 					     FIELD_GET(IDR1_CMDQS, reg));
3803e86d1aa8SWill Deacon 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3804e86d1aa8SWill Deacon 		/*
3805e86d1aa8SWill Deacon 		 * We don't support splitting up batches, so one batch of
3806e86d1aa8SWill Deacon 		 * commands plus an extra sync needs to fit inside the command
3807e86d1aa8SWill Deacon 		 * queue. There's also no way we can handle the weird alignment
3808e86d1aa8SWill Deacon 		 * restrictions on the base pointer for a unit-length queue.
3809e86d1aa8SWill Deacon 		 */
3810e86d1aa8SWill Deacon 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3811e86d1aa8SWill Deacon 			CMDQ_BATCH_ENTRIES);
3812e86d1aa8SWill Deacon 		return -ENXIO;
3813e86d1aa8SWill Deacon 	}
3814e86d1aa8SWill Deacon 
3815e86d1aa8SWill Deacon 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3816e86d1aa8SWill Deacon 					     FIELD_GET(IDR1_EVTQS, reg));
3817e86d1aa8SWill Deacon 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3818e86d1aa8SWill Deacon 					     FIELD_GET(IDR1_PRIQS, reg));
3819e86d1aa8SWill Deacon 
3820e86d1aa8SWill Deacon 	/* SID/SSID sizes */
3821e86d1aa8SWill Deacon 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3822e86d1aa8SWill Deacon 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3823e86d1aa8SWill Deacon 
3824e86d1aa8SWill Deacon 	/*
3825e86d1aa8SWill Deacon 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3826e86d1aa8SWill Deacon 	 * table, use a linear table instead.
3827e86d1aa8SWill Deacon 	 */
3828e86d1aa8SWill Deacon 	if (smmu->sid_bits <= STRTAB_SPLIT)
3829e86d1aa8SWill Deacon 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3830e86d1aa8SWill Deacon 
3831e86d1aa8SWill Deacon 	/* IDR3 */
3832e86d1aa8SWill Deacon 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3833e86d1aa8SWill Deacon 	if (FIELD_GET(IDR3_RIL, reg))
3834e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3835e86d1aa8SWill Deacon 
3836e86d1aa8SWill Deacon 	/* IDR5 */
3837e86d1aa8SWill Deacon 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3838e86d1aa8SWill Deacon 
3839e86d1aa8SWill Deacon 	/* Maximum number of outstanding stalls */
3840e86d1aa8SWill Deacon 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3841e86d1aa8SWill Deacon 
3842e86d1aa8SWill Deacon 	/* Page sizes */
3843e86d1aa8SWill Deacon 	if (reg & IDR5_GRAN64K)
3844e86d1aa8SWill Deacon 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3845e86d1aa8SWill Deacon 	if (reg & IDR5_GRAN16K)
3846e86d1aa8SWill Deacon 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3847e86d1aa8SWill Deacon 	if (reg & IDR5_GRAN4K)
3848e86d1aa8SWill Deacon 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3849e86d1aa8SWill Deacon 
3850e86d1aa8SWill Deacon 	/* Input address size */
3851e86d1aa8SWill Deacon 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3852e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_VAX;
3853e86d1aa8SWill Deacon 
3854e86d1aa8SWill Deacon 	/* Output address size */
3855e86d1aa8SWill Deacon 	switch (FIELD_GET(IDR5_OAS, reg)) {
3856e86d1aa8SWill Deacon 	case IDR5_OAS_32_BIT:
3857e86d1aa8SWill Deacon 		smmu->oas = 32;
3858e86d1aa8SWill Deacon 		break;
3859e86d1aa8SWill Deacon 	case IDR5_OAS_36_BIT:
3860e86d1aa8SWill Deacon 		smmu->oas = 36;
3861e86d1aa8SWill Deacon 		break;
3862e86d1aa8SWill Deacon 	case IDR5_OAS_40_BIT:
3863e86d1aa8SWill Deacon 		smmu->oas = 40;
3864e86d1aa8SWill Deacon 		break;
3865e86d1aa8SWill Deacon 	case IDR5_OAS_42_BIT:
3866e86d1aa8SWill Deacon 		smmu->oas = 42;
3867e86d1aa8SWill Deacon 		break;
3868e86d1aa8SWill Deacon 	case IDR5_OAS_44_BIT:
3869e86d1aa8SWill Deacon 		smmu->oas = 44;
3870e86d1aa8SWill Deacon 		break;
3871e86d1aa8SWill Deacon 	case IDR5_OAS_52_BIT:
3872e86d1aa8SWill Deacon 		smmu->oas = 52;
3873e86d1aa8SWill Deacon 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3874e86d1aa8SWill Deacon 		break;
3875e86d1aa8SWill Deacon 	default:
3876e86d1aa8SWill Deacon 		dev_info(smmu->dev,
3877e86d1aa8SWill Deacon 			"unknown output address size. Truncating to 48-bit\n");
3878e86d1aa8SWill Deacon 		/* Fallthrough */
3879e86d1aa8SWill Deacon 	case IDR5_OAS_48_BIT:
3880e86d1aa8SWill Deacon 		smmu->oas = 48;
3881e86d1aa8SWill Deacon 	}
3882e86d1aa8SWill Deacon 
3883e86d1aa8SWill Deacon 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3884e86d1aa8SWill Deacon 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3885e86d1aa8SWill Deacon 	else
3886e86d1aa8SWill Deacon 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3887e86d1aa8SWill Deacon 
3888e86d1aa8SWill Deacon 	/* Set the DMA mask for our table walker */
3889e86d1aa8SWill Deacon 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3890e86d1aa8SWill Deacon 		dev_warn(smmu->dev,
3891e86d1aa8SWill Deacon 			 "failed to set DMA mask for table walker\n");
3892e86d1aa8SWill Deacon 
3893e86d1aa8SWill Deacon 	smmu->ias = max(smmu->ias, smmu->oas);
3894e86d1aa8SWill Deacon 
3895e86d1aa8SWill Deacon 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3896e86d1aa8SWill Deacon 		 smmu->ias, smmu->oas, smmu->features);
3897e86d1aa8SWill Deacon 	return 0;
3898e86d1aa8SWill Deacon }
3899e86d1aa8SWill Deacon 
3900e86d1aa8SWill Deacon #ifdef CONFIG_ACPI
3901e86d1aa8SWill Deacon static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3902e86d1aa8SWill Deacon {
3903e86d1aa8SWill Deacon 	switch (model) {
3904e86d1aa8SWill Deacon 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3905e86d1aa8SWill Deacon 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3906e86d1aa8SWill Deacon 		break;
3907e86d1aa8SWill Deacon 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3908e86d1aa8SWill Deacon 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3909e86d1aa8SWill Deacon 		break;
3910e86d1aa8SWill Deacon 	}
3911e86d1aa8SWill Deacon 
3912e86d1aa8SWill Deacon 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3913e86d1aa8SWill Deacon }
3914e86d1aa8SWill Deacon 
3915e86d1aa8SWill Deacon static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3916e86d1aa8SWill Deacon 				      struct arm_smmu_device *smmu)
3917e86d1aa8SWill Deacon {
3918e86d1aa8SWill Deacon 	struct acpi_iort_smmu_v3 *iort_smmu;
3919e86d1aa8SWill Deacon 	struct device *dev = smmu->dev;
3920e86d1aa8SWill Deacon 	struct acpi_iort_node *node;
3921e86d1aa8SWill Deacon 
3922e86d1aa8SWill Deacon 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3923e86d1aa8SWill Deacon 
3924e86d1aa8SWill Deacon 	/* Retrieve SMMUv3 specific data */
3925e86d1aa8SWill Deacon 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3926e86d1aa8SWill Deacon 
3927e86d1aa8SWill Deacon 	acpi_smmu_get_options(iort_smmu->model, smmu);
3928e86d1aa8SWill Deacon 
3929e86d1aa8SWill Deacon 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3930e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3931e86d1aa8SWill Deacon 
3932e86d1aa8SWill Deacon 	return 0;
3933e86d1aa8SWill Deacon }
3934e86d1aa8SWill Deacon #else
3935e86d1aa8SWill Deacon static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3936e86d1aa8SWill Deacon 					     struct arm_smmu_device *smmu)
3937e86d1aa8SWill Deacon {
3938e86d1aa8SWill Deacon 	return -ENODEV;
3939e86d1aa8SWill Deacon }
3940e86d1aa8SWill Deacon #endif
3941e86d1aa8SWill Deacon 
3942e86d1aa8SWill Deacon static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3943e86d1aa8SWill Deacon 				    struct arm_smmu_device *smmu)
3944e86d1aa8SWill Deacon {
3945e86d1aa8SWill Deacon 	struct device *dev = &pdev->dev;
3946e86d1aa8SWill Deacon 	u32 cells;
3947e86d1aa8SWill Deacon 	int ret = -EINVAL;
3948e86d1aa8SWill Deacon 
3949e86d1aa8SWill Deacon 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3950e86d1aa8SWill Deacon 		dev_err(dev, "missing #iommu-cells property\n");
3951e86d1aa8SWill Deacon 	else if (cells != 1)
3952e86d1aa8SWill Deacon 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3953e86d1aa8SWill Deacon 	else
3954e86d1aa8SWill Deacon 		ret = 0;
3955e86d1aa8SWill Deacon 
3956e86d1aa8SWill Deacon 	parse_driver_options(smmu);
3957e86d1aa8SWill Deacon 
3958e86d1aa8SWill Deacon 	if (of_dma_is_coherent(dev->of_node))
3959e86d1aa8SWill Deacon 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3960e86d1aa8SWill Deacon 
3961e86d1aa8SWill Deacon 	return ret;
3962e86d1aa8SWill Deacon }
3963e86d1aa8SWill Deacon 
3964e86d1aa8SWill Deacon static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3965e86d1aa8SWill Deacon {
3966e86d1aa8SWill Deacon 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3967e86d1aa8SWill Deacon 		return SZ_64K;
3968e86d1aa8SWill Deacon 	else
3969e86d1aa8SWill Deacon 		return SZ_128K;
3970e86d1aa8SWill Deacon }
3971e86d1aa8SWill Deacon 
3972e86d1aa8SWill Deacon static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3973e86d1aa8SWill Deacon {
3974e86d1aa8SWill Deacon 	int err;
3975e86d1aa8SWill Deacon 
3976e86d1aa8SWill Deacon #ifdef CONFIG_PCI
3977e86d1aa8SWill Deacon 	if (pci_bus_type.iommu_ops != ops) {
3978e86d1aa8SWill Deacon 		err = bus_set_iommu(&pci_bus_type, ops);
3979e86d1aa8SWill Deacon 		if (err)
3980e86d1aa8SWill Deacon 			return err;
3981e86d1aa8SWill Deacon 	}
3982e86d1aa8SWill Deacon #endif
3983e86d1aa8SWill Deacon #ifdef CONFIG_ARM_AMBA
3984e86d1aa8SWill Deacon 	if (amba_bustype.iommu_ops != ops) {
3985e86d1aa8SWill Deacon 		err = bus_set_iommu(&amba_bustype, ops);
3986e86d1aa8SWill Deacon 		if (err)
3987e86d1aa8SWill Deacon 			goto err_reset_pci_ops;
3988e86d1aa8SWill Deacon 	}
3989e86d1aa8SWill Deacon #endif
3990e86d1aa8SWill Deacon 	if (platform_bus_type.iommu_ops != ops) {
3991e86d1aa8SWill Deacon 		err = bus_set_iommu(&platform_bus_type, ops);
3992e86d1aa8SWill Deacon 		if (err)
3993e86d1aa8SWill Deacon 			goto err_reset_amba_ops;
3994e86d1aa8SWill Deacon 	}
3995e86d1aa8SWill Deacon 
3996e86d1aa8SWill Deacon 	return 0;
3997e86d1aa8SWill Deacon 
3998e86d1aa8SWill Deacon err_reset_amba_ops:
3999e86d1aa8SWill Deacon #ifdef CONFIG_ARM_AMBA
4000e86d1aa8SWill Deacon 	bus_set_iommu(&amba_bustype, NULL);
4001e86d1aa8SWill Deacon #endif
4002e86d1aa8SWill Deacon err_reset_pci_ops: __maybe_unused;
4003e86d1aa8SWill Deacon #ifdef CONFIG_PCI
4004e86d1aa8SWill Deacon 	bus_set_iommu(&pci_bus_type, NULL);
4005e86d1aa8SWill Deacon #endif
4006e86d1aa8SWill Deacon 	return err;
4007e86d1aa8SWill Deacon }
4008e86d1aa8SWill Deacon 
4009e86d1aa8SWill Deacon static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4010e86d1aa8SWill Deacon 				      resource_size_t size)
4011e86d1aa8SWill Deacon {
4012e86d1aa8SWill Deacon 	struct resource res = {
4013e86d1aa8SWill Deacon 		.flags = IORESOURCE_MEM,
4014e86d1aa8SWill Deacon 		.start = start,
4015e86d1aa8SWill Deacon 		.end = start + size - 1,
4016e86d1aa8SWill Deacon 	};
4017e86d1aa8SWill Deacon 
4018e86d1aa8SWill Deacon 	return devm_ioremap_resource(dev, &res);
4019e86d1aa8SWill Deacon }
4020e86d1aa8SWill Deacon 
4021e86d1aa8SWill Deacon static int arm_smmu_device_probe(struct platform_device *pdev)
4022e86d1aa8SWill Deacon {
4023e86d1aa8SWill Deacon 	int irq, ret;
4024e86d1aa8SWill Deacon 	struct resource *res;
4025e86d1aa8SWill Deacon 	resource_size_t ioaddr;
4026e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu;
4027e86d1aa8SWill Deacon 	struct device *dev = &pdev->dev;
4028e86d1aa8SWill Deacon 	bool bypass;
4029e86d1aa8SWill Deacon 
4030e86d1aa8SWill Deacon 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4031e86d1aa8SWill Deacon 	if (!smmu) {
4032e86d1aa8SWill Deacon 		dev_err(dev, "failed to allocate arm_smmu_device\n");
4033e86d1aa8SWill Deacon 		return -ENOMEM;
4034e86d1aa8SWill Deacon 	}
4035e86d1aa8SWill Deacon 	smmu->dev = dev;
4036e86d1aa8SWill Deacon 
4037e86d1aa8SWill Deacon 	if (dev->of_node) {
4038e86d1aa8SWill Deacon 		ret = arm_smmu_device_dt_probe(pdev, smmu);
4039e86d1aa8SWill Deacon 	} else {
4040e86d1aa8SWill Deacon 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
4041e86d1aa8SWill Deacon 		if (ret == -ENODEV)
4042e86d1aa8SWill Deacon 			return ret;
4043e86d1aa8SWill Deacon 	}
4044e86d1aa8SWill Deacon 
4045e86d1aa8SWill Deacon 	/* Set bypass mode according to firmware probing result */
4046e86d1aa8SWill Deacon 	bypass = !!ret;
4047e86d1aa8SWill Deacon 
4048e86d1aa8SWill Deacon 	/* Base address */
4049e86d1aa8SWill Deacon 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4050e86d1aa8SWill Deacon 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4051e86d1aa8SWill Deacon 		dev_err(dev, "MMIO region too small (%pr)\n", res);
4052e86d1aa8SWill Deacon 		return -EINVAL;
4053e86d1aa8SWill Deacon 	}
4054e86d1aa8SWill Deacon 	ioaddr = res->start;
4055e86d1aa8SWill Deacon 
4056e86d1aa8SWill Deacon 	/*
4057e86d1aa8SWill Deacon 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4058e86d1aa8SWill Deacon 	 * the PMCG registers which are reserved by the PMU driver.
4059e86d1aa8SWill Deacon 	 */
4060e86d1aa8SWill Deacon 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4061e86d1aa8SWill Deacon 	if (IS_ERR(smmu->base))
4062e86d1aa8SWill Deacon 		return PTR_ERR(smmu->base);
4063e86d1aa8SWill Deacon 
4064e86d1aa8SWill Deacon 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
4065e86d1aa8SWill Deacon 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4066e86d1aa8SWill Deacon 					       ARM_SMMU_REG_SZ);
4067e86d1aa8SWill Deacon 		if (IS_ERR(smmu->page1))
4068e86d1aa8SWill Deacon 			return PTR_ERR(smmu->page1);
4069e86d1aa8SWill Deacon 	} else {
4070e86d1aa8SWill Deacon 		smmu->page1 = smmu->base;
4071e86d1aa8SWill Deacon 	}
4072e86d1aa8SWill Deacon 
4073e86d1aa8SWill Deacon 	/* Interrupt lines */
4074e86d1aa8SWill Deacon 
4075e86d1aa8SWill Deacon 	irq = platform_get_irq_byname_optional(pdev, "combined");
4076e86d1aa8SWill Deacon 	if (irq > 0)
4077e86d1aa8SWill Deacon 		smmu->combined_irq = irq;
4078e86d1aa8SWill Deacon 	else {
4079e86d1aa8SWill Deacon 		irq = platform_get_irq_byname_optional(pdev, "eventq");
4080e86d1aa8SWill Deacon 		if (irq > 0)
4081e86d1aa8SWill Deacon 			smmu->evtq.q.irq = irq;
4082e86d1aa8SWill Deacon 
4083e86d1aa8SWill Deacon 		irq = platform_get_irq_byname_optional(pdev, "priq");
4084e86d1aa8SWill Deacon 		if (irq > 0)
4085e86d1aa8SWill Deacon 			smmu->priq.q.irq = irq;
4086e86d1aa8SWill Deacon 
4087e86d1aa8SWill Deacon 		irq = platform_get_irq_byname_optional(pdev, "gerror");
4088e86d1aa8SWill Deacon 		if (irq > 0)
4089e86d1aa8SWill Deacon 			smmu->gerr_irq = irq;
4090e86d1aa8SWill Deacon 	}
4091e86d1aa8SWill Deacon 	/* Probe the h/w */
4092e86d1aa8SWill Deacon 	ret = arm_smmu_device_hw_probe(smmu);
4093e86d1aa8SWill Deacon 	if (ret)
4094e86d1aa8SWill Deacon 		return ret;
4095e86d1aa8SWill Deacon 
4096e86d1aa8SWill Deacon 	/* Initialise in-memory data structures */
4097e86d1aa8SWill Deacon 	ret = arm_smmu_init_structures(smmu);
4098e86d1aa8SWill Deacon 	if (ret)
4099e86d1aa8SWill Deacon 		return ret;
4100e86d1aa8SWill Deacon 
4101e86d1aa8SWill Deacon 	/* Record our private device structure */
4102e86d1aa8SWill Deacon 	platform_set_drvdata(pdev, smmu);
4103e86d1aa8SWill Deacon 
4104e86d1aa8SWill Deacon 	/* Reset the device */
4105e86d1aa8SWill Deacon 	ret = arm_smmu_device_reset(smmu, bypass);
4106e86d1aa8SWill Deacon 	if (ret)
4107e86d1aa8SWill Deacon 		return ret;
4108e86d1aa8SWill Deacon 
4109e86d1aa8SWill Deacon 	/* And we're up. Go go go! */
4110e86d1aa8SWill Deacon 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4111e86d1aa8SWill Deacon 				     "smmu3.%pa", &ioaddr);
4112e86d1aa8SWill Deacon 	if (ret)
4113e86d1aa8SWill Deacon 		return ret;
4114e86d1aa8SWill Deacon 
4115e86d1aa8SWill Deacon 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
4116e86d1aa8SWill Deacon 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
4117e86d1aa8SWill Deacon 
4118e86d1aa8SWill Deacon 	ret = iommu_device_register(&smmu->iommu);
4119e86d1aa8SWill Deacon 	if (ret) {
4120e86d1aa8SWill Deacon 		dev_err(dev, "Failed to register iommu\n");
4121e86d1aa8SWill Deacon 		return ret;
4122e86d1aa8SWill Deacon 	}
4123e86d1aa8SWill Deacon 
4124e86d1aa8SWill Deacon 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
4125e86d1aa8SWill Deacon }
4126e86d1aa8SWill Deacon 
4127e86d1aa8SWill Deacon static int arm_smmu_device_remove(struct platform_device *pdev)
4128e86d1aa8SWill Deacon {
4129e86d1aa8SWill Deacon 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4130e86d1aa8SWill Deacon 
4131e86d1aa8SWill Deacon 	arm_smmu_set_bus_ops(NULL);
4132e86d1aa8SWill Deacon 	iommu_device_unregister(&smmu->iommu);
4133e86d1aa8SWill Deacon 	iommu_device_sysfs_remove(&smmu->iommu);
4134e86d1aa8SWill Deacon 	arm_smmu_device_disable(smmu);
4135e86d1aa8SWill Deacon 
4136e86d1aa8SWill Deacon 	return 0;
4137e86d1aa8SWill Deacon }
4138e86d1aa8SWill Deacon 
4139e86d1aa8SWill Deacon static void arm_smmu_device_shutdown(struct platform_device *pdev)
4140e86d1aa8SWill Deacon {
4141e86d1aa8SWill Deacon 	arm_smmu_device_remove(pdev);
4142e86d1aa8SWill Deacon }
4143e86d1aa8SWill Deacon 
4144e86d1aa8SWill Deacon static const struct of_device_id arm_smmu_of_match[] = {
4145e86d1aa8SWill Deacon 	{ .compatible = "arm,smmu-v3", },
4146e86d1aa8SWill Deacon 	{ },
4147e86d1aa8SWill Deacon };
4148e86d1aa8SWill Deacon MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4149e86d1aa8SWill Deacon 
4150e86d1aa8SWill Deacon static struct platform_driver arm_smmu_driver = {
4151e86d1aa8SWill Deacon 	.driver	= {
4152e86d1aa8SWill Deacon 		.name			= "arm-smmu-v3",
4153e86d1aa8SWill Deacon 		.of_match_table		= arm_smmu_of_match,
4154e86d1aa8SWill Deacon 		.suppress_bind_attrs	= true,
4155e86d1aa8SWill Deacon 	},
4156e86d1aa8SWill Deacon 	.probe	= arm_smmu_device_probe,
4157e86d1aa8SWill Deacon 	.remove	= arm_smmu_device_remove,
4158e86d1aa8SWill Deacon 	.shutdown = arm_smmu_device_shutdown,
4159e86d1aa8SWill Deacon };
4160e86d1aa8SWill Deacon module_platform_driver(arm_smmu_driver);
4161e86d1aa8SWill Deacon 
4162e86d1aa8SWill Deacon MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4163e86d1aa8SWill Deacon MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4164e86d1aa8SWill Deacon MODULE_ALIAS("platform:arm-smmu-v3");
4165e86d1aa8SWill Deacon MODULE_LICENSE("GPL v2");
4166