xref: /linux/drivers/iommu/arm/arm-smmu/arm-smmu.c (revision 151ebcf0797b1a3ba53c8843dc21748c80e098c7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-mapping.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/io.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/of.h>
31 #include <linux/of_address.h>
32 #include <linux/pci.h>
33 #include <linux/platform_device.h>
34 #include <linux/pm_runtime.h>
35 #include <linux/ratelimit.h>
36 #include <linux/slab.h>
37 
38 #include <linux/fsl/mc.h>
39 
40 #include "arm-smmu.h"
41 #include "../../dma-iommu.h"
42 
43 /*
44  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
45  * global register space are still, in fact, using a hypervisor to mediate it
46  * by trapping and emulating register accesses. Sadly, some deployed versions
47  * of said trapping code have bugs wherein they go horribly wrong for stores
48  * using r31 (i.e. XZR/WZR) as the source register.
49  */
50 #define QCOM_DUMMY_VAL -1
51 
52 #define MSI_IOVA_BASE			0x8000000
53 #define MSI_IOVA_LENGTH			0x100000
54 
55 static int force_stage;
56 module_param(force_stage, int, S_IRUGO);
57 MODULE_PARM_DESC(force_stage,
58 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
59 static bool disable_bypass =
60 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
61 module_param(disable_bypass, bool, S_IRUGO);
62 MODULE_PARM_DESC(disable_bypass,
63 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
64 
/*
 * Initial value for a struct arm_smmu_s2cr: fault the stream when bypass is
 * disabled, otherwise let it bypass. This expands to a compound literal, so
 * disable_bypass is read at every point of use, not once at definition.
 */
#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}
68 
69 static bool using_legacy_binding, using_generic_binding;
70 
71 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
72 {
73 	if (pm_runtime_enabled(smmu->dev))
74 		return pm_runtime_resume_and_get(smmu->dev);
75 
76 	return 0;
77 }
78 
79 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
80 {
81 	if (pm_runtime_enabled(smmu->dev))
82 		pm_runtime_put_autosuspend(smmu->dev);
83 }
84 
85 static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu)
86 {
87 	/*
88 	 * Setup an autosuspend delay to avoid bouncing runpm state.
89 	 * Otherwise, if a driver for a suspended consumer device
90 	 * unmaps buffers, it will runpm resume/suspend for each one.
91 	 *
92 	 * For example, when used by a GPU device, when an application
93 	 * or game exits, it can trigger unmapping 100s or 1000s of
94 	 * buffers.  With a runpm cycle for each buffer, that adds up
95 	 * to 5-10sec worth of reprogramming the context bank, while
96 	 * the system appears to be locked up to the user.
97 	 */
98 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
99 	pm_runtime_use_autosuspend(smmu->dev);
100 }
101 
102 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
103 {
104 	return container_of(dom, struct arm_smmu_domain, domain);
105 }
106 
/*
 * Forward declarations: the legacy-binding helpers below reference both
 * objects (presumably before their full definitions later in the file).
 */
static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
109 
110 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
111 static struct device_node *dev_get_dev_node(struct device *dev)
112 {
113 	if (dev_is_pci(dev)) {
114 		struct pci_bus *bus = to_pci_dev(dev)->bus;
115 
116 		while (!pci_is_root_bus(bus))
117 			bus = bus->parent;
118 		return of_node_get(bus->bridge->parent->of_node);
119 	}
120 
121 	return of_node_get(dev->of_node);
122 }
123 
124 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
125 {
126 	*((__be32 *)data) = cpu_to_be32(alias);
127 	return 0; /* Continue walking */
128 }
129 
130 static int __find_legacy_master_phandle(struct device *dev, void *data)
131 {
132 	struct of_phandle_iterator *it = *(void **)data;
133 	struct device_node *np = it->node;
134 	int err;
135 
136 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
137 			    "#stream-id-cells", -1)
138 		if (it->node == np) {
139 			*(void **)data = dev;
140 			return 1;
141 		}
142 	it->node = np;
143 	return err == -ENOENT ? 0 : err;
144 }
145 
146 static int arm_smmu_register_legacy_master(struct device *dev,
147 					   struct arm_smmu_device **smmu)
148 {
149 	struct device *smmu_dev;
150 	struct device_node *np;
151 	struct of_phandle_iterator it;
152 	void *data = &it;
153 	u32 *sids;
154 	__be32 pci_sid;
155 	int err;
156 
157 	np = dev_get_dev_node(dev);
158 	if (!np || !of_property_present(np, "#stream-id-cells")) {
159 		of_node_put(np);
160 		return -ENODEV;
161 	}
162 
163 	it.node = np;
164 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
165 				     __find_legacy_master_phandle);
166 	smmu_dev = data;
167 	of_node_put(np);
168 	if (err == 0)
169 		return -ENODEV;
170 	if (err < 0)
171 		return err;
172 
173 	if (dev_is_pci(dev)) {
174 		/* "mmu-masters" assumes Stream ID == Requester ID */
175 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
176 				       &pci_sid);
177 		it.cur = &pci_sid;
178 		it.cur_count = 1;
179 	}
180 
181 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
182 				&arm_smmu_ops);
183 	if (err)
184 		return err;
185 
186 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
187 	if (!sids)
188 		return -ENOMEM;
189 
190 	*smmu = dev_get_drvdata(smmu_dev);
191 	of_phandle_iterator_args(&it, sids, it.cur_count);
192 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
193 	kfree(sids);
194 	return err;
195 }
196 #else
/*
 * Stub used when CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS is not set: legacy
 * masters are never found, so always report -ENODEV.
 */
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	return -ENODEV;
}
202 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
203 
/* Release index @idx in allocation bitmap @map by clearing its bit. */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
208 
209 /* Wait for any pending TLB invalidations to complete */
210 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
211 				int sync, int status)
212 {
213 	unsigned int spin_cnt, delay;
214 	u32 reg;
215 
216 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
217 		return smmu->impl->tlb_sync(smmu, page, sync, status);
218 
219 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
220 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
221 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
222 			reg = arm_smmu_readl(smmu, page, status);
223 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
224 				return;
225 			cpu_relax();
226 		}
227 		udelay(delay);
228 	}
229 	dev_err_ratelimited(smmu->dev,
230 			    "TLB sync timed out -- SMMU may be deadlocked\n");
231 }
232 
233 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
234 {
235 	unsigned long flags;
236 
237 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
238 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
239 			    ARM_SMMU_GR0_sTLBGSTATUS);
240 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
241 }
242 
243 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
244 {
245 	struct arm_smmu_device *smmu = smmu_domain->smmu;
246 	unsigned long flags;
247 
248 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
249 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
250 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
251 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
252 }
253 
254 static void arm_smmu_tlb_inv_context_s1(void *cookie)
255 {
256 	struct arm_smmu_domain *smmu_domain = cookie;
257 	/*
258 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
259 	 * current CPU are visible beforehand.
260 	 */
261 	wmb();
262 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
263 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
264 	arm_smmu_tlb_sync_context(smmu_domain);
265 }
266 
267 static void arm_smmu_tlb_inv_context_s2(void *cookie)
268 {
269 	struct arm_smmu_domain *smmu_domain = cookie;
270 	struct arm_smmu_device *smmu = smmu_domain->smmu;
271 
272 	/* See above */
273 	wmb();
274 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
275 	arm_smmu_tlb_sync_global(smmu);
276 }
277 
278 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
279 				      size_t granule, void *cookie, int reg)
280 {
281 	struct arm_smmu_domain *smmu_domain = cookie;
282 	struct arm_smmu_device *smmu = smmu_domain->smmu;
283 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
284 	int idx = cfg->cbndx;
285 
286 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
287 		wmb();
288 
289 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
290 		iova = (iova >> 12) << 12;
291 		iova |= cfg->asid;
292 		do {
293 			arm_smmu_cb_write(smmu, idx, reg, iova);
294 			iova += granule;
295 		} while (size -= granule);
296 	} else {
297 		iova >>= 12;
298 		iova |= (u64)cfg->asid << 48;
299 		do {
300 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
301 			iova += granule >> 12;
302 		} while (size -= granule);
303 	}
304 }
305 
306 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
307 				      size_t granule, void *cookie, int reg)
308 {
309 	struct arm_smmu_domain *smmu_domain = cookie;
310 	struct arm_smmu_device *smmu = smmu_domain->smmu;
311 	int idx = smmu_domain->cfg.cbndx;
312 
313 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
314 		wmb();
315 
316 	iova >>= 12;
317 	do {
318 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
319 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
320 		else
321 			arm_smmu_cb_write(smmu, idx, reg, iova);
322 		iova += granule >> 12;
323 	} while (size -= granule);
324 }
325 
326 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
327 				     size_t granule, void *cookie)
328 {
329 	struct arm_smmu_domain *smmu_domain = cookie;
330 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
331 
332 	if (cfg->flush_walk_prefer_tlbiasid) {
333 		arm_smmu_tlb_inv_context_s1(cookie);
334 	} else {
335 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
336 					  ARM_SMMU_CB_S1_TLBIVA);
337 		arm_smmu_tlb_sync_context(cookie);
338 	}
339 }
340 
341 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
342 				     unsigned long iova, size_t granule,
343 				     void *cookie)
344 {
345 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
346 				  ARM_SMMU_CB_S1_TLBIVAL);
347 }
348 
349 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
350 				     size_t granule, void *cookie)
351 {
352 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
353 				  ARM_SMMU_CB_S2_TLBIIPAS2);
354 	arm_smmu_tlb_sync_context(cookie);
355 }
356 
357 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
358 				     unsigned long iova, size_t granule,
359 				     void *cookie)
360 {
361 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
362 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
363 }
364 
/*
 * Stage-2 .tlb_flush_walk handler used by the v1 ops table: the range
 * arguments are ignored and the whole context is invalidated by VMID.
 */
static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
					size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_context_s2(cookie);
}
370 /*
371  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
372  * almost negligible, but the benefit of getting the first one in as far ahead
373  * of the sync as possible is significant, hence we don't just make this a
374  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
375  * think.
376  */
377 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
378 					unsigned long iova, size_t granule,
379 					void *cookie)
380 {
381 	struct arm_smmu_domain *smmu_domain = cookie;
382 	struct arm_smmu_device *smmu = smmu_domain->smmu;
383 
384 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
385 		wmb();
386 
387 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
388 }
389 
/* TLB maintenance callbacks for stage-1 domains. */
static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
};

/* TLB maintenance callbacks for stage-2 domains with by-IPA invalidation. */
static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
};

/*
 * TLB maintenance callbacks for stage-2 domains where every operation is
 * carried out as a by-VMID invalidation.
 */
static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
};
407 
408 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
409 {
410 	u32 fsr, fsynr, cbfrsynra;
411 	unsigned long iova;
412 	struct arm_smmu_domain *smmu_domain = dev;
413 	struct arm_smmu_device *smmu = smmu_domain->smmu;
414 	int idx = smmu_domain->cfg.cbndx;
415 	int ret;
416 
417 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
418 	if (!(fsr & ARM_SMMU_FSR_FAULT))
419 		return IRQ_NONE;
420 
421 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
422 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
423 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
424 
425 	ret = report_iommu_fault(&smmu_domain->domain, NULL, iova,
426 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
427 
428 	if (ret == -ENOSYS)
429 		dev_err_ratelimited(smmu->dev,
430 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
431 			    fsr, iova, fsynr, cbfrsynra, idx);
432 
433 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
434 	return IRQ_HANDLED;
435 }
436 
437 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
438 {
439 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
440 	struct arm_smmu_device *smmu = dev;
441 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
442 				      DEFAULT_RATELIMIT_BURST);
443 
444 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
445 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
446 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
447 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
448 
449 	if (!gfsr)
450 		return IRQ_NONE;
451 
452 	if (__ratelimit(&rs)) {
453 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
454 		    (gfsr & ARM_SMMU_sGFSR_USF))
455 			dev_err(smmu->dev,
456 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
457 				(u16)gfsynr1);
458 		else
459 			dev_err(smmu->dev,
460 				"Unexpected global fault, this could be serious\n");
461 		dev_err(smmu->dev,
462 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
463 			gfsr, gfsynr0, gfsynr1, gfsynr2);
464 	}
465 
466 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
467 	return IRQ_HANDLED;
468 }
469 
470 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
471 				       struct io_pgtable_cfg *pgtbl_cfg)
472 {
473 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
474 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
475 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
476 
477 	cb->cfg = cfg;
478 
479 	/* TCR */
480 	if (stage1) {
481 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
482 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
483 		} else {
484 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
485 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
486 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
487 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
488 			else
489 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
490 		}
491 	} else {
492 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
493 	}
494 
495 	/* TTBRs */
496 	if (stage1) {
497 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
498 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
499 			cb->ttbr[1] = 0;
500 		} else {
501 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
502 						 cfg->asid);
503 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
504 						 cfg->asid);
505 
506 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
507 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
508 			else
509 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
510 		}
511 	} else {
512 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
513 	}
514 
515 	/* MAIRs (stage-1 only) */
516 	if (stage1) {
517 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
518 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
519 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
520 		} else {
521 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
522 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
523 		}
524 	}
525 }
526 
527 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
528 {
529 	u32 reg;
530 	bool stage1;
531 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
532 	struct arm_smmu_cfg *cfg = cb->cfg;
533 
534 	/* Unassigned context banks only need disabling */
535 	if (!cfg) {
536 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
537 		return;
538 	}
539 
540 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
541 
542 	/* CBA2R */
543 	if (smmu->version > ARM_SMMU_V1) {
544 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
545 			reg = ARM_SMMU_CBA2R_VA64;
546 		else
547 			reg = 0;
548 		/* 16-bit VMIDs live in CBA2R */
549 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
550 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
551 
552 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
553 	}
554 
555 	/* CBAR */
556 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
557 	if (smmu->version < ARM_SMMU_V2)
558 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
559 
560 	/*
561 	 * Use the weakest shareability/memory types, so they are
562 	 * overridden by the ttbcr/pte.
563 	 */
564 	if (stage1) {
565 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
566 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
567 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
568 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
569 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
570 		/* 8-bit VMIDs live in CBAR */
571 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
572 	}
573 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
574 
575 	/*
576 	 * TCR
577 	 * We must write this before the TTBRs, since it determines the
578 	 * access behaviour of some fields (in particular, ASID[15:8]).
579 	 */
580 	if (stage1 && smmu->version > ARM_SMMU_V1)
581 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
582 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
583 
584 	/* TTBRs */
585 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
586 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
587 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
588 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
589 	} else {
590 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
591 		if (stage1)
592 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
593 					   cb->ttbr[1]);
594 	}
595 
596 	/* MAIRs (stage-1 only) */
597 	if (stage1) {
598 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
599 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
600 	}
601 
602 	/* SCTLR */
603 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
604 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
605 	if (stage1)
606 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
607 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
608 		reg |= ARM_SMMU_SCTLR_E;
609 
610 	if (smmu->impl && smmu->impl->write_sctlr)
611 		smmu->impl->write_sctlr(smmu, idx, reg);
612 	else
613 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
614 }
615 
616 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
617 				       struct arm_smmu_device *smmu,
618 				       struct device *dev, unsigned int start)
619 {
620 	if (smmu->impl && smmu->impl->alloc_context_bank)
621 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
622 
623 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
624 }
625 
/*
 * Finalise @smmu_domain against @smmu: choose the translation stage and
 * context format, allocate a context bank and page tables, program the
 * context bank and request its fault interrupt.
 *
 * Runs under smmu_domain->init_mutex. If the domain already has an SMMU
 * attached this is a no-op that returns 0.
 *
 * Returns 0 on success or a negative errno. A failure to request the context
 * fault IRQ is not fatal: it is logged, cfg->irptndx is marked invalid and
 * the function still returns 0.
 */
static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
					struct arm_smmu_device *smmu,
					struct device *dev)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct iommu_domain *domain = &smmu_domain->domain;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	irqreturn_t (*context_fault)(int irq, void *dev);

	mutex_lock(&smmu_domain->init_mutex);
	/* Already initialised: nothing to do (ret is still 0) */
	if (smmu_domain->smmu)
		goto out_unlock;

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	/* None of the checks above selected a format */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Per stage: pick the CBAR type, the first context bank index to
	 * search from, the input/output address sizes, the io-pgtable
	 * format and the TLB flush ops.
	 */
	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		/* Stage-1 allocation starts after the stage-2-only banks */
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}

	/* A non-negative return value is the allocated context bank index */
	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
	if (ret < 0) {
		goto out_unlock;
	}

	smmu_domain->smmu = smmu;

	cfg->cbndx = ret;
	/*
	 * Before SMMUv2 the context interrupts are handed out round-robin;
	 * from v2 on each context bank uses the interrupt of the same index.
	 */
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	/* The TLB tag (VMID or ASID) is derived from the context bank index */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1;
	else
		cfg->asid = cfg->cbndx;

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->flush_ops,
		.iommu_dev	= smmu->dev,
	};

	/* Give the implementation a chance to adjust the configuration */
	if (smmu->impl && smmu->impl->init_context) {
		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
		if (ret)
			goto out_clear_smmu;
	}

	if (smmu_domain->pgtbl_quirks)
		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;

	/*
	 * With the TTBR1 quirk the aperture is the top 'ias' bits' worth of
	 * the address space; otherwise only the end is set here, to the top
	 * of an 'ias'-bit space.
	 */
	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
		domain->geometry.aperture_start = ~0UL << ias;
		domain->geometry.aperture_end = ~0UL;
	} else {
		domain->geometry.aperture_end = (1UL << ias) - 1;
	}

	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[cfg->irptndx];

	/* The implementation may supply its own fault handler */
	if (smmu->impl && smmu->impl->context_fault)
		context_fault = smmu->impl->context_fault;
	else
		context_fault = arm_smmu_context_fault;

	if (smmu->impl && smmu->impl->context_fault_needs_threaded_irq)
		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
						context_fault,
						IRQF_ONESHOT | IRQF_SHARED,
						"arm-smmu-context-fault",
						smmu_domain);
	else
		ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED,
				       "arm-smmu-context-fault", smmu_domain);

	/*
	 * Not fatal: carry on without a fault interrupt, and mark the index
	 * invalid so that teardown does not try to free an IRQ that was
	 * never requested.
	 */
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	/* Undo the context bank allocation and detach from the SMMU */
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
838 
839 static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain)
840 {
841 	struct arm_smmu_device *smmu = smmu_domain->smmu;
842 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
843 	int ret, irq;
844 
845 	if (!smmu)
846 		return;
847 
848 	ret = arm_smmu_rpm_get(smmu);
849 	if (ret < 0)
850 		return;
851 
852 	/*
853 	 * Disable the context bank and free the page tables before freeing
854 	 * it.
855 	 */
856 	smmu->cbs[cfg->cbndx].cfg = NULL;
857 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
858 
859 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
860 		irq = smmu->irqs[cfg->irptndx];
861 		devm_free_irq(smmu->dev, irq, smmu_domain);
862 	}
863 
864 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
865 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
866 
867 	arm_smmu_rpm_put(smmu);
868 }
869 
870 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
871 {
872 	struct arm_smmu_domain *smmu_domain;
873 
874 	/*
875 	 * Allocate the domain and initialise some of its data structures.
876 	 * We can't really do anything meaningful until we've added a
877 	 * master.
878 	 */
879 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
880 	if (!smmu_domain)
881 		return NULL;
882 
883 	mutex_init(&smmu_domain->init_mutex);
884 	spin_lock_init(&smmu_domain->cb_lock);
885 
886 	return &smmu_domain->domain;
887 }
888 
/*
 * Release @domain: tear down its context bank state, then free the
 * containing arm_smmu_domain.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	arm_smmu_destroy_domain_context(smmu_domain);
	kfree(smmu_domain);
}
900 
901 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
902 {
903 	struct arm_smmu_smr *smr = smmu->smrs + idx;
904 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
905 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
906 
907 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
908 		reg |= ARM_SMMU_SMR_VALID;
909 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
910 }
911 
912 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
913 {
914 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
915 	u32 reg;
916 
917 	if (smmu->impl && smmu->impl->write_s2cr) {
918 		smmu->impl->write_s2cr(smmu, idx);
919 		return;
920 	}
921 
922 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
923 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
924 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
925 
926 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
927 	    smmu->smrs[idx].valid)
928 		reg |= ARM_SMMU_S2CR_EXIDVALID;
929 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
930 }
931 
932 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
933 {
934 	arm_smmu_write_s2cr(smmu, idx);
935 	if (smmu->smrs)
936 		arm_smmu_write_smr(smmu, idx);
937 }
938 
939 /*
940  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
941  * should be called after sCR0 is written.
942  */
943 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
944 {
945 	u32 smr;
946 	int i;
947 
948 	if (!smmu->smrs)
949 		return;
950 	/*
951 	 * If we've had to accommodate firmware memory regions, we may
952 	 * have live SMRs by now; tread carefully...
953 	 *
954 	 * Somewhat perversely, not having a free SMR for this test implies we
955 	 * can get away without it anyway, as we'll only be able to 'allocate'
956 	 * these SMRs for the ID/mask values we're already trusting to be OK.
957 	 */
958 	for (i = 0; i < smmu->num_mapping_groups; i++)
959 		if (!smmu->smrs[i].valid)
960 			goto smr_ok;
961 	return;
962 smr_ok:
963 	/*
964 	 * SMR.ID bits may not be preserved if the corresponding MASK
965 	 * bits are set, so check each one separately. We can reject
966 	 * masters later if they try to claim IDs outside these masks.
967 	 */
968 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
969 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
970 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
971 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
972 
973 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
974 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
975 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
976 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
977 }
978 
979 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
980 {
981 	struct arm_smmu_smr *smrs = smmu->smrs;
982 	int i, free_idx = -ENOSPC;
983 
984 	/* Stream indexing is blissfully easy */
985 	if (!smrs)
986 		return id;
987 
988 	/* Validating SMRs is... less so */
989 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
990 		if (!smrs[i].valid) {
991 			/*
992 			 * Note the first free entry we come across, which
993 			 * we'll claim in the end if nothing else matches.
994 			 */
995 			if (free_idx < 0)
996 				free_idx = i;
997 			continue;
998 		}
999 		/*
1000 		 * If the new entry is _entirely_ matched by an existing entry,
1001 		 * then reuse that, with the guarantee that there also cannot
1002 		 * be any subsequent conflicting entries. In normal use we'd
1003 		 * expect simply identical entries for this case, but there's
1004 		 * no harm in accommodating the generalisation.
1005 		 */
1006 		if ((mask & smrs[i].mask) == mask &&
1007 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1008 			return i;
1009 		/*
1010 		 * If the new entry has any other overlap with an existing one,
1011 		 * though, then there always exists at least one stream ID
1012 		 * which would cause a conflict, and we can't allow that risk.
1013 		 */
1014 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1015 			return -EINVAL;
1016 	}
1017 
1018 	return free_idx;
1019 }
1020 
1021 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1022 {
1023 	if (--smmu->s2crs[idx].count)
1024 		return false;
1025 
1026 	smmu->s2crs[idx] = s2cr_init_val;
1027 	if (smmu->smrs)
1028 		smmu->smrs[idx].valid = false;
1029 
1030 	return true;
1031 }
1032 
1033 static int arm_smmu_master_alloc_smes(struct device *dev)
1034 {
1035 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1036 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1037 	struct arm_smmu_device *smmu = cfg->smmu;
1038 	struct arm_smmu_smr *smrs = smmu->smrs;
1039 	int i, idx, ret;
1040 
1041 	mutex_lock(&smmu->stream_map_mutex);
1042 	/* Figure out a viable stream map entry allocation */
1043 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1044 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1045 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1046 
1047 		if (idx != INVALID_SMENDX) {
1048 			ret = -EEXIST;
1049 			goto out_err;
1050 		}
1051 
1052 		ret = arm_smmu_find_sme(smmu, sid, mask);
1053 		if (ret < 0)
1054 			goto out_err;
1055 
1056 		idx = ret;
1057 		if (smrs && smmu->s2crs[idx].count == 0) {
1058 			smrs[idx].id = sid;
1059 			smrs[idx].mask = mask;
1060 			smrs[idx].valid = true;
1061 		}
1062 		smmu->s2crs[idx].count++;
1063 		cfg->smendx[i] = (s16)idx;
1064 	}
1065 
1066 	/* It worked! Now, poke the actual hardware */
1067 	for_each_cfg_sme(cfg, fwspec, i, idx)
1068 		arm_smmu_write_sme(smmu, idx);
1069 
1070 	mutex_unlock(&smmu->stream_map_mutex);
1071 	return 0;
1072 
1073 out_err:
1074 	while (i--) {
1075 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1076 		cfg->smendx[i] = INVALID_SMENDX;
1077 	}
1078 	mutex_unlock(&smmu->stream_map_mutex);
1079 	return ret;
1080 }
1081 
1082 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1083 				      struct iommu_fwspec *fwspec)
1084 {
1085 	struct arm_smmu_device *smmu = cfg->smmu;
1086 	int i, idx;
1087 
1088 	mutex_lock(&smmu->stream_map_mutex);
1089 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1090 		if (arm_smmu_free_sme(smmu, idx))
1091 			arm_smmu_write_sme(smmu, idx);
1092 		cfg->smendx[i] = INVALID_SMENDX;
1093 	}
1094 	mutex_unlock(&smmu->stream_map_mutex);
1095 }
1096 
1097 static void arm_smmu_master_install_s2crs(struct arm_smmu_master_cfg *cfg,
1098 					  enum arm_smmu_s2cr_type type,
1099 					  u8 cbndx, struct iommu_fwspec *fwspec)
1100 {
1101 	struct arm_smmu_device *smmu = cfg->smmu;
1102 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1103 	int i, idx;
1104 
1105 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1106 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1107 			continue;
1108 
1109 		s2cr[idx].type = type;
1110 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1111 		s2cr[idx].cbndx = cbndx;
1112 		arm_smmu_write_s2cr(smmu, idx);
1113 	}
1114 }
1115 
1116 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1117 {
1118 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1119 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1120 	struct arm_smmu_master_cfg *cfg;
1121 	struct arm_smmu_device *smmu;
1122 	int ret;
1123 
1124 	/*
1125 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1126 	 * domains between of_xlate() and probe_device() - we have no way to cope
1127 	 * with that, so until ARM gets converted to rely on groups and default
1128 	 * domains, just say no (but more politely than by dereferencing NULL).
1129 	 * This should be at least a WARN_ON once that's sorted.
1130 	 */
1131 	cfg = dev_iommu_priv_get(dev);
1132 	if (!cfg)
1133 		return -ENODEV;
1134 
1135 	smmu = cfg->smmu;
1136 
1137 	ret = arm_smmu_rpm_get(smmu);
1138 	if (ret < 0)
1139 		return ret;
1140 
1141 	/* Ensure that the domain is finalised */
1142 	ret = arm_smmu_init_domain_context(smmu_domain, smmu, dev);
1143 	if (ret < 0)
1144 		goto rpm_put;
1145 
1146 	/*
1147 	 * Sanity check the domain. We don't support domains across
1148 	 * different SMMUs.
1149 	 */
1150 	if (smmu_domain->smmu != smmu) {
1151 		ret = -EINVAL;
1152 		goto rpm_put;
1153 	}
1154 
1155 	/* Looks ok, so add the device to the domain */
1156 	arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS,
1157 				      smmu_domain->cfg.cbndx, fwspec);
1158 	arm_smmu_rpm_use_autosuspend(smmu);
1159 rpm_put:
1160 	arm_smmu_rpm_put(smmu);
1161 	return ret;
1162 }
1163 
1164 static int arm_smmu_attach_dev_type(struct device *dev,
1165 				    enum arm_smmu_s2cr_type type)
1166 {
1167 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1168 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1169 	struct arm_smmu_device *smmu;
1170 	int ret;
1171 
1172 	if (!cfg)
1173 		return -ENODEV;
1174 	smmu = cfg->smmu;
1175 
1176 	ret = arm_smmu_rpm_get(smmu);
1177 	if (ret < 0)
1178 		return ret;
1179 
1180 	arm_smmu_master_install_s2crs(cfg, type, 0, fwspec);
1181 	arm_smmu_rpm_use_autosuspend(smmu);
1182 	arm_smmu_rpm_put(smmu);
1183 	return 0;
1184 }
1185 
1186 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
1187 					struct device *dev)
1188 {
1189 	return arm_smmu_attach_dev_type(dev, S2CR_TYPE_BYPASS);
1190 }
1191 
1192 static const struct iommu_domain_ops arm_smmu_identity_ops = {
1193 	.attach_dev = arm_smmu_attach_dev_identity,
1194 };
1195 
1196 static struct iommu_domain arm_smmu_identity_domain = {
1197 	.type = IOMMU_DOMAIN_IDENTITY,
1198 	.ops = &arm_smmu_identity_ops,
1199 };
1200 
1201 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
1202 				       struct device *dev)
1203 {
1204 	return arm_smmu_attach_dev_type(dev, S2CR_TYPE_FAULT);
1205 }
1206 
1207 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
1208 	.attach_dev = arm_smmu_attach_dev_blocked,
1209 };
1210 
1211 static struct iommu_domain arm_smmu_blocked_domain = {
1212 	.type = IOMMU_DOMAIN_BLOCKED,
1213 	.ops = &arm_smmu_blocked_ops,
1214 };
1215 
1216 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1217 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1218 			      int prot, gfp_t gfp, size_t *mapped)
1219 {
1220 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1221 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1222 	int ret;
1223 
1224 	if (!ops)
1225 		return -ENODEV;
1226 
1227 	arm_smmu_rpm_get(smmu);
1228 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1229 	arm_smmu_rpm_put(smmu);
1230 
1231 	return ret;
1232 }
1233 
1234 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1235 				   size_t pgsize, size_t pgcount,
1236 				   struct iommu_iotlb_gather *iotlb_gather)
1237 {
1238 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1239 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1240 	size_t ret;
1241 
1242 	if (!ops)
1243 		return 0;
1244 
1245 	arm_smmu_rpm_get(smmu);
1246 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1247 	arm_smmu_rpm_put(smmu);
1248 
1249 	return ret;
1250 }
1251 
1252 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1253 {
1254 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1255 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1256 
1257 	if (smmu_domain->flush_ops) {
1258 		arm_smmu_rpm_get(smmu);
1259 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1260 		arm_smmu_rpm_put(smmu);
1261 	}
1262 }
1263 
1264 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1265 				struct iommu_iotlb_gather *gather)
1266 {
1267 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1268 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1269 
1270 	if (!smmu)
1271 		return;
1272 
1273 	arm_smmu_rpm_get(smmu);
1274 	if (smmu->version == ARM_SMMU_V2 ||
1275 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1276 		arm_smmu_tlb_sync_context(smmu_domain);
1277 	else
1278 		arm_smmu_tlb_sync_global(smmu);
1279 	arm_smmu_rpm_put(smmu);
1280 }
1281 
1282 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1283 					      dma_addr_t iova)
1284 {
1285 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1286 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1287 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1288 	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1289 	struct device *dev = smmu->dev;
1290 	void __iomem *reg;
1291 	u32 tmp;
1292 	u64 phys;
1293 	unsigned long va, flags;
1294 	int ret, idx = cfg->cbndx;
1295 	phys_addr_t addr = 0;
1296 
1297 	ret = arm_smmu_rpm_get(smmu);
1298 	if (ret < 0)
1299 		return 0;
1300 
1301 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1302 	va = iova & ~0xfffUL;
1303 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1304 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1305 	else
1306 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1307 
1308 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1309 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1310 				      5, 50)) {
1311 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1312 		dev_err(dev,
1313 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1314 			&iova);
1315 		arm_smmu_rpm_put(smmu);
1316 		return ops->iova_to_phys(ops, iova);
1317 	}
1318 
1319 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1320 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1321 	if (phys & ARM_SMMU_CB_PAR_F) {
1322 		dev_err(dev, "translation fault!\n");
1323 		dev_err(dev, "PAR = 0x%llx\n", phys);
1324 		goto out;
1325 	}
1326 
1327 	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1328 out:
1329 	arm_smmu_rpm_put(smmu);
1330 
1331 	return addr;
1332 }
1333 
1334 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1335 					dma_addr_t iova)
1336 {
1337 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1338 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1339 
1340 	if (!ops)
1341 		return 0;
1342 
1343 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1344 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1345 		return arm_smmu_iova_to_phys_hard(domain, iova);
1346 
1347 	return ops->iova_to_phys(ops, iova);
1348 }
1349 
1350 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
1351 {
1352 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1353 
1354 	switch (cap) {
1355 	case IOMMU_CAP_CACHE_COHERENCY:
1356 		/*
1357 		 * It's overwhelmingly the case in practice that when the pagetable
1358 		 * walk interface is connected to a coherent interconnect, all the
1359 		 * translation interfaces are too. Furthermore if the device is
1360 		 * natively coherent, then its translation interface must also be.
1361 		 */
1362 		return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK ||
1363 			device_get_dma_attr(dev) == DEV_DMA_COHERENT;
1364 	case IOMMU_CAP_NOEXEC:
1365 	case IOMMU_CAP_DEFERRED_FLUSH:
1366 		return true;
1367 	default:
1368 		return false;
1369 	}
1370 }
1371 
1372 static
1373 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1374 {
1375 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1376 							  fwnode);
1377 	put_device(dev);
1378 	return dev ? dev_get_drvdata(dev) : NULL;
1379 }
1380 
1381 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1382 {
1383 	struct arm_smmu_device *smmu = NULL;
1384 	struct arm_smmu_master_cfg *cfg;
1385 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1386 	int i, ret;
1387 
1388 	if (using_legacy_binding) {
1389 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1390 
1391 		/*
1392 		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1393 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1394 		 * later use.
1395 		 */
1396 		fwspec = dev_iommu_fwspec_get(dev);
1397 		if (ret)
1398 			goto out_free;
1399 	} else {
1400 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1401 	}
1402 
1403 	ret = -EINVAL;
1404 	for (i = 0; i < fwspec->num_ids; i++) {
1405 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1406 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1407 
1408 		if (sid & ~smmu->streamid_mask) {
1409 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1410 				sid, smmu->streamid_mask);
1411 			goto out_free;
1412 		}
1413 		if (mask & ~smmu->smr_mask_mask) {
1414 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1415 				mask, smmu->smr_mask_mask);
1416 			goto out_free;
1417 		}
1418 	}
1419 
1420 	ret = -ENOMEM;
1421 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1422 		      GFP_KERNEL);
1423 	if (!cfg)
1424 		goto out_free;
1425 
1426 	cfg->smmu = smmu;
1427 	dev_iommu_priv_set(dev, cfg);
1428 	while (i--)
1429 		cfg->smendx[i] = INVALID_SMENDX;
1430 
1431 	ret = arm_smmu_rpm_get(smmu);
1432 	if (ret < 0)
1433 		goto out_cfg_free;
1434 
1435 	ret = arm_smmu_master_alloc_smes(dev);
1436 	arm_smmu_rpm_put(smmu);
1437 
1438 	if (ret)
1439 		goto out_cfg_free;
1440 
1441 	device_link_add(dev, smmu->dev,
1442 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1443 
1444 	return &smmu->iommu;
1445 
1446 out_cfg_free:
1447 	kfree(cfg);
1448 out_free:
1449 	iommu_fwspec_free(dev);
1450 	return ERR_PTR(ret);
1451 }
1452 
1453 static void arm_smmu_release_device(struct device *dev)
1454 {
1455 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1456 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1457 	int ret;
1458 
1459 	ret = arm_smmu_rpm_get(cfg->smmu);
1460 	if (ret < 0)
1461 		return;
1462 
1463 	arm_smmu_master_free_smes(cfg, fwspec);
1464 
1465 	arm_smmu_rpm_put(cfg->smmu);
1466 
1467 	kfree(cfg);
1468 }
1469 
1470 static void arm_smmu_probe_finalize(struct device *dev)
1471 {
1472 	struct arm_smmu_master_cfg *cfg;
1473 	struct arm_smmu_device *smmu;
1474 
1475 	cfg = dev_iommu_priv_get(dev);
1476 	smmu = cfg->smmu;
1477 
1478 	if (smmu->impl && smmu->impl->probe_finalize)
1479 		smmu->impl->probe_finalize(smmu, dev);
1480 }
1481 
1482 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1483 {
1484 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1485 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1486 	struct arm_smmu_device *smmu = cfg->smmu;
1487 	struct iommu_group *group = NULL;
1488 	int i, idx;
1489 
1490 	mutex_lock(&smmu->stream_map_mutex);
1491 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1492 		if (group && smmu->s2crs[idx].group &&
1493 		    group != smmu->s2crs[idx].group) {
1494 			mutex_unlock(&smmu->stream_map_mutex);
1495 			return ERR_PTR(-EINVAL);
1496 		}
1497 
1498 		group = smmu->s2crs[idx].group;
1499 	}
1500 
1501 	if (group) {
1502 		mutex_unlock(&smmu->stream_map_mutex);
1503 		return iommu_group_ref_get(group);
1504 	}
1505 
1506 	if (dev_is_pci(dev))
1507 		group = pci_device_group(dev);
1508 	else if (dev_is_fsl_mc(dev))
1509 		group = fsl_mc_device_group(dev);
1510 	else
1511 		group = generic_device_group(dev);
1512 
1513 	/* Remember group for faster lookups */
1514 	if (!IS_ERR(group))
1515 		for_each_cfg_sme(cfg, fwspec, i, idx)
1516 			smmu->s2crs[idx].group = group;
1517 
1518 	mutex_unlock(&smmu->stream_map_mutex);
1519 	return group;
1520 }
1521 
1522 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1523 {
1524 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1525 	int ret = 0;
1526 
1527 	mutex_lock(&smmu_domain->init_mutex);
1528 	if (smmu_domain->smmu)
1529 		ret = -EPERM;
1530 	else
1531 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1532 	mutex_unlock(&smmu_domain->init_mutex);
1533 
1534 	return ret;
1535 }
1536 
1537 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1538 		unsigned long quirks)
1539 {
1540 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1541 	int ret = 0;
1542 
1543 	mutex_lock(&smmu_domain->init_mutex);
1544 	if (smmu_domain->smmu)
1545 		ret = -EPERM;
1546 	else
1547 		smmu_domain->pgtbl_quirks = quirks;
1548 	mutex_unlock(&smmu_domain->init_mutex);
1549 
1550 	return ret;
1551 }
1552 
1553 static int arm_smmu_of_xlate(struct device *dev,
1554 			     const struct of_phandle_args *args)
1555 {
1556 	u32 mask, fwid = 0;
1557 
1558 	if (args->args_count > 0)
1559 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1560 
1561 	if (args->args_count > 1)
1562 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1563 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1564 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1565 
1566 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1567 }
1568 
1569 static void arm_smmu_get_resv_regions(struct device *dev,
1570 				      struct list_head *head)
1571 {
1572 	struct iommu_resv_region *region;
1573 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1574 
1575 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1576 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
1577 	if (!region)
1578 		return;
1579 
1580 	list_add_tail(&region->list, head);
1581 
1582 	iommu_dma_get_resv_regions(dev, head);
1583 }
1584 
1585 static int arm_smmu_def_domain_type(struct device *dev)
1586 {
1587 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1588 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1589 
1590 	if (using_legacy_binding)
1591 		return IOMMU_DOMAIN_IDENTITY;
1592 
1593 	if (impl && impl->def_domain_type)
1594 		return impl->def_domain_type(dev);
1595 
1596 	return 0;
1597 }
1598 
1599 static struct iommu_ops arm_smmu_ops = {
1600 	.identity_domain	= &arm_smmu_identity_domain,
1601 	.blocked_domain		= &arm_smmu_blocked_domain,
1602 	.capable		= arm_smmu_capable,
1603 	.domain_alloc_paging	= arm_smmu_domain_alloc_paging,
1604 	.probe_device		= arm_smmu_probe_device,
1605 	.release_device		= arm_smmu_release_device,
1606 	.probe_finalize		= arm_smmu_probe_finalize,
1607 	.device_group		= arm_smmu_device_group,
1608 	.of_xlate		= arm_smmu_of_xlate,
1609 	.get_resv_regions	= arm_smmu_get_resv_regions,
1610 	.def_domain_type	= arm_smmu_def_domain_type,
1611 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1612 	.owner			= THIS_MODULE,
1613 	.default_domain_ops = &(const struct iommu_domain_ops) {
1614 		.attach_dev		= arm_smmu_attach_dev,
1615 		.map_pages		= arm_smmu_map_pages,
1616 		.unmap_pages		= arm_smmu_unmap_pages,
1617 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
1618 		.iotlb_sync		= arm_smmu_iotlb_sync,
1619 		.iova_to_phys		= arm_smmu_iova_to_phys,
1620 		.enable_nesting		= arm_smmu_enable_nesting,
1621 		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
1622 		.free			= arm_smmu_domain_free,
1623 	}
1624 };
1625 
1626 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1627 {
1628 	int i;
1629 	u32 reg;
1630 
1631 	/* clear global FSR */
1632 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1633 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1634 
1635 	/*
1636 	 * Reset stream mapping groups: Initial values mark all SMRn as
1637 	 * invalid and all S2CRn as bypass unless overridden.
1638 	 */
1639 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1640 		arm_smmu_write_sme(smmu, i);
1641 
1642 	/* Make sure all context banks are disabled and clear CB_FSR  */
1643 	for (i = 0; i < smmu->num_context_banks; ++i) {
1644 		arm_smmu_write_context_bank(smmu, i);
1645 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1646 	}
1647 
1648 	/* Invalidate the TLB, just in case */
1649 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1650 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1651 
1652 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1653 
1654 	/* Enable fault reporting */
1655 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1656 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1657 
1658 	/* Disable TLB broadcasting. */
1659 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1660 
1661 	/* Enable client access, handling unmatched streams as appropriate */
1662 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1663 	if (disable_bypass)
1664 		reg |= ARM_SMMU_sCR0_USFCFG;
1665 	else
1666 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1667 
1668 	/* Disable forced broadcasting */
1669 	reg &= ~ARM_SMMU_sCR0_FB;
1670 
1671 	/* Don't upgrade barriers */
1672 	reg &= ~(ARM_SMMU_sCR0_BSU);
1673 
1674 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1675 		reg |= ARM_SMMU_sCR0_VMID16EN;
1676 
1677 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1678 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1679 
1680 	if (smmu->impl && smmu->impl->reset)
1681 		smmu->impl->reset(smmu);
1682 
1683 	/* Push the button */
1684 	arm_smmu_tlb_sync_global(smmu);
1685 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1686 }
1687 
/*
 * Convert an address-size field encoding into a width in bits.
 * Encodings 0..4 map to 32, 36, 40, 42 and 44 bits; 5 and every other
 * value (including negative ones) map to 48 bits.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits_by_encoding[] = { 32, 36, 40, 42, 44 };
	const int known = (int)(sizeof(bits_by_encoding) /
				sizeof(bits_by_encoding[0]));

	if (size < 0 || size >= known)
		return 48;

	return bits_by_encoding[size];
}
1706 
1707 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1708 {
1709 	unsigned int size;
1710 	u32 id;
1711 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1712 	int i, ret;
1713 
1714 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1715 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1716 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1717 
1718 	/* ID0 */
1719 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1720 
1721 	/* Restrict available stages based on module parameter */
1722 	if (force_stage == 1)
1723 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1724 	else if (force_stage == 2)
1725 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1726 
1727 	if (id & ARM_SMMU_ID0_S1TS) {
1728 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1729 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1730 	}
1731 
1732 	if (id & ARM_SMMU_ID0_S2TS) {
1733 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1734 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1735 	}
1736 
1737 	if (id & ARM_SMMU_ID0_NTS) {
1738 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1739 		dev_notice(smmu->dev, "\tnested translation\n");
1740 	}
1741 
1742 	if (!(smmu->features &
1743 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1744 		dev_err(smmu->dev, "\tno translation support!\n");
1745 		return -ENODEV;
1746 	}
1747 
1748 	if ((id & ARM_SMMU_ID0_S1TS) &&
1749 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1750 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1751 		dev_notice(smmu->dev, "\taddress translation ops\n");
1752 	}
1753 
1754 	/*
1755 	 * In order for DMA API calls to work properly, we must defer to what
1756 	 * the FW says about coherency, regardless of what the hardware claims.
1757 	 * Fortunately, this also opens up a workaround for systems where the
1758 	 * ID register value has ended up configured incorrectly.
1759 	 */
1760 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1761 	if (cttw_fw || cttw_reg)
1762 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1763 			   cttw_fw ? "" : "non-");
1764 	if (cttw_fw != cttw_reg)
1765 		dev_notice(smmu->dev,
1766 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1767 
1768 	/* Max. number of entries we have for stream matching/indexing */
1769 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1770 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1771 		size = 1 << 16;
1772 	} else {
1773 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1774 	}
1775 	smmu->streamid_mask = size - 1;
1776 	if (id & ARM_SMMU_ID0_SMS) {
1777 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1778 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1779 		if (size == 0) {
1780 			dev_err(smmu->dev,
1781 				"stream-matching supported, but no SMRs present!\n");
1782 			return -ENODEV;
1783 		}
1784 
1785 		/* Zero-initialised to mark as invalid */
1786 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1787 					  GFP_KERNEL);
1788 		if (!smmu->smrs)
1789 			return -ENOMEM;
1790 
1791 		dev_notice(smmu->dev,
1792 			   "\tstream matching with %u register groups", size);
1793 	}
1794 	/* s2cr->type == 0 means translation, so initialise explicitly */
1795 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1796 					 GFP_KERNEL);
1797 	if (!smmu->s2crs)
1798 		return -ENOMEM;
1799 	for (i = 0; i < size; i++)
1800 		smmu->s2crs[i] = s2cr_init_val;
1801 
1802 	smmu->num_mapping_groups = size;
1803 	mutex_init(&smmu->stream_map_mutex);
1804 	spin_lock_init(&smmu->global_sync_lock);
1805 
1806 	if (smmu->version < ARM_SMMU_V2 ||
1807 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1808 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1809 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1810 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1811 	}
1812 
1813 	/* ID1 */
1814 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1815 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1816 
1817 	/* Check for size mismatch of SMMU address space from mapped region */
1818 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1819 	if (smmu->numpage != 2 * size << smmu->pgshift)
1820 		dev_warn(smmu->dev,
1821 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1822 			2 * size << smmu->pgshift, smmu->numpage);
1823 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1824 	smmu->numpage = size;
1825 
1826 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1827 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1828 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1829 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1830 		return -ENODEV;
1831 	}
1832 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1833 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1834 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1835 				 sizeof(*smmu->cbs), GFP_KERNEL);
1836 	if (!smmu->cbs)
1837 		return -ENOMEM;
1838 
1839 	/* ID2 */
1840 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1841 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1842 	smmu->ipa_size = size;
1843 
1844 	/* The output mask is also applied for bypass */
1845 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1846 	smmu->pa_size = size;
1847 
1848 	if (id & ARM_SMMU_ID2_VMID16)
1849 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1850 
1851 	/*
1852 	 * What the page table walker can address actually depends on which
1853 	 * descriptor format is in use, but since a) we don't know that yet,
1854 	 * and b) it can vary per context bank, this will have to do...
1855 	 */
1856 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1857 		dev_warn(smmu->dev,
1858 			 "failed to set DMA mask for table walker\n");
1859 
1860 	if (smmu->version < ARM_SMMU_V2) {
1861 		smmu->va_size = smmu->ipa_size;
1862 		if (smmu->version == ARM_SMMU_V1_64K)
1863 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1864 	} else {
1865 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1866 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1867 		if (id & ARM_SMMU_ID2_PTFS_4K)
1868 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1869 		if (id & ARM_SMMU_ID2_PTFS_16K)
1870 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1871 		if (id & ARM_SMMU_ID2_PTFS_64K)
1872 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1873 	}
1874 
1875 	if (smmu->impl && smmu->impl->cfg_probe) {
1876 		ret = smmu->impl->cfg_probe(smmu);
1877 		if (ret)
1878 			return ret;
1879 	}
1880 
1881 	/* Now we've corralled the various formats, what'll it do? */
1882 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1883 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1884 	if (smmu->features &
1885 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1886 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1887 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1888 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1889 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1890 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1891 
1892 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1893 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1894 	else
1895 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1896 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1897 		   smmu->pgsize_bitmap);
1898 
1899 
1900 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1901 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1902 			   smmu->va_size, smmu->ipa_size);
1903 
1904 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1905 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1906 			   smmu->ipa_size, smmu->pa_size);
1907 
1908 	return 0;
1909 }
1910 
1911 struct arm_smmu_match_data {
1912 	enum arm_smmu_arch_version version;
1913 	enum arm_smmu_implementation model;
1914 };
1915 
1916 #define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
1917 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1918 
1919 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1920 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1921 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1922 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1923 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1924 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1925 
/*
 * Devicetree compatibles handled by this driver. The .data pointer of the
 * matching entry is what arm_smmu_device_dt_probe() retrieves through
 * of_device_get_match_data(). The empty entry terminates the table.
 */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1938 
1939 #ifdef CONFIG_ACPI
1940 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1941 {
1942 	int ret = 0;
1943 
1944 	switch (model) {
1945 	case ACPI_IORT_SMMU_V1:
1946 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1947 		smmu->version = ARM_SMMU_V1;
1948 		smmu->model = GENERIC_SMMU;
1949 		break;
1950 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1951 		smmu->version = ARM_SMMU_V1_64K;
1952 		smmu->model = GENERIC_SMMU;
1953 		break;
1954 	case ACPI_IORT_SMMU_V2:
1955 		smmu->version = ARM_SMMU_V2;
1956 		smmu->model = GENERIC_SMMU;
1957 		break;
1958 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1959 		smmu->version = ARM_SMMU_V2;
1960 		smmu->model = ARM_MMU500;
1961 		break;
1962 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1963 		smmu->version = ARM_SMMU_V2;
1964 		smmu->model = CAVIUM_SMMUV2;
1965 		break;
1966 	default:
1967 		ret = -ENODEV;
1968 	}
1969 
1970 	return ret;
1971 }
1972 
1973 static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1974 				      u32 *global_irqs, u32 *pmu_irqs)
1975 {
1976 	struct device *dev = smmu->dev;
1977 	struct acpi_iort_node *node =
1978 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1979 	struct acpi_iort_smmu *iort_smmu;
1980 	int ret;
1981 
1982 	/* Retrieve SMMU1/2 specific data */
1983 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1984 
1985 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1986 	if (ret < 0)
1987 		return ret;
1988 
1989 	/* Ignore the configuration access interrupt */
1990 	*global_irqs = 1;
1991 	*pmu_irqs = 0;
1992 
1993 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1994 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1995 
1996 	return 0;
1997 }
1998 #else
/*
 * Stub used when CONFIG_ACPI is not set: no ACPI description can exist, so
 * the probe always fails with -ENODEV and leaves all arguments untouched.
 */
static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
					     u32 *global_irqs, u32 *pmu_irqs)
{
	return -ENODEV;
}
2004 #endif
2005 
2006 static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
2007 				    u32 *global_irqs, u32 *pmu_irqs)
2008 {
2009 	const struct arm_smmu_match_data *data;
2010 	struct device *dev = smmu->dev;
2011 	bool legacy_binding;
2012 
2013 	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
2014 		return dev_err_probe(dev, -ENODEV,
2015 				     "missing #global-interrupts property\n");
2016 	*pmu_irqs = 0;
2017 
2018 	data = of_device_get_match_data(dev);
2019 	smmu->version = data->version;
2020 	smmu->model = data->model;
2021 
2022 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2023 	if (legacy_binding && !using_generic_binding) {
2024 		if (!using_legacy_binding) {
2025 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2026 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2027 		}
2028 		using_legacy_binding = true;
2029 	} else if (!legacy_binding && !using_legacy_binding) {
2030 		using_generic_binding = true;
2031 	} else {
2032 		dev_err(dev, "not probing due to mismatched DT properties\n");
2033 		return -ENODEV;
2034 	}
2035 
2036 	if (of_dma_is_coherent(dev->of_node))
2037 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2038 
2039 	return 0;
2040 }
2041 
2042 static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
2043 {
2044 	struct list_head rmr_list;
2045 	struct iommu_resv_region *e;
2046 	int idx, cnt = 0;
2047 	u32 reg;
2048 
2049 	INIT_LIST_HEAD(&rmr_list);
2050 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2051 
2052 	/*
2053 	 * Rather than trying to look at existing mappings that
2054 	 * are setup by the firmware and then invalidate the ones
2055 	 * that do no have matching RMR entries, just disable the
2056 	 * SMMU until it gets enabled again in the reset routine.
2057 	 */
2058 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
2059 	reg |= ARM_SMMU_sCR0_CLIENTPD;
2060 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
2061 
2062 	list_for_each_entry(e, &rmr_list, list) {
2063 		struct iommu_iort_rmr_data *rmr;
2064 		int i;
2065 
2066 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
2067 		for (i = 0; i < rmr->num_sids; i++) {
2068 			idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0);
2069 			if (idx < 0)
2070 				continue;
2071 
2072 			if (smmu->s2crs[idx].count == 0) {
2073 				smmu->smrs[idx].id = rmr->sids[i];
2074 				smmu->smrs[idx].mask = 0;
2075 				smmu->smrs[idx].valid = true;
2076 			}
2077 			smmu->s2crs[idx].count++;
2078 			smmu->s2crs[idx].type = S2CR_TYPE_BYPASS;
2079 			smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
2080 
2081 			cnt++;
2082 		}
2083 	}
2084 
2085 	dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
2086 		   cnt == 1 ? "" : "s");
2087 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2088 }
2089 
2090 static int arm_smmu_device_probe(struct platform_device *pdev)
2091 {
2092 	struct resource *res;
2093 	struct arm_smmu_device *smmu;
2094 	struct device *dev = &pdev->dev;
2095 	int num_irqs, i, err;
2096 	u32 global_irqs, pmu_irqs;
2097 	irqreturn_t (*global_fault)(int irq, void *dev);
2098 
2099 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2100 	if (!smmu) {
2101 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2102 		return -ENOMEM;
2103 	}
2104 	smmu->dev = dev;
2105 
2106 	if (dev->of_node)
2107 		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
2108 	else
2109 		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
2110 	if (err)
2111 		return err;
2112 
2113 	smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
2114 	if (IS_ERR(smmu->base))
2115 		return PTR_ERR(smmu->base);
2116 	smmu->ioaddr = res->start;
2117 
2118 	/*
2119 	 * The resource size should effectively match the value of SMMU_TOP;
2120 	 * stash that temporarily until we know PAGESIZE to validate it with.
2121 	 */
2122 	smmu->numpage = resource_size(res);
2123 
2124 	smmu = arm_smmu_impl_init(smmu);
2125 	if (IS_ERR(smmu))
2126 		return PTR_ERR(smmu);
2127 
2128 	num_irqs = platform_irq_count(pdev);
2129 
2130 	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
2131 	if (smmu->num_context_irqs <= 0)
2132 		return dev_err_probe(dev, -ENODEV,
2133 				"found %d interrupts but expected at least %d\n",
2134 				num_irqs, global_irqs + pmu_irqs + 1);
2135 
2136 	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
2137 				  sizeof(*smmu->irqs), GFP_KERNEL);
2138 	if (!smmu->irqs)
2139 		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
2140 				     smmu->num_context_irqs);
2141 
2142 	for (i = 0; i < smmu->num_context_irqs; i++) {
2143 		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);
2144 
2145 		if (irq < 0)
2146 			return irq;
2147 		smmu->irqs[i] = irq;
2148 	}
2149 
2150 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2151 	if (err < 0) {
2152 		dev_err(dev, "failed to get clocks %d\n", err);
2153 		return err;
2154 	}
2155 	smmu->num_clks = err;
2156 
2157 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2158 	if (err)
2159 		return err;
2160 
2161 	err = arm_smmu_device_cfg_probe(smmu);
2162 	if (err)
2163 		return err;
2164 
2165 	if (smmu->version == ARM_SMMU_V2) {
2166 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2167 			dev_err(dev,
2168 			      "found only %d context irq(s) but %d required\n",
2169 			      smmu->num_context_irqs, smmu->num_context_banks);
2170 			return -ENODEV;
2171 		}
2172 
2173 		/* Ignore superfluous interrupts */
2174 		smmu->num_context_irqs = smmu->num_context_banks;
2175 	}
2176 
2177 	if (smmu->impl && smmu->impl->global_fault)
2178 		global_fault = smmu->impl->global_fault;
2179 	else
2180 		global_fault = arm_smmu_global_fault;
2181 
2182 	for (i = 0; i < global_irqs; i++) {
2183 		int irq = platform_get_irq(pdev, i);
2184 
2185 		if (irq < 0)
2186 			return irq;
2187 
2188 		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
2189 				       "arm-smmu global fault", smmu);
2190 		if (err)
2191 			return dev_err_probe(dev, err,
2192 					"failed to request global IRQ %d (%u)\n",
2193 					i, irq);
2194 	}
2195 
2196 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2197 				     "smmu.%pa", &smmu->ioaddr);
2198 	if (err) {
2199 		dev_err(dev, "Failed to register iommu in sysfs\n");
2200 		return err;
2201 	}
2202 
2203 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops,
2204 				    using_legacy_binding ? NULL : dev);
2205 	if (err) {
2206 		dev_err(dev, "Failed to register iommu\n");
2207 		iommu_device_sysfs_remove(&smmu->iommu);
2208 		return err;
2209 	}
2210 
2211 	platform_set_drvdata(pdev, smmu);
2212 
2213 	/* Check for RMRs and install bypass SMRs if any */
2214 	arm_smmu_rmr_install_bypass_smr(smmu);
2215 
2216 	arm_smmu_device_reset(smmu);
2217 	arm_smmu_test_smr_masks(smmu);
2218 
2219 	/*
2220 	 * We want to avoid touching dev->power.lock in fastpaths unless
2221 	 * it's really going to do something useful - pm_runtime_enabled()
2222 	 * can serve as an ideal proxy for that decision. So, conditionally
2223 	 * enable pm_runtime.
2224 	 */
2225 	if (dev->pm_domain) {
2226 		pm_runtime_set_active(dev);
2227 		pm_runtime_enable(dev);
2228 	}
2229 
2230 	return 0;
2231 }
2232 
2233 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2234 {
2235 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2236 
2237 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2238 		dev_notice(&pdev->dev, "disabling translation\n");
2239 
2240 	arm_smmu_rpm_get(smmu);
2241 	/* Turn the thing off */
2242 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2243 	arm_smmu_rpm_put(smmu);
2244 
2245 	if (pm_runtime_enabled(smmu->dev))
2246 		pm_runtime_force_suspend(smmu->dev);
2247 	else
2248 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2249 
2250 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2251 }
2252 
2253 static void arm_smmu_device_remove(struct platform_device *pdev)
2254 {
2255 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2256 
2257 	iommu_device_unregister(&smmu->iommu);
2258 	iommu_device_sysfs_remove(&smmu->iommu);
2259 
2260 	arm_smmu_device_shutdown(pdev);
2261 }
2262 
2263 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2264 {
2265 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2266 	int ret;
2267 
2268 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2269 	if (ret)
2270 		return ret;
2271 
2272 	arm_smmu_device_reset(smmu);
2273 
2274 	return 0;
2275 }
2276 
2277 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2278 {
2279 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2280 
2281 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2282 
2283 	return 0;
2284 }
2285 
2286 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2287 {
2288 	int ret;
2289 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2290 
2291 	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2292 	if (ret)
2293 		return ret;
2294 
2295 	if (pm_runtime_suspended(dev))
2296 		return 0;
2297 
2298 	ret = arm_smmu_runtime_resume(dev);
2299 	if (ret)
2300 		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2301 
2302 	return ret;
2303 }
2304 
2305 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2306 {
2307 	int ret = 0;
2308 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2309 
2310 	if (pm_runtime_suspended(dev))
2311 		goto clk_unprepare;
2312 
2313 	ret = arm_smmu_runtime_suspend(dev);
2314 	if (ret)
2315 		return ret;
2316 
2317 clk_unprepare:
2318 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2319 	return ret;
2320 }
2321 
/*
 * Power-management callbacks: the pm_suspend/pm_resume pair handles system
 * sleep, the runtime_suspend/runtime_resume pair handles runtime PM, and no
 * runtime-idle callback is provided (NULL).
 */
static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};
2327 
/*
 * Platform driver definition. Devices are matched through
 * arm_smmu_of_match[]; suppress_bind_attrs is set so the driver core does
 * not expose manual bind/unbind attributes in sysfs for this driver.
 */
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= arm_smmu_of_match,
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs    = true,
	},
	.probe	= arm_smmu_device_probe,
	.remove_new = arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
/* Generates the module init/exit code that registers/unregisters the driver. */
module_platform_driver(arm_smmu_driver);
2340 
/* Module metadata; the alias lets the module be loaded for a platform device named "arm-smmu". */
MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu");
MODULE_LICENSE("GPL v2");
2345