xref: /linux/drivers/iommu/arm/arm-smmu/arm-smmu.c (revision 4e73826089ce899357580bbf6e0afe4e6f9900b7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-mapping.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/io.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/of.h>
31 #include <linux/of_address.h>
32 #include <linux/pci.h>
33 #include <linux/platform_device.h>
34 #include <linux/pm_runtime.h>
35 #include <linux/ratelimit.h>
36 #include <linux/slab.h>
37 
38 #include <linux/fsl/mc.h>
39 
40 #include "arm-smmu.h"
41 #include "../../dma-iommu.h"
42 
43 /*
44  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
45  * global register space are still, in fact, using a hypervisor to mediate it
46  * by trapping and emulating register accesses. Sadly, some deployed versions
47  * of said trapping code have bugs wherein they go horribly wrong for stores
48  * using r31 (i.e. XZR/WZR) as the source register.
49  */
50 #define QCOM_DUMMY_VAL -1
51 
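/*
 * Fixed IOVA window advertised to the IOMMU core as a software-managed
 * MSI region (IOMMU_RESV_SW_MSI); see arm_smmu_get_resv_regions().
 */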
52 #define MSI_IOVA_BASE			0x8000000
53 #define MSI_IOVA_LENGTH			0x100000
54 
55 static int force_stage;
56 module_param(force_stage, int, S_IRUGO);
57 MODULE_PARM_DESC(force_stage,
58 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
59 static bool disable_bypass =
60 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
61 module_param(disable_bypass, bool, S_IRUGO);
62 MODULE_PARM_DESC(disable_bypass,
63 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
64 
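/*
 * Default S2CR state for unused stream mapping entries: fault or bypass
 * incoming transactions according to the disable_bypass parameter above.
 */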
65 #define s2cr_init_val (struct arm_smmu_s2cr){				\
66 	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
67 }
68 
69 static bool using_legacy_binding, using_generic_binding;
70 
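/*
 * Runtime PM helpers: only take/drop references when runtime PM is
 * actually enabled for this SMMU, so the common no-PM case stays cheap.
 */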
71 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
72 {
73 	if (pm_runtime_enabled(smmu->dev))
74 		return pm_runtime_resume_and_get(smmu->dev);
75 
76 	return 0;
77 }
78 
79 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
80 {
81 	if (pm_runtime_enabled(smmu->dev))
82 		pm_runtime_put_autosuspend(smmu->dev);
83 }
84 
85 static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu)
86 {
87 	/*
88 	 * Setup an autosuspend delay to avoid bouncing runpm state.
89 	 * Otherwise, if a driver for a suspended consumer device
90 	 * unmaps buffers, it will runpm resume/suspend for each one.
91 	 *
92 	 * For example, when used by a GPU device, when an application
93 	 * or game exits, it can trigger unmapping 100s or 1000s of
94 	 * buffers.  With a runpm cycle for each buffer, that adds up
95 	 * to 5-10sec worth of reprogramming the context bank, while
96 	 * the system appears to be locked up to the user.
97 	 */
98 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
99 	pm_runtime_use_autosuspend(smmu->dev);
100 }
101 
102 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
103 {
104 	return container_of(dom, struct arm_smmu_domain, domain);
105 }
106 
107 static struct platform_driver arm_smmu_driver;
108 static struct iommu_ops arm_smmu_ops;
109 
110 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
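/*
 * Legacy "mmu-masters" binding: PCI devices have no DT node of their own,
 * so walk up to the root bus and use the host controller's node instead;
 * everything else is resolved via its own of_node.
 */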
111 static struct device_node *dev_get_dev_node(struct device *dev)
112 {
113 	if (dev_is_pci(dev)) {
114 		struct pci_bus *bus = to_pci_dev(dev)->bus;
115 
116 		while (!pci_is_root_bus(bus))
117 			bus = bus->parent;
118 		return of_node_get(bus->bridge->parent->of_node);
119 	}
120 
121 	return of_node_get(dev->of_node);
122 }
123 
124 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
125 {
126 	*((__be32 *)data) = cpu_to_be32(alias);
127 	return 0; /* Continue walking */
128 }
129 
130 static int __find_legacy_master_phandle(struct device *dev, void *data)
131 {
132 	struct of_phandle_iterator *it = *(void **)data;
133 	struct device_node *np = it->node;
134 	int err;
135 
136 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
137 			    "#stream-id-cells", -1)
138 		if (it->node == np) {
139 			*(void **)data = dev;
140 			return 1;
141 		}
142 	it->node = np;
143 	return err == -ENOENT ? 0 : err;
144 }
145 
146 static int arm_smmu_register_legacy_master(struct device *dev,
147 					   struct arm_smmu_device **smmu)
148 {
149 	struct device *smmu_dev;
150 	struct device_node *np;
151 	struct of_phandle_iterator it;
152 	void *data = &it;
153 	u32 *sids;
154 	__be32 pci_sid;
155 	int err;
156 
157 	np = dev_get_dev_node(dev);
158 	if (!np || !of_property_present(np, "#stream-id-cells")) {
159 		of_node_put(np);
160 		return -ENODEV;
161 	}
162 
163 	it.node = np;
164 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
165 				     __find_legacy_master_phandle);
166 	smmu_dev = data;
167 	of_node_put(np);
168 	if (err == 0)
169 		return -ENODEV;
170 	if (err < 0)
171 		return err;
172 
173 	if (dev_is_pci(dev)) {
174 		/* "mmu-masters" assumes Stream ID == Requester ID */
175 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
176 				       &pci_sid);
177 		it.cur = &pci_sid;
178 		it.cur_count = 1;
179 	}
180 
181 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
182 				&arm_smmu_ops);
183 	if (err)
184 		return err;
185 
186 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
187 	if (!sids)
188 		return -ENOMEM;
189 
190 	*smmu = dev_get_drvdata(smmu_dev);
191 	of_phandle_iterator_args(&it, sids, it.cur_count);
192 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
193 	kfree(sids);
194 	return err;
195 }
196 #else
197 static int arm_smmu_register_legacy_master(struct device *dev,
198 					   struct arm_smmu_device **smmu)
199 {
200 	return -ENODEV;
201 }
202 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
203 
204 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
205 {
206 	clear_bit(idx, map);
207 }
208 
209 /* Wait for any pending TLB invalidations to complete */
210 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
211 				int sync, int status)
212 {
213 	unsigned int spin_cnt, delay;
214 	u32 reg;
215 
216 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
217 		return smmu->impl->tlb_sync(smmu, page, sync, status);
218 
219 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
220 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
221 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
222 			reg = arm_smmu_readl(smmu, page, status);
223 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
224 				return;
225 			cpu_relax();
226 		}
227 		udelay(delay);
228 	}
229 	dev_err_ratelimited(smmu->dev,
230 			    "TLB sync timed out -- SMMU may be deadlocked\n");
231 }
232 
233 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
234 {
235 	unsigned long flags;
236 
237 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
238 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
239 			    ARM_SMMU_GR0_sTLBGSTATUS);
240 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
241 }
242 
243 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
244 {
245 	struct arm_smmu_device *smmu = smmu_domain->smmu;
246 	unsigned long flags;
247 
248 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
249 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
250 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
251 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
252 }
253 
254 static void arm_smmu_tlb_inv_context_s1(void *cookie)
255 {
256 	struct arm_smmu_domain *smmu_domain = cookie;
257 	/*
258 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
259 	 * current CPU are visible beforehand.
260 	 */
261 	wmb();
262 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
263 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
264 	arm_smmu_tlb_sync_context(smmu_domain);
265 }
266 
267 static void arm_smmu_tlb_inv_context_s2(void *cookie)
268 {
269 	struct arm_smmu_domain *smmu_domain = cookie;
270 	struct arm_smmu_device *smmu = smmu_domain->smmu;
271 
272 	/* See above */
273 	wmb();
274 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
275 	arm_smmu_tlb_sync_global(smmu);
276 }
277 
278 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
279 				      size_t granule, void *cookie, int reg)
280 {
281 	struct arm_smmu_domain *smmu_domain = cookie;
282 	struct arm_smmu_device *smmu = smmu_domain->smmu;
283 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
284 	int idx = cfg->cbndx;
285 
286 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
287 		wmb();
288 
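	/*
	 * TLBI-by-VA takes the ASID along with the address: in the low bits
	 * of a 4K-aligned address for the 32-bit commands, or in bits [63:48]
	 * (with the address expressed in 4KB units) for the AArch64 command.
	 */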
289 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
290 		iova = (iova >> 12) << 12;
291 		iova |= cfg->asid;
292 		do {
293 			arm_smmu_cb_write(smmu, idx, reg, iova);
294 			iova += granule;
295 		} while (size -= granule);
296 	} else {
297 		iova >>= 12;
298 		iova |= (u64)cfg->asid << 48;
299 		do {
300 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
301 			iova += granule >> 12;
302 		} while (size -= granule);
303 	}
304 }
305 
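/*
 * Stage-2 invalidation is by IPA, expressed in 4KB units; the VMID is
 * implied by the context bank being written.
 */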
306 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
307 				      size_t granule, void *cookie, int reg)
308 {
309 	struct arm_smmu_domain *smmu_domain = cookie;
310 	struct arm_smmu_device *smmu = smmu_domain->smmu;
311 	int idx = smmu_domain->cfg.cbndx;
312 
313 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
314 		wmb();
315 
316 	iova >>= 12;
317 	do {
318 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
319 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
320 		else
321 			arm_smmu_cb_write(smmu, idx, reg, iova);
322 		iova += granule >> 12;
323 	} while (size -= granule);
324 }
325 
326 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
327 				     size_t granule, void *cookie)
328 {
329 	struct arm_smmu_domain *smmu_domain = cookie;
330 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
331 
332 	if (cfg->flush_walk_prefer_tlbiasid) {
333 		arm_smmu_tlb_inv_context_s1(cookie);
334 	} else {
335 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
336 					  ARM_SMMU_CB_S1_TLBIVA);
337 		arm_smmu_tlb_sync_context(cookie);
338 	}
339 }
340 
341 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
342 				     unsigned long iova, size_t granule,
343 				     void *cookie)
344 {
345 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
346 				  ARM_SMMU_CB_S1_TLBIVAL);
347 }
348 
349 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
350 				     size_t granule, void *cookie)
351 {
352 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
353 				  ARM_SMMU_CB_S2_TLBIIPAS2);
354 	arm_smmu_tlb_sync_context(cookie);
355 }
356 
357 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
358 				     unsigned long iova, size_t granule,
359 				     void *cookie)
360 {
361 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
362 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
363 }
364 
365 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
366 					size_t granule, void *cookie)
367 {
368 	arm_smmu_tlb_inv_context_s2(cookie);
369 }
370 /*
371  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
372  * almost negligible, but the benefit of getting the first one in as far ahead
373  * of the sync as possible is significant, hence we don't just make this a
374  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
375  * think.
376  */
377 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
378 					unsigned long iova, size_t granule,
379 					void *cookie)
380 {
381 	struct arm_smmu_domain *smmu_domain = cookie;
382 	struct arm_smmu_device *smmu = smmu_domain->smmu;
383 
384 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
385 		wmb();
386 
387 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
388 }
389 
390 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
391 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
392 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
393 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
394 };
395 
396 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
397 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
398 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
399 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
400 };
401 
402 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
403 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
404 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
405 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
406 };
407 
408 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
409 {
410 	u32 fsr, fsynr, cbfrsynra;
411 	unsigned long iova;
412 	struct arm_smmu_domain *smmu_domain = dev;
413 	struct arm_smmu_device *smmu = smmu_domain->smmu;
414 	int idx = smmu_domain->cfg.cbndx;
415 	int ret;
416 
417 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
418 	if (!(fsr & ARM_SMMU_FSR_FAULT))
419 		return IRQ_NONE;
420 
421 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
422 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
423 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
424 
425 	ret = report_iommu_fault(&smmu_domain->domain, NULL, iova,
426 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
427 
428 	if (ret == -ENOSYS)
429 		dev_err_ratelimited(smmu->dev,
430 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
431 			    fsr, iova, fsynr, cbfrsynra, idx);
432 
433 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
434 	return IRQ_HANDLED;
435 }
436 
437 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
438 {
439 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
440 	struct arm_smmu_device *smmu = dev;
441 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
442 				      DEFAULT_RATELIMIT_BURST);
443 
444 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
445 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
446 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
447 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
448 
449 	if (!gfsr)
450 		return IRQ_NONE;
451 
452 	if (__ratelimit(&rs)) {
453 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
454 		    (gfsr & ARM_SMMU_sGFSR_USF))
455 			dev_err(smmu->dev,
456 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
457 				(u16)gfsynr1);
458 		else
459 			dev_err(smmu->dev,
460 				"Unexpected global fault, this could be serious\n");
461 		dev_err(smmu->dev,
462 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
463 			gfsr, gfsynr0, gfsynr1, gfsynr2);
464 	}
465 
466 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
467 	return IRQ_HANDLED;
468 }
469 
470 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
471 				       struct io_pgtable_cfg *pgtbl_cfg)
472 {
473 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
474 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
475 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
476 
477 	cb->cfg = cfg;
478 
479 	/* TCR */
480 	if (stage1) {
481 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
482 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
483 		} else {
484 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
485 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
486 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
487 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
488 			else
489 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
490 		}
491 	} else {
492 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
493 	}
494 
495 	/* TTBRs */
496 	if (stage1) {
497 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
498 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
499 			cb->ttbr[1] = 0;
500 		} else {
501 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
502 						 cfg->asid);
503 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
504 						 cfg->asid);
505 
506 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
507 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
508 			else
509 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
510 		}
511 	} else {
512 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
513 	}
514 
515 	/* MAIRs (stage-1 only) */
516 	if (stage1) {
517 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
518 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
519 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
520 		} else {
521 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
522 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
523 		}
524 	}
525 }
526 
527 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
528 {
529 	u32 reg;
530 	bool stage1;
531 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
532 	struct arm_smmu_cfg *cfg = cb->cfg;
533 
534 	/* Unassigned context banks only need disabling */
535 	if (!cfg) {
536 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
537 		return;
538 	}
539 
540 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
541 
542 	/* CBA2R */
543 	if (smmu->version > ARM_SMMU_V1) {
544 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
545 			reg = ARM_SMMU_CBA2R_VA64;
546 		else
547 			reg = 0;
548 		/* 16-bit VMIDs live in CBA2R */
549 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
550 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
551 
552 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
553 	}
554 
555 	/* CBAR */
556 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
557 	if (smmu->version < ARM_SMMU_V2)
558 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
559 
560 	/*
561 	 * Use the weakest shareability/memory types, so they are
562 	 * overridden by the ttbcr/pte.
563 	 */
564 	if (stage1) {
565 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
566 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
567 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
568 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
569 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
570 		/* 8-bit VMIDs live in CBAR */
571 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
572 	}
573 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
574 
575 	/*
576 	 * TCR
577 	 * We must write this before the TTBRs, since it determines the
578 	 * access behaviour of some fields (in particular, ASID[15:8]).
579 	 */
580 	if (stage1 && smmu->version > ARM_SMMU_V1)
581 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
582 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
583 
584 	/* TTBRs */
585 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
586 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
587 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
588 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
589 	} else {
590 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
591 		if (stage1)
592 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
593 					   cb->ttbr[1]);
594 	}
595 
596 	/* MAIRs (stage-1 only) */
597 	if (stage1) {
598 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
599 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
600 	}
601 
602 	/* SCTLR */
603 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
604 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
605 	if (stage1)
606 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
607 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
608 		reg |= ARM_SMMU_SCTLR_E;
609 
610 	if (smmu->impl && smmu->impl->write_sctlr)
611 		smmu->impl->write_sctlr(smmu, idx, reg);
612 	else
613 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
614 }
615 
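/*
 * Let the implementation pick the context bank if it needs to; otherwise
 * take the first free index from the bitmap, at or above 'start'.
 */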
616 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
617 				       struct arm_smmu_device *smmu,
618 				       struct device *dev, unsigned int start)
619 {
620 	if (smmu->impl && smmu->impl->alloc_context_bank)
621 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
622 
623 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
624 }
625 
626 static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
627 					struct arm_smmu_device *smmu,
628 					struct device *dev)
629 {
630 	int irq, start, ret = 0;
631 	unsigned long ias, oas;
632 	struct io_pgtable_ops *pgtbl_ops;
633 	struct io_pgtable_cfg pgtbl_cfg;
634 	enum io_pgtable_fmt fmt;
635 	struct iommu_domain *domain = &smmu_domain->domain;
636 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
637 	irqreturn_t (*context_fault)(int irq, void *dev);
638 
639 	mutex_lock(&smmu_domain->init_mutex);
640 	if (smmu_domain->smmu)
641 		goto out_unlock;
642 
643 	/*
644 	 * Mapping the requested stage onto what we support is surprisingly
645 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
646 	 * support for nested translation. That means we end up with the
647 	 * following table:
648 	 *
649 	 * Requested        Supported        Actual
650 	 *     S1               N              S1
651 	 *     S1             S1+S2            S1
652 	 *     S1               S2             S2
653 	 *     S1               S1             S1
654 	 *     N                N              N
655 	 *     N              S1+S2            S2
656 	 *     N                S2             S2
657 	 *     N                S1             S1
658 	 *
659 	 * Note that you can't actually request stage-2 mappings.
660 	 */
661 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
662 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
663 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
664 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
665 
666 	/*
667 	 * Choosing a suitable context format is even more fiddly. Until we
668 	 * grow some way for the caller to express a preference, and/or move
669 	 * the decision into the io-pgtable code where it arguably belongs,
670 	 * just aim for the closest thing to the rest of the system, and hope
671 	 * that the hardware isn't esoteric enough that we can't assume AArch64
672 	 * support to be a superset of AArch32 support...
673 	 */
674 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
675 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
676 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
677 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
678 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
679 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
680 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
681 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
682 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
683 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
684 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
685 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
686 
687 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
688 		ret = -EINVAL;
689 		goto out_unlock;
690 	}
691 
692 	switch (smmu_domain->stage) {
693 	case ARM_SMMU_DOMAIN_S1:
694 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
695 		start = smmu->num_s2_context_banks;
696 		ias = smmu->va_size;
697 		oas = smmu->ipa_size;
698 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
699 			fmt = ARM_64_LPAE_S1;
700 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
701 			fmt = ARM_32_LPAE_S1;
702 			ias = min(ias, 32UL);
703 			oas = min(oas, 40UL);
704 		} else {
705 			fmt = ARM_V7S;
706 			ias = min(ias, 32UL);
707 			oas = min(oas, 32UL);
708 		}
709 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
710 		break;
711 	case ARM_SMMU_DOMAIN_NESTED:
712 		/*
713 		 * We will likely want to change this if/when KVM gets
714 		 * involved.
715 		 */
716 	case ARM_SMMU_DOMAIN_S2:
717 		cfg->cbar = CBAR_TYPE_S2_TRANS;
718 		start = 0;
719 		ias = smmu->ipa_size;
720 		oas = smmu->pa_size;
721 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
722 			fmt = ARM_64_LPAE_S2;
723 		} else {
724 			fmt = ARM_32_LPAE_S2;
725 			ias = min(ias, 40UL);
726 			oas = min(oas, 40UL);
727 		}
728 		if (smmu->version == ARM_SMMU_V2)
729 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
730 		else
731 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
732 		break;
733 	default:
734 		ret = -EINVAL;
735 		goto out_unlock;
736 	}
737 
738 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
739 	if (ret < 0) {
740 		goto out_unlock;
741 	}
742 
743 	smmu_domain->smmu = smmu;
744 
745 	cfg->cbndx = ret;
746 	if (smmu->version < ARM_SMMU_V2) {
747 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
748 		cfg->irptndx %= smmu->num_context_irqs;
749 	} else {
750 		cfg->irptndx = cfg->cbndx;
751 	}
752 
753 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
754 		cfg->vmid = cfg->cbndx + 1;
755 	else
756 		cfg->asid = cfg->cbndx;
757 
758 	pgtbl_cfg = (struct io_pgtable_cfg) {
759 		.pgsize_bitmap	= smmu->pgsize_bitmap,
760 		.ias		= ias,
761 		.oas		= oas,
762 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
763 		.tlb		= smmu_domain->flush_ops,
764 		.iommu_dev	= smmu->dev,
765 	};
766 
767 	if (smmu->impl && smmu->impl->init_context) {
768 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
769 		if (ret)
770 			goto out_clear_smmu;
771 	}
772 
773 	if (smmu_domain->pgtbl_quirks)
774 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
775 
776 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
777 	if (!pgtbl_ops) {
778 		ret = -ENOMEM;
779 		goto out_clear_smmu;
780 	}
781 
782 	/* Update the domain's page sizes to reflect the page table format */
783 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
784 
785 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
786 		domain->geometry.aperture_start = ~0UL << ias;
787 		domain->geometry.aperture_end = ~0UL;
788 	} else {
789 		domain->geometry.aperture_end = (1UL << ias) - 1;
790 	}
791 
792 	domain->geometry.force_aperture = true;
793 
794 	/* Initialise the context bank with our page table cfg */
795 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
796 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
797 
798 	/*
799 	 * Request context fault interrupt. Do this last to avoid the
800 	 * handler seeing a half-initialised domain state.
801 	 */
802 	irq = smmu->irqs[cfg->irptndx];
803 
804 	if (smmu->impl && smmu->impl->context_fault)
805 		context_fault = smmu->impl->context_fault;
806 	else
807 		context_fault = arm_smmu_context_fault;
808 
809 	ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED,
810 			       "arm-smmu-context-fault", smmu_domain);
811 	if (ret < 0) {
812 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
813 			cfg->irptndx, irq);
814 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
815 	}
816 
817 	mutex_unlock(&smmu_domain->init_mutex);
818 
819 	/* Publish page table ops for map/unmap */
820 	smmu_domain->pgtbl_ops = pgtbl_ops;
821 	return 0;
822 
823 out_clear_smmu:
824 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
825 	smmu_domain->smmu = NULL;
826 out_unlock:
827 	mutex_unlock(&smmu_domain->init_mutex);
828 	return ret;
829 }
830 
831 static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain)
832 {
833 	struct arm_smmu_device *smmu = smmu_domain->smmu;
834 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
835 	int ret, irq;
836 
837 	if (!smmu)
838 		return;
839 
840 	ret = arm_smmu_rpm_get(smmu);
841 	if (ret < 0)
842 		return;
843 
844 	/*
845 	 * Disable the context bank and free the page tables before releasing
846 	 * the context bank index itself.
847 	 */
848 	smmu->cbs[cfg->cbndx].cfg = NULL;
849 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
850 
851 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
852 		irq = smmu->irqs[cfg->irptndx];
853 		devm_free_irq(smmu->dev, irq, smmu_domain);
854 	}
855 
856 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
857 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
858 
859 	arm_smmu_rpm_put(smmu);
860 }
861 
862 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
863 {
864 	struct arm_smmu_domain *smmu_domain;
865 
866 	/*
867 	 * Allocate the domain and initialise some of its data structures.
868 	 * We can't really do anything meaningful until we've added a
869 	 * master.
870 	 */
871 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
872 	if (!smmu_domain)
873 		return NULL;
874 
875 	mutex_init(&smmu_domain->init_mutex);
876 	spin_lock_init(&smmu_domain->cb_lock);
877 
878 	if (dev) {
879 		struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
880 
881 		if (arm_smmu_init_domain_context(smmu_domain, cfg->smmu, dev)) {
882 			kfree(smmu_domain);
883 			return NULL;
884 		}
885 	}
886 
887 	return &smmu_domain->domain;
888 }
889 
890 static void arm_smmu_domain_free(struct iommu_domain *domain)
891 {
892 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
893 
894 	/*
895 	 * Free the domain resources. We assume that all devices have
896 	 * already been detached.
897 	 */
898 	arm_smmu_destroy_domain_context(smmu_domain);
899 	kfree(smmu_domain);
900 }
901 
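/*
 * Program one Stream Match Register. With the EXIDS extension the valid
 * bit moves into the corresponding S2CR (EXIDVALID), so it is not set here.
 */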
902 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
903 {
904 	struct arm_smmu_smr *smr = smmu->smrs + idx;
905 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
906 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
907 
908 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
909 		reg |= ARM_SMMU_SMR_VALID;
910 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
911 }
912 
913 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
914 {
915 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
916 	u32 reg;
917 
918 	if (smmu->impl && smmu->impl->write_s2cr) {
919 		smmu->impl->write_s2cr(smmu, idx);
920 		return;
921 	}
922 
923 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
924 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
925 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
926 
927 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
928 	    smmu->smrs[idx].valid)
929 		reg |= ARM_SMMU_S2CR_EXIDVALID;
930 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
931 }
932 
933 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
934 {
935 	arm_smmu_write_s2cr(smmu, idx);
936 	if (smmu->smrs)
937 		arm_smmu_write_smr(smmu, idx);
938 }
939 
940 /*
941  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
942  * should be called after sCR0 is written.
943  */
944 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
945 {
946 	u32 smr;
947 	int i;
948 
949 	if (!smmu->smrs)
950 		return;
951 	/*
952 	 * If we've had to accommodate firmware memory regions, we may
953 	 * have live SMRs by now; tread carefully...
954 	 *
955 	 * Somewhat perversely, not having a free SMR for this test implies we
956 	 * can get away without it anyway, as we'll only be able to 'allocate'
957 	 * these SMRs for the ID/mask values we're already trusting to be OK.
958 	 */
959 	for (i = 0; i < smmu->num_mapping_groups; i++)
960 		if (!smmu->smrs[i].valid)
961 			goto smr_ok;
962 	return;
963 smr_ok:
964 	/*
965 	 * SMR.ID bits may not be preserved if the corresponding MASK
966 	 * bits are set, so check each one separately. We can reject
967 	 * masters later if they try to claim IDs outside these masks.
968 	 */
969 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
970 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
971 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
972 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
973 
974 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
975 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
976 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
977 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
978 }
979 
980 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
981 {
982 	struct arm_smmu_smr *smrs = smmu->smrs;
983 	int i, free_idx = -ENOSPC;
984 
985 	/* Stream indexing is blissfully easy */
986 	if (!smrs)
987 		return id;
988 
989 	/* Validating SMRs is... less so */
990 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
991 		if (!smrs[i].valid) {
992 			/*
993 			 * Note the first free entry we come across, which
994 			 * we'll claim in the end if nothing else matches.
995 			 */
996 			if (free_idx < 0)
997 				free_idx = i;
998 			continue;
999 		}
1000 		/*
1001 		 * If the new entry is _entirely_ matched by an existing entry,
1002 		 * then reuse that, with the guarantee that there also cannot
1003 		 * be any subsequent conflicting entries. In normal use we'd
1004 		 * expect simply identical entries for this case, but there's
1005 		 * no harm in accommodating the generalisation.
1006 		 */
1007 		if ((mask & smrs[i].mask) == mask &&
1008 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1009 			return i;
1010 		/*
1011 		 * If the new entry has any other overlap with an existing one,
1012 		 * though, then there always exists at least one stream ID
1013 		 * which would cause a conflict, and we can't allow that risk.
1014 		 */
1015 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1016 			return -EINVAL;
1017 	}
1018 
1019 	return free_idx;
1020 }
1021 
1022 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
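/*
 * Drop a reference on a stream mapping entry. Returns true once the last
 * user is gone, after resetting the software state so the caller can write
 * the now-invalid entry back to the hardware.
 */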
1023 {
1024 	if (--smmu->s2crs[idx].count)
1025 		return false;
1026 
1027 	smmu->s2crs[idx] = s2cr_init_val;
1028 	if (smmu->smrs)
1029 		smmu->smrs[idx].valid = false;
1030 
1031 	return true;
1032 }
1033 
1034 static int arm_smmu_master_alloc_smes(struct device *dev)
1035 {
1036 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1037 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1038 	struct arm_smmu_device *smmu = cfg->smmu;
1039 	struct arm_smmu_smr *smrs = smmu->smrs;
1040 	int i, idx, ret;
1041 
1042 	mutex_lock(&smmu->stream_map_mutex);
1043 	/* Figure out a viable stream map entry allocation */
1044 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1045 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1046 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1047 
1048 		if (idx != INVALID_SMENDX) {
1049 			ret = -EEXIST;
1050 			goto out_err;
1051 		}
1052 
1053 		ret = arm_smmu_find_sme(smmu, sid, mask);
1054 		if (ret < 0)
1055 			goto out_err;
1056 
1057 		idx = ret;
1058 		if (smrs && smmu->s2crs[idx].count == 0) {
1059 			smrs[idx].id = sid;
1060 			smrs[idx].mask = mask;
1061 			smrs[idx].valid = true;
1062 		}
1063 		smmu->s2crs[idx].count++;
1064 		cfg->smendx[i] = (s16)idx;
1065 	}
1066 
1067 	/* It worked! Now, poke the actual hardware */
1068 	for_each_cfg_sme(cfg, fwspec, i, idx)
1069 		arm_smmu_write_sme(smmu, idx);
1070 
1071 	mutex_unlock(&smmu->stream_map_mutex);
1072 	return 0;
1073 
1074 out_err:
1075 	while (i--) {
1076 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1077 		cfg->smendx[i] = INVALID_SMENDX;
1078 	}
1079 	mutex_unlock(&smmu->stream_map_mutex);
1080 	return ret;
1081 }
1082 
1083 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1084 				      struct iommu_fwspec *fwspec)
1085 {
1086 	struct arm_smmu_device *smmu = cfg->smmu;
1087 	int i, idx;
1088 
1089 	mutex_lock(&smmu->stream_map_mutex);
1090 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1091 		if (arm_smmu_free_sme(smmu, idx))
1092 			arm_smmu_write_sme(smmu, idx);
1093 		cfg->smendx[i] = INVALID_SMENDX;
1094 	}
1095 	mutex_unlock(&smmu->stream_map_mutex);
1096 }
1097 
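/*
 * Point all of a master's stream mapping entries at the given S2CR type
 * and context bank, skipping entries that already match.
 */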
1098 static void arm_smmu_master_install_s2crs(struct arm_smmu_master_cfg *cfg,
1099 					  enum arm_smmu_s2cr_type type,
1100 					  u8 cbndx, struct iommu_fwspec *fwspec)
1101 {
1102 	struct arm_smmu_device *smmu = cfg->smmu;
1103 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1104 	int i, idx;
1105 
1106 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1107 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1108 			continue;
1109 
1110 		s2cr[idx].type = type;
1111 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1112 		s2cr[idx].cbndx = cbndx;
1113 		arm_smmu_write_s2cr(smmu, idx);
1114 	}
1115 }
1116 
1117 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1118 {
1119 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1120 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1121 	struct arm_smmu_master_cfg *cfg;
1122 	struct arm_smmu_device *smmu;
1123 	int ret;
1124 
1125 	/*
1126 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1127 	 * domains between of_xlate() and probe_device() - we have no way to cope
1128 	 * with that, so until ARM gets converted to rely on groups and default
1129 	 * domains, just say no (but more politely than by dereferencing NULL).
1130 	 * This should be at least a WARN_ON once that's sorted.
1131 	 */
1132 	cfg = dev_iommu_priv_get(dev);
1133 	if (!cfg)
1134 		return -ENODEV;
1135 
1136 	smmu = cfg->smmu;
1137 
1138 	ret = arm_smmu_rpm_get(smmu);
1139 	if (ret < 0)
1140 		return ret;
1141 
1142 	/* Ensure that the domain is finalised */
1143 	ret = arm_smmu_init_domain_context(smmu_domain, smmu, dev);
1144 	if (ret < 0)
1145 		goto rpm_put;
1146 
1147 	/*
1148 	 * Sanity check the domain. We don't support domains across
1149 	 * different SMMUs.
1150 	 */
1151 	if (smmu_domain->smmu != smmu) {
1152 		ret = -EINVAL;
1153 		goto rpm_put;
1154 	}
1155 
1156 	/* Looks ok, so add the device to the domain */
1157 	arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS,
1158 				      smmu_domain->cfg.cbndx, fwspec);
1159 	arm_smmu_rpm_use_autosuspend(smmu);
1160 rpm_put:
1161 	arm_smmu_rpm_put(smmu);
1162 	return ret;
1163 }
1164 
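/*
 * Common helper for the identity and blocked domains: steer all of the
 * master's streams to bypass or fault, with no context bank involved.
 */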
1165 static int arm_smmu_attach_dev_type(struct device *dev,
1166 				    enum arm_smmu_s2cr_type type)
1167 {
1168 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1169 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1170 	struct arm_smmu_device *smmu;
1171 	int ret;
1172 
1173 	if (!cfg)
1174 		return -ENODEV;
1175 	smmu = cfg->smmu;
1176 
1177 	ret = arm_smmu_rpm_get(smmu);
1178 	if (ret < 0)
1179 		return ret;
1180 
1181 	arm_smmu_master_install_s2crs(cfg, type, 0, fwspec);
1182 	arm_smmu_rpm_use_autosuspend(smmu);
1183 	arm_smmu_rpm_put(smmu);
1184 	return 0;
1185 }
1186 
1187 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
1188 					struct device *dev)
1189 {
1190 	return arm_smmu_attach_dev_type(dev, S2CR_TYPE_BYPASS);
1191 }
1192 
1193 static const struct iommu_domain_ops arm_smmu_identity_ops = {
1194 	.attach_dev = arm_smmu_attach_dev_identity,
1195 };
1196 
1197 static struct iommu_domain arm_smmu_identity_domain = {
1198 	.type = IOMMU_DOMAIN_IDENTITY,
1199 	.ops = &arm_smmu_identity_ops,
1200 };
1201 
1202 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
1203 				       struct device *dev)
1204 {
1205 	return arm_smmu_attach_dev_type(dev, S2CR_TYPE_FAULT);
1206 }
1207 
1208 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
1209 	.attach_dev = arm_smmu_attach_dev_blocked,
1210 };
1211 
1212 static struct iommu_domain arm_smmu_blocked_domain = {
1213 	.type = IOMMU_DOMAIN_BLOCKED,
1214 	.ops = &arm_smmu_blocked_ops,
1215 };
1216 
1217 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1218 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1219 			      int prot, gfp_t gfp, size_t *mapped)
1220 {
1221 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1222 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1223 	int ret;
1224 
1225 	if (!ops)
1226 		return -ENODEV;
1227 
1228 	arm_smmu_rpm_get(smmu);
1229 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1230 	arm_smmu_rpm_put(smmu);
1231 
1232 	return ret;
1233 }
1234 
1235 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1236 				   size_t pgsize, size_t pgcount,
1237 				   struct iommu_iotlb_gather *iotlb_gather)
1238 {
1239 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1240 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1241 	size_t ret;
1242 
1243 	if (!ops)
1244 		return 0;
1245 
1246 	arm_smmu_rpm_get(smmu);
1247 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1248 	arm_smmu_rpm_put(smmu);
1249 
1250 	return ret;
1251 }
1252 
1253 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1254 {
1255 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1256 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1257 
1258 	if (smmu_domain->flush_ops) {
1259 		arm_smmu_rpm_get(smmu);
1260 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1261 		arm_smmu_rpm_put(smmu);
1262 	}
1263 }
1264 
1265 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1266 				struct iommu_iotlb_gather *gather)
1267 {
1268 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1269 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1270 
1271 	if (!smmu)
1272 		return;
1273 
1274 	arm_smmu_rpm_get(smmu);
1275 	if (smmu->version == ARM_SMMU_V2 ||
1276 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1277 		arm_smmu_tlb_sync_context(smmu_domain);
1278 	else
1279 		arm_smmu_tlb_sync_global(smmu);
1280 	arm_smmu_rpm_put(smmu);
1281 }
1282 
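/*
 * Resolve an IOVA via the hardware ATS1PR translation operation, falling
 * back to a software page-table walk if polling ATSR times out.
 */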
1283 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1284 					      dma_addr_t iova)
1285 {
1286 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1287 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1288 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1289 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1290 	struct device *dev = smmu->dev;
1291 	void __iomem *reg;
1292 	u32 tmp;
1293 	u64 phys;
1294 	unsigned long va, flags;
1295 	int ret, idx = cfg->cbndx;
1296 	phys_addr_t addr = 0;
1297 
1298 	ret = arm_smmu_rpm_get(smmu);
1299 	if (ret < 0)
1300 		return 0;
1301 
1302 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1303 	va = iova & ~0xfffUL;
1304 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1305 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1306 	else
1307 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1308 
1309 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1310 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1311 				      5, 50)) {
1312 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1313 		dev_err(dev,
1314 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1315 			&iova);
1316 		arm_smmu_rpm_put(smmu);
1317 		return ops->iova_to_phys(ops, iova);
1318 	}
1319 
1320 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1321 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1322 	if (phys & ARM_SMMU_CB_PAR_F) {
1323 		dev_err(dev, "translation fault!\n");
1324 		dev_err(dev, "PAR = 0x%llx\n", phys);
1325 		goto out;
1326 	}
1327 
1328 	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1329 out:
1330 	arm_smmu_rpm_put(smmu);
1331 
1332 	return addr;
1333 }
1334 
1335 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1336 					dma_addr_t iova)
1337 {
1338 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1339 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1340 
1341 	if (!ops)
1342 		return 0;
1343 
1344 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1345 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1346 		return arm_smmu_iova_to_phys_hard(domain, iova);
1347 
1348 	return ops->iova_to_phys(ops, iova);
1349 }
1350 
1351 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
1352 {
1353 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1354 
1355 	switch (cap) {
1356 	case IOMMU_CAP_CACHE_COHERENCY:
1357 		/*
1358 		 * It's overwhelmingly the case in practice that when the pagetable
1359 		 * walk interface is connected to a coherent interconnect, all the
1360 		 * translation interfaces are too. Furthermore if the device is
1361 		 * natively coherent, then its translation interface must also be.
1362 		 */
1363 		return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK ||
1364 			device_get_dma_attr(dev) == DEV_DMA_COHERENT;
1365 	case IOMMU_CAP_NOEXEC:
1366 	case IOMMU_CAP_DEFERRED_FLUSH:
1367 		return true;
1368 	default:
1369 		return false;
1370 	}
1371 }
1372 
1373 static
1374 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1375 {
1376 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1377 							  fwnode);
1378 	put_device(dev);
1379 	return dev ? dev_get_drvdata(dev) : NULL;
1380 }
1381 
1382 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1383 {
1384 	struct arm_smmu_device *smmu = NULL;
1385 	struct arm_smmu_master_cfg *cfg;
1386 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1387 	int i, ret;
1388 
1389 	if (using_legacy_binding) {
1390 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1391 
1392 		/*
1393 		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1394 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1395 		 * later use.
1396 		 */
1397 		fwspec = dev_iommu_fwspec_get(dev);
1398 		if (ret)
1399 			goto out_free;
1400 	} else {
1401 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1402 	}
1403 
1404 	ret = -EINVAL;
1405 	for (i = 0; i < fwspec->num_ids; i++) {
1406 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1407 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1408 
1409 		if (sid & ~smmu->streamid_mask) {
1410 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1411 				sid, smmu->streamid_mask);
1412 			goto out_free;
1413 		}
1414 		if (mask & ~smmu->smr_mask_mask) {
1415 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1416 				mask, smmu->smr_mask_mask);
1417 			goto out_free;
1418 		}
1419 	}
1420 
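	/*
	 * On loop exit i == fwspec->num_ids, so this allocates one smendx[]
	 * slot per stream ID.
	 */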
1421 	ret = -ENOMEM;
1422 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1423 		      GFP_KERNEL);
1424 	if (!cfg)
1425 		goto out_free;
1426 
1427 	cfg->smmu = smmu;
1428 	dev_iommu_priv_set(dev, cfg);
1429 	while (i--)
1430 		cfg->smendx[i] = INVALID_SMENDX;
1431 
1432 	ret = arm_smmu_rpm_get(smmu);
1433 	if (ret < 0)
1434 		goto out_cfg_free;
1435 
1436 	ret = arm_smmu_master_alloc_smes(dev);
1437 	arm_smmu_rpm_put(smmu);
1438 
1439 	if (ret)
1440 		goto out_cfg_free;
1441 
1442 	device_link_add(dev, smmu->dev,
1443 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1444 
1445 	return &smmu->iommu;
1446 
1447 out_cfg_free:
1448 	kfree(cfg);
1449 out_free:
1450 	iommu_fwspec_free(dev);
1451 	return ERR_PTR(ret);
1452 }
1453 
1454 static void arm_smmu_release_device(struct device *dev)
1455 {
1456 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1457 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1458 	int ret;
1459 
1460 	ret = arm_smmu_rpm_get(cfg->smmu);
1461 	if (ret < 0)
1462 		return;
1463 
1464 	arm_smmu_master_free_smes(cfg, fwspec);
1465 
1466 	arm_smmu_rpm_put(cfg->smmu);
1467 
1468 	kfree(cfg);
1469 }
1470 
1471 static void arm_smmu_probe_finalize(struct device *dev)
1472 {
1473 	struct arm_smmu_master_cfg *cfg;
1474 	struct arm_smmu_device *smmu;
1475 
1476 	cfg = dev_iommu_priv_get(dev);
1477 	smmu = cfg->smmu;
1478 
1479 	if (smmu->impl && smmu->impl->probe_finalize)
1480 		smmu->impl->probe_finalize(smmu, dev);
1481 }
1482 
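/*
 * All masters sharing a stream mapping entry must end up in the same IOMMU
 * group, so reuse any group already recorded against the S2CRs and reject
 * conflicting requests.
 */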
1483 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1484 {
1485 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1486 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1487 	struct arm_smmu_device *smmu = cfg->smmu;
1488 	struct iommu_group *group = NULL;
1489 	int i, idx;
1490 
1491 	mutex_lock(&smmu->stream_map_mutex);
1492 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1493 		if (group && smmu->s2crs[idx].group &&
1494 		    group != smmu->s2crs[idx].group) {
1495 			mutex_unlock(&smmu->stream_map_mutex);
1496 			return ERR_PTR(-EINVAL);
1497 		}
1498 
1499 		group = smmu->s2crs[idx].group;
1500 	}
1501 
1502 	if (group) {
1503 		mutex_unlock(&smmu->stream_map_mutex);
1504 		return iommu_group_ref_get(group);
1505 	}
1506 
1507 	if (dev_is_pci(dev))
1508 		group = pci_device_group(dev);
1509 	else if (dev_is_fsl_mc(dev))
1510 		group = fsl_mc_device_group(dev);
1511 	else
1512 		group = generic_device_group(dev);
1513 
1514 	/* Remember group for faster lookups */
1515 	if (!IS_ERR(group))
1516 		for_each_cfg_sme(cfg, fwspec, i, idx)
1517 			smmu->s2crs[idx].group = group;
1518 
1519 	mutex_unlock(&smmu->stream_map_mutex);
1520 	return group;
1521 }
1522 
1523 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1524 {
1525 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1526 	int ret = 0;
1527 
1528 	mutex_lock(&smmu_domain->init_mutex);
1529 	if (smmu_domain->smmu)
1530 		ret = -EPERM;
1531 	else
1532 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1533 	mutex_unlock(&smmu_domain->init_mutex);
1534 
1535 	return ret;
1536 }
1537 
1538 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1539 		unsigned long quirks)
1540 {
1541 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1542 	int ret = 0;
1543 
1544 	mutex_lock(&smmu_domain->init_mutex);
1545 	if (smmu_domain->smmu)
1546 		ret = -EPERM;
1547 	else
1548 		smmu_domain->pgtbl_quirks = quirks;
1549 	mutex_unlock(&smmu_domain->init_mutex);
1550 
1551 	return ret;
1552 }
1553 
1554 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1555 {
1556 	u32 mask, fwid = 0;
1557 
1558 	if (args->args_count > 0)
1559 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1560 
1561 	if (args->args_count > 1)
1562 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1563 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1564 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1565 
1566 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1567 }
1568 
1569 static void arm_smmu_get_resv_regions(struct device *dev,
1570 				      struct list_head *head)
1571 {
1572 	struct iommu_resv_region *region;
1573 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1574 
1575 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1576 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
1577 	if (!region)
1578 		return;
1579 
1580 	list_add_tail(&region->list, head);
1581 
1582 	iommu_dma_get_resv_regions(dev, head);
1583 }
1584 
1585 static int arm_smmu_def_domain_type(struct device *dev)
1586 {
1587 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1588 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1589 
1590 	if (using_legacy_binding)
1591 		return IOMMU_DOMAIN_IDENTITY;
1592 
1593 	if (impl && impl->def_domain_type)
1594 		return impl->def_domain_type(dev);
1595 
1596 	return 0;
1597 }
1598 
1599 static struct iommu_ops arm_smmu_ops = {
1600 	.identity_domain	= &arm_smmu_identity_domain,
1601 	.blocked_domain		= &arm_smmu_blocked_domain,
1602 	.capable		= arm_smmu_capable,
1603 	.domain_alloc_paging	= arm_smmu_domain_alloc_paging,
1604 	.probe_device		= arm_smmu_probe_device,
1605 	.release_device		= arm_smmu_release_device,
1606 	.probe_finalize		= arm_smmu_probe_finalize,
1607 	.device_group		= arm_smmu_device_group,
1608 	.of_xlate		= arm_smmu_of_xlate,
1609 	.get_resv_regions	= arm_smmu_get_resv_regions,
1610 	.def_domain_type	= arm_smmu_def_domain_type,
1611 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1612 	.owner			= THIS_MODULE,
1613 	.default_domain_ops = &(const struct iommu_domain_ops) {
1614 		.attach_dev		= arm_smmu_attach_dev,
1615 		.map_pages		= arm_smmu_map_pages,
1616 		.unmap_pages		= arm_smmu_unmap_pages,
1617 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
1618 		.iotlb_sync		= arm_smmu_iotlb_sync,
1619 		.iova_to_phys		= arm_smmu_iova_to_phys,
1620 		.enable_nesting		= arm_smmu_enable_nesting,
1621 		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
1622 		.free			= arm_smmu_domain_free,
1623 	}
1624 };
1625 
1626 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1627 {
1628 	int i;
1629 	u32 reg;
1630 
1631 	/* clear global FSR */
1632 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1633 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1634 
1635 	/*
1636 	 * Reset stream mapping groups: Initial values mark all SMRn as
1637 	 * invalid and all S2CRn as bypass unless overridden.
1638 	 */
1639 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1640 		arm_smmu_write_sme(smmu, i);
1641 
1642 	/* Make sure all context banks are disabled and clear CB_FSR */
1643 	for (i = 0; i < smmu->num_context_banks; ++i) {
1644 		arm_smmu_write_context_bank(smmu, i);
1645 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1646 	}
1647 
1648 	/* Invalidate the TLB, just in case */
1649 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1650 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1651 
1652 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1653 
1654 	/* Enable fault reporting */
1655 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1656 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1657 
1658 	/* Disable TLB broadcasting. */
1659 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1660 
1661 	/* Enable client access, handling unmatched streams as appropriate */
1662 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1663 	if (disable_bypass)
1664 		reg |= ARM_SMMU_sCR0_USFCFG;
1665 	else
1666 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1667 
1668 	/* Disable forced broadcasting */
1669 	reg &= ~ARM_SMMU_sCR0_FB;
1670 
1671 	/* Don't upgrade barriers */
1672 	reg &= ~(ARM_SMMU_sCR0_BSU);
1673 
1674 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1675 		reg |= ARM_SMMU_sCR0_VMID16EN;
1676 
1677 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1678 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1679 
1680 	if (smmu->impl && smmu->impl->reset)
1681 		smmu->impl->reset(smmu);
1682 
1683 	/* Push the button */
1684 	arm_smmu_tlb_sync_global(smmu);
1685 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1686 }
1687 
1688 static int arm_smmu_id_size_to_bits(int size)
1689 {
1690 	switch (size) {
1691 	case 0:
1692 		return 32;
1693 	case 1:
1694 		return 36;
1695 	case 2:
1696 		return 40;
1697 	case 3:
1698 		return 42;
1699 	case 4:
1700 		return 44;
1701 	case 5:
1702 	default:
1703 		return 48;
1704 	}
1705 }
1706 
1707 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1708 {
1709 	unsigned int size;
1710 	u32 id;
1711 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1712 	int i, ret;
1713 
1714 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1715 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1716 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1717 
1718 	/* ID0 */
1719 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1720 
1721 	/* Restrict available stages based on module parameter */
1722 	if (force_stage == 1)
1723 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1724 	else if (force_stage == 2)
1725 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1726 
1727 	if (id & ARM_SMMU_ID0_S1TS) {
1728 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1729 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1730 	}
1731 
1732 	if (id & ARM_SMMU_ID0_S2TS) {
1733 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1734 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1735 	}
1736 
1737 	if (id & ARM_SMMU_ID0_NTS) {
1738 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1739 		dev_notice(smmu->dev, "\tnested translation\n");
1740 	}
1741 
1742 	if (!(smmu->features &
1743 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1744 		dev_err(smmu->dev, "\tno translation support!\n");
1745 		return -ENODEV;
1746 	}
1747 
1748 	if ((id & ARM_SMMU_ID0_S1TS) &&
1749 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1750 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1751 		dev_notice(smmu->dev, "\taddress translation ops\n");
1752 	}
1753 
1754 	/*
1755 	 * In order for DMA API calls to work properly, we must defer to what
1756 	 * the FW says about coherency, regardless of what the hardware claims.
1757 	 * Fortunately, this also opens up a workaround for systems where the
1758 	 * ID register value has ended up configured incorrectly.
1759 	 */
1760 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1761 	if (cttw_fw || cttw_reg)
1762 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1763 			   cttw_fw ? "" : "non-");
1764 	if (cttw_fw != cttw_reg)
1765 		dev_notice(smmu->dev,
1766 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1767 
1768 	/* Max. number of entries we have for stream matching/indexing */
1769 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1770 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1771 		size = 1 << 16;
1772 	} else {
1773 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1774 	}
1775 	smmu->streamid_mask = size - 1;
1776 	if (id & ARM_SMMU_ID0_SMS) {
1777 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1778 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1779 		if (size == 0) {
1780 			dev_err(smmu->dev,
1781 				"stream-matching supported, but no SMRs present!\n");
1782 			return -ENODEV;
1783 		}
1784 
1785 		/* Zero-initialised to mark as invalid */
1786 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1787 					  GFP_KERNEL);
1788 		if (!smmu->smrs)
1789 			return -ENOMEM;
1790 
1791 		dev_notice(smmu->dev,
1792 			   "\tstream matching with %u register groups\n", size);
1793 	}
1794 	/* s2cr->type == 0 means translation, so initialise explicitly */
1795 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1796 					 GFP_KERNEL);
1797 	if (!smmu->s2crs)
1798 		return -ENOMEM;
1799 	for (i = 0; i < size; i++)
1800 		smmu->s2crs[i] = s2cr_init_val;
1801 
1802 	smmu->num_mapping_groups = size;
1803 	mutex_init(&smmu->stream_map_mutex);
1804 	spin_lock_init(&smmu->global_sync_lock);
1805 
1806 	if (smmu->version < ARM_SMMU_V2 ||
1807 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1808 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1809 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1810 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1811 	}
1812 
1813 	/* ID1 */
1814 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1815 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1816 
1817 	/* Check for size mismatch of SMMU address space from mapped region */
1818 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1819 	if (smmu->numpage != 2 * size << smmu->pgshift)
1820 		dev_warn(smmu->dev,
1821 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1822 			2 * size << smmu->pgshift, smmu->numpage);
1823 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1824 	smmu->numpage = size;
1825 
1826 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1827 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1828 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1829 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1830 		return -ENODEV;
1831 	}
1832 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1833 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1834 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1835 				 sizeof(*smmu->cbs), GFP_KERNEL);
1836 	if (!smmu->cbs)
1837 		return -ENOMEM;
1838 
1839 	/* ID2 */
1840 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1841 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1842 	smmu->ipa_size = size;
1843 
1844 	/* The output mask is also applied for bypass */
1845 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1846 	smmu->pa_size = size;
1847 
1848 	if (id & ARM_SMMU_ID2_VMID16)
1849 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1850 
1851 	/*
1852 	 * What the page table walker can address actually depends on which
1853 	 * descriptor format is in use, but since a) we don't know that yet,
1854 	 * and b) it can vary per context bank, this will have to do...
1855 	 */
1856 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1857 		dev_warn(smmu->dev,
1858 			 "failed to set DMA mask for table walker\n");
1859 
1860 	if (smmu->version < ARM_SMMU_V2) {
1861 		smmu->va_size = smmu->ipa_size;
1862 		if (smmu->version == ARM_SMMU_V1_64K)
1863 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1864 	} else {
1865 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1866 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1867 		if (id & ARM_SMMU_ID2_PTFS_4K)
1868 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1869 		if (id & ARM_SMMU_ID2_PTFS_16K)
1870 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1871 		if (id & ARM_SMMU_ID2_PTFS_64K)
1872 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1873 	}
1874 
1875 	if (smmu->impl && smmu->impl->cfg_probe) {
1876 		ret = smmu->impl->cfg_probe(smmu);
1877 		if (ret)
1878 			return ret;
1879 	}
1880 
1881 	/* Now we've corralled the various formats, what'll it do? */
1882 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1883 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1884 	if (smmu->features &
1885 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1886 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1887 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1888 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1889 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1890 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1891 
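	/*
	 * Merge into the shared ops bitmap: with multiple SMMU instances the
	 * IOMMU core ends up seeing the union of the page sizes supported by
	 * any of them (-1UL marks the bitmap as not yet initialised).
	 */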
1892 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1893 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1894 	else
1895 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1896 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1897 		   smmu->pgsize_bitmap);
1898 
1899 
1900 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1901 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1902 			   smmu->va_size, smmu->ipa_size);
1903 
1904 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1905 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1906 			   smmu->ipa_size, smmu->pa_size);
1907 
1908 	return 0;
1909 }
1910 
1911 struct arm_smmu_match_data {
1912 	enum arm_smmu_arch_version version;
1913 	enum arm_smmu_implementation model;
1914 };
1915 
1916 #define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
1917 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1918 
1919 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1920 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1921 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1922 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1923 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1924 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1925 
1926 static const struct of_device_id arm_smmu_of_match[] = {
1927 	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1928 	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1929 	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1930 	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1931 	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1932 	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1933 	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
1934 	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1935 	{ },
1936 };
1937 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
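/*
 * Illustrative example only: a device tree node using the generic binding
 * that would match the table above. Addresses, interrupts and cell values
 * here are made up and not taken from any real platform:
 *
 *	smmu: iommu@d0000000 {
 *		compatible = "arm,mmu-500";
 *		reg = <0xd0000000 0x10000>;
 *		#global-interrupts = <1>;
 *		#iommu-cells = <1>;
 *		interrupts = <...>;
 *	};
 */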
1938 
1939 #ifdef CONFIG_ACPI
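/* Map the IORT SMMU node's model field onto the driver's version/model enums. */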
1940 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1941 {
1942 	int ret = 0;
1943 
1944 	switch (model) {
1945 	case ACPI_IORT_SMMU_V1:
1946 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1947 		smmu->version = ARM_SMMU_V1;
1948 		smmu->model = GENERIC_SMMU;
1949 		break;
1950 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1951 		smmu->version = ARM_SMMU_V1_64K;
1952 		smmu->model = GENERIC_SMMU;
1953 		break;
1954 	case ACPI_IORT_SMMU_V2:
1955 		smmu->version = ARM_SMMU_V2;
1956 		smmu->model = GENERIC_SMMU;
1957 		break;
1958 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1959 		smmu->version = ARM_SMMU_V2;
1960 		smmu->model = ARM_MMU500;
1961 		break;
1962 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1963 		smmu->version = ARM_SMMU_V2;
1964 		smmu->model = CAVIUM_SMMUV2;
1965 		break;
1966 	default:
1967 		ret = -ENODEV;
1968 	}
1969 
1970 	return ret;
1971 }
1972 
1973 static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1974 				      u32 *global_irqs, u32 *pmu_irqs)
1975 {
1976 	struct device *dev = smmu->dev;
1977 	struct acpi_iort_node *node =
1978 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1979 	struct acpi_iort_smmu *iort_smmu;
1980 	int ret;
1981 
1982 	/* Retrieve SMMU1/2 specific data */
1983 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1984 
1985 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1986 	if (ret < 0)
1987 		return ret;
1988 
1989 	/* Ignore the configuration access interrupt */
1990 	*global_irqs = 1;
1991 	*pmu_irqs = 0;
1992 
1993 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1994 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1995 
1996 	return 0;
1997 }
1998 #else
1999 static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
2000 					     u32 *global_irqs, u32 *pmu_irqs)
2001 {
2002 	return -ENODEV;
2003 }
2004 #endif
2005 
2006 static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
2007 				    u32 *global_irqs, u32 *pmu_irqs)
2008 {
2009 	const struct arm_smmu_match_data *data;
2010 	struct device *dev = smmu->dev;
2011 	bool legacy_binding;
2012 
2013 	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
2014 		return dev_err_probe(dev, -ENODEV,
2015 				     "missing #global-interrupts property\n");
2016 	*pmu_irqs = 0;
2017 
2018 	data = of_device_get_match_data(dev);
2019 	smmu->version = data->version;
2020 	smmu->model = data->model;
2021 
2022 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2023 	if (legacy_binding && !using_generic_binding) {
2024 		if (!using_legacy_binding) {
2025 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2026 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2027 		}
2028 		using_legacy_binding = true;
2029 	} else if (!legacy_binding && !using_legacy_binding) {
2030 		using_generic_binding = true;
2031 	} else {
2032 		dev_err(dev, "not probing due to mismatched DT properties\n");
2033 		return -ENODEV;
2034 	}
2035 
2036 	if (of_dma_is_coherent(dev->of_node))
2037 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2038 
2039 	return 0;
2040 }
2041 
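/*
 * IORT RMR (Reserved Memory Range) nodes describe stream IDs for which the
 * firmware has left DMA running at handover (e.g. a boot framebuffer).
 * Claim bypass SMR/S2CR entries for those stream IDs here so that the
 * subsequent arm_smmu_device_reset() does not break the ongoing traffic.
 */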
2042 static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
2043 {
2044 	struct list_head rmr_list;
2045 	struct iommu_resv_region *e;
2046 	int idx, cnt = 0;
2047 	u32 reg;
2048 
2049 	INIT_LIST_HEAD(&rmr_list);
2050 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2051 
2052 	/*
2053 	 * Rather than trying to look at existing mappings that
2054 	 * are set up by the firmware and then invalidate the ones
2055 	 * that do not have matching RMR entries, just disable the
2056 	 * SMMU until it gets enabled again in the reset routine.
2057 	 */
2058 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
2059 	reg |= ARM_SMMU_sCR0_CLIENTPD;
2060 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
2061 
2062 	list_for_each_entry(e, &rmr_list, list) {
2063 		struct iommu_iort_rmr_data *rmr;
2064 		int i;
2065 
2066 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
2067 		for (i = 0; i < rmr->num_sids; i++) {
2068 			idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0);
2069 			if (idx < 0)
2070 				continue;
2071 
2072 			if (smmu->s2crs[idx].count == 0) {
2073 				smmu->smrs[idx].id = rmr->sids[i];
2074 				smmu->smrs[idx].mask = 0;
2075 				smmu->smrs[idx].valid = true;
2076 			}
2077 			smmu->s2crs[idx].count++;
2078 			smmu->s2crs[idx].type = S2CR_TYPE_BYPASS;
2079 			smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
2080 
2081 			cnt++;
2082 		}
2083 	}
2084 
2085 	dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
2086 		   cnt == 1 ? "" : "s");
2087 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2088 }
2089 
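/*
 * Probe path: parse the firmware description (DT or ACPI IORT), map the
 * register space, collect clocks and interrupts, probe the hardware
 * configuration, register with the IOMMU core, preserve any boot-time
 * (RMR) mappings, then reset the SMMU and optionally enable runtime PM.
 */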
2090 static int arm_smmu_device_probe(struct platform_device *pdev)
2091 {
2092 	struct resource *res;
2093 	struct arm_smmu_device *smmu;
2094 	struct device *dev = &pdev->dev;
2095 	int num_irqs, i, err;
2096 	u32 global_irqs, pmu_irqs;
2097 	irqreturn_t (*global_fault)(int irq, void *dev);
2098 
2099 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2100 	if (!smmu) {
2101 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2102 		return -ENOMEM;
2103 	}
2104 	smmu->dev = dev;
2105 
2106 	if (dev->of_node)
2107 		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
2108 	else
2109 		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
2110 	if (err)
2111 		return err;
2112 
2113 	smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
2114 	if (IS_ERR(smmu->base))
2115 		return PTR_ERR(smmu->base);
2116 	smmu->ioaddr = res->start;
2117 
2118 	/*
2119 	 * The resource size should effectively match the value of SMMU_TOP;
2120 	 * stash that temporarily until we know PAGESIZE to validate it with.
2121 	 */
2122 	smmu->numpage = resource_size(res);
2123 
2124 	smmu = arm_smmu_impl_init(smmu);
2125 	if (IS_ERR(smmu))
2126 		return PTR_ERR(smmu);
2127 
2128 	num_irqs = platform_irq_count(pdev);
2129 
2130 	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
2131 	if (smmu->num_context_irqs <= 0)
2132 		return dev_err_probe(dev, -ENODEV,
2133 				"found %d interrupts but expected at least %d\n",
2134 				num_irqs, global_irqs + pmu_irqs + 1);
2135 
2136 	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
2137 				  sizeof(*smmu->irqs), GFP_KERNEL);
2138 	if (!smmu->irqs)
2139 		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
2140 				     smmu->num_context_irqs);
2141 
2142 	for (i = 0; i < smmu->num_context_irqs; i++) {
2143 		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);
2144 
2145 		if (irq < 0)
2146 			return irq;
2147 		smmu->irqs[i] = irq;
2148 	}
2149 
2150 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2151 	if (err < 0) {
2152 		dev_err(dev, "failed to get clocks %d\n", err);
2153 		return err;
2154 	}
2155 	smmu->num_clks = err;
2156 
2157 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2158 	if (err)
2159 		return err;
2160 
2161 	err = arm_smmu_device_cfg_probe(smmu);
2162 	if (err)
2163 		return err;
2164 
2165 	if (smmu->version == ARM_SMMU_V2) {
2166 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2167 			dev_err(dev,
2168 			      "found only %d context irq(s) but %d required\n",
2169 			      smmu->num_context_irqs, smmu->num_context_banks);
2170 			return -ENODEV;
2171 		}
2172 
2173 		/* Ignore superfluous interrupts */
2174 		smmu->num_context_irqs = smmu->num_context_banks;
2175 	}
2176 
2177 	if (smmu->impl && smmu->impl->global_fault)
2178 		global_fault = smmu->impl->global_fault;
2179 	else
2180 		global_fault = arm_smmu_global_fault;
2181 
2182 	for (i = 0; i < global_irqs; i++) {
2183 		int irq = platform_get_irq(pdev, i);
2184 
2185 		if (irq < 0)
2186 			return irq;
2187 
2188 		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
2189 				       "arm-smmu global fault", smmu);
2190 		if (err)
2191 			return dev_err_probe(dev, err,
2192 					"failed to request global IRQ %d (%u)\n",
2193 					i, irq);
2194 	}
2195 
2196 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2197 				     "smmu.%pa", &smmu->ioaddr);
2198 	if (err) {
2199 		dev_err(dev, "Failed to register iommu in sysfs\n");
2200 		return err;
2201 	}
2202 
2203 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops,
2204 				    using_legacy_binding ? NULL : dev);
2205 	if (err) {
2206 		dev_err(dev, "Failed to register iommu\n");
2207 		iommu_device_sysfs_remove(&smmu->iommu);
2208 		return err;
2209 	}
2210 
2211 	platform_set_drvdata(pdev, smmu);
2212 
2213 	/* Check for RMRs and install bypass SMRs if any */
2214 	arm_smmu_rmr_install_bypass_smr(smmu);
2215 
2216 	arm_smmu_device_reset(smmu);
2217 	arm_smmu_test_smr_masks(smmu);
2218 
2219 	/*
2220 	 * We want to avoid touching dev->power.lock in fastpaths unless
2221 	 * it's really going to do something useful - pm_runtime_enabled()
2222 	 * can serve as an ideal proxy for that decision. So, conditionally
2223 	 * enable pm_runtime.
2224 	 */
2225 	if (dev->pm_domain) {
2226 		pm_runtime_set_active(dev);
2227 		pm_runtime_enable(dev);
2228 	}
2229 
2230 	return 0;
2231 }
2232 
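/*
 * Quiesce the SMMU for shutdown/kexec: setting sCR0.CLIENTPD disables the
 * SMMU so that ongoing DMA no longer depends on page tables that are about
 * to disappear.
 */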
2233 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2234 {
2235 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2236 
2237 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2238 		dev_notice(&pdev->dev, "disabling translation\n");
2239 
2240 	arm_smmu_rpm_get(smmu);
2241 	/* Turn the thing off */
2242 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2243 	arm_smmu_rpm_put(smmu);
2244 
2245 	if (pm_runtime_enabled(smmu->dev))
2246 		pm_runtime_force_suspend(smmu->dev);
2247 	else
2248 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2249 
2250 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2251 }
2252 
2253 static void arm_smmu_device_remove(struct platform_device *pdev)
2254 {
2255 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2256 
2257 	iommu_device_unregister(&smmu->iommu);
2258 	iommu_device_sysfs_remove(&smmu->iommu);
2259 
2260 	arm_smmu_device_shutdown(pdev);
2261 }
2262 
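/*
 * The runtime PM callbacks only gate the already-prepared clocks; resume
 * additionally re-runs arm_smmu_device_reset() to reprogram the global and
 * context bank registers from the software state, in case their contents
 * were lost while the power domain was off.
 */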
2263 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2264 {
2265 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2266 	int ret;
2267 
2268 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2269 	if (ret)
2270 		return ret;
2271 
2272 	arm_smmu_device_reset(smmu);
2273 
2274 	return 0;
2275 }
2276 
2277 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2278 {
2279 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2280 
2281 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2282 
2283 	return 0;
2284 }
2285 
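/*
 * System sleep additionally prepares/unprepares the clocks (clk_prepare()
 * may sleep), whereas the runtime PM callbacks above only enable/disable
 * them. The pm_runtime_suspended() checks skip the hardware part when the
 * device is already runtime-suspended, leaving it to be resumed on first use.
 */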
2286 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2287 {
2288 	int ret;
2289 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2290 
2291 	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2292 	if (ret)
2293 		return ret;
2294 
2295 	if (pm_runtime_suspended(dev))
2296 		return 0;
2297 
2298 	ret = arm_smmu_runtime_resume(dev);
2299 	if (ret)
2300 		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2301 
2302 	return ret;
2303 }
2304 
2305 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2306 {
2307 	int ret = 0;
2308 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2309 
2310 	if (pm_runtime_suspended(dev))
2311 		goto clk_unprepare;
2312 
2313 	ret = arm_smmu_runtime_suspend(dev);
2314 	if (ret)
2315 		return ret;
2316 
2317 clk_unprepare:
2318 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2319 	return ret;
2320 }
2321 
2322 static const struct dev_pm_ops arm_smmu_pm_ops = {
2323 	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2324 	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2325 			   arm_smmu_runtime_resume, NULL)
2326 };
2327 
2328 static struct platform_driver arm_smmu_driver = {
2329 	.driver	= {
2330 		.name			= "arm-smmu",
2331 		.of_match_table		= arm_smmu_of_match,
2332 		.pm			= &arm_smmu_pm_ops,
2333 		.suppress_bind_attrs    = true,
2334 	},
2335 	.probe	= arm_smmu_device_probe,
2336 	.remove_new = arm_smmu_device_remove,
2337 	.shutdown = arm_smmu_device_shutdown,
2338 };
2339 module_platform_driver(arm_smmu_driver);
2340 
2341 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2342 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2343 MODULE_ALIAS("platform:arm-smmu");
2344 MODULE_LICENSE("GPL v2");
2345