xref: /linux/drivers/iommu/arm/arm-smmu/arm-smmu.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-mapping.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/io.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/of.h>
31 #include <linux/of_address.h>
32 #include <linux/pci.h>
33 #include <linux/platform_device.h>
34 #include <linux/pm_runtime.h>
35 #include <linux/ratelimit.h>
36 #include <linux/slab.h>
37 
38 #include <linux/fsl/mc.h>
39 
40 #include "arm-smmu.h"
41 #include "../../dma-iommu.h"
42 
43 /*
44  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
45  * global register space are still, in fact, using a hypervisor to mediate it
46  * by trapping and emulating register accesses. Sadly, some deployed versions
47  * of said trapping code have bugs wherein they go horribly wrong for stores
48  * using r31 (i.e. XZR/WZR) as the source register.
49  */
50 #define QCOM_DUMMY_VAL -1
51 
52 #define MSI_IOVA_BASE			0x8000000
53 #define MSI_IOVA_LENGTH			0x100000
54 
55 static int force_stage;
56 module_param(force_stage, int, S_IRUGO);
57 MODULE_PARM_DESC(force_stage,
58 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
59 static bool disable_bypass =
60 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
61 module_param(disable_bypass, bool, S_IRUGO);
62 MODULE_PARM_DESC(disable_bypass,
63 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
64 
65 #define s2cr_init_val (struct arm_smmu_s2cr){				\
66 	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
67 }
68 
69 static bool using_legacy_binding, using_generic_binding;
70 
71 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
72 {
73 	if (pm_runtime_enabled(smmu->dev))
74 		return pm_runtime_resume_and_get(smmu->dev);
75 
76 	return 0;
77 }
78 
79 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
80 {
81 	if (pm_runtime_enabled(smmu->dev))
82 		pm_runtime_put_autosuspend(smmu->dev);
83 }
84 
85 static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu)
86 {
87 	/*
88 	 * Setup an autosuspend delay to avoid bouncing runpm state.
89 	 * Otherwise, if a driver for a suspended consumer device
90 	 * unmaps buffers, it will runpm resume/suspend for each one.
91 	 *
92 	 * For example, when used by a GPU device, when an application
93 	 * or game exits, it can trigger unmapping 100s or 1000s of
94 	 * buffers.  With a runpm cycle for each buffer, that adds up
95 	 * to 5-10sec worth of reprogramming the context bank, while
96 	 * the system appears to be locked up to the user.
97 	 */
98 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
99 	pm_runtime_use_autosuspend(smmu->dev);
100 }
101 
102 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
103 {
104 	return container_of(dom, struct arm_smmu_domain, domain);
105 }
106 
107 static struct platform_driver arm_smmu_driver;
108 static struct iommu_ops arm_smmu_ops;
109 
110 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
111 static struct device_node *dev_get_dev_node(struct device *dev)
112 {
113 	if (dev_is_pci(dev)) {
114 		struct pci_bus *bus = to_pci_dev(dev)->bus;
115 
116 		while (!pci_is_root_bus(bus))
117 			bus = bus->parent;
118 		return of_node_get(bus->bridge->parent->of_node);
119 	}
120 
121 	return of_node_get(dev->of_node);
122 }
123 
124 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
125 {
126 	*((__be32 *)data) = cpu_to_be32(alias);
127 	return 0; /* Continue walking */
128 }
129 
130 static int __find_legacy_master_phandle(struct device *dev, void *data)
131 {
132 	struct of_phandle_iterator *it = *(void **)data;
133 	struct device_node *np = it->node;
134 	int err;
135 
136 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
137 			    "#stream-id-cells", -1)
138 		if (it->node == np) {
139 			*(void **)data = dev;
140 			return 1;
141 		}
142 	it->node = np;
143 	return err == -ENOENT ? 0 : err;
144 }
145 
146 static int arm_smmu_register_legacy_master(struct device *dev,
147 					   struct arm_smmu_device **smmu)
148 {
149 	struct device *smmu_dev;
150 	struct device_node *np;
151 	struct of_phandle_iterator it;
152 	void *data = &it;
153 	u32 *sids;
154 	__be32 pci_sid;
155 	int err;
156 
157 	np = dev_get_dev_node(dev);
158 	if (!np || !of_property_present(np, "#stream-id-cells")) {
159 		of_node_put(np);
160 		return -ENODEV;
161 	}
162 
163 	it.node = np;
164 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
165 				     __find_legacy_master_phandle);
166 	smmu_dev = data;
167 	of_node_put(np);
168 	if (err == 0)
169 		return -ENODEV;
170 	if (err < 0)
171 		return err;
172 
173 	if (dev_is_pci(dev)) {
174 		/* "mmu-masters" assumes Stream ID == Requester ID */
175 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
176 				       &pci_sid);
177 		it.cur = &pci_sid;
178 		it.cur_count = 1;
179 	}
180 
181 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
182 				&arm_smmu_ops);
183 	if (err)
184 		return err;
185 
186 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
187 	if (!sids)
188 		return -ENOMEM;
189 
190 	*smmu = dev_get_drvdata(smmu_dev);
191 	of_phandle_iterator_args(&it, sids, it.cur_count);
192 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
193 	kfree(sids);
194 	return err;
195 }
196 #else
197 static int arm_smmu_register_legacy_master(struct device *dev,
198 					   struct arm_smmu_device **smmu)
199 {
200 	return -ENODEV;
201 }
202 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
203 
/* Release entry @idx of the allocation bitmap @map by clearing its bit. */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
208 
209 /* Wait for any pending TLB invalidations to complete */
210 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
211 				int sync, int status)
212 {
213 	unsigned int spin_cnt, delay;
214 	u32 reg;
215 
216 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
217 		return smmu->impl->tlb_sync(smmu, page, sync, status);
218 
219 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
220 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
221 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
222 			reg = arm_smmu_readl(smmu, page, status);
223 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
224 				return;
225 			cpu_relax();
226 		}
227 		udelay(delay);
228 	}
229 	dev_err_ratelimited(smmu->dev,
230 			    "TLB sync timed out -- SMMU may be deadlocked\n");
231 }
232 
233 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
234 {
235 	unsigned long flags;
236 
237 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
238 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
239 			    ARM_SMMU_GR0_sTLBGSTATUS);
240 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
241 }
242 
243 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
244 {
245 	struct arm_smmu_device *smmu = smmu_domain->smmu;
246 	unsigned long flags;
247 
248 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
249 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
250 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
251 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
252 }
253 
254 static void arm_smmu_tlb_inv_context_s1(void *cookie)
255 {
256 	struct arm_smmu_domain *smmu_domain = cookie;
257 	/*
258 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
259 	 * current CPU are visible beforehand.
260 	 */
261 	wmb();
262 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
263 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
264 	arm_smmu_tlb_sync_context(smmu_domain);
265 }
266 
267 static void arm_smmu_tlb_inv_context_s2(void *cookie)
268 {
269 	struct arm_smmu_domain *smmu_domain = cookie;
270 	struct arm_smmu_device *smmu = smmu_domain->smmu;
271 
272 	/* See above */
273 	wmb();
274 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
275 	arm_smmu_tlb_sync_global(smmu);
276 }
277 
278 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
279 				      size_t granule, void *cookie, int reg)
280 {
281 	struct arm_smmu_domain *smmu_domain = cookie;
282 	struct arm_smmu_device *smmu = smmu_domain->smmu;
283 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
284 	int idx = cfg->cbndx;
285 
286 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
287 		wmb();
288 
289 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
290 		iova = (iova >> 12) << 12;
291 		iova |= cfg->asid;
292 		do {
293 			arm_smmu_cb_write(smmu, idx, reg, iova);
294 			iova += granule;
295 		} while (size -= granule);
296 	} else {
297 		iova >>= 12;
298 		iova |= (u64)cfg->asid << 48;
299 		do {
300 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
301 			iova += granule >> 12;
302 		} while (size -= granule);
303 	}
304 }
305 
306 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
307 				      size_t granule, void *cookie, int reg)
308 {
309 	struct arm_smmu_domain *smmu_domain = cookie;
310 	struct arm_smmu_device *smmu = smmu_domain->smmu;
311 	int idx = smmu_domain->cfg.cbndx;
312 
313 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
314 		wmb();
315 
316 	iova >>= 12;
317 	do {
318 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
319 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
320 		else
321 			arm_smmu_cb_write(smmu, idx, reg, iova);
322 		iova += granule >> 12;
323 	} while (size -= granule);
324 }
325 
326 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
327 				     size_t granule, void *cookie)
328 {
329 	struct arm_smmu_domain *smmu_domain = cookie;
330 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
331 
332 	if (cfg->flush_walk_prefer_tlbiasid) {
333 		arm_smmu_tlb_inv_context_s1(cookie);
334 	} else {
335 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
336 					  ARM_SMMU_CB_S1_TLBIVA);
337 		arm_smmu_tlb_sync_context(cookie);
338 	}
339 }
340 
341 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
342 				     unsigned long iova, size_t granule,
343 				     void *cookie)
344 {
345 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
346 				  ARM_SMMU_CB_S1_TLBIVAL);
347 }
348 
349 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
350 				     size_t granule, void *cookie)
351 {
352 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
353 				  ARM_SMMU_CB_S2_TLBIIPAS2);
354 	arm_smmu_tlb_sync_context(cookie);
355 }
356 
357 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
358 				     unsigned long iova, size_t granule,
359 				     void *cookie)
360 {
361 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
362 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
363 }
364 
365 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
366 					size_t granule, void *cookie)
367 {
368 	arm_smmu_tlb_inv_context_s2(cookie);
369 }
370 /*
371  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
372  * almost negligible, but the benefit of getting the first one in as far ahead
373  * of the sync as possible is significant, hence we don't just make this a
374  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
375  * think.
376  */
377 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
378 					unsigned long iova, size_t granule,
379 					void *cookie)
380 {
381 	struct arm_smmu_domain *smmu_domain = cookie;
382 	struct arm_smmu_device *smmu = smmu_domain->smmu;
383 
384 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
385 		wmb();
386 
387 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
388 }
389 
390 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
391 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
392 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
393 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
394 };
395 
396 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
397 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
398 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
399 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
400 };
401 
402 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
403 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
404 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
405 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
406 };
407 
408 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
409 {
410 	u32 fsr, fsynr, cbfrsynra;
411 	unsigned long iova;
412 	struct arm_smmu_domain *smmu_domain = dev;
413 	struct arm_smmu_device *smmu = smmu_domain->smmu;
414 	int idx = smmu_domain->cfg.cbndx;
415 	int ret;
416 
417 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
418 	if (!(fsr & ARM_SMMU_FSR_FAULT))
419 		return IRQ_NONE;
420 
421 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
422 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
423 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
424 
425 	ret = report_iommu_fault(&smmu_domain->domain, NULL, iova,
426 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
427 
428 	if (ret == -ENOSYS)
429 		dev_err_ratelimited(smmu->dev,
430 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
431 			    fsr, iova, fsynr, cbfrsynra, idx);
432 
433 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
434 	return IRQ_HANDLED;
435 }
436 
437 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
438 {
439 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
440 	struct arm_smmu_device *smmu = dev;
441 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
442 				      DEFAULT_RATELIMIT_BURST);
443 
444 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
445 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
446 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
447 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
448 
449 	if (!gfsr)
450 		return IRQ_NONE;
451 
452 	if (__ratelimit(&rs)) {
453 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
454 		    (gfsr & ARM_SMMU_sGFSR_USF))
455 			dev_err(smmu->dev,
456 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
457 				(u16)gfsynr1);
458 		else
459 			dev_err(smmu->dev,
460 				"Unexpected global fault, this could be serious\n");
461 		dev_err(smmu->dev,
462 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
463 			gfsr, gfsynr0, gfsynr1, gfsynr2);
464 	}
465 
466 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
467 	return IRQ_HANDLED;
468 }
469 
470 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
471 				       struct io_pgtable_cfg *pgtbl_cfg)
472 {
473 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
474 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
475 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
476 
477 	cb->cfg = cfg;
478 
479 	/* TCR */
480 	if (stage1) {
481 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
482 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
483 		} else {
484 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
485 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
486 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
487 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
488 			else
489 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
490 		}
491 	} else {
492 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
493 	}
494 
495 	/* TTBRs */
496 	if (stage1) {
497 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
498 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
499 			cb->ttbr[1] = 0;
500 		} else {
501 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
502 						 cfg->asid);
503 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
504 						 cfg->asid);
505 
506 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
507 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
508 			else
509 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
510 		}
511 	} else {
512 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
513 	}
514 
515 	/* MAIRs (stage-1 only) */
516 	if (stage1) {
517 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
518 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
519 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
520 		} else {
521 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
522 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
523 		}
524 	}
525 }
526 
527 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
528 {
529 	u32 reg;
530 	bool stage1;
531 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
532 	struct arm_smmu_cfg *cfg = cb->cfg;
533 
534 	/* Unassigned context banks only need disabling */
535 	if (!cfg) {
536 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
537 		return;
538 	}
539 
540 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
541 
542 	/* CBA2R */
543 	if (smmu->version > ARM_SMMU_V1) {
544 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
545 			reg = ARM_SMMU_CBA2R_VA64;
546 		else
547 			reg = 0;
548 		/* 16-bit VMIDs live in CBA2R */
549 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
550 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
551 
552 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
553 	}
554 
555 	/* CBAR */
556 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
557 	if (smmu->version < ARM_SMMU_V2)
558 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
559 
560 	/*
561 	 * Use the weakest shareability/memory types, so they are
562 	 * overridden by the ttbcr/pte.
563 	 */
564 	if (stage1) {
565 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
566 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
567 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
568 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
569 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
570 		/* 8-bit VMIDs live in CBAR */
571 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
572 	}
573 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
574 
575 	/*
576 	 * TCR
577 	 * We must write this before the TTBRs, since it determines the
578 	 * access behaviour of some fields (in particular, ASID[15:8]).
579 	 */
580 	if (stage1 && smmu->version > ARM_SMMU_V1)
581 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
582 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
583 
584 	/* TTBRs */
585 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
586 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
587 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
588 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
589 	} else {
590 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
591 		if (stage1)
592 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
593 					   cb->ttbr[1]);
594 	}
595 
596 	/* MAIRs (stage-1 only) */
597 	if (stage1) {
598 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
599 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
600 	}
601 
602 	/* SCTLR */
603 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
604 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
605 	if (stage1)
606 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
607 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
608 		reg |= ARM_SMMU_SCTLR_E;
609 
610 	if (smmu->impl && smmu->impl->write_sctlr)
611 		smmu->impl->write_sctlr(smmu, idx, reg);
612 	else
613 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
614 }
615 
616 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
617 				       struct arm_smmu_device *smmu,
618 				       struct device *dev, unsigned int start)
619 {
620 	if (smmu->impl && smmu->impl->alloc_context_bank)
621 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
622 
623 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
624 }
625 
626 static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
627 					struct arm_smmu_device *smmu,
628 					struct device *dev)
629 {
630 	int irq, start, ret = 0;
631 	unsigned long ias, oas;
632 	struct io_pgtable_ops *pgtbl_ops;
633 	struct io_pgtable_cfg pgtbl_cfg;
634 	enum io_pgtable_fmt fmt;
635 	struct iommu_domain *domain = &smmu_domain->domain;
636 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
637 	irqreturn_t (*context_fault)(int irq, void *dev);
638 
639 	mutex_lock(&smmu_domain->init_mutex);
640 	if (smmu_domain->smmu)
641 		goto out_unlock;
642 
643 	/*
644 	 * Mapping the requested stage onto what we support is surprisingly
645 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
646 	 * support for nested translation. That means we end up with the
647 	 * following table:
648 	 *
649 	 * Requested        Supported        Actual
650 	 *     S1               N              S1
651 	 *     S1             S1+S2            S1
652 	 *     S1               S2             S2
653 	 *     S1               S1             S1
654 	 *     N                N              N
655 	 *     N              S1+S2            S2
656 	 *     N                S2             S2
657 	 *     N                S1             S1
658 	 *
659 	 * Note that you can't actually request stage-2 mappings.
660 	 */
661 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
662 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
663 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
664 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
665 
666 	/*
667 	 * Choosing a suitable context format is even more fiddly. Until we
668 	 * grow some way for the caller to express a preference, and/or move
669 	 * the decision into the io-pgtable code where it arguably belongs,
670 	 * just aim for the closest thing to the rest of the system, and hope
671 	 * that the hardware isn't esoteric enough that we can't assume AArch64
672 	 * support to be a superset of AArch32 support...
673 	 */
674 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
675 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
676 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
677 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
678 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
679 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
680 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
681 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
682 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
683 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
684 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
685 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
686 
687 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
688 		ret = -EINVAL;
689 		goto out_unlock;
690 	}
691 
692 	switch (smmu_domain->stage) {
693 	case ARM_SMMU_DOMAIN_S1:
694 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
695 		start = smmu->num_s2_context_banks;
696 		ias = smmu->va_size;
697 		oas = smmu->ipa_size;
698 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
699 			fmt = ARM_64_LPAE_S1;
700 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
701 			fmt = ARM_32_LPAE_S1;
702 			ias = min(ias, 32UL);
703 			oas = min(oas, 40UL);
704 		} else {
705 			fmt = ARM_V7S;
706 			ias = min(ias, 32UL);
707 			oas = min(oas, 32UL);
708 		}
709 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
710 		break;
711 	case ARM_SMMU_DOMAIN_NESTED:
712 		/*
713 		 * We will likely want to change this if/when KVM gets
714 		 * involved.
715 		 */
716 	case ARM_SMMU_DOMAIN_S2:
717 		cfg->cbar = CBAR_TYPE_S2_TRANS;
718 		start = 0;
719 		ias = smmu->ipa_size;
720 		oas = smmu->pa_size;
721 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
722 			fmt = ARM_64_LPAE_S2;
723 		} else {
724 			fmt = ARM_32_LPAE_S2;
725 			ias = min(ias, 40UL);
726 			oas = min(oas, 40UL);
727 		}
728 		if (smmu->version == ARM_SMMU_V2)
729 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
730 		else
731 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
732 		break;
733 	default:
734 		ret = -EINVAL;
735 		goto out_unlock;
736 	}
737 
738 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
739 	if (ret < 0) {
740 		goto out_unlock;
741 	}
742 
743 	smmu_domain->smmu = smmu;
744 
745 	cfg->cbndx = ret;
746 	if (smmu->version < ARM_SMMU_V2) {
747 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
748 		cfg->irptndx %= smmu->num_context_irqs;
749 	} else {
750 		cfg->irptndx = cfg->cbndx;
751 	}
752 
753 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
754 		cfg->vmid = cfg->cbndx + 1;
755 	else
756 		cfg->asid = cfg->cbndx;
757 
758 	pgtbl_cfg = (struct io_pgtable_cfg) {
759 		.pgsize_bitmap	= smmu->pgsize_bitmap,
760 		.ias		= ias,
761 		.oas		= oas,
762 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
763 		.tlb		= smmu_domain->flush_ops,
764 		.iommu_dev	= smmu->dev,
765 	};
766 
767 	if (smmu->impl && smmu->impl->init_context) {
768 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
769 		if (ret)
770 			goto out_clear_smmu;
771 	}
772 
773 	if (smmu_domain->pgtbl_quirks)
774 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
775 
776 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
777 	if (!pgtbl_ops) {
778 		ret = -ENOMEM;
779 		goto out_clear_smmu;
780 	}
781 
782 	/* Update the domain's page sizes to reflect the page table format */
783 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
784 
785 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
786 		domain->geometry.aperture_start = ~0UL << ias;
787 		domain->geometry.aperture_end = ~0UL;
788 	} else {
789 		domain->geometry.aperture_end = (1UL << ias) - 1;
790 	}
791 
792 	domain->geometry.force_aperture = true;
793 
794 	/* Initialise the context bank with our page table cfg */
795 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
796 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
797 
798 	/*
799 	 * Request context fault interrupt. Do this last to avoid the
800 	 * handler seeing a half-initialised domain state.
801 	 */
802 	irq = smmu->irqs[cfg->irptndx];
803 
804 	if (smmu->impl && smmu->impl->context_fault)
805 		context_fault = smmu->impl->context_fault;
806 	else
807 		context_fault = arm_smmu_context_fault;
808 
809 	ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED,
810 			       "arm-smmu-context-fault", smmu_domain);
811 	if (ret < 0) {
812 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
813 			cfg->irptndx, irq);
814 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
815 	}
816 
817 	mutex_unlock(&smmu_domain->init_mutex);
818 
819 	/* Publish page table ops for map/unmap */
820 	smmu_domain->pgtbl_ops = pgtbl_ops;
821 	return 0;
822 
823 out_clear_smmu:
824 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
825 	smmu_domain->smmu = NULL;
826 out_unlock:
827 	mutex_unlock(&smmu_domain->init_mutex);
828 	return ret;
829 }
830 
831 static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain)
832 {
833 	struct arm_smmu_device *smmu = smmu_domain->smmu;
834 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
835 	int ret, irq;
836 
837 	if (!smmu)
838 		return;
839 
840 	ret = arm_smmu_rpm_get(smmu);
841 	if (ret < 0)
842 		return;
843 
844 	/*
845 	 * Disable the context bank and free the page tables before freeing
846 	 * it.
847 	 */
848 	smmu->cbs[cfg->cbndx].cfg = NULL;
849 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
850 
851 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
852 		irq = smmu->irqs[cfg->irptndx];
853 		devm_free_irq(smmu->dev, irq, smmu_domain);
854 	}
855 
856 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
857 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
858 
859 	arm_smmu_rpm_put(smmu);
860 }
861 
862 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
863 {
864 	struct arm_smmu_domain *smmu_domain;
865 
866 	if (type != IOMMU_DOMAIN_UNMANAGED) {
867 		if (using_legacy_binding || type != IOMMU_DOMAIN_DMA)
868 			return NULL;
869 	}
870 	/*
871 	 * Allocate the domain and initialise some of its data structures.
872 	 * We can't really do anything meaningful until we've added a
873 	 * master.
874 	 */
875 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
876 	if (!smmu_domain)
877 		return NULL;
878 
879 	mutex_init(&smmu_domain->init_mutex);
880 	spin_lock_init(&smmu_domain->cb_lock);
881 
882 	return &smmu_domain->domain;
883 }
884 
/*
 * Destroy the domain's hardware context and free the arm_smmu_domain that
 * embeds @domain. All devices are assumed to have been detached already.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain;

	smmu_domain = to_smmu_domain(domain);
	arm_smmu_destroy_domain_context(smmu_domain);
	kfree(smmu_domain);
}
896 
897 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
898 {
899 	struct arm_smmu_smr *smr = smmu->smrs + idx;
900 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
901 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
902 
903 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
904 		reg |= ARM_SMMU_SMR_VALID;
905 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
906 }
907 
908 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
909 {
910 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
911 	u32 reg;
912 
913 	if (smmu->impl && smmu->impl->write_s2cr) {
914 		smmu->impl->write_s2cr(smmu, idx);
915 		return;
916 	}
917 
918 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
919 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
920 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
921 
922 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
923 	    smmu->smrs[idx].valid)
924 		reg |= ARM_SMMU_S2CR_EXIDVALID;
925 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
926 }
927 
928 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
929 {
930 	arm_smmu_write_s2cr(smmu, idx);
931 	if (smmu->smrs)
932 		arm_smmu_write_smr(smmu, idx);
933 }
934 
935 /*
936  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
937  * should be called after sCR0 is written.
938  */
939 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
940 {
941 	u32 smr;
942 	int i;
943 
944 	if (!smmu->smrs)
945 		return;
946 	/*
947 	 * If we've had to accommodate firmware memory regions, we may
948 	 * have live SMRs by now; tread carefully...
949 	 *
950 	 * Somewhat perversely, not having a free SMR for this test implies we
951 	 * can get away without it anyway, as we'll only be able to 'allocate'
952 	 * these SMRs for the ID/mask values we're already trusting to be OK.
953 	 */
954 	for (i = 0; i < smmu->num_mapping_groups; i++)
955 		if (!smmu->smrs[i].valid)
956 			goto smr_ok;
957 	return;
958 smr_ok:
959 	/*
960 	 * SMR.ID bits may not be preserved if the corresponding MASK
961 	 * bits are set, so check each one separately. We can reject
962 	 * masters later if they try to claim IDs outside these masks.
963 	 */
964 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
965 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
966 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
967 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
968 
969 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
970 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
971 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
972 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
973 }
974 
975 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
976 {
977 	struct arm_smmu_smr *smrs = smmu->smrs;
978 	int i, free_idx = -ENOSPC;
979 
980 	/* Stream indexing is blissfully easy */
981 	if (!smrs)
982 		return id;
983 
984 	/* Validating SMRs is... less so */
985 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
986 		if (!smrs[i].valid) {
987 			/*
988 			 * Note the first free entry we come across, which
989 			 * we'll claim in the end if nothing else matches.
990 			 */
991 			if (free_idx < 0)
992 				free_idx = i;
993 			continue;
994 		}
995 		/*
996 		 * If the new entry is _entirely_ matched by an existing entry,
997 		 * then reuse that, with the guarantee that there also cannot
998 		 * be any subsequent conflicting entries. In normal use we'd
999 		 * expect simply identical entries for this case, but there's
1000 		 * no harm in accommodating the generalisation.
1001 		 */
1002 		if ((mask & smrs[i].mask) == mask &&
1003 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1004 			return i;
1005 		/*
1006 		 * If the new entry has any other overlap with an existing one,
1007 		 * though, then there always exists at least one stream ID
1008 		 * which would cause a conflict, and we can't allow that risk.
1009 		 */
1010 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1011 			return -EINVAL;
1012 	}
1013 
1014 	return free_idx;
1015 }
1016 
1017 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1018 {
1019 	if (--smmu->s2crs[idx].count)
1020 		return false;
1021 
1022 	smmu->s2crs[idx] = s2cr_init_val;
1023 	if (smmu->smrs)
1024 		smmu->smrs[idx].valid = false;
1025 
1026 	return true;
1027 }
1028 
1029 static int arm_smmu_master_alloc_smes(struct device *dev)
1030 {
1031 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1032 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1033 	struct arm_smmu_device *smmu = cfg->smmu;
1034 	struct arm_smmu_smr *smrs = smmu->smrs;
1035 	int i, idx, ret;
1036 
1037 	mutex_lock(&smmu->stream_map_mutex);
1038 	/* Figure out a viable stream map entry allocation */
1039 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1040 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1041 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1042 
1043 		if (idx != INVALID_SMENDX) {
1044 			ret = -EEXIST;
1045 			goto out_err;
1046 		}
1047 
1048 		ret = arm_smmu_find_sme(smmu, sid, mask);
1049 		if (ret < 0)
1050 			goto out_err;
1051 
1052 		idx = ret;
1053 		if (smrs && smmu->s2crs[idx].count == 0) {
1054 			smrs[idx].id = sid;
1055 			smrs[idx].mask = mask;
1056 			smrs[idx].valid = true;
1057 		}
1058 		smmu->s2crs[idx].count++;
1059 		cfg->smendx[i] = (s16)idx;
1060 	}
1061 
1062 	/* It worked! Now, poke the actual hardware */
1063 	for_each_cfg_sme(cfg, fwspec, i, idx)
1064 		arm_smmu_write_sme(smmu, idx);
1065 
1066 	mutex_unlock(&smmu->stream_map_mutex);
1067 	return 0;
1068 
1069 out_err:
1070 	while (i--) {
1071 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1072 		cfg->smendx[i] = INVALID_SMENDX;
1073 	}
1074 	mutex_unlock(&smmu->stream_map_mutex);
1075 	return ret;
1076 }
1077 
1078 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1079 				      struct iommu_fwspec *fwspec)
1080 {
1081 	struct arm_smmu_device *smmu = cfg->smmu;
1082 	int i, idx;
1083 
1084 	mutex_lock(&smmu->stream_map_mutex);
1085 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1086 		if (arm_smmu_free_sme(smmu, idx))
1087 			arm_smmu_write_sme(smmu, idx);
1088 		cfg->smendx[i] = INVALID_SMENDX;
1089 	}
1090 	mutex_unlock(&smmu->stream_map_mutex);
1091 }
1092 
1093 static void arm_smmu_master_install_s2crs(struct arm_smmu_master_cfg *cfg,
1094 					  enum arm_smmu_s2cr_type type,
1095 					  u8 cbndx, struct iommu_fwspec *fwspec)
1096 {
1097 	struct arm_smmu_device *smmu = cfg->smmu;
1098 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1099 	int i, idx;
1100 
1101 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1102 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1103 			continue;
1104 
1105 		s2cr[idx].type = type;
1106 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1107 		s2cr[idx].cbndx = cbndx;
1108 		arm_smmu_write_s2cr(smmu, idx);
1109 	}
1110 }
1111 
1112 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1113 {
1114 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1115 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1116 	struct arm_smmu_master_cfg *cfg;
1117 	struct arm_smmu_device *smmu;
1118 	int ret;
1119 
1120 	/*
1121 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1122 	 * domains between of_xlate() and probe_device() - we have no way to cope
1123 	 * with that, so until ARM gets converted to rely on groups and default
1124 	 * domains, just say no (but more politely than by dereferencing NULL).
1125 	 * This should be at least a WARN_ON once that's sorted.
1126 	 */
1127 	cfg = dev_iommu_priv_get(dev);
1128 	if (!cfg)
1129 		return -ENODEV;
1130 
1131 	smmu = cfg->smmu;
1132 
1133 	ret = arm_smmu_rpm_get(smmu);
1134 	if (ret < 0)
1135 		return ret;
1136 
1137 	/* Ensure that the domain is finalised */
1138 	ret = arm_smmu_init_domain_context(smmu_domain, smmu, dev);
1139 	if (ret < 0)
1140 		goto rpm_put;
1141 
1142 	/*
1143 	 * Sanity check the domain. We don't support domains across
1144 	 * different SMMUs.
1145 	 */
1146 	if (smmu_domain->smmu != smmu) {
1147 		ret = -EINVAL;
1148 		goto rpm_put;
1149 	}
1150 
1151 	/* Looks ok, so add the device to the domain */
1152 	arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS,
1153 				      smmu_domain->cfg.cbndx, fwspec);
1154 	arm_smmu_rpm_use_autosuspend(smmu);
1155 rpm_put:
1156 	arm_smmu_rpm_put(smmu);
1157 	return ret;
1158 }
1159 
1160 static int arm_smmu_attach_dev_type(struct device *dev,
1161 				    enum arm_smmu_s2cr_type type)
1162 {
1163 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1164 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1165 	struct arm_smmu_device *smmu;
1166 	int ret;
1167 
1168 	if (!cfg)
1169 		return -ENODEV;
1170 	smmu = cfg->smmu;
1171 
1172 	ret = arm_smmu_rpm_get(smmu);
1173 	if (ret < 0)
1174 		return ret;
1175 
1176 	arm_smmu_master_install_s2crs(cfg, type, 0, fwspec);
1177 	arm_smmu_rpm_use_autosuspend(smmu);
1178 	arm_smmu_rpm_put(smmu);
1179 	return 0;
1180 }
1181 
1182 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
1183 					struct device *dev)
1184 {
1185 	return arm_smmu_attach_dev_type(dev, S2CR_TYPE_BYPASS);
1186 }
1187 
1188 static const struct iommu_domain_ops arm_smmu_identity_ops = {
1189 	.attach_dev = arm_smmu_attach_dev_identity,
1190 };
1191 
1192 static struct iommu_domain arm_smmu_identity_domain = {
1193 	.type = IOMMU_DOMAIN_IDENTITY,
1194 	.ops = &arm_smmu_identity_ops,
1195 };
1196 
1197 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
1198 				       struct device *dev)
1199 {
1200 	return arm_smmu_attach_dev_type(dev, S2CR_TYPE_FAULT);
1201 }
1202 
1203 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
1204 	.attach_dev = arm_smmu_attach_dev_blocked,
1205 };
1206 
1207 static struct iommu_domain arm_smmu_blocked_domain = {
1208 	.type = IOMMU_DOMAIN_BLOCKED,
1209 	.ops = &arm_smmu_blocked_ops,
1210 };
1211 
1212 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1213 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1214 			      int prot, gfp_t gfp, size_t *mapped)
1215 {
1216 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1217 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1218 	int ret;
1219 
1220 	if (!ops)
1221 		return -ENODEV;
1222 
1223 	arm_smmu_rpm_get(smmu);
1224 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1225 	arm_smmu_rpm_put(smmu);
1226 
1227 	return ret;
1228 }
1229 
1230 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1231 				   size_t pgsize, size_t pgcount,
1232 				   struct iommu_iotlb_gather *iotlb_gather)
1233 {
1234 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1235 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1236 	size_t ret;
1237 
1238 	if (!ops)
1239 		return 0;
1240 
1241 	arm_smmu_rpm_get(smmu);
1242 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1243 	arm_smmu_rpm_put(smmu);
1244 
1245 	return ret;
1246 }
1247 
1248 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1249 {
1250 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1251 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1252 
1253 	if (smmu_domain->flush_ops) {
1254 		arm_smmu_rpm_get(smmu);
1255 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1256 		arm_smmu_rpm_put(smmu);
1257 	}
1258 }
1259 
1260 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1261 				struct iommu_iotlb_gather *gather)
1262 {
1263 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1264 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1265 
1266 	if (!smmu)
1267 		return;
1268 
1269 	arm_smmu_rpm_get(smmu);
1270 	if (smmu->version == ARM_SMMU_V2 ||
1271 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1272 		arm_smmu_tlb_sync_context(smmu_domain);
1273 	else
1274 		arm_smmu_tlb_sync_global(smmu);
1275 	arm_smmu_rpm_put(smmu);
1276 }
1277 
1278 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1279 					      dma_addr_t iova)
1280 {
1281 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1282 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1283 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1284 	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1285 	struct device *dev = smmu->dev;
1286 	void __iomem *reg;
1287 	u32 tmp;
1288 	u64 phys;
1289 	unsigned long va, flags;
1290 	int ret, idx = cfg->cbndx;
1291 	phys_addr_t addr = 0;
1292 
1293 	ret = arm_smmu_rpm_get(smmu);
1294 	if (ret < 0)
1295 		return 0;
1296 
1297 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1298 	va = iova & ~0xfffUL;
1299 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1300 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1301 	else
1302 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1303 
1304 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1305 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1306 				      5, 50)) {
1307 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1308 		dev_err(dev,
1309 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1310 			&iova);
1311 		arm_smmu_rpm_put(smmu);
1312 		return ops->iova_to_phys(ops, iova);
1313 	}
1314 
1315 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1316 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1317 	if (phys & ARM_SMMU_CB_PAR_F) {
1318 		dev_err(dev, "translation fault!\n");
1319 		dev_err(dev, "PAR = 0x%llx\n", phys);
1320 		goto out;
1321 	}
1322 
1323 	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1324 out:
1325 	arm_smmu_rpm_put(smmu);
1326 
1327 	return addr;
1328 }
1329 
1330 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1331 					dma_addr_t iova)
1332 {
1333 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1334 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1335 
1336 	if (!ops)
1337 		return 0;
1338 
1339 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1340 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1341 		return arm_smmu_iova_to_phys_hard(domain, iova);
1342 
1343 	return ops->iova_to_phys(ops, iova);
1344 }
1345 
1346 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
1347 {
1348 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1349 
1350 	switch (cap) {
1351 	case IOMMU_CAP_CACHE_COHERENCY:
1352 		/*
1353 		 * It's overwhelmingly the case in practice that when the pagetable
1354 		 * walk interface is connected to a coherent interconnect, all the
1355 		 * translation interfaces are too. Furthermore if the device is
1356 		 * natively coherent, then its translation interface must also be.
1357 		 */
1358 		return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK ||
1359 			device_get_dma_attr(dev) == DEV_DMA_COHERENT;
1360 	case IOMMU_CAP_NOEXEC:
1361 	case IOMMU_CAP_DEFERRED_FLUSH:
1362 		return true;
1363 	default:
1364 		return false;
1365 	}
1366 }
1367 
1368 static
1369 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1370 {
1371 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1372 							  fwnode);
1373 	put_device(dev);
1374 	return dev ? dev_get_drvdata(dev) : NULL;
1375 }
1376 
1377 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1378 {
1379 	struct arm_smmu_device *smmu = NULL;
1380 	struct arm_smmu_master_cfg *cfg;
1381 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1382 	int i, ret;
1383 
1384 	if (using_legacy_binding) {
1385 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1386 
1387 		/*
1388 		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1389 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1390 		 * later use.
1391 		 */
1392 		fwspec = dev_iommu_fwspec_get(dev);
1393 		if (ret)
1394 			goto out_free;
1395 	} else {
1396 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1397 	}
1398 
1399 	ret = -EINVAL;
1400 	for (i = 0; i < fwspec->num_ids; i++) {
1401 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1402 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1403 
1404 		if (sid & ~smmu->streamid_mask) {
1405 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1406 				sid, smmu->streamid_mask);
1407 			goto out_free;
1408 		}
1409 		if (mask & ~smmu->smr_mask_mask) {
1410 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1411 				mask, smmu->smr_mask_mask);
1412 			goto out_free;
1413 		}
1414 	}
1415 
1416 	ret = -ENOMEM;
1417 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1418 		      GFP_KERNEL);
1419 	if (!cfg)
1420 		goto out_free;
1421 
1422 	cfg->smmu = smmu;
1423 	dev_iommu_priv_set(dev, cfg);
1424 	while (i--)
1425 		cfg->smendx[i] = INVALID_SMENDX;
1426 
1427 	ret = arm_smmu_rpm_get(smmu);
1428 	if (ret < 0)
1429 		goto out_cfg_free;
1430 
1431 	ret = arm_smmu_master_alloc_smes(dev);
1432 	arm_smmu_rpm_put(smmu);
1433 
1434 	if (ret)
1435 		goto out_cfg_free;
1436 
1437 	device_link_add(dev, smmu->dev,
1438 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1439 
1440 	return &smmu->iommu;
1441 
1442 out_cfg_free:
1443 	kfree(cfg);
1444 out_free:
1445 	iommu_fwspec_free(dev);
1446 	return ERR_PTR(ret);
1447 }
1448 
1449 static void arm_smmu_release_device(struct device *dev)
1450 {
1451 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1452 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1453 	int ret;
1454 
1455 	ret = arm_smmu_rpm_get(cfg->smmu);
1456 	if (ret < 0)
1457 		return;
1458 
1459 	arm_smmu_master_free_smes(cfg, fwspec);
1460 
1461 	arm_smmu_rpm_put(cfg->smmu);
1462 
1463 	kfree(cfg);
1464 }
1465 
1466 static void arm_smmu_probe_finalize(struct device *dev)
1467 {
1468 	struct arm_smmu_master_cfg *cfg;
1469 	struct arm_smmu_device *smmu;
1470 
1471 	cfg = dev_iommu_priv_get(dev);
1472 	smmu = cfg->smmu;
1473 
1474 	if (smmu->impl && smmu->impl->probe_finalize)
1475 		smmu->impl->probe_finalize(smmu, dev);
1476 }
1477 
1478 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1479 {
1480 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1481 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1482 	struct arm_smmu_device *smmu = cfg->smmu;
1483 	struct iommu_group *group = NULL;
1484 	int i, idx;
1485 
1486 	mutex_lock(&smmu->stream_map_mutex);
1487 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1488 		if (group && smmu->s2crs[idx].group &&
1489 		    group != smmu->s2crs[idx].group) {
1490 			mutex_unlock(&smmu->stream_map_mutex);
1491 			return ERR_PTR(-EINVAL);
1492 		}
1493 
1494 		group = smmu->s2crs[idx].group;
1495 	}
1496 
1497 	if (group) {
1498 		mutex_unlock(&smmu->stream_map_mutex);
1499 		return iommu_group_ref_get(group);
1500 	}
1501 
1502 	if (dev_is_pci(dev))
1503 		group = pci_device_group(dev);
1504 	else if (dev_is_fsl_mc(dev))
1505 		group = fsl_mc_device_group(dev);
1506 	else
1507 		group = generic_device_group(dev);
1508 
1509 	/* Remember group for faster lookups */
1510 	if (!IS_ERR(group))
1511 		for_each_cfg_sme(cfg, fwspec, i, idx)
1512 			smmu->s2crs[idx].group = group;
1513 
1514 	mutex_unlock(&smmu->stream_map_mutex);
1515 	return group;
1516 }
1517 
1518 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1519 {
1520 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1521 	int ret = 0;
1522 
1523 	mutex_lock(&smmu_domain->init_mutex);
1524 	if (smmu_domain->smmu)
1525 		ret = -EPERM;
1526 	else
1527 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1528 	mutex_unlock(&smmu_domain->init_mutex);
1529 
1530 	return ret;
1531 }
1532 
1533 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1534 		unsigned long quirks)
1535 {
1536 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1537 	int ret = 0;
1538 
1539 	mutex_lock(&smmu_domain->init_mutex);
1540 	if (smmu_domain->smmu)
1541 		ret = -EPERM;
1542 	else
1543 		smmu_domain->pgtbl_quirks = quirks;
1544 	mutex_unlock(&smmu_domain->init_mutex);
1545 
1546 	return ret;
1547 }
1548 
1549 static int arm_smmu_of_xlate(struct device *dev,
1550 			     const struct of_phandle_args *args)
1551 {
1552 	u32 mask, fwid = 0;
1553 
1554 	if (args->args_count > 0)
1555 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1556 
1557 	if (args->args_count > 1)
1558 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1559 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1560 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1561 
1562 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1563 }
1564 
1565 static void arm_smmu_get_resv_regions(struct device *dev,
1566 				      struct list_head *head)
1567 {
1568 	struct iommu_resv_region *region;
1569 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1570 
1571 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1572 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
1573 	if (!region)
1574 		return;
1575 
1576 	list_add_tail(&region->list, head);
1577 
1578 	iommu_dma_get_resv_regions(dev, head);
1579 }
1580 
1581 static int arm_smmu_def_domain_type(struct device *dev)
1582 {
1583 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1584 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1585 
1586 	if (using_legacy_binding)
1587 		return IOMMU_DOMAIN_IDENTITY;
1588 
1589 	if (impl && impl->def_domain_type)
1590 		return impl->def_domain_type(dev);
1591 
1592 	return 0;
1593 }
1594 
1595 static struct iommu_ops arm_smmu_ops = {
1596 	.identity_domain	= &arm_smmu_identity_domain,
1597 	.blocked_domain		= &arm_smmu_blocked_domain,
1598 	.capable		= arm_smmu_capable,
1599 	.domain_alloc		= arm_smmu_domain_alloc,
1600 	.probe_device		= arm_smmu_probe_device,
1601 	.release_device		= arm_smmu_release_device,
1602 	.probe_finalize		= arm_smmu_probe_finalize,
1603 	.device_group		= arm_smmu_device_group,
1604 	.of_xlate		= arm_smmu_of_xlate,
1605 	.get_resv_regions	= arm_smmu_get_resv_regions,
1606 	.def_domain_type	= arm_smmu_def_domain_type,
1607 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1608 	.owner			= THIS_MODULE,
1609 	.default_domain_ops = &(const struct iommu_domain_ops) {
1610 		.attach_dev		= arm_smmu_attach_dev,
1611 		.map_pages		= arm_smmu_map_pages,
1612 		.unmap_pages		= arm_smmu_unmap_pages,
1613 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
1614 		.iotlb_sync		= arm_smmu_iotlb_sync,
1615 		.iova_to_phys		= arm_smmu_iova_to_phys,
1616 		.enable_nesting		= arm_smmu_enable_nesting,
1617 		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
1618 		.free			= arm_smmu_domain_free,
1619 	}
1620 };
1621 
1622 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1623 {
1624 	int i;
1625 	u32 reg;
1626 
1627 	/* clear global FSR */
1628 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1629 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1630 
1631 	/*
1632 	 * Reset stream mapping groups: Initial values mark all SMRn as
1633 	 * invalid and all S2CRn as bypass unless overridden.
1634 	 */
1635 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1636 		arm_smmu_write_sme(smmu, i);
1637 
1638 	/* Make sure all context banks are disabled and clear CB_FSR  */
1639 	for (i = 0; i < smmu->num_context_banks; ++i) {
1640 		arm_smmu_write_context_bank(smmu, i);
1641 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1642 	}
1643 
1644 	/* Invalidate the TLB, just in case */
1645 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1646 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1647 
1648 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1649 
1650 	/* Enable fault reporting */
1651 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1652 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1653 
1654 	/* Disable TLB broadcasting. */
1655 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1656 
1657 	/* Enable client access, handling unmatched streams as appropriate */
1658 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1659 	if (disable_bypass)
1660 		reg |= ARM_SMMU_sCR0_USFCFG;
1661 	else
1662 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1663 
1664 	/* Disable forced broadcasting */
1665 	reg &= ~ARM_SMMU_sCR0_FB;
1666 
1667 	/* Don't upgrade barriers */
1668 	reg &= ~(ARM_SMMU_sCR0_BSU);
1669 
1670 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1671 		reg |= ARM_SMMU_sCR0_VMID16EN;
1672 
1673 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1674 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1675 
1676 	if (smmu->impl && smmu->impl->reset)
1677 		smmu->impl->reset(smmu);
1678 
1679 	/* Push the button */
1680 	arm_smmu_tlb_sync_global(smmu);
1681 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1682 }
1683 
/*
 * Convert an ID register size encoding into an address width in bits.
 *
 * Encodings 0..4 map to 32, 36, 40, 42 and 44 bits respectively; encoding 5
 * and every other value (including negative ones) map to 48 bits.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int width[] = { 32, 36, 40, 42, 44 };
	const int entries = (int)(sizeof(width) / sizeof(width[0]));

	if (size < 0 || size >= entries)
		return 48;

	return width[size];
}
1702 
1703 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1704 {
1705 	unsigned int size;
1706 	u32 id;
1707 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1708 	int i, ret;
1709 
1710 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1711 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1712 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1713 
1714 	/* ID0 */
1715 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1716 
1717 	/* Restrict available stages based on module parameter */
1718 	if (force_stage == 1)
1719 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1720 	else if (force_stage == 2)
1721 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1722 
1723 	if (id & ARM_SMMU_ID0_S1TS) {
1724 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1725 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1726 	}
1727 
1728 	if (id & ARM_SMMU_ID0_S2TS) {
1729 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1730 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1731 	}
1732 
1733 	if (id & ARM_SMMU_ID0_NTS) {
1734 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1735 		dev_notice(smmu->dev, "\tnested translation\n");
1736 	}
1737 
1738 	if (!(smmu->features &
1739 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1740 		dev_err(smmu->dev, "\tno translation support!\n");
1741 		return -ENODEV;
1742 	}
1743 
1744 	if ((id & ARM_SMMU_ID0_S1TS) &&
1745 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1746 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1747 		dev_notice(smmu->dev, "\taddress translation ops\n");
1748 	}
1749 
1750 	/*
1751 	 * In order for DMA API calls to work properly, we must defer to what
1752 	 * the FW says about coherency, regardless of what the hardware claims.
1753 	 * Fortunately, this also opens up a workaround for systems where the
1754 	 * ID register value has ended up configured incorrectly.
1755 	 */
1756 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1757 	if (cttw_fw || cttw_reg)
1758 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1759 			   cttw_fw ? "" : "non-");
1760 	if (cttw_fw != cttw_reg)
1761 		dev_notice(smmu->dev,
1762 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1763 
1764 	/* Max. number of entries we have for stream matching/indexing */
1765 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1766 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1767 		size = 1 << 16;
1768 	} else {
1769 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1770 	}
1771 	smmu->streamid_mask = size - 1;
1772 	if (id & ARM_SMMU_ID0_SMS) {
1773 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1774 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1775 		if (size == 0) {
1776 			dev_err(smmu->dev,
1777 				"stream-matching supported, but no SMRs present!\n");
1778 			return -ENODEV;
1779 		}
1780 
1781 		/* Zero-initialised to mark as invalid */
1782 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1783 					  GFP_KERNEL);
1784 		if (!smmu->smrs)
1785 			return -ENOMEM;
1786 
1787 		dev_notice(smmu->dev,
1788 			   "\tstream matching with %u register groups", size);
1789 	}
1790 	/* s2cr->type == 0 means translation, so initialise explicitly */
1791 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1792 					 GFP_KERNEL);
1793 	if (!smmu->s2crs)
1794 		return -ENOMEM;
1795 	for (i = 0; i < size; i++)
1796 		smmu->s2crs[i] = s2cr_init_val;
1797 
1798 	smmu->num_mapping_groups = size;
1799 	mutex_init(&smmu->stream_map_mutex);
1800 	spin_lock_init(&smmu->global_sync_lock);
1801 
1802 	if (smmu->version < ARM_SMMU_V2 ||
1803 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1804 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1805 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1806 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1807 	}
1808 
1809 	/* ID1 */
1810 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1811 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1812 
1813 	/* Check for size mismatch of SMMU address space from mapped region */
1814 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1815 	if (smmu->numpage != 2 * size << smmu->pgshift)
1816 		dev_warn(smmu->dev,
1817 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1818 			2 * size << smmu->pgshift, smmu->numpage);
1819 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1820 	smmu->numpage = size;
1821 
1822 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1823 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1824 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1825 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1826 		return -ENODEV;
1827 	}
1828 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1829 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1830 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1831 				 sizeof(*smmu->cbs), GFP_KERNEL);
1832 	if (!smmu->cbs)
1833 		return -ENOMEM;
1834 
1835 	/* ID2 */
1836 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1837 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1838 	smmu->ipa_size = size;
1839 
1840 	/* The output mask is also applied for bypass */
1841 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1842 	smmu->pa_size = size;
1843 
1844 	if (id & ARM_SMMU_ID2_VMID16)
1845 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1846 
1847 	/*
1848 	 * What the page table walker can address actually depends on which
1849 	 * descriptor format is in use, but since a) we don't know that yet,
1850 	 * and b) it can vary per context bank, this will have to do...
1851 	 */
1852 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1853 		dev_warn(smmu->dev,
1854 			 "failed to set DMA mask for table walker\n");
1855 
1856 	if (smmu->version < ARM_SMMU_V2) {
1857 		smmu->va_size = smmu->ipa_size;
1858 		if (smmu->version == ARM_SMMU_V1_64K)
1859 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1860 	} else {
1861 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1862 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1863 		if (id & ARM_SMMU_ID2_PTFS_4K)
1864 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1865 		if (id & ARM_SMMU_ID2_PTFS_16K)
1866 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1867 		if (id & ARM_SMMU_ID2_PTFS_64K)
1868 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1869 	}
1870 
1871 	if (smmu->impl && smmu->impl->cfg_probe) {
1872 		ret = smmu->impl->cfg_probe(smmu);
1873 		if (ret)
1874 			return ret;
1875 	}
1876 
1877 	/* Now we've corralled the various formats, what'll it do? */
1878 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1879 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1880 	if (smmu->features &
1881 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1882 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1883 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1884 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1885 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1886 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1887 
1888 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1889 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1890 	else
1891 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1892 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1893 		   smmu->pgsize_bitmap);
1894 
1895 
1896 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1897 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1898 			   smmu->va_size, smmu->ipa_size);
1899 
1900 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1901 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1902 			   smmu->ipa_size, smmu->pa_size);
1903 
1904 	return 0;
1905 }
1906 
1907 struct arm_smmu_match_data {
1908 	enum arm_smmu_arch_version version;
1909 	enum arm_smmu_implementation model;
1910 };
1911 
1912 #define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
1913 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1914 
1915 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1916 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1917 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1918 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1919 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1920 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1921 
1922 static const struct of_device_id arm_smmu_of_match[] = {
1923 	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1924 	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1925 	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1926 	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1927 	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1928 	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1929 	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
1930 	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1931 	{ },
1932 };
1933 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1934 
1935 #ifdef CONFIG_ACPI
1936 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1937 {
1938 	int ret = 0;
1939 
1940 	switch (model) {
1941 	case ACPI_IORT_SMMU_V1:
1942 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1943 		smmu->version = ARM_SMMU_V1;
1944 		smmu->model = GENERIC_SMMU;
1945 		break;
1946 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1947 		smmu->version = ARM_SMMU_V1_64K;
1948 		smmu->model = GENERIC_SMMU;
1949 		break;
1950 	case ACPI_IORT_SMMU_V2:
1951 		smmu->version = ARM_SMMU_V2;
1952 		smmu->model = GENERIC_SMMU;
1953 		break;
1954 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1955 		smmu->version = ARM_SMMU_V2;
1956 		smmu->model = ARM_MMU500;
1957 		break;
1958 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1959 		smmu->version = ARM_SMMU_V2;
1960 		smmu->model = CAVIUM_SMMUV2;
1961 		break;
1962 	default:
1963 		ret = -ENODEV;
1964 	}
1965 
1966 	return ret;
1967 }
1968 
1969 static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1970 				      u32 *global_irqs, u32 *pmu_irqs)
1971 {
1972 	struct device *dev = smmu->dev;
1973 	struct acpi_iort_node *node =
1974 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1975 	struct acpi_iort_smmu *iort_smmu;
1976 	int ret;
1977 
1978 	/* Retrieve SMMU1/2 specific data */
1979 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1980 
1981 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1982 	if (ret < 0)
1983 		return ret;
1984 
1985 	/* Ignore the configuration access interrupt */
1986 	*global_irqs = 1;
1987 	*pmu_irqs = 0;
1988 
1989 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1990 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1991 
1992 	return 0;
1993 }
1994 #else
1995 static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1996 					     u32 *global_irqs, u32 *pmu_irqs)
1997 {
1998 	return -ENODEV;
1999 }
2000 #endif
2001 
2002 static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
2003 				    u32 *global_irqs, u32 *pmu_irqs)
2004 {
2005 	const struct arm_smmu_match_data *data;
2006 	struct device *dev = smmu->dev;
2007 	bool legacy_binding;
2008 
2009 	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
2010 		return dev_err_probe(dev, -ENODEV,
2011 				     "missing #global-interrupts property\n");
2012 	*pmu_irqs = 0;
2013 
2014 	data = of_device_get_match_data(dev);
2015 	smmu->version = data->version;
2016 	smmu->model = data->model;
2017 
2018 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2019 	if (legacy_binding && !using_generic_binding) {
2020 		if (!using_legacy_binding) {
2021 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2022 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2023 		}
2024 		using_legacy_binding = true;
2025 	} else if (!legacy_binding && !using_legacy_binding) {
2026 		using_generic_binding = true;
2027 	} else {
2028 		dev_err(dev, "not probing due to mismatched DT properties\n");
2029 		return -ENODEV;
2030 	}
2031 
2032 	if (of_dma_is_coherent(dev->of_node))
2033 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2034 
2035 	return 0;
2036 }
2037 
2038 static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
2039 {
2040 	struct list_head rmr_list;
2041 	struct iommu_resv_region *e;
2042 	int idx, cnt = 0;
2043 	u32 reg;
2044 
2045 	INIT_LIST_HEAD(&rmr_list);
2046 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2047 
2048 	/*
2049 	 * Rather than trying to look at existing mappings that
2050 	 * are setup by the firmware and then invalidate the ones
2051 	 * that do no have matching RMR entries, just disable the
2052 	 * SMMU until it gets enabled again in the reset routine.
2053 	 */
2054 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
2055 	reg |= ARM_SMMU_sCR0_CLIENTPD;
2056 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
2057 
2058 	list_for_each_entry(e, &rmr_list, list) {
2059 		struct iommu_iort_rmr_data *rmr;
2060 		int i;
2061 
2062 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
2063 		for (i = 0; i < rmr->num_sids; i++) {
2064 			idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0);
2065 			if (idx < 0)
2066 				continue;
2067 
2068 			if (smmu->s2crs[idx].count == 0) {
2069 				smmu->smrs[idx].id = rmr->sids[i];
2070 				smmu->smrs[idx].mask = 0;
2071 				smmu->smrs[idx].valid = true;
2072 			}
2073 			smmu->s2crs[idx].count++;
2074 			smmu->s2crs[idx].type = S2CR_TYPE_BYPASS;
2075 			smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
2076 
2077 			cnt++;
2078 		}
2079 	}
2080 
2081 	dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
2082 		   cnt == 1 ? "" : "s");
2083 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2084 }
2085 
2086 static int arm_smmu_device_probe(struct platform_device *pdev)
2087 {
2088 	struct resource *res;
2089 	struct arm_smmu_device *smmu;
2090 	struct device *dev = &pdev->dev;
2091 	int num_irqs, i, err;
2092 	u32 global_irqs, pmu_irqs;
2093 	irqreturn_t (*global_fault)(int irq, void *dev);
2094 
2095 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2096 	if (!smmu) {
2097 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2098 		return -ENOMEM;
2099 	}
2100 	smmu->dev = dev;
2101 
2102 	if (dev->of_node)
2103 		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
2104 	else
2105 		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
2106 	if (err)
2107 		return err;
2108 
2109 	smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
2110 	if (IS_ERR(smmu->base))
2111 		return PTR_ERR(smmu->base);
2112 	smmu->ioaddr = res->start;
2113 
2114 	/*
2115 	 * The resource size should effectively match the value of SMMU_TOP;
2116 	 * stash that temporarily until we know PAGESIZE to validate it with.
2117 	 */
2118 	smmu->numpage = resource_size(res);
2119 
2120 	smmu = arm_smmu_impl_init(smmu);
2121 	if (IS_ERR(smmu))
2122 		return PTR_ERR(smmu);
2123 
2124 	num_irqs = platform_irq_count(pdev);
2125 
2126 	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
2127 	if (smmu->num_context_irqs <= 0)
2128 		return dev_err_probe(dev, -ENODEV,
2129 				"found %d interrupts but expected at least %d\n",
2130 				num_irqs, global_irqs + pmu_irqs + 1);
2131 
2132 	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
2133 				  sizeof(*smmu->irqs), GFP_KERNEL);
2134 	if (!smmu->irqs)
2135 		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
2136 				     smmu->num_context_irqs);
2137 
2138 	for (i = 0; i < smmu->num_context_irqs; i++) {
2139 		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);
2140 
2141 		if (irq < 0)
2142 			return irq;
2143 		smmu->irqs[i] = irq;
2144 	}
2145 
2146 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2147 	if (err < 0) {
2148 		dev_err(dev, "failed to get clocks %d\n", err);
2149 		return err;
2150 	}
2151 	smmu->num_clks = err;
2152 
2153 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2154 	if (err)
2155 		return err;
2156 
2157 	err = arm_smmu_device_cfg_probe(smmu);
2158 	if (err)
2159 		return err;
2160 
2161 	if (smmu->version == ARM_SMMU_V2) {
2162 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2163 			dev_err(dev,
2164 			      "found only %d context irq(s) but %d required\n",
2165 			      smmu->num_context_irqs, smmu->num_context_banks);
2166 			return -ENODEV;
2167 		}
2168 
2169 		/* Ignore superfluous interrupts */
2170 		smmu->num_context_irqs = smmu->num_context_banks;
2171 	}
2172 
2173 	if (smmu->impl && smmu->impl->global_fault)
2174 		global_fault = smmu->impl->global_fault;
2175 	else
2176 		global_fault = arm_smmu_global_fault;
2177 
2178 	for (i = 0; i < global_irqs; i++) {
2179 		int irq = platform_get_irq(pdev, i);
2180 
2181 		if (irq < 0)
2182 			return irq;
2183 
2184 		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
2185 				       "arm-smmu global fault", smmu);
2186 		if (err)
2187 			return dev_err_probe(dev, err,
2188 					"failed to request global IRQ %d (%u)\n",
2189 					i, irq);
2190 	}
2191 
2192 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2193 				     "smmu.%pa", &smmu->ioaddr);
2194 	if (err) {
2195 		dev_err(dev, "Failed to register iommu in sysfs\n");
2196 		return err;
2197 	}
2198 
2199 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops,
2200 				    using_legacy_binding ? NULL : dev);
2201 	if (err) {
2202 		dev_err(dev, "Failed to register iommu\n");
2203 		iommu_device_sysfs_remove(&smmu->iommu);
2204 		return err;
2205 	}
2206 
2207 	platform_set_drvdata(pdev, smmu);
2208 
2209 	/* Check for RMRs and install bypass SMRs if any */
2210 	arm_smmu_rmr_install_bypass_smr(smmu);
2211 
2212 	arm_smmu_device_reset(smmu);
2213 	arm_smmu_test_smr_masks(smmu);
2214 
2215 	/*
2216 	 * We want to avoid touching dev->power.lock in fastpaths unless
2217 	 * it's really going to do something useful - pm_runtime_enabled()
2218 	 * can serve as an ideal proxy for that decision. So, conditionally
2219 	 * enable pm_runtime.
2220 	 */
2221 	if (dev->pm_domain) {
2222 		pm_runtime_set_active(dev);
2223 		pm_runtime_enable(dev);
2224 	}
2225 
2226 	return 0;
2227 }
2228 
2229 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2230 {
2231 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2232 
2233 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2234 		dev_notice(&pdev->dev, "disabling translation\n");
2235 
2236 	arm_smmu_rpm_get(smmu);
2237 	/* Turn the thing off */
2238 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2239 	arm_smmu_rpm_put(smmu);
2240 
2241 	if (pm_runtime_enabled(smmu->dev))
2242 		pm_runtime_force_suspend(smmu->dev);
2243 	else
2244 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2245 
2246 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2247 }
2248 
2249 static void arm_smmu_device_remove(struct platform_device *pdev)
2250 {
2251 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2252 
2253 	iommu_device_unregister(&smmu->iommu);
2254 	iommu_device_sysfs_remove(&smmu->iommu);
2255 
2256 	arm_smmu_device_shutdown(pdev);
2257 }
2258 
2259 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2260 {
2261 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2262 	int ret;
2263 
2264 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2265 	if (ret)
2266 		return ret;
2267 
2268 	arm_smmu_device_reset(smmu);
2269 
2270 	return 0;
2271 }
2272 
2273 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2274 {
2275 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2276 
2277 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2278 
2279 	return 0;
2280 }
2281 
2282 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2283 {
2284 	int ret;
2285 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2286 
2287 	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2288 	if (ret)
2289 		return ret;
2290 
2291 	if (pm_runtime_suspended(dev))
2292 		return 0;
2293 
2294 	ret = arm_smmu_runtime_resume(dev);
2295 	if (ret)
2296 		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2297 
2298 	return ret;
2299 }
2300 
2301 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2302 {
2303 	int ret = 0;
2304 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2305 
2306 	if (pm_runtime_suspended(dev))
2307 		goto clk_unprepare;
2308 
2309 	ret = arm_smmu_runtime_suspend(dev);
2310 	if (ret)
2311 		return ret;
2312 
2313 clk_unprepare:
2314 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2315 	return ret;
2316 }
2317 
2318 static const struct dev_pm_ops arm_smmu_pm_ops = {
2319 	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2320 	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2321 			   arm_smmu_runtime_resume, NULL)
2322 };
2323 
2324 static struct platform_driver arm_smmu_driver = {
2325 	.driver	= {
2326 		.name			= "arm-smmu",
2327 		.of_match_table		= arm_smmu_of_match,
2328 		.pm			= &arm_smmu_pm_ops,
2329 		.suppress_bind_attrs    = true,
2330 	},
2331 	.probe	= arm_smmu_device_probe,
2332 	.remove_new = arm_smmu_device_remove,
2333 	.shutdown = arm_smmu_device_shutdown,
2334 };
2335 module_platform_driver(arm_smmu_driver);
2336 
2337 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2338 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2339 MODULE_ALIAS("platform:arm-smmu");
2340 MODULE_LICENSE("GPL v2");
2341