xref: /linux/drivers/iommu/iommu.c (revision 336b4dae6dfecc9aa53a3a68c71b9c1c1d466388)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
4   * Author: Joerg Roedel <jroedel@suse.de>
5   */
6  
7  #define pr_fmt(fmt)    "iommu: " fmt
8  
9  #include <linux/amba/bus.h>
10  #include <linux/device.h>
11  #include <linux/kernel.h>
12  #include <linux/bits.h>
13  #include <linux/bug.h>
14  #include <linux/types.h>
15  #include <linux/init.h>
16  #include <linux/export.h>
17  #include <linux/slab.h>
18  #include <linux/errno.h>
19  #include <linux/host1x_context_bus.h>
20  #include <linux/iommu.h>
21  #include <linux/idr.h>
22  #include <linux/err.h>
23  #include <linux/pci.h>
24  #include <linux/pci-ats.h>
25  #include <linux/bitops.h>
26  #include <linux/platform_device.h>
27  #include <linux/property.h>
28  #include <linux/fsl/mc.h>
29  #include <linux/module.h>
30  #include <linux/cc_platform.h>
31  #include <linux/cdx/cdx_bus.h>
32  #include <trace/events/iommu.h>
33  #include <linux/sched/mm.h>
34  #include <linux/msi.h>
35  #include <uapi/linux/iommufd.h>
36  
37  #include "dma-iommu.h"
38  #include "iommu-priv.h"
39  
40  static struct kset *iommu_group_kset;
41  static DEFINE_IDA(iommu_group_ida);
42  static DEFINE_IDA(iommu_global_pasid_ida);
43  
44  static unsigned int iommu_def_domain_type __read_mostly;
45  static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
46  static u32 iommu_cmd_line __read_mostly;
47  
48  /* Tags used with xa_tag_pointer() in group->pasid_array */
49  enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 };
50  
51  struct iommu_group {
52  	struct kobject kobj;
53  	struct kobject *devices_kobj;
54  	struct list_head devices;
55  	struct xarray pasid_array;
56  	struct mutex mutex;
57  	void *iommu_data;
58  	void (*iommu_data_release)(void *iommu_data);
59  	char *name;
60  	int id;
61  	struct iommu_domain *default_domain;
62  	struct iommu_domain *blocking_domain;
63  	struct iommu_domain *domain;
64  	struct list_head entry;
65  	unsigned int owner_cnt;
66  	void *owner;
67  };
68  
69  struct group_device {
70  	struct list_head list;
71  	struct device *dev;
72  	char *name;
73  };
74  
75  /* Iterate over each struct group_device in a struct iommu_group */
76  #define for_each_group_device(group, pos) \
77  	list_for_each_entry(pos, &(group)->devices, list)
78  
79  struct iommu_group_attribute {
80  	struct attribute attr;
81  	ssize_t (*show)(struct iommu_group *group, char *buf);
82  	ssize_t (*store)(struct iommu_group *group,
83  			 const char *buf, size_t count);
84  };
85  
86  static const char * const iommu_group_resv_type_string[] = {
87  	[IOMMU_RESV_DIRECT]			= "direct",
88  	[IOMMU_RESV_DIRECT_RELAXABLE]		= "direct-relaxable",
89  	[IOMMU_RESV_RESERVED]			= "reserved",
90  	[IOMMU_RESV_MSI]			= "msi",
91  	[IOMMU_RESV_SW_MSI]			= "msi",
92  };
93  
94  #define IOMMU_CMD_LINE_DMA_API		BIT(0)
95  #define IOMMU_CMD_LINE_STRICT		BIT(1)
96  
97  static int bus_iommu_probe(const struct bus_type *bus);
98  static int iommu_bus_notifier(struct notifier_block *nb,
99  			      unsigned long action, void *data);
100  static void iommu_release_device(struct device *dev);
101  static int __iommu_attach_device(struct iommu_domain *domain,
102  				 struct device *dev);
103  static int __iommu_attach_group(struct iommu_domain *domain,
104  				struct iommu_group *group);
105  static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev,
106  						       unsigned int type,
107  						       unsigned int flags);
108  
109  enum {
110  	IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
111  };
112  
113  static int __iommu_device_set_domain(struct iommu_group *group,
114  				     struct device *dev,
115  				     struct iommu_domain *new_domain,
116  				     unsigned int flags);
117  static int __iommu_group_set_domain_internal(struct iommu_group *group,
118  					     struct iommu_domain *new_domain,
119  					     unsigned int flags);
120  static int __iommu_group_set_domain(struct iommu_group *group,
121  				    struct iommu_domain *new_domain)
122  {
123  	return __iommu_group_set_domain_internal(group, new_domain, 0);
124  }
125  static void __iommu_group_set_domain_nofail(struct iommu_group *group,
126  					    struct iommu_domain *new_domain)
127  {
128  	WARN_ON(__iommu_group_set_domain_internal(
129  		group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
130  }
131  
132  static int iommu_setup_default_domain(struct iommu_group *group,
133  				      int target_type);
134  static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
135  					       struct device *dev);
136  static ssize_t iommu_group_store_type(struct iommu_group *group,
137  				      const char *buf, size_t count);
138  static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
139  						     struct device *dev);
140  static void __iommu_group_free_device(struct iommu_group *group,
141  				      struct group_device *grp_dev);
142  static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
143  			      const struct iommu_ops *ops);
144  
145  #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
146  struct iommu_group_attribute iommu_group_attr_##_name =		\
147  	__ATTR(_name, _mode, _show, _store)
148  
149  #define to_iommu_group_attr(_attr)	\
150  	container_of(_attr, struct iommu_group_attribute, attr)
151  #define to_iommu_group(_kobj)		\
152  	container_of(_kobj, struct iommu_group, kobj)
153  
154  static LIST_HEAD(iommu_device_list);
155  static DEFINE_SPINLOCK(iommu_device_lock);
156  
157  static const struct bus_type * const iommu_buses[] = {
158  	&platform_bus_type,
159  #ifdef CONFIG_PCI
160  	&pci_bus_type,
161  #endif
162  #ifdef CONFIG_ARM_AMBA
163  	&amba_bustype,
164  #endif
165  #ifdef CONFIG_FSL_MC_BUS
166  	&fsl_mc_bus_type,
167  #endif
168  #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
169  	&host1x_context_device_bus_type,
170  #endif
171  #ifdef CONFIG_CDX_BUS
172  	&cdx_bus_type,
173  #endif
174  };
175  
176  /*
177   * Use a function instead of an array here because the domain-type is a
178   * bit-field, so an array would waste memory.
179   */
180  static const char *iommu_domain_type_str(unsigned int t)
181  {
182  	switch (t) {
183  	case IOMMU_DOMAIN_BLOCKED:
184  		return "Blocked";
185  	case IOMMU_DOMAIN_IDENTITY:
186  		return "Passthrough";
187  	case IOMMU_DOMAIN_UNMANAGED:
188  		return "Unmanaged";
189  	case IOMMU_DOMAIN_DMA:
190  	case IOMMU_DOMAIN_DMA_FQ:
191  		return "Translated";
192  	case IOMMU_DOMAIN_PLATFORM:
193  		return "Platform";
194  	default:
195  		return "Unknown";
196  	}
197  }
198  
199  static int __init iommu_subsys_init(void)
200  {
201  	struct notifier_block *nb;
202  
203  	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
204  		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
205  			iommu_set_default_passthrough(false);
206  		else
207  			iommu_set_default_translated(false);
208  
209  		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
210  			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
211  			iommu_set_default_translated(false);
212  		}
213  	}
214  
215  	if (!iommu_default_passthrough() && !iommu_dma_strict)
216  		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
217  
218  	pr_info("Default domain type: %s%s\n",
219  		iommu_domain_type_str(iommu_def_domain_type),
220  		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
221  			" (set via kernel command line)" : "");
222  
223  	if (!iommu_default_passthrough())
224  		pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
225  			iommu_dma_strict ? "strict" : "lazy",
226  			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
227  				" (set via kernel command line)" : "");
228  
229  	nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
230  	if (!nb)
231  		return -ENOMEM;
232  
233  	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
234  		nb[i].notifier_call = iommu_bus_notifier;
235  		bus_register_notifier(iommu_buses[i], &nb[i]);
236  	}
237  
238  	return 0;
239  }
240  subsys_initcall(iommu_subsys_init);
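/*
 * With the built-in defaults (translated default domain, lazy invalidation)
 * the initcall above would typically log something along the lines of:
 *
 *	iommu: Default domain type: Translated
 *	iommu: DMA domain TLB invalidation policy: lazy mode
 *
 * where the "iommu: " prefix comes from the pr_fmt() definition at the top of
 * this file and the exact wording follows the pr_info() calls above.
 */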
241  
242  static int remove_iommu_group(struct device *dev, void *data)
243  {
244  	if (dev->iommu && dev->iommu->iommu_dev == data)
245  		iommu_release_device(dev);
246  
247  	return 0;
248  }
249  
250  /**
251   * iommu_device_register() - Register an IOMMU hardware instance
252   * @iommu: IOMMU handle for the instance
253   * @ops:   IOMMU ops to associate with the instance
254   * @hwdev: (optional) actual instance device, used for fwnode lookup
255   *
256   * Return: 0 on success, or an error.
257   */
258  int iommu_device_register(struct iommu_device *iommu,
259  			  const struct iommu_ops *ops, struct device *hwdev)
260  {
261  	int err = 0;
262  
263  	/* We need to be able to take module references appropriately */
264  	if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner))
265  		return -EINVAL;
266  
267  	iommu->ops = ops;
268  	if (hwdev)
269  		iommu->fwnode = dev_fwnode(hwdev);
270  
271  	spin_lock(&iommu_device_lock);
272  	list_add_tail(&iommu->list, &iommu_device_list);
273  	spin_unlock(&iommu_device_lock);
274  
275  	for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++)
276  		err = bus_iommu_probe(iommu_buses[i]);
277  	if (err)
278  		iommu_device_unregister(iommu);
279  	return err;
280  }
281  EXPORT_SYMBOL_GPL(iommu_device_register);
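/*
 * A minimal registration sketch, assuming a hypothetical "foo" driver whose
 * per-instance state embeds a struct iommu_device (the foo_* names are
 * illustrative, not an existing driver):
 *
 *	static int foo_probe(struct platform_device *pdev)
 *	{
 *		struct foo_iommu *foo;
 *
 *		foo = devm_kzalloc(&pdev->dev, sizeof(*foo), GFP_KERNEL);
 *		if (!foo)
 *			return -ENOMEM;
 *
 *		return iommu_device_register(&foo->iommu, &foo_ops, &pdev->dev);
 *	}
 *
 * The matching teardown on remove is iommu_device_unregister(&foo->iommu).
 */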
282  
283  void iommu_device_unregister(struct iommu_device *iommu)
284  {
285  	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
286  		bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group);
287  
288  	spin_lock(&iommu_device_lock);
289  	list_del(&iommu->list);
290  	spin_unlock(&iommu_device_lock);
291  
292  	/* Pairs with the alloc in generic_single_device_group() */
293  	iommu_group_put(iommu->singleton_group);
294  	iommu->singleton_group = NULL;
295  }
296  EXPORT_SYMBOL_GPL(iommu_device_unregister);
297  
298  #if IS_ENABLED(CONFIG_IOMMUFD_TEST)
299  void iommu_device_unregister_bus(struct iommu_device *iommu,
300  				 const struct bus_type *bus,
301  				 struct notifier_block *nb)
302  {
303  	bus_unregister_notifier(bus, nb);
304  	iommu_device_unregister(iommu);
305  }
306  EXPORT_SYMBOL_GPL(iommu_device_unregister_bus);
307  
308  /*
309   * Register an iommu driver against a single bus. This is only used by iommufd
310   * selftest to create a mock iommu driver. The caller must provide
311   * some memory to hold a notifier_block.
312   */
313  int iommu_device_register_bus(struct iommu_device *iommu,
314  			      const struct iommu_ops *ops,
315  			      const struct bus_type *bus,
316  			      struct notifier_block *nb)
317  {
318  	int err;
319  
320  	iommu->ops = ops;
321  	nb->notifier_call = iommu_bus_notifier;
322  	err = bus_register_notifier(bus, nb);
323  	if (err)
324  		return err;
325  
326  	spin_lock(&iommu_device_lock);
327  	list_add_tail(&iommu->list, &iommu_device_list);
328  	spin_unlock(&iommu_device_lock);
329  
330  	err = bus_iommu_probe(bus);
331  	if (err) {
332  		iommu_device_unregister_bus(iommu, bus, nb);
333  		return err;
334  	}
335  	return 0;
336  }
337  EXPORT_SYMBOL_GPL(iommu_device_register_bus);
338  #endif
339  
340  static struct dev_iommu *dev_iommu_get(struct device *dev)
341  {
342  	struct dev_iommu *param = dev->iommu;
343  
344  	lockdep_assert_held(&iommu_probe_device_lock);
345  
346  	if (param)
347  		return param;
348  
349  	param = kzalloc(sizeof(*param), GFP_KERNEL);
350  	if (!param)
351  		return NULL;
352  
353  	mutex_init(&param->lock);
354  	dev->iommu = param;
355  	return param;
356  }
357  
358  void dev_iommu_free(struct device *dev)
359  {
360  	struct dev_iommu *param = dev->iommu;
361  
362  	dev->iommu = NULL;
363  	if (param->fwspec) {
364  		fwnode_handle_put(param->fwspec->iommu_fwnode);
365  		kfree(param->fwspec);
366  	}
367  	kfree(param);
368  }
369  
370  /*
371   * Internal equivalent of device_iommu_mapped() for when we care that a device
372   * actually has API ops, and don't want false positives from VFIO-only groups.
373   */
374  static bool dev_has_iommu(struct device *dev)
375  {
376  	return dev->iommu && dev->iommu->iommu_dev;
377  }
378  
379  static u32 dev_iommu_get_max_pasids(struct device *dev)
380  {
381  	u32 max_pasids = 0, bits = 0;
382  	int ret;
383  
384  	if (dev_is_pci(dev)) {
385  		ret = pci_max_pasids(to_pci_dev(dev));
386  		if (ret > 0)
387  			max_pasids = ret;
388  	} else {
389  		ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
390  		if (!ret)
391  			max_pasids = 1UL << bits;
392  	}
393  
394  	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
395  }
396  
397  void dev_iommu_priv_set(struct device *dev, void *priv)
398  {
399  	/* FSL_PAMU does something weird */
400  	if (!IS_ENABLED(CONFIG_FSL_PAMU))
401  		lockdep_assert_held(&iommu_probe_device_lock);
402  	dev->iommu->priv = priv;
403  }
404  EXPORT_SYMBOL_GPL(dev_iommu_priv_set);
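/*
 * A sketch of the usual pairing, assuming a hypothetical driver that tracks
 * per-device "master" state from its ->probe_device() callback (names are
 * illustrative); the value is read back later with dev_iommu_priv_get():
 *
 *	struct foo_master *master = kzalloc(sizeof(*master), GFP_KERNEL);
 *
 *	if (!master)
 *		return ERR_PTR(-ENOMEM);
 *	dev_iommu_priv_set(dev, master);
 *	...
 *	master = dev_iommu_priv_get(dev);
 */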
405  
406  /*
407   * Init the dev->iommu and dev->iommu_group in the struct device and get the
408   * driver probed
409   */
410  static int iommu_init_device(struct device *dev)
411  {
412  	const struct iommu_ops *ops;
413  	struct iommu_device *iommu_dev;
414  	struct iommu_group *group;
415  	int ret;
416  
417  	if (!dev_iommu_get(dev))
418  		return -ENOMEM;
419  	/*
420  	 * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing
421  	 * is buried in the bus dma_configure path. Properly unpicking that is
422  	 * still a big job, so for now just invoke the whole thing. The device
423  	 * already having a driver bound means dma_configure has already run and
424  	 * either found no IOMMU to wait for, or we're in its replay call right
425  	 * now, so either way there's no point calling it again.
426  	 */
427  	if (!dev->driver && dev->bus->dma_configure) {
428  		mutex_unlock(&iommu_probe_device_lock);
429  		dev->bus->dma_configure(dev);
430  		mutex_lock(&iommu_probe_device_lock);
431  	}
432  	/*
433  	 * At this point, relevant devices either now have a fwspec which will
434  	 * match ops registered with a non-NULL fwnode, or we can reasonably
435  	 * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
436  	 * be present, and that any of their registered instances has suitable
437  	 * ops for probing, and thus cheekily co-opt the same mechanism.
438  	 */
439  	ops = iommu_fwspec_ops(dev->iommu->fwspec);
440  	if (!ops) {
441  		ret = -ENODEV;
442  		goto err_free;
443  	}
444  
445  	if (!try_module_get(ops->owner)) {
446  		ret = -EINVAL;
447  		goto err_free;
448  	}
449  
450  	iommu_dev = ops->probe_device(dev);
451  	if (IS_ERR(iommu_dev)) {
452  		ret = PTR_ERR(iommu_dev);
453  		goto err_module_put;
454  	}
455  	dev->iommu->iommu_dev = iommu_dev;
456  
457  	ret = iommu_device_link(iommu_dev, dev);
458  	if (ret)
459  		goto err_release;
460  
461  	group = ops->device_group(dev);
462  	if (WARN_ON_ONCE(group == NULL))
463  		group = ERR_PTR(-EINVAL);
464  	if (IS_ERR(group)) {
465  		ret = PTR_ERR(group);
466  		goto err_unlink;
467  	}
468  	dev->iommu_group = group;
469  
470  	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
471  	if (ops->is_attach_deferred)
472  		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
473  	return 0;
474  
475  err_unlink:
476  	iommu_device_unlink(iommu_dev, dev);
477  err_release:
478  	if (ops->release_device)
479  		ops->release_device(dev);
480  err_module_put:
481  	module_put(ops->owner);
482  err_free:
483  	dev->iommu->iommu_dev = NULL;
484  	dev_iommu_free(dev);
485  	return ret;
486  }
487  
488  static void iommu_deinit_device(struct device *dev)
489  {
490  	struct iommu_group *group = dev->iommu_group;
491  	const struct iommu_ops *ops = dev_iommu_ops(dev);
492  
493  	lockdep_assert_held(&group->mutex);
494  
495  	iommu_device_unlink(dev->iommu->iommu_dev, dev);
496  
497  	/*
498  	 * release_device() must stop using any attached domain on the device.
499  	 * If there are still other devices in the group, they are not affected
500  	 * by this callback.
501  	 *
502  	 * If the iommu driver provides release_domain, the core code ensures
503  	 * that domain is attached prior to calling release_device. Drivers can
504  	 * use this to enforce a translation on the idle iommu. Typically, the
505  	 * global static blocked_domain is a good choice.
506  	 *
507  	 * Otherwise, the iommu driver must set the device to either an identity
508  	 * or a blocking translation in release_device() and stop using any
509  	 * domain pointer, as it is going to be freed.
510  	 *
511  	 * Regardless, if a delayed attach never occurred, then the release
512  	 * should still avoid touching any hardware configuration either.
513  	 */
514  	if (!dev->iommu->attach_deferred && ops->release_domain)
515  		ops->release_domain->ops->attach_dev(ops->release_domain, dev);
516  
517  	if (ops->release_device)
518  		ops->release_device(dev);
519  
520  	/*
521  	 * If this is the last driver to use the group then we must free the
522  	 * domains before we do the module_put().
523  	 */
524  	if (list_empty(&group->devices)) {
525  		if (group->default_domain) {
526  			iommu_domain_free(group->default_domain);
527  			group->default_domain = NULL;
528  		}
529  		if (group->blocking_domain) {
530  			iommu_domain_free(group->blocking_domain);
531  			group->blocking_domain = NULL;
532  		}
533  		group->domain = NULL;
534  	}
535  
536  	/* Caller must put iommu_group */
537  	dev->iommu_group = NULL;
538  	module_put(ops->owner);
539  	dev_iommu_free(dev);
540  }
541  
542  DEFINE_MUTEX(iommu_probe_device_lock);
543  
544  static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
545  {
546  	struct iommu_group *group;
547  	struct group_device *gdev;
548  	int ret;
549  
550  	/*
551  	 * Serialise to avoid races between IOMMU drivers registering in
552  	 * parallel and/or the "replay" calls from ACPI/OF code via client
553  	 * driver probe. Once the latter have been cleaned up we should
554  	 * probably be able to use device_lock() here to minimise the scope,
555  	 * but for now enforcing a simple global ordering is fine.
556  	 */
557  	lockdep_assert_held(&iommu_probe_device_lock);
558  
559  	/* Device is probed already if in a group */
560  	if (dev->iommu_group)
561  		return 0;
562  
563  	ret = iommu_init_device(dev);
564  	if (ret)
565  		return ret;
566  	/*
567  	 * And if we do now see any replay calls, they would indicate someone
568  	 * misusing the dma_configure path outside bus code.
569  	 */
570  	if (dev->driver)
571  		dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n");
572  
573  	group = dev->iommu_group;
574  	gdev = iommu_group_alloc_device(group, dev);
575  	mutex_lock(&group->mutex);
576  	if (IS_ERR(gdev)) {
577  		ret = PTR_ERR(gdev);
578  		goto err_put_group;
579  	}
580  
581  	/*
582  	 * The gdev must be in the list before calling
583  	 * iommu_setup_default_domain()
584  	 */
585  	list_add_tail(&gdev->list, &group->devices);
586  	WARN_ON(group->default_domain && !group->domain);
587  	if (group->default_domain)
588  		iommu_create_device_direct_mappings(group->default_domain, dev);
589  	if (group->domain) {
590  		ret = __iommu_device_set_domain(group, dev, group->domain, 0);
591  		if (ret)
592  			goto err_remove_gdev;
593  	} else if (!group->default_domain && !group_list) {
594  		ret = iommu_setup_default_domain(group, 0);
595  		if (ret)
596  			goto err_remove_gdev;
597  	} else if (!group->default_domain) {
598  		/*
599  		 * With a group_list argument we defer the default_domain setup
600  		 * to the caller by providing a de-duplicated list of groups
601  		 * that need further setup.
602  		 */
603  		if (list_empty(&group->entry))
604  			list_add_tail(&group->entry, group_list);
605  	}
606  
607  	if (group->default_domain)
608  		iommu_setup_dma_ops(dev);
609  
610  	mutex_unlock(&group->mutex);
611  
612  	return 0;
613  
614  err_remove_gdev:
615  	list_del(&gdev->list);
616  	__iommu_group_free_device(group, gdev);
617  err_put_group:
618  	iommu_deinit_device(dev);
619  	mutex_unlock(&group->mutex);
620  	iommu_group_put(group);
621  
622  	return ret;
623  }
624  
625  int iommu_probe_device(struct device *dev)
626  {
627  	const struct iommu_ops *ops;
628  	int ret;
629  
630  	mutex_lock(&iommu_probe_device_lock);
631  	ret = __iommu_probe_device(dev, NULL);
632  	mutex_unlock(&iommu_probe_device_lock);
633  	if (ret)
634  		return ret;
635  
636  	ops = dev_iommu_ops(dev);
637  	if (ops->probe_finalize)
638  		ops->probe_finalize(dev);
639  
640  	return 0;
641  }
642  
643  static void __iommu_group_free_device(struct iommu_group *group,
644  				      struct group_device *grp_dev)
645  {
646  	struct device *dev = grp_dev->dev;
647  
648  	sysfs_remove_link(group->devices_kobj, grp_dev->name);
649  	sysfs_remove_link(&dev->kobj, "iommu_group");
650  
651  	trace_remove_device_from_group(group->id, dev);
652  
653  	/*
654  	 * If the group has become empty then ownership must have been
655  	 * released, and the current domain must be set back to NULL or
656  	 * the default domain.
657  	 */
658  	if (list_empty(&group->devices))
659  		WARN_ON(group->owner_cnt ||
660  			group->domain != group->default_domain);
661  
662  	kfree(grp_dev->name);
663  	kfree(grp_dev);
664  }
665  
666  /* Remove the iommu_group from the struct device. */
667  static void __iommu_group_remove_device(struct device *dev)
668  {
669  	struct iommu_group *group = dev->iommu_group;
670  	struct group_device *device;
671  
672  	mutex_lock(&group->mutex);
673  	for_each_group_device(group, device) {
674  		if (device->dev != dev)
675  			continue;
676  
677  		list_del(&device->list);
678  		__iommu_group_free_device(group, device);
679  		if (dev_has_iommu(dev))
680  			iommu_deinit_device(dev);
681  		else
682  			dev->iommu_group = NULL;
683  		break;
684  	}
685  	mutex_unlock(&group->mutex);
686  
687  	/*
688  	 * Pairs with the get in iommu_init_device() or
689  	 * iommu_group_add_device()
690  	 */
691  	iommu_group_put(group);
692  }
693  
694  static void iommu_release_device(struct device *dev)
695  {
696  	struct iommu_group *group = dev->iommu_group;
697  
698  	if (group)
699  		__iommu_group_remove_device(dev);
700  
701  	/* Free any fwspec if no iommu_driver was ever attached */
702  	if (dev->iommu)
703  		dev_iommu_free(dev);
704  }
705  
706  static int __init iommu_set_def_domain_type(char *str)
707  {
708  	bool pt;
709  	int ret;
710  
711  	ret = kstrtobool(str, &pt);
712  	if (ret)
713  		return ret;
714  
715  	if (pt)
716  		iommu_set_default_passthrough(true);
717  	else
718  		iommu_set_default_translated(true);
719  
720  	return 0;
721  }
722  early_param("iommu.passthrough", iommu_set_def_domain_type);
723  
724  static int __init iommu_dma_setup(char *str)
725  {
726  	int ret = kstrtobool(str, &iommu_dma_strict);
727  
728  	if (!ret)
729  		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
730  	return ret;
731  }
732  early_param("iommu.strict", iommu_dma_setup);
733  
734  void iommu_set_dma_strict(void)
735  {
736  	iommu_dma_strict = true;
737  	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
738  		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
739  }
740  
741  static ssize_t iommu_group_attr_show(struct kobject *kobj,
742  				     struct attribute *__attr, char *buf)
743  {
744  	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
745  	struct iommu_group *group = to_iommu_group(kobj);
746  	ssize_t ret = -EIO;
747  
748  	if (attr->show)
749  		ret = attr->show(group, buf);
750  	return ret;
751  }
752  
753  static ssize_t iommu_group_attr_store(struct kobject *kobj,
754  				      struct attribute *__attr,
755  				      const char *buf, size_t count)
756  {
757  	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
758  	struct iommu_group *group = to_iommu_group(kobj);
759  	ssize_t ret = -EIO;
760  
761  	if (attr->store)
762  		ret = attr->store(group, buf, count);
763  	return ret;
764  }
765  
766  static const struct sysfs_ops iommu_group_sysfs_ops = {
767  	.show = iommu_group_attr_show,
768  	.store = iommu_group_attr_store,
769  };
770  
771  static int iommu_group_create_file(struct iommu_group *group,
772  				   struct iommu_group_attribute *attr)
773  {
774  	return sysfs_create_file(&group->kobj, &attr->attr);
775  }
776  
777  static void iommu_group_remove_file(struct iommu_group *group,
778  				    struct iommu_group_attribute *attr)
779  {
780  	sysfs_remove_file(&group->kobj, &attr->attr);
781  }
782  
783  static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
784  {
785  	return sysfs_emit(buf, "%s\n", group->name);
786  }
787  
788  /**
789   * iommu_insert_resv_region - Insert a new region in the
790   * list of reserved regions.
791   * @new: new region to insert
792   * @regions: list of regions
793   *
794   * Elements are sorted by start address and overlapping segments
795   * of the same type are merged.
796   */
797  static int iommu_insert_resv_region(struct iommu_resv_region *new,
798  				    struct list_head *regions)
799  {
800  	struct iommu_resv_region *iter, *tmp, *nr, *top;
801  	LIST_HEAD(stack);
802  
803  	nr = iommu_alloc_resv_region(new->start, new->length,
804  				     new->prot, new->type, GFP_KERNEL);
805  	if (!nr)
806  		return -ENOMEM;
807  
808  	/* First add the new element based on start address sorting */
809  	list_for_each_entry(iter, regions, list) {
810  		if (nr->start < iter->start ||
811  		    (nr->start == iter->start && nr->type <= iter->type))
812  			break;
813  	}
814  	list_add_tail(&nr->list, &iter->list);
815  
816  	/* Merge overlapping segments of type nr->type in @regions, if any */
817  	list_for_each_entry_safe(iter, tmp, regions, list) {
818  		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;
819  
820  		/* no merge needed on elements of different types than @new */
821  		if (iter->type != new->type) {
822  			list_move_tail(&iter->list, &stack);
823  			continue;
824  		}
825  
826  		/* look for the last stack element of same type as @iter */
827  		list_for_each_entry_reverse(top, &stack, list)
828  			if (top->type == iter->type)
829  				goto check_overlap;
830  
831  		list_move_tail(&iter->list, &stack);
832  		continue;
833  
834  check_overlap:
835  		top_end = top->start + top->length - 1;
836  
837  		if (iter->start > top_end + 1) {
838  			list_move_tail(&iter->list, &stack);
839  		} else {
840  			top->length = max(top_end, iter_end) - top->start + 1;
841  			list_del(&iter->list);
842  			kfree(iter);
843  		}
844  	}
845  	list_splice(&stack, regions);
846  	return 0;
847  }
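/*
 * Worked example: inserting a "direct" region covering [0x8000, 0x8fff] into
 * a list that already contains a "direct" region [0x8800, 0x9fff] results in
 * a single merged "direct" entry [0x8000, 0x9fff]. An overlapping region of a
 * different type (e.g. "msi") is never merged with it and stays as its own
 * entry, sorted by start address.
 */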
848  
849  static int
850  iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
851  				 struct list_head *group_resv_regions)
852  {
853  	struct iommu_resv_region *entry;
854  	int ret = 0;
855  
856  	list_for_each_entry(entry, dev_resv_regions, list) {
857  		ret = iommu_insert_resv_region(entry, group_resv_regions);
858  		if (ret)
859  			break;
860  	}
861  	return ret;
862  }
863  
864  int iommu_get_group_resv_regions(struct iommu_group *group,
865  				 struct list_head *head)
866  {
867  	struct group_device *device;
868  	int ret = 0;
869  
870  	mutex_lock(&group->mutex);
871  	for_each_group_device(group, device) {
872  		struct list_head dev_resv_regions;
873  
874  		/*
875  		 * Non-API groups still expose reserved_regions in sysfs,
876  		 * so filter out calls that get here that way.
877  		 */
878  		if (!dev_has_iommu(device->dev))
879  			break;
880  
881  		INIT_LIST_HEAD(&dev_resv_regions);
882  		iommu_get_resv_regions(device->dev, &dev_resv_regions);
883  		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
884  		iommu_put_resv_regions(device->dev, &dev_resv_regions);
885  		if (ret)
886  			break;
887  	}
888  	mutex_unlock(&group->mutex);
889  	return ret;
890  }
891  EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);
892  
893  static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
894  					     char *buf)
895  {
896  	struct iommu_resv_region *region, *next;
897  	struct list_head group_resv_regions;
898  	int offset = 0;
899  
900  	INIT_LIST_HEAD(&group_resv_regions);
901  	iommu_get_group_resv_regions(group, &group_resv_regions);
902  
903  	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
904  		offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
905  					(long long)region->start,
906  					(long long)(region->start +
907  						    region->length - 1),
908  					iommu_group_resv_type_string[region->type]);
909  		kfree(region);
910  	}
911  
912  	return offset;
913  }
914  
915  static ssize_t iommu_group_show_type(struct iommu_group *group,
916  				     char *buf)
917  {
918  	char *type = "unknown";
919  
920  	mutex_lock(&group->mutex);
921  	if (group->default_domain) {
922  		switch (group->default_domain->type) {
923  		case IOMMU_DOMAIN_BLOCKED:
924  			type = "blocked";
925  			break;
926  		case IOMMU_DOMAIN_IDENTITY:
927  			type = "identity";
928  			break;
929  		case IOMMU_DOMAIN_UNMANAGED:
930  			type = "unmanaged";
931  			break;
932  		case IOMMU_DOMAIN_DMA:
933  			type = "DMA";
934  			break;
935  		case IOMMU_DOMAIN_DMA_FQ:
936  			type = "DMA-FQ";
937  			break;
938  		}
939  	}
940  	mutex_unlock(&group->mutex);
941  
942  	return sysfs_emit(buf, "%s\n", type);
943  }
944  
945  static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
946  
947  static IOMMU_GROUP_ATTR(reserved_regions, 0444,
948  			iommu_group_show_resv_regions, NULL);
949  
950  static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
951  			iommu_group_store_type);
952  
953  static void iommu_group_release(struct kobject *kobj)
954  {
955  	struct iommu_group *group = to_iommu_group(kobj);
956  
957  	pr_debug("Releasing group %d\n", group->id);
958  
959  	if (group->iommu_data_release)
960  		group->iommu_data_release(group->iommu_data);
961  
962  	ida_free(&iommu_group_ida, group->id);
963  
964  	/* Domains are free'd by iommu_deinit_device() */
965  	WARN_ON(group->default_domain);
966  	WARN_ON(group->blocking_domain);
967  
968  	kfree(group->name);
969  	kfree(group);
970  }
971  
972  static const struct kobj_type iommu_group_ktype = {
973  	.sysfs_ops = &iommu_group_sysfs_ops,
974  	.release = iommu_group_release,
975  };
976  
977  /**
978   * iommu_group_alloc - Allocate a new group
979   *
980   * This function is called by an iommu driver to allocate a new iommu
981   * group.  The iommu group represents the minimum granularity of the iommu.
982   * Upon successful return, the caller holds a reference to the supplied
983   * group in order to hold the group until devices are added.  Use
984   * iommu_group_put() to release this extra reference count, allowing the
985   * group to be automatically reclaimed once it has no devices or external
986   * references.
987   */
988  struct iommu_group *iommu_group_alloc(void)
989  {
990  	struct iommu_group *group;
991  	int ret;
992  
993  	group = kzalloc(sizeof(*group), GFP_KERNEL);
994  	if (!group)
995  		return ERR_PTR(-ENOMEM);
996  
997  	group->kobj.kset = iommu_group_kset;
998  	mutex_init(&group->mutex);
999  	INIT_LIST_HEAD(&group->devices);
1000  	INIT_LIST_HEAD(&group->entry);
1001  	xa_init(&group->pasid_array);
1002  
1003  	ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
1004  	if (ret < 0) {
1005  		kfree(group);
1006  		return ERR_PTR(ret);
1007  	}
1008  	group->id = ret;
1009  
1010  	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
1011  				   NULL, "%d", group->id);
1012  	if (ret) {
1013  		kobject_put(&group->kobj);
1014  		return ERR_PTR(ret);
1015  	}
1016  
1017  	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
1018  	if (!group->devices_kobj) {
1019  		kobject_put(&group->kobj); /* triggers .release & free */
1020  		return ERR_PTR(-ENOMEM);
1021  	}
1022  
1023  	/*
1024  	 * The devices_kobj holds a reference on the group kobject, so
1025  	 * as long as that exists so will the group.  We can therefore
1026  	 * use the devices_kobj for reference counting.
1027  	 */
1028  	kobject_put(&group->kobj);
1029  
1030  	ret = iommu_group_create_file(group,
1031  				      &iommu_group_attr_reserved_regions);
1032  	if (ret) {
1033  		kobject_put(group->devices_kobj);
1034  		return ERR_PTR(ret);
1035  	}
1036  
1037  	ret = iommu_group_create_file(group, &iommu_group_attr_type);
1038  	if (ret) {
1039  		kobject_put(group->devices_kobj);
1040  		return ERR_PTR(ret);
1041  	}
1042  
1043  	pr_debug("Allocated group %d\n", group->id);
1044  
1045  	return group;
1046  }
1047  EXPORT_SYMBOL_GPL(iommu_group_alloc);
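/*
 * A minimal sketch of a driver ->device_group() callback built on this,
 * assuming a hypothetical "foo" driver (error handling kept short):
 *
 *	static struct iommu_group *foo_device_group(struct device *dev)
 *	{
 *		struct iommu_group *group = iommu_group_alloc();
 *
 *		if (IS_ERR(group))
 *			return group;
 *		if (iommu_group_set_name(group, "foo"))
 *			dev_warn(dev, "could not name iommu group\n");
 *		return group;
 *	}
 */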
1048  
1049  /**
1050   * iommu_group_get_iommudata - retrieve iommu_data registered for a group
1051   * @group: the group
1052   *
1053   * iommu drivers can store data in the group for use when doing iommu
1054   * operations.  This function provides a way to retrieve it.  Caller
1055   * should hold a group reference.
1056   */
1057  void *iommu_group_get_iommudata(struct iommu_group *group)
1058  {
1059  	return group->iommu_data;
1060  }
1061  EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);
1062  
1063  /**
1064   * iommu_group_set_iommudata - set iommu_data for a group
1065   * @group: the group
1066   * @iommu_data: new data
1067   * @release: release function for iommu_data
1068   *
1069   * iommu drivers can store data in the group for use when doing iommu
1070   * operations.  This function provides a way to set the data after
1071   * the group has been allocated.  Caller should hold a group reference.
1072   */
1073  void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
1074  			       void (*release)(void *iommu_data))
1075  {
1076  	group->iommu_data = iommu_data;
1077  	group->iommu_data_release = release;
1078  }
1079  EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
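/*
 * Sketch of typical use with a driver-private structure (hypothetical
 * foo_group_data); the release callback runs from iommu_group_release() once
 * the last reference to the group is dropped:
 *
 *	static void foo_group_release(void *iommu_data)
 *	{
 *		kfree(iommu_data);
 *	}
 *
 *	data = kzalloc(sizeof(*data), GFP_KERNEL);
 *	if (data)
 *		iommu_group_set_iommudata(group, data, foo_group_release);
 *	...
 *	data = iommu_group_get_iommudata(group);
 */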
1080  
1081  /**
1082   * iommu_group_set_name - set name for a group
1083   * @group: the group
1084   * @name: name
1085   *
1086   * Allow iommu driver to set a name for a group.  When set it will
1087   * appear in a name attribute file under the group in sysfs.
1088   */
1089  int iommu_group_set_name(struct iommu_group *group, const char *name)
1090  {
1091  	int ret;
1092  
1093  	if (group->name) {
1094  		iommu_group_remove_file(group, &iommu_group_attr_name);
1095  		kfree(group->name);
1096  		group->name = NULL;
1097  		if (!name)
1098  			return 0;
1099  	}
1100  
1101  	group->name = kstrdup(name, GFP_KERNEL);
1102  	if (!group->name)
1103  		return -ENOMEM;
1104  
1105  	ret = iommu_group_create_file(group, &iommu_group_attr_name);
1106  	if (ret) {
1107  		kfree(group->name);
1108  		group->name = NULL;
1109  		return ret;
1110  	}
1111  
1112  	return 0;
1113  }
1114  EXPORT_SYMBOL_GPL(iommu_group_set_name);
1115  
1116  static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
1117  					       struct device *dev)
1118  {
1119  	struct iommu_resv_region *entry;
1120  	struct list_head mappings;
1121  	unsigned long pg_size;
1122  	int ret = 0;
1123  
1124  	pg_size = domain->pgsize_bitmap ? 1UL << __ffs(domain->pgsize_bitmap) : 0;
1125  	INIT_LIST_HEAD(&mappings);
1126  
1127  	if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size))
1128  		return -EINVAL;
1129  
1130  	iommu_get_resv_regions(dev, &mappings);
1131  
1132  	/* We need to consider overlapping regions for different devices */
1133  	list_for_each_entry(entry, &mappings, list) {
1134  		dma_addr_t start, end, addr;
1135  		size_t map_size = 0;
1136  
1137  		if (entry->type == IOMMU_RESV_DIRECT)
1138  			dev->iommu->require_direct = 1;
1139  
1140  		if ((entry->type != IOMMU_RESV_DIRECT &&
1141  		     entry->type != IOMMU_RESV_DIRECT_RELAXABLE) ||
1142  		    !iommu_is_dma_domain(domain))
1143  			continue;
1144  
1145  		start = ALIGN(entry->start, pg_size);
1146  		end   = ALIGN(entry->start + entry->length, pg_size);
1147  
1148  		for (addr = start; addr <= end; addr += pg_size) {
1149  			phys_addr_t phys_addr;
1150  
1151  			if (addr == end)
1152  				goto map_end;
1153  
1154  			phys_addr = iommu_iova_to_phys(domain, addr);
1155  			if (!phys_addr) {
1156  				map_size += pg_size;
1157  				continue;
1158  			}
1159  
1160  map_end:
1161  			if (map_size) {
1162  				ret = iommu_map(domain, addr - map_size,
1163  						addr - map_size, map_size,
1164  						entry->prot, GFP_KERNEL);
1165  				if (ret)
1166  					goto out;
1167  				map_size = 0;
1168  			}
1169  		}
1170  
1171  	}
1172  out:
1173  	iommu_put_resv_regions(dev, &mappings);
1174  
1175  	return ret;
1176  }
1177  
1178  /* This is undone by __iommu_group_free_device() */
1179  static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
1180  						     struct device *dev)
1181  {
1182  	int ret, i = 0;
1183  	struct group_device *device;
1184  
1185  	device = kzalloc(sizeof(*device), GFP_KERNEL);
1186  	if (!device)
1187  		return ERR_PTR(-ENOMEM);
1188  
1189  	device->dev = dev;
1190  
1191  	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
1192  	if (ret)
1193  		goto err_free_device;
1194  
1195  	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
1196  rename:
1197  	if (!device->name) {
1198  		ret = -ENOMEM;
1199  		goto err_remove_link;
1200  	}
1201  
1202  	ret = sysfs_create_link_nowarn(group->devices_kobj,
1203  				       &dev->kobj, device->name);
1204  	if (ret) {
1205  		if (ret == -EEXIST && i >= 0) {
1206  			/*
1207  			 * Account for the slim chance of collision
1208  			 * and append an instance to the name.
1209  			 */
1210  			kfree(device->name);
1211  			device->name = kasprintf(GFP_KERNEL, "%s.%d",
1212  						 kobject_name(&dev->kobj), i++);
1213  			goto rename;
1214  		}
1215  		goto err_free_name;
1216  	}
1217  
1218  	trace_add_device_to_group(group->id, dev);
1219  
1220  	dev_info(dev, "Adding to iommu group %d\n", group->id);
1221  
1222  	return device;
1223  
1224  err_free_name:
1225  	kfree(device->name);
1226  err_remove_link:
1227  	sysfs_remove_link(&dev->kobj, "iommu_group");
1228  err_free_device:
1229  	kfree(device);
1230  	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
1231  	return ERR_PTR(ret);
1232  }
1233  
1234  /**
1235   * iommu_group_add_device - add a device to an iommu group
1236   * @group: the group into which to add the device (reference should be held)
1237   * @dev: the device
1238   *
1239   * This function is called by an iommu driver to add a device into a
1240   * group.  Adding a device increments the group reference count.
1241   */
1242  int iommu_group_add_device(struct iommu_group *group, struct device *dev)
1243  {
1244  	struct group_device *gdev;
1245  
1246  	gdev = iommu_group_alloc_device(group, dev);
1247  	if (IS_ERR(gdev))
1248  		return PTR_ERR(gdev);
1249  
1250  	iommu_group_ref_get(group);
1251  	dev->iommu_group = group;
1252  
1253  	mutex_lock(&group->mutex);
1254  	list_add_tail(&gdev->list, &group->devices);
1255  	mutex_unlock(&group->mutex);
1256  	return 0;
1257  }
1258  EXPORT_SYMBOL_GPL(iommu_group_add_device);
1259  
1260  /**
1261   * iommu_group_remove_device - remove a device from its current group
1262   * @dev: device to be removed
1263   *
1264   * This function is called by an iommu driver to remove the device from
1265   * its current group.  This decrements the iommu group reference count.
1266   */
1267  void iommu_group_remove_device(struct device *dev)
1268  {
1269  	struct iommu_group *group = dev->iommu_group;
1270  
1271  	if (!group)
1272  		return;
1273  
1274  	dev_info(dev, "Removing from iommu group %d\n", group->id);
1275  
1276  	__iommu_group_remove_device(dev);
1277  }
1278  EXPORT_SYMBOL_GPL(iommu_group_remove_device);
1279  
1280  #if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
1281  /**
1282   * iommu_group_mutex_assert - Check device group mutex lock
1283   * @dev: the device that has group param set
1284   *
1285   * This function is called by an iommu driver to check whether it holds
1286   * the group mutex lock for the given device.
1287   *
1288   * Note that this function must be called after device group param is set.
1289   */
1290  void iommu_group_mutex_assert(struct device *dev)
1291  {
1292  	struct iommu_group *group = dev->iommu_group;
1293  
1294  	lockdep_assert_held(&group->mutex);
1295  }
1296  EXPORT_SYMBOL_GPL(iommu_group_mutex_assert);
1297  #endif
1298  
1299  static struct device *iommu_group_first_dev(struct iommu_group *group)
1300  {
1301  	lockdep_assert_held(&group->mutex);
1302  	return list_first_entry(&group->devices, struct group_device, list)->dev;
1303  }
1304  
1305  /**
1306   * iommu_group_for_each_dev - iterate over each device in the group
1307   * @group: the group
1308   * @data: caller opaque data to be passed to callback function
1309   * @fn: caller supplied callback function
1310   *
1311   * This function is called by group users to iterate over group devices.
1312   * Callers should hold a reference count to the group during callback.
1313   * The group->mutex is held across callbacks, which will block calls to
1314   * iommu_group_add/remove_device.
1315   */
1316  int iommu_group_for_each_dev(struct iommu_group *group, void *data,
1317  			     int (*fn)(struct device *, void *))
1318  {
1319  	struct group_device *device;
1320  	int ret = 0;
1321  
1322  	mutex_lock(&group->mutex);
1323  	for_each_group_device(group, device) {
1324  		ret = fn(device->dev, data);
1325  		if (ret)
1326  			break;
1327  	}
1328  	mutex_unlock(&group->mutex);
1329  
1330  	return ret;
1331  }
1332  EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
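/*
 * Sketch of a caller-supplied callback (illustrative names); a non-zero
 * return value stops the iteration and is propagated back to the caller:
 *
 *	static int foo_count_one(struct device *dev, void *data)
 *	{
 *		unsigned int *count = data;
 *
 *		(*count)++;
 *		return 0;
 *	}
 *
 *	unsigned int count = 0;
 *
 *	iommu_group_for_each_dev(group, &count, foo_count_one);
 */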
1333  
1334  /**
1335   * iommu_group_get - Return the group for a device and increment reference
1336   * @dev: get the group that this device belongs to
1337   *
1338   * This function is called by iommu drivers and users to get the group
1339   * for the specified device.  If found, the group is returned and the group
1340   * reference is incremented, else NULL.
1341   */
1342  struct iommu_group *iommu_group_get(struct device *dev)
1343  {
1344  	struct iommu_group *group = dev->iommu_group;
1345  
1346  	if (group)
1347  		kobject_get(group->devices_kobj);
1348  
1349  	return group;
1350  }
1351  EXPORT_SYMBOL_GPL(iommu_group_get);
1352  
1353  /**
1354   * iommu_group_ref_get - Increment reference on a group
1355   * @group: the group to use, must not be NULL
1356   *
1357   * This function is called by iommu drivers to take additional references on an
1358   * existing group.  Returns the given group for convenience.
1359   */
1360  struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
1361  {
1362  	kobject_get(group->devices_kobj);
1363  	return group;
1364  }
1365  EXPORT_SYMBOL_GPL(iommu_group_ref_get);
1366  
1367  /**
1368   * iommu_group_put - Decrement group reference
1369   * @group: the group to use
1370   *
1371   * This function is called by iommu drivers and users to release the
1372   * iommu group.  Once the reference count is zero, the group is released.
1373   */
1374  void iommu_group_put(struct iommu_group *group)
1375  {
1376  	if (group)
1377  		kobject_put(group->devices_kobj);
1378  }
1379  EXPORT_SYMBOL_GPL(iommu_group_put);
1380  
1381  /**
1382   * iommu_group_id - Return ID for a group
1383   * @group: the group to ID
1384   *
1385   * Return the unique ID for the group matching the sysfs group number.
1386   */
1387  int iommu_group_id(struct iommu_group *group)
1388  {
1389  	return group->id;
1390  }
1391  EXPORT_SYMBOL_GPL(iommu_group_id);
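/*
 * The usual get/put reference pattern for users outside the core, shown as a
 * sketch; the id matches the group's directory name under
 * /sys/kernel/iommu_groups/:
 *
 *	struct iommu_group *group = iommu_group_get(dev);
 *
 *	if (!group)
 *		return -ENODEV;
 *	dev_info(dev, "in iommu group %d\n", iommu_group_id(group));
 *	iommu_group_put(group);
 */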
1392  
1393  static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
1394  					       unsigned long *devfns);
1395  
1396  /*
1397   * To consider a PCI device isolated, we require ACS to support Source
1398   * Validation, Request Redirection, Completer Redirection, and Upstream
1399   * Forwarding.  This effectively means that devices cannot spoof their
1400   * requester ID, requests and completions cannot be redirected, and all
1401   * transactions are forwarded upstream, even as it passes through a
1402   * bridge where the target device is downstream.
1403   */
1404  #define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
1405  
1406  /*
1407   * For multifunction devices which are not isolated from each other, find
1408   * all the other non-isolated functions and look for existing groups.  For
1409   * each function, we also need to look for aliases to or from other devices
1410   * that may already have a group.
1411   */
1412  static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
1413  							unsigned long *devfns)
1414  {
1415  	struct pci_dev *tmp = NULL;
1416  	struct iommu_group *group;
1417  
1418  	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
1419  		return NULL;
1420  
1421  	for_each_pci_dev(tmp) {
1422  		if (tmp == pdev || tmp->bus != pdev->bus ||
1423  		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
1424  		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
1425  			continue;
1426  
1427  		group = get_pci_alias_group(tmp, devfns);
1428  		if (group) {
1429  			pci_dev_put(tmp);
1430  			return group;
1431  		}
1432  	}
1433  
1434  	return NULL;
1435  }
1436  
1437  /*
1438   * Look for aliases to or from the given device for existing groups. DMA
1439   * aliases are only supported on the same bus, therefore the search
1440   * space is quite small (especially since we're really only looking at pcie
1441   * device, and therefore only expect multiple slots on the root complex or
1442   * downstream switch ports).  It's conceivable though that a pair of
1443   * multifunction devices could have aliases between them that would cause a
1444   * loop.  To prevent this, we use a bitmap to track where we've been.
1445   */
1446  static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
1447  					       unsigned long *devfns)
1448  {
1449  	struct pci_dev *tmp = NULL;
1450  	struct iommu_group *group;
1451  
1452  	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
1453  		return NULL;
1454  
1455  	group = iommu_group_get(&pdev->dev);
1456  	if (group)
1457  		return group;
1458  
1459  	for_each_pci_dev(tmp) {
1460  		if (tmp == pdev || tmp->bus != pdev->bus)
1461  			continue;
1462  
1463  		/* We alias them or they alias us */
1464  		if (pci_devs_are_dma_aliases(pdev, tmp)) {
1465  			group = get_pci_alias_group(tmp, devfns);
1466  			if (group) {
1467  				pci_dev_put(tmp);
1468  				return group;
1469  			}
1470  
1471  			group = get_pci_function_alias_group(tmp, devfns);
1472  			if (group) {
1473  				pci_dev_put(tmp);
1474  				return group;
1475  			}
1476  		}
1477  	}
1478  
1479  	return NULL;
1480  }
1481  
1482  struct group_for_pci_data {
1483  	struct pci_dev *pdev;
1484  	struct iommu_group *group;
1485  };
1486  
1487  /*
1488   * DMA alias iterator callback, return the last seen device.  Stop and return
1489   * the IOMMU group if we find one along the way.
1490   */
1491  static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
1492  {
1493  	struct group_for_pci_data *data = opaque;
1494  
1495  	data->pdev = pdev;
1496  	data->group = iommu_group_get(&pdev->dev);
1497  
1498  	return data->group != NULL;
1499  }
1500  
1501  /*
1502   * Generic device_group call-back function. It just allocates one
1503   * iommu-group per device.
1504   */
1505  struct iommu_group *generic_device_group(struct device *dev)
1506  {
1507  	return iommu_group_alloc();
1508  }
1509  EXPORT_SYMBOL_GPL(generic_device_group);
1510  
1511  /*
1512   * Generic device_group call-back function. It just allocates one
1513   * iommu-group per iommu driver instance shared by every device
1514   * probed by that iommu driver.
1515   */
1516  struct iommu_group *generic_single_device_group(struct device *dev)
1517  {
1518  	struct iommu_device *iommu = dev->iommu->iommu_dev;
1519  
1520  	if (!iommu->singleton_group) {
1521  		struct iommu_group *group;
1522  
1523  		group = iommu_group_alloc();
1524  		if (IS_ERR(group))
1525  			return group;
1526  		iommu->singleton_group = group;
1527  	}
1528  	return iommu_group_ref_get(iommu->singleton_group);
1529  }
1530  EXPORT_SYMBOL_GPL(generic_single_device_group);
1531  
1532  /*
1533   * Use standard PCI bus topology, isolation features, and DMA alias quirks
1534   * to find or create an IOMMU group for a device.
1535   */
1536  struct iommu_group *pci_device_group(struct device *dev)
1537  {
1538  	struct pci_dev *pdev = to_pci_dev(dev);
1539  	struct group_for_pci_data data;
1540  	struct pci_bus *bus;
1541  	struct iommu_group *group = NULL;
1542  	u64 devfns[4] = { 0 };
1543  
1544  	if (WARN_ON(!dev_is_pci(dev)))
1545  		return ERR_PTR(-EINVAL);
1546  
1547  	/*
1548  	 * Find the upstream DMA alias for the device.  A device must not
1549  	 * be aliased due to topology in order to have its own IOMMU group.
1550  	 * If we find an alias along the way that already belongs to a
1551  	 * group, use it.
1552  	 */
1553  	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
1554  		return data.group;
1555  
1556  	pdev = data.pdev;
1557  
1558  	/*
1559  	 * Continue upstream from the point of minimum IOMMU granularity
1560  	 * due to aliases to the point where devices are protected from
1561  	 * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
1562  	 * group, use it.
1563  	 */
1564  	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
1565  		if (!bus->self)
1566  			continue;
1567  
1568  		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
1569  			break;
1570  
1571  		pdev = bus->self;
1572  
1573  		group = iommu_group_get(&pdev->dev);
1574  		if (group)
1575  			return group;
1576  	}
1577  
1578  	/*
1579  	 * Look for existing groups on device aliases.  If we alias another
1580  	 * device or another device aliases us, use the same group.
1581  	 */
1582  	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
1583  	if (group)
1584  		return group;
1585  
1586  	/*
1587  	 * Look for existing groups on non-isolated functions on the same
1588   * slot and aliases of those functions, if any.  No need to clear
1589  	 * the search bitmap, the tested devfns are still valid.
1590  	 */
1591  	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
1592  	if (group)
1593  		return group;
1594  
1595  	/* No shared group found, allocate new */
1596  	return iommu_group_alloc();
1597  }
1598  EXPORT_SYMBOL_GPL(pci_device_group);
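/*
 * PCI-capable IOMMU drivers typically reach this helper either by pointing
 * their ->device_group() op straight at it or from a wrapper that dispatches
 * on the bus type, e.g. (sketch):
 *
 *	static const struct iommu_ops foo_ops = {
 *		...
 *		.device_group	= pci_device_group,
 *	};
 */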
1599  
1600  /* Get the IOMMU group for device on fsl-mc bus */
1601  struct iommu_group *fsl_mc_device_group(struct device *dev)
1602  {
1603  	struct device *cont_dev = fsl_mc_cont_dev(dev);
1604  	struct iommu_group *group;
1605  
1606  	group = iommu_group_get(cont_dev);
1607  	if (!group)
1608  		group = iommu_group_alloc();
1609  	return group;
1610  }
1611  EXPORT_SYMBOL_GPL(fsl_mc_device_group);
1612  
1613  static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev)
1614  {
1615  	const struct iommu_ops *ops = dev_iommu_ops(dev);
1616  	struct iommu_domain *domain;
1617  
1618  	if (ops->identity_domain)
1619  		return ops->identity_domain;
1620  
1621  	/* Older drivers create the identity domain via ops->domain_alloc() */
1622  	if (!ops->domain_alloc)
1623  		return ERR_PTR(-EOPNOTSUPP);
1624  
1625  	domain = ops->domain_alloc(IOMMU_DOMAIN_IDENTITY);
1626  	if (IS_ERR(domain))
1627  		return domain;
1628  	if (!domain)
1629  		return ERR_PTR(-ENOMEM);
1630  
1631  	iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops);
1632  	return domain;
1633  }
1634  
1635  static struct iommu_domain *
1636  __iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
1637  {
1638  	struct device *dev = iommu_group_first_dev(group);
1639  	struct iommu_domain *dom;
1640  
1641  	if (group->default_domain && group->default_domain->type == req_type)
1642  		return group->default_domain;
1643  
1644  	/*
1645  	 * When allocating the DMA API domain assume that the driver is going to
1646  	 * use PASID and make sure the RID's domain is PASID compatible.
1647  	 */
1648  	if (req_type & __IOMMU_DOMAIN_PAGING) {
1649  		dom = __iommu_paging_domain_alloc_flags(dev, req_type,
1650  			   dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : 0);
1651  
1652  		/*
1653  		 * If driver does not support PASID feature then
1654  		 * try to allocate non-PASID domain
1655  		 */
1656  		if (PTR_ERR(dom) == -EOPNOTSUPP)
1657  			dom = __iommu_paging_domain_alloc_flags(dev, req_type, 0);
1658  
1659  		return dom;
1660  	}
1661  
1662  	if (req_type == IOMMU_DOMAIN_IDENTITY)
1663  		return __iommu_alloc_identity_domain(dev);
1664  
1665  	return ERR_PTR(-EINVAL);
1666  }
1667  
1668  /*
1669   * req_type of 0 means "auto" which means to select a domain based on
1670   * iommu_def_domain_type or what the driver actually supports.
1671   */
1672  static struct iommu_domain *
1673  iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
1674  {
1675  	const struct iommu_ops *ops = dev_iommu_ops(iommu_group_first_dev(group));
1676  	struct iommu_domain *dom;
1677  
1678  	lockdep_assert_held(&group->mutex);
1679  
1680  	/*
1681  	 * Allow legacy drivers to specify the domain that will be the default
1682  	 * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM
1683  	 * domain. Do not use in new drivers.
1684  	 */
1685  	if (ops->default_domain) {
1686  		if (req_type != ops->default_domain->type)
1687  			return ERR_PTR(-EINVAL);
1688  		return ops->default_domain;
1689  	}
1690  
1691  	if (req_type)
1692  		return __iommu_group_alloc_default_domain(group, req_type);
1693  
1694  	/* The driver gave no guidance on what type to use, try the default */
1695  	dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type);
1696  	if (!IS_ERR(dom))
1697  		return dom;
1698  
1699  	/* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
1700  	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
1701  		return ERR_PTR(-EINVAL);
1702  	dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA);
1703  	if (IS_ERR(dom))
1704  		return dom;
1705  
1706  	pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA\n",
1707  		iommu_def_domain_type, group->name);
1708  	return dom;
1709  }
1710  
1711  struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
1712  {
1713  	return group->default_domain;
1714  }
1715  
1716  static int probe_iommu_group(struct device *dev, void *data)
1717  {
1718  	struct list_head *group_list = data;
1719  	int ret;
1720  
1721  	mutex_lock(&iommu_probe_device_lock);
1722  	ret = __iommu_probe_device(dev, group_list);
1723  	mutex_unlock(&iommu_probe_device_lock);
1724  	if (ret == -ENODEV)
1725  		ret = 0;
1726  
1727  	return ret;
1728  }
1729  
1730  static int iommu_bus_notifier(struct notifier_block *nb,
1731  			      unsigned long action, void *data)
1732  {
1733  	struct device *dev = data;
1734  
1735  	if (action == BUS_NOTIFY_ADD_DEVICE) {
1736  		int ret;
1737  
1738  		ret = iommu_probe_device(dev);
1739  		return (ret) ? NOTIFY_DONE : NOTIFY_OK;
1740  	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
1741  		iommu_release_device(dev);
1742  		return NOTIFY_OK;
1743  	}
1744  
1745  	return 0;
1746  }
1747  
1748  /*
1749   * Combine the driver's chosen def_domain_type across all the devices in a
1750   * group. Drivers must give a consistent result.
1751   */
1752  static int iommu_get_def_domain_type(struct iommu_group *group,
1753  				     struct device *dev, int cur_type)
1754  {
1755  	const struct iommu_ops *ops = dev_iommu_ops(dev);
1756  	int type;
1757  
1758  	if (ops->default_domain) {
1759  		/*
1760  		 * Drivers that declare a global static default_domain will
1761  		 * always choose that.
1762  		 */
1763  		type = ops->default_domain->type;
1764  	} else {
1765  		if (ops->def_domain_type)
1766  			type = ops->def_domain_type(dev);
1767  		else
1768  			return cur_type;
1769  	}
1770  	if (!type || cur_type == type)
1771  		return cur_type;
1772  	if (!cur_type)
1773  		return type;
1774  
1775  	dev_err_ratelimited(
1776  		dev,
1777  		"IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n",
1778  		iommu_domain_type_str(cur_type), iommu_domain_type_str(type),
1779  		group->id);
1780  
1781  	/*
1782  	 * Try to recover; drivers are allowed to force IDENTITY or DMA, and
1783  	 * IDENTITY takes precedence.
1784  	 */
1785  	if (type == IOMMU_DOMAIN_IDENTITY)
1786  		return type;
1787  	return cur_type;
1788  }
1789  
1790  /*
1791   * A target_type of 0 will select the best domain type. 0 can be returned in
1792   * this case meaning the global default should be used.
1793   */
1794  static int iommu_get_default_domain_type(struct iommu_group *group,
1795  					 int target_type)
1796  {
1797  	struct device *untrusted = NULL;
1798  	struct group_device *gdev;
1799  	int driver_type = 0;
1800  
1801  	lockdep_assert_held(&group->mutex);
1802  
1803  	/*
1804  	 * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an
1805  	 * identity_domain and it will automatically become their default
1806  	 * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain.
1807  	 * Override the selection to IDENTITY.
1808  	 */
1809  	if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
1810  		static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) &&
1811  				IS_ENABLED(CONFIG_IOMMU_DMA)));
1812  		driver_type = IOMMU_DOMAIN_IDENTITY;
1813  	}
1814  
1815  	for_each_group_device(group, gdev) {
1816  		driver_type = iommu_get_def_domain_type(group, gdev->dev,
1817  							driver_type);
1818  
1819  		if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) {
1820  			/*
1821  			 * No ARM32-using system will set untrusted; it cannot
1822  			 * work.
1823  			 */
1824  			if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)))
1825  				return -1;
1826  			untrusted = gdev->dev;
1827  		}
1828  	}
1829  
1830  	/*
1831  	 * If the common dma ops are not selected in kconfig then we cannot use
1832  	 * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been
1833  	 * selected.
1834  	 */
1835  	if (!IS_ENABLED(CONFIG_IOMMU_DMA)) {
1836  		if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA))
1837  			return -1;
1838  		if (!driver_type)
1839  			driver_type = IOMMU_DOMAIN_IDENTITY;
1840  	}
1841  
1842  	if (untrusted) {
1843  		if (driver_type && driver_type != IOMMU_DOMAIN_DMA) {
1844  			dev_err_ratelimited(
1845  				untrusted,
1846  				"Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n",
1847  				group->id, iommu_domain_type_str(driver_type));
1848  			return -1;
1849  		}
1850  		driver_type = IOMMU_DOMAIN_DMA;
1851  	}
1852  
1853  	if (target_type) {
1854  		if (driver_type && target_type != driver_type)
1855  			return -1;
1856  		return target_type;
1857  	}
1858  	return driver_type;
1859  }
1860  
1861  static void iommu_group_do_probe_finalize(struct device *dev)
1862  {
1863  	const struct iommu_ops *ops = dev_iommu_ops(dev);
1864  
1865  	if (ops->probe_finalize)
1866  		ops->probe_finalize(dev);
1867  }
1868  
1869  static int bus_iommu_probe(const struct bus_type *bus)
1870  {
1871  	struct iommu_group *group, *next;
1872  	LIST_HEAD(group_list);
1873  	int ret;
1874  
1875  	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
1876  	if (ret)
1877  		return ret;
1878  
1879  	list_for_each_entry_safe(group, next, &group_list, entry) {
1880  		struct group_device *gdev;
1881  
1882  		mutex_lock(&group->mutex);
1883  
1884  		/* Remove item from the list */
1885  		list_del_init(&group->entry);
1886  
1887  		/*
1888  		 * We go to the trouble of deferred default domain creation so
1889  		 * that the cross-group default domain type and the setup of the
1890  		 * IOMMU_RESV_DIRECT will work correctly in non-hotplug scenarios.
1891  		 */
1892  		ret = iommu_setup_default_domain(group, 0);
1893  		if (ret) {
1894  			mutex_unlock(&group->mutex);
1895  			return ret;
1896  		}
1897  		for_each_group_device(group, gdev)
1898  			iommu_setup_dma_ops(gdev->dev);
1899  		mutex_unlock(&group->mutex);
1900  
1901  		/*
1902  		 * FIXME: Mis-locked because the ops->probe_finalize() call-back
1903  		 * of some IOMMU drivers calls arm_iommu_attach_device() which
1904  		 * in-turn might call back into IOMMU core code, where it tries
1905  		 * to take group->mutex, resulting in a deadlock.
1906  		 */
1907  		for_each_group_device(group, gdev)
1908  			iommu_group_do_probe_finalize(gdev->dev);
1909  	}
1910  
1911  	return 0;
1912  }
1913  
1914  /**
1915   * device_iommu_capable() - check for a general IOMMU capability
1916   * @dev: device to which the capability would be relevant, if available
1917   * @cap: IOMMU capability
1918   *
1919   * Return: true if an IOMMU is present and supports the given capability
1920   * for the given device, otherwise false.
1921   */
1922  bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
1923  {
1924  	const struct iommu_ops *ops;
1925  
1926  	if (!dev_has_iommu(dev))
1927  		return false;
1928  
1929  	ops = dev_iommu_ops(dev);
1930  	if (!ops->capable)
1931  		return false;
1932  
1933  	return ops->capable(dev, cap);
1934  }
1935  EXPORT_SYMBOL_GPL(device_iommu_capable);
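/*
 * Illustrative usage sketch (added for clarity; not part of the original
 * source). A hypothetical consumer could gate its cache-maintenance policy on
 * the reported IOMMU capability. "my_dev" is an assumed, already-probed
 * struct device, and my_driver_enable_explicit_cache_sync() is a made-up
 * driver helper:
 *
 *	if (!device_iommu_capable(my_dev, IOMMU_CAP_CACHE_COHERENCY))
 *		my_driver_enable_explicit_cache_sync();
 */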
1936  
1937  /**
1938   * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
1939   *       for a group
1940   * @group: Group to query
1941   *
1942   * IOMMU groups should not have differing values of
1943   * msi_device_has_isolated_msi() for devices in a group. However nothing
1944   * directly prevents this, so ensure mistakes don't result in isolation failures
1945   * by checking that all the devices are the same.
1946   */
1947  bool iommu_group_has_isolated_msi(struct iommu_group *group)
1948  {
1949  	struct group_device *group_dev;
1950  	bool ret = true;
1951  
1952  	mutex_lock(&group->mutex);
1953  	for_each_group_device(group, group_dev)
1954  		ret &= msi_device_has_isolated_msi(group_dev->dev);
1955  	mutex_unlock(&group->mutex);
1956  	return ret;
1957  }
1958  EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
1959  
1960  /**
1961   * iommu_set_fault_handler() - set a fault handler for an iommu domain
1962   * @domain: iommu domain
1963   * @handler: fault handler
1964   * @token: user data, will be passed back to the fault handler
1965   *
1966   * This function should be used by IOMMU users which want to be notified
1967   * whenever an IOMMU fault happens.
1968   *
1969   * The fault handler itself should return 0 on success, and an appropriate
1970   * error code otherwise.
1971   */
1972  void iommu_set_fault_handler(struct iommu_domain *domain,
1973  					iommu_fault_handler_t handler,
1974  					void *token)
1975  {
1976  	BUG_ON(!domain);
1977  
1978  	domain->handler = handler;
1979  	domain->handler_token = token;
1980  }
1981  EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
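/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * consumer registering a report-only fault handler on a domain it owns.
 * Returning -ENOSYS keeps the IOMMU driver's default behaviour, as described
 * in the report_iommu_fault() documentation further down. "my_domain" is an
 * assumed caller-owned domain.
 *
 *	static int my_fault_handler(struct iommu_domain *domain,
 *				    struct device *dev, unsigned long iova,
 *				    int flags, void *token)
 *	{
 *		dev_warn(dev, "iommu fault at iova %#lx, flags %#x\n",
 *			 iova, flags);
 *		return -ENOSYS;
 *	}
 *
 *	iommu_set_fault_handler(my_domain, my_fault_handler, NULL);
 */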
1982  
1983  static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
1984  			      const struct iommu_ops *ops)
1985  {
1986  	domain->type = type;
1987  	domain->owner = ops;
1988  	if (!domain->ops)
1989  		domain->ops = ops->default_domain_ops;
1990  
1991  	/*
1992  	 * If not already set, assume all sizes by default; the driver
1993  	 * may override this later
1994  	 */
1995  	if (!domain->pgsize_bitmap)
1996  		domain->pgsize_bitmap = ops->pgsize_bitmap;
1997  }
1998  
1999  static struct iommu_domain *
2000  __iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type,
2001  				  unsigned int flags)
2002  {
2003  	const struct iommu_ops *ops;
2004  	struct iommu_domain *domain;
2005  
2006  	if (!dev_has_iommu(dev))
2007  		return ERR_PTR(-ENODEV);
2008  
2009  	ops = dev_iommu_ops(dev);
2010  
2011  	if (ops->domain_alloc_paging && !flags)
2012  		domain = ops->domain_alloc_paging(dev);
2013  	else if (ops->domain_alloc_paging_flags)
2014  		domain = ops->domain_alloc_paging_flags(dev, flags, NULL);
2015  	else if (ops->domain_alloc && !flags)
2016  		domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED);
2017  	else
2018  		return ERR_PTR(-EOPNOTSUPP);
2019  
2020  	if (IS_ERR(domain))
2021  		return domain;
2022  	if (!domain)
2023  		return ERR_PTR(-ENOMEM);
2024  
2025  	iommu_domain_init(domain, type, ops);
2026  	return domain;
2027  }
2028  
2029  /**
2030   * iommu_paging_domain_alloc_flags() - Allocate a paging domain
2031   * @dev: device for which the domain is allocated
2032   * @flags: Bitmap of iommufd_hwpt_alloc_flags
2033   *
2034   * Allocate a paging domain which will be managed by a kernel driver. Return
2035   * allocated domain if successful, or an ERR pointer for failure.
2036   */
2037  struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev,
2038  						     unsigned int flags)
2039  {
2040  	return __iommu_paging_domain_alloc_flags(dev,
2041  					 IOMMU_DOMAIN_UNMANAGED, flags);
2042  }
2043  EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags);
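/*
 * Illustrative sketch (not part of the original source): typical lifetime of a
 * kernel-managed paging domain. "my_dev" is an assumed device behind an IOMMU;
 * error handling is reduced to the minimum.
 *
 *	struct iommu_domain *domain;
 *
 *	domain = iommu_paging_domain_alloc_flags(my_dev, 0);
 *	if (IS_ERR(domain))
 *		return PTR_ERR(domain);
 *
 *	... attach the domain and populate it with iommu_map() ...
 *
 *	iommu_domain_free(domain);
 */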
2044  
2045  void iommu_domain_free(struct iommu_domain *domain)
2046  {
2047  	if (domain->type == IOMMU_DOMAIN_SVA)
2048  		mmdrop(domain->mm);
2049  	iommu_put_dma_cookie(domain);
2050  	if (domain->ops->free)
2051  		domain->ops->free(domain);
2052  }
2053  EXPORT_SYMBOL_GPL(iommu_domain_free);
2054  
2055  /*
2056   * Put the group's domain back to the appropriate core-owned domain - either the
2057   * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
2058   */
2059  static void __iommu_group_set_core_domain(struct iommu_group *group)
2060  {
2061  	struct iommu_domain *new_domain;
2062  
2063  	if (group->owner)
2064  		new_domain = group->blocking_domain;
2065  	else
2066  		new_domain = group->default_domain;
2067  
2068  	__iommu_group_set_domain_nofail(group, new_domain);
2069  }
2070  
2071  static int __iommu_attach_device(struct iommu_domain *domain,
2072  				 struct device *dev)
2073  {
2074  	int ret;
2075  
2076  	if (unlikely(domain->ops->attach_dev == NULL))
2077  		return -ENODEV;
2078  
2079  	ret = domain->ops->attach_dev(domain, dev);
2080  	if (ret)
2081  		return ret;
2082  	dev->iommu->attach_deferred = 0;
2083  	trace_attach_device_to_domain(dev);
2084  	return 0;
2085  }
2086  
2087  /**
2088   * iommu_attach_device - Attach an IOMMU domain to a device
2089   * @domain: IOMMU domain to attach
2090   * @dev: Device that will be attached
2091   *
2092   * Returns 0 on success and error code on failure
2093   *
2094   * Note that EINVAL can be treated as a soft failure, indicating
2095   * that certain configuration of the domain is incompatible with
2096   * the device. In this case attaching a different domain to the
2097   * device may succeed.
2098   */
2099  int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
2100  {
2101  	/* Caller must be a probed driver on dev */
2102  	struct iommu_group *group = dev->iommu_group;
2103  	int ret;
2104  
2105  	if (!group)
2106  		return -ENODEV;
2107  
2108  	/*
2109  	 * Lock the group to make sure the device-count doesn't
2110  	 * change while we are attaching
2111  	 */
2112  	mutex_lock(&group->mutex);
2113  	ret = -EINVAL;
2114  	if (list_count_nodes(&group->devices) != 1)
2115  		goto out_unlock;
2116  
2117  	ret = __iommu_attach_group(domain, group);
2118  
2119  out_unlock:
2120  	mutex_unlock(&group->mutex);
2121  	return ret;
2122  }
2123  EXPORT_SYMBOL_GPL(iommu_attach_device);
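/*
 * Illustrative sketch (not part of the original source): attach a caller-owned
 * domain to a device that sits alone in its group, then detach it again.
 * "my_domain" and "my_dev" are assumed to be provided by the caller, which
 * must be the probed driver of "my_dev".
 *
 *	int ret = iommu_attach_device(my_domain, my_dev);
 *
 *	if (ret)
 *		return ret;
 *	... program the device with IOVAs mapped in my_domain ...
 *	iommu_detach_device(my_domain, my_dev);
 */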
2124  
2125  int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
2126  {
2127  	if (dev->iommu && dev->iommu->attach_deferred)
2128  		return __iommu_attach_device(domain, dev);
2129  
2130  	return 0;
2131  }
2132  
2133  void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
2134  {
2135  	/* Caller must be a probed driver on dev */
2136  	struct iommu_group *group = dev->iommu_group;
2137  
2138  	if (!group)
2139  		return;
2140  
2141  	mutex_lock(&group->mutex);
2142  	if (WARN_ON(domain != group->domain) ||
2143  	    WARN_ON(list_count_nodes(&group->devices) != 1))
2144  		goto out_unlock;
2145  	__iommu_group_set_core_domain(group);
2146  
2147  out_unlock:
2148  	mutex_unlock(&group->mutex);
2149  }
2150  EXPORT_SYMBOL_GPL(iommu_detach_device);
2151  
2152  struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
2153  {
2154  	/* Caller must be a probed driver on dev */
2155  	struct iommu_group *group = dev->iommu_group;
2156  
2157  	if (!group)
2158  		return NULL;
2159  
2160  	return group->domain;
2161  }
2162  EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
2163  
2164  /*
2165   * For use by IOMMU_DOMAIN_DMA implementations which already provide their own
2166   * guarantees that the group and its default domain are valid and correct.
2167   */
2168  struct iommu_domain *iommu_get_dma_domain(struct device *dev)
2169  {
2170  	return dev->iommu_group->default_domain;
2171  }
2172  
2173  static void *iommu_make_pasid_array_entry(struct iommu_domain *domain,
2174  					  struct iommu_attach_handle *handle)
2175  {
2176  	if (handle) {
2177  		handle->domain = domain;
2178  		return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE);
2179  	}
2180  
2181  	return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
2182  }
2183  
2184  static int __iommu_attach_group(struct iommu_domain *domain,
2185  				struct iommu_group *group)
2186  {
2187  	struct device *dev;
2188  
2189  	if (group->domain && group->domain != group->default_domain &&
2190  	    group->domain != group->blocking_domain)
2191  		return -EBUSY;
2192  
2193  	dev = iommu_group_first_dev(group);
2194  	if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
2195  		return -EINVAL;
2196  
2197  	return __iommu_group_set_domain(group, domain);
2198  }
2199  
2200  /**
2201   * iommu_attach_group - Attach an IOMMU domain to an IOMMU group
2202   * @domain: IOMMU domain to attach
2203   * @group: IOMMU group that will be attached
2204   *
2205   * Returns 0 on success and error code on failure
2206   *
2207   * Note that EINVAL can be treated as a soft failure, indicating
2208   * that certain configuration of the domain is incompatible with
2209   * the group. In this case attaching a different domain to the
2210   * group may succeed.
2211   */
2212  int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
2213  {
2214  	int ret;
2215  
2216  	mutex_lock(&group->mutex);
2217  	ret = __iommu_attach_group(domain, group);
2218  	mutex_unlock(&group->mutex);
2219  
2220  	return ret;
2221  }
2222  EXPORT_SYMBOL_GPL(iommu_attach_group);
2223  
2224  static int __iommu_device_set_domain(struct iommu_group *group,
2225  				     struct device *dev,
2226  				     struct iommu_domain *new_domain,
2227  				     unsigned int flags)
2228  {
2229  	int ret;
2230  
2231  	/*
2232  	 * If the device requires IOMMU_RESV_DIRECT then we cannot allow
2233  	 * the blocking domain to be attached as it does not contain the
2234  	 * required 1:1 mapping. This test effectively excludes the device
2235  	 * being used with iommu_group_claim_dma_owner() which will block
2236  	 * vfio and iommufd as well.
2237  	 */
2238  	if (dev->iommu->require_direct &&
2239  	    (new_domain->type == IOMMU_DOMAIN_BLOCKED ||
2240  	     new_domain == group->blocking_domain)) {
2241  		dev_warn(dev,
2242  			 "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n");
2243  		return -EINVAL;
2244  	}
2245  
2246  	if (dev->iommu->attach_deferred) {
2247  		if (new_domain == group->default_domain)
2248  			return 0;
2249  		dev->iommu->attach_deferred = 0;
2250  	}
2251  
2252  	ret = __iommu_attach_device(new_domain, dev);
2253  	if (ret) {
2254  		/*
2255  		 * If we have a blocking domain then try to attach that in hopes
2256  		 * of avoiding a UAF. Modern drivers should implement blocking
2257  		 * domains as global statics that cannot fail.
2258  		 */
2259  		if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
2260  		    group->blocking_domain &&
2261  		    group->blocking_domain != new_domain)
2262  			__iommu_attach_device(group->blocking_domain, dev);
2263  		return ret;
2264  	}
2265  	return 0;
2266  }
2267  
2268  /*
2269   * If 0 is returned the group's domain is new_domain. If an error is returned
2270   * then the group's domain will be set back to the existing domain unless
2271   * IOMMU_SET_DOMAIN_MUST_SUCCEED was passed, in which case the group's domain
2272   * is left inconsistent. It is a driver bug to fail attach with a previously
2273   * good domain. We try to avoid a kernel UAF because of this.
2274   *
2275   * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
2276   * API works on domains and devices.  Bridge that gap by iterating over the
2277   * devices in a group.  Ideally we'd have a single device which represents the
2278   * requestor ID of the group, but we also allow IOMMU drivers to create policy
2279   * defined minimum sets, where the physical hardware may be able to distinguish
2280   * members, but we wish to group them at a higher level (ex. untrusted
2281   * multi-function PCI devices).  Thus we attach each device.
2282   */
2283  static int __iommu_group_set_domain_internal(struct iommu_group *group,
2284  					     struct iommu_domain *new_domain,
2285  					     unsigned int flags)
2286  {
2287  	struct group_device *last_gdev;
2288  	struct group_device *gdev;
2289  	int result;
2290  	int ret;
2291  
2292  	lockdep_assert_held(&group->mutex);
2293  
2294  	if (group->domain == new_domain)
2295  		return 0;
2296  
2297  	if (WARN_ON(!new_domain))
2298  		return -EINVAL;
2299  
2300  	/*
2301  	 * Changing the domain is done by calling attach_dev() on the new
2302  	 * domain. This switch does not have to be atomic and DMA can be
2303  	 * discarded during the transition. DMA must only be able to access
2304  	 * either new_domain or group->domain, never something else.
2305  	 */
2306  	result = 0;
2307  	for_each_group_device(group, gdev) {
2308  		ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
2309  						flags);
2310  		if (ret) {
2311  			result = ret;
2312  			/*
2313  			 * Keep trying the other devices in the group. If a
2314  			 * driver fails attach to an otherwise good domain, and
2315  			 * does not support blocking domains, it should at least
2316  			 * drop its reference on the current domain so we don't
2317  			 * UAF.
2318  			 */
2319  			if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
2320  				continue;
2321  			goto err_revert;
2322  		}
2323  	}
2324  	group->domain = new_domain;
2325  	return result;
2326  
2327  err_revert:
2328  	/*
2329  	 * This is called in error unwind paths. A well behaved driver should
2330  	 * always allow us to attach to a domain that was already attached.
2331  	 */
2332  	last_gdev = gdev;
2333  	for_each_group_device(group, gdev) {
2334  		/*
2335  		 * A NULL domain can happen only for first probe, in which case
2336  		 * we leave group->domain as NULL and let release clean
2337  		 * everything up.
2338  		 */
2339  		if (group->domain)
2340  			WARN_ON(__iommu_device_set_domain(
2341  				group, gdev->dev, group->domain,
2342  				IOMMU_SET_DOMAIN_MUST_SUCCEED));
2343  		if (gdev == last_gdev)
2344  			break;
2345  	}
2346  	return ret;
2347  }
2348  
2349  void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
2350  {
2351  	mutex_lock(&group->mutex);
2352  	__iommu_group_set_core_domain(group);
2353  	mutex_unlock(&group->mutex);
2354  }
2355  EXPORT_SYMBOL_GPL(iommu_detach_group);
2356  
2357  phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2358  {
2359  	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2360  		return iova;
2361  
2362  	if (domain->type == IOMMU_DOMAIN_BLOCKED)
2363  		return 0;
2364  
2365  	return domain->ops->iova_to_phys(domain, iova);
2366  }
2367  EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
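/*
 * Illustrative sketch (not part of the original source): looking up the
 * physical address behind an IOVA on the domain currently attached to a
 * device. "my_dev" and "iova" are assumed caller-provided values.
 *
 *	struct iommu_domain *domain = iommu_get_domain_for_dev(my_dev);
 *	phys_addr_t phys;
 *
 *	if (domain)
 *		phys = iommu_iova_to_phys(domain, iova);
 */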
2368  
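/*
 * Worked example for iommu_pgsize() below (added for clarity; the values are
 * hypothetical): with pgsize_bitmap = SZ_4K | SZ_2M, iova = 0x200000,
 * paddr = 0xa00000 and size = 0x400000, both addresses are 2MiB aligned, so
 * the function returns pgsize = SZ_2M and, when @count is supplied,
 * *count = 2, covering the whole 4MiB range with two 2MiB pages.
 */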
2369  static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
2370  			   phys_addr_t paddr, size_t size, size_t *count)
2371  {
2372  	unsigned int pgsize_idx, pgsize_idx_next;
2373  	unsigned long pgsizes;
2374  	size_t offset, pgsize, pgsize_next;
2375  	unsigned long addr_merge = paddr | iova;
2376  
2377  	/* Page sizes supported by the hardware and small enough for @size */
2378  	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
2379  
2380  	/* Constrain the page sizes further based on the maximum alignment */
2381  	if (likely(addr_merge))
2382  		pgsizes &= GENMASK(__ffs(addr_merge), 0);
2383  
2384  	/* Make sure we have at least one suitable page size */
2385  	BUG_ON(!pgsizes);
2386  
2387  	/* Pick the biggest page size remaining */
2388  	pgsize_idx = __fls(pgsizes);
2389  	pgsize = BIT(pgsize_idx);
2390  	if (!count)
2391  		return pgsize;
2392  
2393  	/* Find the next biggest supported page size, if it exists */
2394  	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
2395  	if (!pgsizes)
2396  		goto out_set_count;
2397  
2398  	pgsize_idx_next = __ffs(pgsizes);
2399  	pgsize_next = BIT(pgsize_idx_next);
2400  
2401  	/*
2402  	 * There's no point trying a bigger page size unless the virtual
2403  	 * and physical addresses are similarly offset within the larger page.
2404  	 */
2405  	if ((iova ^ paddr) & (pgsize_next - 1))
2406  		goto out_set_count;
2407  
2408  	/* Calculate the offset to the next page size alignment boundary */
2409  	offset = pgsize_next - (addr_merge & (pgsize_next - 1));
2410  
2411  	/*
2412  	 * If size is big enough to accommodate the larger page, reduce
2413  	 * the number of smaller pages.
2414  	 */
2415  	if (offset + pgsize_next <= size)
2416  		size = offset;
2417  
2418  out_set_count:
2419  	*count = size >> pgsize_idx;
2420  	return pgsize;
2421  }
2422  
2423  static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
2424  		       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2425  {
2426  	const struct iommu_domain_ops *ops = domain->ops;
2427  	unsigned long orig_iova = iova;
2428  	unsigned int min_pagesz;
2429  	size_t orig_size = size;
2430  	phys_addr_t orig_paddr = paddr;
2431  	int ret = 0;
2432  
2433  	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2434  		return -EINVAL;
2435  
2436  	if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL))
2437  		return -ENODEV;
2438  
2439  	/* find out the minimum page size supported */
2440  	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
2441  
2442  	/*
2443  	 * both the virtual address and the physical one, as well as
2444  	 * the size of the mapping, must be aligned (at least) to the
2445  	 * size of the smallest page supported by the hardware
2446  	 */
2447  	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
2448  		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
2449  		       iova, &paddr, size, min_pagesz);
2450  		return -EINVAL;
2451  	}
2452  
2453  	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
2454  
2455  	while (size) {
2456  		size_t pgsize, count, mapped = 0;
2457  
2458  		pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
2459  
2460  		pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
2461  			 iova, &paddr, pgsize, count);
2462  		ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
2463  				     gfp, &mapped);
2464  		/*
2465  		 * Some pages may have been mapped, even if an error occurred,
2466  		 * so we should account for those so they can be unmapped.
2467  		 */
2468  		size -= mapped;
2469  
2470  		if (ret)
2471  			break;
2472  
2473  		iova += mapped;
2474  		paddr += mapped;
2475  	}
2476  
2477  	/* unroll mapping in case something went wrong */
2478  	if (ret)
2479  		iommu_unmap(domain, orig_iova, orig_size - size);
2480  	else
2481  		trace_map(orig_iova, orig_paddr, orig_size);
2482  
2483  	return ret;
2484  }
2485  
2486  int iommu_map(struct iommu_domain *domain, unsigned long iova,
2487  	      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2488  {
2489  	const struct iommu_domain_ops *ops = domain->ops;
2490  	int ret;
2491  
2492  	might_sleep_if(gfpflags_allow_blocking(gfp));
2493  
2494  	/* Discourage passing strange GFP flags */
2495  	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
2496  				__GFP_HIGHMEM)))
2497  		return -EINVAL;
2498  
2499  	ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
2500  	if (ret == 0 && ops->iotlb_sync_map) {
2501  		ret = ops->iotlb_sync_map(domain, iova, size);
2502  		if (ret)
2503  			goto out_err;
2504  	}
2505  
2506  	return ret;
2507  
2508  out_err:
2509  	/* undo mappings already done */
2510  	iommu_unmap(domain, iova, size);
2511  
2512  	return ret;
2513  }
2514  EXPORT_SYMBOL_GPL(iommu_map);
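/*
 * Illustrative sketch (not part of the original source): map one physically
 * contiguous, page-aligned kernel buffer at a caller-chosen IOVA and tear the
 * mapping down again. "my_domain", "iova" and "buf" are assumed to be provided
 * by the caller; buf is taken to be SZ_2M long here.
 *
 *	int ret = iommu_map(my_domain, iova, virt_to_phys(buf), SZ_2M,
 *			    IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
 *
 *	if (ret)
 *		return ret;
 *	... device DMA to [iova, iova + SZ_2M) now reaches buf ...
 *	iommu_unmap(my_domain, iova, SZ_2M);
 */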
2515  
2516  static size_t __iommu_unmap(struct iommu_domain *domain,
2517  			    unsigned long iova, size_t size,
2518  			    struct iommu_iotlb_gather *iotlb_gather)
2519  {
2520  	const struct iommu_domain_ops *ops = domain->ops;
2521  	size_t unmapped_page, unmapped = 0;
2522  	unsigned long orig_iova = iova;
2523  	unsigned int min_pagesz;
2524  
2525  	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
2526  		return 0;
2527  
2528  	if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL))
2529  		return 0;
2530  
2531  	/* find out the minimum page size supported */
2532  	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
2533  
2534  	/*
2535  	 * The virtual address, as well as the size of the mapping, must be
2536  	 * aligned (at least) to the size of the smallest page supported
2537  	 * by the hardware
2538  	 */
2539  	if (!IS_ALIGNED(iova | size, min_pagesz)) {
2540  		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
2541  		       iova, size, min_pagesz);
2542  		return 0;
2543  	}
2544  
2545  	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
2546  
2547  	/*
2548  	 * Keep iterating until we either unmap 'size' bytes (or more)
2549  	 * or we hit an area that isn't mapped.
2550  	 */
2551  	while (unmapped < size) {
2552  		size_t pgsize, count;
2553  
2554  		pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count);
2555  		unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather);
2556  		if (!unmapped_page)
2557  			break;
2558  
2559  		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
2560  			 iova, unmapped_page);
2561  
2562  		iova += unmapped_page;
2563  		unmapped += unmapped_page;
2564  	}
2565  
2566  	trace_unmap(orig_iova, size, unmapped);
2567  	return unmapped;
2568  }
2569  
2570  /**
2571   * iommu_unmap() - Remove mappings from a range of IOVA
2572   * @domain: Domain to manipulate
2573   * @iova: IO virtual address to start
2574   * @size: Length of the range starting from @iova
2575   *
2576   * iommu_unmap() will remove a translation created by iommu_map(). It cannot
2577   * subdivide a mapping created by iommu_map(), so it should be called with IOVA
2578   * ranges that match what was passed to iommu_map(). The range can aggregate
2579   * contiguous iommu_map() calls so long as no individual range is split.
2580   *
2581   * Returns: Number of bytes of IOVA unmapped. iova + res will be the point
2582   * unmapping stopped.
2583   */
2584  size_t iommu_unmap(struct iommu_domain *domain,
2585  		   unsigned long iova, size_t size)
2586  {
2587  	struct iommu_iotlb_gather iotlb_gather;
2588  	size_t ret;
2589  
2590  	iommu_iotlb_gather_init(&iotlb_gather);
2591  	ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
2592  	iommu_iotlb_sync(domain, &iotlb_gather);
2593  
2594  	return ret;
2595  }
2596  EXPORT_SYMBOL_GPL(iommu_unmap);
2597  
2598  size_t iommu_unmap_fast(struct iommu_domain *domain,
2599  			unsigned long iova, size_t size,
2600  			struct iommu_iotlb_gather *iotlb_gather)
2601  {
2602  	return __iommu_unmap(domain, iova, size, iotlb_gather);
2603  }
2604  EXPORT_SYMBOL_GPL(iommu_unmap_fast);
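/*
 * Illustrative sketch (not part of the original source): batch several unmaps
 * under a single TLB flush using the gather API. "my_domain", "iova" and "n"
 * are assumed caller-provided values.
 *
 *	struct iommu_iotlb_gather gather;
 *	size_t i;
 *
 *	iommu_iotlb_gather_init(&gather);
 *	for (i = 0; i < n; i++)
 *		iommu_unmap_fast(my_domain, iova + i * SZ_4K, SZ_4K, &gather);
 *	iommu_iotlb_sync(my_domain, &gather);
 */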
2605  
2606  ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
2607  		     struct scatterlist *sg, unsigned int nents, int prot,
2608  		     gfp_t gfp)
2609  {
2610  	const struct iommu_domain_ops *ops = domain->ops;
2611  	size_t len = 0, mapped = 0;
2612  	phys_addr_t start;
2613  	unsigned int i = 0;
2614  	int ret;
2615  
2616  	might_sleep_if(gfpflags_allow_blocking(gfp));
2617  
2618  	/* Discourage passing strange GFP flags */
2619  	if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
2620  				__GFP_HIGHMEM)))
2621  		return -EINVAL;
2622  
2623  	while (i <= nents) {
2624  		phys_addr_t s_phys = sg_phys(sg);
2625  
2626  		if (len && s_phys != start + len) {
2627  			ret = __iommu_map(domain, iova + mapped, start,
2628  					len, prot, gfp);
2629  
2630  			if (ret)
2631  				goto out_err;
2632  
2633  			mapped += len;
2634  			len = 0;
2635  		}
2636  
2637  		if (sg_dma_is_bus_address(sg))
2638  			goto next;
2639  
2640  		if (len) {
2641  			len += sg->length;
2642  		} else {
2643  			len = sg->length;
2644  			start = s_phys;
2645  		}
2646  
2647  next:
2648  		if (++i < nents)
2649  			sg = sg_next(sg);
2650  	}
2651  
2652  	if (ops->iotlb_sync_map) {
2653  		ret = ops->iotlb_sync_map(domain, iova, mapped);
2654  		if (ret)
2655  			goto out_err;
2656  	}
2657  	return mapped;
2658  
2659  out_err:
2660  	/* undo mappings already done */
2661  	iommu_unmap(domain, iova, mapped);
2662  
2663  	return ret;
2664  }
2665  EXPORT_SYMBOL_GPL(iommu_map_sg);
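/*
 * Illustrative sketch (not part of the original source): map an existing
 * scatterlist contiguously into IOVA space. "my_domain", "iova", "sgl" and
 * "nents" are assumed to come from the caller, e.g. from a previously built
 * sg_table.
 *
 *	ssize_t mapped = iommu_map_sg(my_domain, iova, sgl, nents,
 *				      IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
 *
 *	if (mapped < 0)
 *		return mapped;
 *	... the scatterlist is now visible to the device at [iova, iova + mapped) ...
 */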
2666  
2667  /**
2668   * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
2669   * @domain: the iommu domain where the fault has happened
2670   * @dev: the device where the fault has happened
2671   * @iova: the faulting address
2672   * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
2673   *
2674   * This function should be called by the low-level IOMMU implementations
2675   * whenever IOMMU faults happen, to allow high-level users, that are
2676   * interested in such events, to know about them.
2677   *
2678   * This event may be useful for several possible use cases:
2679   * - mere logging of the event
2680   * - dynamic TLB/PTE loading
2681   * - if restarting of the faulting device is required
2682   *
2683   * Returns 0 on success and an appropriate error code otherwise (if dynamic
2684   * PTE/TLB loading will one day be supported, implementations will be able
2685   * to tell whether it succeeded or not according to this return value).
2686   *
2687   * Specifically, -ENOSYS is returned if a fault handler isn't installed
2688   * (though fault handlers can also return -ENOSYS, in case they want to
2689   * elicit the default behavior of the IOMMU drivers).
2690   */
2691  int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
2692  		       unsigned long iova, int flags)
2693  {
2694  	int ret = -ENOSYS;
2695  
2696  	/*
2697  	 * if upper layers showed interest and installed a fault handler,
2698  	 * invoke it.
2699  	 */
2700  	if (domain->handler)
2701  		ret = domain->handler(domain, dev, iova, flags,
2702  						domain->handler_token);
2703  
2704  	trace_io_page_fault(dev, iova, flags);
2705  	return ret;
2706  }
2707  EXPORT_SYMBOL_GPL(report_iommu_fault);
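/*
 * Illustrative sketch (not part of the original source): how a low-level IOMMU
 * driver's IRQ path might forward a decoded fault. my_iommu_decode_fault() and
 * the local variables are hypothetical; only report_iommu_fault() itself is
 * real, and "flags" would carry IOMMU_FAULT_READ or IOMMU_FAULT_WRITE.
 *
 *	unsigned long iova;
 *	int flags;
 *
 *	my_iommu_decode_fault(regs, &iova, &flags);
 *	if (report_iommu_fault(domain, dev, iova, flags))
 *		dev_err_ratelimited(dev, "unhandled fault at %#lx\n", iova);
 */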
2708  
2709  static int __init iommu_init(void)
2710  {
2711  	iommu_group_kset = kset_create_and_add("iommu_groups",
2712  					       NULL, kernel_kobj);
2713  	BUG_ON(!iommu_group_kset);
2714  
2715  	iommu_debugfs_setup();
2716  
2717  	return 0;
2718  }
2719  core_initcall(iommu_init);
2720  
2721  int iommu_set_pgtable_quirks(struct iommu_domain *domain,
2722  		unsigned long quirk)
2723  {
2724  	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
2725  		return -EINVAL;
2726  	if (!domain->ops->set_pgtable_quirks)
2727  		return -EINVAL;
2728  	return domain->ops->set_pgtable_quirks(domain, quirk);
2729  }
2730  EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
2731  
2732  /**
2733   * iommu_get_resv_regions - get reserved regions
2734   * @dev: device for which to get reserved regions
2735   * @list: reserved region list for device
2736   *
2737   * This returns a list of reserved IOVA regions specific to this device.
2738   * A domain user should not map IOVA in these ranges.
2739   */
2740  void iommu_get_resv_regions(struct device *dev, struct list_head *list)
2741  {
2742  	const struct iommu_ops *ops = dev_iommu_ops(dev);
2743  
2744  	if (ops->get_resv_regions)
2745  		ops->get_resv_regions(dev, list);
2746  }
2747  EXPORT_SYMBOL_GPL(iommu_get_resv_regions);
2748  
2749  /**
2750   * iommu_put_resv_regions - release reserved regions
2751   * @dev: device for which to free reserved regions
2752   * @list: reserved region list for device
2753   *
2754   * This releases a reserved region list acquired by iommu_get_resv_regions().
2755   */
2756  void iommu_put_resv_regions(struct device *dev, struct list_head *list)
2757  {
2758  	struct iommu_resv_region *entry, *next;
2759  
2760  	list_for_each_entry_safe(entry, next, list, list) {
2761  		if (entry->free)
2762  			entry->free(dev, entry);
2763  		else
2764  			kfree(entry);
2765  	}
2766  }
2767  EXPORT_SYMBOL(iommu_put_resv_regions);
2768  
2769  struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
2770  						  size_t length, int prot,
2771  						  enum iommu_resv_type type,
2772  						  gfp_t gfp)
2773  {
2774  	struct iommu_resv_region *region;
2775  
2776  	region = kzalloc(sizeof(*region), gfp);
2777  	if (!region)
2778  		return NULL;
2779  
2780  	INIT_LIST_HEAD(&region->list);
2781  	region->start = start;
2782  	region->length = length;
2783  	region->prot = prot;
2784  	region->type = type;
2785  	return region;
2786  }
2787  EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);
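/*
 * Illustrative sketch (not part of the original source): walk the reserved
 * regions of a device so that a caller-managed IOVA allocator can avoid them.
 * "my_dev" and reserve_iova_range() are assumptions of this sketch.
 *
 *	struct iommu_resv_region *region;
 *	LIST_HEAD(resv_regions);
 *
 *	iommu_get_resv_regions(my_dev, &resv_regions);
 *	list_for_each_entry(region, &resv_regions, list)
 *		reserve_iova_range(region->start, region->length);
 *	iommu_put_resv_regions(my_dev, &resv_regions);
 */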
2788  
2789  void iommu_set_default_passthrough(bool cmd_line)
2790  {
2791  	if (cmd_line)
2792  		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
2793  	iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
2794  }
2795  
2796  void iommu_set_default_translated(bool cmd_line)
2797  {
2798  	if (cmd_line)
2799  		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
2800  	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
2801  }
2802  
2803  bool iommu_default_passthrough(void)
2804  {
2805  	return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
2806  }
2807  EXPORT_SYMBOL_GPL(iommu_default_passthrough);
2808  
2809  const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
2810  {
2811  	const struct iommu_ops *ops = NULL;
2812  	struct iommu_device *iommu;
2813  
2814  	spin_lock(&iommu_device_lock);
2815  	list_for_each_entry(iommu, &iommu_device_list, list)
2816  		if (iommu->fwnode == fwnode) {
2817  			ops = iommu->ops;
2818  			break;
2819  		}
2820  	spin_unlock(&iommu_device_lock);
2821  	return ops;
2822  }
2823  
2824  int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode)
2825  {
2826  	const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode);
2827  	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2828  
2829  	if (!ops)
2830  		return driver_deferred_probe_check_state(dev);
2831  
2832  	if (fwspec)
2833  		return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL;
2834  
2835  	if (!dev_iommu_get(dev))
2836  		return -ENOMEM;
2837  
2838  	/* Preallocate for the overwhelmingly common case of 1 ID */
2839  	fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL);
2840  	if (!fwspec)
2841  		return -ENOMEM;
2842  
2843  	fwnode_handle_get(iommu_fwnode);
2844  	fwspec->iommu_fwnode = iommu_fwnode;
2845  	dev_iommu_fwspec_set(dev, fwspec);
2846  	return 0;
2847  }
2848  EXPORT_SYMBOL_GPL(iommu_fwspec_init);
2849  
2850  void iommu_fwspec_free(struct device *dev)
2851  {
2852  	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2853  
2854  	if (fwspec) {
2855  		fwnode_handle_put(fwspec->iommu_fwnode);
2856  		kfree(fwspec);
2857  		dev_iommu_fwspec_set(dev, NULL);
2858  	}
2859  }
2860  
2861  int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids)
2862  {
2863  	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2864  	int i, new_num;
2865  
2866  	if (!fwspec)
2867  		return -EINVAL;
2868  
2869  	new_num = fwspec->num_ids + num_ids;
2870  	if (new_num > 1) {
2871  		fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num),
2872  				  GFP_KERNEL);
2873  		if (!fwspec)
2874  			return -ENOMEM;
2875  
2876  		dev_iommu_fwspec_set(dev, fwspec);
2877  	}
2878  
2879  	for (i = 0; i < num_ids; i++)
2880  		fwspec->ids[fwspec->num_ids + i] = ids[i];
2881  
2882  	fwspec->num_ids = new_num;
2883  	return 0;
2884  }
2885  EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
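/*
 * Illustrative sketch (not part of the original source): what firmware bus
 * code (OF/ACPI style) conceptually does when wiring a master to its IOMMU.
 * "iommu_fwnode" and "sid" (a u32 stream/requester ID parsed from firmware)
 * are assumed inputs of this sketch.
 *
 *	int ret = iommu_fwspec_init(dev, iommu_fwnode);
 *
 *	if (!ret)
 *		ret = iommu_fwspec_add_ids(dev, &sid, 1);
 */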
2886  
2887  /*
2888   * Per device IOMMU features.
2889   */
2890  int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
2891  {
2892  	if (dev_has_iommu(dev)) {
2893  		const struct iommu_ops *ops = dev_iommu_ops(dev);
2894  
2895  		if (ops->dev_enable_feat)
2896  			return ops->dev_enable_feat(dev, feat);
2897  	}
2898  
2899  	return -ENODEV;
2900  }
2901  EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
2902  
2903  /*
2904   * The device drivers should do the necessary cleanups before calling this.
2905   */
2906  int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
2907  {
2908  	if (dev_has_iommu(dev)) {
2909  		const struct iommu_ops *ops = dev_iommu_ops(dev);
2910  
2911  		if (ops->dev_disable_feat)
2912  			return ops->dev_disable_feat(dev, feat);
2913  	}
2914  
2915  	return -EBUSY;
2916  }
2917  EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
2918  
2919  /**
2920   * iommu_setup_default_domain - Set the default_domain for the group
2921   * @group: Group to change
2922   * @target_type: Domain type to set as the default_domain
2923   *
2924   * Allocate a default domain and set it as the current domain on the group. If
2925   * the group already has a default domain it will be changed to the target_type.
2926   * When target_type is 0 the default domain is selected based on driver and
2927   * system preferences.
2928   */
2929  static int iommu_setup_default_domain(struct iommu_group *group,
2930  				      int target_type)
2931  {
2932  	struct iommu_domain *old_dom = group->default_domain;
2933  	struct group_device *gdev;
2934  	struct iommu_domain *dom;
2935  	bool direct_failed;
2936  	int req_type;
2937  	int ret;
2938  
2939  	lockdep_assert_held(&group->mutex);
2940  
2941  	req_type = iommu_get_default_domain_type(group, target_type);
2942  	if (req_type < 0)
2943  		return -EINVAL;
2944  
2945  	dom = iommu_group_alloc_default_domain(group, req_type);
2946  	if (IS_ERR(dom))
2947  		return PTR_ERR(dom);
2948  
2949  	if (group->default_domain == dom)
2950  		return 0;
2951  
2952  	if (iommu_is_dma_domain(dom)) {
2953  		ret = iommu_get_dma_cookie(dom);
2954  		if (ret) {
2955  			iommu_domain_free(dom);
2956  			return ret;
2957  		}
2958  	}
2959  
2960  	/*
2961  	 * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be
2962  	 * mapped before their device is attached, in order to guarantee
2963  	 * continuity with any FW activity
2964  	 */
2965  	direct_failed = false;
2966  	for_each_group_device(group, gdev) {
2967  		if (iommu_create_device_direct_mappings(dom, gdev->dev)) {
2968  			direct_failed = true;
2969  			dev_warn_once(
2970  				gdev->dev->iommu->iommu_dev->dev,
2971  				"IOMMU driver was not able to establish FW requested direct mapping.");
2972  		}
2973  	}
2974  
2975  	/* We must set default_domain early for __iommu_device_set_domain */
2976  	group->default_domain = dom;
2977  	if (!group->domain) {
2978  		/*
2979  		 * Drivers are not allowed to fail the first domain attach.
2980  		 * The only way to recover from this is to fail attaching the
2981  		 * iommu driver and call ops->release_device. Put the domain
2982  		 * in group->default_domain so it is freed after.
2983  		 */
2984  		ret = __iommu_group_set_domain_internal(
2985  			group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
2986  		if (WARN_ON(ret))
2987  			goto out_free_old;
2988  	} else {
2989  		ret = __iommu_group_set_domain(group, dom);
2990  		if (ret)
2991  			goto err_restore_def_domain;
2992  	}
2993  
2994  	/*
2995  	 * Drivers are supposed to allow mappings to be installed in a domain
2996  	 * before device attachment, but some don't. Hack around this defect by
2997  	 * trying again after attaching. If this happens it means the device
2998  	 * will not continuously have the IOMMU_RESV_DIRECT map.
2999  	 */
3000  	if (direct_failed) {
3001  		for_each_group_device(group, gdev) {
3002  			ret = iommu_create_device_direct_mappings(dom, gdev->dev);
3003  			if (ret)
3004  				goto err_restore_domain;
3005  		}
3006  	}
3007  
3008  out_free_old:
3009  	if (old_dom)
3010  		iommu_domain_free(old_dom);
3011  	return ret;
3012  
3013  err_restore_domain:
3014  	if (old_dom)
3015  		__iommu_group_set_domain_internal(
3016  			group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
3017  err_restore_def_domain:
3018  	if (old_dom) {
3019  		iommu_domain_free(dom);
3020  		group->default_domain = old_dom;
3021  	}
3022  	return ret;
3023  }
3024  
3025  /*
3026   * Changing the default domain through sysfs requires the users to unbind the
3027   * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ
3028   * transition. Return failure if this isn't met.
3029   *
3030   * We need to consider the race between this and the device release path.
3031   * group->mutex is used here to guarantee that the device release path
3032   * will not be entered at the same time.
3033   */
3034  static ssize_t iommu_group_store_type(struct iommu_group *group,
3035  				      const char *buf, size_t count)
3036  {
3037  	struct group_device *gdev;
3038  	int ret, req_type;
3039  
3040  	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
3041  		return -EACCES;
3042  
3043  	if (WARN_ON(!group) || !group->default_domain)
3044  		return -EINVAL;
3045  
3046  	if (sysfs_streq(buf, "identity"))
3047  		req_type = IOMMU_DOMAIN_IDENTITY;
3048  	else if (sysfs_streq(buf, "DMA"))
3049  		req_type = IOMMU_DOMAIN_DMA;
3050  	else if (sysfs_streq(buf, "DMA-FQ"))
3051  		req_type = IOMMU_DOMAIN_DMA_FQ;
3052  	else if (sysfs_streq(buf, "auto"))
3053  		req_type = 0;
3054  	else
3055  		return -EINVAL;
3056  
3057  	mutex_lock(&group->mutex);
3058  	/* We can bring up a flush queue without tearing down the domain. */
3059  	if (req_type == IOMMU_DOMAIN_DMA_FQ &&
3060  	    group->default_domain->type == IOMMU_DOMAIN_DMA) {
3061  		ret = iommu_dma_init_fq(group->default_domain);
3062  		if (ret)
3063  			goto out_unlock;
3064  
3065  		group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
3066  		ret = count;
3067  		goto out_unlock;
3068  	}
3069  
3070  	/* Otherwise, ensure that device exists and no driver is bound. */
3071  	if (list_empty(&group->devices) || group->owner_cnt) {
3072  		ret = -EPERM;
3073  		goto out_unlock;
3074  	}
3075  
3076  	ret = iommu_setup_default_domain(group, req_type);
3077  	if (ret)
3078  		goto out_unlock;
3079  
3080  	/* Make sure dma_ops is appropriately set */
3081  	for_each_group_device(group, gdev)
3082  		iommu_setup_dma_ops(gdev->dev);
3083  
3084  out_unlock:
3085  	mutex_unlock(&group->mutex);
3086  	return ret ?: count;
3087  }
3088  
3089  /**
3090   * iommu_device_use_default_domain() - Device driver wants to handle device
3091   *                                     DMA through the kernel DMA API.
3092   * @dev: The device.
3093   *
3094   * The device driver about to bind @dev wants to do DMA through the kernel
3095   * DMA API. Return 0 if it is allowed, otherwise an error.
3096   */
3097  int iommu_device_use_default_domain(struct device *dev)
3098  {
3099  	/* Caller is the driver core during the pre-probe path */
3100  	struct iommu_group *group = dev->iommu_group;
3101  	int ret = 0;
3102  
3103  	if (!group)
3104  		return 0;
3105  
3106  	mutex_lock(&group->mutex);
3107  	/* We may race against bus_iommu_probe() finalising groups here */
3108  	if (!group->default_domain) {
3109  		ret = -EPROBE_DEFER;
3110  		goto unlock_out;
3111  	}
3112  	if (group->owner_cnt) {
3113  		if (group->domain != group->default_domain || group->owner ||
3114  		    !xa_empty(&group->pasid_array)) {
3115  			ret = -EBUSY;
3116  			goto unlock_out;
3117  		}
3118  	}
3119  
3120  	group->owner_cnt++;
3121  
3122  unlock_out:
3123  	mutex_unlock(&group->mutex);
3124  	return ret;
3125  }
3126  
3127  /**
3128   * iommu_device_unuse_default_domain() - Device driver stops handling device
3129   *                                       DMA through the kernel DMA API.
3130   * @dev: The device.
3131   *
3132   * The device driver doesn't want to do DMA through kernel DMA API anymore.
3133   * It must be called after iommu_device_use_default_domain().
3134   */
3135  void iommu_device_unuse_default_domain(struct device *dev)
3136  {
3137  	/* Caller is the driver core during the post-probe path */
3138  	struct iommu_group *group = dev->iommu_group;
3139  
3140  	if (!group)
3141  		return;
3142  
3143  	mutex_lock(&group->mutex);
3144  	if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array)))
3145  		group->owner_cnt--;
3146  
3147  	mutex_unlock(&group->mutex);
3148  }
3149  
3150  static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
3151  {
3152  	struct device *dev = iommu_group_first_dev(group);
3153  	const struct iommu_ops *ops = dev_iommu_ops(dev);
3154  	struct iommu_domain *domain;
3155  
3156  	if (group->blocking_domain)
3157  		return 0;
3158  
3159  	if (ops->blocked_domain) {
3160  		group->blocking_domain = ops->blocked_domain;
3161  		return 0;
3162  	}
3163  
3164  	/*
3165  	 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an
3166  	 * empty PAGING domain instead.
3167  	 */
3168  	domain = iommu_paging_domain_alloc(dev);
3169  	if (IS_ERR(domain))
3170  		return PTR_ERR(domain);
3171  	group->blocking_domain = domain;
3172  	return 0;
3173  }
3174  
3175  static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner)
3176  {
3177  	int ret;
3178  
3179  	if ((group->domain && group->domain != group->default_domain) ||
3180  	    !xa_empty(&group->pasid_array))
3181  		return -EBUSY;
3182  
3183  	ret = __iommu_group_alloc_blocking_domain(group);
3184  	if (ret)
3185  		return ret;
3186  	ret = __iommu_group_set_domain(group, group->blocking_domain);
3187  	if (ret)
3188  		return ret;
3189  
3190  	group->owner = owner;
3191  	group->owner_cnt++;
3192  	return 0;
3193  }
3194  
3195  /**
3196   * iommu_group_claim_dma_owner() - Set DMA ownership of a group
3197   * @group: The group.
3198   * @owner: Caller specified pointer. Used for exclusive ownership.
3199   *
3200   * This is to support backward compatibility for vfio which manages the dma
3201   * ownership at the iommu_group level. New invocations on this interface should be
3202   * prohibited. Only a single owner may exist for a group.
3203   */
3204  int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
3205  {
3206  	int ret = 0;
3207  
3208  	if (WARN_ON(!owner))
3209  		return -EINVAL;
3210  
3211  	mutex_lock(&group->mutex);
3212  	if (group->owner_cnt) {
3213  		ret = -EPERM;
3214  		goto unlock_out;
3215  	}
3216  
3217  	ret = __iommu_take_dma_ownership(group, owner);
3218  unlock_out:
3219  	mutex_unlock(&group->mutex);
3220  
3221  	return ret;
3222  }
3223  EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
3224  
3225  /**
3226   * iommu_device_claim_dma_owner() - Set DMA ownership of a device
3227   * @dev: The device.
3228   * @owner: Caller specified pointer. Used for exclusive ownership.
3229   *
3230   * Claim the DMA ownership of a device. Multiple devices in the same group may
3231   * concurrently claim ownership if they present the same owner value. Returns 0
3232   * on success and error code on failure
3233   */
3234  int iommu_device_claim_dma_owner(struct device *dev, void *owner)
3235  {
3236  	/* Caller must be a probed driver on dev */
3237  	struct iommu_group *group = dev->iommu_group;
3238  	int ret = 0;
3239  
3240  	if (WARN_ON(!owner))
3241  		return -EINVAL;
3242  
3243  	if (!group)
3244  		return -ENODEV;
3245  
3246  	mutex_lock(&group->mutex);
3247  	if (group->owner_cnt) {
3248  		if (group->owner != owner) {
3249  			ret = -EPERM;
3250  			goto unlock_out;
3251  		}
3252  		group->owner_cnt++;
3253  		goto unlock_out;
3254  	}
3255  
3256  	ret = __iommu_take_dma_ownership(group, owner);
3257  unlock_out:
3258  	mutex_unlock(&group->mutex);
3259  	return ret;
3260  }
3261  EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
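/*
 * Illustrative sketch (not part of the original source): the claim/attach/
 * release pattern used by user-space DMA frameworks. "my_domain", "my_dev" and
 * "my_owner_cookie" are assumed to be provided by the caller.
 *
 *	ret = iommu_device_claim_dma_owner(my_dev, my_owner_cookie);
 *	if (ret)
 *		return ret;
 *	ret = iommu_attach_device(my_domain, my_dev);
 *	if (ret) {
 *		iommu_device_release_dma_owner(my_dev);
 *		return ret;
 *	}
 *	... user-controlled DMA runs through my_domain ...
 *	iommu_detach_device(my_domain, my_dev);
 *	iommu_device_release_dma_owner(my_dev);
 */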
3262  
3263  static void __iommu_release_dma_ownership(struct iommu_group *group)
3264  {
3265  	if (WARN_ON(!group->owner_cnt || !group->owner ||
3266  		    !xa_empty(&group->pasid_array)))
3267  		return;
3268  
3269  	group->owner_cnt = 0;
3270  	group->owner = NULL;
3271  	__iommu_group_set_domain_nofail(group, group->default_domain);
3272  }
3273  
3274  /**
3275   * iommu_group_release_dma_owner() - Release DMA ownership of a group
3276   * @group: The group
3277   *
3278   * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
3279   */
3280  void iommu_group_release_dma_owner(struct iommu_group *group)
3281  {
3282  	mutex_lock(&group->mutex);
3283  	__iommu_release_dma_ownership(group);
3284  	mutex_unlock(&group->mutex);
3285  }
3286  EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
3287  
3288  /**
3289   * iommu_device_release_dma_owner() - Release DMA ownership of a device
3290   * @dev: The device.
3291   *
3292   * Release the DMA ownership claimed by iommu_device_claim_dma_owner().
3293   */
3294  void iommu_device_release_dma_owner(struct device *dev)
3295  {
3296  	/* Caller must be a probed driver on dev */
3297  	struct iommu_group *group = dev->iommu_group;
3298  
3299  	mutex_lock(&group->mutex);
3300  	if (group->owner_cnt > 1)
3301  		group->owner_cnt--;
3302  	else
3303  		__iommu_release_dma_ownership(group);
3304  	mutex_unlock(&group->mutex);
3305  }
3306  EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
3307  
3308  /**
3309   * iommu_group_dma_owner_claimed() - Query group dma ownership status
3310   * @group: The group.
3311   *
3312   * This provides a status query on a given group. It is racy and only for
3313   * non-binding status reporting.
3314   */
3315  bool iommu_group_dma_owner_claimed(struct iommu_group *group)
3316  {
3317  	unsigned int user;
3318  
3319  	mutex_lock(&group->mutex);
3320  	user = group->owner_cnt;
3321  	mutex_unlock(&group->mutex);
3322  
3323  	return user;
3324  }
3325  EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
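/*
 * Because the result is racy, it is only suitable for informational output,
 * for example in a hypothetical sysfs show callback:
 *
 *	return sysfs_emit(buf, "%d\n", iommu_group_dma_owner_claimed(group));
 */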
3326  
3327  static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
3328  				   struct iommu_domain *domain)
3329  {
3330  	const struct iommu_ops *ops = dev_iommu_ops(dev);
3331  	struct iommu_domain *blocked_domain = ops->blocked_domain;
3332  
3333  	WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain,
3334  						   dev, pasid, domain));
3335  }
3336  
3337  static int __iommu_set_group_pasid(struct iommu_domain *domain,
3338  				   struct iommu_group *group, ioasid_t pasid)
3339  {
3340  	struct group_device *device, *last_gdev;
3341  	int ret;
3342  
3343  	for_each_group_device(group, device) {
3344  		ret = domain->ops->set_dev_pasid(domain, device->dev,
3345  						 pasid, NULL);
3346  		if (ret)
3347  			goto err_revert;
3348  	}
3349  
3350  	return 0;
3351  
3352  err_revert:
3353  	last_gdev = device;
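	/*
	 * Walk the group again and undo the PASID attach on every device
	 * handled before the one that failed.
	 */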
3354  	for_each_group_device(group, device) {
3355  		if (device == last_gdev)
3356  			break;
3357  		iommu_remove_dev_pasid(device->dev, pasid, domain);
3358  	}
3359  	return ret;
3360  }
3361  
3362  static void __iommu_remove_group_pasid(struct iommu_group *group,
3363  				       ioasid_t pasid,
3364  				       struct iommu_domain *domain)
3365  {
3366  	struct group_device *device;
3367  
3368  	for_each_group_device(group, device)
3369  		iommu_remove_dev_pasid(device->dev, pasid, domain);
3370  }
3371  
3372  /**
3373   * iommu_attach_device_pasid() - Attach a domain to pasid of device
3374   * @domain: the iommu domain.
3375   * @dev: the attached device.
3376   * @pasid: the pasid of the device.
3377   * @handle: the attach handle.
3378   *
3379   * Return: 0 on success, or an error.
3380   */
3381  int iommu_attach_device_pasid(struct iommu_domain *domain,
3382  			      struct device *dev, ioasid_t pasid,
3383  			      struct iommu_attach_handle *handle)
3384  {
3385  	/* Caller must be a probed driver on dev */
3386  	struct iommu_group *group = dev->iommu_group;
3387  	struct group_device *device;
3388  	const struct iommu_ops *ops;
3389  	void *entry;
3390  	int ret;
3391  
3392  	if (!group)
3393  		return -ENODEV;
3394  
3395  	ops = dev_iommu_ops(dev);
3396  
3397  	if (!domain->ops->set_dev_pasid ||
3398  	    !ops->blocked_domain ||
3399  	    !ops->blocked_domain->ops->set_dev_pasid)
3400  		return -EOPNOTSUPP;
3401  
3402  	if (ops != domain->owner || pasid == IOMMU_NO_PASID)
3403  		return -EINVAL;
3404  
3405  	mutex_lock(&group->mutex);
3406  	for_each_group_device(group, device) {
3407  		if (pasid >= device->dev->iommu->max_pasids) {
3408  			ret = -EINVAL;
3409  			goto out_unlock;
3410  		}
3411  	}
3412  
3413  	entry = iommu_make_pasid_array_entry(domain, handle);
3414  
3415  	/*
3416  	 * Entry present is a failure case. Use xa_insert() instead of
3417  	 * xa_reserve().
3418  	 */
3419  	ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL);
3420  	if (ret)
3421  		goto out_unlock;
3422  
3423  	ret = __iommu_set_group_pasid(domain, group, pasid);
3424  	if (ret) {
3425  		xa_release(&group->pasid_array, pasid);
3426  		goto out_unlock;
3427  	}
3428  
3429  	/*
3430  	 * The xa_insert() above reserved the memory, and the group->mutex is
3431  	 * held, this cannot fail. The new domain cannot be visible until the
3432  	 * operation succeeds as we cannot tolerate PRIs becoming concurrently
3433  	 * queued and then failing attach.
3434  	 */
3435  	WARN_ON(xa_is_err(xa_store(&group->pasid_array,
3436  				   pasid, entry, GFP_KERNEL)));
3437  
3438  out_unlock:
3439  	mutex_unlock(&group->mutex);
3440  	return ret;
3441  }
3442  EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
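/*
 * Illustrative sketch (hypothetical caller): attach a paging domain to a
 * PASID of a device and tear it down again. The domain allocation helper
 * chosen here and passing a NULL handle are assumptions of the example,
 * not requirements of this API.
 *
 *	domain = iommu_paging_domain_alloc(dev);
 *	if (IS_ERR(domain))
 *		return PTR_ERR(domain);
 *	ret = iommu_attach_device_pasid(domain, dev, pasid, NULL);
 *	if (ret) {
 *		iommu_domain_free(domain);
 *		return ret;
 *	}
 *	...
 *	iommu_detach_device_pasid(domain, dev, pasid);
 *	iommu_domain_free(domain);
 */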
3443  
3444  /**
3445   * iommu_detach_device_pasid() - Detach the domain from pasid of device
3446   * @domain: the iommu domain.
3447   * @dev: the attached device.
3448   * @pasid: the pasid of the device.
3449   *
3450   * The @domain must have been attached to @pasid of the @dev with
3451   * iommu_attach_device_pasid().
3452   */
3453  void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
3454  			       ioasid_t pasid)
3455  {
3456  	/* Caller must be a probed driver on dev */
3457  	struct iommu_group *group = dev->iommu_group;
3458  
3459  	mutex_lock(&group->mutex);
3460  	__iommu_remove_group_pasid(group, pasid, domain);
3461  	xa_erase(&group->pasid_array, pasid);
3462  	mutex_unlock(&group->mutex);
3463  }
3464  EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
3465  
3466  ioasid_t iommu_alloc_global_pasid(struct device *dev)
3467  {
3468  	int ret;
3469  
3470  	/* max_pasids == 0 means that the device does not support PASID */
3471  	if (!dev->iommu->max_pasids)
3472  		return IOMMU_PASID_INVALID;
3473  
3474  	/*
3475  	 * max_pasids is set up by vendor driver based on number of PASID bits
3476  	 * max_pasids is set up by the vendor driver based on the number of PASID
3477  	 * bits supported, but the IDA allocation range is inclusive.
3478  	ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID,
3479  			      dev->iommu->max_pasids - 1, GFP_KERNEL);
3480  	return ret < 0 ? IOMMU_PASID_INVALID : ret;
3481  }
3482  EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid);
3483  
3484  void iommu_free_global_pasid(ioasid_t pasid)
3485  {
3486  	if (WARN_ON(pasid == IOMMU_PASID_INVALID))
3487  		return;
3488  
3489  	ida_free(&iommu_global_pasid_ida, pasid);
3490  }
3491  EXPORT_SYMBOL_GPL(iommu_free_global_pasid);
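/*
 * Illustrative pairing of the two helpers above (hypothetical caller; the
 * error code is the example's choice):
 *
 *	ioasid_t pasid = iommu_alloc_global_pasid(dev);
 *
 *	if (pasid == IOMMU_PASID_INVALID)
 *		return -ENOSPC;
 *	... use the PASID ...
 *	iommu_free_global_pasid(pasid);
 */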
3492  
3493  /**
3494   * iommu_attach_handle_get - Return the attach handle
3495   * @group: the iommu group that domain was attached to
3496   * @pasid: the pasid within the group
3497   * @type: matched domain type, 0 for any match
3498   *
3499   * Returns the handle, ERR_PTR(-ENOENT) if none, or ERR_PTR(-EBUSY) on type mismatch.
3500   *
3501   * Return the attach handle to the caller. The life cycle of an iommu attach
3502   * handle is from the time when the domain is attached to the time when the
3503   * domain is detached. Callers are required to synchronize the call of
3504   * iommu_attach_handle_get() with domain attachment and detachment. The attach
3505   * handle can only be used during its life cycle.
3506   */
3507  struct iommu_attach_handle *
3508  iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type)
3509  {
3510  	struct iommu_attach_handle *handle;
3511  	void *entry;
3512  
3513  	xa_lock(&group->pasid_array);
3514  	entry = xa_load(&group->pasid_array, pasid);
3515  	if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) {
3516  		handle = ERR_PTR(-ENOENT);
3517  	} else {
3518  		handle = xa_untag_pointer(entry);
3519  		if (type && handle->domain->type != type)
3520  			handle = ERR_PTR(-EBUSY);
3521  	}
3522  	xa_unlock(&group->pasid_array);
3523  
3524  	return handle;
3525  }
3526  EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL");
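/*
 * Illustrative sketch of a fault-path lookup (hypothetical caller, which per
 * the comment above must serialize against attach/detach of this PASID):
 *
 *	handle = iommu_attach_handle_get(group, pasid, IOMMU_DOMAIN_SVA);
 *	if (IS_ERR(handle))
 *		return PTR_ERR(handle);
 *	... report the fault against handle->domain ...
 */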
3527  
3528  /**
3529   * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group
3530   * @domain: IOMMU domain to attach
3531   * @group: IOMMU group that will be attached
3532   * @handle: attach handle
3533   *
3534   * Returns 0 on success and an error code on failure.
3535   *
3536   * This is a variant of iommu_attach_group(). It allows the caller to provide
3537   * an attach handle and use it when the domain is attached. This is currently
3538   * used by IOMMUFD to deliver the I/O page faults.
3539   */
3540  int iommu_attach_group_handle(struct iommu_domain *domain,
3541  			      struct iommu_group *group,
3542  			      struct iommu_attach_handle *handle)
3543  {
3544  	void *entry;
3545  	int ret;
3546  
3547  	if (!handle)
3548  		return -EINVAL;
3549  
3550  	mutex_lock(&group->mutex);
3551  	entry = iommu_make_pasid_array_entry(domain, handle);
3552  	ret = xa_insert(&group->pasid_array,
3553  			IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL);
3554  	if (ret)
3555  		goto out_unlock;
3556  
3557  	ret = __iommu_attach_group(domain, group);
3558  	if (ret) {
3559  		xa_release(&group->pasid_array, IOMMU_NO_PASID);
3560  		goto out_unlock;
3561  	}
3562  
3563  	/*
3564  	 * The xa_insert() above reserved the memory, and the group->mutex is
3565  	 * held, this cannot fail. The new domain cannot be visible until the
3566  	 * operation succeeds as we cannot tolerate PRIs becoming concurrently
3567  	 * queued and then failing attach.
3568  	 */
3569  	WARN_ON(xa_is_err(xa_store(&group->pasid_array,
3570  				   IOMMU_NO_PASID, entry, GFP_KERNEL)));
3571  
3572  out_unlock:
3573  	mutex_unlock(&group->mutex);
3574  	return ret;
3575  }
3576  EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, "IOMMUFD_INTERNAL");
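/*
 * Illustrative sketch (hypothetical IOMMUFD-style caller that embeds the
 * handle in its own attach-tracking structure; "my_attach" is an assumption
 * of the example):
 *
 *	struct my_attach {
 *		struct iommu_attach_handle handle;
 *	};
 *
 *	ret = iommu_attach_group_handle(domain, group, &my_attach->handle);
 *	if (ret)
 *		return ret;
 *	...
 *	iommu_detach_group_handle(domain, group);
 */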
3577  
3578  /**
3579   * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group
3580   * @domain: IOMMU domain to detach
3581   * @group: IOMMU group that will be detached
3582   *
3583   * Detach the specified IOMMU domain from the specified IOMMU group.
3584   * It must be used in conjunction with iommu_attach_group_handle().
3585   */
3586  void iommu_detach_group_handle(struct iommu_domain *domain,
3587  			       struct iommu_group *group)
3588  {
3589  	mutex_lock(&group->mutex);
3590  	__iommu_group_set_core_domain(group);
3591  	xa_erase(&group->pasid_array, IOMMU_NO_PASID);
3592  	mutex_unlock(&group->mutex);
3593  }
3594  EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL");
3595  
3596  /**
3597   * iommu_replace_group_handle - replace the domain that a group is attached to
3598   * @group: IOMMU group that will be attached to the new domain
3599   * @new_domain: new IOMMU domain to replace with
3600   * @handle: attach handle
3601   *
3602   * This API allows the group to switch domains without being forced to go to
3603   * the blocking domain in-between. It allows the caller to provide an attach
3604   * handle for the new domain and use it when the domain is attached.
3605   *
3606   * If the currently attached domain is a core domain (e.g. a default_domain),
3607   * it will act just like iommu_attach_group_handle().
3608   */
3609  int iommu_replace_group_handle(struct iommu_group *group,
3610  			       struct iommu_domain *new_domain,
3611  			       struct iommu_attach_handle *handle)
3612  {
3613  	void *curr, *entry;
3614  	int ret;
3615  
3616  	if (!new_domain || !handle)
3617  		return -EINVAL;
3618  
3619  	mutex_lock(&group->mutex);
3620  	entry = iommu_make_pasid_array_entry(new_domain, handle);
3621  	ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
3622  	if (ret)
3623  		goto err_unlock;
3624  
3625  	ret = __iommu_group_set_domain(group, new_domain);
3626  	if (ret)
3627  		goto err_release;
3628  
3629  	curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL);
3630  	WARN_ON(xa_is_err(curr));
3631  
3632  	mutex_unlock(&group->mutex);
3633  
3634  	return 0;
3635  err_release:
3636  	xa_release(&group->pasid_array, IOMMU_NO_PASID);
3637  err_unlock:
3638  	mutex_unlock(&group->mutex);
3639  	return ret;
3640  }
3641  EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
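/*
 * Illustrative sketch of a hitless switch from one caller-owned domain to
 * another (hypothetical caller; new_handle is owned and initialized by it):
 *
 *	ret = iommu_replace_group_handle(group, new_domain, &new_handle);
 *	if (ret)
 *		return ret;
 *	... the previous domain is no longer attached and may be freed ...
 */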
3642  
3643  #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
3644  /**
3645   * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
3646   * @desc: MSI descriptor, will store the MSI page
3647   * @msi_addr: MSI target address to be mapped
3648   *
3649   * The implementation of sw_msi() should take msi_addr, map it to an IOVA
3650   * in the domain, and call msi_desc_set_iommu_msi_iova() with the
3651   * mapping information.
3652   *
3653   * Return: 0 on success or negative error code if the mapping failed.
3654   */
3655  int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
3656  {
3657  	struct device *dev = msi_desc_to_dev(desc);
3658  	struct iommu_group *group = dev->iommu_group;
3659  	int ret = 0;
3660  
3661  	if (!group)
3662  		return 0;
3663  
3664  	mutex_lock(&group->mutex);
3665  	if (group->domain && group->domain->sw_msi)
3666  		ret = group->domain->sw_msi(group->domain, desc, msi_addr);
3667  	mutex_unlock(&group->mutex);
3668  	return ret;
3669  }
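/*
 * Illustrative sw_msi() sketch (hypothetical domain owner). The callback
 * signature matches the invocation above; the body, the my_map_msi_page()
 * helper, and the exact parameters passed to msi_desc_set_iommu_msi_iova()
 * are assumptions of the example: map msi_addr into the domain's IOVA space,
 * then record the mapping as described in the kernel-doc above.
 *
 *	static int my_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
 *			     phys_addr_t msi_addr)
 *	{
 *		dma_addr_t iova;
 *		int ret;
 *
 *		ret = my_map_msi_page(domain, msi_addr, &iova);
 *		if (ret)
 *			return ret;
 *		msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT);
 *		return 0;
 *	}
 */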
3670  #endif /* CONFIG_IRQ_MSI_IOMMU */
3671