// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"iommu: " fmt

#include <linux/amba/bus.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitops.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/fsl/mc.h>
#include <linux/module.h>
#include <linux/cc_platform.h>
#include <linux/cdx/cdx_bus.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>
#include <uapi/linux/iommufd.h>

#include "dma-iommu.h"
#include "iommu-priv.h"

static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
static DEFINE_IDA(iommu_global_pasid_ida);

static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
static u32 iommu_cmd_line __read_mostly;

/* Tags used with xa_tag_pointer() in group->pasid_array */
enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 };

struct iommu_group {
	struct kobject kobj;
	struct kobject *devices_kobj;
	struct list_head devices;
	struct xarray pasid_array;
	struct mutex mutex;
	void *iommu_data;
	void (*iommu_data_release)(void *iommu_data);
	char *name;
	int id;
	struct iommu_domain *default_domain;
	struct iommu_domain *blocking_domain;
	struct iommu_domain *domain;
	struct list_head entry;
	unsigned int owner_cnt;
	void *owner;
};

struct group_device {
	struct list_head list;
	struct device *dev;
	char *name;
};

/* Iterate over each struct group_device in a struct iommu_group */
#define for_each_group_device(group, pos) \
	list_for_each_entry(pos, &(group)->devices, list)

struct iommu_group_attribute {
	struct attribute attr;
	ssize_t (*show)(struct iommu_group *group, char *buf);
	ssize_t (*store)(struct iommu_group *group,
			 const char *buf, size_t count);
};

static const char * const iommu_group_resv_type_string[] = {
	[IOMMU_RESV_DIRECT]		= "direct",
	[IOMMU_RESV_DIRECT_RELAXABLE]	= "direct-relaxable",
	[IOMMU_RESV_RESERVED]		= "reserved",
	[IOMMU_RESV_MSI]		= "msi",
	[IOMMU_RESV_SW_MSI]		= "msi",
};

#define IOMMU_CMD_LINE_DMA_API		BIT(0)
#define IOMMU_CMD_LINE_STRICT		BIT(1)

static int bus_iommu_probe(const struct bus_type *bus);
static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev, struct iommu_domain *old);
static int __iommu_attach_group(struct iommu_domain *domain,
				struct iommu_group *group);
static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev,
							      unsigned int type,
							      unsigned int flags);

enum {
	IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
};

static int __iommu_device_set_domain(struct iommu_group *group,
				     struct device *dev,
				     struct iommu_domain *new_domain,
				     struct iommu_domain *old_domain,
				     unsigned int flags);
static int __iommu_group_set_domain_internal(struct iommu_group *group,
					     struct iommu_domain *new_domain,
					     unsigned int flags);
static int __iommu_group_set_domain(struct iommu_group *group,
				    struct iommu_domain *new_domain)
{
	return __iommu_group_set_domain_internal(group, new_domain, 0);
}
static void __iommu_group_set_domain_nofail(struct iommu_group *group,
					    struct iommu_domain *new_domain)
{
	WARN_ON(__iommu_group_set_domain_internal(
		group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
}

static int iommu_setup_default_domain(struct iommu_group *group,
				      int target_type);
static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
					       struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
				      const char *buf, size_t count);
static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
						     struct device *dev);
static void __iommu_group_free_device(struct iommu_group *group,
				      struct group_device *grp_dev);
static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
			      const struct iommu_ops *ops);

#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
struct iommu_group_attribute iommu_group_attr_##_name =	\
	__ATTR(_name, _mode, _show, _store)

#define to_iommu_group_attr(_attr)	\
	container_of(_attr, struct iommu_group_attribute, attr)
#define to_iommu_group(_kobj)		\
	container_of(_kobj, struct iommu_group, kobj)

static LIST_HEAD(iommu_device_list);
static DEFINE_SPINLOCK(iommu_device_lock);

static const struct bus_type * const iommu_buses[] = {
	&platform_bus_type,
#ifdef CONFIG_PCI
	&pci_bus_type,
#endif
#ifdef CONFIG_ARM_AMBA
	&amba_bustype,
#endif
#ifdef CONFIG_FSL_MC_BUS
	&fsl_mc_bus_type,
#endif
#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
	&host1x_context_device_bus_type,
#endif
#ifdef CONFIG_CDX_BUS
	&cdx_bus_type,
#endif
};

/*
 * Use a function instead of an array here because the domain-type is a
 * bit-field, so an array would waste memory.
 */
static const char *iommu_domain_type_str(unsigned int t)
{
	switch (t) {
	case IOMMU_DOMAIN_BLOCKED:
		return "Blocked";
	case IOMMU_DOMAIN_IDENTITY:
		return "Passthrough";
	case IOMMU_DOMAIN_UNMANAGED:
		return "Unmanaged";
	case IOMMU_DOMAIN_DMA:
	case IOMMU_DOMAIN_DMA_FQ:
		return "Translated";
	case IOMMU_DOMAIN_PLATFORM:
		return "Platform";
	default:
		return "Unknown";
	}
}

static int __init iommu_subsys_init(void)
{
	struct notifier_block *nb;

	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
			iommu_set_default_passthrough(false);
		else
			iommu_set_default_translated(false);

		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
			iommu_set_default_translated(false);
		}
	}

	if (!iommu_default_passthrough() && !iommu_dma_strict)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;

	pr_info("Default domain type: %s%s\n",
		iommu_domain_type_str(iommu_def_domain_type),
		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
223 " (set via kernel command line)" : ""); 224 225 if (!iommu_default_passthrough()) 226 pr_info("DMA domain TLB invalidation policy: %s mode%s\n", 227 iommu_dma_strict ? "strict" : "lazy", 228 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 229 " (set via kernel command line)" : ""); 230 231 nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); 232 if (!nb) 233 return -ENOMEM; 234 235 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 236 nb[i].notifier_call = iommu_bus_notifier; 237 bus_register_notifier(iommu_buses[i], &nb[i]); 238 } 239 240 return 0; 241 } 242 subsys_initcall(iommu_subsys_init); 243 244 static int remove_iommu_group(struct device *dev, void *data) 245 { 246 if (dev->iommu && dev->iommu->iommu_dev == data) 247 iommu_release_device(dev); 248 249 return 0; 250 } 251 252 /** 253 * iommu_device_register() - Register an IOMMU hardware instance 254 * @iommu: IOMMU handle for the instance 255 * @ops: IOMMU ops to associate with the instance 256 * @hwdev: (optional) actual instance device, used for fwnode lookup 257 * 258 * Return: 0 on success, or an error. 259 */ 260 int iommu_device_register(struct iommu_device *iommu, 261 const struct iommu_ops *ops, struct device *hwdev) 262 { 263 int err = 0; 264 265 /* We need to be able to take module references appropriately */ 266 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 267 return -EINVAL; 268 269 iommu->ops = ops; 270 if (hwdev) 271 iommu->fwnode = dev_fwnode(hwdev); 272 273 spin_lock(&iommu_device_lock); 274 list_add_tail(&iommu->list, &iommu_device_list); 275 spin_unlock(&iommu_device_lock); 276 277 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) 278 err = bus_iommu_probe(iommu_buses[i]); 279 if (err) 280 iommu_device_unregister(iommu); 281 else 282 WRITE_ONCE(iommu->ready, true); 283 return err; 284 } 285 EXPORT_SYMBOL_GPL(iommu_device_register); 286 287 void iommu_device_unregister(struct iommu_device *iommu) 288 { 289 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 290 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 291 292 spin_lock(&iommu_device_lock); 293 list_del(&iommu->list); 294 spin_unlock(&iommu_device_lock); 295 296 /* Pairs with the alloc in generic_single_device_group() */ 297 iommu_group_put(iommu->singleton_group); 298 iommu->singleton_group = NULL; 299 } 300 EXPORT_SYMBOL_GPL(iommu_device_unregister); 301 302 #if IS_ENABLED(CONFIG_IOMMUFD_TEST) 303 void iommu_device_unregister_bus(struct iommu_device *iommu, 304 const struct bus_type *bus, 305 struct notifier_block *nb) 306 { 307 bus_unregister_notifier(bus, nb); 308 fwnode_remove_software_node(iommu->fwnode); 309 iommu_device_unregister(iommu); 310 } 311 EXPORT_SYMBOL_GPL(iommu_device_unregister_bus); 312 313 /* 314 * Register an iommu driver against a single bus. This is only used by iommufd 315 * selftest to create a mock iommu driver. The caller must provide 316 * some memory to hold a notifier_block. 
 */
int iommu_device_register_bus(struct iommu_device *iommu,
			      const struct iommu_ops *ops,
			      const struct bus_type *bus,
			      struct notifier_block *nb)
{
	int err;

	iommu->ops = ops;
	nb->notifier_call = iommu_bus_notifier;
	err = bus_register_notifier(bus, nb);
	if (err)
		return err;

	iommu->fwnode = fwnode_create_software_node(NULL, NULL);
	if (IS_ERR(iommu->fwnode)) {
		bus_unregister_notifier(bus, nb);
		return PTR_ERR(iommu->fwnode);
	}

	spin_lock(&iommu_device_lock);
	list_add_tail(&iommu->list, &iommu_device_list);
	spin_unlock(&iommu_device_lock);

	err = bus_iommu_probe(bus);
	if (err) {
		iommu_device_unregister_bus(iommu, bus, nb);
		return err;
	}
	WRITE_ONCE(iommu->ready, true);
	return 0;
}
EXPORT_SYMBOL_GPL(iommu_device_register_bus);

int iommu_mock_device_add(struct device *dev, struct iommu_device *iommu)
{
	int rc;

	mutex_lock(&iommu_probe_device_lock);
	rc = iommu_fwspec_init(dev, iommu->fwnode);
	mutex_unlock(&iommu_probe_device_lock);

	if (rc)
		return rc;

	rc = device_add(dev);
	if (rc)
		iommu_fwspec_free(dev);
	return rc;
}
EXPORT_SYMBOL_GPL(iommu_mock_device_add);
#endif

static struct dev_iommu *dev_iommu_get(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;

	lockdep_assert_held(&iommu_probe_device_lock);

	if (param)
		return param;

	param = kzalloc(sizeof(*param), GFP_KERNEL);
	if (!param)
		return NULL;

	mutex_init(&param->lock);
	dev->iommu = param;
	return param;
}

void dev_iommu_free(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;

	dev->iommu = NULL;
	if (param->fwspec) {
		fwnode_handle_put(param->fwspec->iommu_fwnode);
		kfree(param->fwspec);
	}
	kfree(param);
}

/*
 * Internal equivalent of device_iommu_mapped() for when we care that a device
 * actually has API ops, and don't want false positives from VFIO-only groups.
 */
static bool dev_has_iommu(struct device *dev)
{
	return dev->iommu && dev->iommu->iommu_dev;
}

static u32 dev_iommu_get_max_pasids(struct device *dev)
{
	u32 max_pasids = 0, bits = 0;
	int ret;

	if (dev_is_pci(dev)) {
		ret = pci_max_pasids(to_pci_dev(dev));
		if (ret > 0)
			max_pasids = ret;
	} else {
		ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
		if (!ret)
			max_pasids = 1UL << bits;
	}

	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
}

void dev_iommu_priv_set(struct device *dev, void *priv)
{
	/* FSL_PAMU does something weird */
	if (!IS_ENABLED(CONFIG_FSL_PAMU))
		lockdep_assert_held(&iommu_probe_device_lock);
	dev->iommu->priv = priv;
}
EXPORT_SYMBOL_GPL(dev_iommu_priv_set);

/*
 * Init the dev->iommu and dev->iommu_group in the struct device and get the
 * driver probed
 */
static int iommu_init_device(struct device *dev)
{
	const struct iommu_ops *ops;
	struct iommu_device *iommu_dev;
	struct iommu_group *group;
	int ret;

	if (!dev_iommu_get(dev))
		return -ENOMEM;
	/*
	 * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing
	 * is buried in the bus dma_configure path. Properly unpicking that is
	 * still a big job, so for now just invoke the whole thing. The device
	 * already having a driver bound means dma_configure has already run and
	 * found no IOMMU to wait for, so there's no point calling it again.
	 */
	if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) {
		mutex_unlock(&iommu_probe_device_lock);
		dev->bus->dma_configure(dev);
		mutex_lock(&iommu_probe_device_lock);
		/* If another instance finished the job for us, skip it */
		if (!dev->iommu || dev->iommu_group)
			return -ENODEV;
	}
	/*
	 * At this point, relevant devices either now have a fwspec which will
	 * match ops registered with a non-NULL fwnode, or we can reasonably
	 * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
	 * be present, and that any of their registered instances has suitable
	 * ops for probing, and thus cheekily co-opt the same mechanism.
	 */
	ops = iommu_fwspec_ops(dev->iommu->fwspec);
	if (!ops) {
		ret = -ENODEV;
		goto err_free;
	}

	if (!try_module_get(ops->owner)) {
		ret = -EINVAL;
		goto err_free;
	}

	iommu_dev = ops->probe_device(dev);
	if (IS_ERR(iommu_dev)) {
		ret = PTR_ERR(iommu_dev);
		goto err_module_put;
	}
	dev->iommu->iommu_dev = iommu_dev;

	ret = iommu_device_link(iommu_dev, dev);
	if (ret)
		goto err_release;

	group = ops->device_group(dev);
	if (WARN_ON_ONCE(group == NULL))
		group = ERR_PTR(-EINVAL);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto err_unlink;
	}
	dev->iommu_group = group;

	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
	if (ops->is_attach_deferred)
		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
	return 0;

err_unlink:
	iommu_device_unlink(iommu_dev, dev);
err_release:
	if (ops->release_device)
		ops->release_device(dev);
err_module_put:
	module_put(ops->owner);
err_free:
	dev->iommu->iommu_dev = NULL;
	dev_iommu_free(dev);
	return ret;
}

static void iommu_deinit_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	lockdep_assert_held(&group->mutex);

	iommu_device_unlink(dev->iommu->iommu_dev, dev);

	/*
	 * release_device() must stop using any attached domain on the device.
	 * If there are still other devices in the group, they are not affected
	 * by this callback.
	 *
	 * If the iommu driver provides release_domain, the core code ensures
	 * that domain is attached prior to calling release_device. Drivers can
	 * use this to enforce a translation on the idle iommu. Typically, the
	 * global static blocked_domain is a good choice.
	 *
	 * Otherwise, the iommu driver must set the device to either an identity
	 * or a blocking translation in release_device() and stop using any
	 * domain pointer, as it is going to be freed.
	 *
	 * Regardless, if a delayed attach never occurred, then the release
	 * should still avoid touching any hardware configuration either.
	 */
	if (!dev->iommu->attach_deferred && ops->release_domain) {
		struct iommu_domain *release_domain = ops->release_domain;

		/*
		 * If the device requires direct mappings then it should not
		 * be parked on a BLOCKED domain during release as that would
		 * break the direct mappings.
		 */
		if (dev->iommu->require_direct && ops->identity_domain &&
		    release_domain == ops->blocked_domain)
			release_domain = ops->identity_domain;

		release_domain->ops->attach_dev(release_domain, dev,
						group->domain);
	}

	if (ops->release_device)
		ops->release_device(dev);

	/*
	 * If this is the last driver to use the group then we must free the
	 * domains before we do the module_put().
	 */
	if (list_empty(&group->devices)) {
		if (group->default_domain) {
			iommu_domain_free(group->default_domain);
			group->default_domain = NULL;
		}
		if (group->blocking_domain) {
			iommu_domain_free(group->blocking_domain);
			group->blocking_domain = NULL;
		}
		group->domain = NULL;
	}

	/* Caller must put iommu_group */
	dev->iommu_group = NULL;
	module_put(ops->owner);
	dev_iommu_free(dev);
#ifdef CONFIG_IOMMU_DMA
	dev->dma_iommu = false;
#endif
}

static struct iommu_domain *pasid_array_entry_to_domain(void *entry)
{
	if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN)
		return xa_untag_pointer(entry);
	return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain;
}

DEFINE_MUTEX(iommu_probe_device_lock);

static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
	struct iommu_group *group;
	struct group_device *gdev;
	int ret;

	/*
	 * Serialise to avoid races between IOMMU drivers registering in
	 * parallel and/or the "replay" calls from ACPI/OF code via client
	 * driver probe. Once the latter have been cleaned up we should
	 * probably be able to use device_lock() here to minimise the scope,
	 * but for now enforcing a simple global ordering is fine.
	 */
	lockdep_assert_held(&iommu_probe_device_lock);

	/* Device is probed already if in a group */
	if (dev->iommu_group)
		return 0;

	ret = iommu_init_device(dev);
	if (ret)
		return ret;
	/*
	 * And if we do now see any replay calls, they would indicate someone
	 * misusing the dma_configure path outside bus code.
	 */
	if (dev->driver)
		dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n");

	group = dev->iommu_group;
	gdev = iommu_group_alloc_device(group, dev);
	mutex_lock(&group->mutex);
	if (IS_ERR(gdev)) {
		ret = PTR_ERR(gdev);
		goto err_put_group;
	}

	/*
	 * The gdev must be in the list before calling
	 * iommu_setup_default_domain()
	 */
	list_add_tail(&gdev->list, &group->devices);
	WARN_ON(group->default_domain && !group->domain);
	if (group->default_domain)
		iommu_create_device_direct_mappings(group->default_domain, dev);
	if (group->domain) {
		ret = __iommu_device_set_domain(group, dev, group->domain, NULL,
						0);
		if (ret)
			goto err_remove_gdev;
	} else if (!group->default_domain && !group_list) {
		ret = iommu_setup_default_domain(group, 0);
		if (ret)
			goto err_remove_gdev;
	} else if (!group->default_domain) {
		/*
		 * With a group_list argument we defer the default_domain setup
		 * to the caller by providing a de-duplicated list of groups
		 * that need further setup.
		 */
		if (list_empty(&group->entry))
			list_add_tail(&group->entry, group_list);
	}

	if (group->default_domain)
		iommu_setup_dma_ops(dev);

	mutex_unlock(&group->mutex);

	return 0;

err_remove_gdev:
	list_del(&gdev->list);
	__iommu_group_free_device(group, gdev);
err_put_group:
	iommu_deinit_device(dev);
	mutex_unlock(&group->mutex);
	iommu_group_put(group);

	return ret;
}

int iommu_probe_device(struct device *dev)
{
	const struct iommu_ops *ops;
	int ret;

	mutex_lock(&iommu_probe_device_lock);
	ret = __iommu_probe_device(dev, NULL);
	mutex_unlock(&iommu_probe_device_lock);
	if (ret)
		return ret;

	ops = dev_iommu_ops(dev);
	if (ops->probe_finalize)
		ops->probe_finalize(dev);

	return 0;
}

static void __iommu_group_free_device(struct iommu_group *group,
				      struct group_device *grp_dev)
{
	struct device *dev = grp_dev->dev;

	sysfs_remove_link(group->devices_kobj, grp_dev->name);
	sysfs_remove_link(&dev->kobj, "iommu_group");

	trace_remove_device_from_group(group->id, dev);

	/*
	 * If the group has become empty then ownership must have been
	 * released, and the current domain must be set back to NULL or
	 * the default domain.
	 */
	if (list_empty(&group->devices))
		WARN_ON(group->owner_cnt ||
			group->domain != group->default_domain);

	kfree(grp_dev->name);
	kfree(grp_dev);
}

/* Remove the iommu_group from the struct device. */
static void __iommu_group_remove_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct group_device *device;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		if (device->dev != dev)
			continue;

		list_del(&device->list);
		__iommu_group_free_device(group, device);
		if (dev_has_iommu(dev))
			iommu_deinit_device(dev);
		else
			dev->iommu_group = NULL;
		break;
	}
	mutex_unlock(&group->mutex);

	/*
	 * Pairs with the get in iommu_init_device() or
	 * iommu_group_add_device()
	 */
	iommu_group_put(group);
}

static void iommu_release_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (group)
		__iommu_group_remove_device(dev);

	/* Free any fwspec if no iommu_driver was ever attached */
	if (dev->iommu)
		dev_iommu_free(dev);
}

static int __init iommu_set_def_domain_type(char *str)
{
	bool pt;
	int ret;

	ret = kstrtobool(str, &pt);
	if (ret)
		return ret;

	if (pt)
		iommu_set_default_passthrough(true);
	else
		iommu_set_default_translated(true);

	return 0;
}
early_param("iommu.passthrough", iommu_set_def_domain_type);

static int __init iommu_dma_setup(char *str)
{
	int ret = kstrtobool(str, &iommu_dma_strict);

	if (!ret)
		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
	return ret;
}
early_param("iommu.strict", iommu_dma_setup);
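
/*
 * For reference, the two early_param() hooks above implement the documented
 * "iommu.passthrough" and "iommu.strict" kernel command-line options; e.g.
 * booting with "iommu.passthrough=1" requests an identity default domain,
 * while "iommu.strict=0" requests lazy (flush-queue) TLB invalidation for
 * DMA domains. Both simply parse a boolean and record via IOMMU_CMD_LINE_*
 * that the choice came from the command line.
 */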

void iommu_set_dma_strict(void)
{
	iommu_dma_strict = true;
	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
}

static ssize_t iommu_group_attr_show(struct kobject *kobj,
				     struct attribute *__attr, char *buf)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->show)
		ret = attr->show(group, buf);
	return ret;
}

static ssize_t iommu_group_attr_store(struct kobject *kobj,
				      struct attribute *__attr,
				      const char *buf, size_t count)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->store)
		ret = attr->store(group, buf, count);
	return ret;
}

static const struct sysfs_ops iommu_group_sysfs_ops = {
	.show = iommu_group_attr_show,
	.store = iommu_group_attr_store,
};

static int iommu_group_create_file(struct iommu_group *group,
				   struct iommu_group_attribute *attr)
{
	return sysfs_create_file(&group->kobj, &attr->attr);
}

static void iommu_group_remove_file(struct iommu_group *group,
				    struct iommu_group_attribute *attr)
{
	sysfs_remove_file(&group->kobj, &attr->attr);
}

static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
{
	return sysfs_emit(buf, "%s\n", group->name);
}

/**
 * iommu_insert_resv_region - Insert a new region in the
 * list of reserved regions.
 * @new: new region to insert
 * @regions: list of regions
 *
 * Elements are sorted by start address and overlapping segments
 * of the same type are merged.
 */
static int iommu_insert_resv_region(struct iommu_resv_region *new,
				    struct list_head *regions)
{
	struct iommu_resv_region *iter, *tmp, *nr, *top;
	LIST_HEAD(stack);

	nr = iommu_alloc_resv_region(new->start, new->length,
				     new->prot, new->type, GFP_KERNEL);
	if (!nr)
		return -ENOMEM;

	/* First add the new element based on start address sorting */
	list_for_each_entry(iter, regions, list) {
		if (nr->start < iter->start ||
		    (nr->start == iter->start && nr->type <= iter->type))
			break;
	}
	list_add_tail(&nr->list, &iter->list);

	/* Merge overlapping segments of type nr->type in @regions, if any */
	list_for_each_entry_safe(iter, tmp, regions, list) {
		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;

		/* no merge needed on elements of different types than @new */
		if (iter->type != new->type) {
			list_move_tail(&iter->list, &stack);
			continue;
		}

		/* look for the last stack element of same type as @iter */
		list_for_each_entry_reverse(top, &stack, list)
			if (top->type == iter->type)
				goto check_overlap;

		list_move_tail(&iter->list, &stack);
		continue;

check_overlap:
		top_end = top->start + top->length - 1;

		if (iter->start > top_end + 1) {
			list_move_tail(&iter->list, &stack);
		} else {
			top->length = max(top_end, iter_end) - top->start + 1;
			list_del(&iter->list);
			kfree(iter);
		}
	}
	list_splice(&stack, regions);
	return 0;
}

static int
iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
				 struct list_head *group_resv_regions)
{
	struct iommu_resv_region *entry;
	int ret = 0;

	list_for_each_entry(entry, dev_resv_regions, list) {
		ret = iommu_insert_resv_region(entry, group_resv_regions);
		if (ret)
			break;
	}
	return ret;
}

int iommu_get_group_resv_regions(struct iommu_group *group,
				 struct list_head *head)
{
	struct group_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		struct list_head dev_resv_regions;

		/*
		 * Non-API groups still expose reserved_regions in sysfs,
		 * so filter out calls that get here that way.
		 */
		if (!dev_has_iommu(device->dev))
			break;

		INIT_LIST_HEAD(&dev_resv_regions);
		iommu_get_resv_regions(device->dev, &dev_resv_regions);
		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
		iommu_put_resv_regions(device->dev, &dev_resv_regions);
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);

static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
					     char *buf)
{
	struct iommu_resv_region *region, *next;
	struct list_head group_resv_regions;
	int offset = 0;

	INIT_LIST_HEAD(&group_resv_regions);
	iommu_get_group_resv_regions(group, &group_resv_regions);

	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
		offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
					(long long)region->start,
					(long long)(region->start +
						    region->length - 1),
					iommu_group_resv_type_string[region->type]);
		kfree(region);
	}

	return offset;
}

static ssize_t iommu_group_show_type(struct iommu_group *group,
				     char *buf)
{
	char *type = "unknown";

	mutex_lock(&group->mutex);
	if (group->default_domain) {
		switch (group->default_domain->type) {
		case IOMMU_DOMAIN_BLOCKED:
			type = "blocked";
			break;
		case IOMMU_DOMAIN_IDENTITY:
			type = "identity";
			break;
		case IOMMU_DOMAIN_UNMANAGED:
			type = "unmanaged";
			break;
		case IOMMU_DOMAIN_DMA:
			type = "DMA";
			break;
		case IOMMU_DOMAIN_DMA_FQ:
			type = "DMA-FQ";
			break;
		}
	}
	mutex_unlock(&group->mutex);

	return sysfs_emit(buf, "%s\n", type);
}

static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);

static IOMMU_GROUP_ATTR(reserved_regions, 0444,
			iommu_group_show_resv_regions, NULL);

static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
			iommu_group_store_type);

static void iommu_group_release(struct kobject *kobj)
{
	struct iommu_group *group = to_iommu_group(kobj);

	pr_debug("Releasing group %d\n", group->id);

	if (group->iommu_data_release)
		group->iommu_data_release(group->iommu_data);

	ida_free(&iommu_group_ida, group->id);

	/* Domains are free'd by iommu_deinit_device() */
	WARN_ON(group->default_domain);
	WARN_ON(group->blocking_domain);

	kfree(group->name);
	kfree(group);
}

static const struct kobj_type iommu_group_ktype = {
	.sysfs_ops = &iommu_group_sysfs_ops,
	.release = iommu_group_release,
};

/**
 * iommu_group_alloc - Allocate a new group
 *
 * This function is called by an iommu driver to allocate a new iommu
 * group. The iommu group represents the minimum granularity of the iommu.
 * Upon successful return, the caller holds a reference to the supplied
 * group in order to hold the group until devices are added. Use
 * iommu_group_put() to release this extra reference count, allowing the
 * group to be automatically reclaimed once it has no devices or external
 * references.
 */
struct iommu_group *iommu_group_alloc(void)
{
	struct iommu_group *group;
	int ret;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->kobj.kset = iommu_group_kset;
	mutex_init(&group->mutex);
	INIT_LIST_HEAD(&group->devices);
	INIT_LIST_HEAD(&group->entry);
	xa_init(&group->pasid_array);

	ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
	if (ret < 0) {
		kfree(group);
		return ERR_PTR(ret);
	}
	group->id = ret;

	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
				   NULL, "%d", group->id);
	if (ret) {
		kobject_put(&group->kobj);
		return ERR_PTR(ret);
	}

	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
	if (!group->devices_kobj) {
		kobject_put(&group->kobj); /* triggers .release & free */
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * The devices_kobj holds a reference on the group kobject, so
	 * as long as that exists so will the group. We can therefore
	 * use the devices_kobj for reference counting.
	 */
	kobject_put(&group->kobj);

	ret = iommu_group_create_file(group,
				      &iommu_group_attr_reserved_regions);
	if (ret) {
		kobject_put(group->devices_kobj);
		return ERR_PTR(ret);
	}

	ret = iommu_group_create_file(group, &iommu_group_attr_type);
	if (ret) {
		kobject_put(group->devices_kobj);
		return ERR_PTR(ret);
	}

	pr_debug("Allocated group %d\n", group->id);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_alloc);

/**
 * iommu_group_get_iommudata - retrieve iommu_data registered for a group
 * @group: the group
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations. This function provides a way to retrieve it. Caller
 * should hold a group reference.
 */
void *iommu_group_get_iommudata(struct iommu_group *group)
{
	return group->iommu_data;
}
EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);

/**
 * iommu_group_set_iommudata - set iommu_data for a group
 * @group: the group
 * @iommu_data: new data
 * @release: release function for iommu_data
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations. This function provides a way to set the data after
 * the group has been allocated. Caller should hold a group reference.
 */
void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
			       void (*release)(void *iommu_data))
{
	group->iommu_data = iommu_data;
	group->iommu_data_release = release;
}
EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
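
/*
 * Illustrative sketch (not part of this file): a driver's ->device_group()
 * callback might combine the helpers above roughly like this. The names
 * "my_group_data" and "my_group_data_release" are hypothetical driver-private
 * symbols used only for the example, hence the #if 0.
 */
#if 0
static struct iommu_group *my_driver_device_group(struct device *dev)
{
	struct iommu_group *group = iommu_group_alloc();
	struct my_group_data *data;

	if (IS_ERR(group))
		return group;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data) {
		iommu_group_put(group);
		return ERR_PTR(-ENOMEM);
	}

	/* The release hook runs when the group's refcount drops to zero */
	iommu_group_set_iommudata(group, data, my_group_data_release);
	iommu_group_set_name(group, dev_name(dev));
	return group;
}
#endif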

/**
 * iommu_group_set_name - set name for a group
 * @group: the group
 * @name: name
 *
 * Allow iommu driver to set a name for a group. When set it will
 * appear in a name attribute file under the group in sysfs.
 */
int iommu_group_set_name(struct iommu_group *group, const char *name)
{
	int ret;

	if (group->name) {
		iommu_group_remove_file(group, &iommu_group_attr_name);
		kfree(group->name);
		group->name = NULL;
		if (!name)
			return 0;
	}

	group->name = kstrdup(name, GFP_KERNEL);
	if (!group->name)
		return -ENOMEM;

	ret = iommu_group_create_file(group, &iommu_group_attr_name);
	if (ret) {
		kfree(group->name);
		group->name = NULL;
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);

static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
					       struct device *dev)
{
	struct iommu_resv_region *entry;
	struct list_head mappings;
	unsigned long pg_size;
	int ret = 0;

	pg_size = domain->pgsize_bitmap ? 1UL << __ffs(domain->pgsize_bitmap) : 0;
	INIT_LIST_HEAD(&mappings);

	if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size))
		return -EINVAL;

	iommu_get_resv_regions(dev, &mappings);

	/* We need to consider overlapping regions for different devices */
	list_for_each_entry(entry, &mappings, list) {
		dma_addr_t start, end, addr;
		size_t map_size = 0;

		if (entry->type == IOMMU_RESV_DIRECT)
			dev->iommu->require_direct = 1;

		if ((entry->type != IOMMU_RESV_DIRECT &&
		     entry->type != IOMMU_RESV_DIRECT_RELAXABLE) ||
		    !iommu_is_dma_domain(domain))
			continue;

		start = ALIGN(entry->start, pg_size);
		end   = ALIGN(entry->start + entry->length, pg_size);

		for (addr = start; addr <= end; addr += pg_size) {
			phys_addr_t phys_addr;

			if (addr == end)
				goto map_end;

			phys_addr = iommu_iova_to_phys(domain, addr);
			if (!phys_addr) {
				map_size += pg_size;
				continue;
			}

map_end:
			if (map_size) {
				ret = iommu_map(domain, addr - map_size,
						addr - map_size, map_size,
						entry->prot, GFP_KERNEL);
				if (ret)
					goto out;
				map_size = 0;
			}
		}

	}
out:
	iommu_put_resv_regions(dev, &mappings);

	return ret;
}

/* This is undone by __iommu_group_free_device() */
static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
						     struct device *dev)
{
	int ret, i = 0;
	struct group_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	device->dev = dev;

	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
	if (ret)
		goto err_free_device;

	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
rename:
	if (!device->name) {
		ret = -ENOMEM;
		goto err_remove_link;
	}

	ret = sysfs_create_link_nowarn(group->devices_kobj,
				       &dev->kobj, device->name);
	if (ret) {
		if (ret == -EEXIST && i >= 0) {
			/*
			 * Account for the slim chance of collision
			 * and append an instance to the name.
			 */
			kfree(device->name);
			device->name = kasprintf(GFP_KERNEL, "%s.%d",
						 kobject_name(&dev->kobj), i++);
			goto rename;
		}
		goto err_free_name;
	}

	trace_add_device_to_group(group->id, dev);

	dev_info(dev, "Adding to iommu group %d\n", group->id);

	return device;

err_free_name:
	kfree(device->name);
err_remove_link:
	sysfs_remove_link(&dev->kobj, "iommu_group");
err_free_device:
	kfree(device);
	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
	return ERR_PTR(ret);
}

/**
 * iommu_group_add_device - add a device to an iommu group
 * @group: the group into which to add the device (reference should be held)
 * @dev: the device
 *
 * This function is called by an iommu driver to add a device into a
 * group. Adding a device increments the group reference count.
 */
int iommu_group_add_device(struct iommu_group *group, struct device *dev)
{
	struct group_device *gdev;

	gdev = iommu_group_alloc_device(group, dev);
	if (IS_ERR(gdev))
		return PTR_ERR(gdev);

	iommu_group_ref_get(group);
	dev->iommu_group = group;

	mutex_lock(&group->mutex);
	list_add_tail(&gdev->list, &group->devices);
	mutex_unlock(&group->mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);

/**
 * iommu_group_remove_device - remove a device from its current group
 * @dev: device to be removed
 *
 * This function is called by an iommu driver to remove the device from
 * its current group. This decrements the iommu group reference count.
 */
void iommu_group_remove_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (!group)
		return;

	dev_info(dev, "Removing from iommu group %d\n", group->id);

	__iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);

#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
/**
 * iommu_group_mutex_assert - Check device group mutex lock
 * @dev: the device that has group param set
 *
 * This function is called by an iommu driver to check whether it holds
 * group mutex lock for the given device or not.
 *
 * Note that this function must be called after device group param is set.
 */
void iommu_group_mutex_assert(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	lockdep_assert_held(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_group_mutex_assert);
#endif

static struct device *iommu_group_first_dev(struct iommu_group *group)
{
	lockdep_assert_held(&group->mutex);
	return list_first_entry(&group->devices, struct group_device, list)->dev;
}

/**
 * iommu_group_for_each_dev - iterate over each device in the group
 * @group: the group
 * @data: caller opaque data to be passed to callback function
 * @fn: caller supplied callback function
 *
 * This function is called by group users to iterate over group devices.
 * Callers should hold a reference count to the group during callback.
 * The group->mutex is held across callbacks, which will block calls to
 * iommu_group_add/remove_device.
 */
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
			     int (*fn)(struct device *, void *))
{
	struct group_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		ret = fn(device->dev, data);
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);

/**
 * iommu_group_get - Return the group for a device and increment reference
 * @dev: get the group that this device belongs to
 *
 * This function is called by iommu drivers and users to get the group
 * for the specified device. If found, the group is returned and the group
 * reference is incremented, else NULL.
 */
struct iommu_group *iommu_group_get(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (group)
		kobject_get(group->devices_kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get);

/**
 * iommu_group_ref_get - Increment reference on a group
 * @group: the group to use, must not be NULL
 *
 * This function is called by iommu drivers to take additional references on an
 * existing group. Returns the given group for convenience.
 */
struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
{
	kobject_get(group->devices_kobj);
	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_ref_get);

/**
 * iommu_group_put - Decrement group reference
 * @group: the group to use
 *
 * This function is called by iommu drivers and users to release the
 * iommu group. Once the reference count is zero, the group is released.
 */
void iommu_group_put(struct iommu_group *group)
{
	if (group)
		kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_put);

/**
 * iommu_group_id - Return ID for a group
 * @group: the group to ID
 *
 * Return the unique ID for the group matching the sysfs group number.
 */
int iommu_group_id(struct iommu_group *group)
{
	return group->id;
}
EXPORT_SYMBOL_GPL(iommu_group_id);
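
/*
 * Illustrative sketch (not part of this file): a typical consumer pairs
 * iommu_group_get() with iommu_group_put() and passes a small callback to
 * iommu_group_for_each_dev(), here to count the devices sharing a group.
 * The helper names are hypothetical, hence the #if 0.
 */
#if 0
static int my_count_dev(struct device *dev, void *data)
{
	(*(int *)data)++;
	return 0;		/* a non-zero return stops the iteration */
}

static int my_count_group_devices(struct device *dev)
{
	struct iommu_group *group = iommu_group_get(dev);
	int count = 0;

	if (!group)
		return 0;
	iommu_group_for_each_dev(group, &count, my_count_dev);
	iommu_group_put(group);	/* drop the reference taken above */
	return count;
}
#endif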

static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns);

/*
 * To consider a PCI device isolated, we require ACS to support Source
 * Validation, Request Redirection, Completer Redirection, and Upstream
 * Forwarding. This effectively means that devices cannot spoof their
 * requester ID, requests and completions cannot be redirected, and all
 * transactions are forwarded upstream, even as it passes through a
 * bridge where the target device is downstream.
 */
#define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

/*
 * For multifunction devices which are not isolated from each other, find
 * all the other non-isolated functions and look for existing groups. For
 * each function, we also need to look for aliases to or from other devices
 * that may already have a group.
 */
static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
							unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
		return NULL;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus ||
		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
			continue;

		group = get_pci_alias_group(tmp, devfns);
		if (group) {
			pci_dev_put(tmp);
			return group;
		}
	}

	return NULL;
}

/*
 * Look for aliases to or from the given device for existing groups. DMA
 * aliases are only supported on the same bus, therefore the search
 * space is quite small (especially since we're really only looking at PCIe
 * devices, and therefore only expect multiple slots on the root complex or
 * downstream switch ports). It's conceivable though that a pair of
 * multifunction devices could have aliases between them that would cause a
 * loop. To prevent this, we use a bitmap to track where we've been.
 */
static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
		return NULL;

	group = iommu_group_get(&pdev->dev);
	if (group)
		return group;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus)
			continue;

		/* We alias them or they alias us */
		if (pci_devs_are_dma_aliases(pdev, tmp)) {
			group = get_pci_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}

			group = get_pci_function_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}
		}
	}

	return NULL;
}

struct group_for_pci_data {
	struct pci_dev *pdev;
	struct iommu_group *group;
};

/*
 * DMA alias iterator callback, return the last seen device. Stop and return
 * the IOMMU group if we find one along the way.
 */
static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct group_for_pci_data *data = opaque;

	data->pdev = pdev;
	data->group = iommu_group_get(&pdev->dev);

	return data->group != NULL;
}

/*
 * Generic device_group call-back function. It just allocates one
 * iommu-group per device.
 */
struct iommu_group *generic_device_group(struct device *dev)
{
	return iommu_group_alloc();
}
EXPORT_SYMBOL_GPL(generic_device_group);

/*
 * Generic device_group call-back function. It just allocates one
 * iommu-group per iommu driver instance shared by every device
 * probed by that iommu driver.
 */
struct iommu_group *generic_single_device_group(struct device *dev)
{
	struct iommu_device *iommu = dev->iommu->iommu_dev;

	if (!iommu->singleton_group) {
		struct iommu_group *group;

		group = iommu_group_alloc();
		if (IS_ERR(group))
			return group;
		iommu->singleton_group = group;
	}
	return iommu_group_ref_get(iommu->singleton_group);
}
EXPORT_SYMBOL_GPL(generic_single_device_group);

/*
 * Use standard PCI bus topology, isolation features, and DMA alias quirks
 * to find or create an IOMMU group for a device.
 */
struct iommu_group *pci_device_group(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct group_for_pci_data data;
	struct pci_bus *bus;
	struct iommu_group *group = NULL;
	u64 devfns[4] = { 0 };

	if (WARN_ON(!dev_is_pci(dev)))
		return ERR_PTR(-EINVAL);

	/*
	 * Find the upstream DMA alias for the device. A device must not
	 * be aliased due to topology in order to have its own IOMMU group.
	 * If we find an alias along the way that already belongs to a
	 * group, use it.
	 */
	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
		return data.group;

	pdev = data.pdev;

	/*
	 * Continue upstream from the point of minimum IOMMU granularity
	 * due to aliases to the point where devices are protected from
	 * peer-to-peer DMA by PCI ACS. Again, if we find an existing
	 * group, use it.
	 */
	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
		if (!bus->self)
			continue;

		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
			break;

		pdev = bus->self;

		group = iommu_group_get(&pdev->dev);
		if (group)
			return group;
	}

	/*
	 * Look for existing groups on device aliases. If we alias another
	 * device or another device aliases us, use the same group.
	 */
	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
	if (group)
		return group;

	/*
	 * Look for existing groups on non-isolated functions on the same
	 * slot and aliases of those functions, if any. No need to clear
	 * the search bitmap, the tested devfns are still valid.
	 */
	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
	if (group)
		return group;

	/* No shared group found, allocate new */
	return iommu_group_alloc();
}
EXPORT_SYMBOL_GPL(pci_device_group);

/* Get the IOMMU group for device on fsl-mc bus */
struct iommu_group *fsl_mc_device_group(struct device *dev)
{
	struct device *cont_dev = fsl_mc_cont_dev(dev);
	struct iommu_group *group;

	group = iommu_group_get(cont_dev);
	if (!group)
		group = iommu_group_alloc();
	return group;
}
EXPORT_SYMBOL_GPL(fsl_mc_device_group);

static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_domain *domain;

	if (ops->identity_domain)
		return ops->identity_domain;

	if (ops->domain_alloc_identity) {
		domain = ops->domain_alloc_identity(dev);
		if (IS_ERR(domain))
			return domain;
	} else {
		return ERR_PTR(-EOPNOTSUPP);
	}

	iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops);
	return domain;
}

static struct iommu_domain *
__iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
	struct device *dev = iommu_group_first_dev(group);
	struct iommu_domain *dom;

	if (group->default_domain && group->default_domain->type == req_type)
		return group->default_domain;

	/*
	 * When allocating the DMA API domain assume that the driver is going to
	 * use PASID and make sure the RID's domain is PASID compatible.
	 */
	if (req_type & __IOMMU_DOMAIN_PAGING) {
		dom = __iommu_paging_domain_alloc_flags(dev, req_type,
			   dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : 0);

		/*
		 * If driver does not support PASID feature then
		 * try to allocate non-PASID domain
		 */
		if (PTR_ERR(dom) == -EOPNOTSUPP)
			dom = __iommu_paging_domain_alloc_flags(dev, req_type, 0);

		return dom;
	}

	if (req_type == IOMMU_DOMAIN_IDENTITY)
		return __iommu_alloc_identity_domain(dev);

	return ERR_PTR(-EINVAL);
}

/*
 * req_type of 0 means "auto" which means to select a domain based on
 * iommu_def_domain_type or what the driver actually supports.
 */
static struct iommu_domain *
iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
	const struct iommu_ops *ops = dev_iommu_ops(iommu_group_first_dev(group));
	struct iommu_domain *dom;

	lockdep_assert_held(&group->mutex);

	/*
	 * Allow legacy drivers to specify the domain that will be the default
	 * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM
	 * domain. Do not use in new drivers.
	 */
	if (ops->default_domain) {
		if (req_type != ops->default_domain->type)
			return ERR_PTR(-EINVAL);
		return ops->default_domain;
	}

	if (req_type)
		return __iommu_group_alloc_default_domain(group, req_type);

	/* The driver gave no guidance on what type to use, try the default */
	dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type);
	if (!IS_ERR(dom))
		return dom;

	/* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
		return ERR_PTR(-EINVAL);
	dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA);
	if (IS_ERR(dom))
		return dom;

	pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
		iommu_def_domain_type, group->name);
	return dom;
}

struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
{
	return group->default_domain;
}

static int probe_iommu_group(struct device *dev, void *data)
{
	struct list_head *group_list = data;
	int ret;

	mutex_lock(&iommu_probe_device_lock);
	ret = __iommu_probe_device(dev, group_list);
	mutex_unlock(&iommu_probe_device_lock);
	if (ret == -ENODEV)
		ret = 0;

	return ret;
}

static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		int ret;

		ret = iommu_probe_device(dev);
		return (ret) ? NOTIFY_DONE : NOTIFY_OK;
	} else if (action == BUS_NOTIFY_REMOVED_DEVICE) {
		iommu_release_device(dev);
		return NOTIFY_OK;
	}

	return 0;
}

/*
 * Combine the driver's chosen def_domain_type across all the devices in a
 * group. Drivers must give a consistent result.
 */
static int iommu_get_def_domain_type(struct iommu_group *group,
				     struct device *dev, int cur_type)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	int type;

	if (ops->default_domain) {
		/*
		 * Drivers that declare a global static default_domain will
		 * always choose that.
		 */
		type = ops->default_domain->type;
	} else {
		if (ops->def_domain_type)
			type = ops->def_domain_type(dev);
		else
			return cur_type;
	}
	if (!type || cur_type == type)
		return cur_type;
	if (!cur_type)
		return type;

	dev_err_ratelimited(
		dev,
		"IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n",
		iommu_domain_type_str(cur_type), iommu_domain_type_str(type),
		group->id);

	/*
	 * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY
	 * takes precedence.
	 */
	if (type == IOMMU_DOMAIN_IDENTITY)
		return type;
	return cur_type;
}
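
/*
 * Illustrative sketch (hypothetical driver code, not part of this file): a
 * driver's ->def_domain_type() callback feeds the combining logic above by
 * returning a required type for a particular device, or 0 when it has no
 * preference. "my_device_needs_identity" is a made-up helper.
 */
#if 0
static int my_driver_def_domain_type(struct device *dev)
{
	/* e.g. a quirky on-chip device that cannot tolerate translation */
	if (my_device_needs_identity(dev))
		return IOMMU_DOMAIN_IDENTITY;
	return 0;	/* no preference, let the core pick */
}
#endif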

/*
 * A target_type of 0 will select the best domain type. 0 can be returned in
 * this case meaning the global default should be used.
 */
static int iommu_get_default_domain_type(struct iommu_group *group,
					 int target_type)
{
	struct device *untrusted = NULL;
	struct group_device *gdev;
	int driver_type = 0;

	lockdep_assert_held(&group->mutex);

	/*
	 * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an
	 * identity_domain and it will automatically become their default
	 * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain.
	 * Override the selection to IDENTITY.
	 */
	if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
		static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) &&
				IS_ENABLED(CONFIG_IOMMU_DMA)));
		driver_type = IOMMU_DOMAIN_IDENTITY;
	}

	for_each_group_device(group, gdev) {
		driver_type = iommu_get_def_domain_type(group, gdev->dev,
							driver_type);

		if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) {
			/*
			 * No ARM32 using systems will set untrusted, it cannot
			 * work.
			 */
			if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)))
				return -1;
			untrusted = gdev->dev;
		}
	}

	/*
	 * If the common dma ops are not selected in kconfig then we cannot use
	 * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been
	 * selected.
	 */
	if (!IS_ENABLED(CONFIG_IOMMU_DMA)) {
		if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA))
			return -1;
		if (!driver_type)
			driver_type = IOMMU_DOMAIN_IDENTITY;
	}

	if (untrusted) {
		if (driver_type && driver_type != IOMMU_DOMAIN_DMA) {
			dev_err_ratelimited(
				untrusted,
				"Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n",
				group->id, iommu_domain_type_str(driver_type));
			return -1;
		}
		driver_type = IOMMU_DOMAIN_DMA;
	}

	if (target_type) {
		if (driver_type && target_type != driver_type)
			return -1;
		return target_type;
	}
	return driver_type;
}

static void iommu_group_do_probe_finalize(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (ops->probe_finalize)
		ops->probe_finalize(dev);
}

static int bus_iommu_probe(const struct bus_type *bus)
{
	struct iommu_group *group, *next;
	LIST_HEAD(group_list);
	int ret;

	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
	if (ret)
		return ret;

	list_for_each_entry_safe(group, next, &group_list, entry) {
		struct group_device *gdev;

		mutex_lock(&group->mutex);

		/* Remove item from the list */
		list_del_init(&group->entry);

		/*
		 * We go to the trouble of deferred default domain creation so
		 * that the cross-group default domain type and the setup of the
		 * IOMMU_RESV_DIRECT will work correctly in non-hotplug scenarios.
		 */
		ret = iommu_setup_default_domain(group, 0);
		if (ret) {
			mutex_unlock(&group->mutex);
			return ret;
		}
		for_each_group_device(group, gdev)
			iommu_setup_dma_ops(gdev->dev);
		mutex_unlock(&group->mutex);

		/*
		 * FIXME: Mis-locked because the ops->probe_finalize() call-back
		 * of some IOMMU drivers calls arm_iommu_attach_device() which
		 * in-turn might call back into IOMMU core code, where it tries
		 * to take group->mutex, resulting in a deadlock.
1960 */ 1961 for_each_group_device(group, gdev) 1962 iommu_group_do_probe_finalize(gdev->dev); 1963 } 1964 1965 return 0; 1966 } 1967 1968 /** 1969 * device_iommu_capable() - check for a general IOMMU capability 1970 * @dev: device to which the capability would be relevant, if available 1971 * @cap: IOMMU capability 1972 * 1973 * Return: true if an IOMMU is present and supports the given capability 1974 * for the given device, otherwise false. 1975 */ 1976 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 1977 { 1978 const struct iommu_ops *ops; 1979 1980 if (!dev_has_iommu(dev)) 1981 return false; 1982 1983 ops = dev_iommu_ops(dev); 1984 if (!ops->capable) 1985 return false; 1986 1987 return ops->capable(dev, cap); 1988 } 1989 EXPORT_SYMBOL_GPL(device_iommu_capable); 1990 1991 /** 1992 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 1993 * for a group 1994 * @group: Group to query 1995 * 1996 * IOMMU groups should not have differing values of 1997 * msi_device_has_isolated_msi() for devices in a group. However nothing 1998 * directly prevents this, so ensure mistakes don't result in isolation failures 1999 * by checking that all the devices are the same. 2000 */ 2001 bool iommu_group_has_isolated_msi(struct iommu_group *group) 2002 { 2003 struct group_device *group_dev; 2004 bool ret = true; 2005 2006 mutex_lock(&group->mutex); 2007 for_each_group_device(group, group_dev) 2008 ret &= msi_device_has_isolated_msi(group_dev->dev); 2009 mutex_unlock(&group->mutex); 2010 return ret; 2011 } 2012 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 2013 2014 /** 2015 * iommu_set_fault_handler() - set a fault handler for an iommu domain 2016 * @domain: iommu domain 2017 * @handler: fault handler 2018 * @token: user data, will be passed back to the fault handler 2019 * 2020 * This function should be used by IOMMU users which want to be notified 2021 * whenever an IOMMU fault happens. 2022 * 2023 * The fault handler itself should return 0 on success, and an appropriate 2024 * error code otherwise. 
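 *
 * A minimal usage sketch (illustrative only; "my_fault_handler" is a
 * hypothetical caller-supplied function, and returning -ENOSYS keeps the
 * IOMMU driver's default behaviour):
 *
 *	static int my_fault_handler(struct iommu_domain *domain,
 *				    struct device *dev, unsigned long iova,
 *				    int flags, void *token)
 *	{
 *		dev_err(dev, "unexpected fault at IOVA 0x%lx (flags 0x%x)\n",
 *			iova, flags);
 *		return -ENOSYS;
 *	}
 *
 *	iommu_set_fault_handler(domain, my_fault_handler, NULL);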
2025 */ 2026 void iommu_set_fault_handler(struct iommu_domain *domain, 2027 iommu_fault_handler_t handler, 2028 void *token) 2029 { 2030 if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE)) 2031 return; 2032 2033 domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER; 2034 domain->handler = handler; 2035 domain->handler_token = token; 2036 } 2037 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 2038 2039 static void iommu_domain_init(struct iommu_domain *domain, unsigned int type, 2040 const struct iommu_ops *ops) 2041 { 2042 domain->type = type; 2043 domain->owner = ops; 2044 if (!domain->ops) 2045 domain->ops = ops->default_domain_ops; 2046 } 2047 2048 static struct iommu_domain * 2049 __iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type, 2050 unsigned int flags) 2051 { 2052 const struct iommu_ops *ops; 2053 struct iommu_domain *domain; 2054 2055 if (!dev_has_iommu(dev)) 2056 return ERR_PTR(-ENODEV); 2057 2058 ops = dev_iommu_ops(dev); 2059 2060 if (ops->domain_alloc_paging && !flags) 2061 domain = ops->domain_alloc_paging(dev); 2062 else if (ops->domain_alloc_paging_flags) 2063 domain = ops->domain_alloc_paging_flags(dev, flags, NULL); 2064 #if IS_ENABLED(CONFIG_FSL_PAMU) 2065 else if (ops->domain_alloc && !flags) 2066 domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED); 2067 #endif 2068 else 2069 return ERR_PTR(-EOPNOTSUPP); 2070 2071 if (IS_ERR(domain)) 2072 return domain; 2073 if (!domain) 2074 return ERR_PTR(-ENOMEM); 2075 2076 iommu_domain_init(domain, type, ops); 2077 return domain; 2078 } 2079 2080 /** 2081 * iommu_paging_domain_alloc_flags() - Allocate a paging domain 2082 * @dev: device for which the domain is allocated 2083 * @flags: Bitmap of iommufd_hwpt_alloc_flags 2084 * 2085 * Allocate a paging domain which will be managed by a kernel driver. Return 2086 * allocated domain if successful, or an ERR pointer for failure. 2087 */ 2088 struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, 2089 unsigned int flags) 2090 { 2091 return __iommu_paging_domain_alloc_flags(dev, 2092 IOMMU_DOMAIN_UNMANAGED, flags); 2093 } 2094 EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags); 2095 2096 void iommu_domain_free(struct iommu_domain *domain) 2097 { 2098 switch (domain->cookie_type) { 2099 case IOMMU_COOKIE_DMA_IOVA: 2100 iommu_put_dma_cookie(domain); 2101 break; 2102 case IOMMU_COOKIE_DMA_MSI: 2103 iommu_put_msi_cookie(domain); 2104 break; 2105 case IOMMU_COOKIE_SVA: 2106 mmdrop(domain->mm); 2107 break; 2108 default: 2109 break; 2110 } 2111 if (domain->ops->free) 2112 domain->ops->free(domain); 2113 } 2114 EXPORT_SYMBOL_GPL(iommu_domain_free); 2115 2116 /* 2117 * Put the group's domain back to the appropriate core-owned domain - either the 2118 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 
2119 */ 2120 static void __iommu_group_set_core_domain(struct iommu_group *group) 2121 { 2122 struct iommu_domain *new_domain; 2123 2124 if (group->owner) 2125 new_domain = group->blocking_domain; 2126 else 2127 new_domain = group->default_domain; 2128 2129 __iommu_group_set_domain_nofail(group, new_domain); 2130 } 2131 2132 static int __iommu_attach_device(struct iommu_domain *domain, 2133 struct device *dev, struct iommu_domain *old) 2134 { 2135 int ret; 2136 2137 if (unlikely(domain->ops->attach_dev == NULL)) 2138 return -ENODEV; 2139 2140 ret = domain->ops->attach_dev(domain, dev, old); 2141 if (ret) 2142 return ret; 2143 dev->iommu->attach_deferred = 0; 2144 trace_attach_device_to_domain(dev); 2145 return 0; 2146 } 2147 2148 /** 2149 * iommu_attach_device - Attach an IOMMU domain to a device 2150 * @domain: IOMMU domain to attach 2151 * @dev: Device that will be attached 2152 * 2153 * Returns 0 on success and error code on failure 2154 * 2155 * Note that EINVAL can be treated as a soft failure, indicating 2156 * that certain configuration of the domain is incompatible with 2157 * the device. In this case attaching a different domain to the 2158 * device may succeed. 2159 */ 2160 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2161 { 2162 /* Caller must be a probed driver on dev */ 2163 struct iommu_group *group = dev->iommu_group; 2164 int ret; 2165 2166 if (!group) 2167 return -ENODEV; 2168 2169 /* 2170 * Lock the group to make sure the device-count doesn't 2171 * change while we are attaching 2172 */ 2173 mutex_lock(&group->mutex); 2174 ret = -EINVAL; 2175 if (list_count_nodes(&group->devices) != 1) 2176 goto out_unlock; 2177 2178 ret = __iommu_attach_group(domain, group); 2179 2180 out_unlock: 2181 mutex_unlock(&group->mutex); 2182 return ret; 2183 } 2184 EXPORT_SYMBOL_GPL(iommu_attach_device); 2185 2186 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2187 { 2188 if (dev->iommu && dev->iommu->attach_deferred) 2189 return __iommu_attach_device(domain, dev, NULL); 2190 2191 return 0; 2192 } 2193 2194 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2195 { 2196 /* Caller must be a probed driver on dev */ 2197 struct iommu_group *group = dev->iommu_group; 2198 2199 if (!group) 2200 return; 2201 2202 mutex_lock(&group->mutex); 2203 if (WARN_ON(domain != group->domain) || 2204 WARN_ON(list_count_nodes(&group->devices) != 1)) 2205 goto out_unlock; 2206 __iommu_group_set_core_domain(group); 2207 2208 out_unlock: 2209 mutex_unlock(&group->mutex); 2210 } 2211 EXPORT_SYMBOL_GPL(iommu_detach_device); 2212 2213 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2214 { 2215 /* Caller must be a probed driver on dev */ 2216 struct iommu_group *group = dev->iommu_group; 2217 2218 if (!group) 2219 return NULL; 2220 2221 return group->domain; 2222 } 2223 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2224 2225 /* 2226 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2227 * guarantees that the group and its default domain are valid and correct. 
2228 */ 2229 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2230 { 2231 return dev->iommu_group->default_domain; 2232 } 2233 2234 static void *iommu_make_pasid_array_entry(struct iommu_domain *domain, 2235 struct iommu_attach_handle *handle) 2236 { 2237 if (handle) { 2238 handle->domain = domain; 2239 return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE); 2240 } 2241 2242 return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN); 2243 } 2244 2245 static bool domain_iommu_ops_compatible(const struct iommu_ops *ops, 2246 struct iommu_domain *domain) 2247 { 2248 if (domain->owner == ops) 2249 return true; 2250 2251 /* For static domains, owner isn't set. */ 2252 if (domain == ops->blocked_domain || domain == ops->identity_domain) 2253 return true; 2254 2255 return false; 2256 } 2257 2258 static int __iommu_attach_group(struct iommu_domain *domain, 2259 struct iommu_group *group) 2260 { 2261 struct device *dev; 2262 2263 if (group->domain && group->domain != group->default_domain && 2264 group->domain != group->blocking_domain) 2265 return -EBUSY; 2266 2267 dev = iommu_group_first_dev(group); 2268 if (!dev_has_iommu(dev) || 2269 !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain)) 2270 return -EINVAL; 2271 2272 return __iommu_group_set_domain(group, domain); 2273 } 2274 2275 /** 2276 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2277 * @domain: IOMMU domain to attach 2278 * @group: IOMMU group that will be attached 2279 * 2280 * Returns 0 on success and error code on failure 2281 * 2282 * Note that EINVAL can be treated as a soft failure, indicating 2283 * that certain configuration of the domain is incompatible with 2284 * the group. In this case attaching a different domain to the 2285 * group may succeed. 2286 */ 2287 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2288 { 2289 int ret; 2290 2291 mutex_lock(&group->mutex); 2292 ret = __iommu_attach_group(domain, group); 2293 mutex_unlock(&group->mutex); 2294 2295 return ret; 2296 } 2297 EXPORT_SYMBOL_GPL(iommu_attach_group); 2298 2299 static int __iommu_device_set_domain(struct iommu_group *group, 2300 struct device *dev, 2301 struct iommu_domain *new_domain, 2302 struct iommu_domain *old_domain, 2303 unsigned int flags) 2304 { 2305 int ret; 2306 2307 /* 2308 * If the device requires IOMMU_RESV_DIRECT then we cannot allow 2309 * the blocking domain to be attached as it does not contain the 2310 * required 1:1 mapping. This test effectively excludes the device 2311 * being used with iommu_group_claim_dma_owner() which will block 2312 * vfio and iommufd as well. 2313 */ 2314 if (dev->iommu->require_direct && 2315 (new_domain->type == IOMMU_DOMAIN_BLOCKED || 2316 new_domain == group->blocking_domain)) { 2317 dev_warn(dev, 2318 "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); 2319 return -EINVAL; 2320 } 2321 2322 if (dev->iommu->attach_deferred) { 2323 if (new_domain == group->default_domain) 2324 return 0; 2325 dev->iommu->attach_deferred = 0; 2326 } 2327 2328 ret = __iommu_attach_device(new_domain, dev, old_domain); 2329 if (ret) { 2330 /* 2331 * If we have a blocking domain then try to attach that in hopes 2332 * of avoiding a UAF. Modern drivers should implement blocking 2333 * domains as global statics that cannot fail. 
2334 */ 2335 if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && 2336 group->blocking_domain && 2337 group->blocking_domain != new_domain) 2338 __iommu_attach_device(group->blocking_domain, dev, 2339 old_domain); 2340 return ret; 2341 } 2342 return 0; 2343 } 2344 2345 /* 2346 * If 0 is returned the group's domain is new_domain. If an error is returned 2347 * then the group's domain will be set back to the existing domain unless 2348 * IOMMU_SET_DOMAIN_MUST_SUCCEED is set, in which case an error is returned and 2349 * the group's domain is left inconsistent. It is a driver bug to fail attach 2350 * with a previously good domain. We try to avoid a kernel UAF because of this. 2351 * 2352 * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU 2353 * API works on domains and devices. Bridge that gap by iterating over the 2354 * devices in a group. Ideally we'd have a single device which represents the 2355 * requestor ID of the group, but we also allow IOMMU drivers to create 2356 * policy-defined minimum sets, where the physical hardware may be able to 2357 * distinguish members, but we wish to group them at a higher level (e.g. 2358 * untrusted multi-function PCI devices). Thus we attach each device. 2359 */ 2360 static int __iommu_group_set_domain_internal(struct iommu_group *group, 2361 struct iommu_domain *new_domain, 2362 unsigned int flags) 2363 { 2364 struct group_device *last_gdev; 2365 struct group_device *gdev; 2366 int result; 2367 int ret; 2368 2369 lockdep_assert_held(&group->mutex); 2370 2371 if (group->domain == new_domain) 2372 return 0; 2373 2374 if (WARN_ON(!new_domain)) 2375 return -EINVAL; 2376 2377 /* 2378 * Changing the domain is done by calling attach_dev() on the new 2379 * domain. This switch does not have to be atomic and DMA can be 2380 * discarded during the transition. DMA must only be able to access 2381 * either new_domain or group->domain, never something else. 2382 */ 2383 result = 0; 2384 for_each_group_device(group, gdev) { 2385 ret = __iommu_device_set_domain(group, gdev->dev, new_domain, 2386 group->domain, flags); 2387 if (ret) { 2388 result = ret; 2389 /* 2390 * Keep trying the other devices in the group. If a 2391 * driver fails attach to an otherwise good domain, and 2392 * does not support blocking domains, it should at least 2393 * drop its reference on the current domain so we don't 2394 * UAF. 2395 */ 2396 if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) 2397 continue; 2398 goto err_revert; 2399 } 2400 } 2401 group->domain = new_domain; 2402 return result; 2403 2404 err_revert: 2405 /* 2406 * This is called in error unwind paths. A well-behaved driver should 2407 * always allow us to attach to a domain that was already attached. 2408 */ 2409 last_gdev = gdev; 2410 for_each_group_device(group, gdev) { 2411 /* No need to revert the last gdev that failed to set domain */ 2412 if (gdev == last_gdev) 2413 break; 2414 /* 2415 * A NULL domain can happen only for first probe, in which case 2416 * we leave group->domain as NULL and let release clean 2417 * everything up.
2418 */ 2419 if (group->domain) 2420 WARN_ON(__iommu_device_set_domain( 2421 group, gdev->dev, group->domain, new_domain, 2422 IOMMU_SET_DOMAIN_MUST_SUCCEED)); 2423 } 2424 return ret; 2425 } 2426 2427 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2428 { 2429 mutex_lock(&group->mutex); 2430 __iommu_group_set_core_domain(group); 2431 mutex_unlock(&group->mutex); 2432 } 2433 EXPORT_SYMBOL_GPL(iommu_detach_group); 2434 2435 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2436 { 2437 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2438 return iova; 2439 2440 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2441 return 0; 2442 2443 return domain->ops->iova_to_phys(domain, iova); 2444 } 2445 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2446 2447 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2448 phys_addr_t paddr, size_t size, size_t *count) 2449 { 2450 unsigned int pgsize_idx, pgsize_idx_next; 2451 unsigned long pgsizes; 2452 size_t offset, pgsize, pgsize_next; 2453 size_t offset_end; 2454 unsigned long addr_merge = paddr | iova; 2455 2456 /* Page sizes supported by the hardware and small enough for @size */ 2457 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2458 2459 /* Constrain the page sizes further based on the maximum alignment */ 2460 if (likely(addr_merge)) 2461 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2462 2463 /* Make sure we have at least one suitable page size */ 2464 BUG_ON(!pgsizes); 2465 2466 /* Pick the biggest page size remaining */ 2467 pgsize_idx = __fls(pgsizes); 2468 pgsize = BIT(pgsize_idx); 2469 if (!count) 2470 return pgsize; 2471 2472 /* Find the next biggest supported page size, if it exists */ 2473 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2474 if (!pgsizes) 2475 goto out_set_count; 2476 2477 pgsize_idx_next = __ffs(pgsizes); 2478 pgsize_next = BIT(pgsize_idx_next); 2479 2480 /* 2481 * There's no point trying a bigger page size unless the virtual 2482 * and physical addresses are similarly offset within the larger page. 2483 */ 2484 if ((iova ^ paddr) & (pgsize_next - 1)) 2485 goto out_set_count; 2486 2487 /* Calculate the offset to the next page size alignment boundary */ 2488 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2489 2490 /* 2491 * If size is big enough to accommodate the larger page, reduce 2492 * the number of smaller pages.
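 *
 * Worked example with illustrative numbers (assuming a pgsize_bitmap of
 * SZ_4K | SZ_2M | SZ_1G): for iova 0x1ff000, paddr 0x5ff000 and size
 * 0x402000, the common alignment only permits 4K, so pgsize is SZ_4K and
 * pgsize_next is SZ_2M. iova and paddr have the same offset within 2M and
 * the distance to the next 2M boundary is 0x1000, so size is trimmed to
 * 0x1000 and *count becomes 1. After the caller maps that single 4K page,
 * both addresses are 2M-aligned and the next call can return SZ_2M with a
 * count of 2.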
2493 */ 2494 if (!check_add_overflow(offset, pgsize_next, &offset_end) && 2495 offset_end <= size) 2496 size = offset; 2497 2498 out_set_count: 2499 *count = size >> pgsize_idx; 2500 return pgsize; 2501 } 2502 2503 int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, 2504 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2505 { 2506 const struct iommu_domain_ops *ops = domain->ops; 2507 unsigned long orig_iova = iova; 2508 unsigned int min_pagesz; 2509 size_t orig_size = size; 2510 phys_addr_t orig_paddr = paddr; 2511 int ret = 0; 2512 2513 might_sleep_if(gfpflags_allow_blocking(gfp)); 2514 2515 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2516 return -EINVAL; 2517 2518 if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) 2519 return -ENODEV; 2520 2521 /* Discourage passing strange GFP flags */ 2522 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2523 __GFP_HIGHMEM))) 2524 return -EINVAL; 2525 2526 /* find out the minimum page size supported */ 2527 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2528 2529 /* 2530 * both the virtual address and the physical one, as well as 2531 * the size of the mapping, must be aligned (at least) to the 2532 * size of the smallest page supported by the hardware 2533 */ 2534 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2535 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2536 iova, &paddr, size, min_pagesz); 2537 return -EINVAL; 2538 } 2539 2540 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2541 2542 while (size) { 2543 size_t pgsize, count, mapped = 0; 2544 2545 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2546 2547 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2548 iova, &paddr, pgsize, count); 2549 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2550 gfp, &mapped); 2551 /* 2552 * Some pages may have been mapped, even if an error occurred, 2553 * so we should account for those so they can be unmapped. 
2554 */ 2555 size -= mapped; 2556 2557 if (ret) 2558 break; 2559 2560 iova += mapped; 2561 paddr += mapped; 2562 } 2563 2564 /* unroll mapping in case something went wrong */ 2565 if (ret) 2566 iommu_unmap(domain, orig_iova, orig_size - size); 2567 else 2568 trace_map(orig_iova, orig_paddr, orig_size); 2569 2570 return ret; 2571 } 2572 2573 int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) 2574 { 2575 const struct iommu_domain_ops *ops = domain->ops; 2576 2577 if (!ops->iotlb_sync_map) 2578 return 0; 2579 return ops->iotlb_sync_map(domain, iova, size); 2580 } 2581 2582 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2583 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2584 { 2585 int ret; 2586 2587 ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp); 2588 if (ret) 2589 return ret; 2590 2591 ret = iommu_sync_map(domain, iova, size); 2592 if (ret) 2593 iommu_unmap(domain, iova, size); 2594 2595 return ret; 2596 } 2597 EXPORT_SYMBOL_GPL(iommu_map); 2598 2599 static size_t __iommu_unmap(struct iommu_domain *domain, 2600 unsigned long iova, size_t size, 2601 struct iommu_iotlb_gather *iotlb_gather) 2602 { 2603 const struct iommu_domain_ops *ops = domain->ops; 2604 size_t unmapped_page, unmapped = 0; 2605 unsigned long orig_iova = iova; 2606 unsigned int min_pagesz; 2607 2608 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2609 return 0; 2610 2611 if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL)) 2612 return 0; 2613 2614 /* find out the minimum page size supported */ 2615 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2616 2617 /* 2618 * The virtual address, as well as the size of the mapping, must be 2619 * aligned (at least) to the size of the smallest page supported 2620 * by the hardware 2621 */ 2622 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2623 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2624 iova, size, min_pagesz); 2625 return 0; 2626 } 2627 2628 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2629 2630 /* 2631 * Keep iterating until we either unmap 'size' bytes (or more) 2632 * or we hit an area that isn't mapped. 2633 */ 2634 while (unmapped < size) { 2635 size_t pgsize, count; 2636 2637 pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count); 2638 unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather); 2639 if (!unmapped_page) 2640 break; 2641 2642 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2643 iova, unmapped_page); 2644 2645 iova += unmapped_page; 2646 unmapped += unmapped_page; 2647 } 2648 2649 trace_unmap(orig_iova, size, unmapped); 2650 return unmapped; 2651 } 2652 2653 /** 2654 * iommu_unmap() - Remove mappings from a range of IOVA 2655 * @domain: Domain to manipulate 2656 * @iova: IO virtual address to start 2657 * @size: Length of the range starting from @iova 2658 * 2659 * iommu_unmap() will remove a translation created by iommu_map(). It cannot 2660 * subdivide a mapping created by iommu_map(), so it should be called with IOVA 2661 * ranges that match what was passed to iommu_map(). The range can aggregate 2662 * contiguous iommu_map() calls so long as no individual range is split. 2663 * 2664 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2665 * unmapping stopped. 
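 *
 * A minimal usage sketch (illustrative only; "dom", "iova" and "paddr" are
 * assumed to come from the caller and to satisfy the alignment rules of the
 * domain's pgsize_bitmap):
 *
 *	int ret;
 *
 *	ret = iommu_map(dom, iova, paddr, SZ_2M,
 *			IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *
 *	... DMA through the mapping ...
 *
 *	if (iommu_unmap(dom, iova, SZ_2M) != SZ_2M)
 *		pr_warn("failed to unmap the whole range\n");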
2666 */ 2667 size_t iommu_unmap(struct iommu_domain *domain, 2668 unsigned long iova, size_t size) 2669 { 2670 struct iommu_iotlb_gather iotlb_gather; 2671 size_t ret; 2672 2673 iommu_iotlb_gather_init(&iotlb_gather); 2674 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2675 iommu_iotlb_sync(domain, &iotlb_gather); 2676 2677 return ret; 2678 } 2679 EXPORT_SYMBOL_GPL(iommu_unmap); 2680 2681 /** 2682 * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync 2683 * @domain: Domain to manipulate 2684 * @iova: IO virtual address to start 2685 * @size: Length of the range starting from @iova 2686 * @iotlb_gather: range information for a pending IOTLB flush 2687 * 2688 * iommu_unmap_fast() will remove a translation created by iommu_map(). 2689 * It can't subdivide a mapping created by iommu_map(), so it should be 2690 * called with IOVA ranges that match what was passed to iommu_map(). The 2691 * range can aggregate contiguous iommu_map() calls so long as no individual 2692 * range is split. 2693 * 2694 * Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers 2695 * which manage the IOTLB flushing externally to perform a batched sync. 2696 * 2697 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2698 * unmapping stopped. 2699 */ 2700 size_t iommu_unmap_fast(struct iommu_domain *domain, 2701 unsigned long iova, size_t size, 2702 struct iommu_iotlb_gather *iotlb_gather) 2703 { 2704 return __iommu_unmap(domain, iova, size, iotlb_gather); 2705 } 2706 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2707 2708 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2709 struct scatterlist *sg, unsigned int nents, int prot, 2710 gfp_t gfp) 2711 { 2712 size_t len = 0, mapped = 0; 2713 phys_addr_t start; 2714 unsigned int i = 0; 2715 int ret; 2716 2717 while (i <= nents) { 2718 phys_addr_t s_phys = sg_phys(sg); 2719 2720 if (len && s_phys != start + len) { 2721 ret = iommu_map_nosync(domain, iova + mapped, start, 2722 len, prot, gfp); 2723 if (ret) 2724 goto out_err; 2725 2726 mapped += len; 2727 len = 0; 2728 } 2729 2730 if (sg_dma_is_bus_address(sg)) 2731 goto next; 2732 2733 if (len) { 2734 len += sg->length; 2735 } else { 2736 len = sg->length; 2737 start = s_phys; 2738 } 2739 2740 next: 2741 if (++i < nents) 2742 sg = sg_next(sg); 2743 } 2744 2745 ret = iommu_sync_map(domain, iova, mapped); 2746 if (ret) 2747 goto out_err; 2748 2749 return mapped; 2750 2751 out_err: 2752 /* undo mappings already done */ 2753 iommu_unmap(domain, iova, mapped); 2754 2755 return ret; 2756 } 2757 EXPORT_SYMBOL_GPL(iommu_map_sg); 2758 2759 /** 2760 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2761 * @domain: the iommu domain where the fault has happened 2762 * @dev: the device where the fault has happened 2763 * @iova: the faulting address 2764 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2765 * 2766 * This function should be called by the low-level IOMMU implementations 2767 * whenever IOMMU faults happen, to allow high-level users, that are 2768 * interested in such events, to know about them. 
2769 * 2770 * This event may be useful for several possible use cases: 2771 * - mere logging of the event 2772 * - dynamic TLB/PTE loading 2773 * - if restarting of the faulting device is required 2774 * 2775 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2776 * PTE/TLB loading will one day be supported, implementations will be able 2777 * to tell whether it succeeded or not according to this return value). 2778 * 2779 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2780 * (though fault handlers can also return -ENOSYS, in case they want to 2781 * elicit the default behavior of the IOMMU drivers). 2782 */ 2783 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2784 unsigned long iova, int flags) 2785 { 2786 int ret = -ENOSYS; 2787 2788 /* 2789 * if upper layers showed interest and installed a fault handler, 2790 * invoke it. 2791 */ 2792 if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER && 2793 domain->handler) 2794 ret = domain->handler(domain, dev, iova, flags, 2795 domain->handler_token); 2796 2797 trace_io_page_fault(dev, iova, flags); 2798 return ret; 2799 } 2800 EXPORT_SYMBOL_GPL(report_iommu_fault); 2801 2802 static int __init iommu_init(void) 2803 { 2804 iommu_group_kset = kset_create_and_add("iommu_groups", 2805 NULL, kernel_kobj); 2806 BUG_ON(!iommu_group_kset); 2807 2808 iommu_debugfs_setup(); 2809 2810 return 0; 2811 } 2812 core_initcall(iommu_init); 2813 2814 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2815 unsigned long quirk) 2816 { 2817 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2818 return -EINVAL; 2819 if (!domain->ops->set_pgtable_quirks) 2820 return -EINVAL; 2821 return domain->ops->set_pgtable_quirks(domain, quirk); 2822 } 2823 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2824 2825 /** 2826 * iommu_get_resv_regions - get reserved regions 2827 * @dev: device for which to get reserved regions 2828 * @list: reserved region list for device 2829 * 2830 * This returns a list of reserved IOVA regions specific to this device. 2831 * A domain user should not map IOVA in these ranges. 2832 */ 2833 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2834 { 2835 const struct iommu_ops *ops = dev_iommu_ops(dev); 2836 2837 if (ops->get_resv_regions) 2838 ops->get_resv_regions(dev, list); 2839 } 2840 EXPORT_SYMBOL_GPL(iommu_get_resv_regions); 2841 2842 /** 2843 * iommu_put_resv_regions - release reserved regions 2844 * @dev: device for which to free reserved regions 2845 * @list: reserved region list for device 2846 * 2847 * This releases a reserved region list acquired by iommu_get_resv_regions(). 
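 *
 * A minimal usage sketch (illustrative only; "dev" is assumed to be the
 * caller's probed device) that walks the reserved regions and then releases
 * the list again:
 *
 *	struct iommu_resv_region *region;
 *	LIST_HEAD(resv_regions);
 *
 *	iommu_get_resv_regions(dev, &resv_regions);
 *	list_for_each_entry(region, &resv_regions, list)
 *		pr_info("reserved: %pa + 0x%zx (type %d)\n",
 *			&region->start, region->length, region->type);
 *	iommu_put_resv_regions(dev, &resv_regions);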
2848 */ 2849 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2850 { 2851 struct iommu_resv_region *entry, *next; 2852 2853 list_for_each_entry_safe(entry, next, list, list) { 2854 if (entry->free) 2855 entry->free(dev, entry); 2856 else 2857 kfree(entry); 2858 } 2859 } 2860 EXPORT_SYMBOL(iommu_put_resv_regions); 2861 2862 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2863 size_t length, int prot, 2864 enum iommu_resv_type type, 2865 gfp_t gfp) 2866 { 2867 struct iommu_resv_region *region; 2868 2869 region = kzalloc(sizeof(*region), gfp); 2870 if (!region) 2871 return NULL; 2872 2873 INIT_LIST_HEAD(®ion->list); 2874 region->start = start; 2875 region->length = length; 2876 region->prot = prot; 2877 region->type = type; 2878 return region; 2879 } 2880 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2881 2882 void iommu_set_default_passthrough(bool cmd_line) 2883 { 2884 if (cmd_line) 2885 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2886 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2887 } 2888 2889 void iommu_set_default_translated(bool cmd_line) 2890 { 2891 if (cmd_line) 2892 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2893 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2894 } 2895 2896 bool iommu_default_passthrough(void) 2897 { 2898 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2899 } 2900 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2901 2902 static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode) 2903 { 2904 const struct iommu_device *iommu, *ret = NULL; 2905 2906 spin_lock(&iommu_device_lock); 2907 list_for_each_entry(iommu, &iommu_device_list, list) 2908 if (iommu->fwnode == fwnode) { 2909 ret = iommu; 2910 break; 2911 } 2912 spin_unlock(&iommu_device_lock); 2913 return ret; 2914 } 2915 2916 const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) 2917 { 2918 const struct iommu_device *iommu = iommu_from_fwnode(fwnode); 2919 2920 return iommu ? iommu->ops : NULL; 2921 } 2922 2923 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) 2924 { 2925 const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode); 2926 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2927 2928 if (!iommu) 2929 return driver_deferred_probe_check_state(dev); 2930 if (!dev->iommu && !READ_ONCE(iommu->ready)) 2931 return -EPROBE_DEFER; 2932 2933 if (fwspec) 2934 return iommu->ops == iommu_fwspec_ops(fwspec) ? 
0 : -EINVAL; 2935 2936 if (!dev_iommu_get(dev)) 2937 return -ENOMEM; 2938 2939 /* Preallocate for the overwhelmingly common case of 1 ID */ 2940 fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); 2941 if (!fwspec) 2942 return -ENOMEM; 2943 2944 fwnode_handle_get(iommu_fwnode); 2945 fwspec->iommu_fwnode = iommu_fwnode; 2946 dev_iommu_fwspec_set(dev, fwspec); 2947 return 0; 2948 } 2949 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 2950 2951 void iommu_fwspec_free(struct device *dev) 2952 { 2953 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2954 2955 if (fwspec) { 2956 fwnode_handle_put(fwspec->iommu_fwnode); 2957 kfree(fwspec); 2958 dev_iommu_fwspec_set(dev, NULL); 2959 } 2960 } 2961 2962 int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) 2963 { 2964 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2965 int i, new_num; 2966 2967 if (!fwspec) 2968 return -EINVAL; 2969 2970 new_num = fwspec->num_ids + num_ids; 2971 if (new_num > 1) { 2972 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 2973 GFP_KERNEL); 2974 if (!fwspec) 2975 return -ENOMEM; 2976 2977 dev_iommu_fwspec_set(dev, fwspec); 2978 } 2979 2980 for (i = 0; i < num_ids; i++) 2981 fwspec->ids[fwspec->num_ids + i] = ids[i]; 2982 2983 fwspec->num_ids = new_num; 2984 return 0; 2985 } 2986 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2987 2988 /** 2989 * iommu_setup_default_domain - Set the default_domain for the group 2990 * @group: Group to change 2991 * @target_type: Domain type to set as the default_domain 2992 * 2993 * Allocate a default domain and set it as the current domain on the group. If 2994 * the group already has a default domain it will be changed to the target_type. 2995 * When target_type is 0 the default domain is selected based on driver and 2996 * system preferences. 2997 */ 2998 static int iommu_setup_default_domain(struct iommu_group *group, 2999 int target_type) 3000 { 3001 struct iommu_domain *old_dom = group->default_domain; 3002 struct group_device *gdev; 3003 struct iommu_domain *dom; 3004 bool direct_failed; 3005 int req_type; 3006 int ret; 3007 3008 lockdep_assert_held(&group->mutex); 3009 3010 req_type = iommu_get_default_domain_type(group, target_type); 3011 if (req_type < 0) 3012 return -EINVAL; 3013 3014 dom = iommu_group_alloc_default_domain(group, req_type); 3015 if (IS_ERR(dom)) 3016 return PTR_ERR(dom); 3017 3018 if (group->default_domain == dom) 3019 return 0; 3020 3021 if (iommu_is_dma_domain(dom)) { 3022 ret = iommu_get_dma_cookie(dom); 3023 if (ret) { 3024 iommu_domain_free(dom); 3025 return ret; 3026 } 3027 } 3028 3029 /* 3030 * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be 3031 * mapped before their device is attached, in order to guarantee 3032 * continuity with any FW activity 3033 */ 3034 direct_failed = false; 3035 for_each_group_device(group, gdev) { 3036 if (iommu_create_device_direct_mappings(dom, gdev->dev)) { 3037 direct_failed = true; 3038 dev_warn_once( 3039 gdev->dev->iommu->iommu_dev->dev, 3040 "IOMMU driver was not able to establish FW requested direct mapping."); 3041 } 3042 } 3043 3044 /* We must set default_domain early for __iommu_device_set_domain */ 3045 group->default_domain = dom; 3046 if (!group->domain) { 3047 /* 3048 * Drivers are not allowed to fail the first domain attach. 3049 * The only way to recover from this is to fail attaching the 3050 * iommu driver and call ops->release_device. Put the domain 3051 * in group->default_domain so it is freed after. 
3052 */ 3053 ret = __iommu_group_set_domain_internal( 3054 group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3055 if (WARN_ON(ret)) 3056 goto out_free_old; 3057 } else { 3058 ret = __iommu_group_set_domain(group, dom); 3059 if (ret) 3060 goto err_restore_def_domain; 3061 } 3062 3063 /* 3064 * Drivers are supposed to allow mappings to be installed in a domain 3065 * before device attachment, but some don't. Hack around this defect by 3066 * trying again after attaching. If this happens it means the device 3067 * will not continuously have the IOMMU_RESV_DIRECT map. 3068 */ 3069 if (direct_failed) { 3070 for_each_group_device(group, gdev) { 3071 ret = iommu_create_device_direct_mappings(dom, gdev->dev); 3072 if (ret) 3073 goto err_restore_domain; 3074 } 3075 } 3076 3077 out_free_old: 3078 if (old_dom) 3079 iommu_domain_free(old_dom); 3080 return ret; 3081 3082 err_restore_domain: 3083 if (old_dom) 3084 __iommu_group_set_domain_internal( 3085 group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3086 err_restore_def_domain: 3087 if (old_dom) { 3088 iommu_domain_free(dom); 3089 group->default_domain = old_dom; 3090 } 3091 return ret; 3092 } 3093 3094 /* 3095 * Changing the default domain through sysfs requires the users to unbind the 3096 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 3097 * transition. Return failure if this isn't met. 3098 * 3099 * We need to consider the race between this and the device release path. 3100 * group->mutex is used here to guarantee that the device release path 3101 * will not be entered at the same time. 3102 */ 3103 static ssize_t iommu_group_store_type(struct iommu_group *group, 3104 const char *buf, size_t count) 3105 { 3106 struct group_device *gdev; 3107 int ret, req_type; 3108 3109 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 3110 return -EACCES; 3111 3112 if (WARN_ON(!group) || !group->default_domain) 3113 return -EINVAL; 3114 3115 if (sysfs_streq(buf, "identity")) 3116 req_type = IOMMU_DOMAIN_IDENTITY; 3117 else if (sysfs_streq(buf, "DMA")) 3118 req_type = IOMMU_DOMAIN_DMA; 3119 else if (sysfs_streq(buf, "DMA-FQ")) 3120 req_type = IOMMU_DOMAIN_DMA_FQ; 3121 else if (sysfs_streq(buf, "auto")) 3122 req_type = 0; 3123 else 3124 return -EINVAL; 3125 3126 mutex_lock(&group->mutex); 3127 /* We can bring up a flush queue without tearing down the domain. */ 3128 if (req_type == IOMMU_DOMAIN_DMA_FQ && 3129 group->default_domain->type == IOMMU_DOMAIN_DMA) { 3130 ret = iommu_dma_init_fq(group->default_domain); 3131 if (ret) 3132 goto out_unlock; 3133 3134 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 3135 ret = count; 3136 goto out_unlock; 3137 } 3138 3139 /* Otherwise, ensure that device exists and no driver is bound. */ 3140 if (list_empty(&group->devices) || group->owner_cnt) { 3141 ret = -EPERM; 3142 goto out_unlock; 3143 } 3144 3145 ret = iommu_setup_default_domain(group, req_type); 3146 if (ret) 3147 goto out_unlock; 3148 3149 /* Make sure dma_ops is appropriately set */ 3150 for_each_group_device(group, gdev) 3151 iommu_setup_dma_ops(gdev->dev); 3152 3153 out_unlock: 3154 mutex_unlock(&group->mutex); 3155 return ret ?: count; 3156 } 3157 3158 /** 3159 * iommu_device_use_default_domain() - Device driver wants to handle device 3160 * DMA through the kernel DMA API. 3161 * @dev: The device. 3162 * 3163 * The device driver about to bind @dev wants to do DMA through the kernel 3164 * DMA API. Return 0 if it is allowed, otherwise an error.
3165 */ 3166 int iommu_device_use_default_domain(struct device *dev) 3167 { 3168 /* Caller is the driver core during the pre-probe path */ 3169 struct iommu_group *group = dev->iommu_group; 3170 int ret = 0; 3171 3172 if (!group) 3173 return 0; 3174 3175 mutex_lock(&group->mutex); 3176 /* We may race against bus_iommu_probe() finalising groups here */ 3177 if (!group->default_domain) { 3178 ret = -EPROBE_DEFER; 3179 goto unlock_out; 3180 } 3181 if (group->owner_cnt) { 3182 if (group->domain != group->default_domain || group->owner || 3183 !xa_empty(&group->pasid_array)) { 3184 ret = -EBUSY; 3185 goto unlock_out; 3186 } 3187 } 3188 3189 group->owner_cnt++; 3190 3191 unlock_out: 3192 mutex_unlock(&group->mutex); 3193 return ret; 3194 } 3195 3196 /** 3197 * iommu_device_unuse_default_domain() - Device driver stops handling device 3198 * DMA through the kernel DMA API. 3199 * @dev: The device. 3200 * 3201 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3202 * It must be called after iommu_device_use_default_domain(). 3203 */ 3204 void iommu_device_unuse_default_domain(struct device *dev) 3205 { 3206 /* Caller is the driver core during the post-probe path */ 3207 struct iommu_group *group = dev->iommu_group; 3208 3209 if (!group) 3210 return; 3211 3212 mutex_lock(&group->mutex); 3213 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3214 group->owner_cnt--; 3215 3216 mutex_unlock(&group->mutex); 3217 } 3218 3219 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3220 { 3221 struct device *dev = iommu_group_first_dev(group); 3222 const struct iommu_ops *ops = dev_iommu_ops(dev); 3223 struct iommu_domain *domain; 3224 3225 if (group->blocking_domain) 3226 return 0; 3227 3228 if (ops->blocked_domain) { 3229 group->blocking_domain = ops->blocked_domain; 3230 return 0; 3231 } 3232 3233 /* 3234 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an 3235 * empty PAGING domain instead. 3236 */ 3237 domain = iommu_paging_domain_alloc(dev); 3238 if (IS_ERR(domain)) 3239 return PTR_ERR(domain); 3240 group->blocking_domain = domain; 3241 return 0; 3242 } 3243 3244 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3245 { 3246 int ret; 3247 3248 if ((group->domain && group->domain != group->default_domain) || 3249 !xa_empty(&group->pasid_array)) 3250 return -EBUSY; 3251 3252 ret = __iommu_group_alloc_blocking_domain(group); 3253 if (ret) 3254 return ret; 3255 ret = __iommu_group_set_domain(group, group->blocking_domain); 3256 if (ret) 3257 return ret; 3258 3259 group->owner = owner; 3260 group->owner_cnt++; 3261 return 0; 3262 } 3263 3264 /** 3265 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3266 * @group: The group. 3267 * @owner: Caller specified pointer. Used for exclusive ownership. 3268 * 3269 * This is to support backward compatibility for vfio which manages the dma 3270 * ownership in iommu_group level. New invocations on this interface should be 3271 * prohibited. Only a single owner may exist for a group. 
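 *
 * A minimal usage sketch (illustrative only; "owner_cookie" and "user_domain"
 * are hypothetical caller-owned names, the cookie being any pointer unique to
 * the claiming driver):
 *
 *	ret = iommu_group_claim_dma_owner(group, owner_cookie);
 *	if (ret)
 *		return ret;
 *
 *	ret = iommu_attach_group(user_domain, group);
 *	if (ret) {
 *		iommu_group_release_dma_owner(group);
 *		return ret;
 *	}
 *
 *	... user-controlled DMA ...
 *
 *	iommu_detach_group(user_domain, group);
 *	iommu_group_release_dma_owner(group);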
3272 */ 3273 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3274 { 3275 int ret = 0; 3276 3277 if (WARN_ON(!owner)) 3278 return -EINVAL; 3279 3280 mutex_lock(&group->mutex); 3281 if (group->owner_cnt) { 3282 ret = -EPERM; 3283 goto unlock_out; 3284 } 3285 3286 ret = __iommu_take_dma_ownership(group, owner); 3287 unlock_out: 3288 mutex_unlock(&group->mutex); 3289 3290 return ret; 3291 } 3292 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3293 3294 /** 3295 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3296 * @dev: The device. 3297 * @owner: Caller specified pointer. Used for exclusive ownership. 3298 * 3299 * Claim the DMA ownership of a device. Multiple devices in the same group may 3300 * concurrently claim ownership if they present the same owner value. Returns 0 3301 * on success and error code on failure 3302 */ 3303 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3304 { 3305 /* Caller must be a probed driver on dev */ 3306 struct iommu_group *group = dev->iommu_group; 3307 int ret = 0; 3308 3309 if (WARN_ON(!owner)) 3310 return -EINVAL; 3311 3312 if (!group) 3313 return -ENODEV; 3314 3315 mutex_lock(&group->mutex); 3316 if (group->owner_cnt) { 3317 if (group->owner != owner) { 3318 ret = -EPERM; 3319 goto unlock_out; 3320 } 3321 group->owner_cnt++; 3322 goto unlock_out; 3323 } 3324 3325 ret = __iommu_take_dma_ownership(group, owner); 3326 unlock_out: 3327 mutex_unlock(&group->mutex); 3328 return ret; 3329 } 3330 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3331 3332 static void __iommu_release_dma_ownership(struct iommu_group *group) 3333 { 3334 if (WARN_ON(!group->owner_cnt || !group->owner || 3335 !xa_empty(&group->pasid_array))) 3336 return; 3337 3338 group->owner_cnt = 0; 3339 group->owner = NULL; 3340 __iommu_group_set_domain_nofail(group, group->default_domain); 3341 } 3342 3343 /** 3344 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3345 * @group: The group 3346 * 3347 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3348 */ 3349 void iommu_group_release_dma_owner(struct iommu_group *group) 3350 { 3351 mutex_lock(&group->mutex); 3352 __iommu_release_dma_ownership(group); 3353 mutex_unlock(&group->mutex); 3354 } 3355 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3356 3357 /** 3358 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3359 * @dev: The device. 3360 * 3361 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 3362 */ 3363 void iommu_device_release_dma_owner(struct device *dev) 3364 { 3365 /* Caller must be a probed driver on dev */ 3366 struct iommu_group *group = dev->iommu_group; 3367 3368 mutex_lock(&group->mutex); 3369 if (group->owner_cnt > 1) 3370 group->owner_cnt--; 3371 else 3372 __iommu_release_dma_ownership(group); 3373 mutex_unlock(&group->mutex); 3374 } 3375 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3376 3377 /** 3378 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3379 * @group: The group. 3380 * 3381 * This provides status query on a given group. It is racy and only for 3382 * non-binding status reporting. 
3383 */ 3384 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3385 { 3386 unsigned int user; 3387 3388 mutex_lock(&group->mutex); 3389 user = group->owner_cnt; 3390 mutex_unlock(&group->mutex); 3391 3392 return user; 3393 } 3394 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3395 3396 static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, 3397 struct iommu_domain *domain) 3398 { 3399 const struct iommu_ops *ops = dev_iommu_ops(dev); 3400 struct iommu_domain *blocked_domain = ops->blocked_domain; 3401 3402 WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain, 3403 dev, pasid, domain)); 3404 } 3405 3406 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3407 struct iommu_group *group, ioasid_t pasid, 3408 struct iommu_domain *old) 3409 { 3410 struct group_device *device, *last_gdev; 3411 int ret; 3412 3413 for_each_group_device(group, device) { 3414 if (device->dev->iommu->max_pasids > 0) { 3415 ret = domain->ops->set_dev_pasid(domain, device->dev, 3416 pasid, old); 3417 if (ret) 3418 goto err_revert; 3419 } 3420 } 3421 3422 return 0; 3423 3424 err_revert: 3425 last_gdev = device; 3426 for_each_group_device(group, device) { 3427 if (device == last_gdev) 3428 break; 3429 if (device->dev->iommu->max_pasids > 0) { 3430 /* 3431 * If no old domain, undo the succeeded devices/pasid. 3432 * Otherwise, rollback the succeeded devices/pasid to 3433 * the old domain. And it is a driver bug to fail 3434 * attaching with a previously good domain. 3435 */ 3436 if (!old || 3437 WARN_ON(old->ops->set_dev_pasid(old, device->dev, 3438 pasid, domain))) 3439 iommu_remove_dev_pasid(device->dev, pasid, domain); 3440 } 3441 } 3442 return ret; 3443 } 3444 3445 static void __iommu_remove_group_pasid(struct iommu_group *group, 3446 ioasid_t pasid, 3447 struct iommu_domain *domain) 3448 { 3449 struct group_device *device; 3450 3451 for_each_group_device(group, device) { 3452 if (device->dev->iommu->max_pasids > 0) 3453 iommu_remove_dev_pasid(device->dev, pasid, domain); 3454 } 3455 } 3456 3457 /* 3458 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3459 * @domain: the iommu domain. 3460 * @dev: the attached device. 3461 * @pasid: the pasid of the device. 3462 * @handle: the attach handle. 3463 * 3464 * Caller should always provide a new handle to avoid race with the paths 3465 * that have lockless reference to handle if it intends to pass a valid handle. 3466 * 3467 * Return: 0 on success, or an error. 3468 */ 3469 int iommu_attach_device_pasid(struct iommu_domain *domain, 3470 struct device *dev, ioasid_t pasid, 3471 struct iommu_attach_handle *handle) 3472 { 3473 /* Caller must be a probed driver on dev */ 3474 struct iommu_group *group = dev->iommu_group; 3475 struct group_device *device; 3476 const struct iommu_ops *ops; 3477 void *entry; 3478 int ret; 3479 3480 if (!group) 3481 return -ENODEV; 3482 3483 ops = dev_iommu_ops(dev); 3484 3485 if (!domain->ops->set_dev_pasid || 3486 !ops->blocked_domain || 3487 !ops->blocked_domain->ops->set_dev_pasid) 3488 return -EOPNOTSUPP; 3489 3490 if (!domain_iommu_ops_compatible(ops, domain) || 3491 pasid == IOMMU_NO_PASID) 3492 return -EINVAL; 3493 3494 mutex_lock(&group->mutex); 3495 for_each_group_device(group, device) { 3496 /* 3497 * Skip PASID validation for devices without PASID support 3498 * (max_pasids = 0). These devices cannot issue transactions 3499 * with PASID, so they don't affect group's PASID usage. 
3500 */ 3501 if ((device->dev->iommu->max_pasids > 0) && 3502 (pasid >= device->dev->iommu->max_pasids)) { 3503 ret = -EINVAL; 3504 goto out_unlock; 3505 } 3506 } 3507 3508 entry = iommu_make_pasid_array_entry(domain, handle); 3509 3510 /* 3511 * Entry present is a failure case. Use xa_insert() instead of 3512 * xa_reserve(). 3513 */ 3514 ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL); 3515 if (ret) 3516 goto out_unlock; 3517 3518 ret = __iommu_set_group_pasid(domain, group, pasid, NULL); 3519 if (ret) { 3520 xa_release(&group->pasid_array, pasid); 3521 goto out_unlock; 3522 } 3523 3524 /* 3525 * The xa_insert() above reserved the memory, and the group->mutex is 3526 * held, this cannot fail. The new domain cannot be visible until the 3527 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3528 * queued and then failing attach. 3529 */ 3530 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3531 pasid, entry, GFP_KERNEL))); 3532 3533 out_unlock: 3534 mutex_unlock(&group->mutex); 3535 return ret; 3536 } 3537 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3538 3539 /** 3540 * iommu_replace_device_pasid - Replace the domain that a specific pasid 3541 * of the device is attached to 3542 * @domain: the new iommu domain 3543 * @dev: the attached device. 3544 * @pasid: the pasid of the device. 3545 * @handle: the attach handle. 3546 * 3547 * This API allows the pasid to switch domains. The @pasid should have been 3548 * attached. Otherwise, this fails. The pasid will keep the old configuration 3549 * if replacement fails. 3550 * 3551 * The caller should always provide a new handle to avoid racing with paths 3552 * that hold a lockless reference to the handle. 3553 * 3554 * Return 0 on success, or an error. 3555 */ 3556 int iommu_replace_device_pasid(struct iommu_domain *domain, 3557 struct device *dev, ioasid_t pasid, 3558 struct iommu_attach_handle *handle) 3559 { 3560 /* Caller must be a probed driver on dev */ 3561 struct iommu_group *group = dev->iommu_group; 3562 struct iommu_attach_handle *entry; 3563 struct iommu_domain *curr_domain; 3564 void *curr; 3565 int ret; 3566 3567 if (!group) 3568 return -ENODEV; 3569 3570 if (!domain->ops->set_dev_pasid) 3571 return -EOPNOTSUPP; 3572 3573 if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) || 3574 pasid == IOMMU_NO_PASID || !handle) 3575 return -EINVAL; 3576 3577 mutex_lock(&group->mutex); 3578 entry = iommu_make_pasid_array_entry(domain, handle); 3579 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, 3580 XA_ZERO_ENTRY, GFP_KERNEL); 3581 if (xa_is_err(curr)) { 3582 ret = xa_err(curr); 3583 goto out_unlock; 3584 } 3585 3586 /* 3587 * No domain (with or without handle) attached, hence not 3588 * a replace case. 3589 */ 3590 if (!curr) { 3591 xa_release(&group->pasid_array, pasid); 3592 ret = -EINVAL; 3593 goto out_unlock; 3594 } 3595 3596 /* 3597 * Reusing the handle is problematic as there are paths that refer 3598 * to the handle without holding the lock. To avoid races, reject 3599 * callers that attempt it. 3600 */ 3601 if (curr == entry) { 3602 WARN_ON(1); 3603 ret = -EINVAL; 3604 goto out_unlock; 3605 } 3606 3607 curr_domain = pasid_array_entry_to_domain(curr); 3608 ret = 0; 3609 3610 if (curr_domain != domain) { 3611 ret = __iommu_set_group_pasid(domain, group, 3612 pasid, curr_domain); 3613 if (ret) 3614 goto out_unlock; 3615 } 3616 3617 /* 3618 * The above xa_cmpxchg() reserved the memory, and the 3619 * group->mutex is held, this cannot fail.
3620 */ 3621 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3622 pasid, entry, GFP_KERNEL))); 3623 3624 out_unlock: 3625 mutex_unlock(&group->mutex); 3626 return ret; 3627 } 3628 EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL"); 3629 3630 /* 3631 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3632 * @domain: the iommu domain. 3633 * @dev: the attached device. 3634 * @pasid: the pasid of the device. 3635 * 3636 * The @domain must have been attached to @pasid of the @dev with 3637 * iommu_attach_device_pasid(). 3638 */ 3639 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3640 ioasid_t pasid) 3641 { 3642 /* Caller must be a probed driver on dev */ 3643 struct iommu_group *group = dev->iommu_group; 3644 3645 mutex_lock(&group->mutex); 3646 __iommu_remove_group_pasid(group, pasid, domain); 3647 xa_erase(&group->pasid_array, pasid); 3648 mutex_unlock(&group->mutex); 3649 } 3650 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3651 3652 ioasid_t iommu_alloc_global_pasid(struct device *dev) 3653 { 3654 int ret; 3655 3656 /* max_pasids == 0 means that the device does not support PASID */ 3657 if (!dev->iommu->max_pasids) 3658 return IOMMU_PASID_INVALID; 3659 3660 /* 3661 * max_pasids is set up by vendor driver based on number of PASID bits 3662 * supported but the IDA allocation is inclusive. 3663 */ 3664 ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, 3665 dev->iommu->max_pasids - 1, GFP_KERNEL); 3666 return ret < 0 ? IOMMU_PASID_INVALID : ret; 3667 } 3668 EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); 3669 3670 void iommu_free_global_pasid(ioasid_t pasid) 3671 { 3672 if (WARN_ON(pasid == IOMMU_PASID_INVALID)) 3673 return; 3674 3675 ida_free(&iommu_global_pasid_ida, pasid); 3676 } 3677 EXPORT_SYMBOL_GPL(iommu_free_global_pasid); 3678 3679 /** 3680 * iommu_attach_handle_get - Return the attach handle 3681 * @group: the iommu group that domain was attached to 3682 * @pasid: the pasid within the group 3683 * @type: matched domain type, 0 for any match 3684 * 3685 * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch. 3686 * 3687 * Return the attach handle to the caller. The life cycle of an iommu attach 3688 * handle is from the time when the domain is attached to the time when the 3689 * domain is detached. Callers are required to synchronize the call of 3690 * iommu_attach_handle_get() with domain attachment and detachment. The attach 3691 * handle can only be used during its life cycle. 3692 */ 3693 struct iommu_attach_handle * 3694 iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) 3695 { 3696 struct iommu_attach_handle *handle; 3697 void *entry; 3698 3699 xa_lock(&group->pasid_array); 3700 entry = xa_load(&group->pasid_array, pasid); 3701 if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) { 3702 handle = ERR_PTR(-ENOENT); 3703 } else { 3704 handle = xa_untag_pointer(entry); 3705 if (type && handle->domain->type != type) 3706 handle = ERR_PTR(-EBUSY); 3707 } 3708 xa_unlock(&group->pasid_array); 3709 3710 return handle; 3711 } 3712 EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL"); 3713 3714 /** 3715 * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group 3716 * @domain: IOMMU domain to attach 3717 * @group: IOMMU group that will be attached 3718 * @handle: attach handle 3719 * 3720 * Returns 0 on success and error code on failure. 3721 * 3722 * This is a variant of iommu_attach_group(). 
It allows the caller to provide 3723 * an attach handle and use it when the domain is attached. This is currently 3724 * used by IOMMUFD to deliver the I/O page faults. 3725 * 3726 * Caller should always provide a new handle to avoid race with the paths 3727 * that have lockless reference to handle. 3728 */ 3729 int iommu_attach_group_handle(struct iommu_domain *domain, 3730 struct iommu_group *group, 3731 struct iommu_attach_handle *handle) 3732 { 3733 void *entry; 3734 int ret; 3735 3736 if (!handle) 3737 return -EINVAL; 3738 3739 mutex_lock(&group->mutex); 3740 entry = iommu_make_pasid_array_entry(domain, handle); 3741 ret = xa_insert(&group->pasid_array, 3742 IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL); 3743 if (ret) 3744 goto out_unlock; 3745 3746 ret = __iommu_attach_group(domain, group); 3747 if (ret) { 3748 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3749 goto out_unlock; 3750 } 3751 3752 /* 3753 * The xa_insert() above reserved the memory, and the group->mutex is 3754 * held, this cannot fail. The new domain cannot be visible until the 3755 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3756 * queued and then failing attach. 3757 */ 3758 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3759 IOMMU_NO_PASID, entry, GFP_KERNEL))); 3760 3761 out_unlock: 3762 mutex_unlock(&group->mutex); 3763 return ret; 3764 } 3765 EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, "IOMMUFD_INTERNAL"); 3766 3767 /** 3768 * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group 3769 * @domain: IOMMU domain to attach 3770 * @group: IOMMU group that will be attached 3771 * 3772 * Detach the specified IOMMU domain from the specified IOMMU group. 3773 * It must be used in conjunction with iommu_attach_group_handle(). 3774 */ 3775 void iommu_detach_group_handle(struct iommu_domain *domain, 3776 struct iommu_group *group) 3777 { 3778 mutex_lock(&group->mutex); 3779 __iommu_group_set_core_domain(group); 3780 xa_erase(&group->pasid_array, IOMMU_NO_PASID); 3781 mutex_unlock(&group->mutex); 3782 } 3783 EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL"); 3784 3785 /** 3786 * iommu_replace_group_handle - replace the domain that a group is attached to 3787 * @group: IOMMU group that will be attached to the new domain 3788 * @new_domain: new IOMMU domain to replace with 3789 * @handle: attach handle 3790 * 3791 * This API allows the group to switch domains without being forced to go to 3792 * the blocking domain in-between. It allows the caller to provide an attach 3793 * handle for the new domain and use it when the domain is attached. 3794 * 3795 * If the currently attached domain is a core domain (e.g. a default_domain), 3796 * it will act just like the iommu_attach_group_handle(). 3797 * 3798 * Caller should always provide a new handle to avoid race with the paths 3799 * that have lockless reference to handle. 
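 *
 * A minimal usage sketch (illustrative only; "new_hwpt" is a hypothetical
 * caller structure that embeds both the new domain and its attach handle):
 *
 *	ret = iommu_replace_group_handle(group, new_hwpt->domain,
 *					 &new_hwpt->handle);
 *	if (ret)
 *		return ret;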
3800 */ 3801 int iommu_replace_group_handle(struct iommu_group *group, 3802 struct iommu_domain *new_domain, 3803 struct iommu_attach_handle *handle) 3804 { 3805 void *curr, *entry; 3806 int ret; 3807 3808 if (!new_domain || !handle) 3809 return -EINVAL; 3810 3811 mutex_lock(&group->mutex); 3812 entry = iommu_make_pasid_array_entry(new_domain, handle); 3813 ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); 3814 if (ret) 3815 goto err_unlock; 3816 3817 ret = __iommu_group_set_domain(group, new_domain); 3818 if (ret) 3819 goto err_release; 3820 3821 curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL); 3822 WARN_ON(xa_is_err(curr)); 3823 3824 mutex_unlock(&group->mutex); 3825 3826 return 0; 3827 err_release: 3828 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3829 err_unlock: 3830 mutex_unlock(&group->mutex); 3831 return ret; 3832 } 3833 EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); 3834 3835 #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 3836 /** 3837 * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain 3838 * @desc: MSI descriptor, will store the MSI page 3839 * @msi_addr: MSI target address to be mapped 3840 * 3841 * The implementation of sw_msi() should take msi_addr and map it to 3842 * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the 3843 * mapping information. 3844 * 3845 * Return: 0 on success or negative error code if the mapping failed. 3846 */ 3847 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 3848 { 3849 struct device *dev = msi_desc_to_dev(desc); 3850 struct iommu_group *group = dev->iommu_group; 3851 int ret = 0; 3852 3853 if (!group) 3854 return 0; 3855 3856 mutex_lock(&group->mutex); 3857 /* An IDENTITY domain must pass through */ 3858 if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) { 3859 switch (group->domain->cookie_type) { 3860 case IOMMU_COOKIE_DMA_MSI: 3861 case IOMMU_COOKIE_DMA_IOVA: 3862 ret = iommu_dma_sw_msi(group->domain, desc, msi_addr); 3863 break; 3864 case IOMMU_COOKIE_IOMMUFD: 3865 ret = iommufd_sw_msi(group->domain, desc, msi_addr); 3866 break; 3867 default: 3868 ret = -EOPNOTSUPP; 3869 break; 3870 } 3871 } 3872 mutex_unlock(&group->mutex); 3873 return ret; 3874 } 3875 #endif /* CONFIG_IRQ_MSI_IOMMU */ 3876