1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 */ 6 7 #define pr_fmt(fmt) "iommu: " fmt 8 9 #include <linux/amba/bus.h> 10 #include <linux/device.h> 11 #include <linux/kernel.h> 12 #include <linux/bits.h> 13 #include <linux/bug.h> 14 #include <linux/types.h> 15 #include <linux/init.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/errno.h> 19 #include <linux/host1x_context_bus.h> 20 #include <linux/iommu.h> 21 #include <linux/iommufd.h> 22 #include <linux/idr.h> 23 #include <linux/err.h> 24 #include <linux/pci.h> 25 #include <linux/pci-ats.h> 26 #include <linux/bitops.h> 27 #include <linux/platform_device.h> 28 #include <linux/property.h> 29 #include <linux/fsl/mc.h> 30 #include <linux/module.h> 31 #include <linux/cc_platform.h> 32 #include <linux/cdx/cdx_bus.h> 33 #include <trace/events/iommu.h> 34 #include <linux/sched/mm.h> 35 #include <linux/msi.h> 36 #include <uapi/linux/iommufd.h> 37 38 #include "dma-iommu.h" 39 #include "iommu-priv.h" 40 41 static struct kset *iommu_group_kset; 42 static DEFINE_IDA(iommu_group_ida); 43 static DEFINE_IDA(iommu_global_pasid_ida); 44 45 static unsigned int iommu_def_domain_type __read_mostly; 46 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); 47 static u32 iommu_cmd_line __read_mostly; 48 49 /* Tags used with xa_tag_pointer() in group->pasid_array */ 50 enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 }; 51 52 struct iommu_group { 53 struct kobject kobj; 54 struct kobject *devices_kobj; 55 struct list_head devices; 56 struct xarray pasid_array; 57 struct mutex mutex; 58 void *iommu_data; 59 void (*iommu_data_release)(void *iommu_data); 60 char *name; 61 int id; 62 struct iommu_domain *default_domain; 63 struct iommu_domain *blocking_domain; 64 struct iommu_domain *domain; 65 struct list_head entry; 66 unsigned int owner_cnt; 67 void *owner; 68 }; 69 70 struct group_device { 71 struct list_head list; 72 struct device *dev; 73 char *name; 74 }; 75 76 /* Iterate over each struct group_device in a struct iommu_group */ 77 #define for_each_group_device(group, pos) \ 78 list_for_each_entry(pos, &(group)->devices, list) 79 80 struct iommu_group_attribute { 81 struct attribute attr; 82 ssize_t (*show)(struct iommu_group *group, char *buf); 83 ssize_t (*store)(struct iommu_group *group, 84 const char *buf, size_t count); 85 }; 86 87 static const char * const iommu_group_resv_type_string[] = { 88 [IOMMU_RESV_DIRECT] = "direct", 89 [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", 90 [IOMMU_RESV_RESERVED] = "reserved", 91 [IOMMU_RESV_MSI] = "msi", 92 [IOMMU_RESV_SW_MSI] = "msi", 93 }; 94 95 #define IOMMU_CMD_LINE_DMA_API BIT(0) 96 #define IOMMU_CMD_LINE_STRICT BIT(1) 97 98 static int bus_iommu_probe(const struct bus_type *bus); 99 static int iommu_bus_notifier(struct notifier_block *nb, 100 unsigned long action, void *data); 101 static void iommu_release_device(struct device *dev); 102 static int __iommu_attach_device(struct iommu_domain *domain, 103 struct device *dev); 104 static int __iommu_attach_group(struct iommu_domain *domain, 105 struct iommu_group *group); 106 static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev, 107 unsigned int type, 108 unsigned int flags); 109 110 enum { 111 IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0, 112 }; 113 114 static int __iommu_device_set_domain(struct iommu_group *group, 115 struct device *dev, 116 struct 
iommu_domain *new_domain, 117 unsigned int flags); 118 static int __iommu_group_set_domain_internal(struct iommu_group *group, 119 struct iommu_domain *new_domain, 120 unsigned int flags); 121 static int __iommu_group_set_domain(struct iommu_group *group, 122 struct iommu_domain *new_domain) 123 { 124 return __iommu_group_set_domain_internal(group, new_domain, 0); 125 } 126 static void __iommu_group_set_domain_nofail(struct iommu_group *group, 127 struct iommu_domain *new_domain) 128 { 129 WARN_ON(__iommu_group_set_domain_internal( 130 group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); 131 } 132 133 static int iommu_setup_default_domain(struct iommu_group *group, 134 int target_type); 135 static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 136 struct device *dev); 137 static ssize_t iommu_group_store_type(struct iommu_group *group, 138 const char *buf, size_t count); 139 static struct group_device *iommu_group_alloc_device(struct iommu_group *group, 140 struct device *dev); 141 static void __iommu_group_free_device(struct iommu_group *group, 142 struct group_device *grp_dev); 143 static void iommu_domain_init(struct iommu_domain *domain, unsigned int type, 144 const struct iommu_ops *ops); 145 146 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ 147 struct iommu_group_attribute iommu_group_attr_##_name = \ 148 __ATTR(_name, _mode, _show, _store) 149 150 #define to_iommu_group_attr(_attr) \ 151 container_of(_attr, struct iommu_group_attribute, attr) 152 #define to_iommu_group(_kobj) \ 153 container_of(_kobj, struct iommu_group, kobj) 154 155 static LIST_HEAD(iommu_device_list); 156 static DEFINE_SPINLOCK(iommu_device_lock); 157 158 static const struct bus_type * const iommu_buses[] = { 159 &platform_bus_type, 160 #ifdef CONFIG_PCI 161 &pci_bus_type, 162 #endif 163 #ifdef CONFIG_ARM_AMBA 164 &amba_bustype, 165 #endif 166 #ifdef CONFIG_FSL_MC_BUS 167 &fsl_mc_bus_type, 168 #endif 169 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS 170 &host1x_context_device_bus_type, 171 #endif 172 #ifdef CONFIG_CDX_BUS 173 &cdx_bus_type, 174 #endif 175 }; 176 177 /* 178 * Use a function instead of an array here because the domain-type is a 179 * bit-field, so an array would waste memory. 180 */ 181 static const char *iommu_domain_type_str(unsigned int t) 182 { 183 switch (t) { 184 case IOMMU_DOMAIN_BLOCKED: 185 return "Blocked"; 186 case IOMMU_DOMAIN_IDENTITY: 187 return "Passthrough"; 188 case IOMMU_DOMAIN_UNMANAGED: 189 return "Unmanaged"; 190 case IOMMU_DOMAIN_DMA: 191 case IOMMU_DOMAIN_DMA_FQ: 192 return "Translated"; 193 case IOMMU_DOMAIN_PLATFORM: 194 return "Platform"; 195 default: 196 return "Unknown"; 197 } 198 } 199 200 static int __init iommu_subsys_init(void) 201 { 202 struct notifier_block *nb; 203 204 if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { 205 if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) 206 iommu_set_default_passthrough(false); 207 else 208 iommu_set_default_translated(false); 209 210 if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { 211 pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); 212 iommu_set_default_translated(false); 213 } 214 } 215 216 if (!iommu_default_passthrough() && !iommu_dma_strict) 217 iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; 218 219 pr_info("Default domain type: %s%s\n", 220 iommu_domain_type_str(iommu_def_domain_type), 221 (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 
222 " (set via kernel command line)" : ""); 223 224 if (!iommu_default_passthrough()) 225 pr_info("DMA domain TLB invalidation policy: %s mode%s\n", 226 iommu_dma_strict ? "strict" : "lazy", 227 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 228 " (set via kernel command line)" : ""); 229 230 nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); 231 if (!nb) 232 return -ENOMEM; 233 234 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 235 nb[i].notifier_call = iommu_bus_notifier; 236 bus_register_notifier(iommu_buses[i], &nb[i]); 237 } 238 239 return 0; 240 } 241 subsys_initcall(iommu_subsys_init); 242 243 static int remove_iommu_group(struct device *dev, void *data) 244 { 245 if (dev->iommu && dev->iommu->iommu_dev == data) 246 iommu_release_device(dev); 247 248 return 0; 249 } 250 251 /** 252 * iommu_device_register() - Register an IOMMU hardware instance 253 * @iommu: IOMMU handle for the instance 254 * @ops: IOMMU ops to associate with the instance 255 * @hwdev: (optional) actual instance device, used for fwnode lookup 256 * 257 * Return: 0 on success, or an error. 258 */ 259 int iommu_device_register(struct iommu_device *iommu, 260 const struct iommu_ops *ops, struct device *hwdev) 261 { 262 int err = 0; 263 264 /* We need to be able to take module references appropriately */ 265 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 266 return -EINVAL; 267 268 iommu->ops = ops; 269 if (hwdev) 270 iommu->fwnode = dev_fwnode(hwdev); 271 272 spin_lock(&iommu_device_lock); 273 list_add_tail(&iommu->list, &iommu_device_list); 274 spin_unlock(&iommu_device_lock); 275 276 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) 277 err = bus_iommu_probe(iommu_buses[i]); 278 if (err) 279 iommu_device_unregister(iommu); 280 return err; 281 } 282 EXPORT_SYMBOL_GPL(iommu_device_register); 283 284 void iommu_device_unregister(struct iommu_device *iommu) 285 { 286 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 287 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 288 289 spin_lock(&iommu_device_lock); 290 list_del(&iommu->list); 291 spin_unlock(&iommu_device_lock); 292 293 /* Pairs with the alloc in generic_single_device_group() */ 294 iommu_group_put(iommu->singleton_group); 295 iommu->singleton_group = NULL; 296 } 297 EXPORT_SYMBOL_GPL(iommu_device_unregister); 298 299 #if IS_ENABLED(CONFIG_IOMMUFD_TEST) 300 void iommu_device_unregister_bus(struct iommu_device *iommu, 301 const struct bus_type *bus, 302 struct notifier_block *nb) 303 { 304 bus_unregister_notifier(bus, nb); 305 iommu_device_unregister(iommu); 306 } 307 EXPORT_SYMBOL_GPL(iommu_device_unregister_bus); 308 309 /* 310 * Register an iommu driver against a single bus. This is only used by iommufd 311 * selftest to create a mock iommu driver. The caller must provide 312 * some memory to hold a notifier_block. 
313 */ 314 int iommu_device_register_bus(struct iommu_device *iommu, 315 const struct iommu_ops *ops, 316 const struct bus_type *bus, 317 struct notifier_block *nb) 318 { 319 int err; 320 321 iommu->ops = ops; 322 nb->notifier_call = iommu_bus_notifier; 323 err = bus_register_notifier(bus, nb); 324 if (err) 325 return err; 326 327 spin_lock(&iommu_device_lock); 328 list_add_tail(&iommu->list, &iommu_device_list); 329 spin_unlock(&iommu_device_lock); 330 331 err = bus_iommu_probe(bus); 332 if (err) { 333 iommu_device_unregister_bus(iommu, bus, nb); 334 return err; 335 } 336 return 0; 337 } 338 EXPORT_SYMBOL_GPL(iommu_device_register_bus); 339 #endif 340 341 static struct dev_iommu *dev_iommu_get(struct device *dev) 342 { 343 struct dev_iommu *param = dev->iommu; 344 345 lockdep_assert_held(&iommu_probe_device_lock); 346 347 if (param) 348 return param; 349 350 param = kzalloc(sizeof(*param), GFP_KERNEL); 351 if (!param) 352 return NULL; 353 354 mutex_init(¶m->lock); 355 dev->iommu = param; 356 return param; 357 } 358 359 void dev_iommu_free(struct device *dev) 360 { 361 struct dev_iommu *param = dev->iommu; 362 363 dev->iommu = NULL; 364 if (param->fwspec) { 365 fwnode_handle_put(param->fwspec->iommu_fwnode); 366 kfree(param->fwspec); 367 } 368 kfree(param); 369 } 370 371 /* 372 * Internal equivalent of device_iommu_mapped() for when we care that a device 373 * actually has API ops, and don't want false positives from VFIO-only groups. 374 */ 375 static bool dev_has_iommu(struct device *dev) 376 { 377 return dev->iommu && dev->iommu->iommu_dev; 378 } 379 380 static u32 dev_iommu_get_max_pasids(struct device *dev) 381 { 382 u32 max_pasids = 0, bits = 0; 383 int ret; 384 385 if (dev_is_pci(dev)) { 386 ret = pci_max_pasids(to_pci_dev(dev)); 387 if (ret > 0) 388 max_pasids = ret; 389 } else { 390 ret = device_property_read_u32(dev, "pasid-num-bits", &bits); 391 if (!ret) 392 max_pasids = 1UL << bits; 393 } 394 395 return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); 396 } 397 398 void dev_iommu_priv_set(struct device *dev, void *priv) 399 { 400 /* FSL_PAMU does something weird */ 401 if (!IS_ENABLED(CONFIG_FSL_PAMU)) 402 lockdep_assert_held(&iommu_probe_device_lock); 403 dev->iommu->priv = priv; 404 } 405 EXPORT_SYMBOL_GPL(dev_iommu_priv_set); 406 407 /* 408 * Init the dev->iommu and dev->iommu_group in the struct device and get the 409 * driver probed 410 */ 411 static int iommu_init_device(struct device *dev) 412 { 413 const struct iommu_ops *ops; 414 struct iommu_device *iommu_dev; 415 struct iommu_group *group; 416 int ret; 417 418 if (!dev_iommu_get(dev)) 419 return -ENOMEM; 420 /* 421 * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing 422 * is buried in the bus dma_configure path. Properly unpicking that is 423 * still a big job, so for now just invoke the whole thing. The device 424 * already having a driver bound means dma_configure has already run and 425 * either found no IOMMU to wait for, or we're in its replay call right 426 * now, so either way there's no point calling it again. 
427 */ 428 if (!dev->driver && dev->bus->dma_configure) { 429 mutex_unlock(&iommu_probe_device_lock); 430 dev->bus->dma_configure(dev); 431 mutex_lock(&iommu_probe_device_lock); 432 } 433 /* 434 * At this point, relevant devices either now have a fwspec which will 435 * match ops registered with a non-NULL fwnode, or we can reasonably 436 * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can 437 * be present, and that any of their registered instances has suitable 438 * ops for probing, and thus cheekily co-opt the same mechanism. 439 */ 440 ops = iommu_fwspec_ops(dev->iommu->fwspec); 441 if (!ops) { 442 ret = -ENODEV; 443 goto err_free; 444 } 445 446 if (!try_module_get(ops->owner)) { 447 ret = -EINVAL; 448 goto err_free; 449 } 450 451 iommu_dev = ops->probe_device(dev); 452 if (IS_ERR(iommu_dev)) { 453 ret = PTR_ERR(iommu_dev); 454 goto err_module_put; 455 } 456 dev->iommu->iommu_dev = iommu_dev; 457 458 ret = iommu_device_link(iommu_dev, dev); 459 if (ret) 460 goto err_release; 461 462 group = ops->device_group(dev); 463 if (WARN_ON_ONCE(group == NULL)) 464 group = ERR_PTR(-EINVAL); 465 if (IS_ERR(group)) { 466 ret = PTR_ERR(group); 467 goto err_unlink; 468 } 469 dev->iommu_group = group; 470 471 dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); 472 if (ops->is_attach_deferred) 473 dev->iommu->attach_deferred = ops->is_attach_deferred(dev); 474 return 0; 475 476 err_unlink: 477 iommu_device_unlink(iommu_dev, dev); 478 err_release: 479 if (ops->release_device) 480 ops->release_device(dev); 481 err_module_put: 482 module_put(ops->owner); 483 err_free: 484 dev->iommu->iommu_dev = NULL; 485 dev_iommu_free(dev); 486 return ret; 487 } 488 489 static void iommu_deinit_device(struct device *dev) 490 { 491 struct iommu_group *group = dev->iommu_group; 492 const struct iommu_ops *ops = dev_iommu_ops(dev); 493 494 lockdep_assert_held(&group->mutex); 495 496 iommu_device_unlink(dev->iommu->iommu_dev, dev); 497 498 /* 499 * release_device() must stop using any attached domain on the device. 500 * If there are still other devices in the group, they are not affected 501 * by this callback. 502 * 503 * If the iommu driver provides release_domain, the core code ensures 504 * that domain is attached prior to calling release_device. Drivers can 505 * use this to enforce a translation on the idle iommu. Typically, the 506 * global static blocked_domain is a good choice. 507 * 508 * Otherwise, the iommu driver must set the device to either an identity 509 * or a blocking translation in release_device() and stop using any 510 * domain pointer, as it is going to be freed. 511 * 512 * Regardless, if a delayed attach never occurred, then the release 513 * should still avoid touching any hardware configuration either. 514 */ 515 if (!dev->iommu->attach_deferred && ops->release_domain) 516 ops->release_domain->ops->attach_dev(ops->release_domain, dev); 517 518 if (ops->release_device) 519 ops->release_device(dev); 520 521 /* 522 * If this is the last driver to use the group then we must free the 523 * domains before we do the module_put(). 
524 */ 525 if (list_empty(&group->devices)) { 526 if (group->default_domain) { 527 iommu_domain_free(group->default_domain); 528 group->default_domain = NULL; 529 } 530 if (group->blocking_domain) { 531 iommu_domain_free(group->blocking_domain); 532 group->blocking_domain = NULL; 533 } 534 group->domain = NULL; 535 } 536 537 /* Caller must put iommu_group */ 538 dev->iommu_group = NULL; 539 module_put(ops->owner); 540 dev_iommu_free(dev); 541 #ifdef CONFIG_IOMMU_DMA 542 dev->dma_iommu = false; 543 #endif 544 } 545 546 static struct iommu_domain *pasid_array_entry_to_domain(void *entry) 547 { 548 if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN) 549 return xa_untag_pointer(entry); 550 return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain; 551 } 552 553 DEFINE_MUTEX(iommu_probe_device_lock); 554 555 static int __iommu_probe_device(struct device *dev, struct list_head *group_list) 556 { 557 struct iommu_group *group; 558 struct group_device *gdev; 559 int ret; 560 561 /* 562 * Serialise to avoid races between IOMMU drivers registering in 563 * parallel and/or the "replay" calls from ACPI/OF code via client 564 * driver probe. Once the latter have been cleaned up we should 565 * probably be able to use device_lock() here to minimise the scope, 566 * but for now enforcing a simple global ordering is fine. 567 */ 568 lockdep_assert_held(&iommu_probe_device_lock); 569 570 /* Device is probed already if in a group */ 571 if (dev->iommu_group) 572 return 0; 573 574 ret = iommu_init_device(dev); 575 if (ret) 576 return ret; 577 /* 578 * And if we do now see any replay calls, they would indicate someone 579 * misusing the dma_configure path outside bus code. 580 */ 581 if (dev->driver) 582 dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n"); 583 584 group = dev->iommu_group; 585 gdev = iommu_group_alloc_device(group, dev); 586 mutex_lock(&group->mutex); 587 if (IS_ERR(gdev)) { 588 ret = PTR_ERR(gdev); 589 goto err_put_group; 590 } 591 592 /* 593 * The gdev must be in the list before calling 594 * iommu_setup_default_domain() 595 */ 596 list_add_tail(&gdev->list, &group->devices); 597 WARN_ON(group->default_domain && !group->domain); 598 if (group->default_domain) 599 iommu_create_device_direct_mappings(group->default_domain, dev); 600 if (group->domain) { 601 ret = __iommu_device_set_domain(group, dev, group->domain, 0); 602 if (ret) 603 goto err_remove_gdev; 604 } else if (!group->default_domain && !group_list) { 605 ret = iommu_setup_default_domain(group, 0); 606 if (ret) 607 goto err_remove_gdev; 608 } else if (!group->default_domain) { 609 /* 610 * With a group_list argument we defer the default_domain setup 611 * to the caller by providing a de-duplicated list of groups 612 * that need further setup. 
613 */ 614 if (list_empty(&group->entry)) 615 list_add_tail(&group->entry, group_list); 616 } 617 618 if (group->default_domain) 619 iommu_setup_dma_ops(dev); 620 621 mutex_unlock(&group->mutex); 622 623 return 0; 624 625 err_remove_gdev: 626 list_del(&gdev->list); 627 __iommu_group_free_device(group, gdev); 628 err_put_group: 629 iommu_deinit_device(dev); 630 mutex_unlock(&group->mutex); 631 iommu_group_put(group); 632 633 return ret; 634 } 635 636 int iommu_probe_device(struct device *dev) 637 { 638 const struct iommu_ops *ops; 639 int ret; 640 641 mutex_lock(&iommu_probe_device_lock); 642 ret = __iommu_probe_device(dev, NULL); 643 mutex_unlock(&iommu_probe_device_lock); 644 if (ret) 645 return ret; 646 647 ops = dev_iommu_ops(dev); 648 if (ops->probe_finalize) 649 ops->probe_finalize(dev); 650 651 return 0; 652 } 653 654 static void __iommu_group_free_device(struct iommu_group *group, 655 struct group_device *grp_dev) 656 { 657 struct device *dev = grp_dev->dev; 658 659 sysfs_remove_link(group->devices_kobj, grp_dev->name); 660 sysfs_remove_link(&dev->kobj, "iommu_group"); 661 662 trace_remove_device_from_group(group->id, dev); 663 664 /* 665 * If the group has become empty then ownership must have been 666 * released, and the current domain must be set back to NULL or 667 * the default domain. 668 */ 669 if (list_empty(&group->devices)) 670 WARN_ON(group->owner_cnt || 671 group->domain != group->default_domain); 672 673 kfree(grp_dev->name); 674 kfree(grp_dev); 675 } 676 677 /* Remove the iommu_group from the struct device. */ 678 static void __iommu_group_remove_device(struct device *dev) 679 { 680 struct iommu_group *group = dev->iommu_group; 681 struct group_device *device; 682 683 mutex_lock(&group->mutex); 684 for_each_group_device(group, device) { 685 if (device->dev != dev) 686 continue; 687 688 list_del(&device->list); 689 __iommu_group_free_device(group, device); 690 if (dev_has_iommu(dev)) 691 iommu_deinit_device(dev); 692 else 693 dev->iommu_group = NULL; 694 break; 695 } 696 mutex_unlock(&group->mutex); 697 698 /* 699 * Pairs with the get in iommu_init_device() or 700 * iommu_group_add_device() 701 */ 702 iommu_group_put(group); 703 } 704 705 static void iommu_release_device(struct device *dev) 706 { 707 struct iommu_group *group = dev->iommu_group; 708 709 if (group) 710 __iommu_group_remove_device(dev); 711 712 /* Free any fwspec if no iommu_driver was ever attached */ 713 if (dev->iommu) 714 dev_iommu_free(dev); 715 } 716 717 static int __init iommu_set_def_domain_type(char *str) 718 { 719 bool pt; 720 int ret; 721 722 ret = kstrtobool(str, &pt); 723 if (ret) 724 return ret; 725 726 if (pt) 727 iommu_set_default_passthrough(true); 728 else 729 iommu_set_default_translated(true); 730 731 return 0; 732 } 733 early_param("iommu.passthrough", iommu_set_def_domain_type); 734 735 static int __init iommu_dma_setup(char *str) 736 { 737 int ret = kstrtobool(str, &iommu_dma_strict); 738 739 if (!ret) 740 iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; 741 return ret; 742 } 743 early_param("iommu.strict", iommu_dma_setup); 744 745 void iommu_set_dma_strict(void) 746 { 747 iommu_dma_strict = true; 748 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) 749 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 750 } 751 752 static ssize_t iommu_group_attr_show(struct kobject *kobj, 753 struct attribute *__attr, char *buf) 754 { 755 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 756 struct iommu_group *group = to_iommu_group(kobj); 757 ssize_t ret = -EIO; 758 759 if (attr->show) 760 ret 
= attr->show(group, buf); 761 return ret; 762 } 763 764 static ssize_t iommu_group_attr_store(struct kobject *kobj, 765 struct attribute *__attr, 766 const char *buf, size_t count) 767 { 768 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 769 struct iommu_group *group = to_iommu_group(kobj); 770 ssize_t ret = -EIO; 771 772 if (attr->store) 773 ret = attr->store(group, buf, count); 774 return ret; 775 } 776 777 static const struct sysfs_ops iommu_group_sysfs_ops = { 778 .show = iommu_group_attr_show, 779 .store = iommu_group_attr_store, 780 }; 781 782 static int iommu_group_create_file(struct iommu_group *group, 783 struct iommu_group_attribute *attr) 784 { 785 return sysfs_create_file(&group->kobj, &attr->attr); 786 } 787 788 static void iommu_group_remove_file(struct iommu_group *group, 789 struct iommu_group_attribute *attr) 790 { 791 sysfs_remove_file(&group->kobj, &attr->attr); 792 } 793 794 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 795 { 796 return sysfs_emit(buf, "%s\n", group->name); 797 } 798 799 /** 800 * iommu_insert_resv_region - Insert a new region in the 801 * list of reserved regions. 802 * @new: new region to insert 803 * @regions: list of regions 804 * 805 * Elements are sorted by start address and overlapping segments 806 * of the same type are merged. 807 */ 808 static int iommu_insert_resv_region(struct iommu_resv_region *new, 809 struct list_head *regions) 810 { 811 struct iommu_resv_region *iter, *tmp, *nr, *top; 812 LIST_HEAD(stack); 813 814 nr = iommu_alloc_resv_region(new->start, new->length, 815 new->prot, new->type, GFP_KERNEL); 816 if (!nr) 817 return -ENOMEM; 818 819 /* First add the new element based on start address sorting */ 820 list_for_each_entry(iter, regions, list) { 821 if (nr->start < iter->start || 822 (nr->start == iter->start && nr->type <= iter->type)) 823 break; 824 } 825 list_add_tail(&nr->list, &iter->list); 826 827 /* Merge overlapping segments of type nr->type in @regions, if any */ 828 list_for_each_entry_safe(iter, tmp, regions, list) { 829 phys_addr_t top_end, iter_end = iter->start + iter->length - 1; 830 831 /* no merge needed on elements of different types than @new */ 832 if (iter->type != new->type) { 833 list_move_tail(&iter->list, &stack); 834 continue; 835 } 836 837 /* look for the last stack element of same type as @iter */ 838 list_for_each_entry_reverse(top, &stack, list) 839 if (top->type == iter->type) 840 goto check_overlap; 841 842 list_move_tail(&iter->list, &stack); 843 continue; 844 845 check_overlap: 846 top_end = top->start + top->length - 1; 847 848 if (iter->start > top_end + 1) { 849 list_move_tail(&iter->list, &stack); 850 } else { 851 top->length = max(top_end, iter_end) - top->start + 1; 852 list_del(&iter->list); 853 kfree(iter); 854 } 855 } 856 list_splice(&stack, regions); 857 return 0; 858 } 859 860 static int 861 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, 862 struct list_head *group_resv_regions) 863 { 864 struct iommu_resv_region *entry; 865 int ret = 0; 866 867 list_for_each_entry(entry, dev_resv_regions, list) { 868 ret = iommu_insert_resv_region(entry, group_resv_regions); 869 if (ret) 870 break; 871 } 872 return ret; 873 } 874 875 int iommu_get_group_resv_regions(struct iommu_group *group, 876 struct list_head *head) 877 { 878 struct group_device *device; 879 int ret = 0; 880 881 mutex_lock(&group->mutex); 882 for_each_group_device(group, device) { 883 struct list_head dev_resv_regions; 884 885 /* 886 * Non-API groups still 
expose reserved_regions in sysfs, 887 * so filter out calls that get here that way. 888 */ 889 if (!dev_has_iommu(device->dev)) 890 break; 891 892 INIT_LIST_HEAD(&dev_resv_regions); 893 iommu_get_resv_regions(device->dev, &dev_resv_regions); 894 ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); 895 iommu_put_resv_regions(device->dev, &dev_resv_regions); 896 if (ret) 897 break; 898 } 899 mutex_unlock(&group->mutex); 900 return ret; 901 } 902 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); 903 904 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, 905 char *buf) 906 { 907 struct iommu_resv_region *region, *next; 908 struct list_head group_resv_regions; 909 int offset = 0; 910 911 INIT_LIST_HEAD(&group_resv_regions); 912 iommu_get_group_resv_regions(group, &group_resv_regions); 913 914 list_for_each_entry_safe(region, next, &group_resv_regions, list) { 915 offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n", 916 (long long)region->start, 917 (long long)(region->start + 918 region->length - 1), 919 iommu_group_resv_type_string[region->type]); 920 kfree(region); 921 } 922 923 return offset; 924 } 925 926 static ssize_t iommu_group_show_type(struct iommu_group *group, 927 char *buf) 928 { 929 char *type = "unknown"; 930 931 mutex_lock(&group->mutex); 932 if (group->default_domain) { 933 switch (group->default_domain->type) { 934 case IOMMU_DOMAIN_BLOCKED: 935 type = "blocked"; 936 break; 937 case IOMMU_DOMAIN_IDENTITY: 938 type = "identity"; 939 break; 940 case IOMMU_DOMAIN_UNMANAGED: 941 type = "unmanaged"; 942 break; 943 case IOMMU_DOMAIN_DMA: 944 type = "DMA"; 945 break; 946 case IOMMU_DOMAIN_DMA_FQ: 947 type = "DMA-FQ"; 948 break; 949 } 950 } 951 mutex_unlock(&group->mutex); 952 953 return sysfs_emit(buf, "%s\n", type); 954 } 955 956 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 957 958 static IOMMU_GROUP_ATTR(reserved_regions, 0444, 959 iommu_group_show_resv_regions, NULL); 960 961 static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, 962 iommu_group_store_type); 963 964 static void iommu_group_release(struct kobject *kobj) 965 { 966 struct iommu_group *group = to_iommu_group(kobj); 967 968 pr_debug("Releasing group %d\n", group->id); 969 970 if (group->iommu_data_release) 971 group->iommu_data_release(group->iommu_data); 972 973 ida_free(&iommu_group_ida, group->id); 974 975 /* Domains are free'd by iommu_deinit_device() */ 976 WARN_ON(group->default_domain); 977 WARN_ON(group->blocking_domain); 978 979 kfree(group->name); 980 kfree(group); 981 } 982 983 static const struct kobj_type iommu_group_ktype = { 984 .sysfs_ops = &iommu_group_sysfs_ops, 985 .release = iommu_group_release, 986 }; 987 988 /** 989 * iommu_group_alloc - Allocate a new group 990 * 991 * This function is called by an iommu driver to allocate a new iommu 992 * group. The iommu group represents the minimum granularity of the iommu. 993 * Upon successful return, the caller holds a reference to the supplied 994 * group in order to hold the group until devices are added. Use 995 * iommu_group_put() to release this extra reference count, allowing the 996 * group to be automatically reclaimed once it has no devices or external 997 * references. 
998 */ 999 struct iommu_group *iommu_group_alloc(void) 1000 { 1001 struct iommu_group *group; 1002 int ret; 1003 1004 group = kzalloc(sizeof(*group), GFP_KERNEL); 1005 if (!group) 1006 return ERR_PTR(-ENOMEM); 1007 1008 group->kobj.kset = iommu_group_kset; 1009 mutex_init(&group->mutex); 1010 INIT_LIST_HEAD(&group->devices); 1011 INIT_LIST_HEAD(&group->entry); 1012 xa_init(&group->pasid_array); 1013 1014 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 1015 if (ret < 0) { 1016 kfree(group); 1017 return ERR_PTR(ret); 1018 } 1019 group->id = ret; 1020 1021 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 1022 NULL, "%d", group->id); 1023 if (ret) { 1024 kobject_put(&group->kobj); 1025 return ERR_PTR(ret); 1026 } 1027 1028 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 1029 if (!group->devices_kobj) { 1030 kobject_put(&group->kobj); /* triggers .release & free */ 1031 return ERR_PTR(-ENOMEM); 1032 } 1033 1034 /* 1035 * The devices_kobj holds a reference on the group kobject, so 1036 * as long as that exists so will the group. We can therefore 1037 * use the devices_kobj for reference counting. 1038 */ 1039 kobject_put(&group->kobj); 1040 1041 ret = iommu_group_create_file(group, 1042 &iommu_group_attr_reserved_regions); 1043 if (ret) { 1044 kobject_put(group->devices_kobj); 1045 return ERR_PTR(ret); 1046 } 1047 1048 ret = iommu_group_create_file(group, &iommu_group_attr_type); 1049 if (ret) { 1050 kobject_put(group->devices_kobj); 1051 return ERR_PTR(ret); 1052 } 1053 1054 pr_debug("Allocated group %d\n", group->id); 1055 1056 return group; 1057 } 1058 EXPORT_SYMBOL_GPL(iommu_group_alloc); 1059 1060 /** 1061 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 1062 * @group: the group 1063 * 1064 * iommu drivers can store data in the group for use when doing iommu 1065 * operations. This function provides a way to retrieve it. Caller 1066 * should hold a group reference. 1067 */ 1068 void *iommu_group_get_iommudata(struct iommu_group *group) 1069 { 1070 return group->iommu_data; 1071 } 1072 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 1073 1074 /** 1075 * iommu_group_set_iommudata - set iommu_data for a group 1076 * @group: the group 1077 * @iommu_data: new data 1078 * @release: release function for iommu_data 1079 * 1080 * iommu drivers can store data in the group for use when doing iommu 1081 * operations. This function provides a way to set the data after 1082 * the group has been allocated. Caller should hold a group reference. 1083 */ 1084 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 1085 void (*release)(void *iommu_data)) 1086 { 1087 group->iommu_data = iommu_data; 1088 group->iommu_data_release = release; 1089 } 1090 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 1091 1092 /** 1093 * iommu_group_set_name - set name for a group 1094 * @group: the group 1095 * @name: name 1096 * 1097 * Allow iommu driver to set a name for a group. When set it will 1098 * appear in a name attribute file under the group in sysfs. 
1099 */ 1100 int iommu_group_set_name(struct iommu_group *group, const char *name) 1101 { 1102 int ret; 1103 1104 if (group->name) { 1105 iommu_group_remove_file(group, &iommu_group_attr_name); 1106 kfree(group->name); 1107 group->name = NULL; 1108 if (!name) 1109 return 0; 1110 } 1111 1112 group->name = kstrdup(name, GFP_KERNEL); 1113 if (!group->name) 1114 return -ENOMEM; 1115 1116 ret = iommu_group_create_file(group, &iommu_group_attr_name); 1117 if (ret) { 1118 kfree(group->name); 1119 group->name = NULL; 1120 return ret; 1121 } 1122 1123 return 0; 1124 } 1125 EXPORT_SYMBOL_GPL(iommu_group_set_name); 1126 1127 static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 1128 struct device *dev) 1129 { 1130 struct iommu_resv_region *entry; 1131 struct list_head mappings; 1132 unsigned long pg_size; 1133 int ret = 0; 1134 1135 pg_size = domain->pgsize_bitmap ? 1UL << __ffs(domain->pgsize_bitmap) : 0; 1136 INIT_LIST_HEAD(&mappings); 1137 1138 if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size)) 1139 return -EINVAL; 1140 1141 iommu_get_resv_regions(dev, &mappings); 1142 1143 /* We need to consider overlapping regions for different devices */ 1144 list_for_each_entry(entry, &mappings, list) { 1145 dma_addr_t start, end, addr; 1146 size_t map_size = 0; 1147 1148 if (entry->type == IOMMU_RESV_DIRECT) 1149 dev->iommu->require_direct = 1; 1150 1151 if ((entry->type != IOMMU_RESV_DIRECT && 1152 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) || 1153 !iommu_is_dma_domain(domain)) 1154 continue; 1155 1156 start = ALIGN(entry->start, pg_size); 1157 end = ALIGN(entry->start + entry->length, pg_size); 1158 1159 for (addr = start; addr <= end; addr += pg_size) { 1160 phys_addr_t phys_addr; 1161 1162 if (addr == end) 1163 goto map_end; 1164 1165 phys_addr = iommu_iova_to_phys(domain, addr); 1166 if (!phys_addr) { 1167 map_size += pg_size; 1168 continue; 1169 } 1170 1171 map_end: 1172 if (map_size) { 1173 ret = iommu_map(domain, addr - map_size, 1174 addr - map_size, map_size, 1175 entry->prot, GFP_KERNEL); 1176 if (ret) 1177 goto out; 1178 map_size = 0; 1179 } 1180 } 1181 1182 } 1183 out: 1184 iommu_put_resv_regions(dev, &mappings); 1185 1186 return ret; 1187 } 1188 1189 /* This is undone by __iommu_group_free_device() */ 1190 static struct group_device *iommu_group_alloc_device(struct iommu_group *group, 1191 struct device *dev) 1192 { 1193 int ret, i = 0; 1194 struct group_device *device; 1195 1196 device = kzalloc(sizeof(*device), GFP_KERNEL); 1197 if (!device) 1198 return ERR_PTR(-ENOMEM); 1199 1200 device->dev = dev; 1201 1202 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1203 if (ret) 1204 goto err_free_device; 1205 1206 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1207 rename: 1208 if (!device->name) { 1209 ret = -ENOMEM; 1210 goto err_remove_link; 1211 } 1212 1213 ret = sysfs_create_link_nowarn(group->devices_kobj, 1214 &dev->kobj, device->name); 1215 if (ret) { 1216 if (ret == -EEXIST && i >= 0) { 1217 /* 1218 * Account for the slim chance of collision 1219 * and append an instance to the name. 
			 */
			kfree(device->name);
			device->name = kasprintf(GFP_KERNEL, "%s.%d",
						 kobject_name(&dev->kobj), i++);
			goto rename;
		}
		goto err_free_name;
	}

	trace_add_device_to_group(group->id, dev);

	dev_info(dev, "Adding to iommu group %d\n", group->id);

	return device;

err_free_name:
	kfree(device->name);
err_remove_link:
	sysfs_remove_link(&dev->kobj, "iommu_group");
err_free_device:
	kfree(device);
	dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret);
	return ERR_PTR(ret);
}

/**
 * iommu_group_add_device - add a device to an iommu group
 * @group: the group into which to add the device (reference should be held)
 * @dev: the device
 *
 * This function is called by an iommu driver to add a device into a
 * group. Adding a device increments the group reference count.
 */
int iommu_group_add_device(struct iommu_group *group, struct device *dev)
{
	struct group_device *gdev;

	gdev = iommu_group_alloc_device(group, dev);
	if (IS_ERR(gdev))
		return PTR_ERR(gdev);

	iommu_group_ref_get(group);
	dev->iommu_group = group;

	mutex_lock(&group->mutex);
	list_add_tail(&gdev->list, &group->devices);
	mutex_unlock(&group->mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);

/**
 * iommu_group_remove_device - remove a device from its current group
 * @dev: device to be removed
 *
 * This function is called by an iommu driver to remove the device from
 * its current group. This decrements the iommu group reference count.
 */
void iommu_group_remove_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (!group)
		return;

	dev_info(dev, "Removing from iommu group %d\n", group->id);

	__iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);

#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
/**
 * iommu_group_mutex_assert - Check device group mutex lock
 * @dev: the device that has group param set
 *
 * This function is called by an iommu driver to check whether it holds
 * group mutex lock for the given device or not.
 *
 * Note that this function must be called after device group param is set.
 */
void iommu_group_mutex_assert(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	lockdep_assert_held(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_group_mutex_assert);
#endif

static struct device *iommu_group_first_dev(struct iommu_group *group)
{
	lockdep_assert_held(&group->mutex);
	return list_first_entry(&group->devices, struct group_device, list)->dev;
}

/**
 * iommu_group_for_each_dev - iterate over each device in the group
 * @group: the group
 * @data: caller opaque data to be passed to callback function
 * @fn: caller supplied callback function
 *
 * This function is called by group users to iterate over group devices.
 * Callers should hold a reference count to the group during callback.
 * The group->mutex is held across callbacks, which will block calls to
 * iommu_group_add/remove_device.
 */
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
			     int (*fn)(struct device *, void *))
{
	struct group_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		ret = fn(device->dev, data);
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
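
/*
 * Usage sketch (illustrative only, not taken from an in-tree caller; the
 * callback and counter names below are hypothetical): a group user could
 * count the devices in a group with iommu_group_for_each_dev():
 *
 *	static int count_group_dev(struct device *dev, void *data)
 *	{
 *		(*(unsigned int *)data)++;
 *		return 0;
 *	}
 *
 *	unsigned int count = 0;
 *
 *	iommu_group_for_each_dev(group, &count, count_group_dev);
 */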

/**
 * iommu_group_get - Return the group for a device and increment reference
 * @dev: get the group that this device belongs to
 *
 * This function is called by iommu drivers and users to get the group
 * for the specified device. If found, the group is returned and the group
 * reference is incremented, else NULL.
 */
struct iommu_group *iommu_group_get(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (group)
		kobject_get(group->devices_kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get);

/**
 * iommu_group_ref_get - Increment reference on a group
 * @group: the group to use, must not be NULL
 *
 * This function is called by iommu drivers to take additional references on an
 * existing group. Returns the given group for convenience.
 */
struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
{
	kobject_get(group->devices_kobj);
	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_ref_get);

/**
 * iommu_group_put - Decrement group reference
 * @group: the group to use
 *
 * This function is called by iommu drivers and users to release the
 * iommu group. Once the reference count is zero, the group is released.
 */
void iommu_group_put(struct iommu_group *group)
{
	if (group)
		kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_put);

/**
 * iommu_group_id - Return ID for a group
 * @group: the group to ID
 *
 * Return the unique ID for the group matching the sysfs group number.
 */
int iommu_group_id(struct iommu_group *group)
{
	return group->id;
}
EXPORT_SYMBOL_GPL(iommu_group_id);
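
/*
 * Usage sketch (illustrative only, not taken from an in-tree caller): the
 * usual pattern is to balance iommu_group_get() with iommu_group_put() once
 * the caller is done with the group:
 *
 *	struct iommu_group *group = iommu_group_get(dev);
 *
 *	if (group) {
 *		pr_debug("device is in iommu group %d\n", iommu_group_id(group));
 *		iommu_group_put(group);
 *	}
 */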

static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns);

/*
 * To consider a PCI device isolated, we require ACS to support Source
 * Validation, Request Redirection, Completer Redirection, and Upstream
 * Forwarding. This effectively means that devices cannot spoof their
 * requester ID, requests and completions cannot be redirected, and all
 * transactions are forwarded upstream, even as they pass through a
 * bridge where the target device is downstream.
 */
#define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

/*
 * For multifunction devices which are not isolated from each other, find
 * all the other non-isolated functions and look for existing groups. For
 * each function, we also need to look for aliases to or from other devices
 * that may already have a group.
 */
static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
							unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
		return NULL;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus ||
		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
			continue;

		group = get_pci_alias_group(tmp, devfns);
		if (group) {
			pci_dev_put(tmp);
			return group;
		}
	}

	return NULL;
}

/*
 * Look for aliases to or from the given device for existing groups. DMA
 * aliases are only supported on the same bus, therefore the search
 * space is quite small (especially since we're really only looking at PCIe
 * devices, and therefore only expect multiple slots on the root complex or
 * downstream switch ports). It's conceivable though that a pair of
 * multifunction devices could have aliases between them that would cause a
 * loop. To prevent this, we use a bitmap to track where we've been.
 */
static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
		return NULL;

	group = iommu_group_get(&pdev->dev);
	if (group)
		return group;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus)
			continue;

		/* We alias them or they alias us */
		if (pci_devs_are_dma_aliases(pdev, tmp)) {
			group = get_pci_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}

			group = get_pci_function_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}
		}
	}

	return NULL;
}

struct group_for_pci_data {
	struct pci_dev *pdev;
	struct iommu_group *group;
};

/*
 * DMA alias iterator callback, return the last seen device. Stop and return
 * the IOMMU group if we find one along the way.
 */
static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct group_for_pci_data *data = opaque;

	data->pdev = pdev;
	data->group = iommu_group_get(&pdev->dev);

	return data->group != NULL;
}

/*
 * Generic device_group call-back function. It just allocates one
 * iommu-group per device.
 */
struct iommu_group *generic_device_group(struct device *dev)
{
	return iommu_group_alloc();
}
EXPORT_SYMBOL_GPL(generic_device_group);
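
/*
 * Usage sketch (illustrative only; the "mock_" names are hypothetical and not
 * from any real driver): an IOMMU driver typically wires one of these helpers
 * into its iommu_ops as the ->device_group() callback, for example
 * pci_device_group() for PCI devices and generic_device_group() otherwise:
 *
 *	static struct iommu_group *mock_device_group(struct device *dev)
 *	{
 *		if (dev_is_pci(dev))
 *			return pci_device_group(dev);
 *		return generic_device_group(dev);
 *	}
 *
 *	static const struct iommu_ops mock_ops = {
 *		...
 *		.device_group	= mock_device_group,
 *	};
 */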

/*
 * Generic device_group call-back function. It just allocates one
 * iommu-group per iommu driver instance shared by every device
 * probed by that iommu driver.
 */
struct iommu_group *generic_single_device_group(struct device *dev)
{
	struct iommu_device *iommu = dev->iommu->iommu_dev;

	if (!iommu->singleton_group) {
		struct iommu_group *group;

		group = iommu_group_alloc();
		if (IS_ERR(group))
			return group;
		iommu->singleton_group = group;
	}
	return iommu_group_ref_get(iommu->singleton_group);
}
EXPORT_SYMBOL_GPL(generic_single_device_group);

/*
 * Use standard PCI bus topology, isolation features, and DMA alias quirks
 * to find or create an IOMMU group for a device.
 */
struct iommu_group *pci_device_group(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct group_for_pci_data data;
	struct pci_bus *bus;
	struct iommu_group *group = NULL;
	u64 devfns[4] = { 0 };

	if (WARN_ON(!dev_is_pci(dev)))
		return ERR_PTR(-EINVAL);

	/*
	 * Find the upstream DMA alias for the device. A device must not
	 * be aliased due to topology in order to have its own IOMMU group.
	 * If we find an alias along the way that already belongs to a
	 * group, use it.
	 */
	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
		return data.group;

	pdev = data.pdev;

	/*
	 * Continue upstream from the point of minimum IOMMU granularity
	 * due to aliases to the point where devices are protected from
	 * peer-to-peer DMA by PCI ACS. Again, if we find an existing
	 * group, use it.
	 */
	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
		if (!bus->self)
			continue;

		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
			break;

		pdev = bus->self;

		group = iommu_group_get(&pdev->dev);
		if (group)
			return group;
	}

	/*
	 * Look for existing groups on device aliases. If we alias another
	 * device or another device aliases us, use the same group.
	 */
	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
	if (group)
		return group;

	/*
	 * Look for existing groups on non-isolated functions on the same
	 * slot and aliases of those functions, if any. No need to clear
	 * the search bitmap, the tested devfns are still valid.
1601 */ 1602 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1603 if (group) 1604 return group; 1605 1606 /* No shared group found, allocate new */ 1607 return iommu_group_alloc(); 1608 } 1609 EXPORT_SYMBOL_GPL(pci_device_group); 1610 1611 /* Get the IOMMU group for device on fsl-mc bus */ 1612 struct iommu_group *fsl_mc_device_group(struct device *dev) 1613 { 1614 struct device *cont_dev = fsl_mc_cont_dev(dev); 1615 struct iommu_group *group; 1616 1617 group = iommu_group_get(cont_dev); 1618 if (!group) 1619 group = iommu_group_alloc(); 1620 return group; 1621 } 1622 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1623 1624 static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev) 1625 { 1626 const struct iommu_ops *ops = dev_iommu_ops(dev); 1627 struct iommu_domain *domain; 1628 1629 if (ops->identity_domain) 1630 return ops->identity_domain; 1631 1632 /* Older drivers create the identity domain via ops->domain_alloc() */ 1633 if (!ops->domain_alloc) 1634 return ERR_PTR(-EOPNOTSUPP); 1635 1636 domain = ops->domain_alloc(IOMMU_DOMAIN_IDENTITY); 1637 if (IS_ERR(domain)) 1638 return domain; 1639 if (!domain) 1640 return ERR_PTR(-ENOMEM); 1641 1642 iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops); 1643 return domain; 1644 } 1645 1646 static struct iommu_domain * 1647 __iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1648 { 1649 struct device *dev = iommu_group_first_dev(group); 1650 struct iommu_domain *dom; 1651 1652 if (group->default_domain && group->default_domain->type == req_type) 1653 return group->default_domain; 1654 1655 /* 1656 * When allocating the DMA API domain assume that the driver is going to 1657 * use PASID and make sure the RID's domain is PASID compatible. 1658 */ 1659 if (req_type & __IOMMU_DOMAIN_PAGING) { 1660 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 1661 dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : 0); 1662 1663 /* 1664 * If driver does not support PASID feature then 1665 * try to allocate non-PASID domain 1666 */ 1667 if (PTR_ERR(dom) == -EOPNOTSUPP) 1668 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 0); 1669 1670 return dom; 1671 } 1672 1673 if (req_type == IOMMU_DOMAIN_IDENTITY) 1674 return __iommu_alloc_identity_domain(dev); 1675 1676 return ERR_PTR(-EINVAL); 1677 } 1678 1679 /* 1680 * req_type of 0 means "auto" which means to select a domain based on 1681 * iommu_def_domain_type or what the driver actually supports. 1682 */ 1683 static struct iommu_domain * 1684 iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1685 { 1686 const struct iommu_ops *ops = dev_iommu_ops(iommu_group_first_dev(group)); 1687 struct iommu_domain *dom; 1688 1689 lockdep_assert_held(&group->mutex); 1690 1691 /* 1692 * Allow legacy drivers to specify the domain that will be the default 1693 * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM 1694 * domain. Do not use in new drivers. 
1695 */ 1696 if (ops->default_domain) { 1697 if (req_type != ops->default_domain->type) 1698 return ERR_PTR(-EINVAL); 1699 return ops->default_domain; 1700 } 1701 1702 if (req_type) 1703 return __iommu_group_alloc_default_domain(group, req_type); 1704 1705 /* The driver gave no guidance on what type to use, try the default */ 1706 dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); 1707 if (!IS_ERR(dom)) 1708 return dom; 1709 1710 /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ 1711 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) 1712 return ERR_PTR(-EINVAL); 1713 dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); 1714 if (IS_ERR(dom)) 1715 return dom; 1716 1717 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1718 iommu_def_domain_type, group->name); 1719 return dom; 1720 } 1721 1722 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1723 { 1724 return group->default_domain; 1725 } 1726 1727 static int probe_iommu_group(struct device *dev, void *data) 1728 { 1729 struct list_head *group_list = data; 1730 int ret; 1731 1732 mutex_lock(&iommu_probe_device_lock); 1733 ret = __iommu_probe_device(dev, group_list); 1734 mutex_unlock(&iommu_probe_device_lock); 1735 if (ret == -ENODEV) 1736 ret = 0; 1737 1738 return ret; 1739 } 1740 1741 static int iommu_bus_notifier(struct notifier_block *nb, 1742 unsigned long action, void *data) 1743 { 1744 struct device *dev = data; 1745 1746 if (action == BUS_NOTIFY_ADD_DEVICE) { 1747 int ret; 1748 1749 ret = iommu_probe_device(dev); 1750 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1751 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1752 iommu_release_device(dev); 1753 return NOTIFY_OK; 1754 } 1755 1756 return 0; 1757 } 1758 1759 /* 1760 * Combine the driver's chosen def_domain_type across all the devices in a 1761 * group. Drivers must give a consistent result. 1762 */ 1763 static int iommu_get_def_domain_type(struct iommu_group *group, 1764 struct device *dev, int cur_type) 1765 { 1766 const struct iommu_ops *ops = dev_iommu_ops(dev); 1767 int type; 1768 1769 if (ops->default_domain) { 1770 /* 1771 * Drivers that declare a global static default_domain will 1772 * always choose that. 1773 */ 1774 type = ops->default_domain->type; 1775 } else { 1776 if (ops->def_domain_type) 1777 type = ops->def_domain_type(dev); 1778 else 1779 return cur_type; 1780 } 1781 if (!type || cur_type == type) 1782 return cur_type; 1783 if (!cur_type) 1784 return type; 1785 1786 dev_err_ratelimited( 1787 dev, 1788 "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n", 1789 iommu_domain_type_str(cur_type), iommu_domain_type_str(type), 1790 group->id); 1791 1792 /* 1793 * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY 1794 * takes precedence. 1795 */ 1796 if (type == IOMMU_DOMAIN_IDENTITY) 1797 return type; 1798 return cur_type; 1799 } 1800 1801 /* 1802 * A target_type of 0 will select the best domain type. 0 can be returned in 1803 * this case meaning the global default should be used. 
 */
static int iommu_get_default_domain_type(struct iommu_group *group,
					 int target_type)
{
	struct device *untrusted = NULL;
	struct group_device *gdev;
	int driver_type = 0;

	lockdep_assert_held(&group->mutex);

	/*
	 * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an
	 * identity_domain and it will automatically become their default
	 * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain.
	 * Override the selection to IDENTITY.
	 */
	if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
		static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) &&
				IS_ENABLED(CONFIG_IOMMU_DMA)));
		driver_type = IOMMU_DOMAIN_IDENTITY;
	}

	for_each_group_device(group, gdev) {
		driver_type = iommu_get_def_domain_type(group, gdev->dev,
							driver_type);

		if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) {
			/*
			 * No ARM32 using systems will set untrusted, it cannot
			 * work.
			 */
			if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)))
				return -1;
			untrusted = gdev->dev;
		}
	}

	/*
	 * If the common dma ops are not selected in kconfig then we cannot use
	 * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been
	 * selected.
	 */
	if (!IS_ENABLED(CONFIG_IOMMU_DMA)) {
		if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA))
			return -1;
		if (!driver_type)
			driver_type = IOMMU_DOMAIN_IDENTITY;
	}

	if (untrusted) {
		if (driver_type && driver_type != IOMMU_DOMAIN_DMA) {
			dev_err_ratelimited(
				untrusted,
				"Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n",
				group->id, iommu_domain_type_str(driver_type));
			return -1;
		}
		driver_type = IOMMU_DOMAIN_DMA;
	}

	if (target_type) {
		if (driver_type && target_type != driver_type)
			return -1;
		return target_type;
	}
	return driver_type;
}

static void iommu_group_do_probe_finalize(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (ops->probe_finalize)
		ops->probe_finalize(dev);
}

static int bus_iommu_probe(const struct bus_type *bus)
{
	struct iommu_group *group, *next;
	LIST_HEAD(group_list);
	int ret;

	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
	if (ret)
		return ret;

	list_for_each_entry_safe(group, next, &group_list, entry) {
		struct group_device *gdev;

		mutex_lock(&group->mutex);

		/* Remove item from the list */
		list_del_init(&group->entry);

		/*
		 * We go to the trouble of deferred default domain creation so
		 * that the cross-group default domain type and the setup of the
		 * IOMMU_RESV_DIRECT will work correctly in non-hotplug scenarios.
		 */
		ret = iommu_setup_default_domain(group, 0);
		if (ret) {
			mutex_unlock(&group->mutex);
			return ret;
		}
		for_each_group_device(group, gdev)
			iommu_setup_dma_ops(gdev->dev);
		mutex_unlock(&group->mutex);

		/*
		 * FIXME: Mis-locked because the ops->probe_finalize() call-back
		 * of some IOMMU drivers calls arm_iommu_attach_device() which
		 * in turn might call back into IOMMU core code, where it tries
		 * to take group->mutex, resulting in a deadlock.
1917 */ 1918 for_each_group_device(group, gdev) 1919 iommu_group_do_probe_finalize(gdev->dev); 1920 } 1921 1922 return 0; 1923 } 1924 1925 /** 1926 * device_iommu_capable() - check for a general IOMMU capability 1927 * @dev: device to which the capability would be relevant, if available 1928 * @cap: IOMMU capability 1929 * 1930 * Return: true if an IOMMU is present and supports the given capability 1931 * for the given device, otherwise false. 1932 */ 1933 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 1934 { 1935 const struct iommu_ops *ops; 1936 1937 if (!dev_has_iommu(dev)) 1938 return false; 1939 1940 ops = dev_iommu_ops(dev); 1941 if (!ops->capable) 1942 return false; 1943 1944 return ops->capable(dev, cap); 1945 } 1946 EXPORT_SYMBOL_GPL(device_iommu_capable); 1947 1948 /** 1949 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 1950 * for a group 1951 * @group: Group to query 1952 * 1953 * IOMMU groups should not have differing values of 1954 * msi_device_has_isolated_msi() for devices in a group. However nothing 1955 * directly prevents this, so ensure mistakes don't result in isolation failures 1956 * by checking that all the devices are the same. 1957 */ 1958 bool iommu_group_has_isolated_msi(struct iommu_group *group) 1959 { 1960 struct group_device *group_dev; 1961 bool ret = true; 1962 1963 mutex_lock(&group->mutex); 1964 for_each_group_device(group, group_dev) 1965 ret &= msi_device_has_isolated_msi(group_dev->dev); 1966 mutex_unlock(&group->mutex); 1967 return ret; 1968 } 1969 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 1970 1971 /** 1972 * iommu_set_fault_handler() - set a fault handler for an iommu domain 1973 * @domain: iommu domain 1974 * @handler: fault handler 1975 * @token: user data, will be passed back to the fault handler 1976 * 1977 * This function should be used by IOMMU users which want to be notified 1978 * whenever an IOMMU fault happens. 1979 * 1980 * The fault handler itself should return 0 on success, and an appropriate 1981 * error code otherwise. 
1982 */ 1983 void iommu_set_fault_handler(struct iommu_domain *domain, 1984 iommu_fault_handler_t handler, 1985 void *token) 1986 { 1987 if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE)) 1988 return; 1989 1990 domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER; 1991 domain->handler = handler; 1992 domain->handler_token = token; 1993 } 1994 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 1995 1996 static void iommu_domain_init(struct iommu_domain *domain, unsigned int type, 1997 const struct iommu_ops *ops) 1998 { 1999 domain->type = type; 2000 domain->owner = ops; 2001 if (!domain->ops) 2002 domain->ops = ops->default_domain_ops; 2003 2004 /* 2005 * If not already set, assume all sizes by default; the driver 2006 * may override this later 2007 */ 2008 if (!domain->pgsize_bitmap) 2009 domain->pgsize_bitmap = ops->pgsize_bitmap; 2010 } 2011 2012 static struct iommu_domain * 2013 __iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type, 2014 unsigned int flags) 2015 { 2016 const struct iommu_ops *ops; 2017 struct iommu_domain *domain; 2018 2019 if (!dev_has_iommu(dev)) 2020 return ERR_PTR(-ENODEV); 2021 2022 ops = dev_iommu_ops(dev); 2023 2024 if (ops->domain_alloc_paging && !flags) 2025 domain = ops->domain_alloc_paging(dev); 2026 else if (ops->domain_alloc_paging_flags) 2027 domain = ops->domain_alloc_paging_flags(dev, flags, NULL); 2028 else if (ops->domain_alloc && !flags) 2029 domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED); 2030 else 2031 return ERR_PTR(-EOPNOTSUPP); 2032 2033 if (IS_ERR(domain)) 2034 return domain; 2035 if (!domain) 2036 return ERR_PTR(-ENOMEM); 2037 2038 iommu_domain_init(domain, type, ops); 2039 return domain; 2040 } 2041 2042 /** 2043 * iommu_paging_domain_alloc_flags() - Allocate a paging domain 2044 * @dev: device for which the domain is allocated 2045 * @flags: Bitmap of iommufd_hwpt_alloc_flags 2046 * 2047 * Allocate a paging domain which will be managed by a kernel driver. Return 2048 * allocated domain if successful, or an ERR pointer for failure. 2049 */ 2050 struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, 2051 unsigned int flags) 2052 { 2053 return __iommu_paging_domain_alloc_flags(dev, 2054 IOMMU_DOMAIN_UNMANAGED, flags); 2055 } 2056 EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags); 2057 2058 void iommu_domain_free(struct iommu_domain *domain) 2059 { 2060 switch (domain->cookie_type) { 2061 case IOMMU_COOKIE_DMA_IOVA: 2062 iommu_put_dma_cookie(domain); 2063 break; 2064 case IOMMU_COOKIE_DMA_MSI: 2065 iommu_put_msi_cookie(domain); 2066 break; 2067 case IOMMU_COOKIE_SVA: 2068 mmdrop(domain->mm); 2069 break; 2070 default: 2071 break; 2072 } 2073 if (domain->ops->free) 2074 domain->ops->free(domain); 2075 } 2076 EXPORT_SYMBOL_GPL(iommu_domain_free); 2077 2078 /* 2079 * Put the group's domain back to the appropriate core-owned domain - either the 2080 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 
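 *
 * For reference, callers of the allocation helpers above typically follow a
 * simple lifecycle (illustrative sketch only; "dev" and "my_domain" are
 * made-up names, error handling elided)::
 *
 *	my_domain = iommu_paging_domain_alloc_flags(dev, 0);
 *	if (IS_ERR(my_domain))
 *		return PTR_ERR(my_domain);
 *	... attach, map and unmap ...
 *	iommu_domain_free(my_domain);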
2081 */ 2082 static void __iommu_group_set_core_domain(struct iommu_group *group) 2083 { 2084 struct iommu_domain *new_domain; 2085 2086 if (group->owner) 2087 new_domain = group->blocking_domain; 2088 else 2089 new_domain = group->default_domain; 2090 2091 __iommu_group_set_domain_nofail(group, new_domain); 2092 } 2093 2094 static int __iommu_attach_device(struct iommu_domain *domain, 2095 struct device *dev) 2096 { 2097 int ret; 2098 2099 if (unlikely(domain->ops->attach_dev == NULL)) 2100 return -ENODEV; 2101 2102 ret = domain->ops->attach_dev(domain, dev); 2103 if (ret) 2104 return ret; 2105 dev->iommu->attach_deferred = 0; 2106 trace_attach_device_to_domain(dev); 2107 return 0; 2108 } 2109 2110 /** 2111 * iommu_attach_device - Attach an IOMMU domain to a device 2112 * @domain: IOMMU domain to attach 2113 * @dev: Device that will be attached 2114 * 2115 * Returns 0 on success and error code on failure 2116 * 2117 * Note that EINVAL can be treated as a soft failure, indicating 2118 * that certain configuration of the domain is incompatible with 2119 * the device. In this case attaching a different domain to the 2120 * device may succeed. 2121 */ 2122 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2123 { 2124 /* Caller must be a probed driver on dev */ 2125 struct iommu_group *group = dev->iommu_group; 2126 int ret; 2127 2128 if (!group) 2129 return -ENODEV; 2130 2131 /* 2132 * Lock the group to make sure the device-count doesn't 2133 * change while we are attaching 2134 */ 2135 mutex_lock(&group->mutex); 2136 ret = -EINVAL; 2137 if (list_count_nodes(&group->devices) != 1) 2138 goto out_unlock; 2139 2140 ret = __iommu_attach_group(domain, group); 2141 2142 out_unlock: 2143 mutex_unlock(&group->mutex); 2144 return ret; 2145 } 2146 EXPORT_SYMBOL_GPL(iommu_attach_device); 2147 2148 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2149 { 2150 if (dev->iommu && dev->iommu->attach_deferred) 2151 return __iommu_attach_device(domain, dev); 2152 2153 return 0; 2154 } 2155 2156 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2157 { 2158 /* Caller must be a probed driver on dev */ 2159 struct iommu_group *group = dev->iommu_group; 2160 2161 if (!group) 2162 return; 2163 2164 mutex_lock(&group->mutex); 2165 if (WARN_ON(domain != group->domain) || 2166 WARN_ON(list_count_nodes(&group->devices) != 1)) 2167 goto out_unlock; 2168 __iommu_group_set_core_domain(group); 2169 2170 out_unlock: 2171 mutex_unlock(&group->mutex); 2172 } 2173 EXPORT_SYMBOL_GPL(iommu_detach_device); 2174 2175 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2176 { 2177 /* Caller must be a probed driver on dev */ 2178 struct iommu_group *group = dev->iommu_group; 2179 2180 if (!group) 2181 return NULL; 2182 2183 return group->domain; 2184 } 2185 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2186 2187 /* 2188 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2189 * guarantees that the group and its default domain are valid and correct. 
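 *
 * External callers go through the attach API documented above instead. A
 * minimal illustrative sketch for a device that is alone in its group
 * ("dev" and "my_domain" are made-up names)::
 *
 *	ret = iommu_attach_device(my_domain, dev);
 *	if (ret)
 *		return ret;	// -EINVAL: incompatible, try another domain
 *	...
 *	iommu_detach_device(my_domain, dev);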
2190 */ 2191 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2192 { 2193 return dev->iommu_group->default_domain; 2194 } 2195 2196 static void *iommu_make_pasid_array_entry(struct iommu_domain *domain, 2197 struct iommu_attach_handle *handle) 2198 { 2199 if (handle) { 2200 handle->domain = domain; 2201 return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE); 2202 } 2203 2204 return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN); 2205 } 2206 2207 static int __iommu_attach_group(struct iommu_domain *domain, 2208 struct iommu_group *group) 2209 { 2210 struct device *dev; 2211 2212 if (group->domain && group->domain != group->default_domain && 2213 group->domain != group->blocking_domain) 2214 return -EBUSY; 2215 2216 dev = iommu_group_first_dev(group); 2217 if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner) 2218 return -EINVAL; 2219 2220 return __iommu_group_set_domain(group, domain); 2221 } 2222 2223 /** 2224 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2225 * @domain: IOMMU domain to attach 2226 * @group: IOMMU group that will be attached 2227 * 2228 * Returns 0 on success and error code on failure 2229 * 2230 * Note that EINVAL can be treated as a soft failure, indicating 2231 * that certain configuration of the domain is incompatible with 2232 * the group. In this case attaching a different domain to the 2233 * group may succeed. 2234 */ 2235 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2236 { 2237 int ret; 2238 2239 mutex_lock(&group->mutex); 2240 ret = __iommu_attach_group(domain, group); 2241 mutex_unlock(&group->mutex); 2242 2243 return ret; 2244 } 2245 EXPORT_SYMBOL_GPL(iommu_attach_group); 2246 2247 static int __iommu_device_set_domain(struct iommu_group *group, 2248 struct device *dev, 2249 struct iommu_domain *new_domain, 2250 unsigned int flags) 2251 { 2252 int ret; 2253 2254 /* 2255 * If the device requires IOMMU_RESV_DIRECT then we cannot allow 2256 * the blocking domain to be attached as it does not contain the 2257 * required 1:1 mapping. This test effectively excludes the device 2258 * being used with iommu_group_claim_dma_owner() which will block 2259 * vfio and iommufd as well. 2260 */ 2261 if (dev->iommu->require_direct && 2262 (new_domain->type == IOMMU_DOMAIN_BLOCKED || 2263 new_domain == group->blocking_domain)) { 2264 dev_warn(dev, 2265 "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); 2266 return -EINVAL; 2267 } 2268 2269 if (dev->iommu->attach_deferred) { 2270 if (new_domain == group->default_domain) 2271 return 0; 2272 dev->iommu->attach_deferred = 0; 2273 } 2274 2275 ret = __iommu_attach_device(new_domain, dev); 2276 if (ret) { 2277 /* 2278 * If we have a blocking domain then try to attach that in hopes 2279 * of avoiding a UAF. Modern drivers should implement blocking 2280 * domains as global statics that cannot fail. 2281 */ 2282 if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && 2283 group->blocking_domain && 2284 group->blocking_domain != new_domain) 2285 __iommu_attach_device(group->blocking_domain, dev); 2286 return ret; 2287 } 2288 return 0; 2289 } 2290 2291 /* 2292 * If 0 is returned the group's domain is new_domain. If an error is returned 2293 * then the group's domain will be set back to the existing domain unless 2294 * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's 2295 * domains is left inconsistent. 
This is a driver bug to fail attach with a 2296 * previously good domain. We try to avoid a kernel UAF because of this. 2297 * 2298 * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU 2299 * API works on domains and devices. Bridge that gap by iterating over the 2300 * devices in a group. Ideally we'd have a single device which represents the 2301 * requestor ID of the group, but we also allow IOMMU drivers to create policy 2302 * defined minimum sets, where the physical hardware may be able to distinguish 2303 * members, but we wish to group them at a higher level (e.g. untrusted 2304 * multi-function PCI devices). Thus we attach each device. 2305 */ 2306 static int __iommu_group_set_domain_internal(struct iommu_group *group, 2307 struct iommu_domain *new_domain, 2308 unsigned int flags) 2309 { 2310 struct group_device *last_gdev; 2311 struct group_device *gdev; 2312 int result; 2313 int ret; 2314 2315 lockdep_assert_held(&group->mutex); 2316 2317 if (group->domain == new_domain) 2318 return 0; 2319 2320 if (WARN_ON(!new_domain)) 2321 return -EINVAL; 2322 2323 /* 2324 * Changing the domain is done by calling attach_dev() on the new 2325 * domain. This switch does not have to be atomic and DMA can be 2326 * discarded during the transition. DMA must only be able to access 2327 * either new_domain or group->domain, never something else. 2328 */ 2329 result = 0; 2330 for_each_group_device(group, gdev) { 2331 ret = __iommu_device_set_domain(group, gdev->dev, new_domain, 2332 flags); 2333 if (ret) { 2334 result = ret; 2335 /* 2336 * Keep trying the other devices in the group. If a 2337 * driver fails attach to an otherwise good domain, and 2338 * does not support blocking domains, it should at least 2339 * drop its reference on the current domain so we don't 2340 * UAF. 2341 */ 2342 if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) 2343 continue; 2344 goto err_revert; 2345 } 2346 } 2347 group->domain = new_domain; 2348 return result; 2349 2350 err_revert: 2351 /* 2352 * This is called in error unwind paths. A well behaved driver should 2353 * always allow us to attach to a domain that was already attached. 2354 */ 2355 last_gdev = gdev; 2356 for_each_group_device(group, gdev) { 2357 /* 2358 * A NULL domain can happen only for first probe, in which case 2359 * we leave group->domain as NULL and let release clean 2360 * everything up.
2361 */ 2362 if (group->domain) 2363 WARN_ON(__iommu_device_set_domain( 2364 group, gdev->dev, group->domain, 2365 IOMMU_SET_DOMAIN_MUST_SUCCEED)); 2366 if (gdev == last_gdev) 2367 break; 2368 } 2369 return ret; 2370 } 2371 2372 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2373 { 2374 mutex_lock(&group->mutex); 2375 __iommu_group_set_core_domain(group); 2376 mutex_unlock(&group->mutex); 2377 } 2378 EXPORT_SYMBOL_GPL(iommu_detach_group); 2379 2380 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2381 { 2382 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2383 return iova; 2384 2385 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2386 return 0; 2387 2388 return domain->ops->iova_to_phys(domain, iova); 2389 } 2390 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2391 2392 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2393 phys_addr_t paddr, size_t size, size_t *count) 2394 { 2395 unsigned int pgsize_idx, pgsize_idx_next; 2396 unsigned long pgsizes; 2397 size_t offset, pgsize, pgsize_next; 2398 unsigned long addr_merge = paddr | iova; 2399 2400 /* Page sizes supported by the hardware and small enough for @size */ 2401 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2402 2403 /* Constrain the page sizes further based on the maximum alignment */ 2404 if (likely(addr_merge)) 2405 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2406 2407 /* Make sure we have at least one suitable page size */ 2408 BUG_ON(!pgsizes); 2409 2410 /* Pick the biggest page size remaining */ 2411 pgsize_idx = __fls(pgsizes); 2412 pgsize = BIT(pgsize_idx); 2413 if (!count) 2414 return pgsize; 2415 2416 /* Find the next biggest support page size, if it exists */ 2417 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2418 if (!pgsizes) 2419 goto out_set_count; 2420 2421 pgsize_idx_next = __ffs(pgsizes); 2422 pgsize_next = BIT(pgsize_idx_next); 2423 2424 /* 2425 * There's no point trying a bigger page size unless the virtual 2426 * and physical addresses are similarly offset within the larger page. 2427 */ 2428 if ((iova ^ paddr) & (pgsize_next - 1)) 2429 goto out_set_count; 2430 2431 /* Calculate the offset to the next page size alignment boundary */ 2432 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2433 2434 /* 2435 * If size is big enough to accommodate the larger page, reduce 2436 * the number of smaller pages. 
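 *
 * Worked example (illustrative, made-up values): with 4K and 2M pages
 * supported, iova == paddr == 0x1ff000 and size == 0x201000, the largest
 * size usable right now is 4K, pgsize_next is 2M and offset is 0x1000.
 * Because 0x1000 + 2M <= 0x201000, size is clipped to 0x1000 so only one
 * 4K page is mapped in this step and the next iteration starts on a 2M
 * boundary where the 2M page size can be used.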
2437 */ 2438 if (offset + pgsize_next <= size) 2439 size = offset; 2440 2441 out_set_count: 2442 *count = size >> pgsize_idx; 2443 return pgsize; 2444 } 2445 2446 static int __iommu_map(struct iommu_domain *domain, unsigned long iova, 2447 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2448 { 2449 const struct iommu_domain_ops *ops = domain->ops; 2450 unsigned long orig_iova = iova; 2451 unsigned int min_pagesz; 2452 size_t orig_size = size; 2453 phys_addr_t orig_paddr = paddr; 2454 int ret = 0; 2455 2456 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2457 return -EINVAL; 2458 2459 if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) 2460 return -ENODEV; 2461 2462 /* find out the minimum page size supported */ 2463 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2464 2465 /* 2466 * both the virtual address and the physical one, as well as 2467 * the size of the mapping, must be aligned (at least) to the 2468 * size of the smallest page supported by the hardware 2469 */ 2470 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2471 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2472 iova, &paddr, size, min_pagesz); 2473 return -EINVAL; 2474 } 2475 2476 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2477 2478 while (size) { 2479 size_t pgsize, count, mapped = 0; 2480 2481 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2482 2483 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2484 iova, &paddr, pgsize, count); 2485 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2486 gfp, &mapped); 2487 /* 2488 * Some pages may have been mapped, even if an error occurred, 2489 * so we should account for those so they can be unmapped. 2490 */ 2491 size -= mapped; 2492 2493 if (ret) 2494 break; 2495 2496 iova += mapped; 2497 paddr += mapped; 2498 } 2499 2500 /* unroll mapping in case something went wrong */ 2501 if (ret) 2502 iommu_unmap(domain, orig_iova, orig_size - size); 2503 else 2504 trace_map(orig_iova, orig_paddr, orig_size); 2505 2506 return ret; 2507 } 2508 2509 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2510 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2511 { 2512 const struct iommu_domain_ops *ops = domain->ops; 2513 int ret; 2514 2515 might_sleep_if(gfpflags_allow_blocking(gfp)); 2516 2517 /* Discourage passing strange GFP flags */ 2518 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2519 __GFP_HIGHMEM))) 2520 return -EINVAL; 2521 2522 ret = __iommu_map(domain, iova, paddr, size, prot, gfp); 2523 if (ret == 0 && ops->iotlb_sync_map) { 2524 ret = ops->iotlb_sync_map(domain, iova, size); 2525 if (ret) 2526 goto out_err; 2527 } 2528 2529 return ret; 2530 2531 out_err: 2532 /* undo mappings already done */ 2533 iommu_unmap(domain, iova, size); 2534 2535 return ret; 2536 } 2537 EXPORT_SYMBOL_GPL(iommu_map); 2538 2539 static size_t __iommu_unmap(struct iommu_domain *domain, 2540 unsigned long iova, size_t size, 2541 struct iommu_iotlb_gather *iotlb_gather) 2542 { 2543 const struct iommu_domain_ops *ops = domain->ops; 2544 size_t unmapped_page, unmapped = 0; 2545 unsigned long orig_iova = iova; 2546 unsigned int min_pagesz; 2547 2548 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2549 return 0; 2550 2551 if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL)) 2552 return 0; 2553 2554 /* find out the minimum page size supported */ 2555 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2556 2557 /* 2558 * The virtual address, as well as the size 
of the mapping, must be 2559 * aligned (at least) to the size of the smallest page supported 2560 * by the hardware 2561 */ 2562 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2563 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2564 iova, size, min_pagesz); 2565 return 0; 2566 } 2567 2568 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2569 2570 /* 2571 * Keep iterating until we either unmap 'size' bytes (or more) 2572 * or we hit an area that isn't mapped. 2573 */ 2574 while (unmapped < size) { 2575 size_t pgsize, count; 2576 2577 pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count); 2578 unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather); 2579 if (!unmapped_page) 2580 break; 2581 2582 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2583 iova, unmapped_page); 2584 2585 iova += unmapped_page; 2586 unmapped += unmapped_page; 2587 } 2588 2589 trace_unmap(orig_iova, size, unmapped); 2590 return unmapped; 2591 } 2592 2593 /** 2594 * iommu_unmap() - Remove mappings from a range of IOVA 2595 * @domain: Domain to manipulate 2596 * @iova: IO virtual address to start 2597 * @size: Length of the range starting from @iova 2598 * 2599 * iommu_unmap() will remove a translation created by iommu_map(). It cannot 2600 * subdivide a mapping created by iommu_map(), so it should be called with IOVA 2601 * ranges that match what was passed to iommu_map(). The range can aggregate 2602 * contiguous iommu_map() calls so long as no individual range is split. 2603 * 2604 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2605 * unmapping stopped. 2606 */ 2607 size_t iommu_unmap(struct iommu_domain *domain, 2608 unsigned long iova, size_t size) 2609 { 2610 struct iommu_iotlb_gather iotlb_gather; 2611 size_t ret; 2612 2613 iommu_iotlb_gather_init(&iotlb_gather); 2614 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2615 iommu_iotlb_sync(domain, &iotlb_gather); 2616 2617 return ret; 2618 } 2619 EXPORT_SYMBOL_GPL(iommu_unmap); 2620 2621 size_t iommu_unmap_fast(struct iommu_domain *domain, 2622 unsigned long iova, size_t size, 2623 struct iommu_iotlb_gather *iotlb_gather) 2624 { 2625 return __iommu_unmap(domain, iova, size, iotlb_gather); 2626 } 2627 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2628 2629 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2630 struct scatterlist *sg, unsigned int nents, int prot, 2631 gfp_t gfp) 2632 { 2633 const struct iommu_domain_ops *ops = domain->ops; 2634 size_t len = 0, mapped = 0; 2635 phys_addr_t start; 2636 unsigned int i = 0; 2637 int ret; 2638 2639 might_sleep_if(gfpflags_allow_blocking(gfp)); 2640 2641 /* Discourage passing strange GFP flags */ 2642 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2643 __GFP_HIGHMEM))) 2644 return -EINVAL; 2645 2646 while (i <= nents) { 2647 phys_addr_t s_phys = sg_phys(sg); 2648 2649 if (len && s_phys != start + len) { 2650 ret = __iommu_map(domain, iova + mapped, start, 2651 len, prot, gfp); 2652 2653 if (ret) 2654 goto out_err; 2655 2656 mapped += len; 2657 len = 0; 2658 } 2659 2660 if (sg_dma_is_bus_address(sg)) 2661 goto next; 2662 2663 if (len) { 2664 len += sg->length; 2665 } else { 2666 len = sg->length; 2667 start = s_phys; 2668 } 2669 2670 next: 2671 if (++i < nents) 2672 sg = sg_next(sg); 2673 } 2674 2675 if (ops->iotlb_sync_map) { 2676 ret = ops->iotlb_sync_map(domain, iova, mapped); 2677 if (ret) 2678 goto out_err; 2679 } 2680 return mapped; 2681 2682 out_err: 2683 /* undo mappings already done */ 2684 
iommu_unmap(domain, iova, mapped); 2685 2686 return ret; 2687 } 2688 EXPORT_SYMBOL_GPL(iommu_map_sg); 2689 2690 /** 2691 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2692 * @domain: the iommu domain where the fault has happened 2693 * @dev: the device where the fault has happened 2694 * @iova: the faulting address 2695 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2696 * 2697 * This function should be called by the low-level IOMMU implementations 2698 * whenever IOMMU faults happen, to allow high-level users, that are 2699 * interested in such events, to know about them. 2700 * 2701 * This event may be useful for several possible use cases: 2702 * - mere logging of the event 2703 * - dynamic TLB/PTE loading 2704 * - if restarting of the faulting device is required 2705 * 2706 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2707 * PTE/TLB loading will one day be supported, implementations will be able 2708 * to tell whether it succeeded or not according to this return value). 2709 * 2710 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2711 * (though fault handlers can also return -ENOSYS, in case they want to 2712 * elicit the default behavior of the IOMMU drivers). 2713 */ 2714 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2715 unsigned long iova, int flags) 2716 { 2717 int ret = -ENOSYS; 2718 2719 /* 2720 * if upper layers showed interest and installed a fault handler, 2721 * invoke it. 2722 */ 2723 if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER && 2724 domain->handler) 2725 ret = domain->handler(domain, dev, iova, flags, 2726 domain->handler_token); 2727 2728 trace_io_page_fault(dev, iova, flags); 2729 return ret; 2730 } 2731 EXPORT_SYMBOL_GPL(report_iommu_fault); 2732 2733 static int __init iommu_init(void) 2734 { 2735 iommu_group_kset = kset_create_and_add("iommu_groups", 2736 NULL, kernel_kobj); 2737 BUG_ON(!iommu_group_kset); 2738 2739 iommu_debugfs_setup(); 2740 2741 return 0; 2742 } 2743 core_initcall(iommu_init); 2744 2745 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2746 unsigned long quirk) 2747 { 2748 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2749 return -EINVAL; 2750 if (!domain->ops->set_pgtable_quirks) 2751 return -EINVAL; 2752 return domain->ops->set_pgtable_quirks(domain, quirk); 2753 } 2754 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2755 2756 /** 2757 * iommu_get_resv_regions - get reserved regions 2758 * @dev: device for which to get reserved regions 2759 * @list: reserved region list for device 2760 * 2761 * This returns a list of reserved IOVA regions specific to this device. 2762 * A domain user should not map IOVA in these ranges. 2763 */ 2764 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2765 { 2766 const struct iommu_ops *ops = dev_iommu_ops(dev); 2767 2768 if (ops->get_resv_regions) 2769 ops->get_resv_regions(dev, list); 2770 } 2771 EXPORT_SYMBOL_GPL(iommu_get_resv_regions); 2772 2773 /** 2774 * iommu_put_resv_regions - release reserved regions 2775 * @dev: device for which to free reserved regions 2776 * @list: reserved region list for device 2777 * 2778 * This releases a reserved region list acquired by iommu_get_resv_regions(). 
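 *
 * Illustrative sketch of the usual get/iterate/put pattern (the list name is
 * made up)::
 *
 *	LIST_HEAD(resv_regions);
 *	struct iommu_resv_region *region;
 *
 *	iommu_get_resv_regions(dev, &resv_regions);
 *	list_for_each_entry(region, &resv_regions, list)
 *		dev_dbg(dev, "reserved region %pa size %zu type %d\n",
 *			&region->start, region->length, region->type);
 *	iommu_put_resv_regions(dev, &resv_regions);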
2779 */ 2780 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2781 { 2782 struct iommu_resv_region *entry, *next; 2783 2784 list_for_each_entry_safe(entry, next, list, list) { 2785 if (entry->free) 2786 entry->free(dev, entry); 2787 else 2788 kfree(entry); 2789 } 2790 } 2791 EXPORT_SYMBOL(iommu_put_resv_regions); 2792 2793 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2794 size_t length, int prot, 2795 enum iommu_resv_type type, 2796 gfp_t gfp) 2797 { 2798 struct iommu_resv_region *region; 2799 2800 region = kzalloc(sizeof(*region), gfp); 2801 if (!region) 2802 return NULL; 2803 2804 INIT_LIST_HEAD(®ion->list); 2805 region->start = start; 2806 region->length = length; 2807 region->prot = prot; 2808 region->type = type; 2809 return region; 2810 } 2811 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2812 2813 void iommu_set_default_passthrough(bool cmd_line) 2814 { 2815 if (cmd_line) 2816 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2817 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2818 } 2819 2820 void iommu_set_default_translated(bool cmd_line) 2821 { 2822 if (cmd_line) 2823 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2824 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2825 } 2826 2827 bool iommu_default_passthrough(void) 2828 { 2829 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2830 } 2831 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2832 2833 const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) 2834 { 2835 const struct iommu_ops *ops = NULL; 2836 struct iommu_device *iommu; 2837 2838 spin_lock(&iommu_device_lock); 2839 list_for_each_entry(iommu, &iommu_device_list, list) 2840 if (iommu->fwnode == fwnode) { 2841 ops = iommu->ops; 2842 break; 2843 } 2844 spin_unlock(&iommu_device_lock); 2845 return ops; 2846 } 2847 2848 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) 2849 { 2850 const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode); 2851 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2852 2853 if (!ops) 2854 return driver_deferred_probe_check_state(dev); 2855 2856 if (fwspec) 2857 return ops == iommu_fwspec_ops(fwspec) ? 
0 : -EINVAL; 2858 2859 if (!dev_iommu_get(dev)) 2860 return -ENOMEM; 2861 2862 /* Preallocate for the overwhelmingly common case of 1 ID */ 2863 fwspec = kzalloc(struct_size(fwspec, ids, 1), GFP_KERNEL); 2864 if (!fwspec) 2865 return -ENOMEM; 2866 2867 fwnode_handle_get(iommu_fwnode); 2868 fwspec->iommu_fwnode = iommu_fwnode; 2869 dev_iommu_fwspec_set(dev, fwspec); 2870 return 0; 2871 } 2872 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 2873 2874 void iommu_fwspec_free(struct device *dev) 2875 { 2876 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2877 2878 if (fwspec) { 2879 fwnode_handle_put(fwspec->iommu_fwnode); 2880 kfree(fwspec); 2881 dev_iommu_fwspec_set(dev, NULL); 2882 } 2883 } 2884 2885 int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) 2886 { 2887 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2888 int i, new_num; 2889 2890 if (!fwspec) 2891 return -EINVAL; 2892 2893 new_num = fwspec->num_ids + num_ids; 2894 if (new_num > 1) { 2895 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 2896 GFP_KERNEL); 2897 if (!fwspec) 2898 return -ENOMEM; 2899 2900 dev_iommu_fwspec_set(dev, fwspec); 2901 } 2902 2903 for (i = 0; i < num_ids; i++) 2904 fwspec->ids[fwspec->num_ids + i] = ids[i]; 2905 2906 fwspec->num_ids = new_num; 2907 return 0; 2908 } 2909 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 2910 2911 /* 2912 * Per device IOMMU features. 2913 */ 2914 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) 2915 { 2916 if (dev_has_iommu(dev)) { 2917 const struct iommu_ops *ops = dev_iommu_ops(dev); 2918 2919 if (ops->dev_enable_feat) 2920 return ops->dev_enable_feat(dev, feat); 2921 } 2922 2923 return -ENODEV; 2924 } 2925 EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); 2926 2927 /* 2928 * The device drivers should do the necessary cleanups before calling this. 2929 */ 2930 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) 2931 { 2932 if (dev_has_iommu(dev)) { 2933 const struct iommu_ops *ops = dev_iommu_ops(dev); 2934 2935 if (ops->dev_disable_feat) 2936 return ops->dev_disable_feat(dev, feat); 2937 } 2938 2939 return -EBUSY; 2940 } 2941 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); 2942 2943 /** 2944 * iommu_setup_default_domain - Set the default_domain for the group 2945 * @group: Group to change 2946 * @target_type: Domain type to set as the default_domain 2947 * 2948 * Allocate a default domain and set it as the current domain on the group. If 2949 * the group already has a default domain it will be changed to the target_type. 2950 * When target_type is 0 the default domain is selected based on driver and 2951 * system preferences. 
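 *
 * For example, bus_iommu_probe() above passes a target_type of 0 so the type
 * is picked automatically, while the sysfs handler iommu_group_store_type()
 * below passes the type requested by the user::
 *
 *	ret = iommu_setup_default_domain(group, 0);		// automatic selection
 *	ret = iommu_setup_default_domain(group, req_type);	// e.g. IOMMU_DOMAIN_DMA_FQ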
2952 */ 2953 static int iommu_setup_default_domain(struct iommu_group *group, 2954 int target_type) 2955 { 2956 struct iommu_domain *old_dom = group->default_domain; 2957 struct group_device *gdev; 2958 struct iommu_domain *dom; 2959 bool direct_failed; 2960 int req_type; 2961 int ret; 2962 2963 lockdep_assert_held(&group->mutex); 2964 2965 req_type = iommu_get_default_domain_type(group, target_type); 2966 if (req_type < 0) 2967 return -EINVAL; 2968 2969 dom = iommu_group_alloc_default_domain(group, req_type); 2970 if (IS_ERR(dom)) 2971 return PTR_ERR(dom); 2972 2973 if (group->default_domain == dom) 2974 return 0; 2975 2976 if (iommu_is_dma_domain(dom)) { 2977 ret = iommu_get_dma_cookie(dom); 2978 if (ret) { 2979 iommu_domain_free(dom); 2980 return ret; 2981 } 2982 } 2983 2984 /* 2985 * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be 2986 * mapped before their device is attached, in order to guarantee 2987 * continuity with any FW activity 2988 */ 2989 direct_failed = false; 2990 for_each_group_device(group, gdev) { 2991 if (iommu_create_device_direct_mappings(dom, gdev->dev)) { 2992 direct_failed = true; 2993 dev_warn_once( 2994 gdev->dev->iommu->iommu_dev->dev, 2995 "IOMMU driver was not able to establish FW requested direct mapping."); 2996 } 2997 } 2998 2999 /* We must set default_domain early for __iommu_device_set_domain */ 3000 group->default_domain = dom; 3001 if (!group->domain) { 3002 /* 3003 * Drivers are not allowed to fail the first domain attach. 3004 * The only way to recover from this is to fail attaching the 3005 * iommu driver and call ops->release_device. Put the domain 3006 * in group->default_domain so it is freed after. 3007 */ 3008 ret = __iommu_group_set_domain_internal( 3009 group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3010 if (WARN_ON(ret)) 3011 goto out_free_old; 3012 } else { 3013 ret = __iommu_group_set_domain(group, dom); 3014 if (ret) 3015 goto err_restore_def_domain; 3016 } 3017 3018 /* 3019 * Drivers are supposed to allow mappings to be installed in a domain 3020 * before device attachment, but some don't. Hack around this defect by 3021 * trying again after attaching. If this happens it means the device 3022 * will not continuously have the IOMMU_RESV_DIRECT map. 3023 */ 3024 if (direct_failed) { 3025 for_each_group_device(group, gdev) { 3026 ret = iommu_create_device_direct_mappings(dom, gdev->dev); 3027 if (ret) 3028 goto err_restore_domain; 3029 } 3030 } 3031 3032 out_free_old: 3033 if (old_dom) 3034 iommu_domain_free(old_dom); 3035 return ret; 3036 3037 err_restore_domain: 3038 if (old_dom) 3039 __iommu_group_set_domain_internal( 3040 group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3041 err_restore_def_domain: 3042 if (old_dom) { 3043 iommu_domain_free(dom); 3044 group->default_domain = old_dom; 3045 } 3046 return ret; 3047 } 3048 3049 /* 3050 * Changing the default domain through sysfs requires the users to unbind the 3051 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 3052 * transition. Return failure if this isn't met. 3053 * 3054 * We need to consider the race between this and the device release path. 3055 * group->mutex is used here to guarantee that the device release path 3056 * will not be entered at the same time. 
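 *
 * From userspace this corresponds to writing one of "identity", "DMA",
 * "DMA-FQ" or "auto" to the group's sysfs type attribute, e.g. (path shown
 * for illustration) /sys/kernel/iommu_groups/<group id>/type.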
3057 */ 3058 static ssize_t iommu_group_store_type(struct iommu_group *group, 3059 const char *buf, size_t count) 3060 { 3061 struct group_device *gdev; 3062 int ret, req_type; 3063 3064 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 3065 return -EACCES; 3066 3067 if (WARN_ON(!group) || !group->default_domain) 3068 return -EINVAL; 3069 3070 if (sysfs_streq(buf, "identity")) 3071 req_type = IOMMU_DOMAIN_IDENTITY; 3072 else if (sysfs_streq(buf, "DMA")) 3073 req_type = IOMMU_DOMAIN_DMA; 3074 else if (sysfs_streq(buf, "DMA-FQ")) 3075 req_type = IOMMU_DOMAIN_DMA_FQ; 3076 else if (sysfs_streq(buf, "auto")) 3077 req_type = 0; 3078 else 3079 return -EINVAL; 3080 3081 mutex_lock(&group->mutex); 3082 /* We can bring up a flush queue without tearing down the domain. */ 3083 if (req_type == IOMMU_DOMAIN_DMA_FQ && 3084 group->default_domain->type == IOMMU_DOMAIN_DMA) { 3085 ret = iommu_dma_init_fq(group->default_domain); 3086 if (ret) 3087 goto out_unlock; 3088 3089 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 3090 ret = count; 3091 goto out_unlock; 3092 } 3093 3094 /* Otherwise, ensure that device exists and no driver is bound. */ 3095 if (list_empty(&group->devices) || group->owner_cnt) { 3096 ret = -EPERM; 3097 goto out_unlock; 3098 } 3099 3100 ret = iommu_setup_default_domain(group, req_type); 3101 if (ret) 3102 goto out_unlock; 3103 3104 /* Make sure dma_ops is appropriatley set */ 3105 for_each_group_device(group, gdev) 3106 iommu_setup_dma_ops(gdev->dev); 3107 3108 out_unlock: 3109 mutex_unlock(&group->mutex); 3110 return ret ?: count; 3111 } 3112 3113 /** 3114 * iommu_device_use_default_domain() - Device driver wants to handle device 3115 * DMA through the kernel DMA API. 3116 * @dev: The device. 3117 * 3118 * The device driver about to bind @dev wants to do DMA through the kernel 3119 * DMA API. Return 0 if it is allowed, otherwise an error. 3120 */ 3121 int iommu_device_use_default_domain(struct device *dev) 3122 { 3123 /* Caller is the driver core during the pre-probe path */ 3124 struct iommu_group *group = dev->iommu_group; 3125 int ret = 0; 3126 3127 if (!group) 3128 return 0; 3129 3130 mutex_lock(&group->mutex); 3131 /* We may race against bus_iommu_probe() finalising groups here */ 3132 if (!group->default_domain) { 3133 ret = -EPROBE_DEFER; 3134 goto unlock_out; 3135 } 3136 if (group->owner_cnt) { 3137 if (group->domain != group->default_domain || group->owner || 3138 !xa_empty(&group->pasid_array)) { 3139 ret = -EBUSY; 3140 goto unlock_out; 3141 } 3142 } 3143 3144 group->owner_cnt++; 3145 3146 unlock_out: 3147 mutex_unlock(&group->mutex); 3148 return ret; 3149 } 3150 3151 /** 3152 * iommu_device_unuse_default_domain() - Device driver stops handling device 3153 * DMA through the kernel DMA API. 3154 * @dev: The device. 3155 * 3156 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3157 * It must be called after iommu_device_use_default_domain(). 
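 *
 * Illustrative pairing (sketch of how the driver core uses these hooks)::
 *
 *	ret = iommu_device_use_default_domain(dev);	// before binding a driver
 *	if (ret)
 *		return ret;	// e.g. -EPROBE_DEFER or -EBUSY
 *	... driver bound, DMA goes through the kernel DMA API ...
 *	iommu_device_unuse_default_domain(dev);		// after the driver is unbound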
3158 */ 3159 void iommu_device_unuse_default_domain(struct device *dev) 3160 { 3161 /* Caller is the driver core during the post-probe path */ 3162 struct iommu_group *group = dev->iommu_group; 3163 3164 if (!group) 3165 return; 3166 3167 mutex_lock(&group->mutex); 3168 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3169 group->owner_cnt--; 3170 3171 mutex_unlock(&group->mutex); 3172 } 3173 3174 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3175 { 3176 struct device *dev = iommu_group_first_dev(group); 3177 const struct iommu_ops *ops = dev_iommu_ops(dev); 3178 struct iommu_domain *domain; 3179 3180 if (group->blocking_domain) 3181 return 0; 3182 3183 if (ops->blocked_domain) { 3184 group->blocking_domain = ops->blocked_domain; 3185 return 0; 3186 } 3187 3188 /* 3189 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an 3190 * empty PAGING domain instead. 3191 */ 3192 domain = iommu_paging_domain_alloc(dev); 3193 if (IS_ERR(domain)) 3194 return PTR_ERR(domain); 3195 group->blocking_domain = domain; 3196 return 0; 3197 } 3198 3199 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3200 { 3201 int ret; 3202 3203 if ((group->domain && group->domain != group->default_domain) || 3204 !xa_empty(&group->pasid_array)) 3205 return -EBUSY; 3206 3207 ret = __iommu_group_alloc_blocking_domain(group); 3208 if (ret) 3209 return ret; 3210 ret = __iommu_group_set_domain(group, group->blocking_domain); 3211 if (ret) 3212 return ret; 3213 3214 group->owner = owner; 3215 group->owner_cnt++; 3216 return 0; 3217 } 3218 3219 /** 3220 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3221 * @group: The group. 3222 * @owner: Caller specified pointer. Used for exclusive ownership. 3223 * 3224 * This is to support backward compatibility for vfio which manages the dma 3225 * ownership in iommu_group level. New invocations on this interface should be 3226 * prohibited. Only a single owner may exist for a group. 3227 */ 3228 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3229 { 3230 int ret = 0; 3231 3232 if (WARN_ON(!owner)) 3233 return -EINVAL; 3234 3235 mutex_lock(&group->mutex); 3236 if (group->owner_cnt) { 3237 ret = -EPERM; 3238 goto unlock_out; 3239 } 3240 3241 ret = __iommu_take_dma_ownership(group, owner); 3242 unlock_out: 3243 mutex_unlock(&group->mutex); 3244 3245 return ret; 3246 } 3247 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3248 3249 /** 3250 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3251 * @dev: The device. 3252 * @owner: Caller specified pointer. Used for exclusive ownership. 3253 * 3254 * Claim the DMA ownership of a device. Multiple devices in the same group may 3255 * concurrently claim ownership if they present the same owner value. 
Returns 0 3256 * on success and error code on failure 3257 */ 3258 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3259 { 3260 /* Caller must be a probed driver on dev */ 3261 struct iommu_group *group = dev->iommu_group; 3262 int ret = 0; 3263 3264 if (WARN_ON(!owner)) 3265 return -EINVAL; 3266 3267 if (!group) 3268 return -ENODEV; 3269 3270 mutex_lock(&group->mutex); 3271 if (group->owner_cnt) { 3272 if (group->owner != owner) { 3273 ret = -EPERM; 3274 goto unlock_out; 3275 } 3276 group->owner_cnt++; 3277 goto unlock_out; 3278 } 3279 3280 ret = __iommu_take_dma_ownership(group, owner); 3281 unlock_out: 3282 mutex_unlock(&group->mutex); 3283 return ret; 3284 } 3285 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3286 3287 static void __iommu_release_dma_ownership(struct iommu_group *group) 3288 { 3289 if (WARN_ON(!group->owner_cnt || !group->owner || 3290 !xa_empty(&group->pasid_array))) 3291 return; 3292 3293 group->owner_cnt = 0; 3294 group->owner = NULL; 3295 __iommu_group_set_domain_nofail(group, group->default_domain); 3296 } 3297 3298 /** 3299 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3300 * @group: The group 3301 * 3302 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3303 */ 3304 void iommu_group_release_dma_owner(struct iommu_group *group) 3305 { 3306 mutex_lock(&group->mutex); 3307 __iommu_release_dma_ownership(group); 3308 mutex_unlock(&group->mutex); 3309 } 3310 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3311 3312 /** 3313 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3314 * @dev: The device. 3315 * 3316 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 3317 */ 3318 void iommu_device_release_dma_owner(struct device *dev) 3319 { 3320 /* Caller must be a probed driver on dev */ 3321 struct iommu_group *group = dev->iommu_group; 3322 3323 mutex_lock(&group->mutex); 3324 if (group->owner_cnt > 1) 3325 group->owner_cnt--; 3326 else 3327 __iommu_release_dma_ownership(group); 3328 mutex_unlock(&group->mutex); 3329 } 3330 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3331 3332 /** 3333 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3334 * @group: The group. 3335 * 3336 * This provides status query on a given group. It is racy and only for 3337 * non-binding status reporting. 
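 *
 * The ownership API that this status reflects is used roughly as follows
 * (illustrative sketch; "my_owner_cookie" is a made-up token, e.g. what a
 * VFIO/iommufd-style caller would pass)::
 *
 *	ret = iommu_device_claim_dma_owner(dev, my_owner_cookie);
 *	if (ret)
 *		return ret;	// -EPERM: group owned with a different token
 *	... user-controlled DMA via an UNMANAGED or blocking domain ...
 *	iommu_device_release_dma_owner(dev);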
3338 */ 3339 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3340 { 3341 unsigned int user; 3342 3343 mutex_lock(&group->mutex); 3344 user = group->owner_cnt; 3345 mutex_unlock(&group->mutex); 3346 3347 return user; 3348 } 3349 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3350 3351 static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, 3352 struct iommu_domain *domain) 3353 { 3354 const struct iommu_ops *ops = dev_iommu_ops(dev); 3355 struct iommu_domain *blocked_domain = ops->blocked_domain; 3356 3357 WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain, 3358 dev, pasid, domain)); 3359 } 3360 3361 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3362 struct iommu_group *group, ioasid_t pasid, 3363 struct iommu_domain *old) 3364 { 3365 struct group_device *device, *last_gdev; 3366 int ret; 3367 3368 for_each_group_device(group, device) { 3369 ret = domain->ops->set_dev_pasid(domain, device->dev, 3370 pasid, old); 3371 if (ret) 3372 goto err_revert; 3373 } 3374 3375 return 0; 3376 3377 err_revert: 3378 last_gdev = device; 3379 for_each_group_device(group, device) { 3380 if (device == last_gdev) 3381 break; 3382 /* 3383 * If no old domain, undo the succeeded devices/pasid. 3384 * Otherwise, rollback the succeeded devices/pasid to the old 3385 * domain. And it is a driver bug to fail attaching with a 3386 * previously good domain. 3387 */ 3388 if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev, 3389 pasid, domain))) 3390 iommu_remove_dev_pasid(device->dev, pasid, domain); 3391 } 3392 return ret; 3393 } 3394 3395 static void __iommu_remove_group_pasid(struct iommu_group *group, 3396 ioasid_t pasid, 3397 struct iommu_domain *domain) 3398 { 3399 struct group_device *device; 3400 3401 for_each_group_device(group, device) 3402 iommu_remove_dev_pasid(device->dev, pasid, domain); 3403 } 3404 3405 /* 3406 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3407 * @domain: the iommu domain. 3408 * @dev: the attached device. 3409 * @pasid: the pasid of the device. 3410 * @handle: the attach handle. 3411 * 3412 * Caller should always provide a new handle to avoid race with the paths 3413 * that have lockless reference to handle if it intends to pass a valid handle. 3414 * 3415 * Return: 0 on success, or an error. 3416 */ 3417 int iommu_attach_device_pasid(struct iommu_domain *domain, 3418 struct device *dev, ioasid_t pasid, 3419 struct iommu_attach_handle *handle) 3420 { 3421 /* Caller must be a probed driver on dev */ 3422 struct iommu_group *group = dev->iommu_group; 3423 struct group_device *device; 3424 const struct iommu_ops *ops; 3425 void *entry; 3426 int ret; 3427 3428 if (!group) 3429 return -ENODEV; 3430 3431 ops = dev_iommu_ops(dev); 3432 3433 if (!domain->ops->set_dev_pasid || 3434 !ops->blocked_domain || 3435 !ops->blocked_domain->ops->set_dev_pasid) 3436 return -EOPNOTSUPP; 3437 3438 if (ops != domain->owner || pasid == IOMMU_NO_PASID) 3439 return -EINVAL; 3440 3441 mutex_lock(&group->mutex); 3442 for_each_group_device(group, device) { 3443 if (pasid >= device->dev->iommu->max_pasids) { 3444 ret = -EINVAL; 3445 goto out_unlock; 3446 } 3447 } 3448 3449 entry = iommu_make_pasid_array_entry(domain, handle); 3450 3451 /* 3452 * Entry present is a failure case. Use xa_insert() instead of 3453 * xa_reserve(). 
3454 */ 3455 ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL); 3456 if (ret) 3457 goto out_unlock; 3458 3459 ret = __iommu_set_group_pasid(domain, group, pasid, NULL); 3460 if (ret) { 3461 xa_release(&group->pasid_array, pasid); 3462 goto out_unlock; 3463 } 3464 3465 /* 3466 * The xa_insert() above reserved the memory, and the group->mutex is 3467 * held, this cannot fail. The new domain cannot be visible until the 3468 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3469 * queued and then failing attach. 3470 */ 3471 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3472 pasid, entry, GFP_KERNEL))); 3473 3474 out_unlock: 3475 mutex_unlock(&group->mutex); 3476 return ret; 3477 } 3478 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3479 3480 /** 3481 * iommu_replace_device_pasid - Replace the domain that a specific pasid 3482 * of the device is attached to 3483 * @domain: the new iommu domain 3484 * @dev: the attached device. 3485 * @pasid: the pasid of the device. 3486 * @handle: the attach handle. 3487 * 3488 * This API allows the pasid to switch domains. The @pasid should have been 3489 * attached. Otherwise, this fails. The pasid will keep the old configuration 3490 * if replacement failed. 3491 * 3492 * Caller should always provide a new handle to avoid race with the paths 3493 * that have lockless reference to handle if it intends to pass a valid handle. 3494 * 3495 * Return 0 on success, or an error. 3496 */ 3497 int iommu_replace_device_pasid(struct iommu_domain *domain, 3498 struct device *dev, ioasid_t pasid, 3499 struct iommu_attach_handle *handle) 3500 { 3501 /* Caller must be a probed driver on dev */ 3502 struct iommu_group *group = dev->iommu_group; 3503 struct iommu_attach_handle *entry; 3504 struct iommu_domain *curr_domain; 3505 void *curr; 3506 int ret; 3507 3508 if (!group) 3509 return -ENODEV; 3510 3511 if (!domain->ops->set_dev_pasid) 3512 return -EOPNOTSUPP; 3513 3514 if (dev_iommu_ops(dev) != domain->owner || 3515 pasid == IOMMU_NO_PASID || !handle) 3516 return -EINVAL; 3517 3518 mutex_lock(&group->mutex); 3519 entry = iommu_make_pasid_array_entry(domain, handle); 3520 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, 3521 XA_ZERO_ENTRY, GFP_KERNEL); 3522 if (xa_is_err(curr)) { 3523 ret = xa_err(curr); 3524 goto out_unlock; 3525 } 3526 3527 /* 3528 * No domain (with or without handle) attached, hence not 3529 * a replace case. 3530 */ 3531 if (!curr) { 3532 xa_release(&group->pasid_array, pasid); 3533 ret = -EINVAL; 3534 goto out_unlock; 3535 } 3536 3537 /* 3538 * Reusing handle is problematic as there are paths that refers 3539 * the handle without lock. To avoid race, reject the callers that 3540 * attempt it. 3541 */ 3542 if (curr == entry) { 3543 WARN_ON(1); 3544 ret = -EINVAL; 3545 goto out_unlock; 3546 } 3547 3548 curr_domain = pasid_array_entry_to_domain(curr); 3549 ret = 0; 3550 3551 if (curr_domain != domain) { 3552 ret = __iommu_set_group_pasid(domain, group, 3553 pasid, curr_domain); 3554 if (ret) 3555 goto out_unlock; 3556 } 3557 3558 /* 3559 * The above xa_cmpxchg() reserved the memory, and the 3560 * group->mutex is held, this cannot fail. 3561 */ 3562 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3563 pasid, entry, GFP_KERNEL))); 3564 3565 out_unlock: 3566 mutex_unlock(&group->mutex); 3567 return ret; 3568 } 3569 EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL"); 3570 3571 /* 3572 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3573 * @domain: the iommu domain. 
3574 * @dev: the attached device. 3575 * @pasid: the pasid of the device. 3576 * 3577 * The @domain must have been attached to @pasid of the @dev with 3578 * iommu_attach_device_pasid(). 3579 */ 3580 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3581 ioasid_t pasid) 3582 { 3583 /* Caller must be a probed driver on dev */ 3584 struct iommu_group *group = dev->iommu_group; 3585 3586 mutex_lock(&group->mutex); 3587 __iommu_remove_group_pasid(group, pasid, domain); 3588 xa_erase(&group->pasid_array, pasid); 3589 mutex_unlock(&group->mutex); 3590 } 3591 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3592 3593 ioasid_t iommu_alloc_global_pasid(struct device *dev) 3594 { 3595 int ret; 3596 3597 /* max_pasids == 0 means that the device does not support PASID */ 3598 if (!dev->iommu->max_pasids) 3599 return IOMMU_PASID_INVALID; 3600 3601 /* 3602 * max_pasids is set up by vendor driver based on number of PASID bits 3603 * supported but the IDA allocation is inclusive. 3604 */ 3605 ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, 3606 dev->iommu->max_pasids - 1, GFP_KERNEL); 3607 return ret < 0 ? IOMMU_PASID_INVALID : ret; 3608 } 3609 EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); 3610 3611 void iommu_free_global_pasid(ioasid_t pasid) 3612 { 3613 if (WARN_ON(pasid == IOMMU_PASID_INVALID)) 3614 return; 3615 3616 ida_free(&iommu_global_pasid_ida, pasid); 3617 } 3618 EXPORT_SYMBOL_GPL(iommu_free_global_pasid); 3619 3620 /** 3621 * iommu_attach_handle_get - Return the attach handle 3622 * @group: the iommu group that domain was attached to 3623 * @pasid: the pasid within the group 3624 * @type: matched domain type, 0 for any match 3625 * 3626 * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch. 3627 * 3628 * Return the attach handle to the caller. The life cycle of an iommu attach 3629 * handle is from the time when the domain is attached to the time when the 3630 * domain is detached. Callers are required to synchronize the call of 3631 * iommu_attach_handle_get() with domain attachment and detachment. The attach 3632 * handle can only be used during its life cycle. 3633 */ 3634 struct iommu_attach_handle * 3635 iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) 3636 { 3637 struct iommu_attach_handle *handle; 3638 void *entry; 3639 3640 xa_lock(&group->pasid_array); 3641 entry = xa_load(&group->pasid_array, pasid); 3642 if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) { 3643 handle = ERR_PTR(-ENOENT); 3644 } else { 3645 handle = xa_untag_pointer(entry); 3646 if (type && handle->domain->type != type) 3647 handle = ERR_PTR(-EBUSY); 3648 } 3649 xa_unlock(&group->pasid_array); 3650 3651 return handle; 3652 } 3653 EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL"); 3654 3655 /** 3656 * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group 3657 * @domain: IOMMU domain to attach 3658 * @group: IOMMU group that will be attached 3659 * @handle: attach handle 3660 * 3661 * Returns 0 on success and error code on failure. 3662 * 3663 * This is a variant of iommu_attach_group(). It allows the caller to provide 3664 * an attach handle and use it when the domain is attached. This is currently 3665 * used by IOMMUFD to deliver the I/O page faults. 3666 * 3667 * Caller should always provide a new handle to avoid race with the paths 3668 * that have lockless reference to handle. 
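 *
 * Illustrative sketch (names are made up): the caller usually embeds the
 * handle in its own state so it can be recovered later with
 * iommu_attach_handle_get()::
 *
 *	struct my_attach_state {
 *		struct iommu_attach_handle handle;
 *		... caller-private fields ...
 *	};
 *
 *	ret = iommu_attach_group_handle(domain, group, &state->handle);
 *	...
 *	handle = iommu_attach_handle_get(group, IOMMU_NO_PASID, 0);
 *	if (!IS_ERR(handle))
 *		state = container_of(handle, struct my_attach_state, handle);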
3669 */ 3670 int iommu_attach_group_handle(struct iommu_domain *domain, 3671 struct iommu_group *group, 3672 struct iommu_attach_handle *handle) 3673 { 3674 void *entry; 3675 int ret; 3676 3677 if (!handle) 3678 return -EINVAL; 3679 3680 mutex_lock(&group->mutex); 3681 entry = iommu_make_pasid_array_entry(domain, handle); 3682 ret = xa_insert(&group->pasid_array, 3683 IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL); 3684 if (ret) 3685 goto out_unlock; 3686 3687 ret = __iommu_attach_group(domain, group); 3688 if (ret) { 3689 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3690 goto out_unlock; 3691 } 3692 3693 /* 3694 * The xa_insert() above reserved the memory, and the group->mutex is 3695 * held, this cannot fail. The new domain cannot be visible until the 3696 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3697 * queued and then failing attach. 3698 */ 3699 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3700 IOMMU_NO_PASID, entry, GFP_KERNEL))); 3701 3702 out_unlock: 3703 mutex_unlock(&group->mutex); 3704 return ret; 3705 } 3706 EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, "IOMMUFD_INTERNAL"); 3707 3708 /** 3709 * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group 3710 * @domain: IOMMU domain to attach 3711 * @group: IOMMU group that will be attached 3712 * 3713 * Detach the specified IOMMU domain from the specified IOMMU group. 3714 * It must be used in conjunction with iommu_attach_group_handle(). 3715 */ 3716 void iommu_detach_group_handle(struct iommu_domain *domain, 3717 struct iommu_group *group) 3718 { 3719 mutex_lock(&group->mutex); 3720 __iommu_group_set_core_domain(group); 3721 xa_erase(&group->pasid_array, IOMMU_NO_PASID); 3722 mutex_unlock(&group->mutex); 3723 } 3724 EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL"); 3725 3726 /** 3727 * iommu_replace_group_handle - replace the domain that a group is attached to 3728 * @group: IOMMU group that will be attached to the new domain 3729 * @new_domain: new IOMMU domain to replace with 3730 * @handle: attach handle 3731 * 3732 * This API allows the group to switch domains without being forced to go to 3733 * the blocking domain in-between. It allows the caller to provide an attach 3734 * handle for the new domain and use it when the domain is attached. 3735 * 3736 * If the currently attached domain is a core domain (e.g. a default_domain), 3737 * it will act just like the iommu_attach_group_handle(). 3738 * 3739 * Caller should always provide a new handle to avoid race with the paths 3740 * that have lockless reference to handle. 
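 *
 * Illustrative call (sketch; @handle must be a fresh handle, as noted
 * above)::
 *
 *	ret = iommu_replace_group_handle(group, new_domain, new_handle);
 *	if (ret)
 *		return ret;	// the previously attached domain is kept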
3741 */ 3742 int iommu_replace_group_handle(struct iommu_group *group, 3743 struct iommu_domain *new_domain, 3744 struct iommu_attach_handle *handle) 3745 { 3746 void *curr, *entry; 3747 int ret; 3748 3749 if (!new_domain || !handle) 3750 return -EINVAL; 3751 3752 mutex_lock(&group->mutex); 3753 entry = iommu_make_pasid_array_entry(new_domain, handle); 3754 ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); 3755 if (ret) 3756 goto err_unlock; 3757 3758 ret = __iommu_group_set_domain(group, new_domain); 3759 if (ret) 3760 goto err_release; 3761 3762 curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL); 3763 WARN_ON(xa_is_err(curr)); 3764 3765 mutex_unlock(&group->mutex); 3766 3767 return 0; 3768 err_release: 3769 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3770 err_unlock: 3771 mutex_unlock(&group->mutex); 3772 return ret; 3773 } 3774 EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); 3775 3776 #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 3777 /** 3778 * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain 3779 * @desc: MSI descriptor, will store the MSI page 3780 * @msi_addr: MSI target address to be mapped 3781 * 3782 * The implementation of sw_msi() should take msi_addr and map it to 3783 * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the 3784 * mapping information. 3785 * 3786 * Return: 0 on success or negative error code if the mapping failed. 3787 */ 3788 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 3789 { 3790 struct device *dev = msi_desc_to_dev(desc); 3791 struct iommu_group *group = dev->iommu_group; 3792 int ret = 0; 3793 3794 if (!group) 3795 return 0; 3796 3797 mutex_lock(&group->mutex); 3798 /* An IDENTITY domain must pass through */ 3799 if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) { 3800 switch (group->domain->cookie_type) { 3801 case IOMMU_COOKIE_DMA_MSI: 3802 case IOMMU_COOKIE_DMA_IOVA: 3803 ret = iommu_dma_sw_msi(group->domain, desc, msi_addr); 3804 break; 3805 case IOMMU_COOKIE_IOMMUFD: 3806 ret = iommufd_sw_msi(group->domain, desc, msi_addr); 3807 break; 3808 default: 3809 ret = -EOPNOTSUPP; 3810 break; 3811 } 3812 } 3813 mutex_unlock(&group->mutex); 3814 return ret; 3815 } 3816 #endif /* CONFIG_IRQ_MSI_IOMMU */ 3817
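/*
 * Usage note (illustrative sketch, not part of the original file): a typical
 * caller of the mapping API above maps a physically contiguous range into an
 * unmanaged domain and later removes it with a matching unmap. The names and
 * sizes below are made up; iova, paddr and size must all be aligned to the
 * domain's minimum page size as checked in __iommu_map()/__iommu_unmap().
 *
 *	ret = iommu_map(my_domain, iova, paddr, SZ_2M,
 *			IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *	...
 *	unmapped = iommu_unmap(my_domain, iova, SZ_2M);
 *	WARN_ON(unmapped != SZ_2M);
 */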