// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)    "iommu: " fmt

#include <linux/amba/bus.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitops.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/fsl/mc.h>
#include <linux/module.h>
#include <linux/cc_platform.h>
#include <linux/cdx/cdx_bus.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>
#include <uapi/linux/iommufd.h>
#include <linux/generic_pt/iommu.h>

#include "dma-iommu.h"
#include "iommu-priv.h"

/* kset that every group kobject is placed in by iommu_group_alloc() */
static struct kset *iommu_group_kset;
/* Allocator for iommu_group::id */
static DEFINE_IDA(iommu_group_ida);
static DEFINE_IDA(iommu_global_pasid_ida);

/* Default domain type; reported at boot by iommu_subsys_init() */
static unsigned int iommu_def_domain_type __read_mostly;
/* Strict vs lazy invalidation policy; overridable with "iommu.strict" */
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
/* Bitmask of IOMMU_CMD_LINE_* flags recording what the command line set */
static u32 iommu_cmd_line __read_mostly;

/* Tags used with xa_tag_pointer() in group->pasid_array */
enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 };

/*
 * An IOMMU group. Field notes below describe only how the fields are used by
 * the code visible in this file.
 */
struct iommu_group {
	struct kobject kobj;		/* sysfs object, named after @id */
	struct kobject *devices_kobj;	/* "devices" child; used for refcounting */
	struct list_head devices;	/* list of struct group_device */
	struct xarray pasid_array;	/* entries tagged IOMMU_PASID_ARRAY_* */
	struct mutex mutex;		/* held while walking/modifying @devices */
	void *iommu_data;		/* driver data, see iommu_group_set_iommudata() */
	void (*iommu_data_release)(void *iommu_data);
	char *name;			/* optional, shown in the sysfs "name" file */
	int id;				/* allocated from iommu_group_ida */
	struct iommu_domain *default_domain;
	struct iommu_domain *blocking_domain;
	struct iommu_domain *domain;	/* currently set domain, may be NULL */
	struct list_head entry;		/* link on a caller-supplied group_list */
	unsigned int owner_cnt;
	/*
	 * Number of devices in the group undergoing or awaiting recovery.
	 * If non-zero, concurrent domain attachments are rejected.
	 */
	unsigned int recovery_cnt;
	void *owner;
};

/* Per-device membership record linked on iommu_group::devices */
struct group_device {
	struct list_head list;
	struct device *dev;
	char *name;
	/*
	 * Device is blocked for a pending recovery while its group->domain is
	 * retained. This can happen when:
	 *  - Device is undergoing a reset
	 */
	bool blocked;
	unsigned int reset_depth;
};

/* Iterate over each struct group_device in a struct iommu_group */
#define for_each_group_device(group, pos) \
	list_for_each_entry(pos, &(group)->devices, list)

/*
 * Look up the group_device that represents @dev in dev->iommu_group.
 * The caller must hold the group mutex. Returns NULL when @dev is not on the
 * group's device list.
 */
static struct group_device *__dev_to_gdev(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct group_device *gdev;

	lockdep_assert_held(&group->mutex);

	for_each_group_device(group, gdev) {
		if (gdev->dev == dev)
			return gdev;
	}
	return NULL;
}

/* sysfs attribute whose callbacks take the iommu_group rather than a kobject */
struct iommu_group_attribute {
	struct attribute attr;
	ssize_t (*show)(struct iommu_group *group, char *buf);
	ssize_t (*store)(struct iommu_group *group,
			 const char *buf, size_t count);
};

/*
 * Strings printed in the reserved_regions sysfs file, indexed by region type.
 * Note that IOMMU_RESV_MSI and IOMMU_RESV_SW_MSI both print as "msi".
 */
static const char * const iommu_group_resv_type_string[] = {
	[IOMMU_RESV_DIRECT]			= "direct",
	[IOMMU_RESV_DIRECT_RELAXABLE]		= "direct-relaxable",
	[IOMMU_RESV_RESERVED]			= "reserved",
	[IOMMU_RESV_MSI]			= "msi",
	[IOMMU_RESV_SW_MSI]			= "msi",
};

/* Bits of iommu_cmd_line */
#define IOMMU_CMD_LINE_DMA_API		BIT(0)
#define IOMMU_CMD_LINE_STRICT		BIT(1)

static int bus_iommu_probe(const struct bus_type *bus);
static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev, struct iommu_domain *old);
static int __iommu_attach_group(struct iommu_domain *domain,
				struct
				iommu_group *group);
static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev,
						       unsigned int type,
						       unsigned int flags);

/* Flag bits for the @flags argument of the set_domain helpers below */
enum {
	IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
};

static int __iommu_device_set_domain(struct iommu_group *group,
				     struct device *dev,
				     struct iommu_domain *new_domain,
				     struct iommu_domain *old_domain,
				     unsigned int flags);
static int __iommu_group_set_domain_internal(struct iommu_group *group,
					     struct iommu_domain *new_domain,
					     unsigned int flags);

/*
 * Set @new_domain on @group with no flags. Returns whatever
 * __iommu_group_set_domain_internal() returns.
 */
static int __iommu_group_set_domain(struct iommu_group *group,
				    struct iommu_domain *new_domain)
{
	return __iommu_group_set_domain_internal(group, new_domain, 0);
}

/*
 * Set @new_domain on @group with IOMMU_SET_DOMAIN_MUST_SUCCEED. There is no
 * return value; a non-zero result from the internal helper triggers WARN_ON().
 */
static void __iommu_group_set_domain_nofail(struct iommu_group *group,
					    struct iommu_domain *new_domain)
{
	WARN_ON(__iommu_group_set_domain_internal(
		group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
}

static int iommu_setup_default_domain(struct iommu_group *group,
				      int target_type);
static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
					       struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
				      const char *buf, size_t count);
static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
						     struct device *dev);
static void __iommu_group_free_device(struct iommu_group *group,
				      struct group_device *grp_dev);
static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
			      const struct iommu_ops *ops);

/* Define a struct iommu_group_attribute named iommu_group_attr_<_name> */
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
struct iommu_group_attribute iommu_group_attr_##_name =		\
	__ATTR(_name, _mode, _show, _store)

/* Convert an embedded attribute / kobject back to its containing object */
#define to_iommu_group_attr(_attr)	\
	container_of(_attr, struct iommu_group_attribute, attr)
#define to_iommu_group(_kobj)		\
	container_of(_kobj, struct iommu_group, kobj)

static
LIST_HEAD(iommu_device_list);	/* registered struct iommu_device instances */
static DEFINE_SPINLOCK(iommu_device_lock);	/* taken around iommu_device_list updates */

/*
 * Bus types handled by the IOMMU core: each gets a notifier in
 * iommu_subsys_init() and is probed by iommu_device_register().
 */
static const struct bus_type * const iommu_buses[] = {
	&platform_bus_type,
#ifdef CONFIG_PCI
	&pci_bus_type,
#endif
#ifdef CONFIG_ARM_AMBA
	&amba_bustype,
#endif
#ifdef CONFIG_FSL_MC_BUS
	&fsl_mc_bus_type,
#endif
#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
	&host1x_context_device_bus_type,
#endif
#ifdef CONFIG_CDX_BUS
	&cdx_bus_type,
#endif
};

/*
 * Use a function instead of an array here because the domain-type is a
 * bit-field, so an array would waste memory.
 *
 * Returns a constant human-readable name for domain type @t; both DMA and
 * DMA_FQ map to "Translated", and unrecognised values map to "Unknown".
 */
static const char *iommu_domain_type_str(unsigned int t)
{
	switch (t) {
	case IOMMU_DOMAIN_BLOCKED:
		return "Blocked";
	case IOMMU_DOMAIN_IDENTITY:
		return "Passthrough";
	case IOMMU_DOMAIN_UNMANAGED:
		return "Unmanaged";
	case IOMMU_DOMAIN_DMA:
	case IOMMU_DOMAIN_DMA_FQ:
		return "Translated";
	case IOMMU_DOMAIN_PLATFORM:
		return "Platform";
	default:
		return "Unknown";
	}
}

/*
 * Subsystem initcall: settle the default domain type, log the resulting
 * policy, and register iommu_bus_notifier on every bus in iommu_buses[].
 */
static int __init iommu_subsys_init(void)
{
	struct notifier_block *nb;

	/* Only apply the Kconfig default if the command line did not choose */
	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
			iommu_set_default_passthrough(false);
		else
			iommu_set_default_translated(false);

		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
			iommu_set_default_translated(false);
		}
	}

	/* Translated default with non-strict invalidation selects DMA_FQ */
	if (!iommu_default_passthrough() && !iommu_dma_strict)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;

	pr_info("Default domain type: %s%s\n",
		iommu_domain_type_str(iommu_def_domain_type),
		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
			" (set via kernel command line)" : "");

	if (!iommu_default_passthrough())
		pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
			iommu_dma_strict ? "strict" : "lazy",
			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
				" (set via kernel command line)" : "");

	/* One notifier_block per bus; the array is never freed in this function */
	nb = kzalloc_objs(*nb, ARRAY_SIZE(iommu_buses));
	if (!nb)
		return -ENOMEM;

	iommu_debug_init();

	/* NOTE(review): bus_register_notifier() return value is not checked here */
	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
		nb[i].notifier_call = iommu_bus_notifier;
		bus_register_notifier(iommu_buses[i], &nb[i]);
	}

	return 0;
}
subsys_initcall(iommu_subsys_init);

/*
 * bus_for_each_dev() callback used by iommu_device_unregister(): release
 * @dev if it is bound to the iommu_device passed as @data. Always returns 0
 * so that iteration continues over every device on the bus.
 */
static int remove_iommu_group(struct device *dev, void *data)
{
	if (dev->iommu && dev->iommu->iommu_dev == data)
		iommu_release_device(dev);

	return 0;
}

/**
 * iommu_device_register() - Register an IOMMU hardware instance
 * @iommu: IOMMU handle for the instance
 * @ops:   IOMMU ops to associate with the instance
 * @hwdev: (optional) actual instance device, used for fwnode lookup
 *
 * Return: 0 on success, or an error.
 */
int iommu_device_register(struct iommu_device *iommu,
			  const struct iommu_ops *ops, struct device *hwdev)
{
	int err = 0;

	/* We need to be able to take module references appropriately */
	if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner))
		return -EINVAL;

	iommu->ops = ops;
	if (hwdev)
		iommu->fwnode = dev_fwnode(hwdev);

	spin_lock(&iommu_device_lock);
	list_add_tail(&iommu->list, &iommu_device_list);
	spin_unlock(&iommu_device_lock);

	/* Probe each bus in turn, stopping at the first failure */
	for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++)
		err = bus_iommu_probe(iommu_buses[i]);
	/* On failure undo the registration; only mark ready on full success */
	if (err)
		iommu_device_unregister(iommu);
	else
		WRITE_ONCE(iommu->ready, true);
	return err;
}
EXPORT_SYMBOL_GPL(iommu_device_register);

/*
 * Undo iommu_device_register(): release every device on the known buses that
 * is bound to @iommu, drop @iommu from iommu_device_list, and put the
 * instance's singleton group reference.
 */
void iommu_device_unregister(struct iommu_device *iommu)
{
	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
		bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group);

	spin_lock(&iommu_device_lock);
	list_del(&iommu->list);
	spin_unlock(&iommu_device_lock);

	/* Pairs with the alloc in generic_single_device_group() */
	iommu_group_put(iommu->singleton_group);
	iommu->singleton_group = NULL;
}
EXPORT_SYMBOL_GPL(iommu_device_unregister);

#if IS_ENABLED(CONFIG_IOMMUFD_TEST)
/*
 * Counterpart of iommu_device_register_bus(): unregister the bus notifier,
 * remove the software fwnode stored in @iommu, then unregister @iommu.
 */
void iommu_device_unregister_bus(struct iommu_device *iommu,
				 const struct bus_type *bus,
				 struct notifier_block *nb)
{
	bus_unregister_notifier(bus, nb);
	fwnode_remove_software_node(iommu->fwnode);
	iommu_device_unregister(iommu);
}
EXPORT_SYMBOL_GPL(iommu_device_unregister_bus);

/*
 * Register an iommu driver against a single bus. This is only used by iommufd
 * selftest to create a mock iommu driver. The caller must provide
 * some memory to hold a notifier_block.
346 */ 347 int iommu_device_register_bus(struct iommu_device *iommu, 348 const struct iommu_ops *ops, 349 const struct bus_type *bus, 350 struct notifier_block *nb) 351 { 352 int err; 353 354 iommu->ops = ops; 355 nb->notifier_call = iommu_bus_notifier; 356 err = bus_register_notifier(bus, nb); 357 if (err) 358 return err; 359 360 iommu->fwnode = fwnode_create_software_node(NULL, NULL); 361 if (IS_ERR(iommu->fwnode)) { 362 bus_unregister_notifier(bus, nb); 363 return PTR_ERR(iommu->fwnode); 364 } 365 366 spin_lock(&iommu_device_lock); 367 list_add_tail(&iommu->list, &iommu_device_list); 368 spin_unlock(&iommu_device_lock); 369 370 err = bus_iommu_probe(bus); 371 if (err) { 372 iommu_device_unregister_bus(iommu, bus, nb); 373 return err; 374 } 375 WRITE_ONCE(iommu->ready, true); 376 return 0; 377 } 378 EXPORT_SYMBOL_GPL(iommu_device_register_bus); 379 380 int iommu_mock_device_add(struct device *dev, struct iommu_device *iommu) 381 { 382 int rc; 383 384 mutex_lock(&iommu_probe_device_lock); 385 rc = iommu_fwspec_init(dev, iommu->fwnode); 386 mutex_unlock(&iommu_probe_device_lock); 387 388 if (rc) 389 return rc; 390 391 rc = device_add(dev); 392 if (rc) 393 iommu_fwspec_free(dev); 394 return rc; 395 } 396 EXPORT_SYMBOL_GPL(iommu_mock_device_add); 397 #endif 398 399 static struct dev_iommu *dev_iommu_get(struct device *dev) 400 { 401 struct dev_iommu *param = dev->iommu; 402 403 lockdep_assert_held(&iommu_probe_device_lock); 404 405 if (param) 406 return param; 407 408 param = kzalloc_obj(*param); 409 if (!param) 410 return NULL; 411 412 mutex_init(¶m->lock); 413 dev->iommu = param; 414 return param; 415 } 416 417 void dev_iommu_free(struct device *dev) 418 { 419 struct dev_iommu *param = dev->iommu; 420 421 dev->iommu = NULL; 422 if (param->fwspec) { 423 fwnode_handle_put(param->fwspec->iommu_fwnode); 424 kfree(param->fwspec); 425 } 426 kfree(param); 427 } 428 429 /* 430 * Internal equivalent of device_iommu_mapped() for when we care that a device 431 * actually has 
API ops, and don't want false positives from VFIO-only groups. 432 */ 433 static bool dev_has_iommu(struct device *dev) 434 { 435 return dev->iommu && dev->iommu->iommu_dev; 436 } 437 438 static u32 dev_iommu_get_max_pasids(struct device *dev) 439 { 440 u32 max_pasids = 0, bits = 0; 441 int ret; 442 443 if (dev_is_pci(dev)) { 444 ret = pci_max_pasids(to_pci_dev(dev)); 445 if (ret > 0) 446 max_pasids = ret; 447 } else { 448 ret = device_property_read_u32(dev, "pasid-num-bits", &bits); 449 if (!ret) 450 max_pasids = 1UL << bits; 451 } 452 453 return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); 454 } 455 456 void dev_iommu_priv_set(struct device *dev, void *priv) 457 { 458 /* FSL_PAMU does something weird */ 459 if (!IS_ENABLED(CONFIG_FSL_PAMU)) 460 lockdep_assert_held(&iommu_probe_device_lock); 461 dev->iommu->priv = priv; 462 } 463 EXPORT_SYMBOL_GPL(dev_iommu_priv_set); 464 465 /* 466 * Init the dev->iommu and dev->iommu_group in the struct device and get the 467 * driver probed 468 */ 469 static int iommu_init_device(struct device *dev) 470 { 471 const struct iommu_ops *ops; 472 struct iommu_device *iommu_dev; 473 struct iommu_group *group; 474 int ret; 475 476 if (!dev_iommu_get(dev)) 477 return -ENOMEM; 478 /* 479 * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing 480 * is buried in the bus dma_configure path. Properly unpicking that is 481 * still a big job, so for now just invoke the whole thing. The device 482 * already having a driver bound means dma_configure has already run and 483 * found no IOMMU to wait for, so there's no point calling it again. 
	 */
	if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) {
		/* The probe lock is dropped around the bus callback */
		mutex_unlock(&iommu_probe_device_lock);
		dev->bus->dma_configure(dev);
		mutex_lock(&iommu_probe_device_lock);
		/* If another instance finished the job for us, skip it */
		if (!dev->iommu || dev->iommu_group)
			return -ENODEV;
	}
	/*
	 * At this point, relevant devices either now have a fwspec which will
	 * match ops registered with a non-NULL fwnode, or we can reasonably
	 * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
	 * be present, and that any of their registered instances has suitable
	 * ops for probing, and thus cheekily co-opt the same mechanism.
	 */
	ops = iommu_fwspec_ops(dev->iommu->fwspec);
	if (!ops) {
		ret = -ENODEV;
		goto err_free;
	}

	/* Hold the driver module for as long as the device stays probed */
	if (!try_module_get(ops->owner)) {
		ret = -EINVAL;
		goto err_free;
	}

	iommu_dev = ops->probe_device(dev);
	if (IS_ERR(iommu_dev)) {
		ret = PTR_ERR(iommu_dev);
		goto err_module_put;
	}
	dev->iommu->iommu_dev = iommu_dev;

	ret = iommu_device_link(iommu_dev, dev);
	if (ret)
		goto err_release;

	/* A NULL group from the driver is treated as -EINVAL (and warned once) */
	group = ops->device_group(dev);
	if (WARN_ON_ONCE(group == NULL))
		group = ERR_PTR(-EINVAL);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto err_unlink;
	}
	dev->iommu_group = group;

	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
	if (ops->is_attach_deferred)
		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
	return 0;

	/* Unwind in reverse order of the setup steps above */
err_unlink:
	iommu_device_unlink(iommu_dev, dev);
err_release:
	if (ops->release_device)
		ops->release_device(dev);
err_module_put:
	module_put(ops->owner);
err_free:
	dev->iommu->iommu_dev = NULL;
	dev_iommu_free(dev);
	return ret;
}

/*
 * Undo iommu_init_device() for @dev. Called with the group mutex held; the
 * caller remains responsible for putting the group reference.
 */
static void iommu_deinit_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	lockdep_assert_held(&group->mutex);

	iommu_device_unlink(dev->iommu->iommu_dev, dev);

	/*
	 * release_device() must stop using any attached domain on the device.
	 * If there are still other devices in the group, they are not affected
	 * by this callback.
	 *
	 * If the iommu driver provides release_domain, the core code ensures
	 * that domain is attached prior to calling release_device. Drivers can
	 * use this to enforce a translation on the idle iommu. Typically, the
	 * global static blocked_domain is a good choice.
	 *
	 * Otherwise, the iommu driver must set the device to either an identity
	 * or a blocking translation in release_device() and stop using any
	 * domain pointer, as it is going to be freed.
	 *
	 * Regardless, if a delayed attach never occurred, then the release
	 * should still avoid touching any hardware configuration either.
	 */
	if (!dev->iommu->attach_deferred && ops->release_domain) {
		struct iommu_domain *release_domain = ops->release_domain;

		/*
		 * If the device requires direct mappings then it should not
		 * be parked on a BLOCKED domain during release as that would
		 * break the direct mappings.
		 */
		if (dev->iommu->require_direct && ops->identity_domain &&
		    release_domain == ops->blocked_domain)
			release_domain = ops->identity_domain;

		/* The group's current domain is passed as the old domain */
		release_domain->ops->attach_dev(release_domain, dev,
						group->domain);
	}

	if (ops->release_device)
		ops->release_device(dev);

	/*
	 * If this is the last driver to use the group then we must free the
	 * domains before we do the module_put().
	 */
	if (list_empty(&group->devices)) {
		if (group->default_domain) {
			iommu_domain_free(group->default_domain);
			group->default_domain = NULL;
		}
		if (group->blocking_domain) {
			iommu_domain_free(group->blocking_domain);
			group->blocking_domain = NULL;
		}
		group->domain = NULL;
	}

	/* Caller must put iommu_group */
	dev->iommu_group = NULL;
	module_put(ops->owner);
	dev_iommu_free(dev);
#ifdef CONFIG_IOMMU_DMA
	dev->dma_iommu = false;
#endif
}

/*
 * Decode a tagged pasid_array entry into its iommu_domain. An entry tagged
 * IOMMU_PASID_ARRAY_DOMAIN is the domain pointer itself; any other entry is
 * treated as a struct iommu_attach_handle and its ->domain is returned.
 */
static struct iommu_domain *pasid_array_entry_to_domain(void *entry)
{
	if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN)
		return xa_untag_pointer(entry);
	return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain;
}

/* Global lock serialising device probing; see __iommu_probe_device() */
DEFINE_MUTEX(iommu_probe_device_lock);

/*
 * Probe @dev and add it to its group. The caller must hold
 * iommu_probe_device_lock. When @group_list is non-NULL, default-domain
 * setup for a group that has none yet is deferred to the caller by queueing
 * the group on @group_list. Returns 0 on success (including when @dev is
 * already in a group) or a negative errno.
 */
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
	struct iommu_group *group;
	struct group_device *gdev;
	int ret;

	/*
	 * Serialise to avoid races between IOMMU drivers registering in
	 * parallel and/or the "replay" calls from ACPI/OF code via client
	 * driver probe. Once the latter have been cleaned up we should
	 * probably be able to use device_lock() here to minimise the scope,
	 * but for now enforcing a simple global ordering is fine.
	 */
	lockdep_assert_held(&iommu_probe_device_lock);

	/* Device is probed already if in a group */
	if (dev->iommu_group)
		return 0;

	ret = iommu_init_device(dev);
	if (ret)
		return ret;
	/*
	 * And if we do now see any replay calls, they would indicate someone
	 * misusing the dma_configure path outside bus code.
	 */
	if (dev->driver)
		dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n");

	group = dev->iommu_group;
	/* Allocated before taking the mutex; the result is checked under it */
	gdev = iommu_group_alloc_device(group, dev);
	mutex_lock(&group->mutex);
	if (IS_ERR(gdev)) {
		ret = PTR_ERR(gdev);
		goto err_put_group;
	}

	/*
	 * The gdev must be in the list before calling
	 * iommu_setup_default_domain()
	 */
	list_add_tail(&gdev->list, &group->devices);
	WARN_ON(group->default_domain && !group->domain);
	/* NOTE(review): the return value of the direct-mapping setup is ignored */
	if (group->default_domain)
		iommu_create_device_direct_mappings(group->default_domain, dev);
	if (group->domain) {
		/* Group already has a domain set: bring the new device onto it */
		ret = __iommu_device_set_domain(group, dev, group->domain, NULL,
						0);
		if (ret)
			goto err_remove_gdev;
	} else if (!group->default_domain && !group_list) {
		/* No deferral requested: set up the default domain right away */
		ret = iommu_setup_default_domain(group, 0);
		if (ret)
			goto err_remove_gdev;
	} else if (!group->default_domain) {
		/*
		 * With a group_list argument we defer the default_domain setup
		 * to the caller by providing a de-duplicated list of groups
		 * that need further setup.
		 */
		/* An empty ->entry means the group is not queued on a list yet */
		if (list_empty(&group->entry))
			list_add_tail(&group->entry, group_list);
	}

	if (group->default_domain)
		iommu_setup_dma_ops(dev, group->default_domain);

	mutex_unlock(&group->mutex);

	return 0;

err_remove_gdev:
	list_del(&gdev->list);
	__iommu_group_free_device(group, gdev);
err_put_group:
	/* iommu_deinit_device() asserts that the group mutex is held */
	iommu_deinit_device(dev);
	mutex_unlock(&group->mutex);
	iommu_group_put(group);

	return ret;
}

/*
 * Probe a single device: take iommu_probe_device_lock around
 * __iommu_probe_device() with no deferred group list, then call the
 * driver's optional probe_finalize() outside the lock.
 * Returns 0 on success or a negative errno.
 */
int iommu_probe_device(struct device *dev)
{
	const struct iommu_ops *ops;
	int ret;

	mutex_lock(&iommu_probe_device_lock);
	ret = __iommu_probe_device(dev, NULL);
	mutex_unlock(&iommu_probe_device_lock);
	if (ret)
		return ret;

	ops = dev_iommu_ops(dev);
	if (ops->probe_finalize)
		ops->probe_finalize(dev);

	return 0;
}

/*
 * Tear down a group_device: remove both sysfs links, emit the trace event
 * and free @grp_dev and its name. Unlinking @grp_dev from group->devices is
 * done by the callers before this is called.
 */
static void __iommu_group_free_device(struct iommu_group *group,
				      struct group_device *grp_dev)
{
	struct device *dev = grp_dev->dev;

	sysfs_remove_link(group->devices_kobj, grp_dev->name);
	sysfs_remove_link(&dev->kobj, "iommu_group");

	trace_remove_device_from_group(group->id, dev);

	/*
	 * If the group has become empty then ownership must have been
	 * released, and the current domain must be set back to NULL or
	 * the default domain.
	 */
	if (list_empty(&group->devices))
		WARN_ON(group->owner_cnt ||
			group->domain != group->default_domain);

	kfree(grp_dev->name);
	kfree(grp_dev);
}

/* Remove the iommu_group from the struct device.
 */
static void __iommu_group_remove_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct group_device *device;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		if (device->dev != dev)
			continue;

		list_del(&device->list);
		__iommu_group_free_device(group, device);
		/* Devices without a bound IOMMU only need the group pointer cleared */
		if (dev_has_iommu(dev))
			iommu_deinit_device(dev);
		else
			dev->iommu_group = NULL;
		/* The entry was deleted, so stop iterating immediately */
		break;
	}
	mutex_unlock(&group->mutex);

	/*
	 * Pairs with the get in iommu_init_device() or
	 * iommu_group_add_device()
	 */
	iommu_group_put(group);
}

/*
 * Release all IOMMU core state for @dev: remove it from its group if it has
 * one, then free whatever is left in dev->iommu.
 */
static void iommu_release_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (group)
		__iommu_group_remove_device(dev);

	/* Free any fwspec if no iommu_driver was ever attached */
	if (dev->iommu)
		dev_iommu_free(dev);
}

/*
 * "iommu.passthrough=" handler: a true value selects passthrough as the
 * default, a false value selects translated. Returns the kstrtobool() error
 * for unparsable input.
 */
static int __init iommu_set_def_domain_type(char *str)
{
	bool pt;
	int ret;

	ret = kstrtobool(str, &pt);
	if (ret)
		return ret;

	if (pt)
		iommu_set_default_passthrough(true);
	else
		iommu_set_default_translated(true);

	return 0;
}
early_param("iommu.passthrough", iommu_set_def_domain_type);

/*
 * "iommu.strict=" handler: parse the value into iommu_dma_strict and, on
 * success, record that the command line set it.
 */
static int __init iommu_dma_setup(char *str)
{
	int ret = kstrtobool(str, &iommu_dma_strict);

	if (!ret)
		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
	return ret;
}
early_param("iommu.strict", iommu_dma_setup);

/*
 * Force strict invalidation: set iommu_dma_strict and downgrade a DMA_FQ
 * default domain type to plain DMA.
 */
void iommu_set_dma_strict(void)
{
	iommu_dma_strict = true;
	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
}

/*
 * sysfs_ops ->show: dispatch to the group attribute's show() callback.
 * Returns -EIO when the attribute has no show() callback.
 */
static ssize_t iommu_group_attr_show(struct kobject *kobj,
				     struct attribute *__attr, char *buf)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->show)
ret = attr->show(group, buf); 835 return ret; 836 } 837 838 static ssize_t iommu_group_attr_store(struct kobject *kobj, 839 struct attribute *__attr, 840 const char *buf, size_t count) 841 { 842 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 843 struct iommu_group *group = to_iommu_group(kobj); 844 ssize_t ret = -EIO; 845 846 if (attr->store) 847 ret = attr->store(group, buf, count); 848 return ret; 849 } 850 851 static const struct sysfs_ops iommu_group_sysfs_ops = { 852 .show = iommu_group_attr_show, 853 .store = iommu_group_attr_store, 854 }; 855 856 static int iommu_group_create_file(struct iommu_group *group, 857 struct iommu_group_attribute *attr) 858 { 859 return sysfs_create_file(&group->kobj, &attr->attr); 860 } 861 862 static void iommu_group_remove_file(struct iommu_group *group, 863 struct iommu_group_attribute *attr) 864 { 865 sysfs_remove_file(&group->kobj, &attr->attr); 866 } 867 868 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 869 { 870 return sysfs_emit(buf, "%s\n", group->name); 871 } 872 873 /** 874 * iommu_insert_resv_region - Insert a new region in the 875 * list of reserved regions. 876 * @new: new region to insert 877 * @regions: list of regions 878 * 879 * Elements are sorted by start address and overlapping segments 880 * of the same type are merged. 
 *
 * A copy of @new is allocated and inserted; @new itself is not linked into
 * @regions. Returns 0 on success or -ENOMEM if the copy cannot be allocated.
 */
static int iommu_insert_resv_region(struct iommu_resv_region *new,
				    struct list_head *regions)
{
	struct iommu_resv_region *iter, *tmp, *nr, *top;
	LIST_HEAD(stack);

	nr = iommu_alloc_resv_region(new->start, new->length,
				     new->prot, new->type, GFP_KERNEL);
	if (!nr)
		return -ENOMEM;

	/* First add the new element based on start address sorting */
	list_for_each_entry(iter, regions, list) {
		if (nr->start < iter->start ||
		    (nr->start == iter->start && nr->type <= iter->type))
			break;
	}
	/*
	 * Insert before the element we stopped at. If the loop ran to the end,
	 * &iter->list is the list head and this appends at the tail.
	 */
	list_add_tail(&nr->list, &iter->list);

	/* Merge overlapping segments of type nr->type in @regions, if any */
	list_for_each_entry_safe(iter, tmp, regions, list) {
		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;

		/* no merge needed on elements of different types than @new */
		if (iter->type != new->type) {
			list_move_tail(&iter->list, &stack);
			continue;
		}

		/* look for the last stack element of same type as @iter */
		list_for_each_entry_reverse(top, &stack, list)
			if (top->type == iter->type)
				goto check_overlap;

		/* Nothing of this type on the stack yet: nothing to merge with */
		list_move_tail(&iter->list, &stack);
		continue;

check_overlap:
		top_end = top->start + top->length - 1;

		if (iter->start > top_end + 1) {
			/* Neither overlapping nor adjacent: keep as its own entry */
			list_move_tail(&iter->list, &stack);
		} else {
			/* Extend @top to cover @iter and drop @iter */
			top->length = max(top_end, iter_end) - top->start + 1;
			list_del(&iter->list);
			kfree(iter);
		}
	}
	/* Everything that survived is on @stack; move it back to @regions */
	list_splice(&stack, regions);
	return 0;
}

/*
 * Insert every region of @dev_resv_regions into @group_resv_regions,
 * stopping at and returning the first error.
 */
static int
iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
				 struct list_head *group_resv_regions)
{
	struct iommu_resv_region *entry;
	int ret = 0;

	list_for_each_entry(entry, dev_resv_regions, list) {
		ret = iommu_insert_resv_region(entry, group_resv_regions);
		if (ret)
			break;
	}
	return ret;
}

/*
 * Collect the reserved regions of every device in @group into @head, sorted
 * and merged by iommu_insert_resv_region(). Returns 0 or a negative errno;
 * on error, regions inserted so far are left on @head.
 */
int iommu_get_group_resv_regions(struct iommu_group *group,
				 struct list_head *head)
{
	struct group_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		struct list_head dev_resv_regions;

		/*
		 * Non-API groups still expose reserved_regions in sysfs,
		 * so filter out calls that get here that way.
		 */
		if (!dev_has_iommu(device->dev))
			break;

		INIT_LIST_HEAD(&dev_resv_regions);
		iommu_get_resv_regions(device->dev, &dev_resv_regions);
		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
		/* The per-device list is always released, also on error */
		iommu_put_resv_regions(device->dev, &dev_resv_regions);
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);

/*
 * show() callback of the "reserved_regions" attribute: one line per region
 * with start address, inclusive end address and type string.
 */
static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
					     char *buf)
{
	struct iommu_resv_region *region, *next;
	struct list_head group_resv_regions;
	int offset = 0;

	INIT_LIST_HEAD(&group_resv_regions);
	/*
	 * NOTE(review): the return value is ignored, so after a failure the
	 * regions gathered so far are still printed.
	 */
	iommu_get_group_resv_regions(group, &group_resv_regions);

	/* Print and free each region in one pass */
	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
		offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
					(long long)region->start,
					(long long)(region->start +
						    region->length - 1),
					iommu_group_resv_type_string[region->type]);
		kfree(region);
	}

	return offset;
}

/*
 * show() callback of the "type" attribute: print the type of the group's
 * default domain, or "unknown" if there is no default domain or its type is
 * not one of those listed below.
 */
static ssize_t iommu_group_show_type(struct iommu_group *group,
				     char *buf)
{
	char *type = "unknown";

	mutex_lock(&group->mutex);
	if (group->default_domain) {
		switch (group->default_domain->type) {
		case IOMMU_DOMAIN_BLOCKED:
			type = "blocked";
			break;
		case IOMMU_DOMAIN_IDENTITY:
			type = "identity";
			break;
		case IOMMU_DOMAIN_UNMANAGED:
			type = "unmanaged";
			break;
		case IOMMU_DOMAIN_DMA:
			type = "DMA";
			break;
		case IOMMU_DOMAIN_DMA_FQ:
			type = "DMA-FQ";
			break;
		}
	}
	mutex_unlock(&group->mutex);

	return sysfs_emit(buf, "%s\n", type);
}

/* Read-only; only created once a name is set, see iommu_group_set_name() */
static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);

static IOMMU_GROUP_ATTR(reserved_regions, 0444,
			iommu_group_show_resv_regions, NULL);

static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
			iommu_group_store_type);

/*
 * kobject release callback for a group: run the driver's iommu_data release
 * hook if one was set, return the group id to the IDA, and free the group.
 */
static void iommu_group_release(struct kobject *kobj)
{
	struct iommu_group *group = to_iommu_group(kobj);

	pr_debug("Releasing group %d\n", group->id);

	if (group->iommu_data_release)
		group->iommu_data_release(group->iommu_data);

	ida_free(&iommu_group_ida, group->id);

	/* Domains are free'd by iommu_deinit_device() */
	WARN_ON(group->default_domain);
	WARN_ON(group->blocking_domain);

	kfree(group->name);
	kfree(group);
}

/* kobj_type for group kobjects: group sysfs ops plus the release above */
static const struct kobj_type iommu_group_ktype = {
	.sysfs_ops = &iommu_group_sysfs_ops,
	.release = iommu_group_release,
};

/**
 * iommu_group_alloc - Allocate a new group
 *
 * This function is called by an iommu driver to allocate a new iommu
 * group.  The iommu group represents the minimum granularity of the iommu.
 * Upon successful return, the caller holds a reference to the supplied
 * group in order to hold the group until devices are added.  Use
 * iommu_group_put() to release this extra reference count, allowing the
 * group to be automatically reclaimed once it has no devices or external
 * references.
 *
 * Return: the new group, or an ERR_PTR() on failure.
 */
struct iommu_group *iommu_group_alloc(void)
{
	struct iommu_group *group;
	int ret;

	group = kzalloc_obj(*group);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->kobj.kset = iommu_group_kset;
	mutex_init(&group->mutex);
	INIT_LIST_HEAD(&group->devices);
	INIT_LIST_HEAD(&group->entry);
	xa_init(&group->pasid_array);

	ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
	if (ret < 0) {
		/* The kobject is not initialised yet, so a plain kfree() is right */
		kfree(group);
		return ERR_PTR(ret);
	}
	group->id = ret;

	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
				   NULL, "%d", group->id);
	if (ret) {
		/* From here on the group is freed via kobject_put() -> .release */
		kobject_put(&group->kobj);
		return ERR_PTR(ret);
	}

	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
	if (!group->devices_kobj) {
		kobject_put(&group->kobj); /* triggers .release & free */
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * The devices_kobj holds a reference on the group kobject, so
	 * as long as that exists so will the group.  We can therefore
	 * use the devices_kobj for reference counting.
	 */
	kobject_put(&group->kobj);

	/* Failures below drop the devices_kobj reference taken above */
	ret = iommu_group_create_file(group,
				      &iommu_group_attr_reserved_regions);
	if (ret) {
		kobject_put(group->devices_kobj);
		return ERR_PTR(ret);
	}

	ret = iommu_group_create_file(group, &iommu_group_attr_type);
	if (ret) {
		kobject_put(group->devices_kobj);
		return ERR_PTR(ret);
	}

	pr_debug("Allocated group %d\n", group->id);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_alloc);

/**
 * iommu_group_get_iommudata - retrieve iommu_data registered for a group
 * @group: the group
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations.  This function provides a way to retrieve it.  Caller
 * should hold a group reference.
1141 */ 1142 void *iommu_group_get_iommudata(struct iommu_group *group) 1143 { 1144 return group->iommu_data; 1145 } 1146 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 1147 1148 /** 1149 * iommu_group_set_iommudata - set iommu_data for a group 1150 * @group: the group 1151 * @iommu_data: new data 1152 * @release: release function for iommu_data 1153 * 1154 * iommu drivers can store data in the group for use when doing iommu 1155 * operations. This function provides a way to set the data after 1156 * the group has been allocated. Caller should hold a group reference. 1157 */ 1158 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 1159 void (*release)(void *iommu_data)) 1160 { 1161 group->iommu_data = iommu_data; 1162 group->iommu_data_release = release; 1163 } 1164 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 1165 1166 /** 1167 * iommu_group_set_name - set name for a group 1168 * @group: the group 1169 * @name: name 1170 * 1171 * Allow iommu driver to set a name for a group. When set it will 1172 * appear in a name attribute file under the group in sysfs. 1173 */ 1174 int iommu_group_set_name(struct iommu_group *group, const char *name) 1175 { 1176 int ret; 1177 1178 if (group->name) { 1179 iommu_group_remove_file(group, &iommu_group_attr_name); 1180 kfree(group->name); 1181 group->name = NULL; 1182 if (!name) 1183 return 0; 1184 } 1185 1186 group->name = kstrdup(name, GFP_KERNEL); 1187 if (!group->name) 1188 return -ENOMEM; 1189 1190 ret = iommu_group_create_file(group, &iommu_group_attr_name); 1191 if (ret) { 1192 kfree(group->name); 1193 group->name = NULL; 1194 return ret; 1195 } 1196 1197 return 0; 1198 } 1199 EXPORT_SYMBOL_GPL(iommu_group_set_name); 1200 1201 static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 1202 struct device *dev) 1203 { 1204 struct iommu_resv_region *entry; 1205 LIST_HEAD(mappings); 1206 unsigned long pg_size; 1207 int ret = 0; 1208 1209 pg_size = domain->pgsize_bitmap ? 
1UL << __ffs(domain->pgsize_bitmap) : 0; 1210 1211 if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size)) 1212 return -EINVAL; 1213 1214 iommu_get_resv_regions(dev, &mappings); 1215 1216 /* We need to consider overlapping regions for different devices */ 1217 list_for_each_entry(entry, &mappings, list) { 1218 dma_addr_t start, end, addr; 1219 size_t map_size = 0; 1220 1221 if (entry->type == IOMMU_RESV_DIRECT) 1222 dev->iommu->require_direct = 1; 1223 1224 if ((entry->type != IOMMU_RESV_DIRECT && 1225 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) || 1226 !iommu_is_dma_domain(domain)) 1227 continue; 1228 1229 start = ALIGN(entry->start, pg_size); 1230 end = ALIGN(entry->start + entry->length, pg_size); 1231 1232 for (addr = start; addr <= end; addr += pg_size) { 1233 phys_addr_t phys_addr; 1234 1235 if (addr == end) 1236 goto map_end; 1237 1238 /* 1239 * Return address by iommu_iova_to_phys for 0 is 1240 * ambiguous. Offset to address 1 if addr is 0. 1241 */ 1242 phys_addr = iommu_iova_to_phys(domain, addr ? 
addr : 1); 1243 if (!phys_addr) { 1244 map_size += pg_size; 1245 continue; 1246 } 1247 1248 map_end: 1249 if (map_size) { 1250 ret = iommu_map(domain, addr - map_size, 1251 addr - map_size, map_size, 1252 entry->prot, GFP_KERNEL); 1253 if (ret) 1254 goto out; 1255 map_size = 0; 1256 } 1257 } 1258 1259 } 1260 out: 1261 iommu_put_resv_regions(dev, &mappings); 1262 1263 return ret; 1264 } 1265 1266 /* This is undone by __iommu_group_free_device() */ 1267 static struct group_device *iommu_group_alloc_device(struct iommu_group *group, 1268 struct device *dev) 1269 { 1270 int ret, i = 0; 1271 struct group_device *device; 1272 1273 device = kzalloc_obj(*device); 1274 if (!device) 1275 return ERR_PTR(-ENOMEM); 1276 1277 device->dev = dev; 1278 1279 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1280 if (ret) 1281 goto err_free_device; 1282 1283 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1284 rename: 1285 if (!device->name) { 1286 ret = -ENOMEM; 1287 goto err_remove_link; 1288 } 1289 1290 ret = sysfs_create_link_nowarn(group->devices_kobj, 1291 &dev->kobj, device->name); 1292 if (ret) { 1293 if (ret == -EEXIST && i >= 0) { 1294 /* 1295 * Account for the slim chance of collision 1296 * and append an instance to the name. 
1297 */ 1298 kfree(device->name); 1299 device->name = kasprintf(GFP_KERNEL, "%s.%d", 1300 kobject_name(&dev->kobj), i++); 1301 goto rename; 1302 } 1303 goto err_free_name; 1304 } 1305 1306 trace_add_device_to_group(group->id, dev); 1307 1308 dev_info(dev, "Adding to iommu group %d\n", group->id); 1309 1310 return device; 1311 1312 err_free_name: 1313 kfree(device->name); 1314 err_remove_link: 1315 sysfs_remove_link(&dev->kobj, "iommu_group"); 1316 err_free_device: 1317 kfree(device); 1318 dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); 1319 return ERR_PTR(ret); 1320 } 1321 1322 /** 1323 * iommu_group_add_device - add a device to an iommu group 1324 * @group: the group into which to add the device (reference should be held) 1325 * @dev: the device 1326 * 1327 * This function is called by an iommu driver to add a device into a 1328 * group. Adding a device increments the group reference count. 1329 */ 1330 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 1331 { 1332 struct group_device *gdev; 1333 1334 gdev = iommu_group_alloc_device(group, dev); 1335 if (IS_ERR(gdev)) 1336 return PTR_ERR(gdev); 1337 1338 iommu_group_ref_get(group); 1339 dev->iommu_group = group; 1340 1341 mutex_lock(&group->mutex); 1342 list_add_tail(&gdev->list, &group->devices); 1343 mutex_unlock(&group->mutex); 1344 return 0; 1345 } 1346 EXPORT_SYMBOL_GPL(iommu_group_add_device); 1347 1348 /** 1349 * iommu_group_remove_device - remove a device from it's current group 1350 * @dev: device to be removed 1351 * 1352 * This function is called by an iommu driver to remove the device from 1353 * it's current group. This decrements the iommu group reference count. 
1354 */ 1355 void iommu_group_remove_device(struct device *dev) 1356 { 1357 struct iommu_group *group = dev->iommu_group; 1358 1359 if (!group) 1360 return; 1361 1362 dev_info(dev, "Removing from iommu group %d\n", group->id); 1363 1364 __iommu_group_remove_device(dev); 1365 } 1366 EXPORT_SYMBOL_GPL(iommu_group_remove_device); 1367 1368 #if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) 1369 /** 1370 * iommu_group_mutex_assert - Check device group mutex lock 1371 * @dev: the device that has group param set 1372 * 1373 * This function is called by an iommu driver to check whether it holds 1374 * group mutex lock for the given device or not. 1375 * 1376 * Note that this function must be called after device group param is set. 1377 */ 1378 void iommu_group_mutex_assert(struct device *dev) 1379 { 1380 struct iommu_group *group = dev->iommu_group; 1381 1382 lockdep_assert_held(&group->mutex); 1383 } 1384 EXPORT_SYMBOL_GPL(iommu_group_mutex_assert); 1385 #endif 1386 1387 static struct device *iommu_group_first_dev(struct iommu_group *group) 1388 { 1389 lockdep_assert_held(&group->mutex); 1390 return list_first_entry(&group->devices, struct group_device, list)->dev; 1391 } 1392 1393 /** 1394 * iommu_group_for_each_dev - iterate over each device in the group 1395 * @group: the group 1396 * @data: caller opaque data to be passed to callback function 1397 * @fn: caller supplied callback function 1398 * 1399 * This function is called by group users to iterate over group devices. 1400 * Callers should hold a reference count to the group during callback. 1401 * The group->mutex is held across callbacks, which will block calls to 1402 * iommu_group_add/remove_device. 
1403 */ 1404 int iommu_group_for_each_dev(struct iommu_group *group, void *data, 1405 int (*fn)(struct device *, void *)) 1406 { 1407 struct group_device *device; 1408 int ret = 0; 1409 1410 mutex_lock(&group->mutex); 1411 for_each_group_device(group, device) { 1412 ret = fn(device->dev, data); 1413 if (ret) 1414 break; 1415 } 1416 mutex_unlock(&group->mutex); 1417 1418 return ret; 1419 } 1420 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 1421 1422 /** 1423 * iommu_group_get - Return the group for a device and increment reference 1424 * @dev: get the group that this device belongs to 1425 * 1426 * This function is called by iommu drivers and users to get the group 1427 * for the specified device. If found, the group is returned and the group 1428 * reference in incremented, else NULL. 1429 */ 1430 struct iommu_group *iommu_group_get(struct device *dev) 1431 { 1432 struct iommu_group *group = dev->iommu_group; 1433 1434 if (group) 1435 kobject_get(group->devices_kobj); 1436 1437 return group; 1438 } 1439 EXPORT_SYMBOL_GPL(iommu_group_get); 1440 1441 /** 1442 * iommu_group_ref_get - Increment reference on a group 1443 * @group: the group to use, must not be NULL 1444 * 1445 * This function is called by iommu drivers to take additional references on an 1446 * existing group. Returns the given group for convenience. 1447 */ 1448 struct iommu_group *iommu_group_ref_get(struct iommu_group *group) 1449 { 1450 kobject_get(group->devices_kobj); 1451 return group; 1452 } 1453 EXPORT_SYMBOL_GPL(iommu_group_ref_get); 1454 1455 /** 1456 * iommu_group_put - Decrement group reference 1457 * @group: the group to use 1458 * 1459 * This function is called by iommu drivers and users to release the 1460 * iommu group. Once the reference count is zero, the group is released. 
1461 */ 1462 void iommu_group_put(struct iommu_group *group) 1463 { 1464 if (group) 1465 kobject_put(group->devices_kobj); 1466 } 1467 EXPORT_SYMBOL_GPL(iommu_group_put); 1468 1469 /** 1470 * iommu_group_id - Return ID for a group 1471 * @group: the group to ID 1472 * 1473 * Return the unique ID for the group matching the sysfs group number. 1474 */ 1475 int iommu_group_id(struct iommu_group *group) 1476 { 1477 return group->id; 1478 } 1479 EXPORT_SYMBOL_GPL(iommu_group_id); 1480 1481 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1482 unsigned long *devfns); 1483 1484 /* 1485 * To consider a PCI device isolated, we require ACS to support Source 1486 * Validation, Request Redirection, Completer Redirection, and Upstream 1487 * Forwarding. This effectively means that devices cannot spoof their 1488 * requester ID, requests and completions cannot be redirected, and all 1489 * transactions are forwarded upstream, even as it passes through a 1490 * bridge where the target device is downstream. 1491 */ 1492 #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) 1493 1494 /* 1495 * For multifunction devices which are not isolated from each other, find 1496 * all the other non-isolated functions and look for existing groups. For 1497 * each function, we also need to look for aliases to or from other devices 1498 * that may already have a group. 
1499 */ 1500 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, 1501 unsigned long *devfns) 1502 { 1503 struct pci_dev *tmp = NULL; 1504 struct iommu_group *group; 1505 1506 if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) 1507 return NULL; 1508 1509 for_each_pci_dev(tmp) { 1510 if (tmp == pdev || tmp->bus != pdev->bus || 1511 PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || 1512 pci_acs_enabled(tmp, REQ_ACS_FLAGS)) 1513 continue; 1514 1515 group = get_pci_alias_group(tmp, devfns); 1516 if (group) { 1517 pci_dev_put(tmp); 1518 return group; 1519 } 1520 } 1521 1522 return NULL; 1523 } 1524 1525 /* 1526 * Look for aliases to or from the given device for existing groups. DMA 1527 * aliases are only supported on the same bus, therefore the search 1528 * space is quite small (especially since we're really only looking at pcie 1529 * device, and therefore only expect multiple slots on the root complex or 1530 * downstream switch ports). It's conceivable though that a pair of 1531 * multifunction devices could have aliases between them that would cause a 1532 * loop. To prevent this, we use a bitmap to track where we've been. 
1533 */ 1534 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1535 unsigned long *devfns) 1536 { 1537 struct pci_dev *tmp = NULL; 1538 struct iommu_group *group; 1539 1540 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 1541 return NULL; 1542 1543 group = iommu_group_get(&pdev->dev); 1544 if (group) 1545 return group; 1546 1547 for_each_pci_dev(tmp) { 1548 if (tmp == pdev || tmp->bus != pdev->bus) 1549 continue; 1550 1551 /* We alias them or they alias us */ 1552 if (pci_devs_are_dma_aliases(pdev, tmp)) { 1553 group = get_pci_alias_group(tmp, devfns); 1554 if (group) { 1555 pci_dev_put(tmp); 1556 return group; 1557 } 1558 1559 group = get_pci_function_alias_group(tmp, devfns); 1560 if (group) { 1561 pci_dev_put(tmp); 1562 return group; 1563 } 1564 } 1565 } 1566 1567 return NULL; 1568 } 1569 1570 struct group_for_pci_data { 1571 struct pci_dev *pdev; 1572 struct iommu_group *group; 1573 }; 1574 1575 /* 1576 * DMA alias iterator callback, return the last seen device. Stop and return 1577 * the IOMMU group if we find one along the way. 1578 */ 1579 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 1580 { 1581 struct group_for_pci_data *data = opaque; 1582 1583 data->pdev = pdev; 1584 data->group = iommu_group_get(&pdev->dev); 1585 1586 return data->group != NULL; 1587 } 1588 1589 /* 1590 * Generic device_group call-back function. It just allocates one 1591 * iommu-group per device. 1592 */ 1593 struct iommu_group *generic_device_group(struct device *dev) 1594 { 1595 return iommu_group_alloc(); 1596 } 1597 EXPORT_SYMBOL_GPL(generic_device_group); 1598 1599 /* 1600 * Generic device_group call-back function. It just allocates one 1601 * iommu-group per iommu driver instance shared by every device 1602 * probed by that iommu driver. 
1603 */ 1604 struct iommu_group *generic_single_device_group(struct device *dev) 1605 { 1606 struct iommu_device *iommu = dev->iommu->iommu_dev; 1607 1608 if (!iommu->singleton_group) { 1609 struct iommu_group *group; 1610 1611 group = iommu_group_alloc(); 1612 if (IS_ERR(group)) 1613 return group; 1614 iommu->singleton_group = group; 1615 } 1616 return iommu_group_ref_get(iommu->singleton_group); 1617 } 1618 EXPORT_SYMBOL_GPL(generic_single_device_group); 1619 1620 /* 1621 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1622 * to find or create an IOMMU group for a device. 1623 */ 1624 struct iommu_group *pci_device_group(struct device *dev) 1625 { 1626 struct pci_dev *pdev = to_pci_dev(dev); 1627 struct group_for_pci_data data; 1628 struct pci_bus *bus; 1629 struct iommu_group *group = NULL; 1630 u64 devfns[4] = { 0 }; 1631 1632 if (WARN_ON(!dev_is_pci(dev))) 1633 return ERR_PTR(-EINVAL); 1634 1635 /* 1636 * Find the upstream DMA alias for the device. A device must not 1637 * be aliased due to topology in order to have its own IOMMU group. 1638 * If we find an alias along the way that already belongs to a 1639 * group, use it. 1640 */ 1641 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1642 return data.group; 1643 1644 pdev = data.pdev; 1645 1646 /* 1647 * Continue upstream from the point of minimum IOMMU granularity 1648 * due to aliases to the point where devices are protected from 1649 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1650 * group, use it. 1651 */ 1652 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1653 if (!bus->self) 1654 continue; 1655 1656 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1657 break; 1658 1659 pdev = bus->self; 1660 1661 group = iommu_group_get(&pdev->dev); 1662 if (group) 1663 return group; 1664 } 1665 1666 /* 1667 * Look for existing groups on device aliases. 
If we alias another 1668 * device or another device aliases us, use the same group. 1669 */ 1670 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1671 if (group) 1672 return group; 1673 1674 /* 1675 * Look for existing groups on non-isolated functions on the same 1676 * slot and aliases of those funcions, if any. No need to clear 1677 * the search bitmap, the tested devfns are still valid. 1678 */ 1679 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1680 if (group) 1681 return group; 1682 1683 /* No shared group found, allocate new */ 1684 return iommu_group_alloc(); 1685 } 1686 EXPORT_SYMBOL_GPL(pci_device_group); 1687 1688 /* Get the IOMMU group for device on fsl-mc bus */ 1689 struct iommu_group *fsl_mc_device_group(struct device *dev) 1690 { 1691 struct device *cont_dev = fsl_mc_cont_dev(dev); 1692 struct iommu_group *group; 1693 1694 group = iommu_group_get(cont_dev); 1695 if (!group) 1696 group = iommu_group_alloc(); 1697 return group; 1698 } 1699 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1700 1701 static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev) 1702 { 1703 const struct iommu_ops *ops = dev_iommu_ops(dev); 1704 struct iommu_domain *domain; 1705 1706 if (ops->identity_domain) 1707 return ops->identity_domain; 1708 1709 if (ops->domain_alloc_identity) { 1710 domain = ops->domain_alloc_identity(dev); 1711 if (IS_ERR(domain)) 1712 return domain; 1713 } else { 1714 return ERR_PTR(-EOPNOTSUPP); 1715 } 1716 1717 iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops); 1718 return domain; 1719 } 1720 1721 static struct iommu_domain * 1722 __iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1723 { 1724 struct device *dev = iommu_group_first_dev(group); 1725 struct iommu_domain *dom; 1726 1727 if (group->default_domain && group->default_domain->type == req_type) 1728 return group->default_domain; 1729 1730 /* 1731 * When allocating the DMA API domain assume that the driver is going to 
1732 * use PASID and make sure the RID's domain is PASID compatible. 1733 */ 1734 if (req_type & __IOMMU_DOMAIN_PAGING) { 1735 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 1736 dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : 0); 1737 1738 /* 1739 * If driver does not support PASID feature then 1740 * try to allocate non-PASID domain 1741 */ 1742 if (PTR_ERR(dom) == -EOPNOTSUPP) 1743 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 0); 1744 1745 return dom; 1746 } 1747 1748 if (req_type == IOMMU_DOMAIN_IDENTITY) 1749 return __iommu_alloc_identity_domain(dev); 1750 1751 return ERR_PTR(-EINVAL); 1752 } 1753 1754 /* 1755 * req_type of 0 means "auto" which means to select a domain based on 1756 * iommu_def_domain_type or what the driver actually supports. 1757 */ 1758 static struct iommu_domain * 1759 iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1760 { 1761 const struct iommu_ops *ops = dev_iommu_ops(iommu_group_first_dev(group)); 1762 struct iommu_domain *dom; 1763 1764 lockdep_assert_held(&group->mutex); 1765 1766 /* 1767 * Allow legacy drivers to specify the domain that will be the default 1768 * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM 1769 * domain. Do not use in new drivers. 
1770 */ 1771 if (ops->default_domain) { 1772 if (req_type != ops->default_domain->type) 1773 return ERR_PTR(-EINVAL); 1774 return ops->default_domain; 1775 } 1776 1777 if (req_type) 1778 return __iommu_group_alloc_default_domain(group, req_type); 1779 1780 /* The driver gave no guidance on what type to use, try the default */ 1781 dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); 1782 if (!IS_ERR(dom)) 1783 return dom; 1784 1785 /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ 1786 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) 1787 return ERR_PTR(-EINVAL); 1788 dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); 1789 if (IS_ERR(dom)) 1790 return dom; 1791 1792 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1793 iommu_def_domain_type, group->name); 1794 return dom; 1795 } 1796 1797 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1798 { 1799 return group->default_domain; 1800 } 1801 1802 static int probe_iommu_group(struct device *dev, void *data) 1803 { 1804 struct list_head *group_list = data; 1805 int ret; 1806 1807 mutex_lock(&iommu_probe_device_lock); 1808 ret = __iommu_probe_device(dev, group_list); 1809 mutex_unlock(&iommu_probe_device_lock); 1810 if (ret == -ENODEV) 1811 ret = 0; 1812 1813 return ret; 1814 } 1815 1816 static int iommu_bus_notifier(struct notifier_block *nb, 1817 unsigned long action, void *data) 1818 { 1819 struct device *dev = data; 1820 1821 if (action == BUS_NOTIFY_ADD_DEVICE) { 1822 int ret; 1823 1824 ret = iommu_probe_device(dev); 1825 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1826 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1827 iommu_release_device(dev); 1828 return NOTIFY_OK; 1829 } 1830 1831 return 0; 1832 } 1833 1834 /* 1835 * Combine the driver's chosen def_domain_type across all the devices in a 1836 * group. Drivers must give a consistent result. 
1837 */ 1838 static int iommu_get_def_domain_type(struct iommu_group *group, 1839 struct device *dev, int cur_type) 1840 { 1841 const struct iommu_ops *ops = dev_iommu_ops(dev); 1842 int type; 1843 1844 if (ops->default_domain) { 1845 /* 1846 * Drivers that declare a global static default_domain will 1847 * always choose that. 1848 */ 1849 type = ops->default_domain->type; 1850 } else { 1851 if (ops->def_domain_type) 1852 type = ops->def_domain_type(dev); 1853 else 1854 return cur_type; 1855 } 1856 if (!type || cur_type == type) 1857 return cur_type; 1858 if (!cur_type) 1859 return type; 1860 1861 dev_err_ratelimited( 1862 dev, 1863 "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n", 1864 iommu_domain_type_str(cur_type), iommu_domain_type_str(type), 1865 group->id); 1866 1867 /* 1868 * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY 1869 * takes precedence. 1870 */ 1871 if (type == IOMMU_DOMAIN_IDENTITY) 1872 return type; 1873 return cur_type; 1874 } 1875 1876 /* 1877 * A target_type of 0 will select the best domain type. 0 can be returned in 1878 * this case meaning the global default should be used. 1879 */ 1880 static int iommu_get_default_domain_type(struct iommu_group *group, 1881 int target_type) 1882 { 1883 struct device *untrusted = NULL; 1884 struct group_device *gdev; 1885 int driver_type = 0; 1886 1887 lockdep_assert_held(&group->mutex); 1888 1889 /* 1890 * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an 1891 * identity_domain and it will automatically become their default 1892 * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain. 1893 * Override the selection to IDENTITY. 
1894 */ 1895 if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { 1896 static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) && 1897 IS_ENABLED(CONFIG_IOMMU_DMA))); 1898 driver_type = IOMMU_DOMAIN_IDENTITY; 1899 } 1900 1901 for_each_group_device(group, gdev) { 1902 driver_type = iommu_get_def_domain_type(group, gdev->dev, 1903 driver_type); 1904 1905 if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) { 1906 /* 1907 * No ARM32 using systems will set untrusted, it cannot 1908 * work. 1909 */ 1910 if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))) 1911 return -1; 1912 untrusted = gdev->dev; 1913 } 1914 } 1915 1916 /* 1917 * If the common dma ops are not selected in kconfig then we cannot use 1918 * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been 1919 * selected. 1920 */ 1921 if (!IS_ENABLED(CONFIG_IOMMU_DMA)) { 1922 if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA)) 1923 return -1; 1924 if (!driver_type) 1925 driver_type = IOMMU_DOMAIN_IDENTITY; 1926 } 1927 1928 if (untrusted) { 1929 if (driver_type && driver_type != IOMMU_DOMAIN_DMA) { 1930 dev_err_ratelimited( 1931 untrusted, 1932 "Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n", 1933 group->id, iommu_domain_type_str(driver_type)); 1934 return -1; 1935 } 1936 driver_type = IOMMU_DOMAIN_DMA; 1937 } 1938 1939 if (target_type) { 1940 if (driver_type && target_type != driver_type) 1941 return -1; 1942 return target_type; 1943 } 1944 return driver_type; 1945 } 1946 1947 static void iommu_group_do_probe_finalize(struct device *dev) 1948 { 1949 const struct iommu_ops *ops = dev_iommu_ops(dev); 1950 1951 if (ops->probe_finalize) 1952 ops->probe_finalize(dev); 1953 } 1954 1955 static int bus_iommu_probe(const struct bus_type *bus) 1956 { 1957 struct iommu_group *group, *next; 1958 LIST_HEAD(group_list); 1959 int ret; 1960 1961 ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); 1962 if (ret) 1963 return ret; 1964 1965 list_for_each_entry_safe(group, next, 
&group_list, entry) { 1966 struct group_device *gdev; 1967 1968 mutex_lock(&group->mutex); 1969 1970 /* Remove item from the list */ 1971 list_del_init(&group->entry); 1972 1973 /* 1974 * We go to the trouble of deferred default domain creation so 1975 * that the cross-group default domain type and the setup of the 1976 * IOMMU_RESV_DIRECT will work correctly in non-hotpug scenarios. 1977 */ 1978 ret = iommu_setup_default_domain(group, 0); 1979 if (ret) { 1980 mutex_unlock(&group->mutex); 1981 return ret; 1982 } 1983 for_each_group_device(group, gdev) 1984 iommu_setup_dma_ops(gdev->dev, group->default_domain); 1985 mutex_unlock(&group->mutex); 1986 1987 /* 1988 * FIXME: Mis-locked because the ops->probe_finalize() call-back 1989 * of some IOMMU drivers calls arm_iommu_attach_device() which 1990 * in-turn might call back into IOMMU core code, where it tries 1991 * to take group->mutex, resulting in a deadlock. 1992 */ 1993 for_each_group_device(group, gdev) 1994 iommu_group_do_probe_finalize(gdev->dev); 1995 } 1996 1997 return 0; 1998 } 1999 2000 /** 2001 * device_iommu_capable() - check for a general IOMMU capability 2002 * @dev: device to which the capability would be relevant, if available 2003 * @cap: IOMMU capability 2004 * 2005 * Return: true if an IOMMU is present and supports the given capability 2006 * for the given device, otherwise false. 2007 */ 2008 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 2009 { 2010 const struct iommu_ops *ops; 2011 2012 if (!dev_has_iommu(dev)) 2013 return false; 2014 2015 ops = dev_iommu_ops(dev); 2016 if (!ops->capable) 2017 return false; 2018 2019 return ops->capable(dev, cap); 2020 } 2021 EXPORT_SYMBOL_GPL(device_iommu_capable); 2022 2023 /** 2024 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 2025 * for a group 2026 * @group: Group to query 2027 * 2028 * IOMMU groups should not have differing values of 2029 * msi_device_has_isolated_msi() for devices in a group. 
However nothing 2030 * directly prevents this, so ensure mistakes don't result in isolation failures 2031 * by checking that all the devices are the same. 2032 */ 2033 bool iommu_group_has_isolated_msi(struct iommu_group *group) 2034 { 2035 struct group_device *group_dev; 2036 bool ret = true; 2037 2038 mutex_lock(&group->mutex); 2039 for_each_group_device(group, group_dev) 2040 ret &= msi_device_has_isolated_msi(group_dev->dev); 2041 mutex_unlock(&group->mutex); 2042 return ret; 2043 } 2044 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 2045 2046 /** 2047 * iommu_set_fault_handler() - set a fault handler for an iommu domain 2048 * @domain: iommu domain 2049 * @handler: fault handler 2050 * @token: user data, will be passed back to the fault handler 2051 * 2052 * This function should be used by IOMMU users which want to be notified 2053 * whenever an IOMMU fault happens. 2054 * 2055 * The fault handler itself should return 0 on success, and an appropriate 2056 * error code otherwise. 2057 */ 2058 void iommu_set_fault_handler(struct iommu_domain *domain, 2059 iommu_fault_handler_t handler, 2060 void *token) 2061 { 2062 if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE)) 2063 return; 2064 2065 domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER; 2066 domain->handler = handler; 2067 domain->handler_token = token; 2068 } 2069 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 2070 2071 static void iommu_domain_init(struct iommu_domain *domain, unsigned int type, 2072 const struct iommu_ops *ops) 2073 { 2074 domain->type = type; 2075 domain->owner = ops; 2076 if (!domain->ops) 2077 domain->ops = ops->default_domain_ops; 2078 } 2079 2080 static struct iommu_domain * 2081 __iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type, 2082 unsigned int flags) 2083 { 2084 const struct iommu_ops *ops; 2085 struct iommu_domain *domain; 2086 2087 if (!dev_has_iommu(dev)) 2088 return ERR_PTR(-ENODEV); 2089 2090 ops = dev_iommu_ops(dev); 2091 2092 if 
(ops->domain_alloc_paging && !flags) 2093 domain = ops->domain_alloc_paging(dev); 2094 else if (ops->domain_alloc_paging_flags) 2095 domain = ops->domain_alloc_paging_flags(dev, flags, NULL); 2096 #if IS_ENABLED(CONFIG_FSL_PAMU) 2097 else if (ops->domain_alloc && !flags) 2098 domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED); 2099 #endif 2100 else 2101 return ERR_PTR(-EOPNOTSUPP); 2102 2103 if (IS_ERR(domain)) 2104 return domain; 2105 if (!domain) 2106 return ERR_PTR(-ENOMEM); 2107 2108 iommu_domain_init(domain, type, ops); 2109 return domain; 2110 } 2111 2112 /** 2113 * iommu_paging_domain_alloc_flags() - Allocate a paging domain 2114 * @dev: device for which the domain is allocated 2115 * @flags: Bitmap of iommufd_hwpt_alloc_flags 2116 * 2117 * Allocate a paging domain which will be managed by a kernel driver. Return 2118 * allocated domain if successful, or an ERR pointer for failure. 2119 */ 2120 struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, 2121 unsigned int flags) 2122 { 2123 return __iommu_paging_domain_alloc_flags(dev, 2124 IOMMU_DOMAIN_UNMANAGED, flags); 2125 } 2126 EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags); 2127 2128 void iommu_domain_free(struct iommu_domain *domain) 2129 { 2130 switch (domain->cookie_type) { 2131 case IOMMU_COOKIE_DMA_IOVA: 2132 iommu_put_dma_cookie(domain); 2133 break; 2134 case IOMMU_COOKIE_DMA_MSI: 2135 iommu_put_msi_cookie(domain); 2136 break; 2137 case IOMMU_COOKIE_SVA: 2138 mmdrop(domain->mm); 2139 break; 2140 default: 2141 break; 2142 } 2143 if (domain->ops->free) 2144 domain->ops->free(domain); 2145 } 2146 EXPORT_SYMBOL_GPL(iommu_domain_free); 2147 2148 /* 2149 * Put the group's domain back to the appropriate core-owned domain - either the 2150 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 
2151 */ 2152 static void __iommu_group_set_core_domain(struct iommu_group *group) 2153 { 2154 struct iommu_domain *new_domain; 2155 2156 if (group->owner) 2157 new_domain = group->blocking_domain; 2158 else 2159 new_domain = group->default_domain; 2160 2161 __iommu_group_set_domain_nofail(group, new_domain); 2162 } 2163 2164 static int __iommu_attach_device(struct iommu_domain *domain, 2165 struct device *dev, struct iommu_domain *old) 2166 { 2167 int ret; 2168 2169 if (unlikely(domain->ops->attach_dev == NULL)) 2170 return -ENODEV; 2171 2172 ret = domain->ops->attach_dev(domain, dev, old); 2173 if (ret) 2174 return ret; 2175 dev->iommu->attach_deferred = 0; 2176 trace_attach_device_to_domain(dev); 2177 return 0; 2178 } 2179 2180 /** 2181 * iommu_attach_device - Attach an IOMMU domain to a device 2182 * @domain: IOMMU domain to attach 2183 * @dev: Device that will be attached 2184 * 2185 * Returns 0 on success and error code on failure 2186 * 2187 * Note that EINVAL can be treated as a soft failure, indicating 2188 * that certain configuration of the domain is incompatible with 2189 * the device. In this case attaching a different domain to the 2190 * device may succeed. 
2191 */ 2192 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2193 { 2194 /* Caller must be a probed driver on dev */ 2195 struct iommu_group *group = dev->iommu_group; 2196 int ret; 2197 2198 if (!group) 2199 return -ENODEV; 2200 2201 /* 2202 * Lock the group to make sure the device-count doesn't 2203 * change while we are attaching 2204 */ 2205 mutex_lock(&group->mutex); 2206 ret = -EINVAL; 2207 if (list_count_nodes(&group->devices) != 1) 2208 goto out_unlock; 2209 2210 ret = __iommu_attach_group(domain, group); 2211 2212 out_unlock: 2213 mutex_unlock(&group->mutex); 2214 return ret; 2215 } 2216 EXPORT_SYMBOL_GPL(iommu_attach_device); 2217 2218 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2219 { 2220 struct group_device *gdev; 2221 2222 /* 2223 * This is called on the dma mapping fast path so avoid locking. This is 2224 * racy, but we have an expectation that the driver will setup its DMAs 2225 * inside probe while being single threaded to avoid racing. 2226 */ 2227 if (!dev->iommu || !dev->iommu->attach_deferred) 2228 return 0; 2229 2230 guard(mutex)(&dev->iommu_group->mutex); 2231 2232 gdev = __dev_to_gdev(dev); 2233 if (WARN_ON(!gdev)) 2234 return -ENODEV; 2235 2236 /* 2237 * This is a concurrent attach during device recovery. Reject it until 2238 * pci_dev_reset_iommu_done() attaches the device to group->domain. 2239 * 2240 * Note that this might fail the iommu_dma_map(). But there's nothing 2241 * more we can do here. 
2242 */ 2243 if (gdev->blocked) 2244 return -EBUSY; 2245 return __iommu_attach_device(domain, dev, NULL); 2246 } 2247 2248 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2249 { 2250 /* Caller must be a probed driver on dev */ 2251 struct iommu_group *group = dev->iommu_group; 2252 2253 if (!group) 2254 return; 2255 2256 mutex_lock(&group->mutex); 2257 if (WARN_ON(domain != group->domain) || 2258 WARN_ON(list_count_nodes(&group->devices) != 1)) 2259 goto out_unlock; 2260 __iommu_group_set_core_domain(group); 2261 2262 out_unlock: 2263 mutex_unlock(&group->mutex); 2264 } 2265 EXPORT_SYMBOL_GPL(iommu_detach_device); 2266 2267 /** 2268 * iommu_get_domain_for_dev() - Return the DMA API domain pointer 2269 * @dev: Device to query 2270 * 2271 * This function can be called within a driver bound to dev. The returned 2272 * pointer is valid for the lifetime of the bound driver. 2273 * 2274 * It should not be called by drivers with driver_managed_dma = true. 2275 */ 2276 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2277 { 2278 /* Caller must be a probed driver on dev */ 2279 struct iommu_group *group = dev->iommu_group; 2280 2281 if (!group) 2282 return NULL; 2283 2284 lockdep_assert_not_held(&group->mutex); 2285 2286 return group->domain; 2287 } 2288 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2289 2290 /** 2291 * iommu_driver_get_domain_for_dev() - Return the driver-level domain pointer 2292 * @dev: Device to query 2293 * 2294 * This function can be called by an iommu driver that wants to get the physical 2295 * domain within an iommu callback function where group->mutex is held. 
2296 */ 2297 struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev) 2298 { 2299 struct iommu_group *group = dev->iommu_group; 2300 struct group_device *gdev; 2301 2302 lockdep_assert_held(&group->mutex); 2303 2304 gdev = __dev_to_gdev(dev); 2305 if (WARN_ON(!gdev)) 2306 return NULL; 2307 2308 /* 2309 * Driver handles the low-level __iommu_attach_device(), including the 2310 * one invoked by pci_dev_reset_iommu_done() re-attaching the device to 2311 * the cached group->domain. In this case, the driver must get the old 2312 * domain from group->blocking_domain rather than group->domain. This 2313 * prevents it from re-attaching the device from group->domain (old) to 2314 * group->domain (new). 2315 */ 2316 if (gdev->blocked) 2317 return group->blocking_domain; 2318 2319 return group->domain; 2320 } 2321 EXPORT_SYMBOL_GPL(iommu_driver_get_domain_for_dev); 2322 2323 /* 2324 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2325 * guarantees that the group and its default domain are valid and correct. 2326 */ 2327 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2328 { 2329 return dev->iommu_group->default_domain; 2330 } 2331 2332 static void *iommu_make_pasid_array_entry(struct iommu_domain *domain, 2333 struct iommu_attach_handle *handle) 2334 { 2335 if (handle) { 2336 handle->domain = domain; 2337 return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE); 2338 } 2339 2340 return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN); 2341 } 2342 2343 static bool domain_iommu_ops_compatible(const struct iommu_ops *ops, 2344 struct iommu_domain *domain) 2345 { 2346 if (domain->owner == ops) 2347 return true; 2348 2349 /* For static domains, owner isn't set. 
*/ 2350 if (domain == ops->blocked_domain || domain == ops->identity_domain) 2351 return true; 2352 2353 return false; 2354 } 2355 2356 static int __iommu_attach_group(struct iommu_domain *domain, 2357 struct iommu_group *group) 2358 { 2359 struct device *dev; 2360 2361 if (group->domain && group->domain != group->default_domain && 2362 group->domain != group->blocking_domain) 2363 return -EBUSY; 2364 2365 dev = iommu_group_first_dev(group); 2366 if (!dev_has_iommu(dev) || 2367 !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain)) 2368 return -EINVAL; 2369 2370 return __iommu_group_set_domain(group, domain); 2371 } 2372 2373 /** 2374 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2375 * @domain: IOMMU domain to attach 2376 * @group: IOMMU group that will be attached 2377 * 2378 * Returns 0 on success and error code on failure 2379 * 2380 * Note that EINVAL can be treated as a soft failure, indicating 2381 * that certain configuration of the domain is incompatible with 2382 * the group. In this case attaching a different domain to the 2383 * group may succeed. 2384 */ 2385 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2386 { 2387 int ret; 2388 2389 mutex_lock(&group->mutex); 2390 ret = __iommu_attach_group(domain, group); 2391 mutex_unlock(&group->mutex); 2392 2393 return ret; 2394 } 2395 EXPORT_SYMBOL_GPL(iommu_attach_group); 2396 2397 static int __iommu_device_set_domain(struct iommu_group *group, 2398 struct device *dev, 2399 struct iommu_domain *new_domain, 2400 struct iommu_domain *old_domain, 2401 unsigned int flags) 2402 { 2403 int ret; 2404 2405 /* 2406 * If the device requires IOMMU_RESV_DIRECT then we cannot allow 2407 * the blocking domain to be attached as it does not contain the 2408 * required 1:1 mapping. This test effectively excludes the device 2409 * being used with iommu_group_claim_dma_owner() which will block 2410 * vfio and iommufd as well. 
2411 */ 2412 if (dev->iommu->require_direct && 2413 (new_domain->type == IOMMU_DOMAIN_BLOCKED || 2414 new_domain == group->blocking_domain)) { 2415 dev_warn(dev, 2416 "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); 2417 return -EINVAL; 2418 } 2419 2420 if (dev->iommu->attach_deferred) { 2421 if (new_domain == group->default_domain) 2422 return 0; 2423 dev->iommu->attach_deferred = 0; 2424 } 2425 2426 ret = __iommu_attach_device(new_domain, dev, old_domain); 2427 if (ret) { 2428 /* 2429 * If we have a blocking domain then try to attach that in hopes 2430 * of avoiding a UAF. Modern drivers should implement blocking 2431 * domains as global statics that cannot fail. 2432 */ 2433 if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && 2434 group->blocking_domain && 2435 group->blocking_domain != new_domain) 2436 __iommu_attach_device(group->blocking_domain, dev, 2437 old_domain); 2438 return ret; 2439 } 2440 return 0; 2441 } 2442 2443 /* 2444 * If 0 is returned the group's domain is new_domain. If an error is returned 2445 * then the group's domain will be set back to the existing domain unless 2446 * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's 2447 * domains is left inconsistent. This is a driver bug to fail attach with a 2448 * previously good domain. We try to avoid a kernel UAF because of this. 2449 * 2450 * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU 2451 * API works on domains and devices. Bridge that gap by iterating over the 2452 * devices in a group. Ideally we'd have a single device which represents the 2453 * requestor ID of the group, but we also allow IOMMU drivers to create policy 2454 * defined minimum sets, where the physical hardware may be able to distiguish 2455 * members, but we wish to group them at a higher level (ex. untrusted 2456 * multi-function PCI devices). 
* Thus we attach each device.
*/
static int __iommu_group_set_domain_internal(struct iommu_group *group,
					     struct iommu_domain *new_domain,
					     unsigned int flags)
{
	struct group_device *last_gdev;
	struct group_device *gdev;
	int result;
	int ret;

	lockdep_assert_held(&group->mutex);

	/* Nothing to do when the group already uses new_domain. */
	if (group->domain == new_domain)
		return 0;

	if (WARN_ON(!new_domain))
		return -EINVAL;

	/*
	 * This is a concurrent attach during device recovery. Reject it until
	 * pci_dev_reset_iommu_done() attaches the device to group->domain, if
	 * IOMMU_SET_DOMAIN_MUST_SUCCEED is not set.
	 */
	if (group->recovery_cnt && !(flags & IOMMU_SET_DOMAIN_MUST_SUCCEED))
		return -EBUSY;

	/*
	 * Changing the domain is done by calling attach_dev() on the new
	 * domain. This switch does not have to be atomic and DMA can be
	 * discarded during the transition. DMA must only be able to access
	 * either new_domain or group->domain, never something else.
	 */
	result = 0;
	for_each_group_device(group, gdev) {
		/*
		 * Device under recovery is attached to group->blocking_domain.
		 * Don't change that. pci_dev_reset_iommu_done() will re-attach
		 * its domain to the updated group->domain, after the recovery.
		 */
		if (gdev->blocked)
			continue;
		ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
						group->domain, flags);
		if (ret) {
			/* Remember the failure; with MUST_SUCCEED it is returned at the end. */
			result = ret;
			/*
			 * Keep trying the other devices in the group. If a
			 * driver fails attach to an otherwise good domain, and
			 * does not support blocking domains, it should at least
			 * drop its reference on the current domain so we don't
			 * UAF.
			 */
			if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
				continue;
			goto err_revert;
		}
	}
	group->domain = new_domain;
	return result;

err_revert:
	/*
	 * This is called in error unwind paths. A well behaved driver should
	 * always allow us to attach to a domain that was already attached.
	 *
	 * NOTE(review): unlike the forward loop above, this loop does not skip
	 * devices with gdev->blocked set - confirm that is intended.
	 */
	last_gdev = gdev;
	for_each_group_device(group, gdev) {
		/* No need to revert the last gdev that failed to set domain */
		if (gdev == last_gdev)
			break;
		/*
		 * A NULL domain can happen only for first probe, in which case
		 * we leave group->domain as NULL and let release clean
		 * everything up.
		 */
		if (group->domain)
			WARN_ON(__iommu_device_set_domain(
				group, gdev->dev, group->domain, new_domain,
				IOMMU_SET_DOMAIN_MUST_SUCCEED));
	}
	return ret;
}

/*
 * Put @group back onto its core domain (see __iommu_group_set_core_domain())
 * under group->mutex.
 *
 * NOTE(review): @domain is not referenced in this body - confirm callers do
 * not rely on it being checked against group->domain.
 */
void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
{
	mutex_lock(&group->mutex);
	__iommu_group_set_core_domain(group);
	mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_detach_group);

/*
 * Translate @iova through @domain. Identity domains return @iova unchanged,
 * blocked domains return 0, everything else is delegated to the domain's
 * iova_to_phys op.
 */
phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (domain->type == IOMMU_DOMAIN_BLOCKED)
		return 0;

	return domain->ops->iova_to_phys(domain, iova);
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);

/*
 * Pick the largest page size from domain->pgsize_bitmap that fits in @size and
 * matches the alignment of both @iova and @paddr. When @count is non-NULL it
 * is set to the number of pages of the returned size to process.
 *
 * BUGs if no page size in the bitmap is suitable.
 */
static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
			   phys_addr_t paddr, size_t size, size_t *count)
{
	unsigned int pgsize_idx, pgsize_idx_next;
	unsigned long pgsizes;
	size_t offset, pgsize, pgsize_next;
	size_t offset_end;
	unsigned long addr_merge = paddr | iova;

	/* Page sizes supported by the hardware and small enough for @size */
	pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);

	/* Constrain the page sizes further based on the maximum alignment */
	if (likely(addr_merge))
		pgsizes &= GENMASK(__ffs(addr_merge), 0);

	/* Make sure we have at least one suitable page size */
	BUG_ON(!pgsizes);

	/* Pick the biggest page size remaining */
	pgsize_idx = __fls(pgsizes);
	pgsize = BIT(pgsize_idx);
	if (!count)
		return pgsize;

	/* Find the next biggest supported page size, if it exists */
	pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
	if (!pgsizes)
		goto out_set_count;

	pgsize_idx_next = __ffs(pgsizes);
	pgsize_next = BIT(pgsize_idx_next);

	/*
	 * There's no point trying a bigger page size unless the virtual
	 * and physical addresses are similarly offset within the larger page.
	 */
	if ((iova ^ paddr) & (pgsize_next - 1))
		goto out_set_count;

	/* Calculate the offset to the next page size alignment boundary */
	offset = pgsize_next - (addr_merge & (pgsize_next - 1));

	/*
	 * If size is big enough to accommodate the larger page, reduce
	 * the number of smaller pages.
	 */
	if (!check_add_overflow(offset, pgsize_next, &offset_end) &&
	    offset_end <= size)
		size = offset;

out_set_count:
	*count = size >> pgsize_idx;
	return pgsize;
}

/*
 * Map [@iova, @iova + @size) to @paddr through the domain's map_pages op, one
 * iommu_pgsize() run at a time. The number of bytes mapped is added to
 * *@mapped, also when an error is returned part way through.
 *
 * Returns 0 on success, -ENODEV without a map_pages op, -EINVAL on
 * misalignment, or the error from map_pages.
 */
static int __iommu_map_domain_pgtbl(struct iommu_domain *domain,
				    unsigned long iova, phys_addr_t paddr,
				    size_t size, int prot, gfp_t gfp,
				    size_t *mapped)
{
	const struct iommu_domain_ops *ops = domain->ops;
	unsigned int min_pagesz;
	int ret = 0;

	if (WARN_ON(!ops->map_pages))
		return -ENODEV;

	/* find out the minimum page size supported */
	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);

	/*
	 * both the virtual address and the physical one, as well as
	 * the size of the mapping, must be aligned (at least) to the
	 * size of the smallest page supported by the hardware
	 */
	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
		       iova, &paddr, size, min_pagesz);
		return -EINVAL;
	}

	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);

	while (size) {
		size_t pgsize, count, op_mapped = 0;

		pgsize = iommu_pgsize(domain, iova, paddr, size, &count);

		pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
			 iova, &paddr, pgsize, count);
		ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
				     gfp, &op_mapped);
		/*
		 * Some pages may have been mapped, even if an error occurred,
		 * so we should account for those so they can be unmapped.
		 */
		*mapped += op_mapped;
		if (ret)
			return ret;

		size -= op_mapped;
		iova += op_mapped;
		paddr += op_mapped;
	}
	return 0;
}

/*
 * Invoke the domain's iotlb_sync_map op for the given range, if there is one.
 * Returns 0 when the op is absent, otherwise the op's return value.
 */
int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
{
	const struct iommu_domain_ops *ops = domain->ops;

	if (!ops->iotlb_sync_map)
		return 0;
	return ops->iotlb_sync_map(domain, iova, size);
}

/*
 * Map a range without calling iommu_sync_map() afterwards. On failure the
 * part that did get mapped is unmapped again before the error is returned.
 *
 * Returns 0 on success, -EINVAL for a non-paging domain, an empty
 * pgsize_bitmap or a disallowed GFP flag, otherwise the mapping error.
 */
int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
		     phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	struct pt_iommu *pt = iommupt_from_domain(domain);
	size_t mapped = 0;
	int ret;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	/* Discourage passing strange GFP flags or illegal domains */
	if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING) ||
			 !domain->pgsize_bitmap ||
			 (gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
				 __GFP_HIGHMEM))))
		return -EINVAL;

	/* Use pt->ops when iommupt_from_domain() returned one, else domain->ops. */
	if (pt)
		ret = pt->ops->map_range(pt, iova, paddr, size, prot, gfp,
					 &mapped);
	else
		ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot,
					       gfp, &mapped);

	trace_map(iova, paddr, mapped);
	iommu_debug_map(domain, paddr, mapped);
	if (ret) {
		/* Undo the partial mapping accounted in @mapped. */
		iommu_unmap(domain, iova, mapped);
		return ret;
	}
	return 0;
}

/*
 * Map a range and then call iommu_sync_map() on it. If the sync fails the
 * whole range is unmapped again and the sync error is returned.
 */
int iommu_map(struct iommu_domain *domain, unsigned long iova,
	      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	int ret;

	ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp);
	if (ret)
		return ret;

	ret = iommu_sync_map(domain, iova, size);
	if (ret)
		iommu_unmap(domain, iova, size);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_map);

/*
 * Unmap [@iova, @iova + @size) through the domain's unmap_pages op, one
 * iommu_pgsize() run at a time.
 *
 * Returns the number of bytes unmapped; 0 when the op is missing or the range
 * is misaligned.
 */
static size_t
__iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova,
			   size_t size, struct iommu_iotlb_gather *iotlb_gather)
{
	const struct iommu_domain_ops *ops = domain->ops;
	size_t unmapped_page, unmapped = 0;
	unsigned int min_pagesz;

	if (WARN_ON(!ops->unmap_pages))
		return 0;

	/* find out the minimum page size supported */
	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);

	/*
	 * The virtual address, as well as the size of the mapping, must be
	 * aligned (at least) to the size of the smallest page supported
	 * by the hardware
	 */
	if (!IS_ALIGNED(iova | size, min_pagesz)) {
		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
		       iova, size, min_pagesz);
		return 0;
	}

	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);

	/*
	 * Keep iterating until we either unmap 'size' bytes (or more)
	 * or we hit an area that isn't mapped.
	 */
	while (unmapped < size) {
		size_t pgsize, count;

		/* No physical address here, so @iova is passed for both addresses. */
		pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count);
		unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather);
		if (!unmapped_page)
			break;

		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
			 iova, unmapped_page);
		/*
		 * If the driver itself isn't using the gather, make sure
		 * it looks non-empty so iotlb_sync will still be called.
		 */
		if (iotlb_gather->start >= iotlb_gather->end)
			iommu_iotlb_gather_add_range(iotlb_gather, iova, size);

		iova += unmapped_page;
		unmapped += unmapped_page;
	}

	return unmapped;
}

/*
 * Common unmap path behind iommu_unmap() and iommu_unmap_fast(). Rejects
 * non-paging domains and domains with an empty pgsize_bitmap (returning 0),
 * then unmaps through pt->ops when iommupt_from_domain() returned one, or
 * through the domain's ops otherwise. No IOTLB sync is issued here.
 *
 * Returns the number of bytes unmapped.
 */
static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova,
			    size_t size,
			    struct iommu_iotlb_gather *iotlb_gather)
{
	struct pt_iommu *pt = iommupt_from_domain(domain);
	size_t unmapped;

	if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING) ||
			 !domain->pgsize_bitmap))
		return 0;

	iommu_debug_unmap_begin(domain, iova, size);

	if (pt)
		unmapped = pt->ops->unmap_range(pt, iova, size, iotlb_gather);
	else
		unmapped = __iommu_unmap_domain_pgtbl(domain, iova, size,
						      iotlb_gather);
	trace_unmap(iova, size, unmapped);
	iommu_debug_unmap_end(domain, iova, size, unmapped);
	return unmapped;
}

/**
 * iommu_unmap() - Remove mappings from a range of IOVA
 * @domain: Domain to manipulate
 * @iova: IO virtual address to start
 * @size: Length of the range starting from @iova
 *
 * iommu_unmap() will remove a translation created by iommu_map(). It cannot
 * subdivide a mapping created by iommu_map(), so it should be called with IOVA
 * ranges that match what was passed to iommu_map(). The range can aggregate
 * contiguous iommu_map() calls so long as no individual range is split.
 *
 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point
 * unmapping stopped.
2817 */ 2818 size_t iommu_unmap(struct iommu_domain *domain, 2819 unsigned long iova, size_t size) 2820 { 2821 struct iommu_iotlb_gather iotlb_gather; 2822 size_t ret; 2823 2824 iommu_iotlb_gather_init(&iotlb_gather); 2825 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2826 iommu_iotlb_sync(domain, &iotlb_gather); 2827 2828 return ret; 2829 } 2830 EXPORT_SYMBOL_GPL(iommu_unmap); 2831 2832 /** 2833 * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync 2834 * @domain: Domain to manipulate 2835 * @iova: IO virtual address to start 2836 * @size: Length of the range starting from @iova 2837 * @iotlb_gather: range information for a pending IOTLB flush 2838 * 2839 * iommu_unmap_fast() will remove a translation created by iommu_map(). 2840 * It can't subdivide a mapping created by iommu_map(), so it should be 2841 * called with IOVA ranges that match what was passed to iommu_map(). The 2842 * range can aggregate contiguous iommu_map() calls so long as no individual 2843 * range is split. 2844 * 2845 * Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers 2846 * which manage the IOTLB flushing externally to perform a batched sync. 2847 * 2848 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2849 * unmapping stopped. 
*/
size_t iommu_unmap_fast(struct iommu_domain *domain,
			unsigned long iova, size_t size,
			struct iommu_iotlb_gather *iotlb_gather)
{
	/* Same unmap path as iommu_unmap(), but with no IOTLB sync here. */
	return __iommu_unmap(domain, iova, size, iotlb_gather);
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);

/*
 * Map the entries of scatterlist @sg into @domain back to back starting at
 * @iova. Runs of physically contiguous entries are merged into a single
 * iommu_map_nosync() call; entries flagged by sg_dma_is_bus_address() are not
 * added to a run. A single iommu_sync_map() is issued at the end.
 *
 * Returns the number of bytes mapped, or a negative error code after
 * unmapping whatever had been mapped so far.
 */
ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
		     struct scatterlist *sg, unsigned int nents, int prot,
		     gfp_t gfp)
{
	size_t len = 0, mapped = 0;
	/* Only read while len != 0, and len only becomes non-zero together with start. */
	phys_addr_t start;
	unsigned int i = 0;
	int ret;

	/*
	 * The loop deliberately runs nents + 1 times: on the extra pass @sg
	 * has not been advanced, so the last entry is seen again and the run
	 * accumulated so far is flushed by the discontinuity test below.
	 */
	while (i <= nents) {
		phys_addr_t s_phys = sg_phys(sg);

		/* Current entry does not continue the pending run: map the run. */
		if (len && s_phys != start + len) {
			ret = iommu_map_nosync(domain, iova + mapped, start,
					       len, prot, gfp);
			if (ret)
				goto out_err;

			mapped += len;
			len = 0;
		}

		if (sg_dma_is_bus_address(sg))
			goto next;

		/* Extend the pending run, or start a new one at this entry. */
		if (len) {
			len += sg->length;
		} else {
			len = sg->length;
			start = s_phys;
		}

next:
		if (++i < nents)
			sg = sg_next(sg);
	}

	ret = iommu_sync_map(domain, iova, mapped);
	if (ret)
		goto out_err;

	return mapped;

out_err:
	/* undo mappings already done */
	iommu_unmap(domain, iova, mapped);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_map_sg);

/**
 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
 * @domain: the iommu domain where the fault has happened
 * @dev: the device where the fault has happened
 * @iova: the faulting address
 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...)
 *
 * This function should be called by the low-level IOMMU implementations
 * whenever IOMMU faults happen, to allow high-level users, that are
 * interested in such events, to know about them.
2920 * 2921 * This event may be useful for several possible use cases: 2922 * - mere logging of the event 2923 * - dynamic TLB/PTE loading 2924 * - if restarting of the faulting device is required 2925 * 2926 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2927 * PTE/TLB loading will one day be supported, implementations will be able 2928 * to tell whether it succeeded or not according to this return value). 2929 * 2930 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2931 * (though fault handlers can also return -ENOSYS, in case they want to 2932 * elicit the default behavior of the IOMMU drivers). 2933 */ 2934 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2935 unsigned long iova, int flags) 2936 { 2937 int ret = -ENOSYS; 2938 2939 /* 2940 * if upper layers showed interest and installed a fault handler, 2941 * invoke it. 2942 */ 2943 if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER && 2944 domain->handler) 2945 ret = domain->handler(domain, dev, iova, flags, 2946 domain->handler_token); 2947 2948 trace_io_page_fault(dev, iova, flags); 2949 return ret; 2950 } 2951 EXPORT_SYMBOL_GPL(report_iommu_fault); 2952 2953 static int __init iommu_init(void) 2954 { 2955 iommu_group_kset = kset_create_and_add("iommu_groups", 2956 NULL, kernel_kobj); 2957 BUG_ON(!iommu_group_kset); 2958 2959 iommu_debugfs_setup(); 2960 2961 return 0; 2962 } 2963 core_initcall(iommu_init); 2964 2965 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2966 unsigned long quirk) 2967 { 2968 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2969 return -EINVAL; 2970 if (!domain->ops->set_pgtable_quirks) 2971 return -EINVAL; 2972 return domain->ops->set_pgtable_quirks(domain, quirk); 2973 } 2974 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2975 2976 /** 2977 * iommu_get_resv_regions - get reserved regions 2978 * @dev: device for which to get reserved regions 2979 * @list: reserved region list for device 2980 * 2981 * This 
returns a list of reserved IOVA regions specific to this device. 2982 * A domain user should not map IOVA in these ranges. 2983 */ 2984 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2985 { 2986 const struct iommu_ops *ops = dev_iommu_ops(dev); 2987 2988 if (ops->get_resv_regions) 2989 ops->get_resv_regions(dev, list); 2990 } 2991 EXPORT_SYMBOL_GPL(iommu_get_resv_regions); 2992 2993 /** 2994 * iommu_put_resv_regions - release reserved regions 2995 * @dev: device for which to free reserved regions 2996 * @list: reserved region list for device 2997 * 2998 * This releases a reserved region list acquired by iommu_get_resv_regions(). 2999 */ 3000 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 3001 { 3002 struct iommu_resv_region *entry, *next; 3003 3004 list_for_each_entry_safe(entry, next, list, list) { 3005 if (entry->free) 3006 entry->free(dev, entry); 3007 else 3008 kfree(entry); 3009 } 3010 } 3011 EXPORT_SYMBOL(iommu_put_resv_regions); 3012 3013 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 3014 size_t length, int prot, 3015 enum iommu_resv_type type, 3016 gfp_t gfp) 3017 { 3018 struct iommu_resv_region *region; 3019 3020 region = kzalloc_obj(*region, gfp); 3021 if (!region) 3022 return NULL; 3023 3024 INIT_LIST_HEAD(®ion->list); 3025 region->start = start; 3026 region->length = length; 3027 region->prot = prot; 3028 region->type = type; 3029 return region; 3030 } 3031 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 3032 3033 void iommu_set_default_passthrough(bool cmd_line) 3034 { 3035 if (cmd_line) 3036 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 3037 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 3038 } 3039 3040 void iommu_set_default_translated(bool cmd_line) 3041 { 3042 if (cmd_line) 3043 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 3044 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 3045 } 3046 3047 bool iommu_default_passthrough(void) 3048 { 3049 return iommu_def_domain_type == 
IOMMU_DOMAIN_IDENTITY; 3050 } 3051 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 3052 3053 static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode) 3054 { 3055 const struct iommu_device *iommu, *ret = NULL; 3056 3057 spin_lock(&iommu_device_lock); 3058 list_for_each_entry(iommu, &iommu_device_list, list) 3059 if (iommu->fwnode == fwnode) { 3060 ret = iommu; 3061 break; 3062 } 3063 spin_unlock(&iommu_device_lock); 3064 return ret; 3065 } 3066 3067 const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) 3068 { 3069 const struct iommu_device *iommu = iommu_from_fwnode(fwnode); 3070 3071 return iommu ? iommu->ops : NULL; 3072 } 3073 3074 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) 3075 { 3076 const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode); 3077 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3078 3079 if (!iommu) 3080 return driver_deferred_probe_check_state(dev); 3081 if (!dev->iommu && !READ_ONCE(iommu->ready)) 3082 return -EPROBE_DEFER; 3083 3084 if (fwspec) 3085 return iommu->ops == iommu_fwspec_ops(fwspec) ? 
0 : -EINVAL; 3086 3087 if (!dev_iommu_get(dev)) 3088 return -ENOMEM; 3089 3090 /* Preallocate for the overwhelmingly common case of 1 ID */ 3091 fwspec = kzalloc_flex(*fwspec, ids, 1); 3092 if (!fwspec) 3093 return -ENOMEM; 3094 3095 fwnode_handle_get(iommu_fwnode); 3096 fwspec->iommu_fwnode = iommu_fwnode; 3097 dev_iommu_fwspec_set(dev, fwspec); 3098 return 0; 3099 } 3100 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 3101 3102 void iommu_fwspec_free(struct device *dev) 3103 { 3104 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3105 3106 if (fwspec) { 3107 fwnode_handle_put(fwspec->iommu_fwnode); 3108 kfree(fwspec); 3109 dev_iommu_fwspec_set(dev, NULL); 3110 } 3111 } 3112 3113 int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) 3114 { 3115 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3116 int i, new_num; 3117 3118 if (!fwspec) 3119 return -EINVAL; 3120 3121 new_num = fwspec->num_ids + num_ids; 3122 if (new_num > 1) { 3123 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 3124 GFP_KERNEL); 3125 if (!fwspec) 3126 return -ENOMEM; 3127 3128 dev_iommu_fwspec_set(dev, fwspec); 3129 } 3130 3131 for (i = 0; i < num_ids; i++) 3132 fwspec->ids[fwspec->num_ids + i] = ids[i]; 3133 3134 fwspec->num_ids = new_num; 3135 return 0; 3136 } 3137 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 3138 3139 /** 3140 * iommu_setup_default_domain - Set the default_domain for the group 3141 * @group: Group to change 3142 * @target_type: Domain type to set as the default_domain 3143 * 3144 * Allocate a default domain and set it as the current domain on the group. If 3145 * the group already has a default domain it will be changed to the target_type. 3146 * When target_type is 0 the default domain is selected based on driver and 3147 * system preferences. 
*/
static int iommu_setup_default_domain(struct iommu_group *group,
				      int target_type)
{
	/* Kept so it can be freed on success or restored on failure. */
	struct iommu_domain *old_dom = group->default_domain;
	struct group_device *gdev;
	struct iommu_domain *dom;
	bool direct_failed;
	int req_type;
	int ret;

	lockdep_assert_held(&group->mutex);

	req_type = iommu_get_default_domain_type(group, target_type);
	if (req_type < 0)
		return -EINVAL;

	dom = iommu_group_alloc_default_domain(group, req_type);
	if (IS_ERR(dom))
		return PTR_ERR(dom);

	/* The allocator handed back the existing default domain: nothing to do. */
	if (group->default_domain == dom)
		return 0;

	if (iommu_is_dma_domain(dom)) {
		ret = iommu_get_dma_cookie(dom);
		if (ret) {
			iommu_domain_free(dom);
			return ret;
		}
	}

	/*
	 * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be
	 * mapped before their device is attached, in order to guarantee
	 * continuity with any FW activity
	 */
	direct_failed = false;
	for_each_group_device(group, gdev) {
		if (iommu_create_device_direct_mappings(dom, gdev->dev)) {
			/* Not fatal yet; the mappings are retried after attach below. */
			direct_failed = true;
			dev_warn_once(
				gdev->dev->iommu->iommu_dev->dev,
				"IOMMU driver was not able to establish FW requested direct mapping.");
		}
	}

	/* We must set default_domain early for __iommu_device_set_domain */
	group->default_domain = dom;
	if (!group->domain) {
		/*
		 * Drivers are not allowed to fail the first domain attach.
		 * The only way to recover from this is to fail attaching the
		 * iommu driver and call ops->release_device. Put the domain
		 * in group->default_domain so it is freed after.
		 */
		ret = __iommu_group_set_domain_internal(
			group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
		if (WARN_ON(ret))
			goto out_free_old;
	} else {
		ret = __iommu_group_set_domain(group, dom);
		if (ret)
			goto err_restore_def_domain;
	}

	/*
	 * Drivers are supposed to allow mappings to be installed in a domain
	 * before device attachment, but some don't. Hack around this defect by
	 * trying again after attaching. If this happens it means the device
	 * will not continuously have the IOMMU_RESV_DIRECT map.
	 */
	if (direct_failed) {
		for_each_group_device(group, gdev) {
			ret = iommu_create_device_direct_mappings(dom, gdev->dev);
			if (ret)
				goto err_restore_domain;
		}
	}

out_free_old:
	/* Also reached with ret != 0 from the first-attach failure above. */
	if (old_dom)
		iommu_domain_free(old_dom);
	return ret;

err_restore_domain:
	/* Re-attach the group to the previous default domain, if there was one. */
	if (old_dom)
		__iommu_group_set_domain_internal(
			group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
err_restore_def_domain:
	/*
	 * With no previous default domain, dom is not freed here and stays in
	 * group->default_domain.
	 */
	if (old_dom) {
		iommu_domain_free(dom);
		group->default_domain = old_dom;
	}
	return ret;
}

/*
 * Changing the default domain through sysfs requires the users to unbind the
 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ
 * transition. Return failure if this isn't met.
 *
 * We need to consider the race between this and the device release path.
 * group->mutex is used here to guarantee that the device release path
 * will not be entered at the same time.
3253 */ 3254 static ssize_t iommu_group_store_type(struct iommu_group *group, 3255 const char *buf, size_t count) 3256 { 3257 struct group_device *gdev; 3258 int ret, req_type; 3259 3260 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 3261 return -EACCES; 3262 3263 if (WARN_ON(!group) || !group->default_domain) 3264 return -EINVAL; 3265 3266 if (sysfs_streq(buf, "identity")) 3267 req_type = IOMMU_DOMAIN_IDENTITY; 3268 else if (sysfs_streq(buf, "DMA")) 3269 req_type = IOMMU_DOMAIN_DMA; 3270 else if (sysfs_streq(buf, "DMA-FQ")) 3271 req_type = IOMMU_DOMAIN_DMA_FQ; 3272 else if (sysfs_streq(buf, "auto")) 3273 req_type = 0; 3274 else 3275 return -EINVAL; 3276 3277 mutex_lock(&group->mutex); 3278 /* We can bring up a flush queue without tearing down the domain. */ 3279 if (req_type == IOMMU_DOMAIN_DMA_FQ && 3280 group->default_domain->type == IOMMU_DOMAIN_DMA) { 3281 ret = iommu_dma_init_fq(group->default_domain); 3282 if (ret) 3283 goto out_unlock; 3284 3285 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 3286 ret = count; 3287 goto out_unlock; 3288 } 3289 3290 /* Otherwise, ensure that device exists and no driver is bound. */ 3291 if (list_empty(&group->devices) || group->owner_cnt) { 3292 ret = -EPERM; 3293 goto out_unlock; 3294 } 3295 3296 ret = iommu_setup_default_domain(group, req_type); 3297 if (ret) 3298 goto out_unlock; 3299 3300 /* Make sure dma_ops is appropriatley set */ 3301 for_each_group_device(group, gdev) 3302 iommu_setup_dma_ops(gdev->dev, group->default_domain); 3303 3304 out_unlock: 3305 mutex_unlock(&group->mutex); 3306 return ret ?: count; 3307 } 3308 3309 /** 3310 * iommu_device_use_default_domain() - Device driver wants to handle device 3311 * DMA through the kernel DMA API. 3312 * @dev: The device. 3313 * 3314 * The device driver about to bind @dev wants to do DMA through the kernel 3315 * DMA API. Return 0 if it is allowed, otherwise an error. 
3316 */ 3317 int iommu_device_use_default_domain(struct device *dev) 3318 { 3319 /* Caller is the driver core during the pre-probe path */ 3320 struct iommu_group *group = dev->iommu_group; 3321 int ret = 0; 3322 3323 if (!group) 3324 return 0; 3325 3326 mutex_lock(&group->mutex); 3327 /* We may race against bus_iommu_probe() finalising groups here */ 3328 if (!group->default_domain) { 3329 ret = -EPROBE_DEFER; 3330 goto unlock_out; 3331 } 3332 if (group->owner_cnt) { 3333 if (group->domain != group->default_domain || group->owner || 3334 !xa_empty(&group->pasid_array)) { 3335 ret = -EBUSY; 3336 goto unlock_out; 3337 } 3338 } 3339 3340 group->owner_cnt++; 3341 3342 unlock_out: 3343 mutex_unlock(&group->mutex); 3344 return ret; 3345 } 3346 3347 /** 3348 * iommu_device_unuse_default_domain() - Device driver stops handling device 3349 * DMA through the kernel DMA API. 3350 * @dev: The device. 3351 * 3352 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3353 * It must be called after iommu_device_use_default_domain(). 3354 */ 3355 void iommu_device_unuse_default_domain(struct device *dev) 3356 { 3357 /* Caller is the driver core during the post-probe path */ 3358 struct iommu_group *group = dev->iommu_group; 3359 3360 if (!group) 3361 return; 3362 3363 mutex_lock(&group->mutex); 3364 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3365 group->owner_cnt--; 3366 3367 mutex_unlock(&group->mutex); 3368 } 3369 3370 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3371 { 3372 struct device *dev = iommu_group_first_dev(group); 3373 const struct iommu_ops *ops = dev_iommu_ops(dev); 3374 struct iommu_domain *domain; 3375 3376 if (group->blocking_domain) 3377 return 0; 3378 3379 if (ops->blocked_domain) { 3380 group->blocking_domain = ops->blocked_domain; 3381 return 0; 3382 } 3383 3384 /* 3385 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an 3386 * empty PAGING domain instead. 
3387 */ 3388 domain = iommu_paging_domain_alloc(dev); 3389 if (IS_ERR(domain)) 3390 return PTR_ERR(domain); 3391 group->blocking_domain = domain; 3392 return 0; 3393 } 3394 3395 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3396 { 3397 int ret; 3398 3399 if ((group->domain && group->domain != group->default_domain) || 3400 !xa_empty(&group->pasid_array)) 3401 return -EBUSY; 3402 3403 ret = __iommu_group_alloc_blocking_domain(group); 3404 if (ret) 3405 return ret; 3406 ret = __iommu_group_set_domain(group, group->blocking_domain); 3407 if (ret) 3408 return ret; 3409 3410 group->owner = owner; 3411 group->owner_cnt++; 3412 return 0; 3413 } 3414 3415 /** 3416 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3417 * @group: The group. 3418 * @owner: Caller specified pointer. Used for exclusive ownership. 3419 * 3420 * This is to support backward compatibility for vfio which manages the dma 3421 * ownership in iommu_group level. New invocations on this interface should be 3422 * prohibited. Only a single owner may exist for a group. 3423 */ 3424 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3425 { 3426 int ret = 0; 3427 3428 if (WARN_ON(!owner)) 3429 return -EINVAL; 3430 3431 mutex_lock(&group->mutex); 3432 if (group->owner_cnt) { 3433 ret = -EPERM; 3434 goto unlock_out; 3435 } 3436 3437 ret = __iommu_take_dma_ownership(group, owner); 3438 unlock_out: 3439 mutex_unlock(&group->mutex); 3440 3441 return ret; 3442 } 3443 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3444 3445 /** 3446 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3447 * @dev: The device. 3448 * @owner: Caller specified pointer. Used for exclusive ownership. 3449 * 3450 * Claim the DMA ownership of a device. Multiple devices in the same group may 3451 * concurrently claim ownership if they present the same owner value. 
Returns 0 3452 * on success and error code on failure 3453 */ 3454 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3455 { 3456 /* Caller must be a probed driver on dev */ 3457 struct iommu_group *group = dev->iommu_group; 3458 int ret = 0; 3459 3460 if (WARN_ON(!owner)) 3461 return -EINVAL; 3462 3463 if (!group) 3464 return -ENODEV; 3465 3466 mutex_lock(&group->mutex); 3467 if (group->owner_cnt) { 3468 if (group->owner != owner) { 3469 ret = -EPERM; 3470 goto unlock_out; 3471 } 3472 group->owner_cnt++; 3473 goto unlock_out; 3474 } 3475 3476 ret = __iommu_take_dma_ownership(group, owner); 3477 unlock_out: 3478 mutex_unlock(&group->mutex); 3479 return ret; 3480 } 3481 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3482 3483 static void __iommu_release_dma_ownership(struct iommu_group *group) 3484 { 3485 if (WARN_ON(!group->owner_cnt || !group->owner || 3486 !xa_empty(&group->pasid_array))) 3487 return; 3488 3489 group->owner_cnt = 0; 3490 group->owner = NULL; 3491 __iommu_group_set_domain_nofail(group, group->default_domain); 3492 } 3493 3494 /** 3495 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3496 * @group: The group 3497 * 3498 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3499 */ 3500 void iommu_group_release_dma_owner(struct iommu_group *group) 3501 { 3502 mutex_lock(&group->mutex); 3503 __iommu_release_dma_ownership(group); 3504 mutex_unlock(&group->mutex); 3505 } 3506 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3507 3508 /** 3509 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3510 * @dev: The device. 3511 * 3512 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 
3513 */ 3514 void iommu_device_release_dma_owner(struct device *dev) 3515 { 3516 /* Caller must be a probed driver on dev */ 3517 struct iommu_group *group = dev->iommu_group; 3518 3519 mutex_lock(&group->mutex); 3520 if (group->owner_cnt > 1) 3521 group->owner_cnt--; 3522 else 3523 __iommu_release_dma_ownership(group); 3524 mutex_unlock(&group->mutex); 3525 } 3526 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3527 3528 /** 3529 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3530 * @group: The group. 3531 * 3532 * This provides status query on a given group. It is racy and only for 3533 * non-binding status reporting. 3534 */ 3535 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3536 { 3537 unsigned int user; 3538 3539 mutex_lock(&group->mutex); 3540 user = group->owner_cnt; 3541 mutex_unlock(&group->mutex); 3542 3543 return user; 3544 } 3545 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3546 3547 static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, 3548 struct iommu_domain *domain) 3549 { 3550 const struct iommu_ops *ops = dev_iommu_ops(dev); 3551 struct iommu_domain *blocked_domain = ops->blocked_domain; 3552 3553 WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain, 3554 dev, pasid, domain)); 3555 } 3556 3557 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3558 struct iommu_group *group, ioasid_t pasid, 3559 struct iommu_domain *old) 3560 { 3561 struct group_device *device, *last_gdev; 3562 int ret; 3563 3564 for_each_group_device(group, device) { 3565 if (device->dev->iommu->max_pasids > 0) { 3566 ret = domain->ops->set_dev_pasid(domain, device->dev, 3567 pasid, old); 3568 if (ret) 3569 goto err_revert; 3570 } 3571 } 3572 3573 return 0; 3574 3575 err_revert: 3576 last_gdev = device; 3577 for_each_group_device(group, device) { 3578 if (device == last_gdev) 3579 break; 3580 if (device->dev->iommu->max_pasids > 0) { 3581 /* 3582 * If no old domain, undo the succeeded devices/pasid. 
3583 * Otherwise, rollback the succeeded devices/pasid to 3584 * the old domain. And it is a driver bug to fail 3585 * attaching with a previously good domain. 3586 */ 3587 if (!old || 3588 WARN_ON(old->ops->set_dev_pasid(old, device->dev, 3589 pasid, domain))) 3590 iommu_remove_dev_pasid(device->dev, pasid, domain); 3591 } 3592 } 3593 return ret; 3594 } 3595 3596 static void __iommu_remove_group_pasid(struct iommu_group *group, 3597 ioasid_t pasid, 3598 struct iommu_domain *domain) 3599 { 3600 struct group_device *device; 3601 3602 for_each_group_device(group, device) { 3603 /* 3604 * A group-level detach cannot fail, even if there is a blocked 3605 * device. In fact, blocked devices must be already detached for 3606 * a pending device recovery. 3607 */ 3608 if (!device->blocked && device->dev->iommu->max_pasids > 0) 3609 iommu_remove_dev_pasid(device->dev, pasid, domain); 3610 } 3611 } 3612 3613 /* 3614 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3615 * @domain: the iommu domain. 3616 * @dev: the attached device. 3617 * @pasid: the pasid of the device. 3618 * @handle: the attach handle. 3619 * 3620 * Caller should always provide a new handle to avoid race with the paths 3621 * that have lockless reference to handle if it intends to pass a valid handle. 3622 * 3623 * Return: 0 on success, or an error. 
3624 */ 3625 int iommu_attach_device_pasid(struct iommu_domain *domain, 3626 struct device *dev, ioasid_t pasid, 3627 struct iommu_attach_handle *handle) 3628 { 3629 /* Caller must be a probed driver on dev */ 3630 struct iommu_group *group = dev->iommu_group; 3631 struct group_device *device; 3632 const struct iommu_ops *ops; 3633 void *entry; 3634 int ret; 3635 3636 if (!group) 3637 return -ENODEV; 3638 3639 ops = dev_iommu_ops(dev); 3640 3641 if (!domain->ops->set_dev_pasid || 3642 !ops->blocked_domain || 3643 !ops->blocked_domain->ops->set_dev_pasid) 3644 return -EOPNOTSUPP; 3645 3646 if (!domain_iommu_ops_compatible(ops, domain) || 3647 pasid == IOMMU_NO_PASID) 3648 return -EINVAL; 3649 3650 mutex_lock(&group->mutex); 3651 3652 /* 3653 * This is a concurrent attach during device recovery. Reject it until 3654 * pci_dev_reset_iommu_done() attaches the device to group->domain. 3655 */ 3656 if (group->recovery_cnt) { 3657 ret = -EBUSY; 3658 goto out_unlock; 3659 } 3660 3661 for_each_group_device(group, device) { 3662 /* 3663 * Skip PASID validation for devices without PASID support 3664 * (max_pasids = 0). These devices cannot issue transactions 3665 * with PASID, so they don't affect group's PASID usage. 3666 */ 3667 if ((device->dev->iommu->max_pasids > 0) && 3668 (pasid >= device->dev->iommu->max_pasids)) { 3669 ret = -EINVAL; 3670 goto out_unlock; 3671 } 3672 } 3673 3674 entry = iommu_make_pasid_array_entry(domain, handle); 3675 3676 /* 3677 * Entry present is a failure case. Use xa_insert() instead of 3678 * xa_reserve(). 3679 */ 3680 ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL); 3681 if (ret) 3682 goto out_unlock; 3683 3684 ret = __iommu_set_group_pasid(domain, group, pasid, NULL); 3685 if (ret) { 3686 xa_release(&group->pasid_array, pasid); 3687 goto out_unlock; 3688 } 3689 3690 /* 3691 * The xa_insert() above reserved the memory, and the group->mutex is 3692 * held, this cannot fail. 
The new domain cannot be visible until the 3693 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3694 * queued and then failing attach. 3695 */ 3696 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3697 pasid, entry, GFP_KERNEL))); 3698 3699 out_unlock: 3700 mutex_unlock(&group->mutex); 3701 return ret; 3702 } 3703 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3704 3705 /** 3706 * iommu_replace_device_pasid - Replace the domain that a specific pasid 3707 * of the device is attached to 3708 * @domain: the new iommu domain 3709 * @dev: the attached device. 3710 * @pasid: the pasid of the device. 3711 * @handle: the attach handle. 3712 * 3713 * This API allows the pasid to switch domains. The @pasid should have been 3714 * attached. Otherwise, this fails. The pasid will keep the old configuration 3715 * if replacement failed. 3716 * 3717 * Caller should always provide a new handle to avoid race with the paths 3718 * that have lockless reference to handle if it intends to pass a valid handle. 3719 * 3720 * Return 0 on success, or an error. 3721 */ 3722 int iommu_replace_device_pasid(struct iommu_domain *domain, 3723 struct device *dev, ioasid_t pasid, 3724 struct iommu_attach_handle *handle) 3725 { 3726 /* Caller must be a probed driver on dev */ 3727 struct iommu_group *group = dev->iommu_group; 3728 struct iommu_attach_handle *entry; 3729 struct iommu_domain *curr_domain; 3730 void *curr; 3731 int ret; 3732 3733 if (!group) 3734 return -ENODEV; 3735 3736 if (!domain->ops->set_dev_pasid) 3737 return -EOPNOTSUPP; 3738 3739 if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) || 3740 pasid == IOMMU_NO_PASID || !handle) 3741 return -EINVAL; 3742 3743 mutex_lock(&group->mutex); 3744 3745 /* 3746 * This is a concurrent attach during device recovery. Reject it until 3747 * pci_dev_reset_iommu_done() attaches the device to group->domain. 
3748 */ 3749 if (group->recovery_cnt) { 3750 ret = -EBUSY; 3751 goto out_unlock; 3752 } 3753 3754 entry = iommu_make_pasid_array_entry(domain, handle); 3755 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, 3756 XA_ZERO_ENTRY, GFP_KERNEL); 3757 if (xa_is_err(curr)) { 3758 ret = xa_err(curr); 3759 goto out_unlock; 3760 } 3761 3762 /* 3763 * No domain (with or without handle) attached, hence not 3764 * a replace case. 3765 */ 3766 if (!curr) { 3767 xa_release(&group->pasid_array, pasid); 3768 ret = -EINVAL; 3769 goto out_unlock; 3770 } 3771 3772 /* 3773 * Reusing handle is problematic as there are paths that refers 3774 * the handle without lock. To avoid race, reject the callers that 3775 * attempt it. 3776 */ 3777 if (curr == entry) { 3778 WARN_ON(1); 3779 ret = -EINVAL; 3780 goto out_unlock; 3781 } 3782 3783 curr_domain = pasid_array_entry_to_domain(curr); 3784 ret = 0; 3785 3786 if (curr_domain != domain) { 3787 ret = __iommu_set_group_pasid(domain, group, 3788 pasid, curr_domain); 3789 if (ret) 3790 goto out_unlock; 3791 } 3792 3793 /* 3794 * The above xa_cmpxchg() reserved the memory, and the 3795 * group->mutex is held, this cannot fail. 3796 */ 3797 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3798 pasid, entry, GFP_KERNEL))); 3799 3800 out_unlock: 3801 mutex_unlock(&group->mutex); 3802 return ret; 3803 } 3804 EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL"); 3805 3806 /* 3807 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3808 * @domain: the iommu domain. 3809 * @dev: the attached device. 3810 * @pasid: the pasid of the device. 3811 * 3812 * The @domain must have been attached to @pasid of the @dev with 3813 * iommu_attach_device_pasid(). 
3814 */ 3815 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3816 ioasid_t pasid) 3817 { 3818 /* Caller must be a probed driver on dev */ 3819 struct iommu_group *group = dev->iommu_group; 3820 3821 mutex_lock(&group->mutex); 3822 __iommu_remove_group_pasid(group, pasid, domain); 3823 xa_erase(&group->pasid_array, pasid); 3824 mutex_unlock(&group->mutex); 3825 } 3826 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3827 3828 ioasid_t iommu_alloc_global_pasid(struct device *dev) 3829 { 3830 int ret; 3831 3832 /* max_pasids == 0 means that the device does not support PASID */ 3833 if (!dev->iommu->max_pasids) 3834 return IOMMU_PASID_INVALID; 3835 3836 /* 3837 * max_pasids is set up by vendor driver based on number of PASID bits 3838 * supported but the IDA allocation is inclusive. 3839 */ 3840 ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, 3841 dev->iommu->max_pasids - 1, GFP_KERNEL); 3842 return ret < 0 ? IOMMU_PASID_INVALID : ret; 3843 } 3844 EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); 3845 3846 void iommu_free_global_pasid(ioasid_t pasid) 3847 { 3848 if (WARN_ON(pasid == IOMMU_PASID_INVALID)) 3849 return; 3850 3851 ida_free(&iommu_global_pasid_ida, pasid); 3852 } 3853 EXPORT_SYMBOL_GPL(iommu_free_global_pasid); 3854 3855 /** 3856 * iommu_attach_handle_get - Return the attach handle 3857 * @group: the iommu group that domain was attached to 3858 * @pasid: the pasid within the group 3859 * @type: matched domain type, 0 for any match 3860 * 3861 * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch. 3862 * 3863 * Return the attach handle to the caller. The life cycle of an iommu attach 3864 * handle is from the time when the domain is attached to the time when the 3865 * domain is detached. Callers are required to synchronize the call of 3866 * iommu_attach_handle_get() with domain attachment and detachment. The attach 3867 * handle can only be used during its life cycle. 
3868 */ 3869 struct iommu_attach_handle * 3870 iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) 3871 { 3872 struct iommu_attach_handle *handle; 3873 void *entry; 3874 3875 xa_lock(&group->pasid_array); 3876 entry = xa_load(&group->pasid_array, pasid); 3877 if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) { 3878 handle = ERR_PTR(-ENOENT); 3879 } else { 3880 handle = xa_untag_pointer(entry); 3881 if (type && handle->domain->type != type) 3882 handle = ERR_PTR(-EBUSY); 3883 } 3884 xa_unlock(&group->pasid_array); 3885 3886 return handle; 3887 } 3888 EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL"); 3889 3890 /** 3891 * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group 3892 * @domain: IOMMU domain to attach 3893 * @group: IOMMU group that will be attached 3894 * @handle: attach handle 3895 * 3896 * Returns 0 on success and error code on failure. 3897 * 3898 * This is a variant of iommu_attach_group(). It allows the caller to provide 3899 * an attach handle and use it when the domain is attached. This is currently 3900 * used by IOMMUFD to deliver the I/O page faults. 3901 * 3902 * Caller should always provide a new handle to avoid race with the paths 3903 * that have lockless reference to handle. 
3904 */ 3905 int iommu_attach_group_handle(struct iommu_domain *domain, 3906 struct iommu_group *group, 3907 struct iommu_attach_handle *handle) 3908 { 3909 void *entry; 3910 int ret; 3911 3912 if (!handle) 3913 return -EINVAL; 3914 3915 mutex_lock(&group->mutex); 3916 entry = iommu_make_pasid_array_entry(domain, handle); 3917 ret = xa_insert(&group->pasid_array, 3918 IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL); 3919 if (ret) 3920 goto out_unlock; 3921 3922 ret = __iommu_attach_group(domain, group); 3923 if (ret) { 3924 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3925 goto out_unlock; 3926 } 3927 3928 /* 3929 * The xa_insert() above reserved the memory, and the group->mutex is 3930 * held, this cannot fail. The new domain cannot be visible until the 3931 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3932 * queued and then failing attach. 3933 */ 3934 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3935 IOMMU_NO_PASID, entry, GFP_KERNEL))); 3936 3937 out_unlock: 3938 mutex_unlock(&group->mutex); 3939 return ret; 3940 } 3941 EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, "IOMMUFD_INTERNAL"); 3942 3943 /** 3944 * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group 3945 * @domain: IOMMU domain to attach 3946 * @group: IOMMU group that will be attached 3947 * 3948 * Detach the specified IOMMU domain from the specified IOMMU group. 3949 * It must be used in conjunction with iommu_attach_group_handle(). 
3950 */ 3951 void iommu_detach_group_handle(struct iommu_domain *domain, 3952 struct iommu_group *group) 3953 { 3954 mutex_lock(&group->mutex); 3955 __iommu_group_set_core_domain(group); 3956 xa_erase(&group->pasid_array, IOMMU_NO_PASID); 3957 mutex_unlock(&group->mutex); 3958 } 3959 EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL"); 3960 3961 /** 3962 * iommu_replace_group_handle - replace the domain that a group is attached to 3963 * @group: IOMMU group that will be attached to the new domain 3964 * @new_domain: new IOMMU domain to replace with 3965 * @handle: attach handle 3966 * 3967 * This API allows the group to switch domains without being forced to go to 3968 * the blocking domain in-between. It allows the caller to provide an attach 3969 * handle for the new domain and use it when the domain is attached. 3970 * 3971 * If the currently attached domain is a core domain (e.g. a default_domain), 3972 * it will act just like the iommu_attach_group_handle(). 3973 * 3974 * Caller should always provide a new handle to avoid race with the paths 3975 * that have lockless reference to handle. 
3976 */ 3977 int iommu_replace_group_handle(struct iommu_group *group, 3978 struct iommu_domain *new_domain, 3979 struct iommu_attach_handle *handle) 3980 { 3981 void *curr, *entry; 3982 int ret; 3983 3984 if (!new_domain || !handle) 3985 return -EINVAL; 3986 3987 mutex_lock(&group->mutex); 3988 entry = iommu_make_pasid_array_entry(new_domain, handle); 3989 ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); 3990 if (ret) 3991 goto err_unlock; 3992 3993 ret = __iommu_group_set_domain(group, new_domain); 3994 if (ret) 3995 goto err_release; 3996 3997 curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL); 3998 WARN_ON(xa_is_err(curr)); 3999 4000 mutex_unlock(&group->mutex); 4001 4002 return 0; 4003 err_release: 4004 xa_release(&group->pasid_array, IOMMU_NO_PASID); 4005 err_unlock: 4006 mutex_unlock(&group->mutex); 4007 return ret; 4008 } 4009 EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); 4010 4011 /** 4012 * pci_dev_reset_iommu_prepare() - Block IOMMU to prepare for a PCI device reset 4013 * @pdev: PCI device that is going to enter a reset routine 4014 * 4015 * The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends to disable and block 4016 * ATS before initiating a reset. This means that a PCIe device during the reset 4017 * routine wants to block any IOMMU activity: translation and ATS invalidation. 4018 * 4019 * This function attaches the device's RID/PASID(s) the group->blocking_domain, 4020 * incrementing the group->recovery_cnt, to allow the IOMMU driver pausing any 4021 * IOMMU activity while leaving the group->domain pointer intact. Later when the 4022 * reset is finished, pci_dev_reset_iommu_done() can restore everything. 4023 * 4024 * Caller must use pci_dev_reset_iommu_prepare() with pci_dev_reset_iommu_done() 4025 * before/after the core-level reset routine, to decrement the recovery_cnt. 4026 * 4027 * Return: 0 on success or negative error code if the preparation failed. 
4028 * 4029 * These two functions are designed to be used by PCI reset functions that would 4030 * not invoke any racy iommu_release_device(), since PCI sysfs node gets removed 4031 * before it notifies with a BUS_NOTIFY_REMOVED_DEVICE. When using them in other 4032 * case, callers must ensure there will be no racy iommu_release_device() call, 4033 * which otherwise would UAF the dev->iommu_group pointer. 4034 */ 4035 int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) 4036 { 4037 struct iommu_group *group = pdev->dev.iommu_group; 4038 struct group_device *gdev; 4039 unsigned long pasid; 4040 void *entry; 4041 int ret; 4042 4043 if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev)) 4044 return 0; 4045 4046 guard(mutex)(&group->mutex); 4047 4048 gdev = __dev_to_gdev(&pdev->dev); 4049 if (WARN_ON(!gdev)) 4050 return -ENODEV; 4051 4052 if (gdev->reset_depth++) 4053 return 0; 4054 4055 ret = __iommu_group_alloc_blocking_domain(group); 4056 if (ret) { 4057 gdev->reset_depth--; 4058 return ret; 4059 } 4060 4061 /* Stage RID domain at blocking_domain while retaining group->domain */ 4062 if (group->domain != group->blocking_domain) { 4063 ret = __iommu_attach_device(group->blocking_domain, &pdev->dev, 4064 group->domain); 4065 if (ret) { 4066 gdev->reset_depth--; 4067 return ret; 4068 } 4069 } 4070 4071 /* 4072 * Update gdev->blocked upon the domain change, as it is used to return 4073 * the correct domain in iommu_driver_get_domain_for_dev() that might be 4074 * called in a set_dev_pasid callback function. 4075 */ 4076 gdev->blocked = true; 4077 4078 /* 4079 * Stage PASID domains at blocking_domain while retaining pasid_array. 4080 * 4081 * The pasid_array is mostly fenced by group->mutex, except one reader 4082 * in iommu_attach_handle_get(), so it's safe to read without xa_lock. 
4083 */ 4084 if (pdev->dev.iommu->max_pasids > 0) { 4085 xa_for_each_start(&group->pasid_array, pasid, entry, 1) { 4086 struct iommu_domain *pasid_dom = 4087 pasid_array_entry_to_domain(entry); 4088 4089 iommu_remove_dev_pasid(&pdev->dev, pasid, pasid_dom); 4090 } 4091 } 4092 4093 group->recovery_cnt++; 4094 return ret; 4095 } 4096 EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare); 4097 4098 static int __group_device_cmp_dma_alias(struct pci_dev *dev, u16 alias, 4099 void *data) 4100 { 4101 return alias == *(u16 *)data; 4102 } 4103 4104 static int group_device_cmp_dma_alias(struct pci_dev *dev, u16 alias, 4105 void *data) 4106 { 4107 return pci_for_each_dma_alias(data, __group_device_cmp_dma_alias, 4108 &alias); 4109 } 4110 4111 static bool group_device_dma_alias_is_blocked(struct iommu_group *group, 4112 struct group_device *gdev) 4113 { 4114 struct group_device *sibling; 4115 4116 lockdep_assert_held(&group->mutex); 4117 4118 if (!dev_is_pci(gdev->dev)) 4119 return false; 4120 4121 for_each_group_device(group, sibling) { 4122 if (sibling == gdev || !sibling->blocked || 4123 !dev_is_pci(sibling->dev)) 4124 continue; 4125 if (pci_for_each_dma_alias(to_pci_dev(gdev->dev), 4126 group_device_cmp_dma_alias, 4127 to_pci_dev(sibling->dev))) 4128 return true; 4129 } 4130 return false; 4131 } 4132 4133 /** 4134 * pci_dev_reset_iommu_done() - Restore IOMMU after a PCI device reset is done 4135 * @pdev: PCI device that has finished a reset routine 4136 * 4137 * After a PCIe device finishes a reset routine, it wants to restore its IOMMU 4138 * activity, including new translation and cache invalidation, by re-attaching 4139 * all RID/PASID of the device back to the domains retained in the core-level 4140 * structure. 4141 * 4142 * Caller must pair it with a successful pci_dev_reset_iommu_prepare(). 4143 * 4144 * Note that, although unlikely, there is a risk that re-attaching domains might 4145 * fail due to some unexpected happening like OOM. 
4146 */ 4147 void pci_dev_reset_iommu_done(struct pci_dev *pdev) 4148 { 4149 struct iommu_group *group = pdev->dev.iommu_group; 4150 struct group_device *gdev; 4151 unsigned long pasid; 4152 void *entry; 4153 4154 if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev)) 4155 return; 4156 4157 guard(mutex)(&group->mutex); 4158 4159 gdev = __dev_to_gdev(&pdev->dev); 4160 if (WARN_ON(!gdev)) 4161 return; 4162 4163 /* Unbalanced done() calls would underflow the counter */ 4164 if (WARN_ON(gdev->reset_depth == 0)) 4165 return; 4166 if (--gdev->reset_depth) 4167 return; 4168 4169 if (WARN_ON(!group->blocking_domain)) 4170 return; 4171 4172 if (group_device_dma_alias_is_blocked(group, gdev)) { 4173 /* 4174 * FIXME: DMA aliased devices share the same RID, which would be 4175 * convoluted to handle, as "gdev->blocked" is not sufficient: 4176 * - "blocked" state is effectively shared across these devices 4177 * - if the core skipped the blocking on the second device, the 4178 * IOMMU driver's attachment state would diverge from the HW 4179 * state 4180 * For now, just warn and see whether real ATS use cases hit it. 4181 */ 4182 pci_warn(pdev, 4183 "DMA-aliased sibling may be prematurely unblocked\n"); 4184 } 4185 4186 /* 4187 * Re-attach RID domain back to group->domain 4188 * 4189 * Leave the device parked in the blocking_domain if group->domain isn't 4190 * initialized yet 4191 */ 4192 if (group->domain && group->domain != group->blocking_domain) { 4193 WARN_ON(__iommu_attach_device(group->domain, &pdev->dev, 4194 group->blocking_domain)); 4195 } 4196 4197 /* 4198 * Update gdev->blocked upon the domain change, as it is used to return 4199 * the correct domain in iommu_driver_get_domain_for_dev() that might be 4200 * called in a set_dev_pasid callback function. 4201 */ 4202 gdev->blocked = false; 4203 4204 /* 4205 * Re-attach PASID domains back to the domains retained in pasid_array. 
4206 * 4207 * The pasid_array is mostly fenced by group->mutex, except one reader 4208 * in iommu_attach_handle_get(), so it's safe to read without xa_lock. 4209 */ 4210 if (pdev->dev.iommu->max_pasids > 0) { 4211 xa_for_each_start(&group->pasid_array, pasid, entry, 1) { 4212 struct iommu_domain *pasid_dom = 4213 pasid_array_entry_to_domain(entry); 4214 4215 WARN_ON(pasid_dom->ops->set_dev_pasid( 4216 pasid_dom, &pdev->dev, pasid, 4217 group->blocking_domain)); 4218 } 4219 } 4220 4221 if (!WARN_ON(group->recovery_cnt == 0)) 4222 group->recovery_cnt--; 4223 } 4224 EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_done); 4225 4226 #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 4227 /** 4228 * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain 4229 * @desc: MSI descriptor, will store the MSI page 4230 * @msi_addr: MSI target address to be mapped 4231 * 4232 * The implementation of sw_msi() should take msi_addr and map it to 4233 * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the 4234 * mapping information. 4235 * 4236 * Return: 0 on success or negative error code if the mapping failed. 4237 */ 4238 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 4239 { 4240 struct device *dev = msi_desc_to_dev(desc); 4241 struct iommu_group *group = dev->iommu_group; 4242 int ret = 0; 4243 4244 if (!group) 4245 return 0; 4246 4247 mutex_lock(&group->mutex); 4248 /* An IDENTITY domain must pass through */ 4249 if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) { 4250 switch (group->domain->cookie_type) { 4251 case IOMMU_COOKIE_DMA_MSI: 4252 case IOMMU_COOKIE_DMA_IOVA: 4253 ret = iommu_dma_sw_msi(group->domain, desc, msi_addr); 4254 break; 4255 case IOMMU_COOKIE_IOMMUFD: 4256 ret = iommufd_sw_msi(group->domain, desc, msi_addr); 4257 break; 4258 default: 4259 ret = -EOPNOTSUPP; 4260 break; 4261 } 4262 } 4263 mutex_unlock(&group->mutex); 4264 return ret; 4265 } 4266 #endif /* CONFIG_IRQ_MSI_IOMMU */ 4267