// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)    "iommu: " fmt

#include <linux/amba/bus.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitops.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/fsl/mc.h>
#include <linux/module.h>
#include <linux/cc_platform.h>
#include <linux/cdx/cdx_bus.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>
#include <uapi/linux/iommufd.h>
#include <linux/generic_pt/iommu.h>

#include "dma-iommu.h"
#include "iommu-priv.h"

static struct kset *iommu_group_kset;
/* Allocator for iommu_group::id (see iommu_group_alloc()/iommu_group_release()) */
static DEFINE_IDA(iommu_group_ida);
static DEFINE_IDA(iommu_global_pasid_ida);

/* Default domain type and DMA invalidation policy, settled in iommu_subsys_init() */
static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
/* Bitmask of IOMMU_CMD_LINE_* flags recording which settings were overridden */
static u32 iommu_cmd_line __read_mostly;

/* Tags used with xa_tag_pointer() in group->pasid_array */
enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 };

/*
 * Core representation of an IOMMU group. The embedded kobject is what
 * to_iommu_group() converts back from; @devices is the list walked by
 * for_each_group_device() and is protected by @mutex.
 */
struct iommu_group {
	struct kobject kobj;
	struct kobject *devices_kobj;
	struct list_head devices;
	struct xarray pasid_array;
	struct mutex mutex;
	void *iommu_data;
	void (*iommu_data_release)(void *iommu_data);
	char *name;
	int id;
	struct iommu_domain *default_domain;
	struct iommu_domain *blocking_domain;
	struct iommu_domain *domain;
	/* Links the group on a caller-provided list during deferred probing */
	struct list_head entry;
	unsigned int owner_cnt;
	/*
	 * Number of devices in the group undergoing or awaiting recovery.
	 * If non-zero, concurrent domain attachments are rejected.
	 */
	unsigned int recovery_cnt;
	void *owner;
};

/* One member device of an iommu_group, linked on iommu_group::devices */
struct group_device {
	struct list_head list;
	struct device *dev;
	char *name;
	/*
	 * Device is blocked for a pending recovery while its group->domain is
	 * retained. This can happen when:
	 *  - Device is undergoing a reset
	 */
	bool blocked;
	unsigned int reset_depth;
};

/* Iterate over each struct group_device in a struct iommu_group */
#define for_each_group_device(group, pos) \
	list_for_each_entry(pos, &(group)->devices, list)

/*
 * Look up the group_device that represents @dev inside dev->iommu_group.
 * The caller must hold the group's mutex. Returns NULL when @dev is not
 * on the group's device list.
 */
static struct group_device *__dev_to_gdev(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct group_device *gdev;

	lockdep_assert_held(&group->mutex);

	for_each_group_device(group, gdev) {
		if (gdev->dev == dev)
			return gdev;
	}
	return NULL;
}

/* sysfs attribute whose callbacks take the owning iommu_group directly */
struct iommu_group_attribute {
	struct attribute attr;
	ssize_t (*show)(struct iommu_group *group, char *buf);
	ssize_t (*store)(struct iommu_group *group,
			 const char *buf, size_t count);
};

/*
 * Strings printed in the reserved_regions sysfs file, indexed by region
 * type. Both MSI flavours are reported as "msi".
 */
static const char * const iommu_group_resv_type_string[] = {
	[IOMMU_RESV_DIRECT]			= "direct",
	[IOMMU_RESV_DIRECT_RELAXABLE]		= "direct-relaxable",
	[IOMMU_RESV_RESERVED]			= "reserved",
	[IOMMU_RESV_MSI]			= "msi",
	[IOMMU_RESV_SW_MSI]			= "msi",
};

/* Flags kept in iommu_cmd_line */
#define IOMMU_CMD_LINE_DMA_API		BIT(0)
#define IOMMU_CMD_LINE_STRICT		BIT(1)

static int bus_iommu_probe(const struct bus_type *bus);
static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev, struct iommu_domain *old);
static int __iommu_attach_group(struct iommu_domain *domain,
				struct iommu_group *group);
static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev,
							       unsigned int type,
							       unsigned int flags);

/* Flags accepted by the __iommu_*_set_domain*() helpers below */
enum {
	IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
};

static int __iommu_device_set_domain(struct iommu_group *group,
				     struct device *dev,
				     struct iommu_domain *new_domain,
				     struct iommu_domain *old_domain,
				     unsigned int flags);
static int __iommu_group_set_domain_internal(struct iommu_group *group,
					     struct iommu_domain *new_domain,
					     unsigned int flags);
/* Switch @group to @new_domain with no flags; the error is returned to the caller */
static int __iommu_group_set_domain(struct iommu_group *group,
				    struct iommu_domain *new_domain)
{
	return __iommu_group_set_domain_internal(group, new_domain, 0);
}
/*
 * Switch @group to @new_domain with IOMMU_SET_DOMAIN_MUST_SUCCEED; a
 * non-zero result is reported through WARN_ON() only.
 */
static void __iommu_group_set_domain_nofail(struct iommu_group *group,
					    struct iommu_domain *new_domain)
{
	WARN_ON(__iommu_group_set_domain_internal(
		group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
}

static int iommu_setup_default_domain(struct iommu_group *group,
				      int target_type);
static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
					       struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
				      const char *buf, size_t count);
static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
						     struct device *dev);
static void __iommu_group_free_device(struct iommu_group *group,
				      struct group_device *grp_dev);
static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
			      const struct iommu_ops *ops);

/* Define a struct iommu_group_attribute named iommu_group_attr_<_name> */
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
struct iommu_group_attribute iommu_group_attr_##_name =		\
	__ATTR(_name, _mode, _show, _store)

/* Recover the containing objects from the generic sysfs/kobject pointers */
#define to_iommu_group_attr(_attr)	\
	container_of(_attr, struct iommu_group_attribute, attr)
#define to_iommu_group(_kobj)		\
	container_of(_kobj, struct iommu_group, kobj)

/* All registered iommu_device instances; list is protected by the spinlock */
static LIST_HEAD(iommu_device_list);
static DEFINE_SPINLOCK(iommu_device_lock);

/*
 * Buses that get a notifier in iommu_subsys_init() and are probed when an
 * IOMMU instance registers. Entries depend on the kernel configuration.
 */
static const struct bus_type * const iommu_buses[] = {
	&platform_bus_type,
#ifdef CONFIG_PCI
	&pci_bus_type,
#endif
#ifdef CONFIG_ARM_AMBA
	&amba_bustype,
#endif
#ifdef CONFIG_FSL_MC_BUS
	&fsl_mc_bus_type,
#endif
#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
	&host1x_context_device_bus_type,
#endif
#ifdef CONFIG_CDX_BUS
	&cdx_bus_type,
#endif
};

/*
 * Use a function instead of an array here because the domain-type is a
 * bit-field, so an array would waste memory.
 */
static const char *iommu_domain_type_str(unsigned int t)
{
	switch (t) {
	case IOMMU_DOMAIN_BLOCKED:
		return "Blocked";
	case IOMMU_DOMAIN_IDENTITY:
		return "Passthrough";
	case IOMMU_DOMAIN_UNMANAGED:
		return "Unmanaged";
	case IOMMU_DOMAIN_DMA:
	case IOMMU_DOMAIN_DMA_FQ:
		/* Both DMA flavours are reported with the same name */
		return "Translated";
	case IOMMU_DOMAIN_PLATFORM:
		return "Platform";
	default:
		return "Unknown";
	}
}

/*
 * Subsystem initcall: settle the default domain type and the DMA
 * invalidation policy, log both, then register iommu_bus_notifier() on
 * every bus in iommu_buses[].
 *
 * Return: 0 on success, -ENOMEM if the notifier array cannot be allocated.
 */
static int __init iommu_subsys_init(void)
{
	struct notifier_block *nb;

	/* Only apply the build-time default if IOMMU_CMD_LINE_DMA_API is unset */
	if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) {
		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
			iommu_set_default_passthrough(false);
		else
			iommu_set_default_translated(false);

		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
			iommu_set_default_translated(false);
		}
	}

	/* Non-passthrough and non-strict selects the DMA_FQ domain type */
	if (!iommu_default_passthrough() && !iommu_dma_strict)
		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;

	pr_info("Default domain type: %s%s\n",
		iommu_domain_type_str(iommu_def_domain_type),
		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
			" (set via kernel command line)" : "");

	if (!iommu_default_passthrough())
		pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
			iommu_dma_strict ? "strict" : "lazy",
			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
				" (set via kernel command line)" : "");

	/*
	 * One notifier_block per bus. The array is not freed in this
	 * function; its entries stay registered with the buses.
	 */
	nb = kzalloc_objs(*nb, ARRAY_SIZE(iommu_buses));
	if (!nb)
		return -ENOMEM;

	iommu_debug_init();

	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
		nb[i].notifier_call = iommu_bus_notifier;
		/* NOTE(review): the registration result is not checked here */
		bus_register_notifier(iommu_buses[i], &nb[i]);
	}

	return 0;
}
subsys_initcall(iommu_subsys_init);

/*
 * bus_for_each_dev() callback: release @dev if it is served by the
 * iommu_device passed in @data. Always returns 0 so iteration continues.
 */
static int remove_iommu_group(struct device *dev, void *data)
{
	if (dev->iommu && dev->iommu->iommu_dev == data)
		iommu_release_device(dev);

	return 0;
}

/**
 * iommu_device_register() - Register an IOMMU hardware instance
 * @iommu: IOMMU handle for the instance
 * @ops:   IOMMU ops to associate with the instance
 * @hwdev: (optional) actual instance device, used for fwnode lookup
 *
 * Return: 0 on success, or an error.
288 */ 289 int iommu_device_register(struct iommu_device *iommu, 290 const struct iommu_ops *ops, struct device *hwdev) 291 { 292 int err = 0; 293 294 /* We need to be able to take module references appropriately */ 295 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 296 return -EINVAL; 297 298 iommu->ops = ops; 299 if (hwdev) 300 iommu->fwnode = dev_fwnode(hwdev); 301 302 spin_lock(&iommu_device_lock); 303 list_add_tail(&iommu->list, &iommu_device_list); 304 spin_unlock(&iommu_device_lock); 305 306 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) 307 err = bus_iommu_probe(iommu_buses[i]); 308 if (err) 309 iommu_device_unregister(iommu); 310 else 311 WRITE_ONCE(iommu->ready, true); 312 return err; 313 } 314 EXPORT_SYMBOL_GPL(iommu_device_register); 315 316 void iommu_device_unregister(struct iommu_device *iommu) 317 { 318 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 319 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 320 321 spin_lock(&iommu_device_lock); 322 list_del(&iommu->list); 323 spin_unlock(&iommu_device_lock); 324 325 /* Pairs with the alloc in generic_single_device_group() */ 326 iommu_group_put(iommu->singleton_group); 327 iommu->singleton_group = NULL; 328 } 329 EXPORT_SYMBOL_GPL(iommu_device_unregister); 330 331 #if IS_ENABLED(CONFIG_IOMMUFD_TEST) 332 void iommu_device_unregister_bus(struct iommu_device *iommu, 333 const struct bus_type *bus, 334 struct notifier_block *nb) 335 { 336 bus_unregister_notifier(bus, nb); 337 fwnode_remove_software_node(iommu->fwnode); 338 iommu_device_unregister(iommu); 339 } 340 EXPORT_SYMBOL_GPL(iommu_device_unregister_bus); 341 342 /* 343 * Register an iommu driver against a single bus. This is only used by iommufd 344 * selftest to create a mock iommu driver. The caller must provide 345 * some memory to hold a notifier_block. 
346 */ 347 int iommu_device_register_bus(struct iommu_device *iommu, 348 const struct iommu_ops *ops, 349 const struct bus_type *bus, 350 struct notifier_block *nb) 351 { 352 int err; 353 354 iommu->ops = ops; 355 nb->notifier_call = iommu_bus_notifier; 356 err = bus_register_notifier(bus, nb); 357 if (err) 358 return err; 359 360 iommu->fwnode = fwnode_create_software_node(NULL, NULL); 361 if (IS_ERR(iommu->fwnode)) { 362 bus_unregister_notifier(bus, nb); 363 return PTR_ERR(iommu->fwnode); 364 } 365 366 spin_lock(&iommu_device_lock); 367 list_add_tail(&iommu->list, &iommu_device_list); 368 spin_unlock(&iommu_device_lock); 369 370 err = bus_iommu_probe(bus); 371 if (err) { 372 iommu_device_unregister_bus(iommu, bus, nb); 373 return err; 374 } 375 WRITE_ONCE(iommu->ready, true); 376 return 0; 377 } 378 EXPORT_SYMBOL_GPL(iommu_device_register_bus); 379 380 int iommu_mock_device_add(struct device *dev, struct iommu_device *iommu) 381 { 382 int rc; 383 384 mutex_lock(&iommu_probe_device_lock); 385 rc = iommu_fwspec_init(dev, iommu->fwnode); 386 mutex_unlock(&iommu_probe_device_lock); 387 388 if (rc) 389 return rc; 390 391 rc = device_add(dev); 392 if (rc) 393 iommu_fwspec_free(dev); 394 return rc; 395 } 396 EXPORT_SYMBOL_GPL(iommu_mock_device_add); 397 #endif 398 399 static struct dev_iommu *dev_iommu_get(struct device *dev) 400 { 401 struct dev_iommu *param = dev->iommu; 402 403 lockdep_assert_held(&iommu_probe_device_lock); 404 405 if (param) 406 return param; 407 408 param = kzalloc_obj(*param); 409 if (!param) 410 return NULL; 411 412 mutex_init(¶m->lock); 413 dev->iommu = param; 414 return param; 415 } 416 417 void dev_iommu_free(struct device *dev) 418 { 419 struct dev_iommu *param = dev->iommu; 420 421 dev->iommu = NULL; 422 if (param->fwspec) { 423 fwnode_handle_put(param->fwspec->iommu_fwnode); 424 kfree(param->fwspec); 425 } 426 kfree(param); 427 } 428 429 /* 430 * Internal equivalent of device_iommu_mapped() for when we care that a device 431 * actually has 
 * API ops, and don't want false positives from VFIO-only groups.
 */
static bool dev_has_iommu(struct device *dev)
{
	return dev->iommu && dev->iommu->iommu_dev;
}

/*
 * Work out how many PASIDs @dev may use: the device's own limit (PCI PASID
 * capability, or the "pasid-num-bits" property for other devices), capped
 * by the limit of the IOMMU instance serving it. A device that reports
 * nothing yields 0.
 */
static u32 dev_iommu_get_max_pasids(struct device *dev)
{
	u32 max_pasids = 0, bits = 0;
	int ret;

	if (dev_is_pci(dev)) {
		ret = pci_max_pasids(to_pci_dev(dev));
		if (ret > 0)
			max_pasids = ret;
	} else {
		ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
		/*
		 * NOTE(review): bits is used unchecked; a property value of 32
		 * or more does not fit the u32 result (and may exceed the
		 * width of unsigned long) - confirm firmware values are bounded.
		 */
		if (!ret)
			max_pasids = 1UL << bits;
	}

	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
}

/*
 * Store driver private data in dev->iommu. Except when CONFIG_FSL_PAMU is
 * enabled, the caller is expected to hold iommu_probe_device_lock.
 */
void dev_iommu_priv_set(struct device *dev, void *priv)
{
	/* FSL_PAMU does something weird */
	if (!IS_ENABLED(CONFIG_FSL_PAMU))
		lockdep_assert_held(&iommu_probe_device_lock);
	dev->iommu->priv = priv;
}
EXPORT_SYMBOL_GPL(dev_iommu_priv_set);

/*
 * Init the dev->iommu and dev->iommu_group in the struct device and get the
 * driver probed
 *
 * Called with iommu_probe_device_lock held (dev_iommu_get() asserts it); the
 * lock is dropped and retaken around the bus dma_configure() call. On
 * success a module reference on ops->owner is held. On failure dev->iommu
 * is freed, except for the early -ENODEV return after dma_configure().
 */
static int iommu_init_device(struct device *dev)
{
	const struct iommu_ops *ops;
	struct iommu_device *iommu_dev;
	struct iommu_group *group;
	int ret;

	if (!dev_iommu_get(dev))
		return -ENOMEM;
	/*
	 * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing
	 * is buried in the bus dma_configure path. Properly unpicking that is
	 * still a big job, so for now just invoke the whole thing. The device
	 * already having a driver bound means dma_configure has already run and
	 * found no IOMMU to wait for, so there's no point calling it again.
	 */
	if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) {
		mutex_unlock(&iommu_probe_device_lock);
		dev->bus->dma_configure(dev);
		mutex_lock(&iommu_probe_device_lock);
		/* If another instance finished the job for us, skip it */
		if (!dev->iommu || dev->iommu_group)
			return -ENODEV;
	}
	/*
	 * At this point, relevant devices either now have a fwspec which will
	 * match ops registered with a non-NULL fwnode, or we can reasonably
	 * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
	 * be present, and that any of their registered instances has suitable
	 * ops for probing, and thus cheekily co-opt the same mechanism.
	 */
	ops = iommu_fwspec_ops(dev->iommu->fwspec);
	if (!ops) {
		ret = -ENODEV;
		goto err_free;
	}

	/* Hold the driver module for as long as the device uses its ops */
	if (!try_module_get(ops->owner)) {
		ret = -EINVAL;
		goto err_free;
	}

	iommu_dev = ops->probe_device(dev);
	if (IS_ERR(iommu_dev)) {
		ret = PTR_ERR(iommu_dev);
		goto err_module_put;
	}
	dev->iommu->iommu_dev = iommu_dev;

	ret = iommu_device_link(iommu_dev, dev);
	if (ret)
		goto err_release;

	/* A NULL group from the driver is treated as -EINVAL */
	group = ops->device_group(dev);
	if (WARN_ON_ONCE(group == NULL))
		group = ERR_PTR(-EINVAL);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto err_unlink;
	}
	dev->iommu_group = group;

	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
	if (ops->is_attach_deferred)
		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
	return 0;

	/* Unwind in reverse order of the setup steps above */
err_unlink:
	iommu_device_unlink(iommu_dev, dev);
err_release:
	if (ops->release_device)
		ops->release_device(dev);
err_module_put:
	module_put(ops->owner);
err_free:
	dev->iommu->iommu_dev = NULL;
	dev_iommu_free(dev);
	return ret;
}

/*
 * Tear down the per-device state established by iommu_init_device(): unlink
 * the device from its IOMMU instance, let the driver park and release it,
 * free the group's domains if the group's device list is already empty,
 * then drop the module reference and free dev->iommu.
 *
 * Must be called with the group mutex held. The group reference itself is
 * left for the caller to put.
 */
static void iommu_deinit_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	lockdep_assert_held(&group->mutex);

	iommu_device_unlink(dev->iommu->iommu_dev, dev);

	/*
	 * release_device() must stop using any attached domain on the device.
	 * If there are still other devices in the group, they are not affected
	 * by this callback.
	 *
	 * If the iommu driver provides release_domain, the core code ensures
	 * that domain is attached prior to calling release_device. Drivers can
	 * use this to enforce a translation on the idle iommu. Typically, the
	 * global static blocked_domain is a good choice.
	 *
	 * Otherwise, the iommu driver must set the device to either an identity
	 * or a blocking translation in release_device() and stop using any
	 * domain pointer, as it is going to be freed.
	 *
	 * Regardless, if a delayed attach never occurred, then the release
	 * should still avoid touching any hardware configuration either.
	 */
	if (!dev->iommu->attach_deferred && ops->release_domain) {
		struct iommu_domain *release_domain = ops->release_domain;

		/*
		 * If the device requires direct mappings then it should not
		 * be parked on a BLOCKED domain during release as that would
		 * break the direct mappings.
		 */
		if (dev->iommu->require_direct && ops->identity_domain &&
		    release_domain == ops->blocked_domain)
			release_domain = ops->identity_domain;

		/* The result of this attach is not checked */
		release_domain->ops->attach_dev(release_domain, dev,
						group->domain);
	}

	if (ops->release_device)
		ops->release_device(dev);

	/*
	 * If this is the last driver to use the group then we must free the
	 * domains before we do the module_put().
	 */
	if (list_empty(&group->devices)) {
		if (group->default_domain) {
			iommu_domain_free(group->default_domain);
			group->default_domain = NULL;
		}
		if (group->blocking_domain) {
			iommu_domain_free(group->blocking_domain);
			group->blocking_domain = NULL;
		}
		group->domain = NULL;
	}

	/* Caller must put iommu_group */
	dev->iommu_group = NULL;
	module_put(ops->owner);
	dev_iommu_free(dev);
	if (IS_ENABLED(CONFIG_IOMMU_DMA))
		dev_clear_dma_iommu(dev);
}

/*
 * Translate a group->pasid_array entry into its domain. An entry tagged
 * IOMMU_PASID_ARRAY_DOMAIN is the domain pointer itself; any other entry is
 * treated as a struct iommu_attach_handle whose ->domain is returned.
 */
static struct iommu_domain *pasid_array_entry_to_domain(void *entry)
{
	if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN)
		return xa_untag_pointer(entry);
	return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain;
}

DEFINE_MUTEX(iommu_probe_device_lock);

/*
 * Probe @dev with the IOMMU core: initialise it, add it to its group and
 * attach it to the group's current domain if there is one.
 *
 * @group_list: when non-NULL, default domain setup for a group that has
 *              none yet is deferred and the group is queued on this list;
 *              when NULL the default domain is set up here.
 *
 * Must be called with iommu_probe_device_lock held.
 *
 * Return: 0 on success or if @dev is already in a group, otherwise a
 * negative errno with all per-device state torn down again.
 */
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
	struct iommu_group *group;
	struct group_device *gdev;
	int ret;

	/*
	 * Serialise to avoid races between IOMMU drivers registering in
	 * parallel and/or the "replay" calls from ACPI/OF code via client
	 * driver probe. Once the latter have been cleaned up we should
	 * probably be able to use device_lock() here to minimise the scope,
	 * but for now enforcing a simple global ordering is fine.
	 */
	lockdep_assert_held(&iommu_probe_device_lock);

	/* Device is probed already if in a group */
	if (dev->iommu_group)
		return 0;

	ret = iommu_init_device(dev);
	if (ret)
		return ret;
	/*
	 * And if we do now see any replay calls, they would indicate someone
	 * misusing the dma_configure path outside bus code.
	 */
	if (dev->driver)
		dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n");

	group = dev->iommu_group;
	gdev = iommu_group_alloc_device(group, dev);
	/*
	 * The mutex is taken before the result is checked because the
	 * err_put_group path below runs iommu_deinit_device() under it.
	 */
	mutex_lock(&group->mutex);
	if (IS_ERR(gdev)) {
		ret = PTR_ERR(gdev);
		goto err_put_group;
	}

	/*
	 * The gdev must be in the list before calling
	 * iommu_setup_default_domain()
	 */
	list_add_tail(&gdev->list, &group->devices);
	WARN_ON(group->default_domain && !group->domain);
	/* The result of creating the direct mappings is not checked here */
	if (group->default_domain)
		iommu_create_device_direct_mappings(group->default_domain, dev);
	if (group->domain) {
		ret = __iommu_device_set_domain(group, dev, group->domain, NULL,
						0);
		if (ret)
			goto err_remove_gdev;
	} else if (!group->default_domain && !group_list) {
		ret = iommu_setup_default_domain(group, 0);
		if (ret)
			goto err_remove_gdev;
	} else if (!group->default_domain) {
		/*
		 * With a group_list argument we defer the default_domain setup
		 * to the caller by providing a de-duplicated list of groups
		 * that need further setup.
		 */
		if (list_empty(&group->entry))
			list_add_tail(&group->entry, group_list);
	}

	if (group->default_domain)
		iommu_setup_dma_ops(dev, group->default_domain);

	mutex_unlock(&group->mutex);

	return 0;

err_remove_gdev:
	list_del(&gdev->list);
	__iommu_group_free_device(group, gdev);
err_put_group:
	iommu_deinit_device(dev);
	mutex_unlock(&group->mutex);
	iommu_group_put(group);

	return ret;
}

/*
 * Probe a single device under iommu_probe_device_lock, setting up the
 * default domain immediately, then give the driver its optional
 * probe_finalize() callback once the lock has been dropped.
 *
 * Return: 0 on success or a negative errno from __iommu_probe_device().
 */
int iommu_probe_device(struct device *dev)
{
	const struct iommu_ops *ops;
	int ret;

	mutex_lock(&iommu_probe_device_lock);
	ret = __iommu_probe_device(dev, NULL);
	mutex_unlock(&iommu_probe_device_lock);
	if (ret)
		return ret;

	ops = dev_iommu_ops(dev);
	if (ops->probe_finalize)
		ops->probe_finalize(dev);

	return 0;
}

/*
 * Remove the sysfs links for @grp_dev and free it. The callers in this
 * file take the entry off group->devices before calling this.
 */
static void __iommu_group_free_device(struct iommu_group *group,
				      struct group_device *grp_dev)
{
	struct device *dev = grp_dev->dev;

	sysfs_remove_link(group->devices_kobj, grp_dev->name);
	sysfs_remove_link(&dev->kobj, "iommu_group");

	trace_remove_device_from_group(group->id, dev);

	/*
	 * If the group has become empty then ownership must have been
	 * released, and the current domain must be set back to NULL or
	 * the default domain.
	 */
	if (list_empty(&group->devices))
		WARN_ON(group->owner_cnt ||
			group->domain != group->default_domain);

	kfree(grp_dev->name);
	kfree(grp_dev);
}

/* Remove the iommu_group from the struct device.
*/ 751 static void __iommu_group_remove_device(struct device *dev) 752 { 753 struct iommu_group *group = dev->iommu_group; 754 struct group_device *device; 755 756 mutex_lock(&group->mutex); 757 for_each_group_device(group, device) { 758 if (device->dev != dev) 759 continue; 760 761 list_del(&device->list); 762 __iommu_group_free_device(group, device); 763 if (dev_has_iommu(dev)) 764 iommu_deinit_device(dev); 765 else 766 dev->iommu_group = NULL; 767 break; 768 } 769 mutex_unlock(&group->mutex); 770 771 /* 772 * Pairs with the get in iommu_init_device() or 773 * iommu_group_add_device() 774 */ 775 iommu_group_put(group); 776 } 777 778 static void iommu_release_device(struct device *dev) 779 { 780 struct iommu_group *group = dev->iommu_group; 781 782 if (group) 783 __iommu_group_remove_device(dev); 784 785 /* Free any fwspec if no iommu_driver was ever attached */ 786 if (dev->iommu) 787 dev_iommu_free(dev); 788 } 789 790 static int __init iommu_set_def_domain_type(char *str) 791 { 792 bool pt; 793 int ret; 794 795 ret = kstrtobool(str, &pt); 796 if (ret) 797 return ret; 798 799 if (pt) 800 iommu_set_default_passthrough(true); 801 else 802 iommu_set_default_translated(true); 803 804 return 0; 805 } 806 early_param("iommu.passthrough", iommu_set_def_domain_type); 807 808 static int __init iommu_dma_setup(char *str) 809 { 810 int ret = kstrtobool(str, &iommu_dma_strict); 811 812 if (!ret) 813 iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; 814 return ret; 815 } 816 early_param("iommu.strict", iommu_dma_setup); 817 818 void iommu_set_dma_strict(void) 819 { 820 iommu_dma_strict = true; 821 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) 822 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 823 } 824 825 static ssize_t iommu_group_attr_show(struct kobject *kobj, 826 struct attribute *__attr, char *buf) 827 { 828 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 829 struct iommu_group *group = to_iommu_group(kobj); 830 ssize_t ret = -EIO; 831 832 if (attr->show) 833 
ret = attr->show(group, buf); 834 return ret; 835 } 836 837 static ssize_t iommu_group_attr_store(struct kobject *kobj, 838 struct attribute *__attr, 839 const char *buf, size_t count) 840 { 841 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 842 struct iommu_group *group = to_iommu_group(kobj); 843 ssize_t ret = -EIO; 844 845 if (attr->store) 846 ret = attr->store(group, buf, count); 847 return ret; 848 } 849 850 static const struct sysfs_ops iommu_group_sysfs_ops = { 851 .show = iommu_group_attr_show, 852 .store = iommu_group_attr_store, 853 }; 854 855 static int iommu_group_create_file(struct iommu_group *group, 856 struct iommu_group_attribute *attr) 857 { 858 return sysfs_create_file(&group->kobj, &attr->attr); 859 } 860 861 static void iommu_group_remove_file(struct iommu_group *group, 862 struct iommu_group_attribute *attr) 863 { 864 sysfs_remove_file(&group->kobj, &attr->attr); 865 } 866 867 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 868 { 869 return sysfs_emit(buf, "%s\n", group->name); 870 } 871 872 /** 873 * iommu_insert_resv_region - Insert a new region in the 874 * list of reserved regions. 875 * @new: new region to insert 876 * @regions: list of regions 877 * 878 * Elements are sorted by start address and overlapping segments 879 * of the same type are merged. 
 */
static int iommu_insert_resv_region(struct iommu_resv_region *new,
				    struct list_head *regions)
{
	struct iommu_resv_region *iter, *tmp, *nr, *top;
	LIST_HEAD(stack);

	/* @new itself is never linked; a freshly allocated copy is inserted */
	nr = iommu_alloc_resv_region(new->start, new->length,
				     new->prot, new->type, GFP_KERNEL);
	if (!nr)
		return -ENOMEM;

	/* First add the new element based on start address sorting */
	list_for_each_entry(iter, regions, list) {
		if (nr->start < iter->start ||
		    (nr->start == iter->start && nr->type <= iter->type))
			break;
	}
	/*
	 * Insert in front of the element the loop stopped on. When no such
	 * element exists the cursor ends on the list head, so this appends.
	 */
	list_add_tail(&nr->list, &iter->list);

	/* Merge overlapping segments of type nr->type in @regions, if any */
	list_for_each_entry_safe(iter, tmp, regions, list) {
		phys_addr_t top_end, iter_end = iter->start + iter->length - 1;

		/* no merge needed on elements of different types than @new */
		if (iter->type != new->type) {
			list_move_tail(&iter->list, &stack);
			continue;
		}

		/* look for the last stack element of same type as @iter */
		list_for_each_entry_reverse(top, &stack, list)
			if (top->type == iter->type)
				goto check_overlap;

		/* Nothing of this type on the stack yet: just move it over */
		list_move_tail(&iter->list, &stack);
		continue;

check_overlap:
		top_end = top->start + top->length - 1;

		if (iter->start > top_end + 1) {
			/* Gap between the two: keep @iter as its own element */
			list_move_tail(&iter->list, &stack);
		} else {
			/* Overlapping or adjacent: grow @top and drop @iter */
			top->length = max(top_end, iter_end) - top->start + 1;
			list_del(&iter->list);
			kfree(iter);
		}
	}
	/* Every element was moved to the stack or freed; put the result back */
	list_splice(&stack, regions);
	return 0;
}

/*
 * Insert every region of @dev_resv_regions into @group_resv_regions,
 * stopping at the first failure.
 *
 * Return: 0 on success or the error from iommu_insert_resv_region().
 */
static int
iommu_insert_device_resv_regions(struct list_head *dev_resv_regions,
				 struct list_head *group_resv_regions)
{
	struct iommu_resv_region *entry;
	int ret = 0;

	list_for_each_entry(entry, dev_resv_regions, list) {
		ret = iommu_insert_resv_region(entry, group_resv_regions);
		if (ret)
			break;
	}
	return ret;
}

/*
 * Collect the reserved regions of the devices in @group into @head, sorted
 * and merged by iommu_insert_resv_region(). The entries added to @head are
 * separate allocations. Runs under the group mutex.
 *
 * Return: 0 on success or a negative errno. On error @head may already
 * hold the regions inserted before the failure.
 */
int iommu_get_group_resv_regions(struct iommu_group *group,
				 struct list_head *head)
{
	struct group_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		struct list_head dev_resv_regions;

		/*
		 * Non-API groups still expose reserved_regions in sysfs,
		 * so filter out calls that get here that way.
		 */
		if (!dev_has_iommu(device->dev))
			break;

		INIT_LIST_HEAD(&dev_resv_regions);
		iommu_get_resv_regions(device->dev, &dev_resv_regions);
		ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
		/* The per-device list is released whether or not the insert worked */
		iommu_put_resv_regions(device->dev, &dev_resv_regions);
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions);

/*
 * "reserved_regions" attribute: one line per region with its start
 * address, inclusive end address and type string. Each region is freed
 * once it has been printed.
 */
static ssize_t iommu_group_show_resv_regions(struct iommu_group *group,
					     char *buf)
{
	struct iommu_resv_region *region, *next;
	struct list_head group_resv_regions;
	int offset = 0;

	INIT_LIST_HEAD(&group_resv_regions);
	/*
	 * NOTE(review): the return value is not checked, so after a failure
	 * whatever was collected so far is still printed - confirm intended.
	 */
	iommu_get_group_resv_regions(group, &group_resv_regions);

	list_for_each_entry_safe(region, next, &group_resv_regions, list) {
		offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n",
					(long long)region->start,
					(long long)(region->start +
						    region->length - 1),
					iommu_group_resv_type_string[region->type]);
		kfree(region);
	}

	return offset;
}

/*
 * "type" attribute (read side): name of the group's default domain type.
 * Prints "unknown" when there is no default domain or its type has no case
 * in the switch below.
 */
static ssize_t iommu_group_show_type(struct iommu_group *group,
				     char *buf)
{
	char *type = "unknown";

	mutex_lock(&group->mutex);
	if (group->default_domain) {
		switch (group->default_domain->type) {
		case IOMMU_DOMAIN_BLOCKED:
			type = "blocked";
			break;
		case IOMMU_DOMAIN_IDENTITY:
			type = "identity";
			break;
		case IOMMU_DOMAIN_UNMANAGED:
			type = "unmanaged";
			break;
		case IOMMU_DOMAIN_DMA:
			type = "DMA";
			break;
		case IOMMU_DOMAIN_DMA_FQ:
			type = "DMA-FQ";
			break;
		}
	}
	mutex_unlock(&group->mutex);

	return sysfs_emit(buf, "%s\n", type);
}

/* "name" and "reserved_regions" are read-only; "type" is also writable */
static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);

static IOMMU_GROUP_ATTR(reserved_regions, 0444,
			iommu_group_show_resv_regions, NULL);

static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type,
			iommu_group_store_type);

/*
 * kobject release callback for a group: run the driver's iommu_data
 * release hook if one is set, return the group id to the IDA and free the
 * group.
 */
static void iommu_group_release(struct kobject *kobj)
{
	struct iommu_group *group = to_iommu_group(kobj);

	pr_debug("Releasing group %d\n", group->id);

	if (group->iommu_data_release)
		group->iommu_data_release(group->iommu_data);

	ida_free(&iommu_group_ida, group->id);

	/* Domains are free'd by iommu_deinit_device() */
	WARN_ON(group->default_domain);
	WARN_ON(group->blocking_domain);

	kfree(group->name);
	kfree(group);
}

static const struct kobj_type iommu_group_ktype = {
	.sysfs_ops = &iommu_group_sysfs_ops,
	.release = iommu_group_release,
};

/**
 * iommu_group_alloc - Allocate a new group
 *
 * This function is called by an iommu driver to allocate a new iommu
 * group.  The iommu group represents the minimum granularity of the iommu.
 * Upon successful return, the caller holds a reference to the supplied
 * group in order to hold the group until devices are added.  Use
 * iommu_group_put() to release this extra reference count, allowing the
 * group to be automatically reclaimed once it has no devices or external
 * references.
1071 */ 1072 struct iommu_group *iommu_group_alloc(void) 1073 { 1074 struct iommu_group *group; 1075 int ret; 1076 1077 group = kzalloc_obj(*group); 1078 if (!group) 1079 return ERR_PTR(-ENOMEM); 1080 1081 group->kobj.kset = iommu_group_kset; 1082 mutex_init(&group->mutex); 1083 INIT_LIST_HEAD(&group->devices); 1084 INIT_LIST_HEAD(&group->entry); 1085 xa_init(&group->pasid_array); 1086 1087 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 1088 if (ret < 0) { 1089 kfree(group); 1090 return ERR_PTR(ret); 1091 } 1092 group->id = ret; 1093 1094 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 1095 NULL, "%d", group->id); 1096 if (ret) { 1097 kobject_put(&group->kobj); 1098 return ERR_PTR(ret); 1099 } 1100 1101 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 1102 if (!group->devices_kobj) { 1103 kobject_put(&group->kobj); /* triggers .release & free */ 1104 return ERR_PTR(-ENOMEM); 1105 } 1106 1107 /* 1108 * The devices_kobj holds a reference on the group kobject, so 1109 * as long as that exists so will the group. We can therefore 1110 * use the devices_kobj for reference counting. 1111 */ 1112 kobject_put(&group->kobj); 1113 1114 ret = iommu_group_create_file(group, 1115 &iommu_group_attr_reserved_regions); 1116 if (ret) { 1117 kobject_put(group->devices_kobj); 1118 return ERR_PTR(ret); 1119 } 1120 1121 ret = iommu_group_create_file(group, &iommu_group_attr_type); 1122 if (ret) { 1123 kobject_put(group->devices_kobj); 1124 return ERR_PTR(ret); 1125 } 1126 1127 pr_debug("Allocated group %d\n", group->id); 1128 1129 return group; 1130 } 1131 EXPORT_SYMBOL_GPL(iommu_group_alloc); 1132 1133 /** 1134 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 1135 * @group: the group 1136 * 1137 * iommu drivers can store data in the group for use when doing iommu 1138 * operations. This function provides a way to retrieve it. Caller 1139 * should hold a group reference. 
1140 */ 1141 void *iommu_group_get_iommudata(struct iommu_group *group) 1142 { 1143 return group->iommu_data; 1144 } 1145 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 1146 1147 /** 1148 * iommu_group_set_iommudata - set iommu_data for a group 1149 * @group: the group 1150 * @iommu_data: new data 1151 * @release: release function for iommu_data 1152 * 1153 * iommu drivers can store data in the group for use when doing iommu 1154 * operations. This function provides a way to set the data after 1155 * the group has been allocated. Caller should hold a group reference. 1156 */ 1157 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 1158 void (*release)(void *iommu_data)) 1159 { 1160 group->iommu_data = iommu_data; 1161 group->iommu_data_release = release; 1162 } 1163 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 1164 1165 /** 1166 * iommu_group_set_name - set name for a group 1167 * @group: the group 1168 * @name: name 1169 * 1170 * Allow iommu driver to set a name for a group. When set it will 1171 * appear in a name attribute file under the group in sysfs. 1172 */ 1173 int iommu_group_set_name(struct iommu_group *group, const char *name) 1174 { 1175 int ret; 1176 1177 if (group->name) { 1178 iommu_group_remove_file(group, &iommu_group_attr_name); 1179 kfree(group->name); 1180 group->name = NULL; 1181 if (!name) 1182 return 0; 1183 } 1184 1185 group->name = kstrdup(name, GFP_KERNEL); 1186 if (!group->name) 1187 return -ENOMEM; 1188 1189 ret = iommu_group_create_file(group, &iommu_group_attr_name); 1190 if (ret) { 1191 kfree(group->name); 1192 group->name = NULL; 1193 return ret; 1194 } 1195 1196 return 0; 1197 } 1198 EXPORT_SYMBOL_GPL(iommu_group_set_name); 1199 1200 static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 1201 struct device *dev) 1202 { 1203 struct iommu_resv_region *entry; 1204 LIST_HEAD(mappings); 1205 unsigned long pg_size; 1206 int ret = 0; 1207 1208 pg_size = domain->pgsize_bitmap ? 
1UL << __ffs(domain->pgsize_bitmap) : 0; 1209 1210 if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size)) 1211 return -EINVAL; 1212 1213 iommu_get_resv_regions(dev, &mappings); 1214 1215 /* We need to consider overlapping regions for different devices */ 1216 list_for_each_entry(entry, &mappings, list) { 1217 dma_addr_t start, end, addr; 1218 size_t map_size = 0; 1219 1220 if (entry->type == IOMMU_RESV_DIRECT) 1221 dev->iommu->require_direct = 1; 1222 1223 if ((entry->type != IOMMU_RESV_DIRECT && 1224 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) || 1225 !iommu_is_dma_domain(domain)) 1226 continue; 1227 1228 start = ALIGN(entry->start, pg_size); 1229 end = ALIGN(entry->start + entry->length, pg_size); 1230 1231 for (addr = start; addr <= end; addr += pg_size) { 1232 phys_addr_t phys_addr; 1233 1234 if (addr == end) 1235 goto map_end; 1236 1237 /* 1238 * Return address by iommu_iova_to_phys for 0 is 1239 * ambiguous. Offset to address 1 if addr is 0. 1240 */ 1241 phys_addr = iommu_iova_to_phys(domain, addr ? 
addr : 1); 1242 if (!phys_addr) { 1243 map_size += pg_size; 1244 continue; 1245 } 1246 1247 map_end: 1248 if (map_size) { 1249 ret = iommu_map(domain, addr - map_size, 1250 addr - map_size, map_size, 1251 entry->prot, GFP_KERNEL); 1252 if (ret) 1253 goto out; 1254 map_size = 0; 1255 } 1256 } 1257 1258 } 1259 out: 1260 iommu_put_resv_regions(dev, &mappings); 1261 1262 return ret; 1263 } 1264 1265 /* This is undone by __iommu_group_free_device() */ 1266 static struct group_device *iommu_group_alloc_device(struct iommu_group *group, 1267 struct device *dev) 1268 { 1269 int ret, i = 0; 1270 struct group_device *device; 1271 1272 device = kzalloc_obj(*device); 1273 if (!device) 1274 return ERR_PTR(-ENOMEM); 1275 1276 device->dev = dev; 1277 1278 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1279 if (ret) 1280 goto err_free_device; 1281 1282 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1283 rename: 1284 if (!device->name) { 1285 ret = -ENOMEM; 1286 goto err_remove_link; 1287 } 1288 1289 ret = sysfs_create_link_nowarn(group->devices_kobj, 1290 &dev->kobj, device->name); 1291 if (ret) { 1292 if (ret == -EEXIST && i >= 0) { 1293 /* 1294 * Account for the slim chance of collision 1295 * and append an instance to the name. 
1296 */ 1297 kfree(device->name); 1298 device->name = kasprintf(GFP_KERNEL, "%s.%d", 1299 kobject_name(&dev->kobj), i++); 1300 goto rename; 1301 } 1302 goto err_free_name; 1303 } 1304 1305 trace_add_device_to_group(group->id, dev); 1306 1307 dev_info(dev, "Adding to iommu group %d\n", group->id); 1308 1309 return device; 1310 1311 err_free_name: 1312 kfree(device->name); 1313 err_remove_link: 1314 sysfs_remove_link(&dev->kobj, "iommu_group"); 1315 err_free_device: 1316 kfree(device); 1317 dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); 1318 return ERR_PTR(ret); 1319 } 1320 1321 /** 1322 * iommu_group_add_device - add a device to an iommu group 1323 * @group: the group into which to add the device (reference should be held) 1324 * @dev: the device 1325 * 1326 * This function is called by an iommu driver to add a device into a 1327 * group. Adding a device increments the group reference count. 1328 */ 1329 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 1330 { 1331 struct group_device *gdev; 1332 1333 gdev = iommu_group_alloc_device(group, dev); 1334 if (IS_ERR(gdev)) 1335 return PTR_ERR(gdev); 1336 1337 iommu_group_ref_get(group); 1338 dev->iommu_group = group; 1339 1340 mutex_lock(&group->mutex); 1341 list_add_tail(&gdev->list, &group->devices); 1342 mutex_unlock(&group->mutex); 1343 return 0; 1344 } 1345 EXPORT_SYMBOL_GPL(iommu_group_add_device); 1346 1347 /** 1348 * iommu_group_remove_device - remove a device from it's current group 1349 * @dev: device to be removed 1350 * 1351 * This function is called by an iommu driver to remove the device from 1352 * it's current group. This decrements the iommu group reference count. 
1353 */ 1354 void iommu_group_remove_device(struct device *dev) 1355 { 1356 struct iommu_group *group = dev->iommu_group; 1357 1358 if (!group) 1359 return; 1360 1361 dev_info(dev, "Removing from iommu group %d\n", group->id); 1362 1363 __iommu_group_remove_device(dev); 1364 } 1365 EXPORT_SYMBOL_GPL(iommu_group_remove_device); 1366 1367 #if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) 1368 /** 1369 * iommu_group_mutex_assert - Check device group mutex lock 1370 * @dev: the device that has group param set 1371 * 1372 * This function is called by an iommu driver to check whether it holds 1373 * group mutex lock for the given device or not. 1374 * 1375 * Note that this function must be called after device group param is set. 1376 */ 1377 void iommu_group_mutex_assert(struct device *dev) 1378 { 1379 struct iommu_group *group = dev->iommu_group; 1380 1381 lockdep_assert_held(&group->mutex); 1382 } 1383 EXPORT_SYMBOL_GPL(iommu_group_mutex_assert); 1384 #endif 1385 1386 static struct device *iommu_group_first_dev(struct iommu_group *group) 1387 { 1388 lockdep_assert_held(&group->mutex); 1389 return list_first_entry(&group->devices, struct group_device, list)->dev; 1390 } 1391 1392 /** 1393 * iommu_group_for_each_dev - iterate over each device in the group 1394 * @group: the group 1395 * @data: caller opaque data to be passed to callback function 1396 * @fn: caller supplied callback function 1397 * 1398 * This function is called by group users to iterate over group devices. 1399 * Callers should hold a reference count to the group during callback. 1400 * The group->mutex is held across callbacks, which will block calls to 1401 * iommu_group_add/remove_device. 
1402 */ 1403 int iommu_group_for_each_dev(struct iommu_group *group, void *data, 1404 int (*fn)(struct device *, void *)) 1405 { 1406 struct group_device *device; 1407 int ret = 0; 1408 1409 mutex_lock(&group->mutex); 1410 for_each_group_device(group, device) { 1411 ret = fn(device->dev, data); 1412 if (ret) 1413 break; 1414 } 1415 mutex_unlock(&group->mutex); 1416 1417 return ret; 1418 } 1419 EXPORT_SYMBOL_GPL(iommu_group_for_each_dev); 1420 1421 /** 1422 * iommu_group_get - Return the group for a device and increment reference 1423 * @dev: get the group that this device belongs to 1424 * 1425 * This function is called by iommu drivers and users to get the group 1426 * for the specified device. If found, the group is returned and the group 1427 * reference in incremented, else NULL. 1428 */ 1429 struct iommu_group *iommu_group_get(struct device *dev) 1430 { 1431 struct iommu_group *group = dev->iommu_group; 1432 1433 if (group) 1434 kobject_get(group->devices_kobj); 1435 1436 return group; 1437 } 1438 EXPORT_SYMBOL_GPL(iommu_group_get); 1439 1440 /** 1441 * iommu_group_ref_get - Increment reference on a group 1442 * @group: the group to use, must not be NULL 1443 * 1444 * This function is called by iommu drivers to take additional references on an 1445 * existing group. Returns the given group for convenience. 1446 */ 1447 struct iommu_group *iommu_group_ref_get(struct iommu_group *group) 1448 { 1449 kobject_get(group->devices_kobj); 1450 return group; 1451 } 1452 EXPORT_SYMBOL_GPL(iommu_group_ref_get); 1453 1454 /** 1455 * iommu_group_put - Decrement group reference 1456 * @group: the group to use 1457 * 1458 * This function is called by iommu drivers and users to release the 1459 * iommu group. Once the reference count is zero, the group is released. 
1460 */ 1461 void iommu_group_put(struct iommu_group *group) 1462 { 1463 if (group) 1464 kobject_put(group->devices_kobj); 1465 } 1466 EXPORT_SYMBOL_GPL(iommu_group_put); 1467 1468 /** 1469 * iommu_group_id - Return ID for a group 1470 * @group: the group to ID 1471 * 1472 * Return the unique ID for the group matching the sysfs group number. 1473 */ 1474 int iommu_group_id(struct iommu_group *group) 1475 { 1476 return group->id; 1477 } 1478 EXPORT_SYMBOL_GPL(iommu_group_id); 1479 1480 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1481 unsigned long *devfns); 1482 1483 /* 1484 * To consider a PCI device isolated, we require ACS to support Source 1485 * Validation, Request Redirection, Completer Redirection, and Upstream 1486 * Forwarding. This effectively means that devices cannot spoof their 1487 * requester ID, requests and completions cannot be redirected, and all 1488 * transactions are forwarded upstream, even as it passes through a 1489 * bridge where the target device is downstream. 1490 */ 1491 #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) 1492 1493 /* 1494 * For multifunction devices which are not isolated from each other, find 1495 * all the other non-isolated functions and look for existing groups. For 1496 * each function, we also need to look for aliases to or from other devices 1497 * that may already have a group. 
1498 */ 1499 static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, 1500 unsigned long *devfns) 1501 { 1502 struct pci_dev *tmp = NULL; 1503 struct iommu_group *group; 1504 1505 if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) 1506 return NULL; 1507 1508 for_each_pci_dev(tmp) { 1509 if (tmp == pdev || tmp->bus != pdev->bus || 1510 PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || 1511 pci_acs_enabled(tmp, REQ_ACS_FLAGS)) 1512 continue; 1513 1514 group = get_pci_alias_group(tmp, devfns); 1515 if (group) { 1516 pci_dev_put(tmp); 1517 return group; 1518 } 1519 } 1520 1521 return NULL; 1522 } 1523 1524 /* 1525 * Look for aliases to or from the given device for existing groups. DMA 1526 * aliases are only supported on the same bus, therefore the search 1527 * space is quite small (especially since we're really only looking at pcie 1528 * device, and therefore only expect multiple slots on the root complex or 1529 * downstream switch ports). It's conceivable though that a pair of 1530 * multifunction devices could have aliases between them that would cause a 1531 * loop. To prevent this, we use a bitmap to track where we've been. 
1532 */ 1533 static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, 1534 unsigned long *devfns) 1535 { 1536 struct pci_dev *tmp = NULL; 1537 struct iommu_group *group; 1538 1539 if (test_and_set_bit(pdev->devfn & 0xff, devfns)) 1540 return NULL; 1541 1542 group = iommu_group_get(&pdev->dev); 1543 if (group) 1544 return group; 1545 1546 for_each_pci_dev(tmp) { 1547 if (tmp == pdev || tmp->bus != pdev->bus) 1548 continue; 1549 1550 /* We alias them or they alias us */ 1551 if (pci_devs_are_dma_aliases(pdev, tmp)) { 1552 group = get_pci_alias_group(tmp, devfns); 1553 if (group) { 1554 pci_dev_put(tmp); 1555 return group; 1556 } 1557 1558 group = get_pci_function_alias_group(tmp, devfns); 1559 if (group) { 1560 pci_dev_put(tmp); 1561 return group; 1562 } 1563 } 1564 } 1565 1566 return NULL; 1567 } 1568 1569 struct group_for_pci_data { 1570 struct pci_dev *pdev; 1571 struct iommu_group *group; 1572 }; 1573 1574 /* 1575 * DMA alias iterator callback, return the last seen device. Stop and return 1576 * the IOMMU group if we find one along the way. 1577 */ 1578 static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) 1579 { 1580 struct group_for_pci_data *data = opaque; 1581 1582 data->pdev = pdev; 1583 data->group = iommu_group_get(&pdev->dev); 1584 1585 return data->group != NULL; 1586 } 1587 1588 /* 1589 * Generic device_group call-back function. It just allocates one 1590 * iommu-group per device. 1591 */ 1592 struct iommu_group *generic_device_group(struct device *dev) 1593 { 1594 return iommu_group_alloc(); 1595 } 1596 EXPORT_SYMBOL_GPL(generic_device_group); 1597 1598 /* 1599 * Generic device_group call-back function. It just allocates one 1600 * iommu-group per iommu driver instance shared by every device 1601 * probed by that iommu driver. 
1602 */ 1603 struct iommu_group *generic_single_device_group(struct device *dev) 1604 { 1605 struct iommu_device *iommu = dev->iommu->iommu_dev; 1606 1607 if (!iommu->singleton_group) { 1608 struct iommu_group *group; 1609 1610 group = iommu_group_alloc(); 1611 if (IS_ERR(group)) 1612 return group; 1613 iommu->singleton_group = group; 1614 } 1615 return iommu_group_ref_get(iommu->singleton_group); 1616 } 1617 EXPORT_SYMBOL_GPL(generic_single_device_group); 1618 1619 /* 1620 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1621 * to find or create an IOMMU group for a device. 1622 */ 1623 struct iommu_group *pci_device_group(struct device *dev) 1624 { 1625 struct pci_dev *pdev = to_pci_dev(dev); 1626 struct group_for_pci_data data; 1627 struct pci_bus *bus; 1628 struct iommu_group *group = NULL; 1629 u64 devfns[4] = { 0 }; 1630 1631 if (WARN_ON(!dev_is_pci(dev))) 1632 return ERR_PTR(-EINVAL); 1633 1634 /* 1635 * Find the upstream DMA alias for the device. A device must not 1636 * be aliased due to topology in order to have its own IOMMU group. 1637 * If we find an alias along the way that already belongs to a 1638 * group, use it. 1639 */ 1640 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1641 return data.group; 1642 1643 pdev = data.pdev; 1644 1645 /* 1646 * Continue upstream from the point of minimum IOMMU granularity 1647 * due to aliases to the point where devices are protected from 1648 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1649 * group, use it. 1650 */ 1651 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1652 if (!bus->self) 1653 continue; 1654 1655 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1656 break; 1657 1658 pdev = bus->self; 1659 1660 group = iommu_group_get(&pdev->dev); 1661 if (group) 1662 return group; 1663 } 1664 1665 /* 1666 * Look for existing groups on device aliases. 
If we alias another 1667 * device or another device aliases us, use the same group. 1668 */ 1669 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1670 if (group) 1671 return group; 1672 1673 /* 1674 * Look for existing groups on non-isolated functions on the same 1675 * slot and aliases of those funcions, if any. No need to clear 1676 * the search bitmap, the tested devfns are still valid. 1677 */ 1678 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1679 if (group) 1680 return group; 1681 1682 /* No shared group found, allocate new */ 1683 return iommu_group_alloc(); 1684 } 1685 EXPORT_SYMBOL_GPL(pci_device_group); 1686 1687 /* Get the IOMMU group for device on fsl-mc bus */ 1688 struct iommu_group *fsl_mc_device_group(struct device *dev) 1689 { 1690 struct device *cont_dev = fsl_mc_cont_dev(dev); 1691 struct iommu_group *group; 1692 1693 group = iommu_group_get(cont_dev); 1694 if (!group) 1695 group = iommu_group_alloc(); 1696 return group; 1697 } 1698 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1699 1700 static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev) 1701 { 1702 const struct iommu_ops *ops = dev_iommu_ops(dev); 1703 struct iommu_domain *domain; 1704 1705 if (ops->identity_domain) 1706 return ops->identity_domain; 1707 1708 if (ops->domain_alloc_identity) { 1709 domain = ops->domain_alloc_identity(dev); 1710 if (IS_ERR(domain)) 1711 return domain; 1712 } else { 1713 return ERR_PTR(-EOPNOTSUPP); 1714 } 1715 1716 iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops); 1717 return domain; 1718 } 1719 1720 static struct iommu_domain * 1721 __iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1722 { 1723 struct device *dev = iommu_group_first_dev(group); 1724 struct iommu_domain *dom; 1725 1726 if (group->default_domain && group->default_domain->type == req_type) 1727 return group->default_domain; 1728 1729 /* 1730 * When allocating the DMA API domain assume that the driver is going to 
1731 * use PASID and make sure the RID's domain is PASID compatible. 1732 */ 1733 if (req_type & __IOMMU_DOMAIN_PAGING) { 1734 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 1735 dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : 0); 1736 1737 /* 1738 * If driver does not support PASID feature then 1739 * try to allocate non-PASID domain 1740 */ 1741 if (PTR_ERR(dom) == -EOPNOTSUPP) 1742 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 0); 1743 1744 return dom; 1745 } 1746 1747 if (req_type == IOMMU_DOMAIN_IDENTITY) 1748 return __iommu_alloc_identity_domain(dev); 1749 1750 return ERR_PTR(-EINVAL); 1751 } 1752 1753 /* 1754 * req_type of 0 means "auto" which means to select a domain based on 1755 * iommu_def_domain_type or what the driver actually supports. 1756 */ 1757 static struct iommu_domain * 1758 iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1759 { 1760 const struct iommu_ops *ops = dev_iommu_ops(iommu_group_first_dev(group)); 1761 struct iommu_domain *dom; 1762 1763 lockdep_assert_held(&group->mutex); 1764 1765 /* 1766 * Allow legacy drivers to specify the domain that will be the default 1767 * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM 1768 * domain. Do not use in new drivers. 
1769 */ 1770 if (ops->default_domain) { 1771 if (req_type != ops->default_domain->type) 1772 return ERR_PTR(-EINVAL); 1773 return ops->default_domain; 1774 } 1775 1776 if (req_type) 1777 return __iommu_group_alloc_default_domain(group, req_type); 1778 1779 /* The driver gave no guidance on what type to use, try the default */ 1780 dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); 1781 if (!IS_ERR(dom)) 1782 return dom; 1783 1784 /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ 1785 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) 1786 return ERR_PTR(-EINVAL); 1787 dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); 1788 if (IS_ERR(dom)) 1789 return dom; 1790 1791 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1792 iommu_def_domain_type, group->name); 1793 return dom; 1794 } 1795 1796 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1797 { 1798 return group->default_domain; 1799 } 1800 1801 static int probe_iommu_group(struct device *dev, void *data) 1802 { 1803 struct list_head *group_list = data; 1804 int ret; 1805 1806 mutex_lock(&iommu_probe_device_lock); 1807 ret = __iommu_probe_device(dev, group_list); 1808 mutex_unlock(&iommu_probe_device_lock); 1809 if (ret == -ENODEV) 1810 ret = 0; 1811 1812 return ret; 1813 } 1814 1815 static int iommu_bus_notifier(struct notifier_block *nb, 1816 unsigned long action, void *data) 1817 { 1818 struct device *dev = data; 1819 1820 if (action == BUS_NOTIFY_ADD_DEVICE) { 1821 int ret; 1822 1823 ret = iommu_probe_device(dev); 1824 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1825 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1826 iommu_release_device(dev); 1827 return NOTIFY_OK; 1828 } 1829 1830 return 0; 1831 } 1832 1833 /* 1834 * Combine the driver's chosen def_domain_type across all the devices in a 1835 * group. Drivers must give a consistent result. 
1836 */ 1837 static int iommu_get_def_domain_type(struct iommu_group *group, 1838 struct device *dev, int cur_type) 1839 { 1840 const struct iommu_ops *ops = dev_iommu_ops(dev); 1841 int type; 1842 1843 if (ops->default_domain) { 1844 /* 1845 * Drivers that declare a global static default_domain will 1846 * always choose that. 1847 */ 1848 type = ops->default_domain->type; 1849 } else { 1850 if (ops->def_domain_type) 1851 type = ops->def_domain_type(dev); 1852 else 1853 return cur_type; 1854 } 1855 if (!type || cur_type == type) 1856 return cur_type; 1857 if (!cur_type) 1858 return type; 1859 1860 dev_err_ratelimited( 1861 dev, 1862 "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n", 1863 iommu_domain_type_str(cur_type), iommu_domain_type_str(type), 1864 group->id); 1865 1866 /* 1867 * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY 1868 * takes precedence. 1869 */ 1870 if (type == IOMMU_DOMAIN_IDENTITY) 1871 return type; 1872 return cur_type; 1873 } 1874 1875 /* 1876 * A target_type of 0 will select the best domain type. 0 can be returned in 1877 * this case meaning the global default should be used. 1878 */ 1879 static int iommu_get_default_domain_type(struct iommu_group *group, 1880 int target_type) 1881 { 1882 struct device *untrusted = NULL; 1883 struct group_device *gdev; 1884 int driver_type = 0; 1885 1886 lockdep_assert_held(&group->mutex); 1887 1888 /* 1889 * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an 1890 * identity_domain and it will automatically become their default 1891 * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain. 1892 * Override the selection to IDENTITY. 
1893 */ 1894 if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { 1895 static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) && 1896 IS_ENABLED(CONFIG_IOMMU_DMA))); 1897 driver_type = IOMMU_DOMAIN_IDENTITY; 1898 } 1899 1900 for_each_group_device(group, gdev) { 1901 driver_type = iommu_get_def_domain_type(group, gdev->dev, 1902 driver_type); 1903 1904 if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) { 1905 /* 1906 * No ARM32 using systems will set untrusted, it cannot 1907 * work. 1908 */ 1909 if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))) 1910 return -1; 1911 untrusted = gdev->dev; 1912 } 1913 } 1914 1915 /* 1916 * If the common dma ops are not selected in kconfig then we cannot use 1917 * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been 1918 * selected. 1919 */ 1920 if (!IS_ENABLED(CONFIG_IOMMU_DMA)) { 1921 if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA)) 1922 return -1; 1923 if (!driver_type) 1924 driver_type = IOMMU_DOMAIN_IDENTITY; 1925 } 1926 1927 if (untrusted) { 1928 if (driver_type && driver_type != IOMMU_DOMAIN_DMA) { 1929 dev_err_ratelimited( 1930 untrusted, 1931 "Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n", 1932 group->id, iommu_domain_type_str(driver_type)); 1933 return -1; 1934 } 1935 driver_type = IOMMU_DOMAIN_DMA; 1936 } 1937 1938 if (target_type) { 1939 if (driver_type && target_type != driver_type) 1940 return -1; 1941 return target_type; 1942 } 1943 return driver_type; 1944 } 1945 1946 static void iommu_group_do_probe_finalize(struct device *dev) 1947 { 1948 const struct iommu_ops *ops = dev_iommu_ops(dev); 1949 1950 if (ops->probe_finalize) 1951 ops->probe_finalize(dev); 1952 } 1953 1954 static int bus_iommu_probe(const struct bus_type *bus) 1955 { 1956 struct iommu_group *group, *next; 1957 LIST_HEAD(group_list); 1958 int ret; 1959 1960 ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); 1961 if (ret) 1962 return ret; 1963 1964 list_for_each_entry_safe(group, next, 
&group_list, entry) { 1965 struct group_device *gdev; 1966 1967 mutex_lock(&group->mutex); 1968 1969 /* Remove item from the list */ 1970 list_del_init(&group->entry); 1971 1972 /* 1973 * We go to the trouble of deferred default domain creation so 1974 * that the cross-group default domain type and the setup of the 1975 * IOMMU_RESV_DIRECT will work correctly in non-hotpug scenarios. 1976 */ 1977 ret = iommu_setup_default_domain(group, 0); 1978 if (ret) { 1979 mutex_unlock(&group->mutex); 1980 return ret; 1981 } 1982 for_each_group_device(group, gdev) 1983 iommu_setup_dma_ops(gdev->dev, group->default_domain); 1984 mutex_unlock(&group->mutex); 1985 1986 /* 1987 * FIXME: Mis-locked because the ops->probe_finalize() call-back 1988 * of some IOMMU drivers calls arm_iommu_attach_device() which 1989 * in-turn might call back into IOMMU core code, where it tries 1990 * to take group->mutex, resulting in a deadlock. 1991 */ 1992 for_each_group_device(group, gdev) 1993 iommu_group_do_probe_finalize(gdev->dev); 1994 } 1995 1996 return 0; 1997 } 1998 1999 /** 2000 * device_iommu_capable() - check for a general IOMMU capability 2001 * @dev: device to which the capability would be relevant, if available 2002 * @cap: IOMMU capability 2003 * 2004 * Return: true if an IOMMU is present and supports the given capability 2005 * for the given device, otherwise false. 2006 */ 2007 bool device_iommu_capable(struct device *dev, enum iommu_cap cap) 2008 { 2009 const struct iommu_ops *ops; 2010 2011 if (!dev_has_iommu(dev)) 2012 return false; 2013 2014 ops = dev_iommu_ops(dev); 2015 if (!ops->capable) 2016 return false; 2017 2018 return ops->capable(dev, cap); 2019 } 2020 EXPORT_SYMBOL_GPL(device_iommu_capable); 2021 2022 /** 2023 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi() 2024 * for a group 2025 * @group: Group to query 2026 * 2027 * IOMMU groups should not have differing values of 2028 * msi_device_has_isolated_msi() for devices in a group. 
However nothing 2029 * directly prevents this, so ensure mistakes don't result in isolation failures 2030 * by checking that all the devices are the same. 2031 */ 2032 bool iommu_group_has_isolated_msi(struct iommu_group *group) 2033 { 2034 struct group_device *group_dev; 2035 bool ret = true; 2036 2037 mutex_lock(&group->mutex); 2038 for_each_group_device(group, group_dev) 2039 ret &= msi_device_has_isolated_msi(group_dev->dev); 2040 mutex_unlock(&group->mutex); 2041 return ret; 2042 } 2043 EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi); 2044 2045 /** 2046 * iommu_set_fault_handler() - set a fault handler for an iommu domain 2047 * @domain: iommu domain 2048 * @handler: fault handler 2049 * @token: user data, will be passed back to the fault handler 2050 * 2051 * This function should be used by IOMMU users which want to be notified 2052 * whenever an IOMMU fault happens. 2053 * 2054 * The fault handler itself should return 0 on success, and an appropriate 2055 * error code otherwise. 2056 */ 2057 void iommu_set_fault_handler(struct iommu_domain *domain, 2058 iommu_fault_handler_t handler, 2059 void *token) 2060 { 2061 if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE)) 2062 return; 2063 2064 domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER; 2065 domain->handler = handler; 2066 domain->handler_token = token; 2067 } 2068 EXPORT_SYMBOL_GPL(iommu_set_fault_handler); 2069 2070 static void iommu_domain_init(struct iommu_domain *domain, unsigned int type, 2071 const struct iommu_ops *ops) 2072 { 2073 domain->type = type; 2074 domain->owner = ops; 2075 if (!domain->ops) 2076 domain->ops = ops->default_domain_ops; 2077 } 2078 2079 static struct iommu_domain * 2080 __iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type, 2081 unsigned int flags) 2082 { 2083 const struct iommu_ops *ops; 2084 struct iommu_domain *domain; 2085 2086 if (!dev_has_iommu(dev)) 2087 return ERR_PTR(-ENODEV); 2088 2089 ops = dev_iommu_ops(dev); 2090 2091 if 
(ops->domain_alloc_paging && !flags) 2092 domain = ops->domain_alloc_paging(dev); 2093 else if (ops->domain_alloc_paging_flags) 2094 domain = ops->domain_alloc_paging_flags(dev, flags, NULL); 2095 #if IS_ENABLED(CONFIG_FSL_PAMU) 2096 else if (ops->domain_alloc && !flags) 2097 domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED); 2098 #endif 2099 else 2100 return ERR_PTR(-EOPNOTSUPP); 2101 2102 if (IS_ERR(domain)) 2103 return domain; 2104 if (!domain) 2105 return ERR_PTR(-ENOMEM); 2106 2107 iommu_domain_init(domain, type, ops); 2108 return domain; 2109 } 2110 2111 /** 2112 * iommu_paging_domain_alloc_flags() - Allocate a paging domain 2113 * @dev: device for which the domain is allocated 2114 * @flags: Bitmap of iommufd_hwpt_alloc_flags 2115 * 2116 * Allocate a paging domain which will be managed by a kernel driver. Return 2117 * allocated domain if successful, or an ERR pointer for failure. 2118 */ 2119 struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, 2120 unsigned int flags) 2121 { 2122 return __iommu_paging_domain_alloc_flags(dev, 2123 IOMMU_DOMAIN_UNMANAGED, flags); 2124 } 2125 EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags); 2126 2127 void iommu_domain_free(struct iommu_domain *domain) 2128 { 2129 switch (domain->cookie_type) { 2130 case IOMMU_COOKIE_DMA_IOVA: 2131 iommu_put_dma_cookie(domain); 2132 break; 2133 case IOMMU_COOKIE_DMA_MSI: 2134 iommu_put_msi_cookie(domain); 2135 break; 2136 case IOMMU_COOKIE_SVA: 2137 mmdrop(domain->mm); 2138 break; 2139 default: 2140 break; 2141 } 2142 if (domain->ops->free) 2143 domain->ops->free(domain); 2144 } 2145 EXPORT_SYMBOL_GPL(iommu_domain_free); 2146 2147 /* 2148 * Put the group's domain back to the appropriate core-owned domain - either the 2149 * standard kernel-mode DMA configuration or an all-DMA-blocked domain. 
2150 */ 2151 static void __iommu_group_set_core_domain(struct iommu_group *group) 2152 { 2153 struct iommu_domain *new_domain; 2154 2155 if (group->owner) 2156 new_domain = group->blocking_domain; 2157 else 2158 new_domain = group->default_domain; 2159 2160 __iommu_group_set_domain_nofail(group, new_domain); 2161 } 2162 2163 static int __iommu_attach_device(struct iommu_domain *domain, 2164 struct device *dev, struct iommu_domain *old) 2165 { 2166 int ret; 2167 2168 if (unlikely(domain->ops->attach_dev == NULL)) 2169 return -ENODEV; 2170 2171 ret = domain->ops->attach_dev(domain, dev, old); 2172 if (ret) 2173 return ret; 2174 dev->iommu->attach_deferred = 0; 2175 trace_attach_device_to_domain(dev); 2176 return 0; 2177 } 2178 2179 /** 2180 * iommu_attach_device - Attach an IOMMU domain to a device 2181 * @domain: IOMMU domain to attach 2182 * @dev: Device that will be attached 2183 * 2184 * Returns 0 on success and error code on failure 2185 * 2186 * Note that EINVAL can be treated as a soft failure, indicating 2187 * that certain configuration of the domain is incompatible with 2188 * the device. In this case attaching a different domain to the 2189 * device may succeed. 
2190 */ 2191 int iommu_attach_device(struct iommu_domain *domain, struct device *dev) 2192 { 2193 /* Caller must be a probed driver on dev */ 2194 struct iommu_group *group = dev->iommu_group; 2195 int ret; 2196 2197 if (!group) 2198 return -ENODEV; 2199 2200 /* 2201 * Lock the group to make sure the device-count doesn't 2202 * change while we are attaching 2203 */ 2204 mutex_lock(&group->mutex); 2205 ret = -EINVAL; 2206 if (list_count_nodes(&group->devices) != 1) 2207 goto out_unlock; 2208 2209 ret = __iommu_attach_group(domain, group); 2210 2211 out_unlock: 2212 mutex_unlock(&group->mutex); 2213 return ret; 2214 } 2215 EXPORT_SYMBOL_GPL(iommu_attach_device); 2216 2217 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) 2218 { 2219 struct group_device *gdev; 2220 2221 /* 2222 * This is called on the dma mapping fast path so avoid locking. This is 2223 * racy, but we have an expectation that the driver will setup its DMAs 2224 * inside probe while being single threaded to avoid racing. 2225 */ 2226 if (!dev->iommu || !dev->iommu->attach_deferred) 2227 return 0; 2228 2229 guard(mutex)(&dev->iommu_group->mutex); 2230 2231 gdev = __dev_to_gdev(dev); 2232 if (WARN_ON(!gdev)) 2233 return -ENODEV; 2234 2235 /* 2236 * This is a concurrent attach during device recovery. Reject it until 2237 * pci_dev_reset_iommu_done() attaches the device to group->domain. 2238 * 2239 * Note that this might fail the iommu_dma_map(). But there's nothing 2240 * more we can do here. 
2241 */ 2242 if (gdev->blocked) 2243 return -EBUSY; 2244 return __iommu_attach_device(domain, dev, NULL); 2245 } 2246 2247 void iommu_detach_device(struct iommu_domain *domain, struct device *dev) 2248 { 2249 /* Caller must be a probed driver on dev */ 2250 struct iommu_group *group = dev->iommu_group; 2251 2252 if (!group) 2253 return; 2254 2255 mutex_lock(&group->mutex); 2256 if (WARN_ON(domain != group->domain) || 2257 WARN_ON(list_count_nodes(&group->devices) != 1)) 2258 goto out_unlock; 2259 __iommu_group_set_core_domain(group); 2260 2261 out_unlock: 2262 mutex_unlock(&group->mutex); 2263 } 2264 EXPORT_SYMBOL_GPL(iommu_detach_device); 2265 2266 /** 2267 * iommu_get_domain_for_dev() - Return the DMA API domain pointer 2268 * @dev: Device to query 2269 * 2270 * This function can be called within a driver bound to dev. The returned 2271 * pointer is valid for the lifetime of the bound driver. 2272 * 2273 * It should not be called by drivers with driver_managed_dma = true. 2274 */ 2275 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) 2276 { 2277 /* Caller must be a probed driver on dev */ 2278 struct iommu_group *group = dev->iommu_group; 2279 2280 if (!group) 2281 return NULL; 2282 2283 lockdep_assert_not_held(&group->mutex); 2284 2285 return group->domain; 2286 } 2287 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); 2288 2289 /** 2290 * iommu_driver_get_domain_for_dev() - Return the driver-level domain pointer 2291 * @dev: Device to query 2292 * 2293 * This function can be called by an iommu driver that wants to get the physical 2294 * domain within an iommu callback function where group->mutex is held. 
2295 */ 2296 struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev) 2297 { 2298 struct iommu_group *group = dev->iommu_group; 2299 struct group_device *gdev; 2300 2301 lockdep_assert_held(&group->mutex); 2302 2303 gdev = __dev_to_gdev(dev); 2304 if (WARN_ON(!gdev)) 2305 return NULL; 2306 2307 /* 2308 * Driver handles the low-level __iommu_attach_device(), including the 2309 * one invoked by pci_dev_reset_iommu_done() re-attaching the device to 2310 * the cached group->domain. In this case, the driver must get the old 2311 * domain from group->blocking_domain rather than group->domain. This 2312 * prevents it from re-attaching the device from group->domain (old) to 2313 * group->domain (new). 2314 */ 2315 if (gdev->blocked) 2316 return group->blocking_domain; 2317 2318 return group->domain; 2319 } 2320 EXPORT_SYMBOL_GPL(iommu_driver_get_domain_for_dev); 2321 2322 /* 2323 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2324 * guarantees that the group and its default domain are valid and correct. 2325 */ 2326 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2327 { 2328 return dev->iommu_group->default_domain; 2329 } 2330 2331 static void *iommu_make_pasid_array_entry(struct iommu_domain *domain, 2332 struct iommu_attach_handle *handle) 2333 { 2334 if (handle) { 2335 handle->domain = domain; 2336 return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE); 2337 } 2338 2339 return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN); 2340 } 2341 2342 static bool domain_iommu_ops_compatible(const struct iommu_ops *ops, 2343 struct iommu_domain *domain) 2344 { 2345 if (domain->owner == ops) 2346 return true; 2347 2348 /* For static domains, owner isn't set. 
*/ 2349 if (domain == ops->blocked_domain || domain == ops->identity_domain) 2350 return true; 2351 2352 return false; 2353 } 2354 2355 static int __iommu_attach_group(struct iommu_domain *domain, 2356 struct iommu_group *group) 2357 { 2358 struct device *dev; 2359 2360 if (group->domain && group->domain != group->default_domain && 2361 group->domain != group->blocking_domain) 2362 return -EBUSY; 2363 2364 dev = iommu_group_first_dev(group); 2365 if (!dev_has_iommu(dev) || 2366 !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain)) 2367 return -EINVAL; 2368 2369 return __iommu_group_set_domain(group, domain); 2370 } 2371 2372 /** 2373 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2374 * @domain: IOMMU domain to attach 2375 * @group: IOMMU group that will be attached 2376 * 2377 * Returns 0 on success and error code on failure 2378 * 2379 * Note that EINVAL can be treated as a soft failure, indicating 2380 * that certain configuration of the domain is incompatible with 2381 * the group. In this case attaching a different domain to the 2382 * group may succeed. 2383 */ 2384 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2385 { 2386 int ret; 2387 2388 mutex_lock(&group->mutex); 2389 ret = __iommu_attach_group(domain, group); 2390 mutex_unlock(&group->mutex); 2391 2392 return ret; 2393 } 2394 EXPORT_SYMBOL_GPL(iommu_attach_group); 2395 2396 static int __iommu_device_set_domain(struct iommu_group *group, 2397 struct device *dev, 2398 struct iommu_domain *new_domain, 2399 struct iommu_domain *old_domain, 2400 unsigned int flags) 2401 { 2402 int ret; 2403 2404 /* 2405 * If the device requires IOMMU_RESV_DIRECT then we cannot allow 2406 * the blocking domain to be attached as it does not contain the 2407 * required 1:1 mapping. This test effectively excludes the device 2408 * being used with iommu_group_claim_dma_owner() which will block 2409 * vfio and iommufd as well. 
2410 */ 2411 if (dev->iommu->require_direct && 2412 (new_domain->type == IOMMU_DOMAIN_BLOCKED || 2413 new_domain == group->blocking_domain)) { 2414 dev_warn(dev, 2415 "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); 2416 return -EINVAL; 2417 } 2418 2419 if (dev->iommu->attach_deferred) { 2420 if (new_domain == group->default_domain) 2421 return 0; 2422 dev->iommu->attach_deferred = 0; 2423 } 2424 2425 ret = __iommu_attach_device(new_domain, dev, old_domain); 2426 if (ret) { 2427 /* 2428 * If we have a blocking domain then try to attach that in hopes 2429 * of avoiding a UAF. Modern drivers should implement blocking 2430 * domains as global statics that cannot fail. 2431 */ 2432 if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && 2433 group->blocking_domain && 2434 group->blocking_domain != new_domain) 2435 __iommu_attach_device(group->blocking_domain, dev, 2436 old_domain); 2437 return ret; 2438 } 2439 return 0; 2440 } 2441 2442 /* 2443 * If 0 is returned the group's domain is new_domain. If an error is returned 2444 * then the group's domain will be set back to the existing domain unless 2445 * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's 2446 * domains is left inconsistent. This is a driver bug to fail attach with a 2447 * previously good domain. We try to avoid a kernel UAF because of this. 2448 * 2449 * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU 2450 * API works on domains and devices. Bridge that gap by iterating over the 2451 * devices in a group. Ideally we'd have a single device which represents the 2452 * requestor ID of the group, but we also allow IOMMU drivers to create policy 2453 * defined minimum sets, where the physical hardware may be able to distiguish 2454 * members, but we wish to group them at a higher level (ex. untrusted 2455 * multi-function PCI devices). 
Thus we attach each device. 2456 */ 2457 static int __iommu_group_set_domain_internal(struct iommu_group *group, 2458 struct iommu_domain *new_domain, 2459 unsigned int flags) 2460 { 2461 struct group_device *last_gdev; 2462 struct group_device *gdev; 2463 int result; 2464 int ret; 2465 2466 lockdep_assert_held(&group->mutex); 2467 2468 if (group->domain == new_domain) 2469 return 0; 2470 2471 if (WARN_ON(!new_domain)) 2472 return -EINVAL; 2473 2474 /* 2475 * This is a concurrent attach during device recovery. Reject it until 2476 * pci_dev_reset_iommu_done() attaches the device to group->domain, if 2477 * IOMMU_SET_DOMAIN_MUST_SUCCEED is not set. 2478 */ 2479 if (group->recovery_cnt && !(flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)) 2480 return -EBUSY; 2481 2482 /* 2483 * Changing the domain is done by calling attach_dev() on the new 2484 * domain. This switch does not have to be atomic and DMA can be 2485 * discarded during the transition. DMA must only be able to access 2486 * either new_domain or group->domain, never something else. 2487 */ 2488 result = 0; 2489 for_each_group_device(group, gdev) { 2490 /* 2491 * Device under recovery is attached to group->blocking_domain. 2492 * Don't change that. pci_dev_reset_iommu_done() will re-attach 2493 * its domain to the updated group->domain, after the recovery. 2494 */ 2495 if (gdev->blocked) 2496 continue; 2497 ret = __iommu_device_set_domain(group, gdev->dev, new_domain, 2498 group->domain, flags); 2499 if (ret) { 2500 result = ret; 2501 /* 2502 * Keep trying the other devices in the group. If a 2503 * driver fails attach to an otherwise good domain, and 2504 * does not support blocking domains, it should at least 2505 * drop its reference on the current domain so we don't 2506 * UAF. 2507 */ 2508 if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) 2509 continue; 2510 goto err_revert; 2511 } 2512 } 2513 group->domain = new_domain; 2514 return result; 2515 2516 err_revert: 2517 /* 2518 * This is called in error unwind paths. 
A well behaved driver should 2519 * always allow us to attach to a domain that was already attached. 2520 */ 2521 last_gdev = gdev; 2522 for_each_group_device(group, gdev) { 2523 /* No need to revert the last gdev that failed to set domain */ 2524 if (gdev == last_gdev) 2525 break; 2526 /* 2527 * A NULL domain can happen only for first probe, in which case 2528 * we leave group->domain as NULL and let release clean 2529 * everything up. 2530 */ 2531 if (group->domain) 2532 WARN_ON(__iommu_device_set_domain( 2533 group, gdev->dev, group->domain, new_domain, 2534 IOMMU_SET_DOMAIN_MUST_SUCCEED)); 2535 } 2536 return ret; 2537 } 2538 2539 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2540 { 2541 mutex_lock(&group->mutex); 2542 __iommu_group_set_core_domain(group); 2543 mutex_unlock(&group->mutex); 2544 } 2545 EXPORT_SYMBOL_GPL(iommu_detach_group); 2546 2547 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2548 { 2549 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2550 return iova; 2551 2552 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2553 return 0; 2554 2555 return domain->ops->iova_to_phys(domain, iova); 2556 } 2557 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2558 2559 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2560 phys_addr_t paddr, size_t size, size_t *count) 2561 { 2562 unsigned int pgsize_idx, pgsize_idx_next; 2563 unsigned long pgsizes; 2564 size_t offset, pgsize, pgsize_next; 2565 size_t offset_end; 2566 unsigned long addr_merge = paddr | iova; 2567 2568 /* Page sizes supported by the hardware and small enough for @size */ 2569 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2570 2571 /* Constrain the page sizes further based on the maximum alignment */ 2572 if (likely(addr_merge)) 2573 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2574 2575 /* Make sure we have at least one suitable page size */ 2576 BUG_ON(!pgsizes); 2577 2578 /* Pick the biggest page size remaining */ 
2579 pgsize_idx = __fls(pgsizes); 2580 pgsize = BIT(pgsize_idx); 2581 if (!count) 2582 return pgsize; 2583 2584 /* Find the next biggest support page size, if it exists */ 2585 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2586 if (!pgsizes) 2587 goto out_set_count; 2588 2589 pgsize_idx_next = __ffs(pgsizes); 2590 pgsize_next = BIT(pgsize_idx_next); 2591 2592 /* 2593 * There's no point trying a bigger page size unless the virtual 2594 * and physical addresses are similarly offset within the larger page. 2595 */ 2596 if ((iova ^ paddr) & (pgsize_next - 1)) 2597 goto out_set_count; 2598 2599 /* Calculate the offset to the next page size alignment boundary */ 2600 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2601 2602 /* 2603 * If size is big enough to accommodate the larger page, reduce 2604 * the number of smaller pages. 2605 */ 2606 if (!check_add_overflow(offset, pgsize_next, &offset_end) && 2607 offset_end <= size) 2608 size = offset; 2609 2610 out_set_count: 2611 *count = size >> pgsize_idx; 2612 return pgsize; 2613 } 2614 2615 static int __iommu_map_domain_pgtbl(struct iommu_domain *domain, 2616 unsigned long iova, phys_addr_t paddr, 2617 size_t size, int prot, gfp_t gfp, 2618 size_t *mapped) 2619 { 2620 const struct iommu_domain_ops *ops = domain->ops; 2621 unsigned int min_pagesz; 2622 int ret = 0; 2623 2624 if (WARN_ON(!ops->map_pages)) 2625 return -ENODEV; 2626 2627 /* find out the minimum page size supported */ 2628 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2629 2630 /* 2631 * both the virtual address and the physical one, as well as 2632 * the size of the mapping, must be aligned (at least) to the 2633 * size of the smallest page supported by the hardware 2634 */ 2635 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2636 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2637 iova, &paddr, size, min_pagesz); 2638 return -EINVAL; 2639 } 2640 2641 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, 
&paddr, size); 2642 2643 while (size) { 2644 size_t pgsize, count, op_mapped = 0; 2645 2646 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2647 2648 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2649 iova, &paddr, pgsize, count); 2650 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2651 gfp, &op_mapped); 2652 /* 2653 * Some pages may have been mapped, even if an error occurred, 2654 * so we should account for those so they can be unmapped. 2655 */ 2656 *mapped += op_mapped; 2657 if (ret) 2658 return ret; 2659 2660 size -= op_mapped; 2661 iova += op_mapped; 2662 paddr += op_mapped; 2663 } 2664 return 0; 2665 } 2666 2667 int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) 2668 { 2669 const struct iommu_domain_ops *ops = domain->ops; 2670 2671 if (!ops->iotlb_sync_map) 2672 return 0; 2673 return ops->iotlb_sync_map(domain, iova, size); 2674 } 2675 2676 int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, 2677 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2678 { 2679 struct pt_iommu *pt = iommupt_from_domain(domain); 2680 size_t mapped = 0; 2681 int ret; 2682 2683 might_sleep_if(gfpflags_allow_blocking(gfp)); 2684 2685 /* Discourage passing strange GFP flags or illegal domains */ 2686 if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING) || 2687 !domain->pgsize_bitmap || 2688 (gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2689 __GFP_HIGHMEM)))) 2690 return -EINVAL; 2691 2692 if (pt) 2693 ret = pt->ops->map_range(pt, iova, paddr, size, prot, gfp, 2694 &mapped); 2695 else 2696 ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, 2697 gfp, &mapped); 2698 2699 trace_map(iova, paddr, mapped); 2700 iommu_debug_map(domain, paddr, mapped); 2701 if (ret) { 2702 iommu_unmap(domain, iova, mapped); 2703 return ret; 2704 } 2705 return 0; 2706 } 2707 2708 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2709 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 
2710 { 2711 int ret; 2712 2713 ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp); 2714 if (ret) 2715 return ret; 2716 2717 ret = iommu_sync_map(domain, iova, size); 2718 if (ret) 2719 iommu_unmap(domain, iova, size); 2720 2721 return ret; 2722 } 2723 EXPORT_SYMBOL_GPL(iommu_map); 2724 2725 static size_t 2726 __iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova, 2727 size_t size, struct iommu_iotlb_gather *iotlb_gather) 2728 { 2729 const struct iommu_domain_ops *ops = domain->ops; 2730 size_t unmapped_page, unmapped = 0; 2731 unsigned int min_pagesz; 2732 2733 if (WARN_ON(!ops->unmap_pages)) 2734 return 0; 2735 2736 /* find out the minimum page size supported */ 2737 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2738 2739 /* 2740 * The virtual address, as well as the size of the mapping, must be 2741 * aligned (at least) to the size of the smallest page supported 2742 * by the hardware 2743 */ 2744 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2745 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2746 iova, size, min_pagesz); 2747 return 0; 2748 } 2749 2750 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2751 2752 /* 2753 * Keep iterating until we either unmap 'size' bytes (or more) 2754 * or we hit an area that isn't mapped. 2755 */ 2756 while (unmapped < size) { 2757 size_t pgsize, count; 2758 2759 pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count); 2760 unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather); 2761 if (!unmapped_page) 2762 break; 2763 2764 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2765 iova, unmapped_page); 2766 /* 2767 * If the driver itself isn't using the gather, make sure 2768 * it looks non-empty so iotlb_sync will still be called. 
2769 */ 2770 if (iotlb_gather->start >= iotlb_gather->end) 2771 iommu_iotlb_gather_add_range(iotlb_gather, iova, size); 2772 2773 iova += unmapped_page; 2774 unmapped += unmapped_page; 2775 } 2776 2777 return unmapped; 2778 } 2779 2780 static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, 2781 size_t size, 2782 struct iommu_iotlb_gather *iotlb_gather) 2783 { 2784 struct pt_iommu *pt = iommupt_from_domain(domain); 2785 size_t unmapped; 2786 2787 if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING) || 2788 !domain->pgsize_bitmap)) 2789 return 0; 2790 2791 iommu_debug_unmap_begin(domain, iova, size); 2792 2793 if (pt) 2794 unmapped = pt->ops->unmap_range(pt, iova, size, iotlb_gather); 2795 else 2796 unmapped = __iommu_unmap_domain_pgtbl(domain, iova, size, 2797 iotlb_gather); 2798 trace_unmap(iova, size, unmapped); 2799 iommu_debug_unmap_end(domain, iova, size, unmapped); 2800 return unmapped; 2801 } 2802 2803 /** 2804 * iommu_unmap() - Remove mappings from a range of IOVA 2805 * @domain: Domain to manipulate 2806 * @iova: IO virtual address to start 2807 * @size: Length of the range starting from @iova 2808 * 2809 * iommu_unmap() will remove a translation created by iommu_map(). It cannot 2810 * subdivide a mapping created by iommu_map(), so it should be called with IOVA 2811 * ranges that match what was passed to iommu_map(). The range can aggregate 2812 * contiguous iommu_map() calls so long as no individual range is split. 2813 * 2814 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2815 * unmapping stopped. 
2816 */ 2817 size_t iommu_unmap(struct iommu_domain *domain, 2818 unsigned long iova, size_t size) 2819 { 2820 struct iommu_iotlb_gather iotlb_gather; 2821 size_t ret; 2822 2823 iommu_iotlb_gather_init(&iotlb_gather); 2824 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2825 iommu_iotlb_sync(domain, &iotlb_gather); 2826 2827 return ret; 2828 } 2829 EXPORT_SYMBOL_GPL(iommu_unmap); 2830 2831 /** 2832 * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync 2833 * @domain: Domain to manipulate 2834 * @iova: IO virtual address to start 2835 * @size: Length of the range starting from @iova 2836 * @iotlb_gather: range information for a pending IOTLB flush 2837 * 2838 * iommu_unmap_fast() will remove a translation created by iommu_map(). 2839 * It can't subdivide a mapping created by iommu_map(), so it should be 2840 * called with IOVA ranges that match what was passed to iommu_map(). The 2841 * range can aggregate contiguous iommu_map() calls so long as no individual 2842 * range is split. 2843 * 2844 * Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers 2845 * which manage the IOTLB flushing externally to perform a batched sync. 2846 * 2847 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2848 * unmapping stopped. 
2849 */ 2850 size_t iommu_unmap_fast(struct iommu_domain *domain, 2851 unsigned long iova, size_t size, 2852 struct iommu_iotlb_gather *iotlb_gather) 2853 { 2854 return __iommu_unmap(domain, iova, size, iotlb_gather); 2855 } 2856 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2857 2858 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2859 struct scatterlist *sg, unsigned int nents, int prot, 2860 gfp_t gfp) 2861 { 2862 size_t len = 0, mapped = 0; 2863 phys_addr_t start; 2864 unsigned int i = 0; 2865 int ret; 2866 2867 while (i <= nents) { 2868 phys_addr_t s_phys = sg_phys(sg); 2869 2870 if (len && s_phys != start + len) { 2871 ret = iommu_map_nosync(domain, iova + mapped, start, 2872 len, prot, gfp); 2873 if (ret) 2874 goto out_err; 2875 2876 mapped += len; 2877 len = 0; 2878 } 2879 2880 if (sg_dma_is_bus_address(sg)) 2881 goto next; 2882 2883 if (len) { 2884 len += sg->length; 2885 } else { 2886 len = sg->length; 2887 start = s_phys; 2888 } 2889 2890 next: 2891 if (++i < nents) 2892 sg = sg_next(sg); 2893 } 2894 2895 ret = iommu_sync_map(domain, iova, mapped); 2896 if (ret) 2897 goto out_err; 2898 2899 return mapped; 2900 2901 out_err: 2902 /* undo mappings already done */ 2903 iommu_unmap(domain, iova, mapped); 2904 2905 return ret; 2906 } 2907 EXPORT_SYMBOL_GPL(iommu_map_sg); 2908 2909 /** 2910 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2911 * @domain: the iommu domain where the fault has happened 2912 * @dev: the device where the fault has happened 2913 * @iova: the faulting address 2914 * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2915 * 2916 * This function should be called by the low-level IOMMU implementations 2917 * whenever IOMMU faults happen, to allow high-level users, that are 2918 * interested in such events, to know about them. 
2919 * 2920 * This event may be useful for several possible use cases: 2921 * - mere logging of the event 2922 * - dynamic TLB/PTE loading 2923 * - if restarting of the faulting device is required 2924 * 2925 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2926 * PTE/TLB loading will one day be supported, implementations will be able 2927 * to tell whether it succeeded or not according to this return value). 2928 * 2929 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2930 * (though fault handlers can also return -ENOSYS, in case they want to 2931 * elicit the default behavior of the IOMMU drivers). 2932 */ 2933 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2934 unsigned long iova, int flags) 2935 { 2936 int ret = -ENOSYS; 2937 2938 /* 2939 * if upper layers showed interest and installed a fault handler, 2940 * invoke it. 2941 */ 2942 if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER && 2943 domain->handler) 2944 ret = domain->handler(domain, dev, iova, flags, 2945 domain->handler_token); 2946 2947 trace_io_page_fault(dev, iova, flags); 2948 return ret; 2949 } 2950 EXPORT_SYMBOL_GPL(report_iommu_fault); 2951 2952 static int __init iommu_init(void) 2953 { 2954 iommu_group_kset = kset_create_and_add("iommu_groups", 2955 NULL, kernel_kobj); 2956 BUG_ON(!iommu_group_kset); 2957 2958 iommu_debugfs_setup(); 2959 2960 return 0; 2961 } 2962 core_initcall(iommu_init); 2963 2964 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2965 unsigned long quirk) 2966 { 2967 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2968 return -EINVAL; 2969 if (!domain->ops->set_pgtable_quirks) 2970 return -EINVAL; 2971 return domain->ops->set_pgtable_quirks(domain, quirk); 2972 } 2973 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2974 2975 /** 2976 * iommu_get_resv_regions - get reserved regions 2977 * @dev: device for which to get reserved regions 2978 * @list: reserved region list for device 2979 * 2980 * This 
returns a list of reserved IOVA regions specific to this device. 2981 * A domain user should not map IOVA in these ranges. 2982 */ 2983 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2984 { 2985 const struct iommu_ops *ops = dev_iommu_ops(dev); 2986 2987 if (ops->get_resv_regions) 2988 ops->get_resv_regions(dev, list); 2989 } 2990 EXPORT_SYMBOL_GPL(iommu_get_resv_regions); 2991 2992 /** 2993 * iommu_put_resv_regions - release reserved regions 2994 * @dev: device for which to free reserved regions 2995 * @list: reserved region list for device 2996 * 2997 * This releases a reserved region list acquired by iommu_get_resv_regions(). 2998 */ 2999 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 3000 { 3001 struct iommu_resv_region *entry, *next; 3002 3003 list_for_each_entry_safe(entry, next, list, list) { 3004 if (entry->free) 3005 entry->free(dev, entry); 3006 else 3007 kfree(entry); 3008 } 3009 } 3010 EXPORT_SYMBOL(iommu_put_resv_regions); 3011 3012 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 3013 size_t length, int prot, 3014 enum iommu_resv_type type, 3015 gfp_t gfp) 3016 { 3017 struct iommu_resv_region *region; 3018 3019 region = kzalloc_obj(*region, gfp); 3020 if (!region) 3021 return NULL; 3022 3023 INIT_LIST_HEAD(®ion->list); 3024 region->start = start; 3025 region->length = length; 3026 region->prot = prot; 3027 region->type = type; 3028 return region; 3029 } 3030 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 3031 3032 void iommu_set_default_passthrough(bool cmd_line) 3033 { 3034 if (cmd_line) 3035 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 3036 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 3037 } 3038 3039 void iommu_set_default_translated(bool cmd_line) 3040 { 3041 if (cmd_line) 3042 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 3043 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 3044 } 3045 3046 bool iommu_default_passthrough(void) 3047 { 3048 return iommu_def_domain_type == 
IOMMU_DOMAIN_IDENTITY; 3049 } 3050 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 3051 3052 static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode) 3053 { 3054 const struct iommu_device *iommu, *ret = NULL; 3055 3056 spin_lock(&iommu_device_lock); 3057 list_for_each_entry(iommu, &iommu_device_list, list) 3058 if (iommu->fwnode == fwnode) { 3059 ret = iommu; 3060 break; 3061 } 3062 spin_unlock(&iommu_device_lock); 3063 return ret; 3064 } 3065 3066 const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) 3067 { 3068 const struct iommu_device *iommu = iommu_from_fwnode(fwnode); 3069 3070 return iommu ? iommu->ops : NULL; 3071 } 3072 3073 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) 3074 { 3075 const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode); 3076 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3077 3078 if (!iommu) 3079 return driver_deferred_probe_check_state(dev); 3080 if (!dev->iommu && !READ_ONCE(iommu->ready)) 3081 return -EPROBE_DEFER; 3082 3083 if (fwspec) 3084 return iommu->ops == iommu_fwspec_ops(fwspec) ? 
0 : -EINVAL; 3085 3086 if (!dev_iommu_get(dev)) 3087 return -ENOMEM; 3088 3089 /* Preallocate for the overwhelmingly common case of 1 ID */ 3090 fwspec = kzalloc_flex(*fwspec, ids, 1); 3091 if (!fwspec) 3092 return -ENOMEM; 3093 3094 fwnode_handle_get(iommu_fwnode); 3095 fwspec->iommu_fwnode = iommu_fwnode; 3096 dev_iommu_fwspec_set(dev, fwspec); 3097 return 0; 3098 } 3099 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 3100 3101 void iommu_fwspec_free(struct device *dev) 3102 { 3103 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3104 3105 if (fwspec) { 3106 fwnode_handle_put(fwspec->iommu_fwnode); 3107 kfree(fwspec); 3108 dev_iommu_fwspec_set(dev, NULL); 3109 } 3110 } 3111 3112 int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) 3113 { 3114 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3115 int i, new_num; 3116 3117 if (!fwspec) 3118 return -EINVAL; 3119 3120 new_num = fwspec->num_ids + num_ids; 3121 if (new_num > 1) { 3122 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 3123 GFP_KERNEL); 3124 if (!fwspec) 3125 return -ENOMEM; 3126 3127 dev_iommu_fwspec_set(dev, fwspec); 3128 } 3129 3130 for (i = 0; i < num_ids; i++) 3131 fwspec->ids[fwspec->num_ids + i] = ids[i]; 3132 3133 fwspec->num_ids = new_num; 3134 return 0; 3135 } 3136 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 3137 3138 /** 3139 * iommu_setup_default_domain - Set the default_domain for the group 3140 * @group: Group to change 3141 * @target_type: Domain type to set as the default_domain 3142 * 3143 * Allocate a default domain and set it as the current domain on the group. If 3144 * the group already has a default domain it will be changed to the target_type. 3145 * When target_type is 0 the default domain is selected based on driver and 3146 * system preferences. 
3147 */ 3148 static int iommu_setup_default_domain(struct iommu_group *group, 3149 int target_type) 3150 { 3151 struct iommu_domain *old_dom = group->default_domain; 3152 struct group_device *gdev; 3153 struct iommu_domain *dom; 3154 bool direct_failed; 3155 int req_type; 3156 int ret; 3157 3158 lockdep_assert_held(&group->mutex); 3159 3160 req_type = iommu_get_default_domain_type(group, target_type); 3161 if (req_type < 0) 3162 return -EINVAL; 3163 3164 dom = iommu_group_alloc_default_domain(group, req_type); 3165 if (IS_ERR(dom)) 3166 return PTR_ERR(dom); 3167 3168 if (group->default_domain == dom) 3169 return 0; 3170 3171 if (iommu_is_dma_domain(dom)) { 3172 ret = iommu_get_dma_cookie(dom); 3173 if (ret) { 3174 iommu_domain_free(dom); 3175 return ret; 3176 } 3177 } 3178 3179 /* 3180 * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be 3181 * mapped before their device is attached, in order to guarantee 3182 * continuity with any FW activity 3183 */ 3184 direct_failed = false; 3185 for_each_group_device(group, gdev) { 3186 if (iommu_create_device_direct_mappings(dom, gdev->dev)) { 3187 direct_failed = true; 3188 dev_warn_once( 3189 gdev->dev->iommu->iommu_dev->dev, 3190 "IOMMU driver was not able to establish FW requested direct mapping."); 3191 } 3192 } 3193 3194 /* We must set default_domain early for __iommu_device_set_domain */ 3195 group->default_domain = dom; 3196 if (!group->domain) { 3197 /* 3198 * Drivers are not allowed to fail the first domain attach. 3199 * The only way to recover from this is to fail attaching the 3200 * iommu driver and call ops->release_device. Put the domain 3201 * in group->default_domain so it is freed after. 
3202 */ 3203 ret = __iommu_group_set_domain_internal( 3204 group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3205 if (WARN_ON(ret)) 3206 goto out_free_old; 3207 } else { 3208 ret = __iommu_group_set_domain(group, dom); 3209 if (ret) 3210 goto err_restore_def_domain; 3211 } 3212 3213 /* 3214 * Drivers are supposed to allow mappings to be installed in a domain 3215 * before device attachment, but some don't. Hack around this defect by 3216 * trying again after attaching. If this happens it means the device 3217 * will not continuously have the IOMMU_RESV_DIRECT map. 3218 */ 3219 if (direct_failed) { 3220 for_each_group_device(group, gdev) { 3221 ret = iommu_create_device_direct_mappings(dom, gdev->dev); 3222 if (ret) 3223 goto err_restore_domain; 3224 } 3225 } 3226 3227 out_free_old: 3228 if (old_dom) 3229 iommu_domain_free(old_dom); 3230 return ret; 3231 3232 err_restore_domain: 3233 if (old_dom) 3234 __iommu_group_set_domain_internal( 3235 group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3236 err_restore_def_domain: 3237 if (old_dom) { 3238 iommu_domain_free(dom); 3239 group->default_domain = old_dom; 3240 } 3241 return ret; 3242 } 3243 3244 /* 3245 * Changing the default domain through sysfs requires the users to unbind the 3246 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 3247 * transition. Return failure if this isn't met. 3248 * 3249 * We need to consider the race between this and the device release path. 3250 * group->mutex is used here to guarantee that the device release path 3251 * will not be entered at the same time. 
3252 */ 3253 static ssize_t iommu_group_store_type(struct iommu_group *group, 3254 const char *buf, size_t count) 3255 { 3256 struct group_device *gdev; 3257 int ret, req_type; 3258 3259 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 3260 return -EACCES; 3261 3262 if (WARN_ON(!group) || !group->default_domain) 3263 return -EINVAL; 3264 3265 if (sysfs_streq(buf, "identity")) 3266 req_type = IOMMU_DOMAIN_IDENTITY; 3267 else if (sysfs_streq(buf, "DMA")) 3268 req_type = IOMMU_DOMAIN_DMA; 3269 else if (sysfs_streq(buf, "DMA-FQ")) 3270 req_type = IOMMU_DOMAIN_DMA_FQ; 3271 else if (sysfs_streq(buf, "auto")) 3272 req_type = 0; 3273 else 3274 return -EINVAL; 3275 3276 mutex_lock(&group->mutex); 3277 /* We can bring up a flush queue without tearing down the domain. */ 3278 if (req_type == IOMMU_DOMAIN_DMA_FQ && 3279 group->default_domain->type == IOMMU_DOMAIN_DMA) { 3280 ret = iommu_dma_init_fq(group->default_domain); 3281 if (ret) 3282 goto out_unlock; 3283 3284 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 3285 ret = count; 3286 goto out_unlock; 3287 } 3288 3289 /* Otherwise, ensure that device exists and no driver is bound. */ 3290 if (list_empty(&group->devices) || group->owner_cnt) { 3291 ret = -EPERM; 3292 goto out_unlock; 3293 } 3294 3295 ret = iommu_setup_default_domain(group, req_type); 3296 if (ret) 3297 goto out_unlock; 3298 3299 /* Make sure dma_ops is appropriatley set */ 3300 for_each_group_device(group, gdev) 3301 iommu_setup_dma_ops(gdev->dev, group->default_domain); 3302 3303 out_unlock: 3304 mutex_unlock(&group->mutex); 3305 return ret ?: count; 3306 } 3307 3308 /** 3309 * iommu_device_use_default_domain() - Device driver wants to handle device 3310 * DMA through the kernel DMA API. 3311 * @dev: The device. 3312 * 3313 * The device driver about to bind @dev wants to do DMA through the kernel 3314 * DMA API. Return 0 if it is allowed, otherwise an error. 
3315 */ 3316 int iommu_device_use_default_domain(struct device *dev) 3317 { 3318 /* Caller is the driver core during the pre-probe path */ 3319 struct iommu_group *group = dev->iommu_group; 3320 int ret = 0; 3321 3322 if (!group) 3323 return 0; 3324 3325 mutex_lock(&group->mutex); 3326 /* We may race against bus_iommu_probe() finalising groups here */ 3327 if (!group->default_domain) { 3328 ret = -EPROBE_DEFER; 3329 goto unlock_out; 3330 } 3331 if (group->owner_cnt) { 3332 if (group->domain != group->default_domain || group->owner || 3333 !xa_empty(&group->pasid_array)) { 3334 ret = -EBUSY; 3335 goto unlock_out; 3336 } 3337 } 3338 3339 group->owner_cnt++; 3340 3341 unlock_out: 3342 mutex_unlock(&group->mutex); 3343 return ret; 3344 } 3345 3346 /** 3347 * iommu_device_unuse_default_domain() - Device driver stops handling device 3348 * DMA through the kernel DMA API. 3349 * @dev: The device. 3350 * 3351 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3352 * It must be called after iommu_device_use_default_domain(). 3353 */ 3354 void iommu_device_unuse_default_domain(struct device *dev) 3355 { 3356 /* Caller is the driver core during the post-probe path */ 3357 struct iommu_group *group = dev->iommu_group; 3358 3359 if (!group) 3360 return; 3361 3362 mutex_lock(&group->mutex); 3363 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3364 group->owner_cnt--; 3365 3366 mutex_unlock(&group->mutex); 3367 } 3368 3369 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3370 { 3371 struct device *dev = iommu_group_first_dev(group); 3372 const struct iommu_ops *ops = dev_iommu_ops(dev); 3373 struct iommu_domain *domain; 3374 3375 if (group->blocking_domain) 3376 return 0; 3377 3378 if (ops->blocked_domain) { 3379 group->blocking_domain = ops->blocked_domain; 3380 return 0; 3381 } 3382 3383 /* 3384 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an 3385 * empty PAGING domain instead. 
3386 */ 3387 domain = iommu_paging_domain_alloc(dev); 3388 if (IS_ERR(domain)) 3389 return PTR_ERR(domain); 3390 group->blocking_domain = domain; 3391 return 0; 3392 } 3393 3394 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3395 { 3396 int ret; 3397 3398 if ((group->domain && group->domain != group->default_domain) || 3399 !xa_empty(&group->pasid_array)) 3400 return -EBUSY; 3401 3402 ret = __iommu_group_alloc_blocking_domain(group); 3403 if (ret) 3404 return ret; 3405 ret = __iommu_group_set_domain(group, group->blocking_domain); 3406 if (ret) 3407 return ret; 3408 3409 group->owner = owner; 3410 group->owner_cnt++; 3411 return 0; 3412 } 3413 3414 /** 3415 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3416 * @group: The group. 3417 * @owner: Caller specified pointer. Used for exclusive ownership. 3418 * 3419 * This is to support backward compatibility for vfio which manages the dma 3420 * ownership in iommu_group level. New invocations on this interface should be 3421 * prohibited. Only a single owner may exist for a group. 3422 */ 3423 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3424 { 3425 int ret = 0; 3426 3427 if (WARN_ON(!owner)) 3428 return -EINVAL; 3429 3430 mutex_lock(&group->mutex); 3431 if (group->owner_cnt) { 3432 ret = -EPERM; 3433 goto unlock_out; 3434 } 3435 3436 ret = __iommu_take_dma_ownership(group, owner); 3437 unlock_out: 3438 mutex_unlock(&group->mutex); 3439 3440 return ret; 3441 } 3442 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3443 3444 /** 3445 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3446 * @dev: The device. 3447 * @owner: Caller specified pointer. Used for exclusive ownership. 3448 * 3449 * Claim the DMA ownership of a device. Multiple devices in the same group may 3450 * concurrently claim ownership if they present the same owner value. 
Returns 0 3451 * on success and error code on failure 3452 */ 3453 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3454 { 3455 /* Caller must be a probed driver on dev */ 3456 struct iommu_group *group = dev->iommu_group; 3457 int ret = 0; 3458 3459 if (WARN_ON(!owner)) 3460 return -EINVAL; 3461 3462 if (!group) 3463 return -ENODEV; 3464 3465 mutex_lock(&group->mutex); 3466 if (group->owner_cnt) { 3467 if (group->owner != owner) { 3468 ret = -EPERM; 3469 goto unlock_out; 3470 } 3471 group->owner_cnt++; 3472 goto unlock_out; 3473 } 3474 3475 ret = __iommu_take_dma_ownership(group, owner); 3476 unlock_out: 3477 mutex_unlock(&group->mutex); 3478 return ret; 3479 } 3480 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3481 3482 static void __iommu_release_dma_ownership(struct iommu_group *group) 3483 { 3484 if (WARN_ON(!group->owner_cnt || !group->owner || 3485 !xa_empty(&group->pasid_array))) 3486 return; 3487 3488 group->owner_cnt = 0; 3489 group->owner = NULL; 3490 __iommu_group_set_domain_nofail(group, group->default_domain); 3491 } 3492 3493 /** 3494 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3495 * @group: The group 3496 * 3497 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3498 */ 3499 void iommu_group_release_dma_owner(struct iommu_group *group) 3500 { 3501 mutex_lock(&group->mutex); 3502 __iommu_release_dma_ownership(group); 3503 mutex_unlock(&group->mutex); 3504 } 3505 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3506 3507 /** 3508 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3509 * @dev: The device. 3510 * 3511 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 
3512 */ 3513 void iommu_device_release_dma_owner(struct device *dev) 3514 { 3515 /* Caller must be a probed driver on dev */ 3516 struct iommu_group *group = dev->iommu_group; 3517 3518 mutex_lock(&group->mutex); 3519 if (group->owner_cnt > 1) 3520 group->owner_cnt--; 3521 else 3522 __iommu_release_dma_ownership(group); 3523 mutex_unlock(&group->mutex); 3524 } 3525 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3526 3527 /** 3528 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3529 * @group: The group. 3530 * 3531 * This provides status query on a given group. It is racy and only for 3532 * non-binding status reporting. 3533 */ 3534 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3535 { 3536 unsigned int user; 3537 3538 mutex_lock(&group->mutex); 3539 user = group->owner_cnt; 3540 mutex_unlock(&group->mutex); 3541 3542 return user; 3543 } 3544 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3545 3546 static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, 3547 struct iommu_domain *domain) 3548 { 3549 const struct iommu_ops *ops = dev_iommu_ops(dev); 3550 struct iommu_domain *blocked_domain = ops->blocked_domain; 3551 3552 WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain, 3553 dev, pasid, domain)); 3554 } 3555 3556 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3557 struct iommu_group *group, ioasid_t pasid, 3558 struct iommu_domain *old) 3559 { 3560 struct group_device *device, *last_gdev; 3561 int ret; 3562 3563 for_each_group_device(group, device) { 3564 if (device->dev->iommu->max_pasids > 0) { 3565 ret = domain->ops->set_dev_pasid(domain, device->dev, 3566 pasid, old); 3567 if (ret) 3568 goto err_revert; 3569 } 3570 } 3571 3572 return 0; 3573 3574 err_revert: 3575 last_gdev = device; 3576 for_each_group_device(group, device) { 3577 if (device == last_gdev) 3578 break; 3579 if (device->dev->iommu->max_pasids > 0) { 3580 /* 3581 * If no old domain, undo the succeeded devices/pasid. 
3582 * Otherwise, rollback the succeeded devices/pasid to 3583 * the old domain. And it is a driver bug to fail 3584 * attaching with a previously good domain. 3585 */ 3586 if (!old || 3587 WARN_ON(old->ops->set_dev_pasid(old, device->dev, 3588 pasid, domain))) 3589 iommu_remove_dev_pasid(device->dev, pasid, domain); 3590 } 3591 } 3592 return ret; 3593 } 3594 3595 static void __iommu_remove_group_pasid(struct iommu_group *group, 3596 ioasid_t pasid, 3597 struct iommu_domain *domain) 3598 { 3599 struct group_device *device; 3600 3601 for_each_group_device(group, device) { 3602 /* 3603 * A group-level detach cannot fail, even if there is a blocked 3604 * device. In fact, blocked devices must be already detached for 3605 * a pending device recovery. 3606 */ 3607 if (!device->blocked && device->dev->iommu->max_pasids > 0) 3608 iommu_remove_dev_pasid(device->dev, pasid, domain); 3609 } 3610 } 3611 3612 /* 3613 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3614 * @domain: the iommu domain. 3615 * @dev: the attached device. 3616 * @pasid: the pasid of the device. 3617 * @handle: the attach handle. 3618 * 3619 * Caller should always provide a new handle to avoid race with the paths 3620 * that have lockless reference to handle if it intends to pass a valid handle. 3621 * 3622 * Return: 0 on success, or an error. 
3623 */ 3624 int iommu_attach_device_pasid(struct iommu_domain *domain, 3625 struct device *dev, ioasid_t pasid, 3626 struct iommu_attach_handle *handle) 3627 { 3628 /* Caller must be a probed driver on dev */ 3629 struct iommu_group *group = dev->iommu_group; 3630 struct group_device *device; 3631 const struct iommu_ops *ops; 3632 void *entry; 3633 int ret; 3634 3635 if (!group) 3636 return -ENODEV; 3637 3638 ops = dev_iommu_ops(dev); 3639 3640 if (!domain->ops->set_dev_pasid || 3641 !ops->blocked_domain || 3642 !ops->blocked_domain->ops->set_dev_pasid) 3643 return -EOPNOTSUPP; 3644 3645 if (!domain_iommu_ops_compatible(ops, domain) || 3646 pasid == IOMMU_NO_PASID) 3647 return -EINVAL; 3648 3649 mutex_lock(&group->mutex); 3650 3651 /* 3652 * This is a concurrent attach during device recovery. Reject it until 3653 * pci_dev_reset_iommu_done() attaches the device to group->domain. 3654 */ 3655 if (group->recovery_cnt) { 3656 ret = -EBUSY; 3657 goto out_unlock; 3658 } 3659 3660 for_each_group_device(group, device) { 3661 /* 3662 * Skip PASID validation for devices without PASID support 3663 * (max_pasids = 0). These devices cannot issue transactions 3664 * with PASID, so they don't affect group's PASID usage. 3665 */ 3666 if ((device->dev->iommu->max_pasids > 0) && 3667 (pasid >= device->dev->iommu->max_pasids)) { 3668 ret = -EINVAL; 3669 goto out_unlock; 3670 } 3671 } 3672 3673 entry = iommu_make_pasid_array_entry(domain, handle); 3674 3675 /* 3676 * Entry present is a failure case. Use xa_insert() instead of 3677 * xa_reserve(). 3678 */ 3679 ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL); 3680 if (ret) 3681 goto out_unlock; 3682 3683 ret = __iommu_set_group_pasid(domain, group, pasid, NULL); 3684 if (ret) { 3685 xa_release(&group->pasid_array, pasid); 3686 goto out_unlock; 3687 } 3688 3689 /* 3690 * The xa_insert() above reserved the memory, and the group->mutex is 3691 * held, this cannot fail. 
The new domain cannot be visible until the 3692 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3693 * queued and then failing attach. 3694 */ 3695 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3696 pasid, entry, GFP_KERNEL))); 3697 3698 out_unlock: 3699 mutex_unlock(&group->mutex); 3700 return ret; 3701 } 3702 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3703 3704 /** 3705 * iommu_replace_device_pasid - Replace the domain that a specific pasid 3706 * of the device is attached to 3707 * @domain: the new iommu domain 3708 * @dev: the attached device. 3709 * @pasid: the pasid of the device. 3710 * @handle: the attach handle. 3711 * 3712 * This API allows the pasid to switch domains. The @pasid should have been 3713 * attached. Otherwise, this fails. The pasid will keep the old configuration 3714 * if replacement failed. 3715 * 3716 * Caller should always provide a new handle to avoid race with the paths 3717 * that have lockless reference to handle if it intends to pass a valid handle. 3718 * 3719 * Return 0 on success, or an error. 3720 */ 3721 int iommu_replace_device_pasid(struct iommu_domain *domain, 3722 struct device *dev, ioasid_t pasid, 3723 struct iommu_attach_handle *handle) 3724 { 3725 /* Caller must be a probed driver on dev */ 3726 struct iommu_group *group = dev->iommu_group; 3727 struct iommu_attach_handle *entry; 3728 struct iommu_domain *curr_domain; 3729 void *curr; 3730 int ret; 3731 3732 if (!group) 3733 return -ENODEV; 3734 3735 if (!domain->ops->set_dev_pasid) 3736 return -EOPNOTSUPP; 3737 3738 if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) || 3739 pasid == IOMMU_NO_PASID || !handle) 3740 return -EINVAL; 3741 3742 mutex_lock(&group->mutex); 3743 3744 /* 3745 * This is a concurrent attach during device recovery. Reject it until 3746 * pci_dev_reset_iommu_done() attaches the device to group->domain. 
3747 */ 3748 if (group->recovery_cnt) { 3749 ret = -EBUSY; 3750 goto out_unlock; 3751 } 3752 3753 entry = iommu_make_pasid_array_entry(domain, handle); 3754 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, 3755 XA_ZERO_ENTRY, GFP_KERNEL); 3756 if (xa_is_err(curr)) { 3757 ret = xa_err(curr); 3758 goto out_unlock; 3759 } 3760 3761 /* 3762 * No domain (with or without handle) attached, hence not 3763 * a replace case. 3764 */ 3765 if (!curr) { 3766 xa_release(&group->pasid_array, pasid); 3767 ret = -EINVAL; 3768 goto out_unlock; 3769 } 3770 3771 /* 3772 * Reusing handle is problematic as there are paths that refers 3773 * the handle without lock. To avoid race, reject the callers that 3774 * attempt it. 3775 */ 3776 if (curr == entry) { 3777 WARN_ON(1); 3778 ret = -EINVAL; 3779 goto out_unlock; 3780 } 3781 3782 curr_domain = pasid_array_entry_to_domain(curr); 3783 ret = 0; 3784 3785 if (curr_domain != domain) { 3786 ret = __iommu_set_group_pasid(domain, group, 3787 pasid, curr_domain); 3788 if (ret) 3789 goto out_unlock; 3790 } 3791 3792 /* 3793 * The above xa_cmpxchg() reserved the memory, and the 3794 * group->mutex is held, this cannot fail. 3795 */ 3796 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3797 pasid, entry, GFP_KERNEL))); 3798 3799 out_unlock: 3800 mutex_unlock(&group->mutex); 3801 return ret; 3802 } 3803 EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL"); 3804 3805 /* 3806 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3807 * @domain: the iommu domain. 3808 * @dev: the attached device. 3809 * @pasid: the pasid of the device. 3810 * 3811 * The @domain must have been attached to @pasid of the @dev with 3812 * iommu_attach_device_pasid(). 
3813 */ 3814 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3815 ioasid_t pasid) 3816 { 3817 /* Caller must be a probed driver on dev */ 3818 struct iommu_group *group = dev->iommu_group; 3819 3820 mutex_lock(&group->mutex); 3821 __iommu_remove_group_pasid(group, pasid, domain); 3822 xa_erase(&group->pasid_array, pasid); 3823 mutex_unlock(&group->mutex); 3824 } 3825 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3826 3827 ioasid_t iommu_alloc_global_pasid(struct device *dev) 3828 { 3829 int ret; 3830 3831 /* max_pasids == 0 means that the device does not support PASID */ 3832 if (!dev->iommu->max_pasids) 3833 return IOMMU_PASID_INVALID; 3834 3835 /* 3836 * max_pasids is set up by vendor driver based on number of PASID bits 3837 * supported but the IDA allocation is inclusive. 3838 */ 3839 ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, 3840 dev->iommu->max_pasids - 1, GFP_KERNEL); 3841 return ret < 0 ? IOMMU_PASID_INVALID : ret; 3842 } 3843 EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); 3844 3845 void iommu_free_global_pasid(ioasid_t pasid) 3846 { 3847 if (WARN_ON(pasid == IOMMU_PASID_INVALID)) 3848 return; 3849 3850 ida_free(&iommu_global_pasid_ida, pasid); 3851 } 3852 EXPORT_SYMBOL_GPL(iommu_free_global_pasid); 3853 3854 /** 3855 * iommu_attach_handle_get - Return the attach handle 3856 * @group: the iommu group that domain was attached to 3857 * @pasid: the pasid within the group 3858 * @type: matched domain type, 0 for any match 3859 * 3860 * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch. 3861 * 3862 * Return the attach handle to the caller. The life cycle of an iommu attach 3863 * handle is from the time when the domain is attached to the time when the 3864 * domain is detached. Callers are required to synchronize the call of 3865 * iommu_attach_handle_get() with domain attachment and detachment. The attach 3866 * handle can only be used during its life cycle. 
3867 */ 3868 struct iommu_attach_handle * 3869 iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) 3870 { 3871 struct iommu_attach_handle *handle; 3872 void *entry; 3873 3874 xa_lock(&group->pasid_array); 3875 entry = xa_load(&group->pasid_array, pasid); 3876 if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) { 3877 handle = ERR_PTR(-ENOENT); 3878 } else { 3879 handle = xa_untag_pointer(entry); 3880 if (type && handle->domain->type != type) 3881 handle = ERR_PTR(-EBUSY); 3882 } 3883 xa_unlock(&group->pasid_array); 3884 3885 return handle; 3886 } 3887 EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL"); 3888 3889 /** 3890 * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group 3891 * @domain: IOMMU domain to attach 3892 * @group: IOMMU group that will be attached 3893 * @handle: attach handle 3894 * 3895 * Returns 0 on success and error code on failure. 3896 * 3897 * This is a variant of iommu_attach_group(). It allows the caller to provide 3898 * an attach handle and use it when the domain is attached. This is currently 3899 * used by IOMMUFD to deliver the I/O page faults. 3900 * 3901 * Caller should always provide a new handle to avoid race with the paths 3902 * that have lockless reference to handle. 
3903 */ 3904 int iommu_attach_group_handle(struct iommu_domain *domain, 3905 struct iommu_group *group, 3906 struct iommu_attach_handle *handle) 3907 { 3908 void *entry; 3909 int ret; 3910 3911 if (!handle) 3912 return -EINVAL; 3913 3914 mutex_lock(&group->mutex); 3915 entry = iommu_make_pasid_array_entry(domain, handle); 3916 ret = xa_insert(&group->pasid_array, 3917 IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL); 3918 if (ret) 3919 goto out_unlock; 3920 3921 ret = __iommu_attach_group(domain, group); 3922 if (ret) { 3923 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3924 goto out_unlock; 3925 } 3926 3927 /* 3928 * The xa_insert() above reserved the memory, and the group->mutex is 3929 * held, this cannot fail. The new domain cannot be visible until the 3930 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3931 * queued and then failing attach. 3932 */ 3933 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3934 IOMMU_NO_PASID, entry, GFP_KERNEL))); 3935 3936 out_unlock: 3937 mutex_unlock(&group->mutex); 3938 return ret; 3939 } 3940 EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, "IOMMUFD_INTERNAL"); 3941 3942 /** 3943 * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group 3944 * @domain: IOMMU domain to attach 3945 * @group: IOMMU group that will be attached 3946 * 3947 * Detach the specified IOMMU domain from the specified IOMMU group. 3948 * It must be used in conjunction with iommu_attach_group_handle(). 
3949 */ 3950 void iommu_detach_group_handle(struct iommu_domain *domain, 3951 struct iommu_group *group) 3952 { 3953 mutex_lock(&group->mutex); 3954 __iommu_group_set_core_domain(group); 3955 xa_erase(&group->pasid_array, IOMMU_NO_PASID); 3956 mutex_unlock(&group->mutex); 3957 } 3958 EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL"); 3959 3960 /** 3961 * iommu_replace_group_handle - replace the domain that a group is attached to 3962 * @group: IOMMU group that will be attached to the new domain 3963 * @new_domain: new IOMMU domain to replace with 3964 * @handle: attach handle 3965 * 3966 * This API allows the group to switch domains without being forced to go to 3967 * the blocking domain in-between. It allows the caller to provide an attach 3968 * handle for the new domain and use it when the domain is attached. 3969 * 3970 * If the currently attached domain is a core domain (e.g. a default_domain), 3971 * it will act just like the iommu_attach_group_handle(). 3972 * 3973 * Caller should always provide a new handle to avoid race with the paths 3974 * that have lockless reference to handle. 
3975 */ 3976 int iommu_replace_group_handle(struct iommu_group *group, 3977 struct iommu_domain *new_domain, 3978 struct iommu_attach_handle *handle) 3979 { 3980 void *curr, *entry; 3981 int ret; 3982 3983 if (!new_domain || !handle) 3984 return -EINVAL; 3985 3986 mutex_lock(&group->mutex); 3987 entry = iommu_make_pasid_array_entry(new_domain, handle); 3988 ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); 3989 if (ret) 3990 goto err_unlock; 3991 3992 ret = __iommu_group_set_domain(group, new_domain); 3993 if (ret) 3994 goto err_release; 3995 3996 curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL); 3997 WARN_ON(xa_is_err(curr)); 3998 3999 mutex_unlock(&group->mutex); 4000 4001 return 0; 4002 err_release: 4003 xa_release(&group->pasid_array, IOMMU_NO_PASID); 4004 err_unlock: 4005 mutex_unlock(&group->mutex); 4006 return ret; 4007 } 4008 EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); 4009 4010 /** 4011 * pci_dev_reset_iommu_prepare() - Block IOMMU to prepare for a PCI device reset 4012 * @pdev: PCI device that is going to enter a reset routine 4013 * 4014 * The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends to disable and block 4015 * ATS before initiating a reset. This means that a PCIe device during the reset 4016 * routine wants to block any IOMMU activity: translation and ATS invalidation. 4017 * 4018 * This function attaches the device's RID/PASID(s) the group->blocking_domain, 4019 * incrementing the group->recovery_cnt, to allow the IOMMU driver pausing any 4020 * IOMMU activity while leaving the group->domain pointer intact. Later when the 4021 * reset is finished, pci_dev_reset_iommu_done() can restore everything. 4022 * 4023 * Caller must use pci_dev_reset_iommu_prepare() with pci_dev_reset_iommu_done() 4024 * before/after the core-level reset routine, to decrement the recovery_cnt. 4025 * 4026 * Return: 0 on success or negative error code if the preparation failed. 
4027 * 4028 * These two functions are designed to be used by PCI reset functions that would 4029 * not invoke any racy iommu_release_device(), since PCI sysfs node gets removed 4030 * before it notifies with a BUS_NOTIFY_REMOVED_DEVICE. When using them in other 4031 * case, callers must ensure there will be no racy iommu_release_device() call, 4032 * which otherwise would UAF the dev->iommu_group pointer. 4033 */ 4034 int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) 4035 { 4036 struct iommu_group *group = pdev->dev.iommu_group; 4037 struct group_device *gdev; 4038 unsigned long pasid; 4039 void *entry; 4040 int ret; 4041 4042 if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev)) 4043 return 0; 4044 4045 guard(mutex)(&group->mutex); 4046 4047 gdev = __dev_to_gdev(&pdev->dev); 4048 if (WARN_ON(!gdev)) 4049 return -ENODEV; 4050 4051 if (gdev->reset_depth++) 4052 return 0; 4053 4054 ret = __iommu_group_alloc_blocking_domain(group); 4055 if (ret) { 4056 gdev->reset_depth--; 4057 return ret; 4058 } 4059 4060 /* Stage RID domain at blocking_domain while retaining group->domain */ 4061 if (group->domain != group->blocking_domain) { 4062 ret = __iommu_attach_device(group->blocking_domain, &pdev->dev, 4063 group->domain); 4064 if (ret) { 4065 gdev->reset_depth--; 4066 return ret; 4067 } 4068 } 4069 4070 /* 4071 * Update gdev->blocked upon the domain change, as it is used to return 4072 * the correct domain in iommu_driver_get_domain_for_dev() that might be 4073 * called in a set_dev_pasid callback function. 4074 */ 4075 gdev->blocked = true; 4076 4077 /* 4078 * Stage PASID domains at blocking_domain while retaining pasid_array. 4079 * 4080 * The pasid_array is mostly fenced by group->mutex, except one reader 4081 * in iommu_attach_handle_get(), so it's safe to read without xa_lock. 
4082 */ 4083 if (pdev->dev.iommu->max_pasids > 0) { 4084 xa_for_each_start(&group->pasid_array, pasid, entry, 1) { 4085 struct iommu_domain *pasid_dom = 4086 pasid_array_entry_to_domain(entry); 4087 4088 iommu_remove_dev_pasid(&pdev->dev, pasid, pasid_dom); 4089 } 4090 } 4091 4092 group->recovery_cnt++; 4093 return ret; 4094 } 4095 EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare); 4096 4097 static int __group_device_cmp_dma_alias(struct pci_dev *dev, u16 alias, 4098 void *data) 4099 { 4100 return alias == *(u16 *)data; 4101 } 4102 4103 static int group_device_cmp_dma_alias(struct pci_dev *dev, u16 alias, 4104 void *data) 4105 { 4106 return pci_for_each_dma_alias(data, __group_device_cmp_dma_alias, 4107 &alias); 4108 } 4109 4110 static bool group_device_dma_alias_is_blocked(struct iommu_group *group, 4111 struct group_device *gdev) 4112 { 4113 struct group_device *sibling; 4114 4115 lockdep_assert_held(&group->mutex); 4116 4117 if (!dev_is_pci(gdev->dev)) 4118 return false; 4119 4120 for_each_group_device(group, sibling) { 4121 if (sibling == gdev || !sibling->blocked || 4122 !dev_is_pci(sibling->dev)) 4123 continue; 4124 if (pci_for_each_dma_alias(to_pci_dev(gdev->dev), 4125 group_device_cmp_dma_alias, 4126 to_pci_dev(sibling->dev))) 4127 return true; 4128 } 4129 return false; 4130 } 4131 4132 /** 4133 * pci_dev_reset_iommu_done() - Restore IOMMU after a PCI device reset is done 4134 * @pdev: PCI device that has finished a reset routine 4135 * 4136 * After a PCIe device finishes a reset routine, it wants to restore its IOMMU 4137 * activity, including new translation and cache invalidation, by re-attaching 4138 * all RID/PASID of the device back to the domains retained in the core-level 4139 * structure. 4140 * 4141 * Caller must pair it with a successful pci_dev_reset_iommu_prepare(). 4142 * 4143 * Note that, although unlikely, there is a risk that re-attaching domains might 4144 * fail due to some unexpected happening like OOM. 
4145 */ 4146 void pci_dev_reset_iommu_done(struct pci_dev *pdev) 4147 { 4148 struct iommu_group *group = pdev->dev.iommu_group; 4149 struct group_device *gdev; 4150 unsigned long pasid; 4151 void *entry; 4152 4153 if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev)) 4154 return; 4155 4156 guard(mutex)(&group->mutex); 4157 4158 gdev = __dev_to_gdev(&pdev->dev); 4159 if (WARN_ON(!gdev)) 4160 return; 4161 4162 /* Unbalanced done() calls would underflow the counter */ 4163 if (WARN_ON(gdev->reset_depth == 0)) 4164 return; 4165 if (--gdev->reset_depth) 4166 return; 4167 4168 if (WARN_ON(!group->blocking_domain)) 4169 return; 4170 4171 if (group_device_dma_alias_is_blocked(group, gdev)) { 4172 /* 4173 * FIXME: DMA aliased devices share the same RID, which would be 4174 * convoluted to handle, as "gdev->blocked" is not sufficient: 4175 * - "blocked" state is effectively shared across these devices 4176 * - if the core skipped the blocking on the second device, the 4177 * IOMMU driver's attachment state would diverge from the HW 4178 * state 4179 * For now, just warn and see whether real ATS use cases hit it. 4180 */ 4181 pci_warn(pdev, 4182 "DMA-aliased sibling may be prematurely unblocked\n"); 4183 } 4184 4185 /* 4186 * Re-attach RID domain back to group->domain 4187 * 4188 * Leave the device parked in the blocking_domain if group->domain isn't 4189 * initialized yet 4190 */ 4191 if (group->domain && group->domain != group->blocking_domain) { 4192 WARN_ON(__iommu_attach_device(group->domain, &pdev->dev, 4193 group->blocking_domain)); 4194 } 4195 4196 /* 4197 * Update gdev->blocked upon the domain change, as it is used to return 4198 * the correct domain in iommu_driver_get_domain_for_dev() that might be 4199 * called in a set_dev_pasid callback function. 4200 */ 4201 gdev->blocked = false; 4202 4203 /* 4204 * Re-attach PASID domains back to the domains retained in pasid_array. 
4205 * 4206 * The pasid_array is mostly fenced by group->mutex, except one reader 4207 * in iommu_attach_handle_get(), so it's safe to read without xa_lock. 4208 */ 4209 if (pdev->dev.iommu->max_pasids > 0) { 4210 xa_for_each_start(&group->pasid_array, pasid, entry, 1) { 4211 struct iommu_domain *pasid_dom = 4212 pasid_array_entry_to_domain(entry); 4213 4214 WARN_ON(pasid_dom->ops->set_dev_pasid( 4215 pasid_dom, &pdev->dev, pasid, 4216 group->blocking_domain)); 4217 } 4218 } 4219 4220 if (!WARN_ON(group->recovery_cnt == 0)) 4221 group->recovery_cnt--; 4222 } 4223 EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_done); 4224 4225 #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 4226 /** 4227 * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain 4228 * @desc: MSI descriptor, will store the MSI page 4229 * @msi_addr: MSI target address to be mapped 4230 * 4231 * The implementation of sw_msi() should take msi_addr and map it to 4232 * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the 4233 * mapping information. 4234 * 4235 * Return: 0 on success or negative error code if the mapping failed. 4236 */ 4237 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 4238 { 4239 struct device *dev = msi_desc_to_dev(desc); 4240 struct iommu_group *group = dev->iommu_group; 4241 int ret = 0; 4242 4243 if (!group) 4244 return 0; 4245 4246 mutex_lock(&group->mutex); 4247 /* An IDENTITY domain must pass through */ 4248 if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) { 4249 switch (group->domain->cookie_type) { 4250 case IOMMU_COOKIE_DMA_MSI: 4251 case IOMMU_COOKIE_DMA_IOVA: 4252 ret = iommu_dma_sw_msi(group->domain, desc, msi_addr); 4253 break; 4254 case IOMMU_COOKIE_IOMMUFD: 4255 ret = iommufd_sw_msi(group->domain, desc, msi_addr); 4256 break; 4257 default: 4258 ret = -EOPNOTSUPP; 4259 break; 4260 } 4261 } 4262 mutex_unlock(&group->mutex); 4263 return ret; 4264 } 4265 #endif /* CONFIG_IRQ_MSI_IOMMU */ 4266