1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 */ 6 7 #define pr_fmt(fmt) "iommu: " fmt 8 9 #include <linux/amba/bus.h> 10 #include <linux/device.h> 11 #include <linux/kernel.h> 12 #include <linux/bits.h> 13 #include <linux/bug.h> 14 #include <linux/types.h> 15 #include <linux/init.h> 16 #include <linux/export.h> 17 #include <linux/slab.h> 18 #include <linux/errno.h> 19 #include <linux/host1x_context_bus.h> 20 #include <linux/iommu.h> 21 #include <linux/iommufd.h> 22 #include <linux/idr.h> 23 #include <linux/err.h> 24 #include <linux/pci.h> 25 #include <linux/pci-ats.h> 26 #include <linux/bitops.h> 27 #include <linux/platform_device.h> 28 #include <linux/property.h> 29 #include <linux/fsl/mc.h> 30 #include <linux/module.h> 31 #include <linux/cc_platform.h> 32 #include <linux/cdx/cdx_bus.h> 33 #include <trace/events/iommu.h> 34 #include <linux/sched/mm.h> 35 #include <linux/msi.h> 36 #include <uapi/linux/iommufd.h> 37 38 #include "dma-iommu.h" 39 #include "iommu-priv.h" 40 41 static struct kset *iommu_group_kset; 42 static DEFINE_IDA(iommu_group_ida); 43 static DEFINE_IDA(iommu_global_pasid_ida); 44 45 static unsigned int iommu_def_domain_type __read_mostly; 46 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); 47 static u32 iommu_cmd_line __read_mostly; 48 49 /* Tags used with xa_tag_pointer() in group->pasid_array */ 50 enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 }; 51 52 struct iommu_group { 53 struct kobject kobj; 54 struct kobject *devices_kobj; 55 struct list_head devices; 56 struct xarray pasid_array; 57 struct mutex mutex; 58 void *iommu_data; 59 void (*iommu_data_release)(void *iommu_data); 60 char *name; 61 int id; 62 struct iommu_domain *default_domain; 63 struct iommu_domain *blocking_domain; 64 /* 65 * During a group device reset, @resetting_domain points to the physical 66 * domain, while 
@domain points to the attached domain before the reset. 67 */ 68 struct iommu_domain *resetting_domain; 69 struct iommu_domain *domain; 70 struct list_head entry; 71 unsigned int owner_cnt; 72 void *owner; 73 }; 74 75 struct group_device { 76 struct list_head list; 77 struct device *dev; 78 char *name; 79 }; 80 81 /* Iterate over each struct group_device in a struct iommu_group */ 82 #define for_each_group_device(group, pos) \ 83 list_for_each_entry(pos, &(group)->devices, list) 84 85 struct iommu_group_attribute { 86 struct attribute attr; 87 ssize_t (*show)(struct iommu_group *group, char *buf); 88 ssize_t (*store)(struct iommu_group *group, 89 const char *buf, size_t count); 90 }; 91 92 static const char * const iommu_group_resv_type_string[] = { 93 [IOMMU_RESV_DIRECT] = "direct", 94 [IOMMU_RESV_DIRECT_RELAXABLE] = "direct-relaxable", 95 [IOMMU_RESV_RESERVED] = "reserved", 96 [IOMMU_RESV_MSI] = "msi", 97 [IOMMU_RESV_SW_MSI] = "msi", 98 }; 99 100 #define IOMMU_CMD_LINE_DMA_API BIT(0) 101 #define IOMMU_CMD_LINE_STRICT BIT(1) 102 103 static int bus_iommu_probe(const struct bus_type *bus); 104 static int iommu_bus_notifier(struct notifier_block *nb, 105 unsigned long action, void *data); 106 static void iommu_release_device(struct device *dev); 107 static int __iommu_attach_device(struct iommu_domain *domain, 108 struct device *dev, struct iommu_domain *old); 109 static int __iommu_attach_group(struct iommu_domain *domain, 110 struct iommu_group *group); 111 static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev, 112 unsigned int type, 113 unsigned int flags); 114 115 enum { 116 IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0, 117 }; 118 119 static int __iommu_device_set_domain(struct iommu_group *group, 120 struct device *dev, 121 struct iommu_domain *new_domain, 122 struct iommu_domain *old_domain, 123 unsigned int flags); 124 static int __iommu_group_set_domain_internal(struct iommu_group *group, 125 struct iommu_domain *new_domain, 126 unsigned 
int flags); 127 static int __iommu_group_set_domain(struct iommu_group *group, 128 struct iommu_domain *new_domain) 129 { 130 return __iommu_group_set_domain_internal(group, new_domain, 0); 131 } 132 static void __iommu_group_set_domain_nofail(struct iommu_group *group, 133 struct iommu_domain *new_domain) 134 { 135 WARN_ON(__iommu_group_set_domain_internal( 136 group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); 137 } 138 139 static int iommu_setup_default_domain(struct iommu_group *group, 140 int target_type); 141 static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 142 struct device *dev); 143 static ssize_t iommu_group_store_type(struct iommu_group *group, 144 const char *buf, size_t count); 145 static struct group_device *iommu_group_alloc_device(struct iommu_group *group, 146 struct device *dev); 147 static void __iommu_group_free_device(struct iommu_group *group, 148 struct group_device *grp_dev); 149 static void iommu_domain_init(struct iommu_domain *domain, unsigned int type, 150 const struct iommu_ops *ops); 151 152 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ 153 struct iommu_group_attribute iommu_group_attr_##_name = \ 154 __ATTR(_name, _mode, _show, _store) 155 156 #define to_iommu_group_attr(_attr) \ 157 container_of(_attr, struct iommu_group_attribute, attr) 158 #define to_iommu_group(_kobj) \ 159 container_of(_kobj, struct iommu_group, kobj) 160 161 static LIST_HEAD(iommu_device_list); 162 static DEFINE_SPINLOCK(iommu_device_lock); 163 164 static const struct bus_type * const iommu_buses[] = { 165 &platform_bus_type, 166 #ifdef CONFIG_PCI 167 &pci_bus_type, 168 #endif 169 #ifdef CONFIG_ARM_AMBA 170 &amba_bustype, 171 #endif 172 #ifdef CONFIG_FSL_MC_BUS 173 &fsl_mc_bus_type, 174 #endif 175 #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS 176 &host1x_context_device_bus_type, 177 #endif 178 #ifdef CONFIG_CDX_BUS 179 &cdx_bus_type, 180 #endif 181 }; 182 183 /* 184 * Use a function instead of an array here because the domain-type 
is a 185 * bit-field, so an array would waste memory. 186 */ 187 static const char *iommu_domain_type_str(unsigned int t) 188 { 189 switch (t) { 190 case IOMMU_DOMAIN_BLOCKED: 191 return "Blocked"; 192 case IOMMU_DOMAIN_IDENTITY: 193 return "Passthrough"; 194 case IOMMU_DOMAIN_UNMANAGED: 195 return "Unmanaged"; 196 case IOMMU_DOMAIN_DMA: 197 case IOMMU_DOMAIN_DMA_FQ: 198 return "Translated"; 199 case IOMMU_DOMAIN_PLATFORM: 200 return "Platform"; 201 default: 202 return "Unknown"; 203 } 204 } 205 206 static int __init iommu_subsys_init(void) 207 { 208 struct notifier_block *nb; 209 210 if (!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API)) { 211 if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH)) 212 iommu_set_default_passthrough(false); 213 else 214 iommu_set_default_translated(false); 215 216 if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { 217 pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n"); 218 iommu_set_default_translated(false); 219 } 220 } 221 222 if (!iommu_default_passthrough() && !iommu_dma_strict) 223 iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; 224 225 pr_info("Default domain type: %s%s\n", 226 iommu_domain_type_str(iommu_def_domain_type), 227 (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? 228 " (set via kernel command line)" : ""); 229 230 if (!iommu_default_passthrough()) 231 pr_info("DMA domain TLB invalidation policy: %s mode%s\n", 232 iommu_dma_strict ? "strict" : "lazy", 233 (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? 
234 " (set via kernel command line)" : ""); 235 236 nb = kzalloc_objs(*nb, ARRAY_SIZE(iommu_buses)); 237 if (!nb) 238 return -ENOMEM; 239 240 iommu_debug_init(); 241 242 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) { 243 nb[i].notifier_call = iommu_bus_notifier; 244 bus_register_notifier(iommu_buses[i], &nb[i]); 245 } 246 247 return 0; 248 } 249 subsys_initcall(iommu_subsys_init); 250 251 static int remove_iommu_group(struct device *dev, void *data) 252 { 253 if (dev->iommu && dev->iommu->iommu_dev == data) 254 iommu_release_device(dev); 255 256 return 0; 257 } 258 259 /** 260 * iommu_device_register() - Register an IOMMU hardware instance 261 * @iommu: IOMMU handle for the instance 262 * @ops: IOMMU ops to associate with the instance 263 * @hwdev: (optional) actual instance device, used for fwnode lookup 264 * 265 * Return: 0 on success, or an error. 266 */ 267 int iommu_device_register(struct iommu_device *iommu, 268 const struct iommu_ops *ops, struct device *hwdev) 269 { 270 int err = 0; 271 272 /* We need to be able to take module references appropriately */ 273 if (WARN_ON(is_module_address((unsigned long)ops) && !ops->owner)) 274 return -EINVAL; 275 276 iommu->ops = ops; 277 if (hwdev) 278 iommu->fwnode = dev_fwnode(hwdev); 279 280 spin_lock(&iommu_device_lock); 281 list_add_tail(&iommu->list, &iommu_device_list); 282 spin_unlock(&iommu_device_lock); 283 284 for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) 285 err = bus_iommu_probe(iommu_buses[i]); 286 if (err) 287 iommu_device_unregister(iommu); 288 else 289 WRITE_ONCE(iommu->ready, true); 290 return err; 291 } 292 EXPORT_SYMBOL_GPL(iommu_device_register); 293 294 void iommu_device_unregister(struct iommu_device *iommu) 295 { 296 for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) 297 bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group); 298 299 spin_lock(&iommu_device_lock); 300 list_del(&iommu->list); 301 spin_unlock(&iommu_device_lock); 302 303 /* Pairs with the alloc in 
generic_single_device_group() */ 304 iommu_group_put(iommu->singleton_group); 305 iommu->singleton_group = NULL; 306 } 307 EXPORT_SYMBOL_GPL(iommu_device_unregister); 308 309 #if IS_ENABLED(CONFIG_IOMMUFD_TEST) 310 void iommu_device_unregister_bus(struct iommu_device *iommu, 311 const struct bus_type *bus, 312 struct notifier_block *nb) 313 { 314 bus_unregister_notifier(bus, nb); 315 fwnode_remove_software_node(iommu->fwnode); 316 iommu_device_unregister(iommu); 317 } 318 EXPORT_SYMBOL_GPL(iommu_device_unregister_bus); 319 320 /* 321 * Register an iommu driver against a single bus. This is only used by iommufd 322 * selftest to create a mock iommu driver. The caller must provide 323 * some memory to hold a notifier_block. 324 */ 325 int iommu_device_register_bus(struct iommu_device *iommu, 326 const struct iommu_ops *ops, 327 const struct bus_type *bus, 328 struct notifier_block *nb) 329 { 330 int err; 331 332 iommu->ops = ops; 333 nb->notifier_call = iommu_bus_notifier; 334 err = bus_register_notifier(bus, nb); 335 if (err) 336 return err; 337 338 iommu->fwnode = fwnode_create_software_node(NULL, NULL); 339 if (IS_ERR(iommu->fwnode)) { 340 bus_unregister_notifier(bus, nb); 341 return PTR_ERR(iommu->fwnode); 342 } 343 344 spin_lock(&iommu_device_lock); 345 list_add_tail(&iommu->list, &iommu_device_list); 346 spin_unlock(&iommu_device_lock); 347 348 err = bus_iommu_probe(bus); 349 if (err) { 350 iommu_device_unregister_bus(iommu, bus, nb); 351 return err; 352 } 353 WRITE_ONCE(iommu->ready, true); 354 return 0; 355 } 356 EXPORT_SYMBOL_GPL(iommu_device_register_bus); 357 358 int iommu_mock_device_add(struct device *dev, struct iommu_device *iommu) 359 { 360 int rc; 361 362 mutex_lock(&iommu_probe_device_lock); 363 rc = iommu_fwspec_init(dev, iommu->fwnode); 364 mutex_unlock(&iommu_probe_device_lock); 365 366 if (rc) 367 return rc; 368 369 rc = device_add(dev); 370 if (rc) 371 iommu_fwspec_free(dev); 372 return rc; 373 } 374 EXPORT_SYMBOL_GPL(iommu_mock_device_add); 
375 #endif 376 377 static struct dev_iommu *dev_iommu_get(struct device *dev) 378 { 379 struct dev_iommu *param = dev->iommu; 380 381 lockdep_assert_held(&iommu_probe_device_lock); 382 383 if (param) 384 return param; 385 386 param = kzalloc_obj(*param); 387 if (!param) 388 return NULL; 389 390 mutex_init(¶m->lock); 391 dev->iommu = param; 392 return param; 393 } 394 395 void dev_iommu_free(struct device *dev) 396 { 397 struct dev_iommu *param = dev->iommu; 398 399 dev->iommu = NULL; 400 if (param->fwspec) { 401 fwnode_handle_put(param->fwspec->iommu_fwnode); 402 kfree(param->fwspec); 403 } 404 kfree(param); 405 } 406 407 /* 408 * Internal equivalent of device_iommu_mapped() for when we care that a device 409 * actually has API ops, and don't want false positives from VFIO-only groups. 410 */ 411 static bool dev_has_iommu(struct device *dev) 412 { 413 return dev->iommu && dev->iommu->iommu_dev; 414 } 415 416 static u32 dev_iommu_get_max_pasids(struct device *dev) 417 { 418 u32 max_pasids = 0, bits = 0; 419 int ret; 420 421 if (dev_is_pci(dev)) { 422 ret = pci_max_pasids(to_pci_dev(dev)); 423 if (ret > 0) 424 max_pasids = ret; 425 } else { 426 ret = device_property_read_u32(dev, "pasid-num-bits", &bits); 427 if (!ret) 428 max_pasids = 1UL << bits; 429 } 430 431 return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); 432 } 433 434 void dev_iommu_priv_set(struct device *dev, void *priv) 435 { 436 /* FSL_PAMU does something weird */ 437 if (!IS_ENABLED(CONFIG_FSL_PAMU)) 438 lockdep_assert_held(&iommu_probe_device_lock); 439 dev->iommu->priv = priv; 440 } 441 EXPORT_SYMBOL_GPL(dev_iommu_priv_set); 442 443 /* 444 * Init the dev->iommu and dev->iommu_group in the struct device and get the 445 * driver probed 446 */ 447 static int iommu_init_device(struct device *dev) 448 { 449 const struct iommu_ops *ops; 450 struct iommu_device *iommu_dev; 451 struct iommu_group *group; 452 int ret; 453 454 if (!dev_iommu_get(dev)) 455 return -ENOMEM; 456 /* 457 * For FDT-based 
systems and ACPI IORT/VIOT, the common firmware parsing 458 * is buried in the bus dma_configure path. Properly unpicking that is 459 * still a big job, so for now just invoke the whole thing. The device 460 * already having a driver bound means dma_configure has already run and 461 * found no IOMMU to wait for, so there's no point calling it again. 462 */ 463 if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) { 464 mutex_unlock(&iommu_probe_device_lock); 465 dev->bus->dma_configure(dev); 466 mutex_lock(&iommu_probe_device_lock); 467 /* If another instance finished the job for us, skip it */ 468 if (!dev->iommu || dev->iommu_group) 469 return -ENODEV; 470 } 471 /* 472 * At this point, relevant devices either now have a fwspec which will 473 * match ops registered with a non-NULL fwnode, or we can reasonably 474 * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can 475 * be present, and that any of their registered instances has suitable 476 * ops for probing, and thus cheekily co-opt the same mechanism. 
477 */ 478 ops = iommu_fwspec_ops(dev->iommu->fwspec); 479 if (!ops) { 480 ret = -ENODEV; 481 goto err_free; 482 } 483 484 if (!try_module_get(ops->owner)) { 485 ret = -EINVAL; 486 goto err_free; 487 } 488 489 iommu_dev = ops->probe_device(dev); 490 if (IS_ERR(iommu_dev)) { 491 ret = PTR_ERR(iommu_dev); 492 goto err_module_put; 493 } 494 dev->iommu->iommu_dev = iommu_dev; 495 496 ret = iommu_device_link(iommu_dev, dev); 497 if (ret) 498 goto err_release; 499 500 group = ops->device_group(dev); 501 if (WARN_ON_ONCE(group == NULL)) 502 group = ERR_PTR(-EINVAL); 503 if (IS_ERR(group)) { 504 ret = PTR_ERR(group); 505 goto err_unlink; 506 } 507 dev->iommu_group = group; 508 509 dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); 510 if (ops->is_attach_deferred) 511 dev->iommu->attach_deferred = ops->is_attach_deferred(dev); 512 return 0; 513 514 err_unlink: 515 iommu_device_unlink(iommu_dev, dev); 516 err_release: 517 if (ops->release_device) 518 ops->release_device(dev); 519 err_module_put: 520 module_put(ops->owner); 521 err_free: 522 dev->iommu->iommu_dev = NULL; 523 dev_iommu_free(dev); 524 return ret; 525 } 526 527 static void iommu_deinit_device(struct device *dev) 528 { 529 struct iommu_group *group = dev->iommu_group; 530 const struct iommu_ops *ops = dev_iommu_ops(dev); 531 532 lockdep_assert_held(&group->mutex); 533 534 iommu_device_unlink(dev->iommu->iommu_dev, dev); 535 536 /* 537 * release_device() must stop using any attached domain on the device. 538 * If there are still other devices in the group, they are not affected 539 * by this callback. 540 * 541 * If the iommu driver provides release_domain, the core code ensures 542 * that domain is attached prior to calling release_device. Drivers can 543 * use this to enforce a translation on the idle iommu. Typically, the 544 * global static blocked_domain is a good choice. 
545 * 546 * Otherwise, the iommu driver must set the device to either an identity 547 * or a blocking translation in release_device() and stop using any 548 * domain pointer, as it is going to be freed. 549 * 550 * Regardless, if a delayed attach never occurred, then the release 551 * should still avoid touching any hardware configuration either. 552 */ 553 if (!dev->iommu->attach_deferred && ops->release_domain) { 554 struct iommu_domain *release_domain = ops->release_domain; 555 556 /* 557 * If the device requires direct mappings then it should not 558 * be parked on a BLOCKED domain during release as that would 559 * break the direct mappings. 560 */ 561 if (dev->iommu->require_direct && ops->identity_domain && 562 release_domain == ops->blocked_domain) 563 release_domain = ops->identity_domain; 564 565 release_domain->ops->attach_dev(release_domain, dev, 566 group->domain); 567 } 568 569 if (ops->release_device) 570 ops->release_device(dev); 571 572 /* 573 * If this is the last driver to use the group then we must free the 574 * domains before we do the module_put(). 
575 */ 576 if (list_empty(&group->devices)) { 577 if (group->default_domain) { 578 iommu_domain_free(group->default_domain); 579 group->default_domain = NULL; 580 } 581 if (group->blocking_domain) { 582 iommu_domain_free(group->blocking_domain); 583 group->blocking_domain = NULL; 584 } 585 group->domain = NULL; 586 } 587 588 /* Caller must put iommu_group */ 589 dev->iommu_group = NULL; 590 module_put(ops->owner); 591 dev_iommu_free(dev); 592 #ifdef CONFIG_IOMMU_DMA 593 dev->dma_iommu = false; 594 #endif 595 } 596 597 static struct iommu_domain *pasid_array_entry_to_domain(void *entry) 598 { 599 if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN) 600 return xa_untag_pointer(entry); 601 return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain; 602 } 603 604 DEFINE_MUTEX(iommu_probe_device_lock); 605 606 static int __iommu_probe_device(struct device *dev, struct list_head *group_list) 607 { 608 struct iommu_group *group; 609 struct group_device *gdev; 610 int ret; 611 612 /* 613 * Serialise to avoid races between IOMMU drivers registering in 614 * parallel and/or the "replay" calls from ACPI/OF code via client 615 * driver probe. Once the latter have been cleaned up we should 616 * probably be able to use device_lock() here to minimise the scope, 617 * but for now enforcing a simple global ordering is fine. 618 */ 619 lockdep_assert_held(&iommu_probe_device_lock); 620 621 /* Device is probed already if in a group */ 622 if (dev->iommu_group) 623 return 0; 624 625 ret = iommu_init_device(dev); 626 if (ret) 627 return ret; 628 /* 629 * And if we do now see any replay calls, they would indicate someone 630 * misusing the dma_configure path outside bus code. 
631 */ 632 if (dev->driver) 633 dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n"); 634 635 group = dev->iommu_group; 636 gdev = iommu_group_alloc_device(group, dev); 637 mutex_lock(&group->mutex); 638 if (IS_ERR(gdev)) { 639 ret = PTR_ERR(gdev); 640 goto err_put_group; 641 } 642 643 /* 644 * The gdev must be in the list before calling 645 * iommu_setup_default_domain() 646 */ 647 list_add_tail(&gdev->list, &group->devices); 648 WARN_ON(group->default_domain && !group->domain); 649 if (group->default_domain) 650 iommu_create_device_direct_mappings(group->default_domain, dev); 651 if (group->domain) { 652 ret = __iommu_device_set_domain(group, dev, group->domain, NULL, 653 0); 654 if (ret) 655 goto err_remove_gdev; 656 } else if (!group->default_domain && !group_list) { 657 ret = iommu_setup_default_domain(group, 0); 658 if (ret) 659 goto err_remove_gdev; 660 } else if (!group->default_domain) { 661 /* 662 * With a group_list argument we defer the default_domain setup 663 * to the caller by providing a de-duplicated list of groups 664 * that need further setup. 
665 */ 666 if (list_empty(&group->entry)) 667 list_add_tail(&group->entry, group_list); 668 } 669 670 if (group->default_domain) 671 iommu_setup_dma_ops(dev, group->default_domain); 672 673 mutex_unlock(&group->mutex); 674 675 return 0; 676 677 err_remove_gdev: 678 list_del(&gdev->list); 679 __iommu_group_free_device(group, gdev); 680 err_put_group: 681 iommu_deinit_device(dev); 682 mutex_unlock(&group->mutex); 683 iommu_group_put(group); 684 685 return ret; 686 } 687 688 int iommu_probe_device(struct device *dev) 689 { 690 const struct iommu_ops *ops; 691 int ret; 692 693 mutex_lock(&iommu_probe_device_lock); 694 ret = __iommu_probe_device(dev, NULL); 695 mutex_unlock(&iommu_probe_device_lock); 696 if (ret) 697 return ret; 698 699 ops = dev_iommu_ops(dev); 700 if (ops->probe_finalize) 701 ops->probe_finalize(dev); 702 703 return 0; 704 } 705 706 static void __iommu_group_free_device(struct iommu_group *group, 707 struct group_device *grp_dev) 708 { 709 struct device *dev = grp_dev->dev; 710 711 sysfs_remove_link(group->devices_kobj, grp_dev->name); 712 sysfs_remove_link(&dev->kobj, "iommu_group"); 713 714 trace_remove_device_from_group(group->id, dev); 715 716 /* 717 * If the group has become empty then ownership must have been 718 * released, and the current domain must be set back to NULL or 719 * the default domain. 720 */ 721 if (list_empty(&group->devices)) 722 WARN_ON(group->owner_cnt || 723 group->domain != group->default_domain); 724 725 kfree(grp_dev->name); 726 kfree(grp_dev); 727 } 728 729 /* Remove the iommu_group from the struct device. 
*/ 730 static void __iommu_group_remove_device(struct device *dev) 731 { 732 struct iommu_group *group = dev->iommu_group; 733 struct group_device *device; 734 735 mutex_lock(&group->mutex); 736 for_each_group_device(group, device) { 737 if (device->dev != dev) 738 continue; 739 740 list_del(&device->list); 741 __iommu_group_free_device(group, device); 742 if (dev_has_iommu(dev)) 743 iommu_deinit_device(dev); 744 else 745 dev->iommu_group = NULL; 746 break; 747 } 748 mutex_unlock(&group->mutex); 749 750 /* 751 * Pairs with the get in iommu_init_device() or 752 * iommu_group_add_device() 753 */ 754 iommu_group_put(group); 755 } 756 757 static void iommu_release_device(struct device *dev) 758 { 759 struct iommu_group *group = dev->iommu_group; 760 761 if (group) 762 __iommu_group_remove_device(dev); 763 764 /* Free any fwspec if no iommu_driver was ever attached */ 765 if (dev->iommu) 766 dev_iommu_free(dev); 767 } 768 769 static int __init iommu_set_def_domain_type(char *str) 770 { 771 bool pt; 772 int ret; 773 774 ret = kstrtobool(str, &pt); 775 if (ret) 776 return ret; 777 778 if (pt) 779 iommu_set_default_passthrough(true); 780 else 781 iommu_set_default_translated(true); 782 783 return 0; 784 } 785 early_param("iommu.passthrough", iommu_set_def_domain_type); 786 787 static int __init iommu_dma_setup(char *str) 788 { 789 int ret = kstrtobool(str, &iommu_dma_strict); 790 791 if (!ret) 792 iommu_cmd_line |= IOMMU_CMD_LINE_STRICT; 793 return ret; 794 } 795 early_param("iommu.strict", iommu_dma_setup); 796 797 void iommu_set_dma_strict(void) 798 { 799 iommu_dma_strict = true; 800 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) 801 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 802 } 803 804 static ssize_t iommu_group_attr_show(struct kobject *kobj, 805 struct attribute *__attr, char *buf) 806 { 807 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 808 struct iommu_group *group = to_iommu_group(kobj); 809 ssize_t ret = -EIO; 810 811 if (attr->show) 812 
ret = attr->show(group, buf); 813 return ret; 814 } 815 816 static ssize_t iommu_group_attr_store(struct kobject *kobj, 817 struct attribute *__attr, 818 const char *buf, size_t count) 819 { 820 struct iommu_group_attribute *attr = to_iommu_group_attr(__attr); 821 struct iommu_group *group = to_iommu_group(kobj); 822 ssize_t ret = -EIO; 823 824 if (attr->store) 825 ret = attr->store(group, buf, count); 826 return ret; 827 } 828 829 static const struct sysfs_ops iommu_group_sysfs_ops = { 830 .show = iommu_group_attr_show, 831 .store = iommu_group_attr_store, 832 }; 833 834 static int iommu_group_create_file(struct iommu_group *group, 835 struct iommu_group_attribute *attr) 836 { 837 return sysfs_create_file(&group->kobj, &attr->attr); 838 } 839 840 static void iommu_group_remove_file(struct iommu_group *group, 841 struct iommu_group_attribute *attr) 842 { 843 sysfs_remove_file(&group->kobj, &attr->attr); 844 } 845 846 static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf) 847 { 848 return sysfs_emit(buf, "%s\n", group->name); 849 } 850 851 /** 852 * iommu_insert_resv_region - Insert a new region in the 853 * list of reserved regions. 854 * @new: new region to insert 855 * @regions: list of regions 856 * 857 * Elements are sorted by start address and overlapping segments 858 * of the same type are merged. 
859 */ 860 static int iommu_insert_resv_region(struct iommu_resv_region *new, 861 struct list_head *regions) 862 { 863 struct iommu_resv_region *iter, *tmp, *nr, *top; 864 LIST_HEAD(stack); 865 866 nr = iommu_alloc_resv_region(new->start, new->length, 867 new->prot, new->type, GFP_KERNEL); 868 if (!nr) 869 return -ENOMEM; 870 871 /* First add the new element based on start address sorting */ 872 list_for_each_entry(iter, regions, list) { 873 if (nr->start < iter->start || 874 (nr->start == iter->start && nr->type <= iter->type)) 875 break; 876 } 877 list_add_tail(&nr->list, &iter->list); 878 879 /* Merge overlapping segments of type nr->type in @regions, if any */ 880 list_for_each_entry_safe(iter, tmp, regions, list) { 881 phys_addr_t top_end, iter_end = iter->start + iter->length - 1; 882 883 /* no merge needed on elements of different types than @new */ 884 if (iter->type != new->type) { 885 list_move_tail(&iter->list, &stack); 886 continue; 887 } 888 889 /* look for the last stack element of same type as @iter */ 890 list_for_each_entry_reverse(top, &stack, list) 891 if (top->type == iter->type) 892 goto check_overlap; 893 894 list_move_tail(&iter->list, &stack); 895 continue; 896 897 check_overlap: 898 top_end = top->start + top->length - 1; 899 900 if (iter->start > top_end + 1) { 901 list_move_tail(&iter->list, &stack); 902 } else { 903 top->length = max(top_end, iter_end) - top->start + 1; 904 list_del(&iter->list); 905 kfree(iter); 906 } 907 } 908 list_splice(&stack, regions); 909 return 0; 910 } 911 912 static int 913 iommu_insert_device_resv_regions(struct list_head *dev_resv_regions, 914 struct list_head *group_resv_regions) 915 { 916 struct iommu_resv_region *entry; 917 int ret = 0; 918 919 list_for_each_entry(entry, dev_resv_regions, list) { 920 ret = iommu_insert_resv_region(entry, group_resv_regions); 921 if (ret) 922 break; 923 } 924 return ret; 925 } 926 927 int iommu_get_group_resv_regions(struct iommu_group *group, 928 struct list_head *head) 
929 { 930 struct group_device *device; 931 int ret = 0; 932 933 mutex_lock(&group->mutex); 934 for_each_group_device(group, device) { 935 struct list_head dev_resv_regions; 936 937 /* 938 * Non-API groups still expose reserved_regions in sysfs, 939 * so filter out calls that get here that way. 940 */ 941 if (!dev_has_iommu(device->dev)) 942 break; 943 944 INIT_LIST_HEAD(&dev_resv_regions); 945 iommu_get_resv_regions(device->dev, &dev_resv_regions); 946 ret = iommu_insert_device_resv_regions(&dev_resv_regions, head); 947 iommu_put_resv_regions(device->dev, &dev_resv_regions); 948 if (ret) 949 break; 950 } 951 mutex_unlock(&group->mutex); 952 return ret; 953 } 954 EXPORT_SYMBOL_GPL(iommu_get_group_resv_regions); 955 956 static ssize_t iommu_group_show_resv_regions(struct iommu_group *group, 957 char *buf) 958 { 959 struct iommu_resv_region *region, *next; 960 struct list_head group_resv_regions; 961 int offset = 0; 962 963 INIT_LIST_HEAD(&group_resv_regions); 964 iommu_get_group_resv_regions(group, &group_resv_regions); 965 966 list_for_each_entry_safe(region, next, &group_resv_regions, list) { 967 offset += sysfs_emit_at(buf, offset, "0x%016llx 0x%016llx %s\n", 968 (long long)region->start, 969 (long long)(region->start + 970 region->length - 1), 971 iommu_group_resv_type_string[region->type]); 972 kfree(region); 973 } 974 975 return offset; 976 } 977 978 static ssize_t iommu_group_show_type(struct iommu_group *group, 979 char *buf) 980 { 981 char *type = "unknown"; 982 983 mutex_lock(&group->mutex); 984 if (group->default_domain) { 985 switch (group->default_domain->type) { 986 case IOMMU_DOMAIN_BLOCKED: 987 type = "blocked"; 988 break; 989 case IOMMU_DOMAIN_IDENTITY: 990 type = "identity"; 991 break; 992 case IOMMU_DOMAIN_UNMANAGED: 993 type = "unmanaged"; 994 break; 995 case IOMMU_DOMAIN_DMA: 996 type = "DMA"; 997 break; 998 case IOMMU_DOMAIN_DMA_FQ: 999 type = "DMA-FQ"; 1000 break; 1001 } 1002 } 1003 mutex_unlock(&group->mutex); 1004 1005 return sysfs_emit(buf, 
"%s\n", type); 1006 } 1007 1008 static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL); 1009 1010 static IOMMU_GROUP_ATTR(reserved_regions, 0444, 1011 iommu_group_show_resv_regions, NULL); 1012 1013 static IOMMU_GROUP_ATTR(type, 0644, iommu_group_show_type, 1014 iommu_group_store_type); 1015 1016 static void iommu_group_release(struct kobject *kobj) 1017 { 1018 struct iommu_group *group = to_iommu_group(kobj); 1019 1020 pr_debug("Releasing group %d\n", group->id); 1021 1022 if (group->iommu_data_release) 1023 group->iommu_data_release(group->iommu_data); 1024 1025 ida_free(&iommu_group_ida, group->id); 1026 1027 /* Domains are free'd by iommu_deinit_device() */ 1028 WARN_ON(group->default_domain); 1029 WARN_ON(group->blocking_domain); 1030 1031 kfree(group->name); 1032 kfree(group); 1033 } 1034 1035 static const struct kobj_type iommu_group_ktype = { 1036 .sysfs_ops = &iommu_group_sysfs_ops, 1037 .release = iommu_group_release, 1038 }; 1039 1040 /** 1041 * iommu_group_alloc - Allocate a new group 1042 * 1043 * This function is called by an iommu driver to allocate a new iommu 1044 * group. The iommu group represents the minimum granularity of the iommu. 1045 * Upon successful return, the caller holds a reference to the supplied 1046 * group in order to hold the group until devices are added. Use 1047 * iommu_group_put() to release this extra reference count, allowing the 1048 * group to be automatically reclaimed once it has no devices or external 1049 * references. 
1050 */ 1051 struct iommu_group *iommu_group_alloc(void) 1052 { 1053 struct iommu_group *group; 1054 int ret; 1055 1056 group = kzalloc_obj(*group); 1057 if (!group) 1058 return ERR_PTR(-ENOMEM); 1059 1060 group->kobj.kset = iommu_group_kset; 1061 mutex_init(&group->mutex); 1062 INIT_LIST_HEAD(&group->devices); 1063 INIT_LIST_HEAD(&group->entry); 1064 xa_init(&group->pasid_array); 1065 1066 ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); 1067 if (ret < 0) { 1068 kfree(group); 1069 return ERR_PTR(ret); 1070 } 1071 group->id = ret; 1072 1073 ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, 1074 NULL, "%d", group->id); 1075 if (ret) { 1076 kobject_put(&group->kobj); 1077 return ERR_PTR(ret); 1078 } 1079 1080 group->devices_kobj = kobject_create_and_add("devices", &group->kobj); 1081 if (!group->devices_kobj) { 1082 kobject_put(&group->kobj); /* triggers .release & free */ 1083 return ERR_PTR(-ENOMEM); 1084 } 1085 1086 /* 1087 * The devices_kobj holds a reference on the group kobject, so 1088 * as long as that exists so will the group. We can therefore 1089 * use the devices_kobj for reference counting. 1090 */ 1091 kobject_put(&group->kobj); 1092 1093 ret = iommu_group_create_file(group, 1094 &iommu_group_attr_reserved_regions); 1095 if (ret) { 1096 kobject_put(group->devices_kobj); 1097 return ERR_PTR(ret); 1098 } 1099 1100 ret = iommu_group_create_file(group, &iommu_group_attr_type); 1101 if (ret) { 1102 kobject_put(group->devices_kobj); 1103 return ERR_PTR(ret); 1104 } 1105 1106 pr_debug("Allocated group %d\n", group->id); 1107 1108 return group; 1109 } 1110 EXPORT_SYMBOL_GPL(iommu_group_alloc); 1111 1112 /** 1113 * iommu_group_get_iommudata - retrieve iommu_data registered for a group 1114 * @group: the group 1115 * 1116 * iommu drivers can store data in the group for use when doing iommu 1117 * operations. This function provides a way to retrieve it. Caller 1118 * should hold a group reference. 
1119 */ 1120 void *iommu_group_get_iommudata(struct iommu_group *group) 1121 { 1122 return group->iommu_data; 1123 } 1124 EXPORT_SYMBOL_GPL(iommu_group_get_iommudata); 1125 1126 /** 1127 * iommu_group_set_iommudata - set iommu_data for a group 1128 * @group: the group 1129 * @iommu_data: new data 1130 * @release: release function for iommu_data 1131 * 1132 * iommu drivers can store data in the group for use when doing iommu 1133 * operations. This function provides a way to set the data after 1134 * the group has been allocated. Caller should hold a group reference. 1135 */ 1136 void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, 1137 void (*release)(void *iommu_data)) 1138 { 1139 group->iommu_data = iommu_data; 1140 group->iommu_data_release = release; 1141 } 1142 EXPORT_SYMBOL_GPL(iommu_group_set_iommudata); 1143 1144 /** 1145 * iommu_group_set_name - set name for a group 1146 * @group: the group 1147 * @name: name 1148 * 1149 * Allow iommu driver to set a name for a group. When set it will 1150 * appear in a name attribute file under the group in sysfs. 1151 */ 1152 int iommu_group_set_name(struct iommu_group *group, const char *name) 1153 { 1154 int ret; 1155 1156 if (group->name) { 1157 iommu_group_remove_file(group, &iommu_group_attr_name); 1158 kfree(group->name); 1159 group->name = NULL; 1160 if (!name) 1161 return 0; 1162 } 1163 1164 group->name = kstrdup(name, GFP_KERNEL); 1165 if (!group->name) 1166 return -ENOMEM; 1167 1168 ret = iommu_group_create_file(group, &iommu_group_attr_name); 1169 if (ret) { 1170 kfree(group->name); 1171 group->name = NULL; 1172 return ret; 1173 } 1174 1175 return 0; 1176 } 1177 EXPORT_SYMBOL_GPL(iommu_group_set_name); 1178 1179 static int iommu_create_device_direct_mappings(struct iommu_domain *domain, 1180 struct device *dev) 1181 { 1182 struct iommu_resv_region *entry; 1183 LIST_HEAD(mappings); 1184 unsigned long pg_size; 1185 int ret = 0; 1186 1187 pg_size = domain->pgsize_bitmap ? 
1UL << __ffs(domain->pgsize_bitmap) : 0; 1188 1189 if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size)) 1190 return -EINVAL; 1191 1192 iommu_get_resv_regions(dev, &mappings); 1193 1194 /* We need to consider overlapping regions for different devices */ 1195 list_for_each_entry(entry, &mappings, list) { 1196 dma_addr_t start, end, addr; 1197 size_t map_size = 0; 1198 1199 if (entry->type == IOMMU_RESV_DIRECT) 1200 dev->iommu->require_direct = 1; 1201 1202 if ((entry->type != IOMMU_RESV_DIRECT && 1203 entry->type != IOMMU_RESV_DIRECT_RELAXABLE) || 1204 !iommu_is_dma_domain(domain)) 1205 continue; 1206 1207 start = ALIGN(entry->start, pg_size); 1208 end = ALIGN(entry->start + entry->length, pg_size); 1209 1210 for (addr = start; addr <= end; addr += pg_size) { 1211 phys_addr_t phys_addr; 1212 1213 if (addr == end) 1214 goto map_end; 1215 1216 /* 1217 * Return address by iommu_iova_to_phys for 0 is 1218 * ambiguous. Offset to address 1 if addr is 0. 1219 */ 1220 phys_addr = iommu_iova_to_phys(domain, addr ? 
addr : 1); 1221 if (!phys_addr) { 1222 map_size += pg_size; 1223 continue; 1224 } 1225 1226 map_end: 1227 if (map_size) { 1228 ret = iommu_map(domain, addr - map_size, 1229 addr - map_size, map_size, 1230 entry->prot, GFP_KERNEL); 1231 if (ret) 1232 goto out; 1233 map_size = 0; 1234 } 1235 } 1236 1237 } 1238 out: 1239 iommu_put_resv_regions(dev, &mappings); 1240 1241 return ret; 1242 } 1243 1244 /* This is undone by __iommu_group_free_device() */ 1245 static struct group_device *iommu_group_alloc_device(struct iommu_group *group, 1246 struct device *dev) 1247 { 1248 int ret, i = 0; 1249 struct group_device *device; 1250 1251 device = kzalloc_obj(*device); 1252 if (!device) 1253 return ERR_PTR(-ENOMEM); 1254 1255 device->dev = dev; 1256 1257 ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); 1258 if (ret) 1259 goto err_free_device; 1260 1261 device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); 1262 rename: 1263 if (!device->name) { 1264 ret = -ENOMEM; 1265 goto err_remove_link; 1266 } 1267 1268 ret = sysfs_create_link_nowarn(group->devices_kobj, 1269 &dev->kobj, device->name); 1270 if (ret) { 1271 if (ret == -EEXIST && i >= 0) { 1272 /* 1273 * Account for the slim chance of collision 1274 * and append an instance to the name. 
1275 */ 1276 kfree(device->name); 1277 device->name = kasprintf(GFP_KERNEL, "%s.%d", 1278 kobject_name(&dev->kobj), i++); 1279 goto rename; 1280 } 1281 goto err_free_name; 1282 } 1283 1284 trace_add_device_to_group(group->id, dev); 1285 1286 dev_info(dev, "Adding to iommu group %d\n", group->id); 1287 1288 return device; 1289 1290 err_free_name: 1291 kfree(device->name); 1292 err_remove_link: 1293 sysfs_remove_link(&dev->kobj, "iommu_group"); 1294 err_free_device: 1295 kfree(device); 1296 dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); 1297 return ERR_PTR(ret); 1298 } 1299 1300 /** 1301 * iommu_group_add_device - add a device to an iommu group 1302 * @group: the group into which to add the device (reference should be held) 1303 * @dev: the device 1304 * 1305 * This function is called by an iommu driver to add a device into a 1306 * group. Adding a device increments the group reference count. 1307 */ 1308 int iommu_group_add_device(struct iommu_group *group, struct device *dev) 1309 { 1310 struct group_device *gdev; 1311 1312 gdev = iommu_group_alloc_device(group, dev); 1313 if (IS_ERR(gdev)) 1314 return PTR_ERR(gdev); 1315 1316 iommu_group_ref_get(group); 1317 dev->iommu_group = group; 1318 1319 mutex_lock(&group->mutex); 1320 list_add_tail(&gdev->list, &group->devices); 1321 mutex_unlock(&group->mutex); 1322 return 0; 1323 } 1324 EXPORT_SYMBOL_GPL(iommu_group_add_device); 1325 1326 /** 1327 * iommu_group_remove_device - remove a device from it's current group 1328 * @dev: device to be removed 1329 * 1330 * This function is called by an iommu driver to remove the device from 1331 * it's current group. This decrements the iommu group reference count. 
 */
void iommu_group_remove_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	/* Nothing to do for a device that is not in any group */
	if (!group)
		return;

	dev_info(dev, "Removing from iommu group %d\n", group->id);

	__iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);

#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
/**
 * iommu_group_mutex_assert - Check device group mutex lock
 * @dev: the device that has group param set
 *
 * This function is called by an iommu driver to check whether it holds
 * group mutex lock for the given device or not.
 *
 * Note that this function must be called after device group param is set.
 */
void iommu_group_mutex_assert(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	lockdep_assert_held(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_group_mutex_assert);
#endif

/*
 * Return the device of the first entry on group->devices.  The caller must
 * hold group->mutex.
 * NOTE(review): an empty list is not checked here - callers presumably only
 * use this on groups that have at least one device.
 */
static struct device *iommu_group_first_dev(struct iommu_group *group)
{
	lockdep_assert_held(&group->mutex);
	return list_first_entry(&group->devices, struct group_device, list)->dev;
}

/**
 * iommu_group_for_each_dev - iterate over each device in the group
 * @group: the group
 * @data: caller opaque data to be passed to callback function
 * @fn: caller supplied callback function
 *
 * This function is called by group users to iterate over group devices.
 * Callers should hold a reference count to the group during callback.
 * The group->mutex is held across callbacks, which will block calls to
 * iommu_group_add/remove_device.
 */
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
			     int (*fn)(struct device *, void *))
{
	struct group_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	for_each_group_device(group, device) {
		ret = fn(device->dev, data);
		/* The first non-zero result stops the walk and is returned */
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);

/**
 * iommu_group_get - Return the group for a device and increment reference
 * @dev: get the group that this device belongs to
 *
 * This function is called by iommu drivers and users to get the group
 * for the specified device.  If found, the group is returned and the group
 * reference is incremented, else NULL.
 */
struct iommu_group *iommu_group_get(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	/* Group references are counted on the devices kobject */
	if (group)
		kobject_get(group->devices_kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get);

/**
 * iommu_group_ref_get - Increment reference on a group
 * @group: the group to use, must not be NULL
 *
 * This function is called by iommu drivers to take additional references on an
 * existing group.  Returns the given group for convenience.
 */
struct iommu_group *iommu_group_ref_get(struct iommu_group *group)
{
	kobject_get(group->devices_kobj);
	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_ref_get);

/**
 * iommu_group_put - Decrement group reference
 * @group: the group to use
 *
 * This function is called by iommu drivers and users to release the
 * iommu group.  Once the reference count is zero, the group is released.
 */
void iommu_group_put(struct iommu_group *group)
{
	/* A NULL group is accepted and ignored */
	if (group)
		kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_put);

/**
 * iommu_group_id - Return ID for a group
 * @group: the group to ID
 *
 * Return the unique ID for the group matching the sysfs group number.
 */
int iommu_group_id(struct iommu_group *group)
{
	return group->id;
}
EXPORT_SYMBOL_GPL(iommu_group_id);

/*
 * Forward declaration: get_pci_alias_group() and
 * get_pci_function_alias_group() call each other.
 */
static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns);

/*
 * To consider a PCI device isolated, we require ACS to support Source
 * Validation, Request Redirection, Completer Redirection, and Upstream
 * Forwarding.  This effectively means that devices cannot spoof their
 * requester ID, requests and completions cannot be redirected, and all
 * transactions are forwarded upstream, even as it passes through a
 * bridge where the target device is downstream.
 */
#define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

/*
 * For multifunction devices which are not isolated from each other, find
 * all the other non-isolated functions and look for existing groups.  For
 * each function, we also need to look for aliases to or from other devices
 * that may already have a group.
 *
 * Returns the group found (with the reference taken by the lookup) or NULL.
 */
static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
							unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	/* Single function devices and ACS isolated functions stand alone */
	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
		return NULL;

	for_each_pci_dev(tmp) {
		/* Only other, non-isolated functions in the same bus/slot */
		if (tmp == pdev || tmp->bus != pdev->bus ||
		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
			continue;

		group = get_pci_alias_group(tmp, devfns);
		if (group) {
			/*
			 * Leaving the iteration early: presumably the
			 * iterator holds a reference on tmp that must be
			 * dropped here - TODO confirm.
			 */
			pci_dev_put(tmp);
			return group;
		}
	}

	return NULL;
}

/*
 * Look for aliases to or from the given device for existing groups. DMA
 * aliases are only supported on the same bus, therefore the search
 * space is quite small (especially since we're really only looking at pcie
 * device, and therefore only expect multiple slots on the root complex or
 * downstream switch ports).  It's conceivable though that a pair of
 * multifunction devices could have aliases between them that would cause a
 * loop.  To prevent this, we use a bitmap to track where we've been.
 */
static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	/* Already visited this devfn: stop here to break alias loops */
	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
		return NULL;

	group = iommu_group_get(&pdev->dev);
	if (group)
		return group;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus)
			continue;

		/* We alias them or they alias us */
		if (pci_devs_are_dma_aliases(pdev, tmp)) {
			group = get_pci_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}

			group = get_pci_function_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}
		}
	}

	return NULL;
}

/* State handed to get_pci_alias_or_group() through its opaque argument */
struct group_for_pci_data {
	struct pci_dev *pdev;		/* last device seen by the callback */
	struct iommu_group *group;	/* group of that device, or NULL */
};

/*
 * DMA alias iterator callback, return the last seen device.  Stop and return
 * the IOMMU group if we find one along the way.
 */
static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct group_for_pci_data *data = opaque;

	data->pdev = pdev;
	data->group = iommu_group_get(&pdev->dev);

	/* Non-zero once a group has been found */
	return data->group != NULL;
}

/*
 * Generic device_group call-back function. It just allocates one
 * iommu-group per device.
 */
struct iommu_group *generic_device_group(struct device *dev)
{
	return iommu_group_alloc();
}
EXPORT_SYMBOL_GPL(generic_device_group);

/*
 * Generic device_group call-back function. It just allocates one
 * iommu-group per iommu driver instance shared by every device
 * probed by that iommu driver.
1581 */ 1582 struct iommu_group *generic_single_device_group(struct device *dev) 1583 { 1584 struct iommu_device *iommu = dev->iommu->iommu_dev; 1585 1586 if (!iommu->singleton_group) { 1587 struct iommu_group *group; 1588 1589 group = iommu_group_alloc(); 1590 if (IS_ERR(group)) 1591 return group; 1592 iommu->singleton_group = group; 1593 } 1594 return iommu_group_ref_get(iommu->singleton_group); 1595 } 1596 EXPORT_SYMBOL_GPL(generic_single_device_group); 1597 1598 /* 1599 * Use standard PCI bus topology, isolation features, and DMA alias quirks 1600 * to find or create an IOMMU group for a device. 1601 */ 1602 struct iommu_group *pci_device_group(struct device *dev) 1603 { 1604 struct pci_dev *pdev = to_pci_dev(dev); 1605 struct group_for_pci_data data; 1606 struct pci_bus *bus; 1607 struct iommu_group *group = NULL; 1608 u64 devfns[4] = { 0 }; 1609 1610 if (WARN_ON(!dev_is_pci(dev))) 1611 return ERR_PTR(-EINVAL); 1612 1613 /* 1614 * Find the upstream DMA alias for the device. A device must not 1615 * be aliased due to topology in order to have its own IOMMU group. 1616 * If we find an alias along the way that already belongs to a 1617 * group, use it. 1618 */ 1619 if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) 1620 return data.group; 1621 1622 pdev = data.pdev; 1623 1624 /* 1625 * Continue upstream from the point of minimum IOMMU granularity 1626 * due to aliases to the point where devices are protected from 1627 * peer-to-peer DMA by PCI ACS. Again, if we find an existing 1628 * group, use it. 1629 */ 1630 for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { 1631 if (!bus->self) 1632 continue; 1633 1634 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) 1635 break; 1636 1637 pdev = bus->self; 1638 1639 group = iommu_group_get(&pdev->dev); 1640 if (group) 1641 return group; 1642 } 1643 1644 /* 1645 * Look for existing groups on device aliases. 
If we alias another 1646 * device or another device aliases us, use the same group. 1647 */ 1648 group = get_pci_alias_group(pdev, (unsigned long *)devfns); 1649 if (group) 1650 return group; 1651 1652 /* 1653 * Look for existing groups on non-isolated functions on the same 1654 * slot and aliases of those funcions, if any. No need to clear 1655 * the search bitmap, the tested devfns are still valid. 1656 */ 1657 group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); 1658 if (group) 1659 return group; 1660 1661 /* No shared group found, allocate new */ 1662 return iommu_group_alloc(); 1663 } 1664 EXPORT_SYMBOL_GPL(pci_device_group); 1665 1666 /* Get the IOMMU group for device on fsl-mc bus */ 1667 struct iommu_group *fsl_mc_device_group(struct device *dev) 1668 { 1669 struct device *cont_dev = fsl_mc_cont_dev(dev); 1670 struct iommu_group *group; 1671 1672 group = iommu_group_get(cont_dev); 1673 if (!group) 1674 group = iommu_group_alloc(); 1675 return group; 1676 } 1677 EXPORT_SYMBOL_GPL(fsl_mc_device_group); 1678 1679 static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev) 1680 { 1681 const struct iommu_ops *ops = dev_iommu_ops(dev); 1682 struct iommu_domain *domain; 1683 1684 if (ops->identity_domain) 1685 return ops->identity_domain; 1686 1687 if (ops->domain_alloc_identity) { 1688 domain = ops->domain_alloc_identity(dev); 1689 if (IS_ERR(domain)) 1690 return domain; 1691 } else { 1692 return ERR_PTR(-EOPNOTSUPP); 1693 } 1694 1695 iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops); 1696 return domain; 1697 } 1698 1699 static struct iommu_domain * 1700 __iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1701 { 1702 struct device *dev = iommu_group_first_dev(group); 1703 struct iommu_domain *dom; 1704 1705 if (group->default_domain && group->default_domain->type == req_type) 1706 return group->default_domain; 1707 1708 /* 1709 * When allocating the DMA API domain assume that the driver is going to 
1710 * use PASID and make sure the RID's domain is PASID compatible. 1711 */ 1712 if (req_type & __IOMMU_DOMAIN_PAGING) { 1713 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 1714 dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : 0); 1715 1716 /* 1717 * If driver does not support PASID feature then 1718 * try to allocate non-PASID domain 1719 */ 1720 if (PTR_ERR(dom) == -EOPNOTSUPP) 1721 dom = __iommu_paging_domain_alloc_flags(dev, req_type, 0); 1722 1723 return dom; 1724 } 1725 1726 if (req_type == IOMMU_DOMAIN_IDENTITY) 1727 return __iommu_alloc_identity_domain(dev); 1728 1729 return ERR_PTR(-EINVAL); 1730 } 1731 1732 /* 1733 * req_type of 0 means "auto" which means to select a domain based on 1734 * iommu_def_domain_type or what the driver actually supports. 1735 */ 1736 static struct iommu_domain * 1737 iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) 1738 { 1739 const struct iommu_ops *ops = dev_iommu_ops(iommu_group_first_dev(group)); 1740 struct iommu_domain *dom; 1741 1742 lockdep_assert_held(&group->mutex); 1743 1744 /* 1745 * Allow legacy drivers to specify the domain that will be the default 1746 * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM 1747 * domain. Do not use in new drivers. 
1748 */ 1749 if (ops->default_domain) { 1750 if (req_type != ops->default_domain->type) 1751 return ERR_PTR(-EINVAL); 1752 return ops->default_domain; 1753 } 1754 1755 if (req_type) 1756 return __iommu_group_alloc_default_domain(group, req_type); 1757 1758 /* The driver gave no guidance on what type to use, try the default */ 1759 dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); 1760 if (!IS_ERR(dom)) 1761 return dom; 1762 1763 /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ 1764 if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) 1765 return ERR_PTR(-EINVAL); 1766 dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); 1767 if (IS_ERR(dom)) 1768 return dom; 1769 1770 pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", 1771 iommu_def_domain_type, group->name); 1772 return dom; 1773 } 1774 1775 struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) 1776 { 1777 return group->default_domain; 1778 } 1779 1780 static int probe_iommu_group(struct device *dev, void *data) 1781 { 1782 struct list_head *group_list = data; 1783 int ret; 1784 1785 mutex_lock(&iommu_probe_device_lock); 1786 ret = __iommu_probe_device(dev, group_list); 1787 mutex_unlock(&iommu_probe_device_lock); 1788 if (ret == -ENODEV) 1789 ret = 0; 1790 1791 return ret; 1792 } 1793 1794 static int iommu_bus_notifier(struct notifier_block *nb, 1795 unsigned long action, void *data) 1796 { 1797 struct device *dev = data; 1798 1799 if (action == BUS_NOTIFY_ADD_DEVICE) { 1800 int ret; 1801 1802 ret = iommu_probe_device(dev); 1803 return (ret) ? NOTIFY_DONE : NOTIFY_OK; 1804 } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { 1805 iommu_release_device(dev); 1806 return NOTIFY_OK; 1807 } 1808 1809 return 0; 1810 } 1811 1812 /* 1813 * Combine the driver's chosen def_domain_type across all the devices in a 1814 * group. Drivers must give a consistent result. 
 *
 * @cur_type is the type accumulated so far (0 when none has been chosen yet);
 * the return value is the type to carry on to the next device.
 */
static int iommu_get_def_domain_type(struct iommu_group *group,
				     struct device *dev, int cur_type)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	int type;

	if (ops->default_domain) {
		/*
		 * Drivers that declare a global static default_domain will
		 * always choose that.
		 */
		type = ops->default_domain->type;
	} else {
		if (ops->def_domain_type)
			type = ops->def_domain_type(dev);
		else
			return cur_type;
	}
	/* No preference from this device, or it agrees with the others */
	if (!type || cur_type == type)
		return cur_type;
	/* First device to express a preference */
	if (!cur_type)
		return type;

	dev_err_ratelimited(
		dev,
		"IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n",
		iommu_domain_type_str(cur_type), iommu_domain_type_str(type),
		group->id);

	/*
	 * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY
	 * takes precedence.
	 */
	if (type == IOMMU_DOMAIN_IDENTITY)
		return type;
	return cur_type;
}

/*
 * A target_type of 0 will select the best domain type. 0 can be returned in
 * this case meaning the global default should be used.
 *
 * Must be called with group->mutex held.  Returns -1 when the requirements
 * of the group cannot be reconciled.
 */
static int iommu_get_default_domain_type(struct iommu_group *group,
					 int target_type)
{
	struct device *untrusted = NULL;
	struct group_device *gdev;
	int driver_type = 0;

	lockdep_assert_held(&group->mutex);

	/*
	 * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an
	 * identity_domain and it will automatically become their default
	 * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain.
	 * Override the selection to IDENTITY.
	 */
	if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
		static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) &&
				IS_ENABLED(CONFIG_IOMMU_DMA)));
		driver_type = IOMMU_DOMAIN_IDENTITY;
	}

	/* Fold in every device's preference and remember an untrusted one */
	for_each_group_device(group, gdev) {
		driver_type = iommu_get_def_domain_type(group, gdev->dev,
							driver_type);

		if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) {
			/*
			 * No ARM32 using systems will set untrusted, it cannot
			 * work.
			 */
			if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)))
				return -1;
			untrusted = gdev->dev;
		}
	}

	/*
	 * If the common dma ops are not selected in kconfig then we cannot use
	 * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been
	 * selected.
	 */
	if (!IS_ENABLED(CONFIG_IOMMU_DMA)) {
		if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA))
			return -1;
		if (!driver_type)
			driver_type = IOMMU_DOMAIN_IDENTITY;
	}

	/* An untrusted device forces DMA; any other driver choice is refused */
	if (untrusted) {
		if (driver_type && driver_type != IOMMU_DOMAIN_DMA) {
			dev_err_ratelimited(
				untrusted,
				"Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n",
				group->id, iommu_domain_type_str(driver_type));
			return -1;
		}
		driver_type = IOMMU_DOMAIN_DMA;
	}

	/* An explicit request must not contradict what the driver requires */
	if (target_type) {
		if (driver_type && target_type != driver_type)
			return -1;
		return target_type;
	}
	return driver_type;
}

/* Invoke the driver's optional probe_finalize() callback for @dev */
static void iommu_group_do_probe_finalize(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (ops->probe_finalize)
		ops->probe_finalize(dev);
}

/*
 * Probe every device on @bus, then set up the default domain and the DMA ops
 * for each group that was collected, and finally run the drivers'
 * probe_finalize() callbacks.  Returns 0 or the first error encountered.
 */
static int bus_iommu_probe(const struct bus_type *bus)
{
	struct iommu_group *group, *next;
	LIST_HEAD(group_list);
	int ret;

	ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
	if (ret)
		return ret;

	list_for_each_entry_safe(group, next, &group_list, entry) {
		struct group_device *gdev;

		mutex_lock(&group->mutex);

		/* Remove item from the list */
		list_del_init(&group->entry);

		/*
		 * We go to the trouble of deferred default domain creation so
		 * that the cross-group default domain type and the setup of the
		 * IOMMU_RESV_DIRECT will work correctly in non-hotplug scenarios.
		 */
		ret = iommu_setup_default_domain(group, 0);
		if (ret) {
			mutex_unlock(&group->mutex);
			return ret;
		}
		for_each_group_device(group, gdev)
			iommu_setup_dma_ops(gdev->dev, group->default_domain);
		mutex_unlock(&group->mutex);

		/*
		 * FIXME: Mis-locked because the ops->probe_finalize() call-back
		 * of some IOMMU drivers calls arm_iommu_attach_device() which
		 * in-turn might call back into IOMMU core code, where it tries
		 * to take group->mutex, resulting in a deadlock.
		 */
		for_each_group_device(group, gdev)
			iommu_group_do_probe_finalize(gdev->dev);
	}

	return 0;
}

/**
 * device_iommu_capable() - check for a general IOMMU capability
 * @dev: device to which the capability would be relevant, if available
 * @cap: IOMMU capability
 *
 * Return: true if an IOMMU is present and supports the given capability
 * for the given device, otherwise false.
 */
bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
{
	const struct iommu_ops *ops;

	if (!dev_has_iommu(dev))
		return false;

	/* A driver without a capable() callback reports no capabilities */
	ops = dev_iommu_ops(dev);
	if (!ops->capable)
		return false;

	return ops->capable(dev, cap);
}
EXPORT_SYMBOL_GPL(device_iommu_capable);

/**
 * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
 *       for a group
 * @group: Group to query
 *
 * IOMMU groups should not have differing values of
 * msi_device_has_isolated_msi() for devices in a group.
 * However nothing directly prevents this, so ensure mistakes don't result
 * in isolation failures by checking that all the devices are the same.
 */
bool iommu_group_has_isolated_msi(struct iommu_group *group)
{
	struct group_device *group_dev;
	bool ret = true;

	/* True only if every device in the group has isolated MSI */
	mutex_lock(&group->mutex);
	for_each_group_device(group, group_dev)
		ret &= msi_device_has_isolated_msi(group_dev->dev);
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);

/**
 * iommu_set_fault_handler() - set a fault handler for an iommu domain
 * @domain: iommu domain
 * @handler: fault handler
 * @token: user data, will be passed back to the fault handler
 *
 * This function should be used by IOMMU users which want to be notified
 * whenever an IOMMU fault happens.
 *
 * The fault handler itself should return 0 on success, and an appropriate
 * error code otherwise.
 */
void iommu_set_fault_handler(struct iommu_domain *domain,
					iommu_fault_handler_t handler,
					void *token)
{
	/* Refuse (with a warning) a domain whose cookie is already in use */
	if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE))
		return;

	domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER;
	domain->handler = handler;
	domain->handler_token = token;
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);

/*
 * Fill in the core-owned fields of a freshly allocated domain: its type and
 * owning ops.  domain->ops is only defaulted to ops->default_domain_ops when
 * the driver left it unset.
 */
static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
			      const struct iommu_ops *ops)
{
	domain->type = type;
	domain->owner = ops;
	if (!domain->ops)
		domain->ops = ops->default_domain_ops;
}

/*
 * Allocate a paging domain of @type for @dev through whichever allocation
 * callback the driver provides, then initialise it.  Returns the domain or
 * an ERR_PTR(): -ENODEV without an IOMMU, -EOPNOTSUPP when no callback can
 * serve the request, -ENOMEM when a callback returned NULL.
 */
static struct iommu_domain *
__iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type,
				  unsigned int flags)
{
	const struct iommu_ops *ops;
	struct iommu_domain *domain;

	if (!dev_has_iommu(dev))
		return ERR_PTR(-ENODEV);

	ops = dev_iommu_ops(dev);

	/* The flag-less callbacks can only serve requests without flags */
	if (ops->domain_alloc_paging && !flags)
		domain = ops->domain_alloc_paging(dev);
	else if (ops->domain_alloc_paging_flags)
		domain = ops->domain_alloc_paging_flags(dev, flags, NULL);
#if IS_ENABLED(CONFIG_FSL_PAMU)
	else if (ops->domain_alloc && !flags)
		domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED);
#endif
	else
		return ERR_PTR(-EOPNOTSUPP);

	if (IS_ERR(domain))
		return domain;
	if (!domain)
		return ERR_PTR(-ENOMEM);

	iommu_domain_init(domain, type, ops);
	return domain;
}

/**
 * iommu_paging_domain_alloc_flags() - Allocate a paging domain
 * @dev: device for which the domain is allocated
 * @flags: Bitmap of iommufd_hwpt_alloc_flags
 *
 * Allocate a paging domain which will be managed by a kernel driver. Return
 * allocated domain if successful, or an ERR pointer for failure.
 */
struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev,
						     unsigned int flags)
{
	return __iommu_paging_domain_alloc_flags(dev,
						 IOMMU_DOMAIN_UNMANAGED, flags);
}
EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags);

/*
 * Release whatever the domain's cookie refers to, according to its
 * cookie_type, then hand the domain to the driver's optional free() callback.
 */
void iommu_domain_free(struct iommu_domain *domain)
{
	switch (domain->cookie_type) {
	case IOMMU_COOKIE_DMA_IOVA:
		iommu_put_dma_cookie(domain);
		break;
	case IOMMU_COOKIE_DMA_MSI:
		iommu_put_msi_cookie(domain);
		break;
	case IOMMU_COOKIE_SVA:
		mmdrop(domain->mm);
		break;
	default:
		break;
	}
	if (domain->ops->free)
		domain->ops->free(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);

/*
 * Put the group's domain back to the appropriate core-owned domain - either the
 * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
 */
static void __iommu_group_set_core_domain(struct iommu_group *group)
{
	struct iommu_domain *new_domain;

	/* A group with an owner gets the blocking domain, else the default */
	if (group->owner)
		new_domain = group->blocking_domain;
	else
		new_domain = group->default_domain;

	__iommu_group_set_domain_nofail(group, new_domain);
}

/*
 * Attach @dev to @domain through the domain's attach_dev() callback, passing
 * @old along to the driver.  Returns -ENODEV when the domain has no such
 * callback, otherwise the callback's result.  On success any pending
 * deferred attach is cleared and the attach is traced.
 */
static int __iommu_attach_device(struct iommu_domain *domain,
				 struct device *dev, struct iommu_domain *old)
{
	int ret;

	if (unlikely(domain->ops->attach_dev == NULL))
		return -ENODEV;

	ret = domain->ops->attach_dev(domain, dev, old);
	if (ret)
		return ret;
	dev->iommu->attach_deferred = 0;
	trace_attach_device_to_domain(dev);
	return 0;
}

/**
 * iommu_attach_device - Attach an IOMMU domain to a device
 * @domain: IOMMU domain to attach
 * @dev: Device that will be attached
 *
 * Returns 0 on success and error code on failure
 *
 * Note that EINVAL can be treated as a soft failure, indicating
 * that certain configuration of the domain is incompatible with
 * the device. In this case attaching a different domain to the
 * device may succeed.
 */
int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
{
	/* Caller must be a probed driver on dev */
	struct iommu_group *group = dev->iommu_group;
	int ret;

	if (!group)
		return -ENODEV;

	/*
	 * Lock the group to make sure the device-count doesn't
	 * change while we are attaching
	 */
	mutex_lock(&group->mutex);
	/* Only groups with exactly one device can be attached this way */
	ret = -EINVAL;
	if (list_count_nodes(&group->devices) != 1)
		goto out_unlock;

	ret = __iommu_attach_group(domain, group);

out_unlock:
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_attach_device);

/*
 * Perform the attach of @dev to @domain that was deferred earlier, if one is
 * still pending.  Returns 0 when nothing is pending, -EBUSY while the group
 * is being reset, otherwise the result of the attach.
 */
int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
{
	/*
	 * This is called on the dma mapping fast path so avoid locking. This is
	 * racy, but we have an expectation that the driver will setup its DMAs
	 * inside probe while being single threaded to avoid racing.
	 */
	if (!dev->iommu || !dev->iommu->attach_deferred)
		return 0;

	/* Slow path: the mutex is held until this function returns */
	guard(mutex)(&dev->iommu_group->mutex);

	/*
	 * This is a concurrent attach during a device reset. Reject it until
	 * pci_dev_reset_iommu_done() attaches the device to group->domain.
	 *
	 * Note that this might fail the iommu_dma_map(). But there's nothing
	 * more we can do here.
	 */
	if (dev->iommu_group->resetting_domain)
		return -EBUSY;
	return __iommu_attach_device(domain, dev, NULL);
}

/*
 * Detach @dev from @domain by putting its group back onto the core-owned
 * domain.  Warns and does nothing when @domain is not the group's current
 * domain or when the group does not hold exactly one device.
 */
void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
{
	/* Caller must be a probed driver on dev */
	struct iommu_group *group = dev->iommu_group;

	if (!group)
		return;

	mutex_lock(&group->mutex);
	if (WARN_ON(domain != group->domain) ||
	    WARN_ON(list_count_nodes(&group->devices) != 1))
		goto out_unlock;
	__iommu_group_set_core_domain(group);

out_unlock:
	mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_detach_device);

/**
 * iommu_get_domain_for_dev() - Return the DMA API domain pointer
 * @dev: Device to query
 *
 * This function can be called within a driver bound to dev. The returned
 * pointer is valid for the lifetime of the bound driver.
 *
 * It should not be called by drivers with driver_managed_dma = true.
 */
struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
{
	/* Caller must be a probed driver on dev */
	struct iommu_group *group = dev->iommu_group;

	if (!group)
		return NULL;

	/* Callers that already hold group->mutex are flagged by lockdep */
	lockdep_assert_not_held(&group->mutex);

	return group->domain;
}
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);

/**
 * iommu_driver_get_domain_for_dev() - Return the driver-level domain pointer
 * @dev: Device to query
 *
 * This function can be called by an iommu driver that wants to get the physical
 * domain within an iommu callback function where group->mutex is held.
2268 */ 2269 struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev) 2270 { 2271 struct iommu_group *group = dev->iommu_group; 2272 2273 lockdep_assert_held(&group->mutex); 2274 2275 /* 2276 * Driver handles the low-level __iommu_attach_device(), including the 2277 * one invoked by pci_dev_reset_iommu_done() re-attaching the device to 2278 * the cached group->domain. In this case, the driver must get the old 2279 * domain from group->resetting_domain rather than group->domain. This 2280 * prevents it from re-attaching the device from group->domain (old) to 2281 * group->domain (new). 2282 */ 2283 if (group->resetting_domain) 2284 return group->resetting_domain; 2285 2286 return group->domain; 2287 } 2288 EXPORT_SYMBOL_GPL(iommu_driver_get_domain_for_dev); 2289 2290 /* 2291 * For IOMMU_DOMAIN_DMA implementations which already provide their own 2292 * guarantees that the group and its default domain are valid and correct. 2293 */ 2294 struct iommu_domain *iommu_get_dma_domain(struct device *dev) 2295 { 2296 return dev->iommu_group->default_domain; 2297 } 2298 2299 static void *iommu_make_pasid_array_entry(struct iommu_domain *domain, 2300 struct iommu_attach_handle *handle) 2301 { 2302 if (handle) { 2303 handle->domain = domain; 2304 return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE); 2305 } 2306 2307 return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN); 2308 } 2309 2310 static bool domain_iommu_ops_compatible(const struct iommu_ops *ops, 2311 struct iommu_domain *domain) 2312 { 2313 if (domain->owner == ops) 2314 return true; 2315 2316 /* For static domains, owner isn't set. 
*/ 2317 if (domain == ops->blocked_domain || domain == ops->identity_domain) 2318 return true; 2319 2320 return false; 2321 } 2322 2323 static int __iommu_attach_group(struct iommu_domain *domain, 2324 struct iommu_group *group) 2325 { 2326 struct device *dev; 2327 2328 if (group->domain && group->domain != group->default_domain && 2329 group->domain != group->blocking_domain) 2330 return -EBUSY; 2331 2332 dev = iommu_group_first_dev(group); 2333 if (!dev_has_iommu(dev) || 2334 !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain)) 2335 return -EINVAL; 2336 2337 return __iommu_group_set_domain(group, domain); 2338 } 2339 2340 /** 2341 * iommu_attach_group - Attach an IOMMU domain to an IOMMU group 2342 * @domain: IOMMU domain to attach 2343 * @group: IOMMU group that will be attached 2344 * 2345 * Returns 0 on success and error code on failure 2346 * 2347 * Note that EINVAL can be treated as a soft failure, indicating 2348 * that certain configuration of the domain is incompatible with 2349 * the group. In this case attaching a different domain to the 2350 * group may succeed. 2351 */ 2352 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) 2353 { 2354 int ret; 2355 2356 mutex_lock(&group->mutex); 2357 ret = __iommu_attach_group(domain, group); 2358 mutex_unlock(&group->mutex); 2359 2360 return ret; 2361 } 2362 EXPORT_SYMBOL_GPL(iommu_attach_group); 2363 2364 static int __iommu_device_set_domain(struct iommu_group *group, 2365 struct device *dev, 2366 struct iommu_domain *new_domain, 2367 struct iommu_domain *old_domain, 2368 unsigned int flags) 2369 { 2370 int ret; 2371 2372 /* 2373 * If the device requires IOMMU_RESV_DIRECT then we cannot allow 2374 * the blocking domain to be attached as it does not contain the 2375 * required 1:1 mapping. This test effectively excludes the device 2376 * being used with iommu_group_claim_dma_owner() which will block 2377 * vfio and iommufd as well. 
2378 */ 2379 if (dev->iommu->require_direct && 2380 (new_domain->type == IOMMU_DOMAIN_BLOCKED || 2381 new_domain == group->blocking_domain)) { 2382 dev_warn(dev, 2383 "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); 2384 return -EINVAL; 2385 } 2386 2387 if (dev->iommu->attach_deferred) { 2388 if (new_domain == group->default_domain) 2389 return 0; 2390 dev->iommu->attach_deferred = 0; 2391 } 2392 2393 ret = __iommu_attach_device(new_domain, dev, old_domain); 2394 if (ret) { 2395 /* 2396 * If we have a blocking domain then try to attach that in hopes 2397 * of avoiding a UAF. Modern drivers should implement blocking 2398 * domains as global statics that cannot fail. 2399 */ 2400 if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && 2401 group->blocking_domain && 2402 group->blocking_domain != new_domain) 2403 __iommu_attach_device(group->blocking_domain, dev, 2404 old_domain); 2405 return ret; 2406 } 2407 return 0; 2408 } 2409 2410 /* 2411 * If 0 is returned the group's domain is new_domain. If an error is returned 2412 * then the group's domain will be set back to the existing domain unless 2413 * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's 2414 * domains is left inconsistent. This is a driver bug to fail attach with a 2415 * previously good domain. We try to avoid a kernel UAF because of this. 2416 * 2417 * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU 2418 * API works on domains and devices. Bridge that gap by iterating over the 2419 * devices in a group. Ideally we'd have a single device which represents the 2420 * requestor ID of the group, but we also allow IOMMU drivers to create policy 2421 * defined minimum sets, where the physical hardware may be able to distiguish 2422 * members, but we wish to group them at a higher level (ex. untrusted 2423 * multi-function PCI devices). 
Thus we attach each device. 2424 */ 2425 static int __iommu_group_set_domain_internal(struct iommu_group *group, 2426 struct iommu_domain *new_domain, 2427 unsigned int flags) 2428 { 2429 struct group_device *last_gdev; 2430 struct group_device *gdev; 2431 int result; 2432 int ret; 2433 2434 lockdep_assert_held(&group->mutex); 2435 2436 if (group->domain == new_domain) 2437 return 0; 2438 2439 if (WARN_ON(!new_domain)) 2440 return -EINVAL; 2441 2442 /* 2443 * This is a concurrent attach during a device reset. Reject it until 2444 * pci_dev_reset_iommu_done() attaches the device to group->domain. 2445 */ 2446 if (group->resetting_domain) 2447 return -EBUSY; 2448 2449 /* 2450 * Changing the domain is done by calling attach_dev() on the new 2451 * domain. This switch does not have to be atomic and DMA can be 2452 * discarded during the transition. DMA must only be able to access 2453 * either new_domain or group->domain, never something else. 2454 */ 2455 result = 0; 2456 for_each_group_device(group, gdev) { 2457 ret = __iommu_device_set_domain(group, gdev->dev, new_domain, 2458 group->domain, flags); 2459 if (ret) { 2460 result = ret; 2461 /* 2462 * Keep trying the other devices in the group. If a 2463 * driver fails attach to an otherwise good domain, and 2464 * does not support blocking domains, it should at least 2465 * drop its reference on the current domain so we don't 2466 * UAF. 2467 */ 2468 if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) 2469 continue; 2470 goto err_revert; 2471 } 2472 } 2473 group->domain = new_domain; 2474 return result; 2475 2476 err_revert: 2477 /* 2478 * This is called in error unwind paths. A well behaved driver should 2479 * always allow us to attach to a domain that was already attached. 
2480 */ 2481 last_gdev = gdev; 2482 for_each_group_device(group, gdev) { 2483 /* No need to revert the last gdev that failed to set domain */ 2484 if (gdev == last_gdev) 2485 break; 2486 /* 2487 * A NULL domain can happen only for first probe, in which case 2488 * we leave group->domain as NULL and let release clean 2489 * everything up. 2490 */ 2491 if (group->domain) 2492 WARN_ON(__iommu_device_set_domain( 2493 group, gdev->dev, group->domain, new_domain, 2494 IOMMU_SET_DOMAIN_MUST_SUCCEED)); 2495 } 2496 return ret; 2497 } 2498 2499 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) 2500 { 2501 mutex_lock(&group->mutex); 2502 __iommu_group_set_core_domain(group); 2503 mutex_unlock(&group->mutex); 2504 } 2505 EXPORT_SYMBOL_GPL(iommu_detach_group); 2506 2507 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) 2508 { 2509 if (domain->type == IOMMU_DOMAIN_IDENTITY) 2510 return iova; 2511 2512 if (domain->type == IOMMU_DOMAIN_BLOCKED) 2513 return 0; 2514 2515 return domain->ops->iova_to_phys(domain, iova); 2516 } 2517 EXPORT_SYMBOL_GPL(iommu_iova_to_phys); 2518 2519 static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, 2520 phys_addr_t paddr, size_t size, size_t *count) 2521 { 2522 unsigned int pgsize_idx, pgsize_idx_next; 2523 unsigned long pgsizes; 2524 size_t offset, pgsize, pgsize_next; 2525 size_t offset_end; 2526 unsigned long addr_merge = paddr | iova; 2527 2528 /* Page sizes supported by the hardware and small enough for @size */ 2529 pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0); 2530 2531 /* Constrain the page sizes further based on the maximum alignment */ 2532 if (likely(addr_merge)) 2533 pgsizes &= GENMASK(__ffs(addr_merge), 0); 2534 2535 /* Make sure we have at least one suitable page size */ 2536 BUG_ON(!pgsizes); 2537 2538 /* Pick the biggest page size remaining */ 2539 pgsize_idx = __fls(pgsizes); 2540 pgsize = BIT(pgsize_idx); 2541 if (!count) 2542 return pgsize; 
2543 2544 /* Find the next biggest support page size, if it exists */ 2545 pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0); 2546 if (!pgsizes) 2547 goto out_set_count; 2548 2549 pgsize_idx_next = __ffs(pgsizes); 2550 pgsize_next = BIT(pgsize_idx_next); 2551 2552 /* 2553 * There's no point trying a bigger page size unless the virtual 2554 * and physical addresses are similarly offset within the larger page. 2555 */ 2556 if ((iova ^ paddr) & (pgsize_next - 1)) 2557 goto out_set_count; 2558 2559 /* Calculate the offset to the next page size alignment boundary */ 2560 offset = pgsize_next - (addr_merge & (pgsize_next - 1)); 2561 2562 /* 2563 * If size is big enough to accommodate the larger page, reduce 2564 * the number of smaller pages. 2565 */ 2566 if (!check_add_overflow(offset, pgsize_next, &offset_end) && 2567 offset_end <= size) 2568 size = offset; 2569 2570 out_set_count: 2571 *count = size >> pgsize_idx; 2572 return pgsize; 2573 } 2574 2575 int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, 2576 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2577 { 2578 const struct iommu_domain_ops *ops = domain->ops; 2579 unsigned long orig_iova = iova; 2580 unsigned int min_pagesz; 2581 size_t orig_size = size; 2582 phys_addr_t orig_paddr = paddr; 2583 int ret = 0; 2584 2585 might_sleep_if(gfpflags_allow_blocking(gfp)); 2586 2587 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2588 return -EINVAL; 2589 2590 if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) 2591 return -ENODEV; 2592 2593 /* Discourage passing strange GFP flags */ 2594 if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | 2595 __GFP_HIGHMEM))) 2596 return -EINVAL; 2597 2598 /* find out the minimum page size supported */ 2599 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2600 2601 /* 2602 * both the virtual address and the physical one, as well as 2603 * the size of the mapping, must be aligned (at least) to the 2604 * size of the smallest page 
supported by the hardware 2605 */ 2606 if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { 2607 pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", 2608 iova, &paddr, size, min_pagesz); 2609 return -EINVAL; 2610 } 2611 2612 pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); 2613 2614 while (size) { 2615 size_t pgsize, count, mapped = 0; 2616 2617 pgsize = iommu_pgsize(domain, iova, paddr, size, &count); 2618 2619 pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", 2620 iova, &paddr, pgsize, count); 2621 ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, 2622 gfp, &mapped); 2623 /* 2624 * Some pages may have been mapped, even if an error occurred, 2625 * so we should account for those so they can be unmapped. 2626 */ 2627 size -= mapped; 2628 2629 if (ret) 2630 break; 2631 2632 iova += mapped; 2633 paddr += mapped; 2634 } 2635 2636 /* unroll mapping in case something went wrong */ 2637 if (ret) { 2638 iommu_unmap(domain, orig_iova, orig_size - size); 2639 } else { 2640 trace_map(orig_iova, orig_paddr, orig_size); 2641 iommu_debug_map(domain, orig_paddr, orig_size); 2642 } 2643 2644 return ret; 2645 } 2646 2647 int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) 2648 { 2649 const struct iommu_domain_ops *ops = domain->ops; 2650 2651 if (!ops->iotlb_sync_map) 2652 return 0; 2653 return ops->iotlb_sync_map(domain, iova, size); 2654 } 2655 2656 int iommu_map(struct iommu_domain *domain, unsigned long iova, 2657 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2658 { 2659 int ret; 2660 2661 ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp); 2662 if (ret) 2663 return ret; 2664 2665 ret = iommu_sync_map(domain, iova, size); 2666 if (ret) 2667 iommu_unmap(domain, iova, size); 2668 2669 return ret; 2670 } 2671 EXPORT_SYMBOL_GPL(iommu_map); 2672 2673 static size_t __iommu_unmap(struct iommu_domain *domain, 2674 unsigned long iova, size_t size, 2675 struct iommu_iotlb_gather 
*iotlb_gather) 2676 { 2677 const struct iommu_domain_ops *ops = domain->ops; 2678 size_t unmapped_page, unmapped = 0; 2679 unsigned long orig_iova = iova; 2680 unsigned int min_pagesz; 2681 2682 if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) 2683 return 0; 2684 2685 if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL)) 2686 return 0; 2687 2688 /* find out the minimum page size supported */ 2689 min_pagesz = 1 << __ffs(domain->pgsize_bitmap); 2690 2691 /* 2692 * The virtual address, as well as the size of the mapping, must be 2693 * aligned (at least) to the size of the smallest page supported 2694 * by the hardware 2695 */ 2696 if (!IS_ALIGNED(iova | size, min_pagesz)) { 2697 pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n", 2698 iova, size, min_pagesz); 2699 return 0; 2700 } 2701 2702 pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); 2703 2704 iommu_debug_unmap_begin(domain, iova, size); 2705 2706 /* 2707 * Keep iterating until we either unmap 'size' bytes (or more) 2708 * or we hit an area that isn't mapped. 2709 */ 2710 while (unmapped < size) { 2711 size_t pgsize, count; 2712 2713 pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count); 2714 unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather); 2715 if (!unmapped_page) 2716 break; 2717 2718 pr_debug("unmapped: iova 0x%lx size 0x%zx\n", 2719 iova, unmapped_page); 2720 /* 2721 * If the driver itself isn't using the gather, make sure 2722 * it looks non-empty so iotlb_sync will still be called. 
2723 */ 2724 if (iotlb_gather->start >= iotlb_gather->end) 2725 iommu_iotlb_gather_add_range(iotlb_gather, iova, size); 2726 2727 iova += unmapped_page; 2728 unmapped += unmapped_page; 2729 } 2730 2731 trace_unmap(orig_iova, size, unmapped); 2732 iommu_debug_unmap_end(domain, orig_iova, size, unmapped); 2733 return unmapped; 2734 } 2735 2736 /** 2737 * iommu_unmap() - Remove mappings from a range of IOVA 2738 * @domain: Domain to manipulate 2739 * @iova: IO virtual address to start 2740 * @size: Length of the range starting from @iova 2741 * 2742 * iommu_unmap() will remove a translation created by iommu_map(). It cannot 2743 * subdivide a mapping created by iommu_map(), so it should be called with IOVA 2744 * ranges that match what was passed to iommu_map(). The range can aggregate 2745 * contiguous iommu_map() calls so long as no individual range is split. 2746 * 2747 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2748 * unmapping stopped. 2749 */ 2750 size_t iommu_unmap(struct iommu_domain *domain, 2751 unsigned long iova, size_t size) 2752 { 2753 struct iommu_iotlb_gather iotlb_gather; 2754 size_t ret; 2755 2756 iommu_iotlb_gather_init(&iotlb_gather); 2757 ret = __iommu_unmap(domain, iova, size, &iotlb_gather); 2758 iommu_iotlb_sync(domain, &iotlb_gather); 2759 2760 return ret; 2761 } 2762 EXPORT_SYMBOL_GPL(iommu_unmap); 2763 2764 /** 2765 * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync 2766 * @domain: Domain to manipulate 2767 * @iova: IO virtual address to start 2768 * @size: Length of the range starting from @iova 2769 * @iotlb_gather: range information for a pending IOTLB flush 2770 * 2771 * iommu_unmap_fast() will remove a translation created by iommu_map(). 2772 * It can't subdivide a mapping created by iommu_map(), so it should be 2773 * called with IOVA ranges that match what was passed to iommu_map(). 
The 2774 * range can aggregate contiguous iommu_map() calls so long as no individual 2775 * range is split. 2776 * 2777 * Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers 2778 * which manage the IOTLB flushing externally to perform a batched sync. 2779 * 2780 * Returns: Number of bytes of IOVA unmapped. iova + res will be the point 2781 * unmapping stopped. 2782 */ 2783 size_t iommu_unmap_fast(struct iommu_domain *domain, 2784 unsigned long iova, size_t size, 2785 struct iommu_iotlb_gather *iotlb_gather) 2786 { 2787 return __iommu_unmap(domain, iova, size, iotlb_gather); 2788 } 2789 EXPORT_SYMBOL_GPL(iommu_unmap_fast); 2790 2791 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, 2792 struct scatterlist *sg, unsigned int nents, int prot, 2793 gfp_t gfp) 2794 { 2795 size_t len = 0, mapped = 0; 2796 phys_addr_t start; 2797 unsigned int i = 0; 2798 int ret; 2799 2800 while (i <= nents) { 2801 phys_addr_t s_phys = sg_phys(sg); 2802 2803 if (len && s_phys != start + len) { 2804 ret = iommu_map_nosync(domain, iova + mapped, start, 2805 len, prot, gfp); 2806 if (ret) 2807 goto out_err; 2808 2809 mapped += len; 2810 len = 0; 2811 } 2812 2813 if (sg_dma_is_bus_address(sg)) 2814 goto next; 2815 2816 if (len) { 2817 len += sg->length; 2818 } else { 2819 len = sg->length; 2820 start = s_phys; 2821 } 2822 2823 next: 2824 if (++i < nents) 2825 sg = sg_next(sg); 2826 } 2827 2828 ret = iommu_sync_map(domain, iova, mapped); 2829 if (ret) 2830 goto out_err; 2831 2832 return mapped; 2833 2834 out_err: 2835 /* undo mappings already done */ 2836 iommu_unmap(domain, iova, mapped); 2837 2838 return ret; 2839 } 2840 EXPORT_SYMBOL_GPL(iommu_map_sg); 2841 2842 /** 2843 * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework 2844 * @domain: the iommu domain where the fault has happened 2845 * @dev: the device where the fault has happened 2846 * @iova: the faulting address 2847 * @flags: mmu fault flags (e.g. 
IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) 2848 * 2849 * This function should be called by the low-level IOMMU implementations 2850 * whenever IOMMU faults happen, to allow high-level users, that are 2851 * interested in such events, to know about them. 2852 * 2853 * This event may be useful for several possible use cases: 2854 * - mere logging of the event 2855 * - dynamic TLB/PTE loading 2856 * - if restarting of the faulting device is required 2857 * 2858 * Returns 0 on success and an appropriate error code otherwise (if dynamic 2859 * PTE/TLB loading will one day be supported, implementations will be able 2860 * to tell whether it succeeded or not according to this return value). 2861 * 2862 * Specifically, -ENOSYS is returned if a fault handler isn't installed 2863 * (though fault handlers can also return -ENOSYS, in case they want to 2864 * elicit the default behavior of the IOMMU drivers). 2865 */ 2866 int report_iommu_fault(struct iommu_domain *domain, struct device *dev, 2867 unsigned long iova, int flags) 2868 { 2869 int ret = -ENOSYS; 2870 2871 /* 2872 * if upper layers showed interest and installed a fault handler, 2873 * invoke it. 
2874 */ 2875 if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER && 2876 domain->handler) 2877 ret = domain->handler(domain, dev, iova, flags, 2878 domain->handler_token); 2879 2880 trace_io_page_fault(dev, iova, flags); 2881 return ret; 2882 } 2883 EXPORT_SYMBOL_GPL(report_iommu_fault); 2884 2885 static int __init iommu_init(void) 2886 { 2887 iommu_group_kset = kset_create_and_add("iommu_groups", 2888 NULL, kernel_kobj); 2889 BUG_ON(!iommu_group_kset); 2890 2891 iommu_debugfs_setup(); 2892 2893 return 0; 2894 } 2895 core_initcall(iommu_init); 2896 2897 int iommu_set_pgtable_quirks(struct iommu_domain *domain, 2898 unsigned long quirk) 2899 { 2900 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 2901 return -EINVAL; 2902 if (!domain->ops->set_pgtable_quirks) 2903 return -EINVAL; 2904 return domain->ops->set_pgtable_quirks(domain, quirk); 2905 } 2906 EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks); 2907 2908 /** 2909 * iommu_get_resv_regions - get reserved regions 2910 * @dev: device for which to get reserved regions 2911 * @list: reserved region list for device 2912 * 2913 * This returns a list of reserved IOVA regions specific to this device. 2914 * A domain user should not map IOVA in these ranges. 2915 */ 2916 void iommu_get_resv_regions(struct device *dev, struct list_head *list) 2917 { 2918 const struct iommu_ops *ops = dev_iommu_ops(dev); 2919 2920 if (ops->get_resv_regions) 2921 ops->get_resv_regions(dev, list); 2922 } 2923 EXPORT_SYMBOL_GPL(iommu_get_resv_regions); 2924 2925 /** 2926 * iommu_put_resv_regions - release reserved regions 2927 * @dev: device for which to free reserved regions 2928 * @list: reserved region list for device 2929 * 2930 * This releases a reserved region list acquired by iommu_get_resv_regions(). 
2931 */ 2932 void iommu_put_resv_regions(struct device *dev, struct list_head *list) 2933 { 2934 struct iommu_resv_region *entry, *next; 2935 2936 list_for_each_entry_safe(entry, next, list, list) { 2937 if (entry->free) 2938 entry->free(dev, entry); 2939 else 2940 kfree(entry); 2941 } 2942 } 2943 EXPORT_SYMBOL(iommu_put_resv_regions); 2944 2945 struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, 2946 size_t length, int prot, 2947 enum iommu_resv_type type, 2948 gfp_t gfp) 2949 { 2950 struct iommu_resv_region *region; 2951 2952 region = kzalloc_obj(*region, gfp); 2953 if (!region) 2954 return NULL; 2955 2956 INIT_LIST_HEAD(®ion->list); 2957 region->start = start; 2958 region->length = length; 2959 region->prot = prot; 2960 region->type = type; 2961 return region; 2962 } 2963 EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); 2964 2965 void iommu_set_default_passthrough(bool cmd_line) 2966 { 2967 if (cmd_line) 2968 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2969 iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; 2970 } 2971 2972 void iommu_set_default_translated(bool cmd_line) 2973 { 2974 if (cmd_line) 2975 iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; 2976 iommu_def_domain_type = IOMMU_DOMAIN_DMA; 2977 } 2978 2979 bool iommu_default_passthrough(void) 2980 { 2981 return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; 2982 } 2983 EXPORT_SYMBOL_GPL(iommu_default_passthrough); 2984 2985 static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode) 2986 { 2987 const struct iommu_device *iommu, *ret = NULL; 2988 2989 spin_lock(&iommu_device_lock); 2990 list_for_each_entry(iommu, &iommu_device_list, list) 2991 if (iommu->fwnode == fwnode) { 2992 ret = iommu; 2993 break; 2994 } 2995 spin_unlock(&iommu_device_lock); 2996 return ret; 2997 } 2998 2999 const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) 3000 { 3001 const struct iommu_device *iommu = iommu_from_fwnode(fwnode); 3002 3003 return iommu ? 
iommu->ops : NULL; 3004 } 3005 3006 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) 3007 { 3008 const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode); 3009 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3010 3011 if (!iommu) 3012 return driver_deferred_probe_check_state(dev); 3013 if (!dev->iommu && !READ_ONCE(iommu->ready)) 3014 return -EPROBE_DEFER; 3015 3016 if (fwspec) 3017 return iommu->ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; 3018 3019 if (!dev_iommu_get(dev)) 3020 return -ENOMEM; 3021 3022 /* Preallocate for the overwhelmingly common case of 1 ID */ 3023 fwspec = kzalloc_flex(*fwspec, ids, 1); 3024 if (!fwspec) 3025 return -ENOMEM; 3026 3027 fwnode_handle_get(iommu_fwnode); 3028 fwspec->iommu_fwnode = iommu_fwnode; 3029 dev_iommu_fwspec_set(dev, fwspec); 3030 return 0; 3031 } 3032 EXPORT_SYMBOL_GPL(iommu_fwspec_init); 3033 3034 void iommu_fwspec_free(struct device *dev) 3035 { 3036 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3037 3038 if (fwspec) { 3039 fwnode_handle_put(fwspec->iommu_fwnode); 3040 kfree(fwspec); 3041 dev_iommu_fwspec_set(dev, NULL); 3042 } 3043 } 3044 3045 int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) 3046 { 3047 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 3048 int i, new_num; 3049 3050 if (!fwspec) 3051 return -EINVAL; 3052 3053 new_num = fwspec->num_ids + num_ids; 3054 if (new_num > 1) { 3055 fwspec = krealloc(fwspec, struct_size(fwspec, ids, new_num), 3056 GFP_KERNEL); 3057 if (!fwspec) 3058 return -ENOMEM; 3059 3060 dev_iommu_fwspec_set(dev, fwspec); 3061 } 3062 3063 for (i = 0; i < num_ids; i++) 3064 fwspec->ids[fwspec->num_ids + i] = ids[i]; 3065 3066 fwspec->num_ids = new_num; 3067 return 0; 3068 } 3069 EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); 3070 3071 /** 3072 * iommu_setup_default_domain - Set the default_domain for the group 3073 * @group: Group to change 3074 * @target_type: Domain type to set as the default_domain 
3075 * 3076 * Allocate a default domain and set it as the current domain on the group. If 3077 * the group already has a default domain it will be changed to the target_type. 3078 * When target_type is 0 the default domain is selected based on driver and 3079 * system preferences. 3080 */ 3081 static int iommu_setup_default_domain(struct iommu_group *group, 3082 int target_type) 3083 { 3084 struct iommu_domain *old_dom = group->default_domain; 3085 struct group_device *gdev; 3086 struct iommu_domain *dom; 3087 bool direct_failed; 3088 int req_type; 3089 int ret; 3090 3091 lockdep_assert_held(&group->mutex); 3092 3093 req_type = iommu_get_default_domain_type(group, target_type); 3094 if (req_type < 0) 3095 return -EINVAL; 3096 3097 dom = iommu_group_alloc_default_domain(group, req_type); 3098 if (IS_ERR(dom)) 3099 return PTR_ERR(dom); 3100 3101 if (group->default_domain == dom) 3102 return 0; 3103 3104 if (iommu_is_dma_domain(dom)) { 3105 ret = iommu_get_dma_cookie(dom); 3106 if (ret) { 3107 iommu_domain_free(dom); 3108 return ret; 3109 } 3110 } 3111 3112 /* 3113 * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be 3114 * mapped before their device is attached, in order to guarantee 3115 * continuity with any FW activity 3116 */ 3117 direct_failed = false; 3118 for_each_group_device(group, gdev) { 3119 if (iommu_create_device_direct_mappings(dom, gdev->dev)) { 3120 direct_failed = true; 3121 dev_warn_once( 3122 gdev->dev->iommu->iommu_dev->dev, 3123 "IOMMU driver was not able to establish FW requested direct mapping."); 3124 } 3125 } 3126 3127 /* We must set default_domain early for __iommu_device_set_domain */ 3128 group->default_domain = dom; 3129 if (!group->domain) { 3130 /* 3131 * Drivers are not allowed to fail the first domain attach. 3132 * The only way to recover from this is to fail attaching the 3133 * iommu driver and call ops->release_device. Put the domain 3134 * in group->default_domain so it is freed after. 
3135 */ 3136 ret = __iommu_group_set_domain_internal( 3137 group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3138 if (WARN_ON(ret)) 3139 goto out_free_old; 3140 } else { 3141 ret = __iommu_group_set_domain(group, dom); 3142 if (ret) 3143 goto err_restore_def_domain; 3144 } 3145 3146 /* 3147 * Drivers are supposed to allow mappings to be installed in a domain 3148 * before device attachment, but some don't. Hack around this defect by 3149 * trying again after attaching. If this happens it means the device 3150 * will not continuously have the IOMMU_RESV_DIRECT map. 3151 */ 3152 if (direct_failed) { 3153 for_each_group_device(group, gdev) { 3154 ret = iommu_create_device_direct_mappings(dom, gdev->dev); 3155 if (ret) 3156 goto err_restore_domain; 3157 } 3158 } 3159 3160 out_free_old: 3161 if (old_dom) 3162 iommu_domain_free(old_dom); 3163 return ret; 3164 3165 err_restore_domain: 3166 if (old_dom) 3167 __iommu_group_set_domain_internal( 3168 group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); 3169 err_restore_def_domain: 3170 if (old_dom) { 3171 iommu_domain_free(dom); 3172 group->default_domain = old_dom; 3173 } 3174 return ret; 3175 } 3176 3177 /* 3178 * Changing the default domain through sysfs requires the users to unbind the 3179 * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ 3180 * transition. Return failure if this isn't met. 3181 * 3182 * We need to consider the race between this and the device release path. 3183 * group->mutex is used here to guarantee that the device release path 3184 * will not be entered at the same time. 
3185 */ 3186 static ssize_t iommu_group_store_type(struct iommu_group *group, 3187 const char *buf, size_t count) 3188 { 3189 struct group_device *gdev; 3190 int ret, req_type; 3191 3192 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 3193 return -EACCES; 3194 3195 if (WARN_ON(!group) || !group->default_domain) 3196 return -EINVAL; 3197 3198 if (sysfs_streq(buf, "identity")) 3199 req_type = IOMMU_DOMAIN_IDENTITY; 3200 else if (sysfs_streq(buf, "DMA")) 3201 req_type = IOMMU_DOMAIN_DMA; 3202 else if (sysfs_streq(buf, "DMA-FQ")) 3203 req_type = IOMMU_DOMAIN_DMA_FQ; 3204 else if (sysfs_streq(buf, "auto")) 3205 req_type = 0; 3206 else 3207 return -EINVAL; 3208 3209 mutex_lock(&group->mutex); 3210 /* We can bring up a flush queue without tearing down the domain. */ 3211 if (req_type == IOMMU_DOMAIN_DMA_FQ && 3212 group->default_domain->type == IOMMU_DOMAIN_DMA) { 3213 ret = iommu_dma_init_fq(group->default_domain); 3214 if (ret) 3215 goto out_unlock; 3216 3217 group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; 3218 ret = count; 3219 goto out_unlock; 3220 } 3221 3222 /* Otherwise, ensure that device exists and no driver is bound. */ 3223 if (list_empty(&group->devices) || group->owner_cnt) { 3224 ret = -EPERM; 3225 goto out_unlock; 3226 } 3227 3228 ret = iommu_setup_default_domain(group, req_type); 3229 if (ret) 3230 goto out_unlock; 3231 3232 /* Make sure dma_ops is appropriatley set */ 3233 for_each_group_device(group, gdev) 3234 iommu_setup_dma_ops(gdev->dev, group->default_domain); 3235 3236 out_unlock: 3237 mutex_unlock(&group->mutex); 3238 return ret ?: count; 3239 } 3240 3241 /** 3242 * iommu_device_use_default_domain() - Device driver wants to handle device 3243 * DMA through the kernel DMA API. 3244 * @dev: The device. 3245 * 3246 * The device driver about to bind @dev wants to do DMA through the kernel 3247 * DMA API. Return 0 if it is allowed, otherwise an error. 
3248 */ 3249 int iommu_device_use_default_domain(struct device *dev) 3250 { 3251 /* Caller is the driver core during the pre-probe path */ 3252 struct iommu_group *group = dev->iommu_group; 3253 int ret = 0; 3254 3255 if (!group) 3256 return 0; 3257 3258 mutex_lock(&group->mutex); 3259 /* We may race against bus_iommu_probe() finalising groups here */ 3260 if (!group->default_domain) { 3261 ret = -EPROBE_DEFER; 3262 goto unlock_out; 3263 } 3264 if (group->owner_cnt) { 3265 if (group->domain != group->default_domain || group->owner || 3266 !xa_empty(&group->pasid_array)) { 3267 ret = -EBUSY; 3268 goto unlock_out; 3269 } 3270 } 3271 3272 group->owner_cnt++; 3273 3274 unlock_out: 3275 mutex_unlock(&group->mutex); 3276 return ret; 3277 } 3278 3279 /** 3280 * iommu_device_unuse_default_domain() - Device driver stops handling device 3281 * DMA through the kernel DMA API. 3282 * @dev: The device. 3283 * 3284 * The device driver doesn't want to do DMA through kernel DMA API anymore. 3285 * It must be called after iommu_device_use_default_domain(). 3286 */ 3287 void iommu_device_unuse_default_domain(struct device *dev) 3288 { 3289 /* Caller is the driver core during the post-probe path */ 3290 struct iommu_group *group = dev->iommu_group; 3291 3292 if (!group) 3293 return; 3294 3295 mutex_lock(&group->mutex); 3296 if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) 3297 group->owner_cnt--; 3298 3299 mutex_unlock(&group->mutex); 3300 } 3301 3302 static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) 3303 { 3304 struct device *dev = iommu_group_first_dev(group); 3305 const struct iommu_ops *ops = dev_iommu_ops(dev); 3306 struct iommu_domain *domain; 3307 3308 if (group->blocking_domain) 3309 return 0; 3310 3311 if (ops->blocked_domain) { 3312 group->blocking_domain = ops->blocked_domain; 3313 return 0; 3314 } 3315 3316 /* 3317 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an 3318 * empty PAGING domain instead. 
3319 */ 3320 domain = iommu_paging_domain_alloc(dev); 3321 if (IS_ERR(domain)) 3322 return PTR_ERR(domain); 3323 group->blocking_domain = domain; 3324 return 0; 3325 } 3326 3327 static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) 3328 { 3329 int ret; 3330 3331 if ((group->domain && group->domain != group->default_domain) || 3332 !xa_empty(&group->pasid_array)) 3333 return -EBUSY; 3334 3335 ret = __iommu_group_alloc_blocking_domain(group); 3336 if (ret) 3337 return ret; 3338 ret = __iommu_group_set_domain(group, group->blocking_domain); 3339 if (ret) 3340 return ret; 3341 3342 group->owner = owner; 3343 group->owner_cnt++; 3344 return 0; 3345 } 3346 3347 /** 3348 * iommu_group_claim_dma_owner() - Set DMA ownership of a group 3349 * @group: The group. 3350 * @owner: Caller specified pointer. Used for exclusive ownership. 3351 * 3352 * This is to support backward compatibility for vfio which manages the dma 3353 * ownership in iommu_group level. New invocations on this interface should be 3354 * prohibited. Only a single owner may exist for a group. 3355 */ 3356 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) 3357 { 3358 int ret = 0; 3359 3360 if (WARN_ON(!owner)) 3361 return -EINVAL; 3362 3363 mutex_lock(&group->mutex); 3364 if (group->owner_cnt) { 3365 ret = -EPERM; 3366 goto unlock_out; 3367 } 3368 3369 ret = __iommu_take_dma_ownership(group, owner); 3370 unlock_out: 3371 mutex_unlock(&group->mutex); 3372 3373 return ret; 3374 } 3375 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); 3376 3377 /** 3378 * iommu_device_claim_dma_owner() - Set DMA ownership of a device 3379 * @dev: The device. 3380 * @owner: Caller specified pointer. Used for exclusive ownership. 3381 * 3382 * Claim the DMA ownership of a device. Multiple devices in the same group may 3383 * concurrently claim ownership if they present the same owner value. 
Returns 0 3384 * on success and error code on failure 3385 */ 3386 int iommu_device_claim_dma_owner(struct device *dev, void *owner) 3387 { 3388 /* Caller must be a probed driver on dev */ 3389 struct iommu_group *group = dev->iommu_group; 3390 int ret = 0; 3391 3392 if (WARN_ON(!owner)) 3393 return -EINVAL; 3394 3395 if (!group) 3396 return -ENODEV; 3397 3398 mutex_lock(&group->mutex); 3399 if (group->owner_cnt) { 3400 if (group->owner != owner) { 3401 ret = -EPERM; 3402 goto unlock_out; 3403 } 3404 group->owner_cnt++; 3405 goto unlock_out; 3406 } 3407 3408 ret = __iommu_take_dma_ownership(group, owner); 3409 unlock_out: 3410 mutex_unlock(&group->mutex); 3411 return ret; 3412 } 3413 EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); 3414 3415 static void __iommu_release_dma_ownership(struct iommu_group *group) 3416 { 3417 if (WARN_ON(!group->owner_cnt || !group->owner || 3418 !xa_empty(&group->pasid_array))) 3419 return; 3420 3421 group->owner_cnt = 0; 3422 group->owner = NULL; 3423 __iommu_group_set_domain_nofail(group, group->default_domain); 3424 } 3425 3426 /** 3427 * iommu_group_release_dma_owner() - Release DMA ownership of a group 3428 * @group: The group 3429 * 3430 * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). 3431 */ 3432 void iommu_group_release_dma_owner(struct iommu_group *group) 3433 { 3434 mutex_lock(&group->mutex); 3435 __iommu_release_dma_ownership(group); 3436 mutex_unlock(&group->mutex); 3437 } 3438 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); 3439 3440 /** 3441 * iommu_device_release_dma_owner() - Release DMA ownership of a device 3442 * @dev: The device. 3443 * 3444 * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). 
3445 */ 3446 void iommu_device_release_dma_owner(struct device *dev) 3447 { 3448 /* Caller must be a probed driver on dev */ 3449 struct iommu_group *group = dev->iommu_group; 3450 3451 mutex_lock(&group->mutex); 3452 if (group->owner_cnt > 1) 3453 group->owner_cnt--; 3454 else 3455 __iommu_release_dma_ownership(group); 3456 mutex_unlock(&group->mutex); 3457 } 3458 EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); 3459 3460 /** 3461 * iommu_group_dma_owner_claimed() - Query group dma ownership status 3462 * @group: The group. 3463 * 3464 * This provides status query on a given group. It is racy and only for 3465 * non-binding status reporting. 3466 */ 3467 bool iommu_group_dma_owner_claimed(struct iommu_group *group) 3468 { 3469 unsigned int user; 3470 3471 mutex_lock(&group->mutex); 3472 user = group->owner_cnt; 3473 mutex_unlock(&group->mutex); 3474 3475 return user; 3476 } 3477 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); 3478 3479 static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, 3480 struct iommu_domain *domain) 3481 { 3482 const struct iommu_ops *ops = dev_iommu_ops(dev); 3483 struct iommu_domain *blocked_domain = ops->blocked_domain; 3484 3485 WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain, 3486 dev, pasid, domain)); 3487 } 3488 3489 static int __iommu_set_group_pasid(struct iommu_domain *domain, 3490 struct iommu_group *group, ioasid_t pasid, 3491 struct iommu_domain *old) 3492 { 3493 struct group_device *device, *last_gdev; 3494 int ret; 3495 3496 for_each_group_device(group, device) { 3497 if (device->dev->iommu->max_pasids > 0) { 3498 ret = domain->ops->set_dev_pasid(domain, device->dev, 3499 pasid, old); 3500 if (ret) 3501 goto err_revert; 3502 } 3503 } 3504 3505 return 0; 3506 3507 err_revert: 3508 last_gdev = device; 3509 for_each_group_device(group, device) { 3510 if (device == last_gdev) 3511 break; 3512 if (device->dev->iommu->max_pasids > 0) { 3513 /* 3514 * If no old domain, undo the succeeded devices/pasid. 
3515 * Otherwise, rollback the succeeded devices/pasid to 3516 * the old domain. And it is a driver bug to fail 3517 * attaching with a previously good domain. 3518 */ 3519 if (!old || 3520 WARN_ON(old->ops->set_dev_pasid(old, device->dev, 3521 pasid, domain))) 3522 iommu_remove_dev_pasid(device->dev, pasid, domain); 3523 } 3524 } 3525 return ret; 3526 } 3527 3528 static void __iommu_remove_group_pasid(struct iommu_group *group, 3529 ioasid_t pasid, 3530 struct iommu_domain *domain) 3531 { 3532 struct group_device *device; 3533 3534 for_each_group_device(group, device) { 3535 if (device->dev->iommu->max_pasids > 0) 3536 iommu_remove_dev_pasid(device->dev, pasid, domain); 3537 } 3538 } 3539 3540 /* 3541 * iommu_attach_device_pasid() - Attach a domain to pasid of device 3542 * @domain: the iommu domain. 3543 * @dev: the attached device. 3544 * @pasid: the pasid of the device. 3545 * @handle: the attach handle. 3546 * 3547 * Caller should always provide a new handle to avoid race with the paths 3548 * that have lockless reference to handle if it intends to pass a valid handle. 3549 * 3550 * Return: 0 on success, or an error. 3551 */ 3552 int iommu_attach_device_pasid(struct iommu_domain *domain, 3553 struct device *dev, ioasid_t pasid, 3554 struct iommu_attach_handle *handle) 3555 { 3556 /* Caller must be a probed driver on dev */ 3557 struct iommu_group *group = dev->iommu_group; 3558 struct group_device *device; 3559 const struct iommu_ops *ops; 3560 void *entry; 3561 int ret; 3562 3563 if (!group) 3564 return -ENODEV; 3565 3566 ops = dev_iommu_ops(dev); 3567 3568 if (!domain->ops->set_dev_pasid || 3569 !ops->blocked_domain || 3570 !ops->blocked_domain->ops->set_dev_pasid) 3571 return -EOPNOTSUPP; 3572 3573 if (!domain_iommu_ops_compatible(ops, domain) || 3574 pasid == IOMMU_NO_PASID) 3575 return -EINVAL; 3576 3577 mutex_lock(&group->mutex); 3578 3579 /* 3580 * This is a concurrent attach during a device reset. 
Reject it until 3581 * pci_dev_reset_iommu_done() attaches the device to group->domain. 3582 */ 3583 if (group->resetting_domain) { 3584 ret = -EBUSY; 3585 goto out_unlock; 3586 } 3587 3588 for_each_group_device(group, device) { 3589 /* 3590 * Skip PASID validation for devices without PASID support 3591 * (max_pasids = 0). These devices cannot issue transactions 3592 * with PASID, so they don't affect group's PASID usage. 3593 */ 3594 if ((device->dev->iommu->max_pasids > 0) && 3595 (pasid >= device->dev->iommu->max_pasids)) { 3596 ret = -EINVAL; 3597 goto out_unlock; 3598 } 3599 } 3600 3601 entry = iommu_make_pasid_array_entry(domain, handle); 3602 3603 /* 3604 * Entry present is a failure case. Use xa_insert() instead of 3605 * xa_reserve(). 3606 */ 3607 ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL); 3608 if (ret) 3609 goto out_unlock; 3610 3611 ret = __iommu_set_group_pasid(domain, group, pasid, NULL); 3612 if (ret) { 3613 xa_release(&group->pasid_array, pasid); 3614 goto out_unlock; 3615 } 3616 3617 /* 3618 * The xa_insert() above reserved the memory, and the group->mutex is 3619 * held, this cannot fail. The new domain cannot be visible until the 3620 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3621 * queued and then failing attach. 3622 */ 3623 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3624 pasid, entry, GFP_KERNEL))); 3625 3626 out_unlock: 3627 mutex_unlock(&group->mutex); 3628 return ret; 3629 } 3630 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); 3631 3632 /** 3633 * iommu_replace_device_pasid - Replace the domain that a specific pasid 3634 * of the device is attached to 3635 * @domain: the new iommu domain 3636 * @dev: the attached device. 3637 * @pasid: the pasid of the device. 3638 * @handle: the attach handle. 3639 * 3640 * This API allows the pasid to switch domains. The @pasid should have been 3641 * attached. Otherwise, this fails. 
The pasid will keep the old configuration 3642 * if replacement failed. 3643 * 3644 * Caller should always provide a new handle to avoid race with the paths 3645 * that have lockless reference to handle if it intends to pass a valid handle. 3646 * 3647 * Return 0 on success, or an error. 3648 */ 3649 int iommu_replace_device_pasid(struct iommu_domain *domain, 3650 struct device *dev, ioasid_t pasid, 3651 struct iommu_attach_handle *handle) 3652 { 3653 /* Caller must be a probed driver on dev */ 3654 struct iommu_group *group = dev->iommu_group; 3655 struct iommu_attach_handle *entry; 3656 struct iommu_domain *curr_domain; 3657 void *curr; 3658 int ret; 3659 3660 if (!group) 3661 return -ENODEV; 3662 3663 if (!domain->ops->set_dev_pasid) 3664 return -EOPNOTSUPP; 3665 3666 if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) || 3667 pasid == IOMMU_NO_PASID || !handle) 3668 return -EINVAL; 3669 3670 mutex_lock(&group->mutex); 3671 3672 /* 3673 * This is a concurrent attach during a device reset. Reject it until 3674 * pci_dev_reset_iommu_done() attaches the device to group->domain. 3675 */ 3676 if (group->resetting_domain) { 3677 ret = -EBUSY; 3678 goto out_unlock; 3679 } 3680 3681 entry = iommu_make_pasid_array_entry(domain, handle); 3682 curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, 3683 XA_ZERO_ENTRY, GFP_KERNEL); 3684 if (xa_is_err(curr)) { 3685 ret = xa_err(curr); 3686 goto out_unlock; 3687 } 3688 3689 /* 3690 * No domain (with or without handle) attached, hence not 3691 * a replace case. 3692 */ 3693 if (!curr) { 3694 xa_release(&group->pasid_array, pasid); 3695 ret = -EINVAL; 3696 goto out_unlock; 3697 } 3698 3699 /* 3700 * Reusing handle is problematic as there are paths that refers 3701 * the handle without lock. To avoid race, reject the callers that 3702 * attempt it. 
3703 */ 3704 if (curr == entry) { 3705 WARN_ON(1); 3706 ret = -EINVAL; 3707 goto out_unlock; 3708 } 3709 3710 curr_domain = pasid_array_entry_to_domain(curr); 3711 ret = 0; 3712 3713 if (curr_domain != domain) { 3714 ret = __iommu_set_group_pasid(domain, group, 3715 pasid, curr_domain); 3716 if (ret) 3717 goto out_unlock; 3718 } 3719 3720 /* 3721 * The above xa_cmpxchg() reserved the memory, and the 3722 * group->mutex is held, this cannot fail. 3723 */ 3724 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3725 pasid, entry, GFP_KERNEL))); 3726 3727 out_unlock: 3728 mutex_unlock(&group->mutex); 3729 return ret; 3730 } 3731 EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL"); 3732 3733 /* 3734 * iommu_detach_device_pasid() - Detach the domain from pasid of device 3735 * @domain: the iommu domain. 3736 * @dev: the attached device. 3737 * @pasid: the pasid of the device. 3738 * 3739 * The @domain must have been attached to @pasid of the @dev with 3740 * iommu_attach_device_pasid(). 3741 */ 3742 void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, 3743 ioasid_t pasid) 3744 { 3745 /* Caller must be a probed driver on dev */ 3746 struct iommu_group *group = dev->iommu_group; 3747 3748 mutex_lock(&group->mutex); 3749 __iommu_remove_group_pasid(group, pasid, domain); 3750 xa_erase(&group->pasid_array, pasid); 3751 mutex_unlock(&group->mutex); 3752 } 3753 EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); 3754 3755 ioasid_t iommu_alloc_global_pasid(struct device *dev) 3756 { 3757 int ret; 3758 3759 /* max_pasids == 0 means that the device does not support PASID */ 3760 if (!dev->iommu->max_pasids) 3761 return IOMMU_PASID_INVALID; 3762 3763 /* 3764 * max_pasids is set up by vendor driver based on number of PASID bits 3765 * supported but the IDA allocation is inclusive. 3766 */ 3767 ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, 3768 dev->iommu->max_pasids - 1, GFP_KERNEL); 3769 return ret < 0 ? 
IOMMU_PASID_INVALID : ret; 3770 } 3771 EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); 3772 3773 void iommu_free_global_pasid(ioasid_t pasid) 3774 { 3775 if (WARN_ON(pasid == IOMMU_PASID_INVALID)) 3776 return; 3777 3778 ida_free(&iommu_global_pasid_ida, pasid); 3779 } 3780 EXPORT_SYMBOL_GPL(iommu_free_global_pasid); 3781 3782 /** 3783 * iommu_attach_handle_get - Return the attach handle 3784 * @group: the iommu group that domain was attached to 3785 * @pasid: the pasid within the group 3786 * @type: matched domain type, 0 for any match 3787 * 3788 * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch. 3789 * 3790 * Return the attach handle to the caller. The life cycle of an iommu attach 3791 * handle is from the time when the domain is attached to the time when the 3792 * domain is detached. Callers are required to synchronize the call of 3793 * iommu_attach_handle_get() with domain attachment and detachment. The attach 3794 * handle can only be used during its life cycle. 3795 */ 3796 struct iommu_attach_handle * 3797 iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) 3798 { 3799 struct iommu_attach_handle *handle; 3800 void *entry; 3801 3802 xa_lock(&group->pasid_array); 3803 entry = xa_load(&group->pasid_array, pasid); 3804 if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) { 3805 handle = ERR_PTR(-ENOENT); 3806 } else { 3807 handle = xa_untag_pointer(entry); 3808 if (type && handle->domain->type != type) 3809 handle = ERR_PTR(-EBUSY); 3810 } 3811 xa_unlock(&group->pasid_array); 3812 3813 return handle; 3814 } 3815 EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL"); 3816 3817 /** 3818 * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group 3819 * @domain: IOMMU domain to attach 3820 * @group: IOMMU group that will be attached 3821 * @handle: attach handle 3822 * 3823 * Returns 0 on success and error code on failure. 
3824 * 3825 * This is a variant of iommu_attach_group(). It allows the caller to provide 3826 * an attach handle and use it when the domain is attached. This is currently 3827 * used by IOMMUFD to deliver the I/O page faults. 3828 * 3829 * Caller should always provide a new handle to avoid race with the paths 3830 * that have lockless reference to handle. 3831 */ 3832 int iommu_attach_group_handle(struct iommu_domain *domain, 3833 struct iommu_group *group, 3834 struct iommu_attach_handle *handle) 3835 { 3836 void *entry; 3837 int ret; 3838 3839 if (!handle) 3840 return -EINVAL; 3841 3842 mutex_lock(&group->mutex); 3843 entry = iommu_make_pasid_array_entry(domain, handle); 3844 ret = xa_insert(&group->pasid_array, 3845 IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL); 3846 if (ret) 3847 goto out_unlock; 3848 3849 ret = __iommu_attach_group(domain, group); 3850 if (ret) { 3851 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3852 goto out_unlock; 3853 } 3854 3855 /* 3856 * The xa_insert() above reserved the memory, and the group->mutex is 3857 * held, this cannot fail. The new domain cannot be visible until the 3858 * operation succeeds as we cannot tolerate PRIs becoming concurrently 3859 * queued and then failing attach. 3860 */ 3861 WARN_ON(xa_is_err(xa_store(&group->pasid_array, 3862 IOMMU_NO_PASID, entry, GFP_KERNEL))); 3863 3864 out_unlock: 3865 mutex_unlock(&group->mutex); 3866 return ret; 3867 } 3868 EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, "IOMMUFD_INTERNAL"); 3869 3870 /** 3871 * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group 3872 * @domain: IOMMU domain to attach 3873 * @group: IOMMU group that will be attached 3874 * 3875 * Detach the specified IOMMU domain from the specified IOMMU group. 3876 * It must be used in conjunction with iommu_attach_group_handle(). 
3877 */ 3878 void iommu_detach_group_handle(struct iommu_domain *domain, 3879 struct iommu_group *group) 3880 { 3881 mutex_lock(&group->mutex); 3882 __iommu_group_set_core_domain(group); 3883 xa_erase(&group->pasid_array, IOMMU_NO_PASID); 3884 mutex_unlock(&group->mutex); 3885 } 3886 EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL"); 3887 3888 /** 3889 * iommu_replace_group_handle - replace the domain that a group is attached to 3890 * @group: IOMMU group that will be attached to the new domain 3891 * @new_domain: new IOMMU domain to replace with 3892 * @handle: attach handle 3893 * 3894 * This API allows the group to switch domains without being forced to go to 3895 * the blocking domain in-between. It allows the caller to provide an attach 3896 * handle for the new domain and use it when the domain is attached. 3897 * 3898 * If the currently attached domain is a core domain (e.g. a default_domain), 3899 * it will act just like the iommu_attach_group_handle(). 3900 * 3901 * Caller should always provide a new handle to avoid race with the paths 3902 * that have lockless reference to handle. 
3903 */ 3904 int iommu_replace_group_handle(struct iommu_group *group, 3905 struct iommu_domain *new_domain, 3906 struct iommu_attach_handle *handle) 3907 { 3908 void *curr, *entry; 3909 int ret; 3910 3911 if (!new_domain || !handle) 3912 return -EINVAL; 3913 3914 mutex_lock(&group->mutex); 3915 entry = iommu_make_pasid_array_entry(new_domain, handle); 3916 ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); 3917 if (ret) 3918 goto err_unlock; 3919 3920 ret = __iommu_group_set_domain(group, new_domain); 3921 if (ret) 3922 goto err_release; 3923 3924 curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL); 3925 WARN_ON(xa_is_err(curr)); 3926 3927 mutex_unlock(&group->mutex); 3928 3929 return 0; 3930 err_release: 3931 xa_release(&group->pasid_array, IOMMU_NO_PASID); 3932 err_unlock: 3933 mutex_unlock(&group->mutex); 3934 return ret; 3935 } 3936 EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); 3937 3938 /** 3939 * pci_dev_reset_iommu_prepare() - Block IOMMU to prepare for a PCI device reset 3940 * @pdev: PCI device that is going to enter a reset routine 3941 * 3942 * The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends to disable and block 3943 * ATS before initiating a reset. This means that a PCIe device during the reset 3944 * routine wants to block any IOMMU activity: translation and ATS invalidation. 3945 * 3946 * This function attaches the device's RID/PASID(s) the group->blocking_domain, 3947 * setting the group->resetting_domain. This allows the IOMMU driver pausing any 3948 * IOMMU activity while leaving the group->domain pointer intact. Later when the 3949 * reset is finished, pci_dev_reset_iommu_done() can restore everything. 3950 * 3951 * Caller must use pci_dev_reset_iommu_prepare() with pci_dev_reset_iommu_done() 3952 * before/after the core-level reset routine, to unset the resetting_domain. 3953 * 3954 * Return: 0 on success or negative error code if the preparation failed. 
3955 * 3956 * These two functions are designed to be used by PCI reset functions that would 3957 * not invoke any racy iommu_release_device(), since PCI sysfs node gets removed 3958 * before it notifies with a BUS_NOTIFY_REMOVED_DEVICE. When using them in other 3959 * case, callers must ensure there will be no racy iommu_release_device() call, 3960 * which otherwise would UAF the dev->iommu_group pointer. 3961 */ 3962 int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) 3963 { 3964 struct iommu_group *group = pdev->dev.iommu_group; 3965 unsigned long pasid; 3966 void *entry; 3967 int ret; 3968 3969 if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev)) 3970 return 0; 3971 3972 guard(mutex)(&group->mutex); 3973 3974 /* Re-entry is not allowed */ 3975 if (WARN_ON(group->resetting_domain)) 3976 return -EBUSY; 3977 3978 ret = __iommu_group_alloc_blocking_domain(group); 3979 if (ret) 3980 return ret; 3981 3982 /* Stage RID domain at blocking_domain while retaining group->domain */ 3983 if (group->domain != group->blocking_domain) { 3984 ret = __iommu_attach_device(group->blocking_domain, &pdev->dev, 3985 group->domain); 3986 if (ret) 3987 return ret; 3988 } 3989 3990 /* 3991 * Stage PASID domains at blocking_domain while retaining pasid_array. 3992 * 3993 * The pasid_array is mostly fenced by group->mutex, except one reader 3994 * in iommu_attach_handle_get(), so it's safe to read without xa_lock. 
3995 */ 3996 xa_for_each_start(&group->pasid_array, pasid, entry, 1) 3997 iommu_remove_dev_pasid(&pdev->dev, pasid, 3998 pasid_array_entry_to_domain(entry)); 3999 4000 group->resetting_domain = group->blocking_domain; 4001 return ret; 4002 } 4003 EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare); 4004 4005 /** 4006 * pci_dev_reset_iommu_done() - Restore IOMMU after a PCI device reset is done 4007 * @pdev: PCI device that has finished a reset routine 4008 * 4009 * After a PCIe device finishes a reset routine, it wants to restore its IOMMU 4010 * IOMMU activity, including new translation as well as cache invalidation, by 4011 * re-attaching all RID/PASID of the device's back to the domains retained in 4012 * the core-level structure. 4013 * 4014 * Caller must pair it with a successful pci_dev_reset_iommu_prepare(). 4015 * 4016 * Note that, although unlikely, there is a risk that re-attaching domains might 4017 * fail due to some unexpected happening like OOM. 4018 */ 4019 void pci_dev_reset_iommu_done(struct pci_dev *pdev) 4020 { 4021 struct iommu_group *group = pdev->dev.iommu_group; 4022 unsigned long pasid; 4023 void *entry; 4024 4025 if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev)) 4026 return; 4027 4028 guard(mutex)(&group->mutex); 4029 4030 /* pci_dev_reset_iommu_prepare() was bypassed for the device */ 4031 if (!group->resetting_domain) 4032 return; 4033 4034 /* pci_dev_reset_iommu_prepare() was not successfully called */ 4035 if (WARN_ON(!group->blocking_domain)) 4036 return; 4037 4038 /* Re-attach RID domain back to group->domain */ 4039 if (group->domain != group->blocking_domain) { 4040 WARN_ON(__iommu_attach_device(group->domain, &pdev->dev, 4041 group->blocking_domain)); 4042 } 4043 4044 /* 4045 * Re-attach PASID domains back to the domains retained in pasid_array. 4046 * 4047 * The pasid_array is mostly fenced by group->mutex, except one reader 4048 * in iommu_attach_handle_get(), so it's safe to read without xa_lock. 
4049 */ 4050 xa_for_each_start(&group->pasid_array, pasid, entry, 1) 4051 WARN_ON(__iommu_set_group_pasid( 4052 pasid_array_entry_to_domain(entry), group, pasid, 4053 group->blocking_domain)); 4054 4055 group->resetting_domain = NULL; 4056 } 4057 EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_done); 4058 4059 #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) 4060 /** 4061 * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain 4062 * @desc: MSI descriptor, will store the MSI page 4063 * @msi_addr: MSI target address to be mapped 4064 * 4065 * The implementation of sw_msi() should take msi_addr and map it to 4066 * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the 4067 * mapping information. 4068 * 4069 * Return: 0 on success or negative error code if the mapping failed. 4070 */ 4071 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 4072 { 4073 struct device *dev = msi_desc_to_dev(desc); 4074 struct iommu_group *group = dev->iommu_group; 4075 int ret = 0; 4076 4077 if (!group) 4078 return 0; 4079 4080 mutex_lock(&group->mutex); 4081 /* An IDENTITY domain must pass through */ 4082 if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) { 4083 switch (group->domain->cookie_type) { 4084 case IOMMU_COOKIE_DMA_MSI: 4085 case IOMMU_COOKIE_DMA_IOVA: 4086 ret = iommu_dma_sw_msi(group->domain, desc, msi_addr); 4087 break; 4088 case IOMMU_COOKIE_IOMMUFD: 4089 ret = iommufd_sw_msi(group->domain, desc, msi_addr); 4090 break; 4091 default: 4092 ret = -EOPNOTSUPP; 4093 break; 4094 } 4095 } 4096 mutex_unlock(&group->mutex); 4097 return ret; 4098 } 4099 #endif /* CONFIG_IRQ_MSI_IOMMU */ 4100