1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright © 2006-2014 Intel Corporation. 4 * 5 * Authors: David Woodhouse <dwmw2@infradead.org>, 6 * Ashok Raj <ashok.raj@intel.com>, 7 * Shaohua Li <shaohua.li@intel.com>, 8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, 9 * Fenghua Yu <fenghua.yu@intel.com> 10 * Joerg Roedel <jroedel@suse.de> 11 */ 12 13 #define pr_fmt(fmt) "DMAR: " fmt 14 #define dev_fmt(fmt) pr_fmt(fmt) 15 16 #include <linux/crash_dump.h> 17 #include <linux/dma-direct.h> 18 #include <linux/dmi.h> 19 #include <linux/memory.h> 20 #include <linux/pci.h> 21 #include <linux/pci-ats.h> 22 #include <linux/spinlock.h> 23 #include <linux/syscore_ops.h> 24 #include <linux/tboot.h> 25 #include <uapi/linux/iommufd.h> 26 27 #include "iommu.h" 28 #include "../dma-iommu.h" 29 #include "../irq_remapping.h" 30 #include "pasid.h" 31 #include "cap_audit.h" 32 #include "perfmon.h" 33 34 #define ROOT_SIZE VTD_PAGE_SIZE 35 #define CONTEXT_SIZE VTD_PAGE_SIZE 36 37 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) 38 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) 39 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) 40 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) 41 42 #define IOAPIC_RANGE_START (0xfee00000) 43 #define IOAPIC_RANGE_END (0xfeefffff) 44 #define IOVA_START_ADDR (0x1000) 45 46 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57 47 48 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1) 49 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1) 50 51 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR 52 to match. That way, we can use 'unsigned long' for PFNs with impunity. 
*/ 53 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \ 54 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1)) 55 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT) 56 57 /* IO virtual address start page frame number */ 58 #define IOVA_START_PFN (1) 59 60 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 61 62 static void __init check_tylersburg_isoch(void); 63 static int rwbf_quirk; 64 65 /* 66 * set to 1 to panic kernel if can't successfully enable VT-d 67 * (used when kernel is launched w/ TXT) 68 */ 69 static int force_on = 0; 70 static int intel_iommu_tboot_noforce; 71 static int no_platform_optin; 72 73 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 74 75 /* 76 * Take a root_entry and return the Lower Context Table Pointer (LCTP) 77 * if marked present. 78 */ 79 static phys_addr_t root_entry_lctp(struct root_entry *re) 80 { 81 if (!(re->lo & 1)) 82 return 0; 83 84 return re->lo & VTD_PAGE_MASK; 85 } 86 87 /* 88 * Take a root_entry and return the Upper Context Table Pointer (UCTP) 89 * if marked present. 90 */ 91 static phys_addr_t root_entry_uctp(struct root_entry *re) 92 { 93 if (!(re->hi & 1)) 94 return 0; 95 96 return re->hi & VTD_PAGE_MASK; 97 } 98 99 static int device_rid_cmp_key(const void *key, const struct rb_node *node) 100 { 101 struct device_domain_info *info = 102 rb_entry(node, struct device_domain_info, node); 103 const u16 *rid_lhs = key; 104 105 if (*rid_lhs < PCI_DEVID(info->bus, info->devfn)) 106 return -1; 107 108 if (*rid_lhs > PCI_DEVID(info->bus, info->devfn)) 109 return 1; 110 111 return 0; 112 } 113 114 static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs) 115 { 116 struct device_domain_info *info = 117 rb_entry(lhs, struct device_domain_info, node); 118 u16 key = PCI_DEVID(info->bus, info->devfn); 119 120 return device_rid_cmp_key(&key, rhs); 121 } 122 123 /* 124 * Looks up an IOMMU-probed device using its source ID. 
125 * 126 * Returns the pointer to the device if there is a match. Otherwise, 127 * returns NULL. 128 * 129 * Note that this helper doesn't guarantee that the device won't be 130 * released by the iommu subsystem after being returned. The caller 131 * should use its own synchronization mechanism to avoid the device 132 * being released during its use if its possibly the case. 133 */ 134 struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid) 135 { 136 struct device_domain_info *info = NULL; 137 struct rb_node *node; 138 unsigned long flags; 139 140 spin_lock_irqsave(&iommu->device_rbtree_lock, flags); 141 node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key); 142 if (node) 143 info = rb_entry(node, struct device_domain_info, node); 144 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); 145 146 return info ? info->dev : NULL; 147 } 148 149 static int device_rbtree_insert(struct intel_iommu *iommu, 150 struct device_domain_info *info) 151 { 152 struct rb_node *curr; 153 unsigned long flags; 154 155 spin_lock_irqsave(&iommu->device_rbtree_lock, flags); 156 curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp); 157 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); 158 if (WARN_ON(curr)) 159 return -EEXIST; 160 161 return 0; 162 } 163 164 static void device_rbtree_remove(struct device_domain_info *info) 165 { 166 struct intel_iommu *iommu = info->iommu; 167 unsigned long flags; 168 169 spin_lock_irqsave(&iommu->device_rbtree_lock, flags); 170 rb_erase(&info->node, &iommu->device_rbtree); 171 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); 172 } 173 174 /* 175 * This domain is a statically identity mapping domain. 176 * 1. This domain creats a static 1:1 mapping to all usable memory. 177 * 2. It maps to each iommu if successful. 178 * 3. Each iommu mapps to this domain if successful. 
179 */ 180 static struct dmar_domain *si_domain; 181 static int hw_pass_through = 1; 182 183 struct dmar_rmrr_unit { 184 struct list_head list; /* list of rmrr units */ 185 struct acpi_dmar_header *hdr; /* ACPI header */ 186 u64 base_address; /* reserved base address*/ 187 u64 end_address; /* reserved end address */ 188 struct dmar_dev_scope *devices; /* target devices */ 189 int devices_cnt; /* target device count */ 190 }; 191 192 struct dmar_atsr_unit { 193 struct list_head list; /* list of ATSR units */ 194 struct acpi_dmar_header *hdr; /* ACPI header */ 195 struct dmar_dev_scope *devices; /* target devices */ 196 int devices_cnt; /* target device count */ 197 u8 include_all:1; /* include all ports */ 198 }; 199 200 struct dmar_satc_unit { 201 struct list_head list; /* list of SATC units */ 202 struct acpi_dmar_header *hdr; /* ACPI header */ 203 struct dmar_dev_scope *devices; /* target devices */ 204 struct intel_iommu *iommu; /* the corresponding iommu */ 205 int devices_cnt; /* target device count */ 206 u8 atc_required:1; /* ATS is required */ 207 }; 208 209 static LIST_HEAD(dmar_atsr_units); 210 static LIST_HEAD(dmar_rmrr_units); 211 static LIST_HEAD(dmar_satc_units); 212 213 #define for_each_rmrr_units(rmrr) \ 214 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 215 216 static void intel_iommu_domain_free(struct iommu_domain *domain); 217 218 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); 219 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); 220 221 int intel_iommu_enabled = 0; 222 EXPORT_SYMBOL_GPL(intel_iommu_enabled); 223 224 static int dmar_map_gfx = 1; 225 static int intel_iommu_superpage = 1; 226 static int iommu_identity_mapping; 227 static int iommu_skip_te_disable; 228 229 #define IDENTMAP_GFX 2 230 #define IDENTMAP_AZALIA 4 231 232 const struct iommu_ops intel_iommu_ops; 233 static const struct iommu_dirty_ops intel_dirty_ops; 234 235 static bool translation_pre_enabled(struct intel_iommu *iommu) 236 
{ 237 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED); 238 } 239 240 static void clear_translation_pre_enabled(struct intel_iommu *iommu) 241 { 242 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED; 243 } 244 245 static void init_translation_status(struct intel_iommu *iommu) 246 { 247 u32 gsts; 248 249 gsts = readl(iommu->reg + DMAR_GSTS_REG); 250 if (gsts & DMA_GSTS_TES) 251 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED; 252 } 253 254 static int __init intel_iommu_setup(char *str) 255 { 256 if (!str) 257 return -EINVAL; 258 259 while (*str) { 260 if (!strncmp(str, "on", 2)) { 261 dmar_disabled = 0; 262 pr_info("IOMMU enabled\n"); 263 } else if (!strncmp(str, "off", 3)) { 264 dmar_disabled = 1; 265 no_platform_optin = 1; 266 pr_info("IOMMU disabled\n"); 267 } else if (!strncmp(str, "igfx_off", 8)) { 268 dmar_map_gfx = 0; 269 pr_info("Disable GFX device mapping\n"); 270 } else if (!strncmp(str, "forcedac", 8)) { 271 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); 272 iommu_dma_forcedac = true; 273 } else if (!strncmp(str, "strict", 6)) { 274 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n"); 275 iommu_set_dma_strict(); 276 } else if (!strncmp(str, "sp_off", 6)) { 277 pr_info("Disable supported super page\n"); 278 intel_iommu_superpage = 0; 279 } else if (!strncmp(str, "sm_on", 5)) { 280 pr_info("Enable scalable mode if hardware supports\n"); 281 intel_iommu_sm = 1; 282 } else if (!strncmp(str, "sm_off", 6)) { 283 pr_info("Scalable mode is disallowed\n"); 284 intel_iommu_sm = 0; 285 } else if (!strncmp(str, "tboot_noforce", 13)) { 286 pr_info("Intel-IOMMU: not forcing on after tboot. 
This could expose security risk for tboot\n"); 287 intel_iommu_tboot_noforce = 1; 288 } else { 289 pr_notice("Unknown option - '%s'\n", str); 290 } 291 292 str += strcspn(str, ","); 293 while (*str == ',') 294 str++; 295 } 296 297 return 1; 298 } 299 __setup("intel_iommu=", intel_iommu_setup); 300 301 void *alloc_pgtable_page(int node, gfp_t gfp) 302 { 303 struct page *page; 304 void *vaddr = NULL; 305 306 page = alloc_pages_node(node, gfp | __GFP_ZERO, 0); 307 if (page) 308 vaddr = page_address(page); 309 return vaddr; 310 } 311 312 void free_pgtable_page(void *vaddr) 313 { 314 free_page((unsigned long)vaddr); 315 } 316 317 static int domain_type_is_si(struct dmar_domain *domain) 318 { 319 return domain->domain.type == IOMMU_DOMAIN_IDENTITY; 320 } 321 322 static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) 323 { 324 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 325 326 return !(addr_width < BITS_PER_LONG && pfn >> addr_width); 327 } 328 329 /* 330 * Calculate the Supported Adjusted Guest Address Widths of an IOMMU. 331 * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of 332 * the returned SAGAW. 333 */ 334 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu) 335 { 336 unsigned long fl_sagaw, sl_sagaw; 337 338 fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0); 339 sl_sagaw = cap_sagaw(iommu->cap); 340 341 /* Second level only. */ 342 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) 343 return sl_sagaw; 344 345 /* First level only. 
*/ 346 if (!ecap_slts(iommu->ecap)) 347 return fl_sagaw; 348 349 return fl_sagaw & sl_sagaw; 350 } 351 352 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) 353 { 354 unsigned long sagaw; 355 int agaw; 356 357 sagaw = __iommu_calculate_sagaw(iommu); 358 for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) { 359 if (test_bit(agaw, &sagaw)) 360 break; 361 } 362 363 return agaw; 364 } 365 366 /* 367 * Calculate max SAGAW for each iommu. 368 */ 369 int iommu_calculate_max_sagaw(struct intel_iommu *iommu) 370 { 371 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH); 372 } 373 374 /* 375 * calculate agaw for each iommu. 376 * "SAGAW" may be different across iommus, use a default agaw, and 377 * get a supported less agaw for iommus that don't support the default agaw. 378 */ 379 int iommu_calculate_agaw(struct intel_iommu *iommu) 380 { 381 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); 382 } 383 384 static bool iommu_paging_structure_coherency(struct intel_iommu *iommu) 385 { 386 return sm_supported(iommu) ? 
387 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap); 388 } 389 390 static void domain_update_iommu_coherency(struct dmar_domain *domain) 391 { 392 struct iommu_domain_info *info; 393 struct dmar_drhd_unit *drhd; 394 struct intel_iommu *iommu; 395 bool found = false; 396 unsigned long i; 397 398 domain->iommu_coherency = true; 399 xa_for_each(&domain->iommu_array, i, info) { 400 found = true; 401 if (!iommu_paging_structure_coherency(info->iommu)) { 402 domain->iommu_coherency = false; 403 break; 404 } 405 } 406 if (found) 407 return; 408 409 /* No hardware attached; use lowest common denominator */ 410 rcu_read_lock(); 411 for_each_active_iommu(iommu, drhd) { 412 if (!iommu_paging_structure_coherency(iommu)) { 413 domain->iommu_coherency = false; 414 break; 415 } 416 } 417 rcu_read_unlock(); 418 } 419 420 static int domain_update_iommu_superpage(struct dmar_domain *domain, 421 struct intel_iommu *skip) 422 { 423 struct dmar_drhd_unit *drhd; 424 struct intel_iommu *iommu; 425 int mask = 0x3; 426 427 if (!intel_iommu_superpage) 428 return 0; 429 430 /* set iommu_superpage to the smallest common denominator */ 431 rcu_read_lock(); 432 for_each_active_iommu(iommu, drhd) { 433 if (iommu != skip) { 434 if (domain && domain->use_first_level) { 435 if (!cap_fl1gp_support(iommu->cap)) 436 mask = 0x1; 437 } else { 438 mask &= cap_super_page_val(iommu->cap); 439 } 440 441 if (!mask) 442 break; 443 } 444 } 445 rcu_read_unlock(); 446 447 return fls(mask); 448 } 449 450 static int domain_update_device_node(struct dmar_domain *domain) 451 { 452 struct device_domain_info *info; 453 int nid = NUMA_NO_NODE; 454 unsigned long flags; 455 456 spin_lock_irqsave(&domain->lock, flags); 457 list_for_each_entry(info, &domain->devices, link) { 458 /* 459 * There could possibly be multiple device numa nodes as devices 460 * within the same domain may sit behind different IOMMUs. There 461 * isn't perfect answer in such situation, so we select first 462 * come first served policy. 
463 */ 464 nid = dev_to_node(info->dev); 465 if (nid != NUMA_NO_NODE) 466 break; 467 } 468 spin_unlock_irqrestore(&domain->lock, flags); 469 470 return nid; 471 } 472 473 /* Return the super pagesize bitmap if supported. */ 474 static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain) 475 { 476 unsigned long bitmap = 0; 477 478 /* 479 * 1-level super page supports page size of 2MiB, 2-level super page 480 * supports page size of both 2MiB and 1GiB. 481 */ 482 if (domain->iommu_superpage == 1) 483 bitmap |= SZ_2M; 484 else if (domain->iommu_superpage == 2) 485 bitmap |= SZ_2M | SZ_1G; 486 487 return bitmap; 488 } 489 490 /* Some capabilities may be different across iommus */ 491 void domain_update_iommu_cap(struct dmar_domain *domain) 492 { 493 domain_update_iommu_coherency(domain); 494 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL); 495 496 /* 497 * If RHSA is missing, we should default to the device numa domain 498 * as fall back. 499 */ 500 if (domain->nid == NUMA_NO_NODE) 501 domain->nid = domain_update_device_node(domain); 502 503 /* 504 * First-level translation restricts the input-address to a 505 * canonical address (i.e., address bits 63:N have the same 506 * value as address bit [N-1], where N is 48-bits with 4-level 507 * paging and 57-bits with 5-level paging). Hence, skip bit 508 * [N-1]. 
509 */ 510 if (domain->use_first_level) 511 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); 512 else 513 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); 514 515 domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain); 516 domain_update_iotlb(domain); 517 } 518 519 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, 520 u8 devfn, int alloc) 521 { 522 struct root_entry *root = &iommu->root_entry[bus]; 523 struct context_entry *context; 524 u64 *entry; 525 526 /* 527 * Except that the caller requested to allocate a new entry, 528 * returning a copied context entry makes no sense. 529 */ 530 if (!alloc && context_copied(iommu, bus, devfn)) 531 return NULL; 532 533 entry = &root->lo; 534 if (sm_supported(iommu)) { 535 if (devfn >= 0x80) { 536 devfn -= 0x80; 537 entry = &root->hi; 538 } 539 devfn *= 2; 540 } 541 if (*entry & 1) 542 context = phys_to_virt(*entry & VTD_PAGE_MASK); 543 else { 544 unsigned long phy_addr; 545 if (!alloc) 546 return NULL; 547 548 context = alloc_pgtable_page(iommu->node, GFP_ATOMIC); 549 if (!context) 550 return NULL; 551 552 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); 553 phy_addr = virt_to_phys((void *)context); 554 *entry = phy_addr | 1; 555 __iommu_flush_cache(iommu, entry, sizeof(*entry)); 556 } 557 return &context[devfn]; 558 } 559 560 /** 561 * is_downstream_to_pci_bridge - test if a device belongs to the PCI 562 * sub-hierarchy of a candidate PCI-PCI bridge 563 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy 564 * @bridge: the candidate PCI-PCI bridge 565 * 566 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false. 
567 */ 568 static bool 569 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge) 570 { 571 struct pci_dev *pdev, *pbridge; 572 573 if (!dev_is_pci(dev) || !dev_is_pci(bridge)) 574 return false; 575 576 pdev = to_pci_dev(dev); 577 pbridge = to_pci_dev(bridge); 578 579 if (pbridge->subordinate && 580 pbridge->subordinate->number <= pdev->bus->number && 581 pbridge->subordinate->busn_res.end >= pdev->bus->number) 582 return true; 583 584 return false; 585 } 586 587 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev) 588 { 589 struct dmar_drhd_unit *drhd; 590 u32 vtbar; 591 int rc; 592 593 /* We know that this device on this chipset has its own IOMMU. 594 * If we find it under a different IOMMU, then the BIOS is lying 595 * to us. Hope that the IOMMU for this device is actually 596 * disabled, and it needs no translation... 597 */ 598 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); 599 if (rc) { 600 /* "can't" happen */ 601 dev_info(&pdev->dev, "failed to run vt-d quirk\n"); 602 return false; 603 } 604 vtbar &= 0xffff0000; 605 606 /* we know that the this iommu should be at offset 0xa000 from vtbar */ 607 drhd = dmar_find_matched_drhd_unit(pdev); 608 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { 609 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); 610 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 611 return true; 612 } 613 614 return false; 615 } 616 617 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev) 618 { 619 if (!iommu || iommu->drhd->ignored) 620 return true; 621 622 if (dev_is_pci(dev)) { 623 struct pci_dev *pdev = to_pci_dev(dev); 624 625 if (pdev->vendor == PCI_VENDOR_ID_INTEL && 626 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB && 627 quirk_ioat_snb_local_iommu(pdev)) 628 return true; 629 } 630 631 return false; 632 } 633 634 static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn) 635 { 
636 struct dmar_drhd_unit *drhd = NULL; 637 struct pci_dev *pdev = NULL; 638 struct intel_iommu *iommu; 639 struct device *tmp; 640 u16 segment = 0; 641 int i; 642 643 if (!dev) 644 return NULL; 645 646 if (dev_is_pci(dev)) { 647 struct pci_dev *pf_pdev; 648 649 pdev = pci_real_dma_dev(to_pci_dev(dev)); 650 651 /* VFs aren't listed in scope tables; we need to look up 652 * the PF instead to find the IOMMU. */ 653 pf_pdev = pci_physfn(pdev); 654 dev = &pf_pdev->dev; 655 segment = pci_domain_nr(pdev->bus); 656 } else if (has_acpi_companion(dev)) 657 dev = &ACPI_COMPANION(dev)->dev; 658 659 rcu_read_lock(); 660 for_each_iommu(iommu, drhd) { 661 if (pdev && segment != drhd->segment) 662 continue; 663 664 for_each_active_dev_scope(drhd->devices, 665 drhd->devices_cnt, i, tmp) { 666 if (tmp == dev) { 667 /* For a VF use its original BDF# not that of the PF 668 * which we used for the IOMMU lookup. Strictly speaking 669 * we could do this for all PCI devices; we only need to 670 * get the BDF# from the scope table for ACPI matches. 
*/ 671 if (pdev && pdev->is_virtfn) 672 goto got_pdev; 673 674 if (bus && devfn) { 675 *bus = drhd->devices[i].bus; 676 *devfn = drhd->devices[i].devfn; 677 } 678 goto out; 679 } 680 681 if (is_downstream_to_pci_bridge(dev, tmp)) 682 goto got_pdev; 683 } 684 685 if (pdev && drhd->include_all) { 686 got_pdev: 687 if (bus && devfn) { 688 *bus = pdev->bus->number; 689 *devfn = pdev->devfn; 690 } 691 goto out; 692 } 693 } 694 iommu = NULL; 695 out: 696 if (iommu_is_dummy(iommu, dev)) 697 iommu = NULL; 698 699 rcu_read_unlock(); 700 701 return iommu; 702 } 703 704 static void domain_flush_cache(struct dmar_domain *domain, 705 void *addr, int size) 706 { 707 if (!domain->iommu_coherency) 708 clflush_cache_range(addr, size); 709 } 710 711 static void free_context_table(struct intel_iommu *iommu) 712 { 713 struct context_entry *context; 714 int i; 715 716 if (!iommu->root_entry) 717 return; 718 719 for (i = 0; i < ROOT_ENTRY_NR; i++) { 720 context = iommu_context_addr(iommu, i, 0, 0); 721 if (context) 722 free_pgtable_page(context); 723 724 if (!sm_supported(iommu)) 725 continue; 726 727 context = iommu_context_addr(iommu, i, 0x80, 0); 728 if (context) 729 free_pgtable_page(context); 730 } 731 732 free_pgtable_page(iommu->root_entry); 733 iommu->root_entry = NULL; 734 } 735 736 #ifdef CONFIG_DMAR_DEBUG 737 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, 738 u8 bus, u8 devfn, struct dma_pte *parent, int level) 739 { 740 struct dma_pte *pte; 741 int offset; 742 743 while (1) { 744 offset = pfn_level_offset(pfn, level); 745 pte = &parent[offset]; 746 if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) { 747 pr_info("PTE not present at level %d\n", level); 748 break; 749 } 750 751 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val); 752 753 if (level == 1) 754 break; 755 756 parent = phys_to_virt(dma_pte_addr(pte)); 757 level--; 758 } 759 } 760 761 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, 762 unsigned 
long long addr, u32 pasid) 763 { 764 struct pasid_dir_entry *dir, *pde; 765 struct pasid_entry *entries, *pte; 766 struct context_entry *ctx_entry; 767 struct root_entry *rt_entry; 768 int i, dir_index, index, level; 769 u8 devfn = source_id & 0xff; 770 u8 bus = source_id >> 8; 771 struct dma_pte *pgtable; 772 773 pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr); 774 775 /* root entry dump */ 776 rt_entry = &iommu->root_entry[bus]; 777 if (!rt_entry) { 778 pr_info("root table entry is not present\n"); 779 return; 780 } 781 782 if (sm_supported(iommu)) 783 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n", 784 rt_entry->hi, rt_entry->lo); 785 else 786 pr_info("root entry: 0x%016llx", rt_entry->lo); 787 788 /* context entry dump */ 789 ctx_entry = iommu_context_addr(iommu, bus, devfn, 0); 790 if (!ctx_entry) { 791 pr_info("context table entry is not present\n"); 792 return; 793 } 794 795 pr_info("context entry: hi 0x%016llx, low 0x%016llx\n", 796 ctx_entry->hi, ctx_entry->lo); 797 798 /* legacy mode does not require PASID entries */ 799 if (!sm_supported(iommu)) { 800 level = agaw_to_level(ctx_entry->hi & 7); 801 pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK); 802 goto pgtable_walk; 803 } 804 805 /* get the pointer to pasid directory entry */ 806 dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK); 807 if (!dir) { 808 pr_info("pasid directory entry is not present\n"); 809 return; 810 } 811 /* For request-without-pasid, get the pasid from context entry */ 812 if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) 813 pasid = IOMMU_NO_PASID; 814 815 dir_index = pasid >> PASID_PDE_SHIFT; 816 pde = &dir[dir_index]; 817 pr_info("pasid dir entry: 0x%016llx\n", pde->val); 818 819 /* get the pointer to the pasid table entry */ 820 entries = get_pasid_table_from_pde(pde); 821 if (!entries) { 822 pr_info("pasid table entry is not present\n"); 823 return; 824 } 825 index = pasid & PASID_PTE_MASK; 826 pte = &entries[index]; 827 for (i = 0; 
i < ARRAY_SIZE(pte->val); i++) 828 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]); 829 830 if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) { 831 level = pte->val[2] & BIT_ULL(2) ? 5 : 4; 832 pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK); 833 } else { 834 level = agaw_to_level((pte->val[0] >> 2) & 0x7); 835 pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK); 836 } 837 838 pgtable_walk: 839 pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level); 840 } 841 #endif 842 843 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, 844 unsigned long pfn, int *target_level, 845 gfp_t gfp) 846 { 847 struct dma_pte *parent, *pte; 848 int level = agaw_to_level(domain->agaw); 849 int offset; 850 851 if (!domain_pfn_supported(domain, pfn)) 852 /* Address beyond IOMMU's addressing capabilities. */ 853 return NULL; 854 855 parent = domain->pgd; 856 857 while (1) { 858 void *tmp_page; 859 860 offset = pfn_level_offset(pfn, level); 861 pte = &parent[offset]; 862 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) 863 break; 864 if (level == *target_level) 865 break; 866 867 if (!dma_pte_present(pte)) { 868 uint64_t pteval; 869 870 tmp_page = alloc_pgtable_page(domain->nid, gfp); 871 872 if (!tmp_page) 873 return NULL; 874 875 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); 876 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; 877 if (domain->use_first_level) 878 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; 879 880 if (cmpxchg64(&pte->val, 0ULL, pteval)) 881 /* Someone else set it while we were thinking; use theirs. 
*/ 882 free_pgtable_page(tmp_page); 883 else 884 domain_flush_cache(domain, pte, sizeof(*pte)); 885 } 886 if (level == 1) 887 break; 888 889 parent = phys_to_virt(dma_pte_addr(pte)); 890 level--; 891 } 892 893 if (!*target_level) 894 *target_level = level; 895 896 return pte; 897 } 898 899 /* return address's pte at specific level */ 900 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, 901 unsigned long pfn, 902 int level, int *large_page) 903 { 904 struct dma_pte *parent, *pte; 905 int total = agaw_to_level(domain->agaw); 906 int offset; 907 908 parent = domain->pgd; 909 while (level <= total) { 910 offset = pfn_level_offset(pfn, total); 911 pte = &parent[offset]; 912 if (level == total) 913 return pte; 914 915 if (!dma_pte_present(pte)) { 916 *large_page = total; 917 break; 918 } 919 920 if (dma_pte_superpage(pte)) { 921 *large_page = total; 922 return pte; 923 } 924 925 parent = phys_to_virt(dma_pte_addr(pte)); 926 total--; 927 } 928 return NULL; 929 } 930 931 /* clear last level pte, a tlb flush should be followed */ 932 static void dma_pte_clear_range(struct dmar_domain *domain, 933 unsigned long start_pfn, 934 unsigned long last_pfn) 935 { 936 unsigned int large_page; 937 struct dma_pte *first_pte, *pte; 938 939 if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) || 940 WARN_ON(start_pfn > last_pfn)) 941 return; 942 943 /* we don't need lock here; nobody else touches the iova range */ 944 do { 945 large_page = 1; 946 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); 947 if (!pte) { 948 start_pfn = align_to_level(start_pfn + 1, large_page + 1); 949 continue; 950 } 951 do { 952 dma_clear_pte(pte); 953 start_pfn += lvl_to_nr_pages(large_page); 954 pte++; 955 } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); 956 957 domain_flush_cache(domain, first_pte, 958 (void *)pte - (void *)first_pte); 959 960 } while (start_pfn && start_pfn <= last_pfn); 961 } 962 963 static void dma_pte_free_level(struct dmar_domain 
*domain, int level, 964 int retain_level, struct dma_pte *pte, 965 unsigned long pfn, unsigned long start_pfn, 966 unsigned long last_pfn) 967 { 968 pfn = max(start_pfn, pfn); 969 pte = &pte[pfn_level_offset(pfn, level)]; 970 971 do { 972 unsigned long level_pfn; 973 struct dma_pte *level_pte; 974 975 if (!dma_pte_present(pte) || dma_pte_superpage(pte)) 976 goto next; 977 978 level_pfn = pfn & level_mask(level); 979 level_pte = phys_to_virt(dma_pte_addr(pte)); 980 981 if (level > 2) { 982 dma_pte_free_level(domain, level - 1, retain_level, 983 level_pte, level_pfn, start_pfn, 984 last_pfn); 985 } 986 987 /* 988 * Free the page table if we're below the level we want to 989 * retain and the range covers the entire table. 990 */ 991 if (level < retain_level && !(start_pfn > level_pfn || 992 last_pfn < level_pfn + level_size(level) - 1)) { 993 dma_clear_pte(pte); 994 domain_flush_cache(domain, pte, sizeof(*pte)); 995 free_pgtable_page(level_pte); 996 } 997 next: 998 pfn += level_size(level); 999 } while (!first_pte_in_page(++pte) && pfn <= last_pfn); 1000 } 1001 1002 /* 1003 * clear last level (leaf) ptes and free page table pages below the 1004 * level we wish to keep intact. 1005 */ 1006 static void dma_pte_free_pagetable(struct dmar_domain *domain, 1007 unsigned long start_pfn, 1008 unsigned long last_pfn, 1009 int retain_level) 1010 { 1011 dma_pte_clear_range(domain, start_pfn, last_pfn); 1012 1013 /* We don't need lock here; nobody else touches the iova range */ 1014 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level, 1015 domain->pgd, 0, start_pfn, last_pfn); 1016 1017 /* free pgd */ 1018 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { 1019 free_pgtable_page(domain->pgd); 1020 domain->pgd = NULL; 1021 } 1022 } 1023 1024 /* When a page at a given level is being unlinked from its parent, we don't 1025 need to *modify* it at all. 
   All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static void dma_pte_list_pagetables(struct dmar_domain *domain,
				    int level, struct dma_pte *pte,
				    struct list_head *freelist)
{
	struct page *pg;

	/* Queue the page the parent PTE points at. */
	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	list_add_tail(&pg->lru, freelist);

	if (level == 1)
		return;

	/* Then queue every lower-level table referenced from that page. */
	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			dma_pte_list_pagetables(domain, level - 1, pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));
}

/*
 * Clear the PTEs of the table @pte (at @level, mapping from @pfn) that fall
 * within [@start_pfn, @last_pfn]. Lower-level tables that are completely
 * covered are added to @freelist rather than freed; partially covered ones
 * are recursed into.
 */
static void dma_pte_clear_level(struct dmar_domain *domain, int level,
				struct dma_pte *pte, unsigned long pfn,
				unsigned long start_pfn, unsigned long last_pfn,
				struct list_head *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn = pfn & level_mask(level);

		if (!dma_pte_present(pte))
			goto next;

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			/* Remember the span of cleared PTEs for one flush below. */
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			dma_pte_clear_level(domain, level - 1,
					    phys_to_virt(dma_pte_addr(pte)),
					    level_pfn, start_pfn, last_pfn,
					    freelist);
		}
next:
		pfn = level_pfn + level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);
}

/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
			 unsigned long last_pfn, struct list_head *freelist)
{
	if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
	    WARN_ON(start_pfn > last_pfn))
		return;

	/* we don't need lock here; nobody else touches the iova range */
	dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
			    domain->pgd, 0, start_pfn, last_pfn, freelist);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		/* The pgd is queued on @freelist too, not freed immediately. */
		list_add_tail(&pgd_page->lru, freelist);
		domain->pgd = NULL;
	}
}

/* iommu handling */
/*
 * Allocate a zeroed root table for @iommu, flush it from the CPU cache and
 * store it in iommu->root_entry. Returns 0 on success or -ENOMEM.
 */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;

	root = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);
	iommu->root_entry = root;

	return 0;
}

/*
 * Program the physical address of iommu->root_entry into DMAR_RTADDR_REG,
 * issue the SRTP command and wait for DMA_GSTS_RTPS. Unless cap_esrtps()
 * is set, follow up with global context, PASID (scalable mode only) and
 * IOTLB invalidations.
 */
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/*
	 * Hardware invalidates all DMA remapping hardware translation
	 * caches as part of SRTP flow.
	 */
	if (cap_esrtps(iommu->cap))
		return;

	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
	if (sm_supported(iommu))
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}

/*
 * Issue a write-buffer flush (DMA_GCMD_WBF) and wait for DMA_GSTS_WBFS to
 * clear. Does nothing unless rwbf_quirk is set or cap_rwbf() reports that
 * the hardware needs it.
 */
void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determine if we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1200 break; 1201 case DMA_CCMD_DEVICE_INVL: 1202 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) 1203 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); 1204 break; 1205 default: 1206 pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n", 1207 iommu->name, type); 1208 return; 1209 } 1210 val |= DMA_CCMD_ICC; 1211 1212 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1213 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); 1214 1215 /* Make sure hardware complete it */ 1216 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, 1217 dmar_readq, (!(val & DMA_CCMD_ICC)), val); 1218 1219 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1220 } 1221 1222 /* return value determine if we need a write buffer flush */ 1223 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, 1224 u64 addr, unsigned int size_order, u64 type) 1225 { 1226 int tlb_offset = ecap_iotlb_offset(iommu->ecap); 1227 u64 val = 0, val_iva = 0; 1228 unsigned long flag; 1229 1230 switch (type) { 1231 case DMA_TLB_GLOBAL_FLUSH: 1232 /* global flush doesn't need set IVA_REG */ 1233 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; 1234 break; 1235 case DMA_TLB_DSI_FLUSH: 1236 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 1237 break; 1238 case DMA_TLB_PSI_FLUSH: 1239 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 1240 /* IH bit is passed in as part of address */ 1241 val_iva = size_order | addr; 1242 break; 1243 default: 1244 pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n", 1245 iommu->name, type); 1246 return; 1247 } 1248 1249 if (cap_write_drain(iommu->cap)) 1250 val |= DMA_TLB_WRITE_DRAIN; 1251 1252 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1253 /* Note: Only uses first TLB reg currently */ 1254 if (val_iva) 1255 dmar_writeq(iommu->reg + tlb_offset, val_iva); 1256 dmar_writeq(iommu->reg + tlb_offset + 8, val); 1257 1258 /* Make sure hardware complete it */ 1259 IOMMU_WAIT_OP(iommu, tlb_offset + 8, 1260 dmar_readq, (!(val & DMA_TLB_IVT)), val); 1261 1262 
raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1263 1264 /* check IOTLB invalidation granularity */ 1265 if (DMA_TLB_IAIG(val) == 0) 1266 pr_err("Flush IOTLB failed\n"); 1267 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) 1268 pr_debug("TLB flush request %Lx, actual %Lx\n", 1269 (unsigned long long)DMA_TLB_IIRG(type), 1270 (unsigned long long)DMA_TLB_IAIG(val)); 1271 } 1272 1273 static struct device_domain_info * 1274 domain_lookup_dev_info(struct dmar_domain *domain, 1275 struct intel_iommu *iommu, u8 bus, u8 devfn) 1276 { 1277 struct device_domain_info *info; 1278 unsigned long flags; 1279 1280 spin_lock_irqsave(&domain->lock, flags); 1281 list_for_each_entry(info, &domain->devices, link) { 1282 if (info->iommu == iommu && info->bus == bus && 1283 info->devfn == devfn) { 1284 spin_unlock_irqrestore(&domain->lock, flags); 1285 return info; 1286 } 1287 } 1288 spin_unlock_irqrestore(&domain->lock, flags); 1289 1290 return NULL; 1291 } 1292 1293 void domain_update_iotlb(struct dmar_domain *domain) 1294 { 1295 struct dev_pasid_info *dev_pasid; 1296 struct device_domain_info *info; 1297 bool has_iotlb_device = false; 1298 unsigned long flags; 1299 1300 spin_lock_irqsave(&domain->lock, flags); 1301 list_for_each_entry(info, &domain->devices, link) { 1302 if (info->ats_enabled) { 1303 has_iotlb_device = true; 1304 break; 1305 } 1306 } 1307 1308 list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { 1309 info = dev_iommu_priv_get(dev_pasid->dev); 1310 if (info->ats_enabled) { 1311 has_iotlb_device = true; 1312 break; 1313 } 1314 } 1315 domain->has_iotlb_device = has_iotlb_device; 1316 spin_unlock_irqrestore(&domain->lock, flags); 1317 } 1318 1319 /* 1320 * The extra devTLB flush quirk impacts those QAT devices with PCI device 1321 * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() 1322 * check because it applies only to the built-in QAT devices and it doesn't 1323 * grant additional privileges. 
1324 */ 1325 #define BUGGY_QAT_DEVID_MASK 0x4940 1326 static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) 1327 { 1328 if (pdev->vendor != PCI_VENDOR_ID_INTEL) 1329 return false; 1330 1331 if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK) 1332 return false; 1333 1334 return true; 1335 } 1336 1337 static void iommu_enable_pci_caps(struct device_domain_info *info) 1338 { 1339 struct pci_dev *pdev; 1340 1341 if (!dev_is_pci(info->dev)) 1342 return; 1343 1344 pdev = to_pci_dev(info->dev); 1345 1346 /* The PCIe spec, in its wisdom, declares that the behaviour of 1347 the device if you enable PASID support after ATS support is 1348 undefined. So always enable PASID support on devices which 1349 have it, even if we can't yet know if we're ever going to 1350 use it. */ 1351 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) 1352 info->pasid_enabled = 1; 1353 1354 if (info->ats_supported && pci_ats_page_aligned(pdev) && 1355 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { 1356 info->ats_enabled = 1; 1357 domain_update_iotlb(info->domain); 1358 } 1359 } 1360 1361 static void iommu_disable_pci_caps(struct device_domain_info *info) 1362 { 1363 struct pci_dev *pdev; 1364 1365 if (!dev_is_pci(info->dev)) 1366 return; 1367 1368 pdev = to_pci_dev(info->dev); 1369 1370 if (info->ats_enabled) { 1371 pci_disable_ats(pdev); 1372 info->ats_enabled = 0; 1373 domain_update_iotlb(info->domain); 1374 } 1375 1376 if (info->pasid_enabled) { 1377 pci_disable_pasid(pdev); 1378 info->pasid_enabled = 0; 1379 } 1380 } 1381 1382 static void __iommu_flush_dev_iotlb(struct device_domain_info *info, 1383 u64 addr, unsigned int mask) 1384 { 1385 u16 sid, qdep; 1386 1387 if (!info || !info->ats_enabled) 1388 return; 1389 1390 sid = info->bus << 8 | info->devfn; 1391 qdep = info->ats_qdep; 1392 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, 1393 qdep, addr, mask); 1394 quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep); 1395 } 1396 1397 static void 
iommu_flush_dev_iotlb(struct dmar_domain *domain, 1398 u64 addr, unsigned mask) 1399 { 1400 struct dev_pasid_info *dev_pasid; 1401 struct device_domain_info *info; 1402 unsigned long flags; 1403 1404 if (!domain->has_iotlb_device) 1405 return; 1406 1407 spin_lock_irqsave(&domain->lock, flags); 1408 list_for_each_entry(info, &domain->devices, link) 1409 __iommu_flush_dev_iotlb(info, addr, mask); 1410 1411 list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { 1412 info = dev_iommu_priv_get(dev_pasid->dev); 1413 1414 if (!info->ats_enabled) 1415 continue; 1416 1417 qi_flush_dev_iotlb_pasid(info->iommu, 1418 PCI_DEVID(info->bus, info->devfn), 1419 info->pfsid, dev_pasid->pasid, 1420 info->ats_qdep, addr, 1421 mask); 1422 } 1423 spin_unlock_irqrestore(&domain->lock, flags); 1424 } 1425 1426 static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, 1427 struct dmar_domain *domain, u64 addr, 1428 unsigned long npages, bool ih) 1429 { 1430 u16 did = domain_id_iommu(domain, iommu); 1431 struct dev_pasid_info *dev_pasid; 1432 unsigned long flags; 1433 1434 spin_lock_irqsave(&domain->lock, flags); 1435 list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) 1436 qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih); 1437 1438 if (!list_empty(&domain->devices)) 1439 qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih); 1440 spin_unlock_irqrestore(&domain->lock, flags); 1441 } 1442 1443 static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 1444 unsigned long pfn, unsigned int pages, 1445 int ih) 1446 { 1447 unsigned int aligned_pages = __roundup_pow_of_two(pages); 1448 unsigned long bitmask = aligned_pages - 1; 1449 unsigned int mask = ilog2(aligned_pages); 1450 u64 addr = (u64)pfn << VTD_PAGE_SHIFT; 1451 1452 /* 1453 * PSI masks the low order bits of the base address. If the 1454 * address isn't aligned to the mask, then compute a mask value 1455 * needed to ensure the target range is flushed. 
1456 */ 1457 if (unlikely(bitmask & pfn)) { 1458 unsigned long end_pfn = pfn + pages - 1, shared_bits; 1459 1460 /* 1461 * Since end_pfn <= pfn + bitmask, the only way bits 1462 * higher than bitmask can differ in pfn and end_pfn is 1463 * by carrying. This means after masking out bitmask, 1464 * high bits starting with the first set bit in 1465 * shared_bits are all equal in both pfn and end_pfn. 1466 */ 1467 shared_bits = ~(pfn ^ end_pfn) & ~bitmask; 1468 mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; 1469 } 1470 1471 /* 1472 * Fallback to domain selective flush if no PSI support or 1473 * the size is too big. 1474 */ 1475 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) 1476 iommu->flush.flush_iotlb(iommu, did, 0, 0, 1477 DMA_TLB_DSI_FLUSH); 1478 else 1479 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, 1480 DMA_TLB_PSI_FLUSH); 1481 } 1482 1483 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, 1484 struct dmar_domain *domain, 1485 unsigned long pfn, unsigned int pages, 1486 int ih, int map) 1487 { 1488 unsigned int aligned_pages = __roundup_pow_of_two(pages); 1489 unsigned int mask = ilog2(aligned_pages); 1490 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; 1491 u16 did = domain_id_iommu(domain, iommu); 1492 1493 if (WARN_ON(!pages)) 1494 return; 1495 1496 if (ih) 1497 ih = 1 << 6; 1498 1499 if (domain->use_first_level) 1500 domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); 1501 else 1502 __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih); 1503 1504 /* 1505 * In caching mode, changes of pages from non-present to present require 1506 * flush. However, device IOTLB doesn't need to be flushed in this case. 
1507 */ 1508 if (!cap_caching_mode(iommu->cap) || !map) 1509 iommu_flush_dev_iotlb(domain, addr, mask); 1510 } 1511 1512 /* Notification for newly created mappings */ 1513 static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain *domain, 1514 unsigned long pfn, unsigned int pages) 1515 { 1516 /* 1517 * It's a non-present to present mapping. Only flush if caching mode 1518 * and second level. 1519 */ 1520 if (cap_caching_mode(iommu->cap) && !domain->use_first_level) 1521 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1); 1522 else 1523 iommu_flush_write_buffer(iommu); 1524 } 1525 1526 /* 1527 * Flush the relevant caches in nested translation if the domain 1528 * also serves as a parent 1529 */ 1530 static void parent_domain_flush(struct dmar_domain *domain, 1531 unsigned long pfn, 1532 unsigned long pages, int ih) 1533 { 1534 struct dmar_domain *s1_domain; 1535 1536 spin_lock(&domain->s1_lock); 1537 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { 1538 struct device_domain_info *device_info; 1539 struct iommu_domain_info *info; 1540 unsigned long flags; 1541 unsigned long i; 1542 1543 xa_for_each(&s1_domain->iommu_array, i, info) 1544 __iommu_flush_iotlb_psi(info->iommu, info->did, 1545 pfn, pages, ih); 1546 1547 if (!s1_domain->has_iotlb_device) 1548 continue; 1549 1550 spin_lock_irqsave(&s1_domain->lock, flags); 1551 list_for_each_entry(device_info, &s1_domain->devices, link) 1552 /* 1553 * Address translation cache in device side caches the 1554 * result of nested translation. There is no easy way 1555 * to identify the exact set of nested translations 1556 * affected by a change in S2. So just flush the entire 1557 * device cache. 
1558 */ 1559 __iommu_flush_dev_iotlb(device_info, 0, 1560 MAX_AGAW_PFN_WIDTH); 1561 spin_unlock_irqrestore(&s1_domain->lock, flags); 1562 } 1563 spin_unlock(&domain->s1_lock); 1564 } 1565 1566 static void intel_flush_iotlb_all(struct iommu_domain *domain) 1567 { 1568 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 1569 struct iommu_domain_info *info; 1570 unsigned long idx; 1571 1572 xa_for_each(&dmar_domain->iommu_array, idx, info) { 1573 struct intel_iommu *iommu = info->iommu; 1574 u16 did = domain_id_iommu(dmar_domain, iommu); 1575 1576 if (dmar_domain->use_first_level) 1577 domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0); 1578 else 1579 iommu->flush.flush_iotlb(iommu, did, 0, 0, 1580 DMA_TLB_DSI_FLUSH); 1581 1582 if (!cap_caching_mode(iommu->cap)) 1583 iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); 1584 } 1585 1586 if (dmar_domain->nested_parent) 1587 parent_domain_flush(dmar_domain, 0, -1, 0); 1588 } 1589 1590 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) 1591 { 1592 u32 pmen; 1593 unsigned long flags; 1594 1595 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap)) 1596 return; 1597 1598 raw_spin_lock_irqsave(&iommu->register_lock, flags); 1599 pmen = readl(iommu->reg + DMAR_PMEN_REG); 1600 pmen &= ~DMA_PMEN_EPM; 1601 writel(pmen, iommu->reg + DMAR_PMEN_REG); 1602 1603 /* wait for the protected region status bit to clear */ 1604 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, 1605 readl, !(pmen & DMA_PMEN_PRS), pmen); 1606 1607 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 1608 } 1609 1610 static void iommu_enable_translation(struct intel_iommu *iommu) 1611 { 1612 u32 sts; 1613 unsigned long flags; 1614 1615 raw_spin_lock_irqsave(&iommu->register_lock, flags); 1616 iommu->gcmd |= DMA_GCMD_TE; 1617 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 1618 1619 /* Make sure hardware complete it */ 1620 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1621 readl, (sts & DMA_GSTS_TES), sts); 1622 1623 
raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 1624 } 1625 1626 static void iommu_disable_translation(struct intel_iommu *iommu) 1627 { 1628 u32 sts; 1629 unsigned long flag; 1630 1631 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated && 1632 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap))) 1633 return; 1634 1635 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1636 iommu->gcmd &= ~DMA_GCMD_TE; 1637 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 1638 1639 /* Make sure hardware complete it */ 1640 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1641 readl, (!(sts & DMA_GSTS_TES)), sts); 1642 1643 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1644 } 1645 1646 static int iommu_init_domains(struct intel_iommu *iommu) 1647 { 1648 u32 ndomains; 1649 1650 ndomains = cap_ndoms(iommu->cap); 1651 pr_debug("%s: Number of Domains supported <%d>\n", 1652 iommu->name, ndomains); 1653 1654 spin_lock_init(&iommu->lock); 1655 1656 iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL); 1657 if (!iommu->domain_ids) 1658 return -ENOMEM; 1659 1660 /* 1661 * If Caching mode is set, then invalid translations are tagged 1662 * with domain-id 0, hence we need to pre-allocate it. We also 1663 * use domain-id 0 as a marker for non-allocated domain-id, so 1664 * make sure it is not used for a real domain. 1665 */ 1666 set_bit(0, iommu->domain_ids); 1667 1668 /* 1669 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid 1670 * entry for first-level or pass-through translation modes should 1671 * be programmed with a domain id different from those used for 1672 * second-level or nested translation. We reserve a domain id for 1673 * this purpose. 
1674 */ 1675 if (sm_supported(iommu)) 1676 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids); 1677 1678 return 0; 1679 } 1680 1681 static void disable_dmar_iommu(struct intel_iommu *iommu) 1682 { 1683 if (!iommu->domain_ids) 1684 return; 1685 1686 /* 1687 * All iommu domains must have been detached from the devices, 1688 * hence there should be no domain IDs in use. 1689 */ 1690 if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap)) 1691 > NUM_RESERVED_DID)) 1692 return; 1693 1694 if (iommu->gcmd & DMA_GCMD_TE) 1695 iommu_disable_translation(iommu); 1696 } 1697 1698 static void free_dmar_iommu(struct intel_iommu *iommu) 1699 { 1700 if (iommu->domain_ids) { 1701 bitmap_free(iommu->domain_ids); 1702 iommu->domain_ids = NULL; 1703 } 1704 1705 if (iommu->copied_tables) { 1706 bitmap_free(iommu->copied_tables); 1707 iommu->copied_tables = NULL; 1708 } 1709 1710 /* free context mapping */ 1711 free_context_table(iommu); 1712 1713 #ifdef CONFIG_INTEL_IOMMU_SVM 1714 if (pasid_supported(iommu)) { 1715 if (ecap_prs(iommu->ecap)) 1716 intel_svm_finish_prq(iommu); 1717 } 1718 #endif 1719 } 1720 1721 /* 1722 * Check and return whether first level is used by default for 1723 * DMA translation. 
1724 */ 1725 static bool first_level_by_default(unsigned int type) 1726 { 1727 /* Only SL is available in legacy mode */ 1728 if (!scalable_mode_support()) 1729 return false; 1730 1731 /* Only level (either FL or SL) is available, just use it */ 1732 if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity()) 1733 return intel_cap_flts_sanity(); 1734 1735 /* Both levels are available, decide it based on domain type */ 1736 return type != IOMMU_DOMAIN_UNMANAGED; 1737 } 1738 1739 static struct dmar_domain *alloc_domain(unsigned int type) 1740 { 1741 struct dmar_domain *domain; 1742 1743 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 1744 if (!domain) 1745 return NULL; 1746 1747 domain->nid = NUMA_NO_NODE; 1748 if (first_level_by_default(type)) 1749 domain->use_first_level = true; 1750 domain->has_iotlb_device = false; 1751 INIT_LIST_HEAD(&domain->devices); 1752 INIT_LIST_HEAD(&domain->dev_pasids); 1753 spin_lock_init(&domain->lock); 1754 xa_init(&domain->iommu_array); 1755 1756 return domain; 1757 } 1758 1759 int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) 1760 { 1761 struct iommu_domain_info *info, *curr; 1762 unsigned long ndomains; 1763 int num, ret = -ENOSPC; 1764 1765 info = kzalloc(sizeof(*info), GFP_KERNEL); 1766 if (!info) 1767 return -ENOMEM; 1768 1769 spin_lock(&iommu->lock); 1770 curr = xa_load(&domain->iommu_array, iommu->seq_id); 1771 if (curr) { 1772 curr->refcnt++; 1773 spin_unlock(&iommu->lock); 1774 kfree(info); 1775 return 0; 1776 } 1777 1778 ndomains = cap_ndoms(iommu->cap); 1779 num = find_first_zero_bit(iommu->domain_ids, ndomains); 1780 if (num >= ndomains) { 1781 pr_err("%s: No free domain ids\n", iommu->name); 1782 goto err_unlock; 1783 } 1784 1785 set_bit(num, iommu->domain_ids); 1786 info->refcnt = 1; 1787 info->did = num; 1788 info->iommu = iommu; 1789 curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id, 1790 NULL, info, GFP_ATOMIC); 1791 if (curr) { 1792 ret = xa_err(curr) ? 
: -EBUSY; 1793 goto err_clear; 1794 } 1795 domain_update_iommu_cap(domain); 1796 1797 spin_unlock(&iommu->lock); 1798 return 0; 1799 1800 err_clear: 1801 clear_bit(info->did, iommu->domain_ids); 1802 err_unlock: 1803 spin_unlock(&iommu->lock); 1804 kfree(info); 1805 return ret; 1806 } 1807 1808 void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) 1809 { 1810 struct iommu_domain_info *info; 1811 1812 spin_lock(&iommu->lock); 1813 info = xa_load(&domain->iommu_array, iommu->seq_id); 1814 if (--info->refcnt == 0) { 1815 clear_bit(info->did, iommu->domain_ids); 1816 xa_erase(&domain->iommu_array, iommu->seq_id); 1817 domain->nid = NUMA_NO_NODE; 1818 domain_update_iommu_cap(domain); 1819 kfree(info); 1820 } 1821 spin_unlock(&iommu->lock); 1822 } 1823 1824 static int guestwidth_to_adjustwidth(int gaw) 1825 { 1826 int agaw; 1827 int r = (gaw - 12) % 9; 1828 1829 if (r == 0) 1830 agaw = gaw; 1831 else 1832 agaw = gaw + 9 - r; 1833 if (agaw > 64) 1834 agaw = 64; 1835 return agaw; 1836 } 1837 1838 static void domain_exit(struct dmar_domain *domain) 1839 { 1840 if (domain->pgd) { 1841 LIST_HEAD(freelist); 1842 1843 domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist); 1844 put_pages_list(&freelist); 1845 } 1846 1847 if (WARN_ON(!list_empty(&domain->devices))) 1848 return; 1849 1850 kfree(domain); 1851 } 1852 1853 static int domain_context_mapping_one(struct dmar_domain *domain, 1854 struct intel_iommu *iommu, 1855 u8 bus, u8 devfn) 1856 { 1857 struct device_domain_info *info = 1858 domain_lookup_dev_info(domain, iommu, bus, devfn); 1859 u16 did = domain_id_iommu(domain, iommu); 1860 int translation = CONTEXT_TT_MULTI_LEVEL; 1861 struct dma_pte *pgd = domain->pgd; 1862 struct context_entry *context; 1863 int agaw, ret; 1864 1865 if (hw_pass_through && domain_type_is_si(domain)) 1866 translation = CONTEXT_TT_PASS_THROUGH; 1867 1868 pr_debug("Set context mapping for %02x:%02x.%d\n", 1869 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1870 1871 
spin_lock(&iommu->lock); 1872 ret = -ENOMEM; 1873 context = iommu_context_addr(iommu, bus, devfn, 1); 1874 if (!context) 1875 goto out_unlock; 1876 1877 ret = 0; 1878 if (context_present(context) && !context_copied(iommu, bus, devfn)) 1879 goto out_unlock; 1880 1881 /* 1882 * For kdump cases, old valid entries may be cached due to the 1883 * in-flight DMA and copied pgtable, but there is no unmapping 1884 * behaviour for them, thus we need an explicit cache flush for 1885 * the newly-mapped device. For kdump, at this point, the device 1886 * is supposed to finish reset at its driver probe stage, so no 1887 * in-flight DMA will exist, and we don't need to worry anymore 1888 * hereafter. 1889 */ 1890 if (context_copied(iommu, bus, devfn)) { 1891 u16 did_old = context_domain_id(context); 1892 1893 if (did_old < cap_ndoms(iommu->cap)) { 1894 iommu->flush.flush_context(iommu, did_old, 1895 (((u16)bus) << 8) | devfn, 1896 DMA_CCMD_MASK_NOBIT, 1897 DMA_CCMD_DEVICE_INVL); 1898 iommu->flush.flush_iotlb(iommu, did_old, 0, 0, 1899 DMA_TLB_DSI_FLUSH); 1900 } 1901 1902 clear_context_copied(iommu, bus, devfn); 1903 } 1904 1905 context_clear_entry(context); 1906 context_set_domain_id(context, did); 1907 1908 if (translation != CONTEXT_TT_PASS_THROUGH) { 1909 /* 1910 * Skip top levels of page tables for iommu which has 1911 * less agaw than default. Unnecessary for PT mode. 1912 */ 1913 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { 1914 ret = -ENOMEM; 1915 pgd = phys_to_virt(dma_pte_addr(pgd)); 1916 if (!dma_pte_present(pgd)) 1917 goto out_unlock; 1918 } 1919 1920 if (info && info->ats_supported) 1921 translation = CONTEXT_TT_DEV_IOTLB; 1922 else 1923 translation = CONTEXT_TT_MULTI_LEVEL; 1924 1925 context_set_address_root(context, virt_to_phys(pgd)); 1926 context_set_address_width(context, agaw); 1927 } else { 1928 /* 1929 * In pass through mode, AW must be programmed to 1930 * indicate the largest AGAW value supported by 1931 * hardware. And ASR is ignored by hardware. 
1932 */ 1933 context_set_address_width(context, iommu->msagaw); 1934 } 1935 1936 context_set_translation_type(context, translation); 1937 context_set_fault_enable(context); 1938 context_set_present(context); 1939 if (!ecap_coherent(iommu->ecap)) 1940 clflush_cache_range(context, sizeof(*context)); 1941 1942 /* 1943 * It's a non-present to present mapping. If hardware doesn't cache 1944 * non-present entry we only need to flush the write-buffer. If the 1945 * _does_ cache non-present entries, then it does so in the special 1946 * domain #0, which we have to flush: 1947 */ 1948 if (cap_caching_mode(iommu->cap)) { 1949 iommu->flush.flush_context(iommu, 0, 1950 (((u16)bus) << 8) | devfn, 1951 DMA_CCMD_MASK_NOBIT, 1952 DMA_CCMD_DEVICE_INVL); 1953 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); 1954 } else { 1955 iommu_flush_write_buffer(iommu); 1956 } 1957 1958 ret = 0; 1959 1960 out_unlock: 1961 spin_unlock(&iommu->lock); 1962 1963 return ret; 1964 } 1965 1966 static int domain_context_mapping_cb(struct pci_dev *pdev, 1967 u16 alias, void *opaque) 1968 { 1969 struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev); 1970 struct intel_iommu *iommu = info->iommu; 1971 struct dmar_domain *domain = opaque; 1972 1973 return domain_context_mapping_one(domain, iommu, 1974 PCI_BUS_NUM(alias), alias & 0xff); 1975 } 1976 1977 static int 1978 domain_context_mapping(struct dmar_domain *domain, struct device *dev) 1979 { 1980 struct device_domain_info *info = dev_iommu_priv_get(dev); 1981 struct intel_iommu *iommu = info->iommu; 1982 u8 bus = info->bus, devfn = info->devfn; 1983 1984 if (!dev_is_pci(dev)) 1985 return domain_context_mapping_one(domain, iommu, bus, devfn); 1986 1987 return pci_for_each_dma_alias(to_pci_dev(dev), 1988 domain_context_mapping_cb, domain); 1989 } 1990 1991 /* Returns a number of VTD pages, but aligned to MM page size */ 1992 static unsigned long aligned_nrpages(unsigned long host_addr, size_t size) 1993 { 1994 host_addr &= 
~PAGE_MASK; 1995 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; 1996 } 1997 1998 /* Return largest possible superpage level for a given mapping */ 1999 static int hardware_largepage_caps(struct dmar_domain *domain, unsigned long iov_pfn, 2000 unsigned long phy_pfn, unsigned long pages) 2001 { 2002 int support, level = 1; 2003 unsigned long pfnmerge; 2004 2005 support = domain->iommu_superpage; 2006 2007 /* To use a large page, the virtual *and* physical addresses 2008 must be aligned to 2MiB/1GiB/etc. Lower bits set in either 2009 of them will mean we have to use smaller pages. So just 2010 merge them and check both at once. */ 2011 pfnmerge = iov_pfn | phy_pfn; 2012 2013 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { 2014 pages >>= VTD_STRIDE_SHIFT; 2015 if (!pages) 2016 break; 2017 pfnmerge >>= VTD_STRIDE_SHIFT; 2018 level++; 2019 support--; 2020 } 2021 return level; 2022 } 2023 2024 /* 2025 * Ensure that old small page tables are removed to make room for superpage(s). 2026 * We're going to add new large pages, so make sure we don't remove their parent 2027 * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared. 
2028 */ 2029 static void switch_to_super_page(struct dmar_domain *domain, 2030 unsigned long start_pfn, 2031 unsigned long end_pfn, int level) 2032 { 2033 unsigned long lvl_pages = lvl_to_nr_pages(level); 2034 struct iommu_domain_info *info; 2035 struct dma_pte *pte = NULL; 2036 unsigned long i; 2037 2038 while (start_pfn <= end_pfn) { 2039 if (!pte) 2040 pte = pfn_to_dma_pte(domain, start_pfn, &level, 2041 GFP_ATOMIC); 2042 2043 if (dma_pte_present(pte)) { 2044 dma_pte_free_pagetable(domain, start_pfn, 2045 start_pfn + lvl_pages - 1, 2046 level + 1); 2047 2048 xa_for_each(&domain->iommu_array, i, info) 2049 iommu_flush_iotlb_psi(info->iommu, domain, 2050 start_pfn, lvl_pages, 2051 0, 0); 2052 if (domain->nested_parent) 2053 parent_domain_flush(domain, start_pfn, 2054 lvl_pages, 0); 2055 } 2056 2057 pte++; 2058 start_pfn += lvl_pages; 2059 if (first_pte_in_page(pte)) 2060 pte = NULL; 2061 } 2062 } 2063 2064 static int 2065 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2066 unsigned long phys_pfn, unsigned long nr_pages, int prot, 2067 gfp_t gfp) 2068 { 2069 struct dma_pte *first_pte = NULL, *pte = NULL; 2070 unsigned int largepage_lvl = 0; 2071 unsigned long lvl_pages = 0; 2072 phys_addr_t pteval; 2073 u64 attr; 2074 2075 if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1))) 2076 return -EINVAL; 2077 2078 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 2079 return -EINVAL; 2080 2081 if (!(prot & DMA_PTE_WRITE) && domain->nested_parent) { 2082 pr_err_ratelimited("Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n"); 2083 return -EINVAL; 2084 } 2085 2086 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); 2087 attr |= DMA_FL_PTE_PRESENT; 2088 if (domain->use_first_level) { 2089 attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; 2090 if (prot & DMA_PTE_WRITE) 2091 attr |= DMA_FL_PTE_DIRTY; 2092 } 2093 2094 domain->has_mappings 
= true; 2095 2096 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; 2097 2098 while (nr_pages > 0) { 2099 uint64_t tmp; 2100 2101 if (!pte) { 2102 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, 2103 phys_pfn, nr_pages); 2104 2105 pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl, 2106 gfp); 2107 if (!pte) 2108 return -ENOMEM; 2109 first_pte = pte; 2110 2111 lvl_pages = lvl_to_nr_pages(largepage_lvl); 2112 2113 /* It is large page*/ 2114 if (largepage_lvl > 1) { 2115 unsigned long end_pfn; 2116 unsigned long pages_to_remove; 2117 2118 pteval |= DMA_PTE_LARGE_PAGE; 2119 pages_to_remove = min_t(unsigned long, nr_pages, 2120 nr_pte_to_next_page(pte) * lvl_pages); 2121 end_pfn = iov_pfn + pages_to_remove - 1; 2122 switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl); 2123 } else { 2124 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; 2125 } 2126 2127 } 2128 /* We don't need lock here, nobody else 2129 * touches the iova range 2130 */ 2131 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); 2132 if (tmp) { 2133 static int dumps = 5; 2134 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", 2135 iov_pfn, tmp, (unsigned long long)pteval); 2136 if (dumps) { 2137 dumps--; 2138 debug_dma_dump_mappings(NULL); 2139 } 2140 WARN_ON(1); 2141 } 2142 2143 nr_pages -= lvl_pages; 2144 iov_pfn += lvl_pages; 2145 phys_pfn += lvl_pages; 2146 pteval += lvl_pages * VTD_PAGE_SIZE; 2147 2148 /* If the next PTE would be the first in a new page, then we 2149 * need to flush the cache on the entries we've just written. 2150 * And then we'll need to recalculate 'pte', so clear it and 2151 * let it get set again in the if (!pte) block above. 2152 * 2153 * If we're done (!nr_pages) we need to flush the cache too. 2154 * 2155 * Also if we've been setting superpages, we may need to 2156 * recalculate 'pte' and switch back to smaller pages for the 2157 * end of the mapping, if the trailing size is not enough to 2158 * use another superpage (i.e. 
nr_pages < lvl_pages). 2159 */ 2160 pte++; 2161 if (!nr_pages || first_pte_in_page(pte) || 2162 (largepage_lvl > 1 && nr_pages < lvl_pages)) { 2163 domain_flush_cache(domain, first_pte, 2164 (void *)pte - (void *)first_pte); 2165 pte = NULL; 2166 } 2167 } 2168 2169 return 0; 2170 } 2171 2172 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn) 2173 { 2174 struct intel_iommu *iommu = info->iommu; 2175 struct context_entry *context; 2176 u16 did_old; 2177 2178 spin_lock(&iommu->lock); 2179 context = iommu_context_addr(iommu, bus, devfn, 0); 2180 if (!context) { 2181 spin_unlock(&iommu->lock); 2182 return; 2183 } 2184 2185 did_old = context_domain_id(context); 2186 2187 context_clear_entry(context); 2188 __iommu_flush_cache(iommu, context, sizeof(*context)); 2189 spin_unlock(&iommu->lock); 2190 iommu->flush.flush_context(iommu, 2191 did_old, 2192 (((u16)bus) << 8) | devfn, 2193 DMA_CCMD_MASK_NOBIT, 2194 DMA_CCMD_DEVICE_INVL); 2195 2196 iommu->flush.flush_iotlb(iommu, 2197 did_old, 2198 0, 2199 0, 2200 DMA_TLB_DSI_FLUSH); 2201 2202 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH); 2203 } 2204 2205 static int domain_setup_first_level(struct intel_iommu *iommu, 2206 struct dmar_domain *domain, 2207 struct device *dev, 2208 u32 pasid) 2209 { 2210 struct dma_pte *pgd = domain->pgd; 2211 int agaw, level; 2212 int flags = 0; 2213 2214 /* 2215 * Skip top levels of page tables for iommu which has 2216 * less agaw than default. Unnecessary for PT mode. 
2217 */ 2218 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { 2219 pgd = phys_to_virt(dma_pte_addr(pgd)); 2220 if (!dma_pte_present(pgd)) 2221 return -ENOMEM; 2222 } 2223 2224 level = agaw_to_level(agaw); 2225 if (level != 4 && level != 5) 2226 return -EINVAL; 2227 2228 if (level == 5) 2229 flags |= PASID_FLAG_FL5LP; 2230 2231 if (domain->force_snooping) 2232 flags |= PASID_FLAG_PAGE_SNOOP; 2233 2234 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid, 2235 domain_id_iommu(domain, iommu), 2236 flags); 2237 } 2238 2239 static bool dev_is_real_dma_subdevice(struct device *dev) 2240 { 2241 return dev && dev_is_pci(dev) && 2242 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); 2243 } 2244 2245 static int iommu_domain_identity_map(struct dmar_domain *domain, 2246 unsigned long first_vpfn, 2247 unsigned long last_vpfn) 2248 { 2249 /* 2250 * RMRR range might have overlap with physical memory range, 2251 * clear it first 2252 */ 2253 dma_pte_clear_range(domain, first_vpfn, last_vpfn); 2254 2255 return __domain_mapping(domain, first_vpfn, 2256 first_vpfn, last_vpfn - first_vpfn + 1, 2257 DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL); 2258 } 2259 2260 static int md_domain_init(struct dmar_domain *domain, int guest_width); 2261 2262 static int __init si_domain_init(int hw) 2263 { 2264 struct dmar_rmrr_unit *rmrr; 2265 struct device *dev; 2266 int i, nid, ret; 2267 2268 si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY); 2269 if (!si_domain) 2270 return -EFAULT; 2271 2272 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2273 domain_exit(si_domain); 2274 si_domain = NULL; 2275 return -EFAULT; 2276 } 2277 2278 if (hw) 2279 return 0; 2280 2281 for_each_online_node(nid) { 2282 unsigned long start_pfn, end_pfn; 2283 int i; 2284 2285 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { 2286 ret = iommu_domain_identity_map(si_domain, 2287 mm_to_dma_pfn_start(start_pfn), 2288 mm_to_dma_pfn_end(end_pfn)); 2289 if (ret) 2290 return ret; 2291 } 
}

	/*
	 * Identity map the RMRRs so that devices with RMRRs could also use
	 * the si_domain.
	 */
	for_each_rmrr_units(rmrr) {
		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
					  i, dev) {
			unsigned long long start = rmrr->base_address;
			unsigned long long end = rmrr->end_address;

			/*
			 * Warn about and skip RMRRs that are inverted or that
			 * lie beyond the address width of si_domain.
			 */
			if (WARN_ON(end < start ||
				    end >> agaw_to_width(si_domain->agaw)))
				continue;

			ret = iommu_domain_identity_map(si_domain,
					mm_to_dma_pfn_start(start >> PAGE_SHIFT),
					mm_to_dma_pfn_end(end >> PAGE_SHIFT));
			if (ret)
				return ret;
		}
	}

	return 0;
}

/*
 * Attach @dev to @domain.
 *
 * The domain is first attached to the device's IOMMU and the device is added
 * to the domain's device list under domain->lock. Unless the device is a
 * "real DMA" sub-device, translation is then programmed: a legacy context
 * mapping when scalable mode is not supported, otherwise a pass-through,
 * first-level or second-level PASID entry for IOMMU_NO_PASID.
 *
 * Returns 0 on success. When programming the translation fails,
 * device_block_translation() is called for @dev and the error is returned.
 */
static int dmar_domain_attach_device(struct dmar_domain *domain,
				     struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	unsigned long flags;
	int ret;

	ret = domain_attach_iommu(domain, iommu);
	if (ret)
		return ret;
	info->domain = domain;
	spin_lock_irqsave(&domain->lock, flags);
	list_add(&info->link, &domain->devices);
	spin_unlock_irqrestore(&domain->lock, flags);

	if (dev_is_real_dma_subdevice(dev))
		return 0;

	if (!sm_supported(iommu))
		ret = domain_context_mapping(domain, dev);
	else if (hw_pass_through && domain_type_is_si(domain))
		ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
	else if (domain->use_first_level)
		ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID);
	else
		ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID);

	if (ret) {
		device_block_translation(dev);
		return ret;
	}

	/*
	 * PCI capabilities are not enabled for the identity domain on an
	 * IOMMU without scalable-mode support.
	 */
	if (sm_supported(info->iommu) || !domain_type_is_si(info->domain))
		iommu_enable_pci_caps(info);

	return 0;
}

/**
 * device_rmrr_is_relaxable - Test whether the RMRR of this device
 * is relaxable (ie.
is allowed to be not enforced under some conditions) 2361 * @dev: device handle 2362 * 2363 * We assume that PCI USB devices with RMRRs have them largely 2364 * for historical reasons and that the RMRR space is not actively used post 2365 * boot. This exclusion may change if vendors begin to abuse it. 2366 * 2367 * The same exception is made for graphics devices, with the requirement that 2368 * any use of the RMRR regions will be torn down before assigning the device 2369 * to a guest. 2370 * 2371 * Return: true if the RMRR is relaxable, false otherwise 2372 */ 2373 static bool device_rmrr_is_relaxable(struct device *dev) 2374 { 2375 struct pci_dev *pdev; 2376 2377 if (!dev_is_pci(dev)) 2378 return false; 2379 2380 pdev = to_pci_dev(dev); 2381 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) 2382 return true; 2383 else 2384 return false; 2385 } 2386 2387 /* 2388 * Return the required default domain type for a specific device. 2389 * 2390 * @dev: the device in query 2391 * @startup: true if this is during early boot 2392 * 2393 * Returns: 2394 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain 2395 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain 2396 * - 0: both identity and dynamic domains work for this device 2397 */ 2398 static int device_def_domain_type(struct device *dev) 2399 { 2400 if (dev_is_pci(dev)) { 2401 struct pci_dev *pdev = to_pci_dev(dev); 2402 2403 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) 2404 return IOMMU_DOMAIN_IDENTITY; 2405 2406 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) 2407 return IOMMU_DOMAIN_IDENTITY; 2408 } 2409 2410 return 0; 2411 } 2412 2413 static void intel_iommu_init_qi(struct intel_iommu *iommu) 2414 { 2415 /* 2416 * Start from the sane iommu hardware state. 2417 * If the queued invalidation is already initialized by us 2418 * (for example, while enabling interrupt-remapping) then 2419 * we got the things already rolling from a sane state. 
*/
	if (!iommu->qi) {
		/*
		 * Clear any previous faults.
		 */
		dmar_fault(-1, iommu);
		/*
		 * Disable queued invalidation if supported and already enabled
		 * before OS handover.
		 */
		dmar_disable_qi(iommu);
	}

	/* A non-zero return from dmar_enable_qi() means QI is unavailable. */
	if (dmar_enable_qi(iommu)) {
		/*
		 * Queued Invalidate not enabled, use Register Based Invalidate
		 */
		iommu->flush.flush_context = __iommu_flush_context;
		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
		pr_info("%s: Using Register based invalidation\n",
			iommu->name);
	} else {
		iommu->flush.flush_context = qi_flush_context;
		iommu->flush.flush_iotlb = qi_flush_iotlb;
		pr_info("%s: Using Queued invalidation\n", iommu->name);
	}
}

/*
 * Copy the context table(s) that the old root entry @old_re points to into
 * newly allocated pages and record them in @tbl.
 *
 * @iommu:  IOMMU whose domain-ID bitmap and copied-context state are updated
 * @old_re: root entry of the previous kernel for @bus
 * @tbl:    output array; slot bus (or bus * 2 and bus * 2 + 1 when @ext)
 *          receives the new table(s)
 * @bus:    bus number the root entry belongs to
 * @ext:    true when every bus has a lower and an upper context table
 *
 * For each present context entry the domain ID is marked as used and the
 * entry is flagged as copied.
 *
 * Returns 0 on success (including the case where no table is present) and
 * -ENOMEM when remapping the old table or allocating a new page fails.
 */
static int copy_context_table(struct intel_iommu *iommu,
			      struct root_entry *old_re,
			      struct context_entry **tbl,
			      int bus, bool ext)
{
	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
	struct context_entry *new_ce = NULL, ce;
	struct context_entry *old_ce = NULL;
	struct root_entry re;
	phys_addr_t old_ce_phys;

	tbl_idx = ext ? bus * 2 : bus;
	/* Work on a private copy of the old root entry. */
	memcpy(&re, old_re, sizeof(re));

	for (devfn = 0; devfn < 256; devfn++) {
		/* First calculate the correct index */
		idx = (ext ?
devfn * 2 : devfn) % 256;

		/*
		 * idx wraps to 0 at the start of each old table: at devfn 0,
		 * and additionally at devfn 0x80 when ext is set.
		 */
		if (idx == 0) {
			/* First save what we may have and clean up */
			if (new_ce) {
				tbl[tbl_idx] = new_ce;
				__iommu_flush_cache(iommu, new_ce,
						    VTD_PAGE_SIZE);
				pos = 1;
			}

			if (old_ce)
				memunmap(old_ce);

			ret = 0;
			if (devfn < 0x80)
				old_ce_phys = root_entry_lctp(&re);
			else
				old_ce_phys = root_entry_uctp(&re);

			if (!old_ce_phys) {
				if (ext && devfn == 0) {
					/* No LCTP, try UCTP */
					/*
					 * The loop increment turns 0x7f into
					 * 0x80, which selects the upper table.
					 */
					devfn = 0x7f;
					continue;
				} else {
					goto out;
				}
			}

			ret = -ENOMEM;
			old_ce = memremap(old_ce_phys, PAGE_SIZE,
					MEMREMAP_WB);
			if (!old_ce)
				goto out;

			new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
			if (!new_ce)
				goto out_unmap;

			ret = 0;
		}

		/* Now copy the context entry */
		memcpy(&ce, old_ce + idx, sizeof(ce));

		if (!context_present(&ce))
			continue;

		/* Mark the old kernel's domain ID as in use on this IOMMU. */
		did = context_domain_id(&ce);
		if (did >= 0 && did < cap_ndoms(iommu->cap))
			set_bit(did, iommu->domain_ids);

		set_context_copied(iommu, bus, devfn);
		new_ce[idx] = ce;
	}

	/* Store the last table filled: slot 0, or slot 1 if one was saved. */
	tbl[tbl_idx + pos] = new_ce;

	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);

out_unmap:
	memunmap(old_ce);

out:
	return ret;
}

/*
 * Take over the translation structures that are live on @iommu by copying
 * the context tables referenced from the current root table into newly
 * allocated tables hooked into iommu->root_entry.
 *
 * Returns 0 on success, -EINVAL when the table format would have to change
 * or no old root table is programmed, and -ENOMEM on allocation or mapping
 * failure.
 */
static int copy_translation_tables(struct intel_iommu *iommu)
{
	struct context_entry **ctxt_tbls;
	struct root_entry *old_rt;
	phys_addr_t old_rt_phys;
	int ctxt_table_entries;
	u64 rtaddr_reg;
	int bus, ret;
	bool new_ext, ext;

	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
	/* ext: format currently programmed; new_ext: format we would use. */
	ext        = !!(rtaddr_reg & DMA_RTADDR_SMT);
	new_ext    = !!sm_supported(iommu);

	/*
	 * The RTT bit can only be changed when translation is disabled,
	 * but disabling translation means to open a window for data
	 * corruption.
So bail out and don't copy anything if we would
	 * have to change the bit.
	 */
	if (new_ext != ext)
		return -EINVAL;

	/* One bit per 16-bit source ID (bus:devfn). */
	iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
	if (!iommu->copied_tables)
		return -ENOMEM;

	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
	if (!old_rt_phys)
		return -EINVAL;

	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
	if (!old_rt)
		return -ENOMEM;

	/* This is too big for the stack - allocate it from slab */
	ctxt_table_entries = ext ? 512 : 256;
	ret = -ENOMEM;
	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
	if (!ctxt_tbls)
		goto out_unmap;

	/* A failure on one bus is logged; the remaining buses are still copied. */
	for (bus = 0; bus < 256; bus++) {
		ret = copy_context_table(iommu, &old_rt[bus],
					 ctxt_tbls, bus, ext);
		if (ret) {
			pr_err("%s: Failed to copy context table for bus %d\n",
				iommu->name, bus);
			continue;
		}
	}

	spin_lock(&iommu->lock);

	/* Context tables are copied, now write them to the root_entry table */
	for (bus = 0; bus < 256; bus++) {
		int idx = ext ?
bus * 2 : bus;
		u64 val;

		/* Bit 0 marks the root-entry half as present. */
		if (ctxt_tbls[idx]) {
			val = virt_to_phys(ctxt_tbls[idx]) | 1;
			iommu->root_entry[bus].lo = val;
		}

		if (!ext || !ctxt_tbls[idx + 1])
			continue;

		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
		iommu->root_entry[bus].hi = val;
	}

	spin_unlock(&iommu->lock);

	kfree(ctxt_tbls);

	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);

	ret = 0;

out_unmap:
	memunmap(old_rt);

	return ret;
}

/*
 * Boot-time initialisation of all DMAR units: audit capabilities, set up
 * invalidation, domain-ID bookkeeping and root tables for every non-ignored
 * IOMMU, optionally take over translation tables left enabled by a previous
 * kernel, create the static identity domain and set up interrupts.
 *
 * Returns 0 on success. On failure every active IOMMU is disabled and
 * freed, si_domain is destroyed, and the error is returned.
 */
static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret;

	ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
	if (ret)
		goto free_iommu;

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			iommu_disable_translation(iommu);
			continue;
		}

		/*
		 * Find the max pasid size of all IOMMU's in the system.
		 * We need to ensure the system pasid table is no bigger
		 * than the smallest supported.
		 */
		if (pasid_supported(iommu)) {
			u32 temp = 2 << ecap_pss(iommu->ecap);

			intel_pasid_max_id = min_t(u32, temp,
						   intel_pasid_max_id);
		}

		intel_iommu_init_qi(iommu);

		ret = iommu_init_domains(iommu);
		if (ret)
			goto free_iommu;

		init_translation_status(iommu);

		/*
		 * Pre-enabled translation is only kept in a kdump kernel;
		 * otherwise it is switched off here.
		 */
		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
			iommu_disable_translation(iommu);
			clear_translation_pre_enabled(iommu);
			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
				iommu->name);
		}

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * among all IOMMU's. Need to Split it later.
*/
		ret = iommu_alloc_root_entry(iommu);
		if (ret)
			goto free_iommu;

		if (translation_pre_enabled(iommu)) {
			pr_info("Translation already enabled - trying to copy translation structures\n");

			ret = copy_translation_tables(iommu);
			if (ret) {
				/*
				 * We found the IOMMU with translation
				 * enabled - but failed to copy over the
				 * old root-entry table. Try to proceed
				 * by disabling translation now and
				 * allocating a clean root-entry table.
				 * This might cause DMAR faults, but
				 * probably the dump will still succeed.
				 */
				pr_err("Failed to copy translation tables from previous kernel for %s\n",
				       iommu->name);
				iommu_disable_translation(iommu);
				clear_translation_pre_enabled(iommu);
			} else {
				pr_info("Copied translation tables from previous kernel for %s\n",
					iommu->name);
			}
		}

		/* One IOMMU without pass-through support disables it globally. */
		if (!ecap_pass_through(iommu->ecap))
			hw_pass_through = 0;
		intel_svm_check(iommu);
	}

	/*
	 * Now that qi is enabled on all iommus, set the root entry and flush
	 * caches. This is required on some Intel X58 chipsets, otherwise the
	 * flush_context function will loop forever and the boot hangs.
*/
	for_each_active_iommu(iommu, drhd) {
		iommu_flush_write_buffer(iommu);
		iommu_set_root_entry(iommu);
	}

	/* When graphics is not to be mapped, give it an identity mapping. */
	if (!dmar_map_gfx)
		iommu_identity_mapping |= IDENTMAP_GFX;

	check_tylersburg_isoch();

	ret = si_domain_init(hw_pass_through);
	if (ret)
		goto free_iommu;

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
		if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
			/*
			 * Call dmar_alloc_hwirq() with dmar_global_lock held,
			 * could cause possible lock race condition.
			 */
			up_write(&dmar_global_lock);
			ret = intel_svm_enable_prq(iommu);
			down_write(&dmar_global_lock);
			if (ret)
				goto free_iommu;
		}
#endif
		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto free_iommu;
	}

	return 0;

free_iommu:
	for_each_active_iommu(iommu, drhd) {
		disable_dmar_iommu(iommu);
		free_dmar_iommu(iommu);
	}
	if (si_domain) {
		domain_exit(si_domain);
		si_domain = NULL;
	}

	return ret;
}

/*
 * Mark DMAR units that need no remapping as ignored: units whose device
 * scope contains no active device, and - when dmar_map_gfx is clear - units
 * that serve graphics devices only. Units that serve only graphics devices
 * are also flagged gfx_dedicated.
 */
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;
	struct device *dev;
	int i;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			/* Stop at the first active device, if there is one. */
			for_each_active_dev_scope(drhd->devices,
						  drhd->devices_cnt, i, dev)
				break;
			/* ignore DMAR unit if no devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	for_each_active_drhd_unit(drhd) {
		if (drhd->include_all)
continue;

		/* Stop at the first device that is not a PCI graphics device. */
		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, dev)
			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
				break;
		if (i < drhd->devices_cnt)
			continue;

		/* This IOMMU has *only* gfx devices. Either bypass it or
		   set the gfx_mapped flag, as appropriate */
		drhd->gfx_dedicated = 1;
		if (!dmar_map_gfx)
			drhd->ignored = 1;
	}
}

#ifdef CONFIG_SUSPEND
/*
 * Re-program the IOMMU hardware on resume: re-enable queued invalidation
 * where it was in use, then set the root entry and enable translation on
 * every non-ignored unit.
 *
 * Returns 0 on success or the error from dmar_reenable_qi().
 */
static int init_iommu_hw(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	int ret;

	for_each_active_iommu(iommu, drhd) {
		if (iommu->qi) {
			ret = dmar_reenable_qi(iommu);
			if (ret)
				return ret;
		}
	}

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);
		iommu_set_root_entry(iommu);
		iommu_enable_translation(iommu);
		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;
}

/* Issue a global context-cache and a global IOTLB flush on every active IOMMU. */
static void iommu_flush_all(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
	}
}

/*
 * Syscore suspend hook: flush all caches, disable translation on every
 * active IOMMU and save its fault-event registers in iommu->iommu_state.
 * Always returns 0.
 */
static int iommu_suspend(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	iommu_flush_all();

	for_each_active_iommu(iommu, drhd) {
		iommu_disable_translation(iommu);

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		iommu->iommu_state[SR_DMAR_FECTL_REG] =
			readl(iommu->reg + DMAR_FECTL_REG);
		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
			readl(iommu->reg +
DMAR_FEDATA_REG);
		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
			readl(iommu->reg + DMAR_FEADDR_REG);
		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
			readl(iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
	return 0;
}

/*
 * Syscore resume hook: re-initialise the hardware and write the saved
 * fault-event registers back on every active IOMMU. When the hardware
 * cannot be re-initialised the kernel panics if force_on is set and
 * otherwise emits a warning.
 */
static void iommu_resume(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	if (init_iommu_hw()) {
		if (force_on)
			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
		else
			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
		return;
	}

	for_each_active_iommu(iommu, drhd) {

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
			iommu->reg + DMAR_FECTL_REG);
		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
			iommu->reg + DMAR_FEDATA_REG);
		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
			iommu->reg + DMAR_FEADDR_REG);
		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
			iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
}

static struct syscore_ops iommu_syscore_ops = {
	.resume		= iommu_resume,
	.suspend	= iommu_suspend,
};

/* Register the suspend/resume hooks above with the syscore framework. */
static void __init init_iommu_pm_ops(void)
{
	register_syscore_ops(&iommu_syscore_ops);
}

#else
static inline void init_iommu_pm_ops(void) {}
#endif	/* CONFIG_SUSPEND */

/*
 * Validate a firmware RMRR entry: base and end + 1 must be page aligned,
 * end must lie above base, and the architecture-specific check must pass.
 *
 * Returns 0 when the entry looks sane, -EINVAL otherwise.
 */
static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
{
	if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
	    !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
	    rmrr->end_address <= rmrr->base_address ||
	    arch_rmrr_sanity_check(rmrr))
		return -EINVAL;

	return 0;
}

/*
 * Parse one RMRR structure from the DMAR table and add it to
 * dmar_rmrr_units. An entry that fails the sanity check is reported as a
 * firmware bug and taints the kernel, but is still recorded.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_reserved_memory *rmrr;
	struct dmar_rmrr_unit *rmrru;

	rmrr = (struct acpi_dmar_reserved_memory *)header;
	if (rmrr_sanity_check(rmrr)) {
		pr_warn(FW_BUG
			   "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
			   rmrr->base_address, rmrr->end_address,
			   dmi_get_system_info(DMI_BIOS_VENDOR),
			   dmi_get_system_info(DMI_BIOS_VERSION),
			   dmi_get_system_info(DMI_PRODUCT_VERSION));
		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
	}

	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
	if (!rmrru)
		goto out;

	rmrru->hdr = header;

	rmrru->base_address = rmrr->base_address;
	rmrru->end_address = rmrr->end_address;

	/* The device scope entries follow the fixed part of the structure. */
	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				&rmrru->devices_cnt);
	if (rmrru->devices_cnt && rmrru->devices == NULL)
		goto free_rmrru;

	list_add(&rmrru->list, &dmar_rmrr_units);

	return 0;
free_rmrru:
	kfree(rmrru);
out:
	return -ENOMEM;
}

/*
 * Look up the registered ATSR unit whose stored header is byte-for-byte
 * identical to @atsr. Returns the unit, or NULL when none matches.
 */
static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
{
	struct dmar_atsr_unit *atsru;
	struct acpi_dmar_atsr *tmp;

	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
				dmar_rcu_check()) {
		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
		/* Cheap field comparisons first, full memcmp() last. */
		if (atsr->segment != tmp->segment)
			continue;
		if (atsr->header.length != tmp->header.length)
			continue;
		if (memcmp(atsr, tmp, atsr->header.length) == 0)
			return atsru;
	}

	return NULL;
}

/*
 * Parse one ATSR structure and add it to dmar_atsr_units unless an identical
 * one is already registered.
 *
 * Returns 0 on success (also when nothing had to be done), -ENOMEM on
 * allocation failure.
 */
int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
		return 0;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru)
		return 0;

	/* The unit and its private copy of the header share one allocation. */
	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
	if (!atsru)
		return -ENOMEM;

	/*
	 * If memory is allocated from slab by ACPI _DSM method, we need to
	 * copy the memory content because the memory buffer will be freed
	 * on return.
	 */
	atsru->hdr = (void *)(atsru + 1);
	memcpy(atsru->hdr, hdr, hdr->length);
	atsru->include_all = atsr->flags & 0x1;
	if (!atsru->include_all) {
		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
				(void *)atsr + atsr->header.length,
				&atsru->devices_cnt);
		if (atsru->devices_cnt && atsru->devices == NULL) {
			kfree(atsru);
			return -ENOMEM;
		}
	}

	list_add_rcu(&atsru->list, &dmar_atsr_units);

	return 0;
}

/* Release the device scope of @atsru and free the unit itself. */
static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
{
	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
	kfree(atsru);
}

/*
 * Remove the ATSR unit matching @hdr, if any, from dmar_atsr_units and free
 * it once an RCU grace period has elapsed. Always returns 0.
 */
int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru) {
		list_del_rcu(&atsru->list);
		synchronize_rcu();
		intel_iommu_free_atsr(atsru);
	}

	return 0;
}

/*
 * Check whether the ATSR unit matching @hdr is still in use.
 *
 * Returns -EBUSY when the unit is not an include-all unit and has at least
 * one active device in its scope; 0 otherwise, including when no matching
 * unit is registered.
 */
int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	int i;
	struct device *dev;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (!atsru)
		return 0;

	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
		/* The loop body runs only if an active device exists. */
		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
					  i, dev)
			return -EBUSY;
	}

	return 0;
}

/*
 * Look up the registered SATC unit whose stored header is byte-for-byte
 * identical to @satc. Returns the unit, or NULL when none matches.
 */
static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
{
	struct dmar_satc_unit *satcu;
	struct acpi_dmar_satc *tmp;

	list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
				dmar_rcu_check()) {
		tmp = (struct acpi_dmar_satc *)satcu->hdr;
		/* Cheap field comparisons first, full memcmp() last. */
		if (satc->segment != tmp->segment)
			continue;
		if (satc->header.length != tmp->header.length)
			continue;
		if (memcmp(satc, tmp, satc->header.length) == 0)
			return satcu;
	}

	return NULL;
}

/*
 * Parse one SATC structure and add it to dmar_satc_units unless an identical
 * one is already registered.
 *
 * Returns 0 on success (also when nothing had to be done), -ENOMEM on
 * allocation failure.
 */
int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_satc *satc;
	struct dmar_satc_unit *satcu;

	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
		return 0;

	satc = container_of(hdr, struct acpi_dmar_satc, header);
	satcu = dmar_find_satc(satc);
	if (satcu)
		return 0;

	/* The unit and its private copy of the header share one allocation. */
	satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
	if (!satcu)
		return -ENOMEM;

	satcu->hdr = (void *)(satcu + 1);
	memcpy(satcu->hdr, hdr, hdr->length);
	satcu->atc_required = satc->flags & 0x1;
	satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
					      (void *)satc + satc->header.length,
					      &satcu->devices_cnt);
	if (satcu->devices_cnt && !satcu->devices) {
		kfree(satcu);
		return -ENOMEM;
	}
	list_add_rcu(&satcu->list, &dmar_satc_units);

	return 0;
}

/*
 * Bring up the IOMMU of a hot-added DMAR unit: audit its capabilities
 * against the running configuration, allocate domain-ID and root-table
 * state and, unless the unit is ignored, set up invalidation and interrupts
 * and enable translation.
 *
 * Returns 0 on success, -ENXIO when the unit lacks pass-through or
 * super-page support that is already in use, or the error of the step that
 * failed.
 */
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
	int sp, ret;
	struct intel_iommu *iommu = dmaru->iommu;

	ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
	if (ret)
		goto out;

	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
		pr_warn("%s: Doesn't support hardware pass through.\n",
			iommu->name);
		return -ENXIO;
	}

	sp = domain_update_iommu_superpage(NULL, iommu) - 1;
	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
		pr_warn("%s: Doesn't support large page.\n",
			iommu->name);
		return -ENXIO;
	}

	/*
	 * Disable translation if already
enabled prior to OS handover.
	 */
	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	ret = iommu_init_domains(iommu);
	if (ret == 0)
		ret = iommu_alloc_root_entry(iommu);
	if (ret)
		goto out;

	intel_svm_check(iommu);

	if (dmaru->ignored) {
		/*
		 * we always have to disable PMRs or DMA may fail on this device
		 */
		if (force_on)
			iommu_disable_protect_mem_regions(iommu);
		return 0;
	}

	intel_iommu_init_qi(iommu);
	iommu_flush_write_buffer(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
		ret = intel_svm_enable_prq(iommu);
		if (ret)
			goto disable_iommu;
	}
#endif
	ret = dmar_set_interrupt(iommu);
	if (ret)
		goto disable_iommu;

	iommu_set_root_entry(iommu);
	iommu_enable_translation(iommu);

	iommu_disable_protect_mem_regions(iommu);
	return 0;

disable_iommu:
	disable_dmar_iommu(iommu);
out:
	free_dmar_iommu(iommu);
	return ret;
}

/*
 * Hot-plug entry point for a DMAR unit.
 *
 * @dmaru:  the unit being added or removed
 * @insert: true to bring the unit's IOMMU up, false to disable and free it
 *
 * Returns 0 when the Intel IOMMU driver is not enabled or the operation
 * succeeded, -EINVAL when the unit has no IOMMU, or the error from
 * intel_iommu_add().
 */
int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
{
	int ret = 0;
	struct intel_iommu *iommu = dmaru->iommu;

	if (!intel_iommu_enabled)
		return 0;
	if (iommu == NULL)
		return -EINVAL;

	if (insert) {
		ret = intel_iommu_add(dmaru);
	} else {
		disable_dmar_iommu(iommu);
		free_dmar_iommu(iommu);
	}

	return ret;
}

/* Free every registered RMRR, ATSR and SATC unit along with its device scope. */
static void intel_iommu_free_dmars(void)
{
	struct dmar_rmrr_unit *rmrru, *rmrr_n;
	struct dmar_atsr_unit *atsru, *atsr_n;
	struct dmar_satc_unit *satcu, *satc_n;

	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
		list_del(&rmrru->list);
		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
		kfree(rmrru);
	}

	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
		list_del(&atsru->list);
		intel_iommu_free_atsr(atsru);
	}
	list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
		list_del(&satcu->list);
		dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
		kfree(satcu);
	}
}

/*
 * Find the SATC unit whose device scope lists @dev (or the physical function
 * behind it, when pci_physfn() returns a different device) within the same
 * PCI segment. Returns the unit or NULL.
 */
static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
{
	struct dmar_satc_unit *satcu;
	struct acpi_dmar_satc *satc;
	struct device *tmp;
	int i;

	dev = pci_physfn(dev);
	rcu_read_lock();

	list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
		if (satc->segment != pci_domain_nr(dev->bus))
			continue;
		for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
			if (to_pci_dev(tmp) == dev)
				goto out;
	}
	/* Loop finished without a match. */
	satcu = NULL;
out:
	rcu_read_unlock();
	return satcu;
}

/*
 * Decide whether ATS may be enabled for @dev behind @iommu.
 *
 * A device listed in a SATC unit is decided from that unit alone. Otherwise
 * the bus hierarchy is walked up to the root port, which must be covered by
 * an ATSR unit (listed explicitly, or via an include-all unit) of the same
 * segment.
 *
 * Returns 1 when ATS is allowed, 0 when it is not.
 */
static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
{
	int i, ret = 1;
	struct pci_bus *bus;
	struct pci_dev *bridge = NULL;
	struct device *tmp;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;
	struct dmar_satc_unit *satcu;

	dev = pci_physfn(dev);
	satcu = dmar_find_matched_satc_unit(dev);
	if (satcu)
		/*
		 * This device supports ATS as it is in SATC table.
		 * When IOMMU is in legacy mode, enabling ATS is done
		 * automatically by HW for the device that requires
		 * ATS, hence OS should not enable this device ATS
		 * to avoid duplicated TLB invalidation.
*/
		return !(satcu->atc_required && !sm_supported(iommu));

	for (bus = dev->bus; bus; bus = bus->parent) {
		bridge = bus->self;
		/* If it's an integrated device, allow ATS */
		if (!bridge)
			return 1;
		/* Connected via non-PCIe: no ATS */
		if (!pci_is_pcie(bridge) ||
		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
			return 0;
		/* If we found the root port, look it up in the ATSR */
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
			break;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (atsr->segment != pci_domain_nr(dev->bus))
			continue;

		/* ret is still 1 when jumping to out from here. */
		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
			if (tmp == &bridge->dev)
				goto out;

		if (atsru->include_all)
			goto out;
	}
	ret = 0;
out:
	rcu_read_unlock();

	return ret;
}

/*
 * PCI bus notifier helper: keep the device scopes of all RMRR, ATSR and
 * SATC units in sync when a device is added (BUS_NOTIFY_ADD_DEVICE) or has
 * been removed (BUS_NOTIFY_REMOVED_DEVICE).
 *
 * Returns 0 on success or a negative error from dmar_insert_dev_scope().
 */
int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
{
	int ret;
	struct dmar_rmrr_unit *rmrru;
	struct dmar_atsr_unit *atsru;
	struct dmar_satc_unit *satcu;
	struct acpi_dmar_atsr *atsr;
	struct acpi_dmar_reserved_memory *rmrr;
	struct acpi_dmar_satc *satc;

	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
		return 0;

	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
		rmrr = container_of(rmrru->hdr,
				    struct acpi_dmar_reserved_memory, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				rmrr->segment, rmrru->devices,
				rmrru->devices_cnt);
			if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
			dmar_remove_dev_scope(info, rmrr->segment,
				rmrru->devices, rmrru->devices_cnt);
		}
	}

	list_for_each_entry(atsru,
&dmar_atsr_units, list) {
		/* Include-all units carry no explicit device scope to update. */
		if (atsru->include_all)
			continue;

		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
					(void *)atsr + atsr->header.length,
					atsr->segment, atsru->devices,
					atsru->devices_cnt);
			/* A positive return ends the search over ATSR units. */
			if (ret > 0)
				break;
			else if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
			if (dmar_remove_dev_scope(info, atsr->segment,
					atsru->devices, atsru->devices_cnt))
				break;
		}
	}
	list_for_each_entry(satcu, &dmar_satc_units, list) {
		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
			ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
					(void *)satc + satc->header.length,
					satc->segment, satcu->devices,
					satcu->devices_cnt);
			/* A positive return ends the search over SATC units. */
			if (ret > 0)
				break;
			else if (ret < 0)
				return ret;
		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
			if (dmar_remove_dev_scope(info, satc->segment,
					satcu->devices, satcu->devices_cnt))
				break;
		}
	}

	return 0;
}

/*
 * Memory hotplug notifier that keeps si_domain's identity map in step with
 * system memory: memory going online is mapped, memory going offline (or
 * whose onlining was cancelled) is unmapped and flushed from the IOTLBs.
 *
 * Returns NOTIFY_BAD when the identity map cannot be extended, NOTIFY_OK
 * otherwise.
 */
static int intel_iommu_memory_notifier(struct notifier_block *nb,
				       unsigned long val, void *v)
{
	struct memory_notify *mhp = v;
	unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn);
	/* Inclusive last PFN of the affected range. */
	unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn +
			mhp->nr_pages - 1);

	switch (val) {
	case MEM_GOING_ONLINE:
		if (iommu_domain_identity_map(si_domain,
					      start_vpfn, last_vpfn)) {
			pr_warn("Failed to build identity map for [%lx-%lx]\n",
				start_vpfn, last_vpfn);
			return NOTIFY_BAD;
		}
		break;

	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
		{
			struct dmar_drhd_unit *drhd;
			struct intel_iommu *iommu;
			LIST_HEAD(freelist);

			domain_unmap(si_domain, start_vpfn,
last_vpfn, &freelist);

			/*
			 * Flush every active IOMMU first, and only then give
			 * the collected page-table pages back.
			 */
			rcu_read_lock();
			for_each_active_iommu(iommu, drhd)
				iommu_flush_iotlb_psi(iommu, si_domain,
					start_vpfn, mhp->nr_pages,
					list_empty(&freelist), 0);
			rcu_read_unlock();
			put_pages_list(&freelist);
		}
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block intel_iommu_memory_nb = {
	.notifier_call = intel_iommu_memory_notifier,
	.priority = 0
};

/* Disable DMA translation on every IOMMU in the system. */
static void intel_disable_iommus(void)
{
	struct intel_iommu *iommu = NULL;
	struct dmar_drhd_unit *drhd;

	for_each_iommu(iommu, drhd)
		iommu_disable_translation(iommu);
}

/*
 * Shutdown hook: with dmar_global_lock held for writing, disable the
 * protected memory regions and then translation on every IOMMU. Does
 * nothing when the IOMMU is disabled (no_iommu or dmar_disabled).
 */
void intel_iommu_shutdown(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;

	if (no_iommu || dmar_disabled)
		return;

	down_write(&dmar_global_lock);

	/* Disable PMRs explicitly here. */
	for_each_iommu(iommu, drhd)
		iommu_disable_protect_mem_regions(iommu);

	/* Make sure the IOMMUs are switched off */
	intel_disable_iommus();

	up_write(&dmar_global_lock);
}

/* Map the sysfs device @dev to the intel_iommu that embeds its iommu_device. */
static struct intel_iommu *dev_to_intel_iommu(struct device *dev)
{
	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);

	return container_of(iommu_dev, struct intel_iommu, iommu);
}

/* sysfs "version": "major:minor" decoded from the DMAR_VER_REG register. */
static ssize_t version_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	u32 ver = readl(iommu->reg + DMAR_VER_REG);
	return sysfs_emit(buf, "%d:%d\n",
			  DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
}
static DEVICE_ATTR_RO(version);

/* sysfs "address": iommu->reg_phys in hexadecimal. */
static ssize_t address_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
	return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
}
static DEVICE_ATTR_RO(address);

static ssize_t cap_show(struct device *dev, 3497 struct device_attribute *attr, char *buf) 3498 { 3499 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 3500 return sysfs_emit(buf, "%llx\n", iommu->cap); 3501 } 3502 static DEVICE_ATTR_RO(cap); 3503 3504 static ssize_t ecap_show(struct device *dev, 3505 struct device_attribute *attr, char *buf) 3506 { 3507 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 3508 return sysfs_emit(buf, "%llx\n", iommu->ecap); 3509 } 3510 static DEVICE_ATTR_RO(ecap); 3511 3512 static ssize_t domains_supported_show(struct device *dev, 3513 struct device_attribute *attr, char *buf) 3514 { 3515 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 3516 return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap)); 3517 } 3518 static DEVICE_ATTR_RO(domains_supported); 3519 3520 static ssize_t domains_used_show(struct device *dev, 3521 struct device_attribute *attr, char *buf) 3522 { 3523 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 3524 return sysfs_emit(buf, "%d\n", 3525 bitmap_weight(iommu->domain_ids, 3526 cap_ndoms(iommu->cap))); 3527 } 3528 static DEVICE_ATTR_RO(domains_used); 3529 3530 static struct attribute *intel_iommu_attrs[] = { 3531 &dev_attr_version.attr, 3532 &dev_attr_address.attr, 3533 &dev_attr_cap.attr, 3534 &dev_attr_ecap.attr, 3535 &dev_attr_domains_supported.attr, 3536 &dev_attr_domains_used.attr, 3537 NULL, 3538 }; 3539 3540 static struct attribute_group intel_iommu_group = { 3541 .name = "intel-iommu", 3542 .attrs = intel_iommu_attrs, 3543 }; 3544 3545 const struct attribute_group *intel_iommu_groups[] = { 3546 &intel_iommu_group, 3547 NULL, 3548 }; 3549 3550 static bool has_external_pci(void) 3551 { 3552 struct pci_dev *pdev = NULL; 3553 3554 for_each_pci_dev(pdev) 3555 if (pdev->external_facing) { 3556 pci_dev_put(pdev); 3557 return true; 3558 } 3559 3560 return false; 3561 } 3562 3563 static int __init platform_optin_force_iommu(void) 3564 { 3565 if (!dmar_platform_optin() || no_platform_optin || 
!has_external_pci()) 3566 return 0; 3567 3568 if (no_iommu || dmar_disabled) 3569 pr_info("Intel-IOMMU force enabled due to platform opt in\n"); 3570 3571 /* 3572 * If Intel-IOMMU is disabled by default, we will apply identity 3573 * map for all devices except those marked as being untrusted. 3574 */ 3575 if (dmar_disabled) 3576 iommu_set_default_passthrough(false); 3577 3578 dmar_disabled = 0; 3579 no_iommu = 0; 3580 3581 return 1; 3582 } 3583 3584 static int __init probe_acpi_namespace_devices(void) 3585 { 3586 struct dmar_drhd_unit *drhd; 3587 /* To avoid a -Wunused-but-set-variable warning. */ 3588 struct intel_iommu *iommu __maybe_unused; 3589 struct device *dev; 3590 int i, ret = 0; 3591 3592 for_each_active_iommu(iommu, drhd) { 3593 for_each_active_dev_scope(drhd->devices, 3594 drhd->devices_cnt, i, dev) { 3595 struct acpi_device_physical_node *pn; 3596 struct acpi_device *adev; 3597 3598 if (dev->bus != &acpi_bus_type) 3599 continue; 3600 3601 adev = to_acpi_device(dev); 3602 mutex_lock(&adev->physical_node_lock); 3603 list_for_each_entry(pn, 3604 &adev->physical_node_list, node) { 3605 ret = iommu_probe_device(pn->dev); 3606 if (ret) 3607 break; 3608 } 3609 mutex_unlock(&adev->physical_node_lock); 3610 3611 if (ret) 3612 return ret; 3613 } 3614 } 3615 3616 return 0; 3617 } 3618 3619 static __init int tboot_force_iommu(void) 3620 { 3621 if (!tboot_enabled()) 3622 return 0; 3623 3624 if (no_iommu || dmar_disabled) 3625 pr_warn("Forcing Intel-IOMMU to enabled\n"); 3626 3627 dmar_disabled = 0; 3628 no_iommu = 0; 3629 3630 return 1; 3631 } 3632 3633 int __init intel_iommu_init(void) 3634 { 3635 int ret = -ENODEV; 3636 struct dmar_drhd_unit *drhd; 3637 struct intel_iommu *iommu; 3638 3639 /* 3640 * Intel IOMMU is required for a TXT/tboot launch or platform 3641 * opt in, so enforce that. 
3642 */ 3643 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || 3644 platform_optin_force_iommu(); 3645 3646 down_write(&dmar_global_lock); 3647 if (dmar_table_init()) { 3648 if (force_on) 3649 panic("tboot: Failed to initialize DMAR table\n"); 3650 goto out_free_dmar; 3651 } 3652 3653 if (dmar_dev_scope_init() < 0) { 3654 if (force_on) 3655 panic("tboot: Failed to initialize DMAR device scope\n"); 3656 goto out_free_dmar; 3657 } 3658 3659 up_write(&dmar_global_lock); 3660 3661 /* 3662 * The bus notifier takes the dmar_global_lock, so lockdep will 3663 * complain later when we register it under the lock. 3664 */ 3665 dmar_register_bus_notifier(); 3666 3667 down_write(&dmar_global_lock); 3668 3669 if (!no_iommu) 3670 intel_iommu_debugfs_init(); 3671 3672 if (no_iommu || dmar_disabled) { 3673 /* 3674 * We exit the function here to ensure IOMMU's remapping and 3675 * mempool aren't setup, which means that the IOMMU's PMRs 3676 * won't be disabled via the call to init_dmars(). So disable 3677 * it explicitly here. The PMRs were setup by tboot prior to 3678 * calling SENTER, but the kernel is expected to reset/tear 3679 * down the PMRs. 
3680 */ 3681 if (intel_iommu_tboot_noforce) { 3682 for_each_iommu(iommu, drhd) 3683 iommu_disable_protect_mem_regions(iommu); 3684 } 3685 3686 /* 3687 * Make sure the IOMMUs are switched off, even when we 3688 * boot into a kexec kernel and the previous kernel left 3689 * them enabled 3690 */ 3691 intel_disable_iommus(); 3692 goto out_free_dmar; 3693 } 3694 3695 if (list_empty(&dmar_rmrr_units)) 3696 pr_info("No RMRR found\n"); 3697 3698 if (list_empty(&dmar_atsr_units)) 3699 pr_info("No ATSR found\n"); 3700 3701 if (list_empty(&dmar_satc_units)) 3702 pr_info("No SATC found\n"); 3703 3704 init_no_remapping_devices(); 3705 3706 ret = init_dmars(); 3707 if (ret) { 3708 if (force_on) 3709 panic("tboot: Failed to initialize DMARs\n"); 3710 pr_err("Initialization failed\n"); 3711 goto out_free_dmar; 3712 } 3713 up_write(&dmar_global_lock); 3714 3715 init_iommu_pm_ops(); 3716 3717 down_read(&dmar_global_lock); 3718 for_each_active_iommu(iommu, drhd) { 3719 /* 3720 * The flush queue implementation does not perform 3721 * page-selective invalidations that are required for efficient 3722 * TLB flushes in virtual environments. The benefit of batching 3723 * is likely to be much lower than the overhead of synchronizing 3724 * the virtual and physical IOMMU page-tables. 
3725 */ 3726 if (cap_caching_mode(iommu->cap) && 3727 !first_level_by_default(IOMMU_DOMAIN_DMA)) { 3728 pr_info_once("IOMMU batching disallowed due to virtualization\n"); 3729 iommu_set_dma_strict(); 3730 } 3731 iommu_device_sysfs_add(&iommu->iommu, NULL, 3732 intel_iommu_groups, 3733 "%s", iommu->name); 3734 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); 3735 3736 iommu_pmu_register(iommu); 3737 } 3738 up_read(&dmar_global_lock); 3739 3740 if (si_domain && !hw_pass_through) 3741 register_memory_notifier(&intel_iommu_memory_nb); 3742 3743 down_read(&dmar_global_lock); 3744 if (probe_acpi_namespace_devices()) 3745 pr_warn("ACPI name space devices didn't probe correctly\n"); 3746 3747 /* Finally, we enable the DMA remapping hardware. */ 3748 for_each_iommu(iommu, drhd) { 3749 if (!drhd->ignored && !translation_pre_enabled(iommu)) 3750 iommu_enable_translation(iommu); 3751 3752 iommu_disable_protect_mem_regions(iommu); 3753 } 3754 up_read(&dmar_global_lock); 3755 3756 pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); 3757 3758 intel_iommu_enabled = 1; 3759 3760 return 0; 3761 3762 out_free_dmar: 3763 intel_iommu_free_dmars(); 3764 up_write(&dmar_global_lock); 3765 return ret; 3766 } 3767 3768 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque) 3769 { 3770 struct device_domain_info *info = opaque; 3771 3772 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff); 3773 return 0; 3774 } 3775 3776 /* 3777 * NB - intel-iommu lacks any sort of reference counting for the users of 3778 * dependent devices. If multiple endpoints have intersecting dependent 3779 * devices, unbinding the driver from any one of them will possibly leave 3780 * the others unable to operate. 
3781 */ 3782 static void domain_context_clear(struct device_domain_info *info) 3783 { 3784 if (!dev_is_pci(info->dev)) 3785 domain_context_clear_one(info, info->bus, info->devfn); 3786 3787 pci_for_each_dma_alias(to_pci_dev(info->dev), 3788 &domain_context_clear_one_cb, info); 3789 } 3790 3791 /* 3792 * Clear the page table pointer in context or pasid table entries so that 3793 * all DMA requests without PASID from the device are blocked. If the page 3794 * table has been set, clean up the data structures. 3795 */ 3796 void device_block_translation(struct device *dev) 3797 { 3798 struct device_domain_info *info = dev_iommu_priv_get(dev); 3799 struct intel_iommu *iommu = info->iommu; 3800 unsigned long flags; 3801 3802 iommu_disable_pci_caps(info); 3803 if (!dev_is_real_dma_subdevice(dev)) { 3804 if (sm_supported(iommu)) 3805 intel_pasid_tear_down_entry(iommu, dev, 3806 IOMMU_NO_PASID, false); 3807 else 3808 domain_context_clear(info); 3809 } 3810 3811 if (!info->domain) 3812 return; 3813 3814 spin_lock_irqsave(&info->domain->lock, flags); 3815 list_del(&info->link); 3816 spin_unlock_irqrestore(&info->domain->lock, flags); 3817 3818 domain_detach_iommu(info->domain, iommu); 3819 info->domain = NULL; 3820 } 3821 3822 static int md_domain_init(struct dmar_domain *domain, int guest_width) 3823 { 3824 int adjust_width; 3825 3826 /* calculate AGAW */ 3827 domain->gaw = guest_width; 3828 adjust_width = guestwidth_to_adjustwidth(guest_width); 3829 domain->agaw = width_to_agaw(adjust_width); 3830 3831 domain->iommu_coherency = false; 3832 domain->iommu_superpage = 0; 3833 domain->max_addr = 0; 3834 3835 /* always allocate the top pgd */ 3836 domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC); 3837 if (!domain->pgd) 3838 return -ENOMEM; 3839 domain_flush_cache(domain, domain->pgd, PAGE_SIZE); 3840 return 0; 3841 } 3842 3843 static int blocking_domain_attach_dev(struct iommu_domain *domain, 3844 struct device *dev) 3845 { 3846 device_block_translation(dev); 3847 return 
0; 3848 } 3849 3850 static struct iommu_domain blocking_domain = { 3851 .type = IOMMU_DOMAIN_BLOCKED, 3852 .ops = &(const struct iommu_domain_ops) { 3853 .attach_dev = blocking_domain_attach_dev, 3854 } 3855 }; 3856 3857 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) 3858 { 3859 struct dmar_domain *dmar_domain; 3860 struct iommu_domain *domain; 3861 3862 switch (type) { 3863 case IOMMU_DOMAIN_DMA: 3864 case IOMMU_DOMAIN_UNMANAGED: 3865 dmar_domain = alloc_domain(type); 3866 if (!dmar_domain) { 3867 pr_err("Can't allocate dmar_domain\n"); 3868 return NULL; 3869 } 3870 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 3871 pr_err("Domain initialization failed\n"); 3872 domain_exit(dmar_domain); 3873 return NULL; 3874 } 3875 3876 domain = &dmar_domain->domain; 3877 domain->geometry.aperture_start = 0; 3878 domain->geometry.aperture_end = 3879 __DOMAIN_MAX_ADDR(dmar_domain->gaw); 3880 domain->geometry.force_aperture = true; 3881 3882 return domain; 3883 case IOMMU_DOMAIN_IDENTITY: 3884 return &si_domain->domain; 3885 case IOMMU_DOMAIN_SVA: 3886 return intel_svm_domain_alloc(); 3887 default: 3888 return NULL; 3889 } 3890 3891 return NULL; 3892 } 3893 3894 static struct iommu_domain * 3895 intel_iommu_domain_alloc_user(struct device *dev, u32 flags, 3896 struct iommu_domain *parent, 3897 const struct iommu_user_data *user_data) 3898 { 3899 struct device_domain_info *info = dev_iommu_priv_get(dev); 3900 bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; 3901 bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; 3902 struct intel_iommu *iommu = info->iommu; 3903 struct dmar_domain *dmar_domain; 3904 struct iommu_domain *domain; 3905 3906 /* Must be NESTING domain */ 3907 if (parent) { 3908 if (!nested_supported(iommu) || flags) 3909 return ERR_PTR(-EOPNOTSUPP); 3910 return intel_nested_domain_alloc(parent, user_data); 3911 } 3912 3913 if (flags & 3914 (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) 
3915 return ERR_PTR(-EOPNOTSUPP); 3916 if (nested_parent && !nested_supported(iommu)) 3917 return ERR_PTR(-EOPNOTSUPP); 3918 if (user_data || (dirty_tracking && !ssads_supported(iommu))) 3919 return ERR_PTR(-EOPNOTSUPP); 3920 3921 /* 3922 * domain_alloc_user op needs to fully initialize a domain before 3923 * return, so uses iommu_domain_alloc() here for simple. 3924 */ 3925 domain = iommu_domain_alloc(dev->bus); 3926 if (!domain) 3927 return ERR_PTR(-ENOMEM); 3928 3929 dmar_domain = to_dmar_domain(domain); 3930 3931 if (nested_parent) { 3932 dmar_domain->nested_parent = true; 3933 INIT_LIST_HEAD(&dmar_domain->s1_domains); 3934 spin_lock_init(&dmar_domain->s1_lock); 3935 } 3936 3937 if (dirty_tracking) { 3938 if (dmar_domain->use_first_level) { 3939 iommu_domain_free(domain); 3940 return ERR_PTR(-EOPNOTSUPP); 3941 } 3942 domain->dirty_ops = &intel_dirty_ops; 3943 } 3944 3945 return domain; 3946 } 3947 3948 static void intel_iommu_domain_free(struct iommu_domain *domain) 3949 { 3950 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3951 3952 WARN_ON(dmar_domain->nested_parent && 3953 !list_empty(&dmar_domain->s1_domains)); 3954 if (domain != &si_domain->domain) 3955 domain_exit(dmar_domain); 3956 } 3957 3958 int prepare_domain_attach_device(struct iommu_domain *domain, 3959 struct device *dev) 3960 { 3961 struct device_domain_info *info = dev_iommu_priv_get(dev); 3962 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3963 struct intel_iommu *iommu = info->iommu; 3964 int addr_width; 3965 3966 if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) 3967 return -EINVAL; 3968 3969 if (domain->dirty_ops && !ssads_supported(iommu)) 3970 return -EINVAL; 3971 3972 /* check if this iommu agaw is sufficient for max mapped address */ 3973 addr_width = agaw_to_width(iommu->agaw); 3974 if (addr_width > cap_mgaw(iommu->cap)) 3975 addr_width = cap_mgaw(iommu->cap); 3976 3977 if (dmar_domain->max_addr > (1LL << addr_width)) 3978 return -EINVAL; 3979 
dmar_domain->gaw = addr_width; 3980 3981 /* 3982 * Knock out extra levels of page tables if necessary 3983 */ 3984 while (iommu->agaw < dmar_domain->agaw) { 3985 struct dma_pte *pte; 3986 3987 pte = dmar_domain->pgd; 3988 if (dma_pte_present(pte)) { 3989 dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte)); 3990 free_pgtable_page(pte); 3991 } 3992 dmar_domain->agaw--; 3993 } 3994 3995 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && 3996 context_copied(iommu, info->bus, info->devfn)) 3997 return intel_pasid_setup_sm_context(dev); 3998 3999 return 0; 4000 } 4001 4002 static int intel_iommu_attach_device(struct iommu_domain *domain, 4003 struct device *dev) 4004 { 4005 struct device_domain_info *info = dev_iommu_priv_get(dev); 4006 int ret; 4007 4008 if (info->domain) 4009 device_block_translation(dev); 4010 4011 ret = prepare_domain_attach_device(domain, dev); 4012 if (ret) 4013 return ret; 4014 4015 return dmar_domain_attach_device(to_dmar_domain(domain), dev); 4016 } 4017 4018 static int intel_iommu_map(struct iommu_domain *domain, 4019 unsigned long iova, phys_addr_t hpa, 4020 size_t size, int iommu_prot, gfp_t gfp) 4021 { 4022 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 4023 u64 max_addr; 4024 int prot = 0; 4025 4026 if (iommu_prot & IOMMU_READ) 4027 prot |= DMA_PTE_READ; 4028 if (iommu_prot & IOMMU_WRITE) 4029 prot |= DMA_PTE_WRITE; 4030 if (dmar_domain->set_pte_snp) 4031 prot |= DMA_PTE_SNP; 4032 4033 max_addr = iova + size; 4034 if (dmar_domain->max_addr < max_addr) { 4035 u64 end; 4036 4037 /* check if minimum agaw is sufficient for mapped address */ 4038 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1; 4039 if (end < max_addr) { 4040 pr_err("%s: iommu width (%d) is not " 4041 "sufficient for the mapped address (%llx)\n", 4042 __func__, dmar_domain->gaw, max_addr); 4043 return -EFAULT; 4044 } 4045 dmar_domain->max_addr = max_addr; 4046 } 4047 /* Round up size to next multiple of PAGE_SIZE, if it and 4048 the low bits of hpa would take 
us onto the next page */ 4049 size = aligned_nrpages(hpa, size); 4050 return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, 4051 hpa >> VTD_PAGE_SHIFT, size, prot, gfp); 4052 } 4053 4054 static int intel_iommu_map_pages(struct iommu_domain *domain, 4055 unsigned long iova, phys_addr_t paddr, 4056 size_t pgsize, size_t pgcount, 4057 int prot, gfp_t gfp, size_t *mapped) 4058 { 4059 unsigned long pgshift = __ffs(pgsize); 4060 size_t size = pgcount << pgshift; 4061 int ret; 4062 4063 if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G) 4064 return -EINVAL; 4065 4066 if (!IS_ALIGNED(iova | paddr, pgsize)) 4067 return -EINVAL; 4068 4069 ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp); 4070 if (!ret && mapped) 4071 *mapped = size; 4072 4073 return ret; 4074 } 4075 4076 static size_t intel_iommu_unmap(struct iommu_domain *domain, 4077 unsigned long iova, size_t size, 4078 struct iommu_iotlb_gather *gather) 4079 { 4080 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 4081 unsigned long start_pfn, last_pfn; 4082 int level = 0; 4083 4084 /* Cope with horrid API which requires us to unmap more than the 4085 size argument if it happens to be a large-page mapping. */ 4086 if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 4087 &level, GFP_ATOMIC))) 4088 return 0; 4089 4090 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) 4091 size = VTD_PAGE_SIZE << level_to_offset_bits(level); 4092 4093 start_pfn = iova >> VTD_PAGE_SHIFT; 4094 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT; 4095 4096 domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist); 4097 4098 if (dmar_domain->max_addr == iova + size) 4099 dmar_domain->max_addr = iova; 4100 4101 /* 4102 * We do not use page-selective IOTLB invalidation in flush queue, 4103 * so there is no need to track page and sync iotlb. 
4104 */ 4105 if (!iommu_iotlb_gather_queued(gather)) 4106 iommu_iotlb_gather_add_page(domain, gather, iova, size); 4107 4108 return size; 4109 } 4110 4111 static size_t intel_iommu_unmap_pages(struct iommu_domain *domain, 4112 unsigned long iova, 4113 size_t pgsize, size_t pgcount, 4114 struct iommu_iotlb_gather *gather) 4115 { 4116 unsigned long pgshift = __ffs(pgsize); 4117 size_t size = pgcount << pgshift; 4118 4119 return intel_iommu_unmap(domain, iova, size, gather); 4120 } 4121 4122 static void intel_iommu_tlb_sync(struct iommu_domain *domain, 4123 struct iommu_iotlb_gather *gather) 4124 { 4125 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 4126 unsigned long iova_pfn = IOVA_PFN(gather->start); 4127 size_t size = gather->end - gather->start; 4128 struct iommu_domain_info *info; 4129 unsigned long start_pfn; 4130 unsigned long nrpages; 4131 unsigned long i; 4132 4133 nrpages = aligned_nrpages(gather->start, size); 4134 start_pfn = mm_to_dma_pfn_start(iova_pfn); 4135 4136 xa_for_each(&dmar_domain->iommu_array, i, info) 4137 iommu_flush_iotlb_psi(info->iommu, dmar_domain, 4138 start_pfn, nrpages, 4139 list_empty(&gather->freelist), 0); 4140 4141 if (dmar_domain->nested_parent) 4142 parent_domain_flush(dmar_domain, start_pfn, nrpages, 4143 list_empty(&gather->freelist)); 4144 put_pages_list(&gather->freelist); 4145 } 4146 4147 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 4148 dma_addr_t iova) 4149 { 4150 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 4151 struct dma_pte *pte; 4152 int level = 0; 4153 u64 phys = 0; 4154 4155 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, 4156 GFP_ATOMIC); 4157 if (pte && dma_pte_present(pte)) 4158 phys = dma_pte_addr(pte) + 4159 (iova & (BIT_MASK(level_to_offset_bits(level) + 4160 VTD_PAGE_SHIFT) - 1)); 4161 4162 return phys; 4163 } 4164 4165 static bool domain_support_force_snooping(struct dmar_domain *domain) 4166 { 4167 struct device_domain_info *info; 4168 
bool support = true; 4169 4170 assert_spin_locked(&domain->lock); 4171 list_for_each_entry(info, &domain->devices, link) { 4172 if (!ecap_sc_support(info->iommu->ecap)) { 4173 support = false; 4174 break; 4175 } 4176 } 4177 4178 return support; 4179 } 4180 4181 static void domain_set_force_snooping(struct dmar_domain *domain) 4182 { 4183 struct device_domain_info *info; 4184 4185 assert_spin_locked(&domain->lock); 4186 /* 4187 * Second level page table supports per-PTE snoop control. The 4188 * iommu_map() interface will handle this by setting SNP bit. 4189 */ 4190 if (!domain->use_first_level) { 4191 domain->set_pte_snp = true; 4192 return; 4193 } 4194 4195 list_for_each_entry(info, &domain->devices, link) 4196 intel_pasid_setup_page_snoop_control(info->iommu, info->dev, 4197 IOMMU_NO_PASID); 4198 } 4199 4200 static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) 4201 { 4202 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 4203 unsigned long flags; 4204 4205 if (dmar_domain->force_snooping) 4206 return true; 4207 4208 spin_lock_irqsave(&dmar_domain->lock, flags); 4209 if (!domain_support_force_snooping(dmar_domain) || 4210 (!dmar_domain->use_first_level && dmar_domain->has_mappings)) { 4211 spin_unlock_irqrestore(&dmar_domain->lock, flags); 4212 return false; 4213 } 4214 4215 domain_set_force_snooping(dmar_domain); 4216 dmar_domain->force_snooping = true; 4217 spin_unlock_irqrestore(&dmar_domain->lock, flags); 4218 4219 return true; 4220 } 4221 4222 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) 4223 { 4224 struct device_domain_info *info = dev_iommu_priv_get(dev); 4225 4226 switch (cap) { 4227 case IOMMU_CAP_CACHE_COHERENCY: 4228 case IOMMU_CAP_DEFERRED_FLUSH: 4229 return true; 4230 case IOMMU_CAP_PRE_BOOT_PROTECTION: 4231 return dmar_platform_optin(); 4232 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: 4233 return ecap_sc_support(info->iommu->ecap); 4234 case IOMMU_CAP_DIRTY_TRACKING: 4235 return 
ssads_supported(info->iommu); 4236 default: 4237 return false; 4238 } 4239 } 4240 4241 static struct iommu_device *intel_iommu_probe_device(struct device *dev) 4242 { 4243 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; 4244 struct device_domain_info *info; 4245 struct intel_iommu *iommu; 4246 u8 bus, devfn; 4247 int ret; 4248 4249 iommu = device_lookup_iommu(dev, &bus, &devfn); 4250 if (!iommu || !iommu->iommu.ops) 4251 return ERR_PTR(-ENODEV); 4252 4253 info = kzalloc(sizeof(*info), GFP_KERNEL); 4254 if (!info) 4255 return ERR_PTR(-ENOMEM); 4256 4257 if (dev_is_real_dma_subdevice(dev)) { 4258 info->bus = pdev->bus->number; 4259 info->devfn = pdev->devfn; 4260 info->segment = pci_domain_nr(pdev->bus); 4261 } else { 4262 info->bus = bus; 4263 info->devfn = devfn; 4264 info->segment = iommu->segment; 4265 } 4266 4267 info->dev = dev; 4268 info->iommu = iommu; 4269 if (dev_is_pci(dev)) { 4270 if (ecap_dev_iotlb_support(iommu->ecap) && 4271 pci_ats_supported(pdev) && 4272 dmar_ats_supported(pdev, iommu)) { 4273 info->ats_supported = 1; 4274 info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); 4275 4276 /* 4277 * For IOMMU that supports device IOTLB throttling 4278 * (DIT), we assign PFSID to the invalidation desc 4279 * of a VF such that IOMMU HW can gauge queue depth 4280 * at PF level. If DIT is not set, PFSID will be 4281 * treated as reserved, which should be set to 0. 
4282 */ 4283 if (ecap_dit(iommu->ecap)) 4284 info->pfsid = pci_dev_id(pci_physfn(pdev)); 4285 info->ats_qdep = pci_ats_queue_depth(pdev); 4286 } 4287 if (sm_supported(iommu)) { 4288 if (pasid_supported(iommu)) { 4289 int features = pci_pasid_features(pdev); 4290 4291 if (features >= 0) 4292 info->pasid_supported = features | 1; 4293 } 4294 4295 if (info->ats_supported && ecap_prs(iommu->ecap) && 4296 pci_pri_supported(pdev)) 4297 info->pri_supported = 1; 4298 } 4299 } 4300 4301 dev_iommu_priv_set(dev, info); 4302 if (pdev && pci_ats_supported(pdev)) { 4303 ret = device_rbtree_insert(iommu, info); 4304 if (ret) 4305 goto free; 4306 } 4307 4308 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { 4309 ret = intel_pasid_alloc_table(dev); 4310 if (ret) { 4311 dev_err(dev, "PASID table allocation failed\n"); 4312 goto clear_rbtree; 4313 } 4314 4315 if (!context_copied(iommu, info->bus, info->devfn)) { 4316 ret = intel_pasid_setup_sm_context(dev); 4317 if (ret) 4318 goto free_table; 4319 } 4320 } 4321 4322 intel_iommu_debugfs_create_dev(info); 4323 4324 return &iommu->iommu; 4325 free_table: 4326 intel_pasid_free_table(dev); 4327 clear_rbtree: 4328 device_rbtree_remove(info); 4329 free: 4330 kfree(info); 4331 4332 return ERR_PTR(ret); 4333 } 4334 4335 static void intel_iommu_release_device(struct device *dev) 4336 { 4337 struct device_domain_info *info = dev_iommu_priv_get(dev); 4338 struct intel_iommu *iommu = info->iommu; 4339 4340 mutex_lock(&iommu->iopf_lock); 4341 if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev))) 4342 device_rbtree_remove(info); 4343 mutex_unlock(&iommu->iopf_lock); 4344 4345 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && 4346 !context_copied(iommu, info->bus, info->devfn)) 4347 intel_pasid_teardown_sm_context(dev); 4348 4349 intel_pasid_free_table(dev); 4350 intel_iommu_debugfs_remove_dev(info); 4351 kfree(info); 4352 set_dma_ops(dev, NULL); 4353 } 4354 4355 static void intel_iommu_probe_finalize(struct device *dev) 
4356 { 4357 set_dma_ops(dev, NULL); 4358 iommu_setup_dma_ops(dev, 0, U64_MAX); 4359 } 4360 4361 static void intel_iommu_get_resv_regions(struct device *device, 4362 struct list_head *head) 4363 { 4364 int prot = DMA_PTE_READ | DMA_PTE_WRITE; 4365 struct iommu_resv_region *reg; 4366 struct dmar_rmrr_unit *rmrr; 4367 struct device *i_dev; 4368 int i; 4369 4370 rcu_read_lock(); 4371 for_each_rmrr_units(rmrr) { 4372 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 4373 i, i_dev) { 4374 struct iommu_resv_region *resv; 4375 enum iommu_resv_type type; 4376 size_t length; 4377 4378 if (i_dev != device && 4379 !is_downstream_to_pci_bridge(device, i_dev)) 4380 continue; 4381 4382 length = rmrr->end_address - rmrr->base_address + 1; 4383 4384 type = device_rmrr_is_relaxable(device) ? 4385 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT; 4386 4387 resv = iommu_alloc_resv_region(rmrr->base_address, 4388 length, prot, type, 4389 GFP_ATOMIC); 4390 if (!resv) 4391 break; 4392 4393 list_add_tail(&resv->list, head); 4394 } 4395 } 4396 rcu_read_unlock(); 4397 4398 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA 4399 if (dev_is_pci(device)) { 4400 struct pci_dev *pdev = to_pci_dev(device); 4401 4402 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { 4403 reg = iommu_alloc_resv_region(0, 1UL << 24, prot, 4404 IOMMU_RESV_DIRECT_RELAXABLE, 4405 GFP_KERNEL); 4406 if (reg) 4407 list_add_tail(®->list, head); 4408 } 4409 } 4410 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */ 4411 4412 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, 4413 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, 4414 0, IOMMU_RESV_MSI, GFP_KERNEL); 4415 if (!reg) 4416 return; 4417 list_add_tail(®->list, head); 4418 } 4419 4420 static struct iommu_group *intel_iommu_device_group(struct device *dev) 4421 { 4422 if (dev_is_pci(dev)) 4423 return pci_device_group(dev); 4424 return generic_device_group(dev); 4425 } 4426 4427 static int intel_iommu_enable_sva(struct device *dev) 4428 { 4429 struct device_domain_info *info = 
dev_iommu_priv_get(dev); 4430 struct intel_iommu *iommu; 4431 4432 if (!info || dmar_disabled) 4433 return -EINVAL; 4434 4435 iommu = info->iommu; 4436 if (!iommu) 4437 return -EINVAL; 4438 4439 if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) 4440 return -ENODEV; 4441 4442 if (!info->pasid_enabled || !info->ats_enabled) 4443 return -EINVAL; 4444 4445 /* 4446 * Devices having device-specific I/O fault handling should not 4447 * support PCI/PRI. The IOMMU side has no means to check the 4448 * capability of device-specific IOPF. Therefore, IOMMU can only 4449 * default that if the device driver enables SVA on a non-PRI 4450 * device, it will handle IOPF in its own way. 4451 */ 4452 if (!info->pri_supported) 4453 return 0; 4454 4455 /* Devices supporting PRI should have it enabled. */ 4456 if (!info->pri_enabled) 4457 return -EINVAL; 4458 4459 return 0; 4460 } 4461 4462 static int intel_iommu_enable_iopf(struct device *dev) 4463 { 4464 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; 4465 struct device_domain_info *info = dev_iommu_priv_get(dev); 4466 struct intel_iommu *iommu; 4467 int ret; 4468 4469 if (!pdev || !info || !info->ats_enabled || !info->pri_supported) 4470 return -ENODEV; 4471 4472 if (info->pri_enabled) 4473 return -EBUSY; 4474 4475 iommu = info->iommu; 4476 if (!iommu) 4477 return -EINVAL; 4478 4479 /* PASID is required in PRG Response Message. 
*/ 4480 if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) 4481 return -EINVAL; 4482 4483 ret = pci_reset_pri(pdev); 4484 if (ret) 4485 return ret; 4486 4487 ret = iopf_queue_add_device(iommu->iopf_queue, dev); 4488 if (ret) 4489 return ret; 4490 4491 ret = pci_enable_pri(pdev, PRQ_DEPTH); 4492 if (ret) { 4493 iopf_queue_remove_device(iommu->iopf_queue, dev); 4494 return ret; 4495 } 4496 4497 info->pri_enabled = 1; 4498 4499 return 0; 4500 } 4501 4502 static int intel_iommu_disable_iopf(struct device *dev) 4503 { 4504 struct device_domain_info *info = dev_iommu_priv_get(dev); 4505 struct intel_iommu *iommu = info->iommu; 4506 4507 if (!info->pri_enabled) 4508 return -EINVAL; 4509 4510 /* 4511 * PCIe spec states that by clearing PRI enable bit, the Page 4512 * Request Interface will not issue new page requests, but has 4513 * outstanding page requests that have been transmitted or are 4514 * queued for transmission. This is supposed to be called after 4515 * the device driver has stopped DMA, all PASIDs have been 4516 * unbound and the outstanding PRQs have been drained. 
4517 */ 4518 pci_disable_pri(to_pci_dev(dev)); 4519 info->pri_enabled = 0; 4520 iopf_queue_remove_device(iommu->iopf_queue, dev); 4521 4522 return 0; 4523 } 4524 4525 static int 4526 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) 4527 { 4528 switch (feat) { 4529 case IOMMU_DEV_FEAT_IOPF: 4530 return intel_iommu_enable_iopf(dev); 4531 4532 case IOMMU_DEV_FEAT_SVA: 4533 return intel_iommu_enable_sva(dev); 4534 4535 default: 4536 return -ENODEV; 4537 } 4538 } 4539 4540 static int 4541 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) 4542 { 4543 switch (feat) { 4544 case IOMMU_DEV_FEAT_IOPF: 4545 return intel_iommu_disable_iopf(dev); 4546 4547 case IOMMU_DEV_FEAT_SVA: 4548 return 0; 4549 4550 default: 4551 return -ENODEV; 4552 } 4553 } 4554 4555 static bool intel_iommu_is_attach_deferred(struct device *dev) 4556 { 4557 struct device_domain_info *info = dev_iommu_priv_get(dev); 4558 4559 return translation_pre_enabled(info->iommu) && !info->domain; 4560 } 4561 4562 /* 4563 * Check that the device does not live on an external facing PCI port that is 4564 * marked as untrusted. Such devices should not be able to apply quirks and 4565 * thus not be able to bypass the IOMMU restrictions. 
4566 */ 4567 static bool risky_device(struct pci_dev *pdev) 4568 { 4569 if (pdev->untrusted) { 4570 pci_info(pdev, 4571 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n", 4572 pdev->vendor, pdev->device); 4573 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n"); 4574 return true; 4575 } 4576 return false; 4577 } 4578 4579 static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, 4580 unsigned long iova, size_t size) 4581 { 4582 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 4583 unsigned long pages = aligned_nrpages(iova, size); 4584 unsigned long pfn = iova >> VTD_PAGE_SHIFT; 4585 struct iommu_domain_info *info; 4586 unsigned long i; 4587 4588 xa_for_each(&dmar_domain->iommu_array, i, info) 4589 __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); 4590 return 0; 4591 } 4592 4593 static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) 4594 { 4595 struct device_domain_info *info = dev_iommu_priv_get(dev); 4596 struct dev_pasid_info *curr, *dev_pasid = NULL; 4597 struct intel_iommu *iommu = info->iommu; 4598 struct dmar_domain *dmar_domain; 4599 struct iommu_domain *domain; 4600 unsigned long flags; 4601 4602 domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); 4603 if (WARN_ON_ONCE(!domain)) 4604 goto out_tear_down; 4605 4606 /* 4607 * The SVA implementation needs to handle its own stuffs like the mm 4608 * notification. Before consolidating that code into iommu core, let 4609 * the intel sva code handle it. 
4610 */ 4611 if (domain->type == IOMMU_DOMAIN_SVA) { 4612 intel_svm_remove_dev_pasid(dev, pasid); 4613 goto out_tear_down; 4614 } 4615 4616 dmar_domain = to_dmar_domain(domain); 4617 spin_lock_irqsave(&dmar_domain->lock, flags); 4618 list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { 4619 if (curr->dev == dev && curr->pasid == pasid) { 4620 list_del(&curr->link_domain); 4621 dev_pasid = curr; 4622 break; 4623 } 4624 } 4625 WARN_ON_ONCE(!dev_pasid); 4626 spin_unlock_irqrestore(&dmar_domain->lock, flags); 4627 4628 domain_detach_iommu(dmar_domain, iommu); 4629 intel_iommu_debugfs_remove_dev_pasid(dev_pasid); 4630 kfree(dev_pasid); 4631 out_tear_down: 4632 intel_pasid_tear_down_entry(iommu, dev, pasid, false); 4633 intel_drain_pasid_prq(dev, pasid); 4634 } 4635 4636 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, 4637 struct device *dev, ioasid_t pasid) 4638 { 4639 struct device_domain_info *info = dev_iommu_priv_get(dev); 4640 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 4641 struct intel_iommu *iommu = info->iommu; 4642 struct dev_pasid_info *dev_pasid; 4643 unsigned long flags; 4644 int ret; 4645 4646 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) 4647 return -EOPNOTSUPP; 4648 4649 if (domain->dirty_ops) 4650 return -EINVAL; 4651 4652 if (context_copied(iommu, info->bus, info->devfn)) 4653 return -EBUSY; 4654 4655 ret = prepare_domain_attach_device(domain, dev); 4656 if (ret) 4657 return ret; 4658 4659 dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); 4660 if (!dev_pasid) 4661 return -ENOMEM; 4662 4663 ret = domain_attach_iommu(dmar_domain, iommu); 4664 if (ret) 4665 goto out_free; 4666 4667 if (domain_type_is_si(dmar_domain)) 4668 ret = intel_pasid_setup_pass_through(iommu, dev, pasid); 4669 else if (dmar_domain->use_first_level) 4670 ret = domain_setup_first_level(iommu, dmar_domain, 4671 dev, pasid); 4672 else 4673 ret = intel_pasid_setup_second_level(iommu, dmar_domain, 4674 dev, pasid); 4675 if 
(ret) 4676 goto out_detach_iommu; 4677 4678 dev_pasid->dev = dev; 4679 dev_pasid->pasid = pasid; 4680 spin_lock_irqsave(&dmar_domain->lock, flags); 4681 list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); 4682 spin_unlock_irqrestore(&dmar_domain->lock, flags); 4683 4684 if (domain->type & __IOMMU_DOMAIN_PAGING) 4685 intel_iommu_debugfs_create_dev_pasid(dev_pasid); 4686 4687 return 0; 4688 out_detach_iommu: 4689 domain_detach_iommu(dmar_domain, iommu); 4690 out_free: 4691 kfree(dev_pasid); 4692 return ret; 4693 } 4694 4695 static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) 4696 { 4697 struct device_domain_info *info = dev_iommu_priv_get(dev); 4698 struct intel_iommu *iommu = info->iommu; 4699 struct iommu_hw_info_vtd *vtd; 4700 4701 vtd = kzalloc(sizeof(*vtd), GFP_KERNEL); 4702 if (!vtd) 4703 return ERR_PTR(-ENOMEM); 4704 4705 vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17; 4706 vtd->cap_reg = iommu->cap; 4707 vtd->ecap_reg = iommu->ecap; 4708 *length = sizeof(*vtd); 4709 *type = IOMMU_HW_INFO_TYPE_INTEL_VTD; 4710 return vtd; 4711 } 4712 4713 /* 4714 * Set dirty tracking for the device list of a domain. The caller must 4715 * hold the domain->lock when calling it. 
4716 */ 4717 static int device_set_dirty_tracking(struct list_head *devices, bool enable) 4718 { 4719 struct device_domain_info *info; 4720 int ret = 0; 4721 4722 list_for_each_entry(info, devices, link) { 4723 ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, 4724 IOMMU_NO_PASID, enable); 4725 if (ret) 4726 break; 4727 } 4728 4729 return ret; 4730 } 4731 4732 static int parent_domain_set_dirty_tracking(struct dmar_domain *domain, 4733 bool enable) 4734 { 4735 struct dmar_domain *s1_domain; 4736 unsigned long flags; 4737 int ret; 4738 4739 spin_lock(&domain->s1_lock); 4740 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { 4741 spin_lock_irqsave(&s1_domain->lock, flags); 4742 ret = device_set_dirty_tracking(&s1_domain->devices, enable); 4743 spin_unlock_irqrestore(&s1_domain->lock, flags); 4744 if (ret) 4745 goto err_unwind; 4746 } 4747 spin_unlock(&domain->s1_lock); 4748 return 0; 4749 4750 err_unwind: 4751 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { 4752 spin_lock_irqsave(&s1_domain->lock, flags); 4753 device_set_dirty_tracking(&s1_domain->devices, 4754 domain->dirty_tracking); 4755 spin_unlock_irqrestore(&s1_domain->lock, flags); 4756 } 4757 spin_unlock(&domain->s1_lock); 4758 return ret; 4759 } 4760 4761 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, 4762 bool enable) 4763 { 4764 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 4765 int ret; 4766 4767 spin_lock(&dmar_domain->lock); 4768 if (dmar_domain->dirty_tracking == enable) 4769 goto out_unlock; 4770 4771 ret = device_set_dirty_tracking(&dmar_domain->devices, enable); 4772 if (ret) 4773 goto err_unwind; 4774 4775 if (dmar_domain->nested_parent) { 4776 ret = parent_domain_set_dirty_tracking(dmar_domain, enable); 4777 if (ret) 4778 goto err_unwind; 4779 } 4780 4781 dmar_domain->dirty_tracking = enable; 4782 out_unlock: 4783 spin_unlock(&dmar_domain->lock); 4784 4785 return 0; 4786 4787 err_unwind: 4788 
device_set_dirty_tracking(&dmar_domain->devices, 4789 dmar_domain->dirty_tracking); 4790 spin_unlock(&dmar_domain->lock); 4791 return ret; 4792 } 4793 4794 static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain, 4795 unsigned long iova, size_t size, 4796 unsigned long flags, 4797 struct iommu_dirty_bitmap *dirty) 4798 { 4799 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 4800 unsigned long end = iova + size - 1; 4801 unsigned long pgsize; 4802 4803 /* 4804 * IOMMUFD core calls into a dirty tracking disabled domain without an 4805 * IOVA bitmap set in order to clean dirty bits in all PTEs that might 4806 * have occurred when we stopped dirty tracking. This ensures that we 4807 * never inherit dirtied bits from a previous cycle. 4808 */ 4809 if (!dmar_domain->dirty_tracking && dirty->bitmap) 4810 return -EINVAL; 4811 4812 do { 4813 struct dma_pte *pte; 4814 int lvl = 0; 4815 4816 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl, 4817 GFP_ATOMIC); 4818 pgsize = level_size(lvl) << VTD_PAGE_SHIFT; 4819 if (!pte || !dma_pte_present(pte)) { 4820 iova += pgsize; 4821 continue; 4822 } 4823 4824 if (dma_sl_pte_test_and_clear_dirty(pte, flags)) 4825 iommu_dirty_bitmap_record(dirty, iova, pgsize); 4826 iova += pgsize; 4827 } while (iova < end); 4828 4829 return 0; 4830 } 4831 4832 static const struct iommu_dirty_ops intel_dirty_ops = { 4833 .set_dirty_tracking = intel_iommu_set_dirty_tracking, 4834 .read_and_clear_dirty = intel_iommu_read_and_clear_dirty, 4835 }; 4836 4837 const struct iommu_ops intel_iommu_ops = { 4838 .blocked_domain = &blocking_domain, 4839 .release_domain = &blocking_domain, 4840 .capable = intel_iommu_capable, 4841 .hw_info = intel_iommu_hw_info, 4842 .domain_alloc = intel_iommu_domain_alloc, 4843 .domain_alloc_user = intel_iommu_domain_alloc_user, 4844 .probe_device = intel_iommu_probe_device, 4845 .probe_finalize = intel_iommu_probe_finalize, 4846 .release_device = intel_iommu_release_device, 4847 
.get_resv_regions = intel_iommu_get_resv_regions, 4848 .device_group = intel_iommu_device_group, 4849 .dev_enable_feat = intel_iommu_dev_enable_feat, 4850 .dev_disable_feat = intel_iommu_dev_disable_feat, 4851 .is_attach_deferred = intel_iommu_is_attach_deferred, 4852 .def_domain_type = device_def_domain_type, 4853 .remove_dev_pasid = intel_iommu_remove_dev_pasid, 4854 .pgsize_bitmap = SZ_4K, 4855 #ifdef CONFIG_INTEL_IOMMU_SVM 4856 .page_response = intel_svm_page_response, 4857 #endif 4858 .default_domain_ops = &(const struct iommu_domain_ops) { 4859 .attach_dev = intel_iommu_attach_device, 4860 .set_dev_pasid = intel_iommu_set_dev_pasid, 4861 .map_pages = intel_iommu_map_pages, 4862 .unmap_pages = intel_iommu_unmap_pages, 4863 .iotlb_sync_map = intel_iommu_iotlb_sync_map, 4864 .flush_iotlb_all = intel_flush_iotlb_all, 4865 .iotlb_sync = intel_iommu_tlb_sync, 4866 .iova_to_phys = intel_iommu_iova_to_phys, 4867 .free = intel_iommu_domain_free, 4868 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency, 4869 } 4870 }; 4871 4872 static void quirk_iommu_igfx(struct pci_dev *dev) 4873 { 4874 if (risky_device(dev)) 4875 return; 4876 4877 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); 4878 dmar_map_gfx = 0; 4879 } 4880 4881 /* G4x/GM45 integrated gfx dmar support is totally busted. 
*/ 4882 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); 4883 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); 4884 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx); 4885 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx); 4886 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); 4887 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); 4888 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); 4889 4890 /* Broadwell igfx malfunctions with dmar */ 4891 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); 4892 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); 4893 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx); 4894 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx); 4895 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx); 4896 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx); 4897 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx); 4898 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx); 4899 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx); 4900 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx); 4901 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx); 4902 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx); 4903 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx); 4904 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx); 4905 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx); 4906 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx); 4907 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx); 4908 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, 
quirk_iommu_igfx); 4909 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx); 4910 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx); 4911 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx); 4912 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); 4913 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); 4914 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); 4915 4916 static void quirk_iommu_rwbf(struct pci_dev *dev) 4917 { 4918 if (risky_device(dev)) 4919 return; 4920 4921 /* 4922 * Mobile 4 Series Chipset neglects to set RWBF capability, 4923 * but needs it. Same seems to hold for the desktop versions. 4924 */ 4925 pci_info(dev, "Forcing write-buffer flush capability\n"); 4926 rwbf_quirk = 1; 4927 } 4928 4929 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); 4930 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf); 4931 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf); 4932 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf); 4933 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf); 4934 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf); 4935 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf); 4936 4937 #define GGC 0x52 4938 #define GGC_MEMORY_SIZE_MASK (0xf << 8) 4939 #define GGC_MEMORY_SIZE_NONE (0x0 << 8) 4940 #define GGC_MEMORY_SIZE_1M (0x1 << 8) 4941 #define GGC_MEMORY_SIZE_2M (0x3 << 8) 4942 #define GGC_MEMORY_VT_ENABLED (0x8 << 8) 4943 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8) 4944 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8) 4945 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8) 4946 4947 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) 4948 { 4949 unsigned short ggc; 4950 4951 if (risky_device(dev)) 4952 return; 4953 4954 if (pci_read_config_word(dev, GGC, &ggc)) 4955 return; 4956 4957 
if (!(ggc & GGC_MEMORY_VT_ENABLED)) { 4958 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); 4959 dmar_map_gfx = 0; 4960 } else if (dmar_map_gfx) { 4961 /* we have to ensure the gfx device is idle before we flush */ 4962 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); 4963 iommu_set_dma_strict(); 4964 } 4965 } 4966 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); 4967 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); 4968 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); 4969 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); 4970 4971 static void quirk_igfx_skip_te_disable(struct pci_dev *dev) 4972 { 4973 unsigned short ver; 4974 4975 if (!IS_GFX_DEVICE(dev)) 4976 return; 4977 4978 ver = (dev->device >> 8) & 0xff; 4979 if (ver != 0x45 && ver != 0x46 && ver != 0x4c && 4980 ver != 0x4e && ver != 0x8a && ver != 0x98 && 4981 ver != 0x9a && ver != 0xa7 && ver != 0x7d) 4982 return; 4983 4984 if (risky_device(dev)) 4985 return; 4986 4987 pci_info(dev, "Skip IOMMU disabling for graphics\n"); 4988 iommu_skip_te_disable = 1; 4989 } 4990 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable); 4991 4992 /* On Tylersburg chipsets, some BIOSes have been known to enable the 4993 ISOCH DMAR unit for the Azalia sound device, but not give it any 4994 TLB entries, which causes it to deadlock. Check for that. We do 4995 this in a function called from init_dmars(), instead of in a PCI 4996 quirk, because we don't want to print the obnoxious "BIOS broken" 4997 message if VT-d is actually disabled. 4998 */ 4999 static void __init check_tylersburg_isoch(void) 5000 { 5001 struct pci_dev *pdev; 5002 uint32_t vtisochctrl; 5003 5004 /* If there's no Azalia in the system anyway, forget it. 
*/ 5005 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL); 5006 if (!pdev) 5007 return; 5008 5009 if (risky_device(pdev)) { 5010 pci_dev_put(pdev); 5011 return; 5012 } 5013 5014 pci_dev_put(pdev); 5015 5016 /* System Management Registers. Might be hidden, in which case 5017 we can't do the sanity check. But that's OK, because the 5018 known-broken BIOSes _don't_ actually hide it, so far. */ 5019 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL); 5020 if (!pdev) 5021 return; 5022 5023 if (risky_device(pdev)) { 5024 pci_dev_put(pdev); 5025 return; 5026 } 5027 5028 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) { 5029 pci_dev_put(pdev); 5030 return; 5031 } 5032 5033 pci_dev_put(pdev); 5034 5035 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */ 5036 if (vtisochctrl & 1) 5037 return; 5038 5039 /* Drop all bits other than the number of TLB entries */ 5040 vtisochctrl &= 0x1c; 5041 5042 /* If we have the recommended number of TLB entries (16), fine. */ 5043 if (vtisochctrl == 0x10) 5044 return; 5045 5046 /* Zero TLB entries? You get to ride the short bus to school. */ 5047 if (!vtisochctrl) { 5048 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n" 5049 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 5050 dmi_get_system_info(DMI_BIOS_VENDOR), 5051 dmi_get_system_info(DMI_BIOS_VERSION), 5052 dmi_get_system_info(DMI_PRODUCT_VERSION)); 5053 iommu_identity_mapping |= IDENTMAP_AZALIA; 5054 return; 5055 } 5056 5057 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", 5058 vtisochctrl); 5059 } 5060 5061 /* 5062 * Here we deal with a device TLB defect where device may inadvertently issue ATS 5063 * invalidation completion before posted writes initiated with translated address 5064 * that utilized translations matching the invalidation address range, violating 5065 * the invalidation completion ordering. 
5066 * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is 5067 * vulnerable to this defect. In other words, any dTLB invalidation initiated not 5068 * under the control of the trusted/privileged host device driver must use this 5069 * quirk. 5070 * Device TLBs are invalidated under the following six conditions: 5071 * 1. Device driver does DMA API unmap IOVA 5072 * 2. Device driver unbind a PASID from a process, sva_unbind_device() 5073 * 3. PASID is torn down, after PASID cache is flushed. e.g. process 5074 * exit_mmap() due to crash 5075 * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where 5076 * VM has to free pages that were unmapped 5077 * 5. Userspace driver unmaps a DMA buffer 5078 * 6. Cache invalidation in vSVA usage (upcoming) 5079 * 5080 * For #1 and #2, device drivers are responsible for stopping DMA traffic 5081 * before unmap/unbind. For #3, iommu driver gets mmu_notifier to 5082 * invalidate TLB the same way as normal user unmap which will use this quirk. 5083 * The dTLB invalidation after PASID cache flush does not need this quirk. 5084 * 5085 * As a reminder, #6 will *NEED* this quirk as we enable nested translation. 5086 */ 5087 void quirk_extra_dev_tlb_flush(struct device_domain_info *info, 5088 unsigned long address, unsigned long mask, 5089 u32 pasid, u16 qdep) 5090 { 5091 u16 sid; 5092 5093 if (likely(!info->dtlb_extra_inval)) 5094 return; 5095 5096 sid = PCI_DEVID(info->bus, info->devfn); 5097 if (pasid == IOMMU_NO_PASID) { 5098 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, 5099 qdep, address, mask); 5100 } else { 5101 qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid, 5102 pasid, qdep, address, mask); 5103 } 5104 } 5105 5106 #define ecmd_get_status_code(res) (((res) & 0xff) >> 1) 5107 5108 /* 5109 * Function to submit a command to the enhanced command interface. The 5110 * valid enhanced command descriptions are defined in Table 47 of the 5111 * VT-d spec. 
The VT-d hardware implementation may support some but not 5112 * all commands, which can be determined by checking the Enhanced 5113 * Command Capability Register. 5114 * 5115 * Return values: 5116 * - 0: Command successful without any error; 5117 * - Negative: software error value; 5118 * - Nonzero positive: failure status code defined in Table 48. 5119 */ 5120 int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob) 5121 { 5122 unsigned long flags; 5123 u64 res; 5124 int ret; 5125 5126 if (!cap_ecmds(iommu->cap)) 5127 return -ENODEV; 5128 5129 raw_spin_lock_irqsave(&iommu->register_lock, flags); 5130 5131 res = dmar_readq(iommu->reg + DMAR_ECRSP_REG); 5132 if (res & DMA_ECMD_ECRSP_IP) { 5133 ret = -EBUSY; 5134 goto err; 5135 } 5136 5137 /* 5138 * Unconditionally write the operand B, because 5139 * - There is no side effect if an ecmd doesn't require an 5140 * operand B, but we set the register to some value. 5141 * - It's not invoked in any critical path. The extra MMIO 5142 * write doesn't bring any performance concerns. 5143 */ 5144 dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob); 5145 dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT)); 5146 5147 IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq, 5148 !(res & DMA_ECMD_ECRSP_IP), res); 5149 5150 if (res & DMA_ECMD_ECRSP_IP) { 5151 ret = -ETIMEDOUT; 5152 goto err; 5153 } 5154 5155 ret = ecmd_get_status_code(res); 5156 err: 5157 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 5158 5159 return ret; 5160 } 5161