// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/crash_dump.h>
#include <linux/dma-direct.h>
#include <linux/dmi.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <uapi/linux/iommufd.h>

#include "iommu.h"
#include "../dma-iommu.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"
#include "pasid.h"
#include "perfmon.h"

#define ROOT_SIZE	VTD_PAGE_SIZE
#define CONTEXT_SIZE	VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) pci_is_display(pdev)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

static void __init check_tylersburg_isoch(void);
static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
					  bool enable);
static int rwbf_quirk;

#define rwbf_required(iommu)	(rwbf_quirk || cap_rwbf((iommu)->cap))

/*
 * set to 1 to panic the kernel if VT-d cannot be successfully enabled
 * (used when the kernel is launched with TXT)
 */
static int force_on = 0;
static int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
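/*
 * Root/context table layout consumed by the helpers below: the 4 KiB
 * root table holds 256 16-byte root entries, one per bus number. In
 * legacy mode the low qword of a root entry points to a single context
 * table indexed by devfn. In scalable mode context entries are 32 bytes
 * wide, so each bus needs two context tables: the low qword (LCTP)
 * covers devfn 0x00-0x7f and the high qword (UCTP) covers 0x80-0xff.
 * Bit 0 of either qword is the present bit.
 */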
/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}

static int device_rid_cmp_key(const void *key, const struct rb_node *node)
{
	struct device_domain_info *info =
		rb_entry(node, struct device_domain_info, node);
	const u16 *rid_lhs = key;

	if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
		return -1;

	if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
		return 1;

	return 0;
}

static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
{
	struct device_domain_info *info =
		rb_entry(lhs, struct device_domain_info, node);
	u16 key = PCI_DEVID(info->bus, info->devfn);

	return device_rid_cmp_key(&key, rhs);
}

/*
 * Looks up an IOMMU-probed device using its source ID.
 *
 * Returns the pointer to the device if there is a match. Otherwise,
 * returns NULL.
 *
 * Note that this helper doesn't guarantee that the device won't be
 * released by the iommu subsystem after being returned. The caller
 * should use its own synchronization mechanism to avoid the device
 * being released during its use if that is a possibility.
 */
struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid)
{
	struct device_domain_info *info = NULL;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
	node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key);
	if (node)
		info = rb_entry(node, struct device_domain_info, node);
	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);

	return info ? info->dev : NULL;
}

static int device_rbtree_insert(struct intel_iommu *iommu,
				struct device_domain_info *info)
{
	struct rb_node *curr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
	curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
	if (WARN_ON(curr))
		return -EEXIST;

	return 0;
}

static void device_rbtree_remove(struct device_domain_info *info)
{
	struct intel_iommu *iommu = info->iommu;
	unsigned long flags;

	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
	rb_erase(&info->node, &iommu->device_rbtree);
	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
}
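/*
 * Cached copies of the optional DMAR sub-tables provided by firmware:
 * RMRR (Reserved Memory Region Reporting), ATSR (Root Port ATS
 * Capability Reporting) and SATC (SoC Integrated Address Translation
 * Cache). The ACPI header is kept around so that the device scopes can
 * be re-parsed when hotplug notifications arrive.
 */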
struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

struct dmar_satc_unit {
	struct list_head list;		/* list of SATC units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	struct intel_iommu *iommu;	/* the corresponding iommu */
	int devices_cnt;		/* target device count */
	u8 atc_required:1;		/* ATS is required */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);
static LIST_HEAD(dmar_satc_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

static void intel_iommu_domain_free(struct iommu_domain *domain);

int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int iommu_skip_te_disable;
static int disable_igfx_iommu;

#define IDENTMAP_AZALIA		4

const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;

	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			disable_igfx_iommu = 1;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
			iommu_dma_forcedac = true;
		} else if (!strncmp(str, "strict", 6)) {
			pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
			iommu_set_dma_strict();
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Enable scalable mode if hardware supports\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "sm_off", 6)) {
			pr_info("Scalable mode is disallowed\n");
			intel_iommu_sm = 0;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		} else {
			pr_notice("Unknown option - '%s'\n", str);
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}

	return 1;
}
__setup("intel_iommu=", intel_iommu_setup);

/*
 * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
 * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
 * the returned SAGAW.
 */
static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
{
	unsigned long fl_sagaw, sl_sagaw;

	fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0);
	sl_sagaw = cap_sagaw(iommu->cap);

	/* Second level only. */
	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
		return sl_sagaw;

	/* First level only. */
	if (!ecap_slts(iommu->ecap))
		return fl_sagaw;

	return fl_sagaw & sl_sagaw;
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw;

	sagaw = __iommu_calculate_sagaw(iommu);
	for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}
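/*
 * Worked example for __iommu_calculate_agaw(): SAGAW bit 1 encodes
 * 3-level (39-bit) tables, bit 2 4-level (48-bit) and bit 3 5-level
 * (57-bit). A unit whose effective SAGAW is only BIT(2) therefore
 * yields AGAW 2 (48 bits) even when the requested max_gaw is the
 * 57-bit DEFAULT_DOMAIN_ADDRESS_WIDTH (AGAW 3): the loop above falls
 * back to the widest supported width at or below the request.
 */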
/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate the AGAW for each iommu.
 * "SAGAW" may differ across iommus: start from a default AGAW, and
 * fall back to a smaller supported AGAW on iommus that don't support
 * the default one.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
{
	return sm_supported(iommu) ?
		ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
}
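/*
 * iommu_context_addr() folds the devfn for scalable mode: a scalable
 * context entry is 32 bytes, i.e. two consecutive legacy 16-byte
 * slots, so devfn 0x00-0x7f indexes the table at root->lo and devfn
 * 0x80-0xff the table at root->hi. For example, devfn 0x85 resolves
 * to slot (0x85 - 0x80) * 2 = 10 of the upper context table.
 */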
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	/*
	 * Unless the caller requested to allocate a new entry,
	 * returning a copied context entry makes no sense.
	 */
	if (!alloc && context_copied(iommu, bus, devfn))
		return NULL;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;

		if (!alloc)
			return NULL;

		context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
						    SZ_4K);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

/**
 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
 *				 sub-hierarchy of a candidate PCI-PCI bridge
 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
 * @bridge: the candidate PCI-PCI bridge
 *
 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
 */
static bool
is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
{
	struct pci_dev *pdev, *pbridge;

	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
		return false;

	pdev = to_pci_dev(dev);
	pbridge = to_pci_dev(bridge);

	if (pbridge->subordinate &&
	    pbridge->subordinate->number <= pdev->bus->number &&
	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
		return true;

	return false;
}

static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd;
	u32 vtbar;
	int rc;

	/*
	 * We know that this device on this chipset has its own IOMMU.
	 * If we find it under a different IOMMU, then the BIOS is lying
	 * to us. Hope that the IOMMU for this device is actually
	 * disabled, and it needs no translation...
	 */
	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
	if (rc) {
		/* "can't" happen */
		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
		return false;
	}
	vtbar &= 0xffff0000;

	/* we know that this iommu should be at offset 0xa000 from vtbar */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
		return true;
	}

	return false;
}

static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
{
	if (!iommu || iommu->drhd->ignored)
		return true;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
		    pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
		    quirk_ioat_snb_local_iommu(pdev))
			return true;
	}

	return false;
}
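/*
 * Resolution order in the lookup below: an exact match in a DRHD
 * device scope wins, then membership in a scoped PCI bridge's
 * sub-hierarchy, and finally the segment's INCLUDE_ALL unit. VFs are
 * looked up through their PF since only the PF appears in the scope
 * tables.
 */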
static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct pci_dev *pdev = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	u16 segment = 0;
	int i;

	if (!dev)
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = pci_real_dma_dev(to_pci_dev(dev));

		/*
		 * VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU.
		 */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/*
				 * For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches.
				 */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				if (bus && devfn) {
					*bus = drhd->devices[i].bus;
					*devfn = drhd->devices[i].devfn;
				}
				goto out;
			}

			if (is_downstream_to_pci_bridge(dev, tmp))
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
got_pdev:
			if (bus && devfn) {
				*bus = pdev->bus->number;
				*devfn = pdev->devfn;
			}
			goto out;
		}
	}
	iommu = NULL;
out:
	if (iommu_is_dummy(iommu, dev))
		iommu = NULL;

	rcu_read_unlock();

	return iommu;
}

static void free_context_table(struct intel_iommu *iommu)
{
	struct context_entry *context;
	int i;

	if (!iommu->root_entry)
		return;

	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			iommu_free_pages(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			iommu_free_pages(context);
	}

	iommu_free_pages(iommu->root_entry);
	iommu->root_entry = NULL;
}

#ifdef CONFIG_DMAR_DEBUG
static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
			 u8 bus, u8 devfn, struct dma_pte *parent, int level)
{
	struct dma_pte *pte;
	int offset;

	while (1) {
		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];

		pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);

		if (!dma_pte_present(pte)) {
			pr_info("page table not present at level %d\n", level - 1);
			break;
		}

		if (level == 1 || dma_pte_superpage(pte))
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}
}

void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
			  unsigned long long addr, u32 pasid)
{
	struct pasid_dir_entry *dir, *pde;
	struct pasid_entry *entries, *pte;
	struct context_entry *ctx_entry;
	struct root_entry *rt_entry;
	int i, dir_index, index, level;
	u8 devfn = source_id & 0xff;
	u8 bus = source_id >> 8;
	struct dma_pte *pgtable;

	pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);

	/* root entry dump */
	if (!iommu->root_entry) {
		pr_info("root table is not present\n");
		return;
	}
	rt_entry = &iommu->root_entry[bus];

	if (sm_supported(iommu))
		pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
			rt_entry->hi, rt_entry->lo);
	else
		pr_info("root entry: 0x%016llx", rt_entry->lo);

	/* context entry dump */
	ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
	if (!ctx_entry) {
		pr_info("context table is not present\n");
		return;
	}

	pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
		ctx_entry->hi, ctx_entry->lo);

	/* legacy mode does not require PASID entries */
	if (!sm_supported(iommu)) {
		if (!context_present(ctx_entry)) {
			pr_info("legacy mode page table is not present\n");
			return;
		}
		level = agaw_to_level(ctx_entry->hi & 7);
		pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
		goto pgtable_walk;
	}

	if (!context_present(ctx_entry)) {
		pr_info("pasid directory table is not present\n");
		return;
	}

	/* get the pointer to pasid directory entry */
	dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);

	/* For request-without-pasid, get the pasid from context entry */
	if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
		pasid = IOMMU_NO_PASID;

	dir_index = pasid >> PASID_PDE_SHIFT;
	pde = &dir[dir_index];
	pr_info("pasid dir entry: 0x%016llx\n", pde->val);

	/* get the pointer to the pasid table entry */
	entries = get_pasid_table_from_pde(pde);
	if (!entries) {
		pr_info("pasid table is not present\n");
		return;
	}
	index = pasid & PASID_PTE_MASK;
	pte = &entries[index];
	for (i = 0; i < ARRAY_SIZE(pte->val); i++)
		pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);

	if (!pasid_pte_is_present(pte)) {
		pr_info("scalable mode page table is not present\n");
		return;
	}

	if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
		level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
		pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
	} else {
		level = agaw_to_level((pte->val[0] >> 2) & 0x7);
		pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
	}

pgtable_walk:
	pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
}
#endif
/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;

	root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
		       iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);
	iommu->root_entry = root;

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/*
	 * Hardware invalidates all DMA remapping hardware translation
	 * caches as part of SRTP flow.
	 */
	if (cap_esrtps(iommu->cap))
		return;

	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
	if (sm_supported(iommu))
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
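/*
 * Write-buffer flushing: implementations with CAP.RWBF (or one of the
 * chipsets behind rwbf_quirk) internally buffer writes to the
 * memory-resident remapping structures, and must be told to drain them
 * with GCMD.WBF before updated entries are guaranteed to be visible to
 * hardware.
 */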
void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* Register-based context-cache invalidation */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
			iommu->name, type);
		return;
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
			 unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
			iommu->name, type);
		return;
	}

	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			 (unsigned long long)DMA_TLB_IIRG(type),
			 (unsigned long long)DMA_TLB_IAIG(val));
}
static struct device_domain_info *
domain_lookup_dev_info(struct dmar_domain *domain,
		       struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&domain->lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			spin_unlock_irqrestore(&domain->lock, flags);
			return info;
		}
	}
	spin_unlock_irqrestore(&domain->lock, flags);

	return NULL;
}

/*
 * The extra devTLB flush quirk impacts those QAT devices with PCI device
 * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
 * check because it applies only to the built-in QAT devices and it doesn't
 * grant additional privileges.
 */
#define BUGGY_QAT_DEVID_MASK 0x4940
static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
{
	if (pdev->vendor != PCI_VENDOR_ID_INTEL)
		return false;

	if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
		return false;

	return true;
}

static void iommu_enable_pci_ats(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	if (!info->ats_supported)
		return;

	pdev = to_pci_dev(info->dev);
	if (!pci_ats_page_aligned(pdev))
		return;

	if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT))
		info->ats_enabled = 1;
}

static void iommu_disable_pci_ats(struct device_domain_info *info)
{
	if (!info->ats_enabled)
		return;

	pci_disable_ats(to_pci_dev(info->dev));
	info->ats_enabled = 0;
}

static void iommu_enable_pci_pri(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	if (!info->ats_enabled || !info->pri_supported)
		return;

	pdev = to_pci_dev(info->dev);
	/* PASID is required in PRG Response Message. */
	if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
		return;

	if (pci_reset_pri(pdev))
		return;

	if (!pci_enable_pri(pdev, PRQ_DEPTH))
		info->pri_enabled = 1;
}

static void iommu_disable_pci_pri(struct device_domain_info *info)
{
	if (!info->pri_enabled)
		return;

	if (WARN_ON(info->iopf_refcount))
		iopf_queue_remove_device(info->iommu->iopf_queue, info->dev);

	pci_disable_pri(to_pci_dev(info->dev));
	info->pri_enabled = 0;
}

static void intel_flush_iotlb_all(struct iommu_domain *domain)
{
	cache_tag_flush_all(to_dmar_domain(domain));
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		      readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}
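/*
 * The global command register does not reflect the programmed state
 * back on reads; completion is reported through the global status
 * register instead. iommu->gcmd therefore keeps a software shadow of
 * the last value written, so that single bits such as TE below can be
 * set or cleared without disturbing the others.
 */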
static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
	    (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	/*
	 * All iommu domains must have been detached from the devices,
	 * hence there should be no domain IDs in use.
	 */
	if (WARN_ON(!ida_is_empty(&iommu->domain_ida)))
		return;

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}

static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if (iommu->copied_tables) {
		bitmap_free(iommu->copied_tables);
		iommu->copied_tables = NULL;
	}

	/* free context mapping */
	free_context_table(iommu);

	if (ecap_prs(iommu->ecap))
		intel_iommu_finish_prq(iommu);
}

/*
 * Check and return whether first level is used by default for
 * DMA translation.
 */
static bool first_level_by_default(struct intel_iommu *iommu)
{
	/* Only SL is available in legacy mode */
	if (!sm_supported(iommu))
		return false;

	/* Only one level (either FL or SL) is available, just use it */
	if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap))
		return ecap_flts(iommu->ecap);

	return true;
}

int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
	struct iommu_domain_info *info, *curr;
	int num, ret = -ENOSPC;

	if (domain->domain.type == IOMMU_DOMAIN_SVA)
		return 0;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	guard(mutex)(&iommu->did_lock);
	curr = xa_load(&domain->iommu_array, iommu->seq_id);
	if (curr) {
		curr->refcnt++;
		kfree(info);
		return 0;
	}

	num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
			      cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
	if (num < 0) {
		pr_err("%s: No free domain ids\n", iommu->name);
		goto err_unlock;
	}

	info->refcnt	= 1;
	info->did	= num;
	info->iommu	= iommu;
	curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
			  NULL, info, GFP_KERNEL);
	if (curr) {
		ret = xa_err(curr) ? : -EBUSY;
		goto err_clear;
	}

	return 0;

err_clear:
	ida_free(&iommu->domain_ida, info->did);
err_unlock:
	kfree(info);
	return ret;
}

void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
	struct iommu_domain_info *info;

	if (domain->domain.type == IOMMU_DOMAIN_SVA)
		return;

	guard(mutex)(&iommu->did_lock);
	info = xa_load(&domain->iommu_array, iommu->seq_id);
	if (--info->refcnt == 0) {
		ida_free(&iommu->domain_ida, info->did);
		xa_erase(&domain->iommu_array, iommu->seq_id);
		kfree(info);
	}
}
/*
 * For kdump cases, old valid entries may be cached due to the
 * in-flight DMA and copied pgtable, but there is no unmapping
 * behaviour for them, thus we need an explicit cache flush for
 * the newly-mapped device. For kdump, at this point, the device
 * is supposed to have finished reset at its driver probe stage,
 * so no in-flight DMA will exist, and we don't need to worry
 * about it hereafter.
 */
static void copied_context_tear_down(struct intel_iommu *iommu,
				     struct context_entry *context,
				     u8 bus, u8 devfn)
{
	u16 did_old;

	if (!context_copied(iommu, bus, devfn))
		return;

	assert_spin_locked(&iommu->lock);

	did_old = context_domain_id(context);
	context_clear_entry(context);

	if (did_old < cap_ndoms(iommu->cap)) {
		iommu->flush.flush_context(iommu, did_old,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
					 DMA_TLB_DSI_FLUSH);
	}

	clear_context_copied(iommu, bus, devfn);
}

/*
 * It's a non-present to present mapping. If hardware doesn't cache
 * non-present entries we only need to flush the write-buffer. If it
 * _does_ cache non-present entries, then it does so in the special
 * domain #0, which we have to flush:
 */
static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
					u8 bus, u8 devfn)
{
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

static int domain_context_mapping_one(struct dmar_domain *domain,
				      struct intel_iommu *iommu,
				      u8 bus, u8 devfn)
{
	struct device_domain_info *info =
		domain_lookup_dev_info(domain, iommu, bus, devfn);
	u16 did = domain_id_iommu(domain, iommu);
	int translation = CONTEXT_TT_MULTI_LEVEL;
	struct pt_iommu_vtdss_hw_info pt_info;
	struct context_entry *context;
	int ret;

	if (WARN_ON(!intel_domain_is_ss_paging(domain)))
		return -EINVAL;

	pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	spin_lock(&iommu->lock);
	ret = -ENOMEM;
	context = iommu_context_addr(iommu, bus, devfn, 1);
	if (!context)
		goto out_unlock;

	ret = 0;
	if (context_present(context) && !context_copied(iommu, bus, devfn))
		goto out_unlock;

	copied_context_tear_down(iommu, context, bus, devfn);
	context_clear_entry(context);
	context_set_domain_id(context, did);

	if (info && info->ats_supported)
		translation = CONTEXT_TT_DEV_IOTLB;
	else
		translation = CONTEXT_TT_MULTI_LEVEL;

	context_set_address_root(context, pt_info.ssptptr);
	context_set_address_width(context, pt_info.aw);
	context_set_translation_type(context, translation);
	context_set_fault_enable(context);
	context_set_present(context);
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(context, sizeof(*context));
	context_present_cache_flush(iommu, did, bus, devfn);
	ret = 0;

out_unlock:
	spin_unlock(&iommu->lock);

	return ret;
}
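/*
 * A device may generate DMA with source IDs other than its own, e.g.
 * when it sits behind a PCIe-to-PCI bridge that takes ownership of the
 * transaction. pci_for_each_dma_alias() invokes the callback once per
 * possible requester ID, so every alias receives a context entry.
 */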
static int domain_context_mapping_cb(struct pci_dev *pdev,
				     u16 alias, void *opaque)
{
	struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev);
	struct intel_iommu *iommu = info->iommu;
	struct dmar_domain *domain = opaque;

	return domain_context_mapping_one(domain, iommu,
					  PCI_BUS_NUM(alias), alias & 0xff);
}

static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	u8 bus = info->bus, devfn = info->devfn;
	int ret;

	if (!dev_is_pci(dev))
		return domain_context_mapping_one(domain, iommu, bus, devfn);

	ret = pci_for_each_dma_alias(to_pci_dev(dev),
				     domain_context_mapping_cb, domain);
	if (ret)
		return ret;

	iommu_enable_pci_ats(info);

	return 0;
}

static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
{
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;
	u16 did;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_no_pasid(info, context, did);
}

int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
			       ioasid_t pasid, u16 did, phys_addr_t fsptptr,
			       int flags, struct iommu_domain *old)
{
	if (!old)
		return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid,
						     did, flags);
	return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did,
					       iommu_domain_did(old, iommu),
					       flags);
}

static int domain_setup_second_level(struct intel_iommu *iommu,
				     struct dmar_domain *domain,
				     struct device *dev, ioasid_t pasid,
				     struct iommu_domain *old)
{
	if (!old)
		return intel_pasid_setup_second_level(iommu, domain,
						      dev, pasid);
	return intel_pasid_replace_second_level(iommu, domain, dev,
						iommu_domain_did(old, iommu),
						pasid);
}

static int domain_setup_passthrough(struct intel_iommu *iommu,
				    struct device *dev, ioasid_t pasid,
				    struct iommu_domain *old)
{
	if (!old)
		return intel_pasid_setup_pass_through(iommu, dev, pasid);
	return intel_pasid_replace_pass_through(iommu, dev,
						iommu_domain_did(old, iommu),
						pasid);
}

static int domain_setup_first_level(struct intel_iommu *iommu,
				    struct dmar_domain *domain,
				    struct device *dev,
				    u32 pasid, struct iommu_domain *old)
{
	struct pt_iommu_x86_64_hw_info pt_info;
	unsigned int flags = 0;

	pt_iommu_x86_64_hw_info(&domain->fspt, &pt_info);
	if (WARN_ON(pt_info.levels != 4 && pt_info.levels != 5))
		return -EINVAL;

	if (pt_info.levels == 5)
		flags |= PASID_FLAG_FL5LP;

	if (domain->force_snooping)
		flags |= PASID_FLAG_PAGE_SNOOP;

	if (!(domain->fspt.x86_64_pt.common.features &
	      BIT(PT_FEAT_DMA_INCOHERENT)))
		flags |= PASID_FLAG_PWSNP;

	return __domain_setup_first_level(iommu, dev, pasid,
					  domain_id_iommu(domain, iommu),
					  pt_info.gcr3_pt, flags, old);
}
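/*
 * Attach flow: reserve a domain ID on this IOMMU, link the device into
 * the domain's device list, then install either a context entry
 * (legacy mode) or a PASID table entry for IOMMU_NO_PASID (scalable
 * mode), and finally assign the cache tags used for targeted
 * invalidation. Any failure blocks translation for the device.
 */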
static int dmar_domain_attach_device(struct dmar_domain *domain,
				     struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	unsigned long flags;
	int ret;

	ret = domain_attach_iommu(domain, iommu);
	if (ret)
		return ret;

	info->domain = domain;
	info->domain_attached = true;
	spin_lock_irqsave(&domain->lock, flags);
	list_add(&info->link, &domain->devices);
	spin_unlock_irqrestore(&domain->lock, flags);

	if (dev_is_real_dma_subdevice(dev))
		return 0;

	if (!sm_supported(iommu))
		ret = domain_context_mapping(domain, dev);
	else if (intel_domain_is_fs_paging(domain))
		ret = domain_setup_first_level(iommu, domain, dev,
					       IOMMU_NO_PASID, NULL);
	else if (intel_domain_is_ss_paging(domain))
		ret = domain_setup_second_level(iommu, domain, dev,
						IOMMU_NO_PASID, NULL);
	else if (WARN_ON(true))
		ret = -EINVAL;

	if (ret)
		goto out_block_translation;

	ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
	if (ret)
		goto out_block_translation;

	return 0;

out_block_translation:
	device_block_translation(dev);
	return ret;
}

/**
 * device_rmrr_is_relaxable - Test whether the RMRR of this device
 * is relaxable (ie. is allowed to be not enforced under some conditions)
 * @dev: device handle
 *
 * We assume that PCI USB devices with RMRRs have them largely
 * for historical reasons and that the RMRR space is not actively used post
 * boot. This exclusion may change if vendors begin to abuse it.
 *
 * The same exception is made for graphics devices, with the requirement that
 * any use of the RMRR regions will be torn down before assigning the device
 * to a guest.
 *
 * Return: true if the RMRR is relaxable, false otherwise
 */
static bool device_rmrr_is_relaxable(struct device *dev)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(dev))
		return false;

	pdev = to_pci_dev(dev);
	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
		return true;
	else
		return false;
}

static int device_def_domain_type(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;

	/*
	 * Hardware does not support the passthrough translation mode.
	 * Always use a dynamic mapping domain.
	 */
	if (!ecap_pass_through(iommu->ecap))
		return IOMMU_DOMAIN_DMA;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
			return IOMMU_DOMAIN_IDENTITY;
	}

	return 0;
}
static void intel_iommu_init_qi(struct intel_iommu *iommu)
{
	/*
	 * Start from a sane iommu hardware state.
	 * If the queued invalidation was already initialized by us
	 * (for example, while enabling interrupt-remapping), then
	 * things are already rolling from a sane state.
	 */
	if (!iommu->qi) {
		/*
		 * Clear any previous faults.
		 */
		dmar_fault(-1, iommu);
		/*
		 * Disable queued invalidation if supported and already
		 * enabled before OS handover.
		 */
		dmar_disable_qi(iommu);
	}

	if (dmar_enable_qi(iommu)) {
		/*
		 * Queued Invalidate not enabled, use Register Based Invalidate
		 */
		iommu->flush.flush_context = __iommu_flush_context;
		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
		pr_info("%s: Using Register based invalidation\n",
			iommu->name);
	} else {
		iommu->flush.flush_context = qi_flush_context;
		iommu->flush.flush_iotlb = qi_flush_iotlb;
		pr_info("%s: Using Queued invalidation\n", iommu->name);
	}
}
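/*
 * Kdump support: copy_context_table() duplicates one bus's context
 * table(s) from the previous kernel. With the extended (scalable)
 * layout both the lower and upper context tables are copied, which is
 * why table indices and devfn strides are doubled below. Domain IDs
 * found in copied entries are reserved in domain_ida so that new
 * domains cannot collide with still-live ones.
 */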
static int copy_context_table(struct intel_iommu *iommu,
			      struct root_entry *old_re,
			      struct context_entry **tbl,
			      int bus, bool ext)
{
	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
	struct context_entry *new_ce = NULL, ce;
	struct context_entry *old_ce = NULL;
	struct root_entry re;
	phys_addr_t old_ce_phys;

	tbl_idx = ext ? bus * 2 : bus;
	memcpy(&re, old_re, sizeof(re));

	for (devfn = 0; devfn < 256; devfn++) {
		/* First calculate the correct index */
		idx = (ext ? devfn * 2 : devfn) % 256;

		if (idx == 0) {
			/* First save what we may have and clean up */
			if (new_ce) {
				tbl[tbl_idx] = new_ce;
				__iommu_flush_cache(iommu, new_ce,
						    VTD_PAGE_SIZE);
				pos = 1;
			}

			if (old_ce)
				memunmap(old_ce);

			ret = 0;
			if (devfn < 0x80)
				old_ce_phys = root_entry_lctp(&re);
			else
				old_ce_phys = root_entry_uctp(&re);

			if (!old_ce_phys) {
				if (ext && devfn == 0) {
					/* No LCTP, try UCTP */
					devfn = 0x7f;
					continue;
				} else {
					goto out;
				}
			}

			ret = -ENOMEM;
			old_ce = memremap(old_ce_phys, PAGE_SIZE,
					  MEMREMAP_WB);
			if (!old_ce)
				goto out;

			new_ce = iommu_alloc_pages_node_sz(iommu->node,
							   GFP_KERNEL, SZ_4K);
			if (!new_ce)
				goto out_unmap;

			ret = 0;
		}

		/* Now copy the context entry */
		memcpy(&ce, old_ce + idx, sizeof(ce));

		if (!context_present(&ce))
			continue;

		did = context_domain_id(&ce);
		if (did >= 0 && did < cap_ndoms(iommu->cap))
			ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL);

		set_context_copied(iommu, bus, devfn);
		new_ce[idx] = ce;
	}

	tbl[tbl_idx + pos] = new_ce;

	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);

out_unmap:
	memunmap(old_ce);

out:
	return ret;
}

static int copy_translation_tables(struct intel_iommu *iommu)
{
	struct context_entry **ctxt_tbls;
	struct root_entry *old_rt;
	phys_addr_t old_rt_phys;
	int ctxt_table_entries;
	u64 rtaddr_reg;
	int bus, ret;
	bool new_ext, ext;

	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
	ext        = !!(rtaddr_reg & DMA_RTADDR_SMT);
	new_ext    = !!sm_supported(iommu);

	/*
	 * The RTT bit can only be changed when translation is disabled,
	 * but disabling translation means to open a window for data
	 * corruption. So bail out and don't copy anything if we would
	 * have to change the bit.
	 */
	if (new_ext != ext)
		return -EINVAL;

	iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
	if (!iommu->copied_tables)
		return -ENOMEM;

	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
	if (!old_rt_phys)
		return -EINVAL;

	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
	if (!old_rt)
		return -ENOMEM;

	/* This is too big for the stack - allocate it from slab */
	ctxt_table_entries = ext ? 512 : 256;
	ret = -ENOMEM;
	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
	if (!ctxt_tbls)
		goto out_unmap;

	for (bus = 0; bus < 256; bus++) {
		ret = copy_context_table(iommu, &old_rt[bus],
					 ctxt_tbls, bus, ext);
		if (ret) {
			pr_err("%s: Failed to copy context table for bus %d\n",
			       iommu->name, bus);
			continue;
		}
	}

	spin_lock(&iommu->lock);

	/* Context tables are copied, now write them to the root_entry table */
	for (bus = 0; bus < 256; bus++) {
		int idx = ext ? bus * 2 : bus;
		u64 val;

		if (ctxt_tbls[idx]) {
			val = virt_to_phys(ctxt_tbls[idx]) | 1;
			iommu->root_entry[bus].lo = val;
		}

		if (!ext || !ctxt_tbls[idx + 1])
			continue;

		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
		iommu->root_entry[bus].hi = val;
	}

	spin_unlock(&iommu->lock);

	kfree(ctxt_tbls);

	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);

	ret = 0;

out_unmap:
	memunmap(old_rt);

	return ret;
}
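/*
 * Boot-time initialization of every DMAR unit: size the system PASID
 * table, set up the invalidation interface, allocate (or, for kdump,
 * copy over) the root and context tables, then program the root
 * pointer and wire up fault reporting. Translation itself is enabled
 * later in the init sequence.
 */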
static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret;

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			iommu_disable_translation(iommu);
			continue;
		}

		/*
		 * Find the max pasid size of all IOMMU's in the system.
		 * We need to ensure the system pasid table is no bigger
		 * than the smallest supported.
		 */
		if (pasid_supported(iommu)) {
			u32 temp = 2 << ecap_pss(iommu->ecap);

			intel_pasid_max_id = min_t(u32, temp,
						   intel_pasid_max_id);
		}

		intel_iommu_init_qi(iommu);
		init_translation_status(iommu);

		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
			iommu_disable_translation(iommu);
			clear_translation_pre_enabled(iommu);
			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
				iommu->name);
		}

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * among all IOMMU's. Need to split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret)
			goto free_iommu;

		if (translation_pre_enabled(iommu)) {
			pr_info("Translation already enabled - trying to copy translation structures\n");

			ret = copy_translation_tables(iommu);
			if (ret) {
				/*
				 * We found the IOMMU with translation
				 * enabled - but failed to copy over the
				 * old root-entry table. Try to proceed
				 * by disabling translation now and
				 * allocating a clean root-entry table.
				 * This might cause DMAR faults, but
				 * probably the dump will still succeed.
				 */
				pr_err("Failed to copy translation tables from previous kernel for %s\n",
				       iommu->name);
				iommu_disable_translation(iommu);
				clear_translation_pre_enabled(iommu);
			} else {
				pr_info("Copied translation tables from previous kernel for %s\n",
					iommu->name);
			}
		}

		intel_svm_check(iommu);
	}

	/*
	 * Now that qi is enabled on all iommus, set the root entry and flush
	 * caches. This is required on some Intel X58 chipsets, otherwise the
	 * flush_context function will loop forever and the boot hangs.
	 */
	for_each_active_iommu(iommu, drhd) {
		iommu_flush_write_buffer(iommu);
		iommu_set_root_entry(iommu);
	}

	check_tylersburg_isoch();

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

		if (ecap_prs(iommu->ecap)) {
			/*
			 * Calling dmar_alloc_hwirq() with dmar_global_lock
			 * held could cause a lock race condition.
			 */
			up_write(&dmar_global_lock);
			ret = intel_iommu_enable_prq(iommu);
			down_write(&dmar_global_lock);
			if (ret)
				goto free_iommu;
		}

		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto free_iommu;
	}

	return 0;

free_iommu:
	for_each_active_iommu(iommu, drhd) {
		disable_dmar_iommu(iommu);
		free_dmar_iommu(iommu);
	}

	return ret;
}
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;
	struct device *dev;
	int i;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			for_each_active_dev_scope(drhd->devices,
						  drhd->devices_cnt, i, dev)
				break;
			/* ignore DMAR unit if no devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	for_each_active_drhd_unit(drhd) {
		if (drhd->include_all)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, dev)
			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
				break;
		if (i < drhd->devices_cnt)
			continue;

		/*
		 * This IOMMU has *only* gfx devices. Either bypass it or
		 * set the gfx_mapped flag, as appropriate.
		 */
		drhd->gfx_dedicated = 1;
		if (disable_igfx_iommu)
			drhd->ignored = 1;
	}
}

#ifdef CONFIG_SUSPEND
static int init_iommu_hw(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	int ret;

	for_each_active_iommu(iommu, drhd) {
		if (iommu->qi) {
			ret = dmar_reenable_qi(iommu);
			if (ret)
				return ret;
		}
	}

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);
		iommu_set_root_entry(iommu);
		iommu_enable_translation(iommu);
		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;
}

static void iommu_flush_all(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH);
	}
}

static int iommu_suspend(void *data)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	iommu_flush_all();

	for_each_active_iommu(iommu, drhd) {
		iommu_disable_translation(iommu);

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		iommu->iommu_state[SR_DMAR_FECTL_REG] =
			readl(iommu->reg + DMAR_FECTL_REG);
		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
			readl(iommu->reg + DMAR_FEDATA_REG);
		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
			readl(iommu->reg + DMAR_FEADDR_REG);
		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
			readl(iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
	return 0;
}

static void iommu_resume(void *data)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	unsigned long flag;

	if (init_iommu_hw()) {
		if (force_on)
			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
		else
			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
		return;
	}

	for_each_active_iommu(iommu, drhd) {

		raw_spin_lock_irqsave(&iommu->register_lock, flag);

		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
		       iommu->reg + DMAR_FECTL_REG);
		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
		       iommu->reg + DMAR_FEDATA_REG);
		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
		       iommu->reg + DMAR_FEADDR_REG);
		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
		       iommu->reg + DMAR_FEUADDR_REG);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	}
}

static const struct syscore_ops iommu_syscore_ops = {
	.resume		= iommu_resume,
	.suspend	= iommu_suspend,
};

static struct syscore iommu_syscore = {
	.ops = &iommu_syscore_ops,
};

static void __init init_iommu_pm_ops(void)
{
	register_syscore(&iommu_syscore);
}

#else
static inline void init_iommu_pm_ops(void) {}
#endif	/* CONFIG_SUSPEND */
static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
{
	if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
	    !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
	    rmrr->end_address <= rmrr->base_address ||
	    arch_rmrr_sanity_check(rmrr))
		return -EINVAL;

	return 0;
}

int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_reserved_memory *rmrr;
	struct dmar_rmrr_unit *rmrru;

	rmrr = (struct acpi_dmar_reserved_memory *)header;
	if (rmrr_sanity_check(rmrr)) {
		pr_warn(FW_BUG
			"Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
			rmrr->base_address, rmrr->end_address,
			dmi_get_system_info(DMI_BIOS_VENDOR),
			dmi_get_system_info(DMI_BIOS_VERSION),
			dmi_get_system_info(DMI_PRODUCT_VERSION));
		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
	}

	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
	if (!rmrru)
		goto out;

	rmrru->hdr = header;

	rmrru->base_address = rmrr->base_address;
	rmrru->end_address = rmrr->end_address;

	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
				((void *)rmrr) + rmrr->header.length,
				&rmrru->devices_cnt);
	if (rmrru->devices_cnt && rmrru->devices == NULL)
		goto free_rmrru;

	list_add(&rmrru->list, &dmar_rmrr_units);

	return 0;
free_rmrru:
	kfree(rmrru);
out:
	return -ENOMEM;
}
static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
{
	struct dmar_atsr_unit *atsru;
	struct acpi_dmar_atsr *tmp;

	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
				dmar_rcu_check()) {
		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
		if (atsr->segment != tmp->segment)
			continue;
		if (atsr->header.length != tmp->header.length)
			continue;
		if (memcmp(atsr, tmp, atsr->header.length) == 0)
			return atsru;
	}

	return NULL;
}

int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
		return 0;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru)
		return 0;

	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
	if (!atsru)
		return -ENOMEM;

	/*
	 * If memory is allocated from slab by ACPI _DSM method, we need to
	 * copy the memory content because the memory buffer will be freed
	 * on return.
	 */
	atsru->hdr = (void *)(atsru + 1);
	memcpy(atsru->hdr, hdr, hdr->length);
	atsru->include_all = atsr->flags & 0x1;
	if (!atsru->include_all) {
		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
				(void *)atsr + atsr->header.length,
				&atsru->devices_cnt);
		if (atsru->devices_cnt && atsru->devices == NULL) {
			kfree(atsru);
			return -ENOMEM;
		}
	}

	list_add_rcu(&atsru->list, &dmar_atsr_units);

	return 0;
}

static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
{
	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
	kfree(atsru);
}

int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (atsru) {
		list_del_rcu(&atsru->list);
		synchronize_rcu();
		intel_iommu_free_atsr(atsru);
	}

	return 0;
}

int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
{
	int i;
	struct device *dev;
	struct acpi_dmar_atsr *atsr;
	struct dmar_atsr_unit *atsru;

	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
	atsru = dmar_find_atsr(atsr);
	if (!atsru)
		return 0;

	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
					  i, dev)
			return -EBUSY;
	}

	return 0;
}

static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
{
	struct dmar_satc_unit *satcu;
	struct acpi_dmar_satc *tmp;

	list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
				dmar_rcu_check()) {
		tmp = (struct acpi_dmar_satc *)satcu->hdr;
		if (satc->segment != tmp->segment)
			continue;
		if (satc->header.length != tmp->header.length)
			continue;
		if (memcmp(satc, tmp, satc->header.length) == 0)
			return satcu;
	}

	return NULL;
}

int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
{
	struct acpi_dmar_satc *satc;
	struct dmar_satc_unit *satcu;

	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
		return 0;

	satc = container_of(hdr, struct acpi_dmar_satc, header);
	satcu = dmar_find_satc(satc);
	if (satcu)
		return 0;

	satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
	if (!satcu)
		return -ENOMEM;

	satcu->hdr = (void *)(satcu + 1);
	memcpy(satcu->hdr, hdr, hdr->length);
	satcu->atc_required = satc->flags & 0x1;
	satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
					      (void *)satc + satc->header.length,
					      &satcu->devices_cnt);
	if (satcu->devices_cnt && !satcu->devices) {
		kfree(satcu);
		return -ENOMEM;
	}
	list_add_rcu(&satcu->list, &dmar_satc_units);

	return 0;
}
2116 */ 2117 if (iommu->gcmd & DMA_GCMD_TE) 2118 iommu_disable_translation(iommu); 2119 2120 ret = iommu_alloc_root_entry(iommu); 2121 if (ret) 2122 goto out; 2123 2124 intel_svm_check(iommu); 2125 2126 if (dmaru->ignored) { 2127 /* 2128 * we always have to disable PMRs or DMA may fail on this device 2129 */ 2130 if (force_on) 2131 iommu_disable_protect_mem_regions(iommu); 2132 return 0; 2133 } 2134 2135 intel_iommu_init_qi(iommu); 2136 iommu_flush_write_buffer(iommu); 2137 2138 if (ecap_prs(iommu->ecap)) { 2139 ret = intel_iommu_enable_prq(iommu); 2140 if (ret) 2141 goto disable_iommu; 2142 } 2143 2144 ret = dmar_set_interrupt(iommu); 2145 if (ret) 2146 goto disable_iommu; 2147 2148 iommu_set_root_entry(iommu); 2149 iommu_enable_translation(iommu); 2150 2151 iommu_disable_protect_mem_regions(iommu); 2152 return 0; 2153 2154 disable_iommu: 2155 disable_dmar_iommu(iommu); 2156 out: 2157 free_dmar_iommu(iommu); 2158 return ret; 2159 } 2160 2161 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert) 2162 { 2163 int ret = 0; 2164 struct intel_iommu *iommu = dmaru->iommu; 2165 2166 if (!intel_iommu_enabled) 2167 return 0; 2168 if (iommu == NULL) 2169 return -EINVAL; 2170 2171 if (insert) { 2172 ret = intel_iommu_add(dmaru); 2173 } else { 2174 disable_dmar_iommu(iommu); 2175 free_dmar_iommu(iommu); 2176 } 2177 2178 return ret; 2179 } 2180 2181 static void intel_iommu_free_dmars(void) 2182 { 2183 struct dmar_rmrr_unit *rmrru, *rmrr_n; 2184 struct dmar_atsr_unit *atsru, *atsr_n; 2185 struct dmar_satc_unit *satcu, *satc_n; 2186 2187 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { 2188 list_del(&rmrru->list); 2189 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); 2190 kfree(rmrru); 2191 } 2192 2193 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { 2194 list_del(&atsru->list); 2195 intel_iommu_free_atsr(atsru); 2196 } 2197 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) { 2198 list_del(&satcu->list); 2199 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt); 2200 kfree(satcu); 2201 } 2202 } 2203 2204 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev) 2205 { 2206 struct dmar_satc_unit *satcu; 2207 struct acpi_dmar_satc *satc; 2208 struct device *tmp; 2209 int i; 2210 2211 rcu_read_lock(); 2212 2213 list_for_each_entry_rcu(satcu, &dmar_satc_units, list) { 2214 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); 2215 if (satc->segment != pci_domain_nr(dev->bus)) 2216 continue; 2217 for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp) 2218 if (to_pci_dev(tmp) == dev) 2219 goto out; 2220 } 2221 satcu = NULL; 2222 out: 2223 rcu_read_unlock(); 2224 return satcu; 2225 } 2226 2227 static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu) 2228 { 2229 struct pci_dev *bridge = NULL; 2230 struct dmar_atsr_unit *atsru; 2231 struct dmar_satc_unit *satcu; 2232 struct acpi_dmar_atsr *atsr; 2233 bool supported = true; 2234 struct pci_bus *bus; 2235 struct device *tmp; 2236 int i; 2237 2238 dev = pci_physfn(dev); 2239 satcu = dmar_find_matched_satc_unit(dev); 2240 if (satcu) 2241 /* 2242 * This device supports ATS as it is in SATC table. 2243 * When IOMMU is in legacy mode, enabling ATS is done 2244 * automatically by HW for the device that requires 2245 * ATS, hence OS should not enable this device ATS 2246 * to avoid duplicated TLB invalidation. 
2247 */ 2248 return !(satcu->atc_required && !sm_supported(iommu)); 2249 2250 for (bus = dev->bus; bus; bus = bus->parent) { 2251 bridge = bus->self; 2252 /* If it's an integrated device, allow ATS */ 2253 if (!bridge) 2254 return true; 2255 /* Connected via non-PCIe: no ATS */ 2256 if (!pci_is_pcie(bridge) || 2257 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) 2258 return false; 2259 /* If we found the root port, look it up in the ATSR */ 2260 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) 2261 break; 2262 } 2263 2264 rcu_read_lock(); 2265 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { 2266 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 2267 if (atsr->segment != pci_domain_nr(dev->bus)) 2268 continue; 2269 2270 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) 2271 if (tmp == &bridge->dev) 2272 goto out; 2273 2274 if (atsru->include_all) 2275 goto out; 2276 } 2277 supported = false; 2278 out: 2279 rcu_read_unlock(); 2280 2281 return supported; 2282 } 2283 2284 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) 2285 { 2286 int ret; 2287 struct dmar_rmrr_unit *rmrru; 2288 struct dmar_atsr_unit *atsru; 2289 struct dmar_satc_unit *satcu; 2290 struct acpi_dmar_atsr *atsr; 2291 struct acpi_dmar_reserved_memory *rmrr; 2292 struct acpi_dmar_satc *satc; 2293 2294 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING) 2295 return 0; 2296 2297 list_for_each_entry(rmrru, &dmar_rmrr_units, list) { 2298 rmrr = container_of(rmrru->hdr, 2299 struct acpi_dmar_reserved_memory, header); 2300 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 2301 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), 2302 ((void *)rmrr) + rmrr->header.length, 2303 rmrr->segment, rmrru->devices, 2304 rmrru->devices_cnt); 2305 if (ret < 0) 2306 return ret; 2307 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 2308 dmar_remove_dev_scope(info, rmrr->segment, 2309 rmrru->devices, rmrru->devices_cnt); 2310 } 2311 } 2312 2313 list_for_each_entry(atsru, &dmar_atsr_units, list) { 2314 if (atsru->include_all) 2315 continue; 2316 2317 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 2318 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 2319 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), 2320 (void *)atsr + atsr->header.length, 2321 atsr->segment, atsru->devices, 2322 atsru->devices_cnt); 2323 if (ret > 0) 2324 break; 2325 else if (ret < 0) 2326 return ret; 2327 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 2328 if (dmar_remove_dev_scope(info, atsr->segment, 2329 atsru->devices, atsru->devices_cnt)) 2330 break; 2331 } 2332 } 2333 list_for_each_entry(satcu, &dmar_satc_units, list) { 2334 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); 2335 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 2336 ret = dmar_insert_dev_scope(info, (void *)(satc + 1), 2337 (void *)satc + satc->header.length, 2338 satc->segment, satcu->devices, 2339 satcu->devices_cnt); 2340 if (ret > 0) 2341 break; 2342 else if (ret < 0) 2343 return ret; 2344 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 2345 if (dmar_remove_dev_scope(info, satc->segment, 2346 satcu->devices, satcu->devices_cnt)) 2347 break; 2348 } 2349 } 2350 2351 return 0; 2352 } 2353 2354 static void intel_disable_iommus(void) 2355 { 2356 struct intel_iommu *iommu = NULL; 2357 struct dmar_drhd_unit *drhd; 2358 2359 for_each_iommu(iommu, drhd) 2360 iommu_disable_translation(iommu); 2361 } 2362 2363 void intel_iommu_shutdown(void) 2364 { 2365 struct dmar_drhd_unit *drhd; 2366 struct intel_iommu 
*iommu = NULL; 2367 2368 if (no_iommu || dmar_disabled) 2369 return; 2370 2371 /* 2372 * All other CPUs were brought down, hotplug interrupts were disabled, 2373 * no lock and RCU checking needed anymore 2374 */ 2375 list_for_each_entry(drhd, &dmar_drhd_units, list) { 2376 iommu = drhd->iommu; 2377 2378 /* Disable PMRs explicitly here. */ 2379 iommu_disable_protect_mem_regions(iommu); 2380 2381 /* Make sure the IOMMUs are switched off */ 2382 iommu_disable_translation(iommu); 2383 } 2384 } 2385 2386 static struct intel_iommu *dev_to_intel_iommu(struct device *dev) 2387 { 2388 struct iommu_device *iommu_dev = dev_to_iommu_device(dev); 2389 2390 return container_of(iommu_dev, struct intel_iommu, iommu); 2391 } 2392 2393 static ssize_t version_show(struct device *dev, 2394 struct device_attribute *attr, char *buf) 2395 { 2396 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2397 u32 ver = readl(iommu->reg + DMAR_VER_REG); 2398 return sysfs_emit(buf, "%d:%d\n", 2399 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); 2400 } 2401 static DEVICE_ATTR_RO(version); 2402 2403 static ssize_t address_show(struct device *dev, 2404 struct device_attribute *attr, char *buf) 2405 { 2406 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2407 return sysfs_emit(buf, "%llx\n", iommu->reg_phys); 2408 } 2409 static DEVICE_ATTR_RO(address); 2410 2411 static ssize_t cap_show(struct device *dev, 2412 struct device_attribute *attr, char *buf) 2413 { 2414 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2415 return sysfs_emit(buf, "%llx\n", iommu->cap); 2416 } 2417 static DEVICE_ATTR_RO(cap); 2418 2419 static ssize_t ecap_show(struct device *dev, 2420 struct device_attribute *attr, char *buf) 2421 { 2422 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2423 return sysfs_emit(buf, "%llx\n", iommu->ecap); 2424 } 2425 static DEVICE_ATTR_RO(ecap); 2426 2427 static ssize_t domains_supported_show(struct device *dev, 2428 struct device_attribute *attr, char *buf) 2429 { 2430 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2431 return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap)); 2432 } 2433 static DEVICE_ATTR_RO(domains_supported); 2434 2435 static ssize_t domains_used_show(struct device *dev, 2436 struct device_attribute *attr, char *buf) 2437 { 2438 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2439 unsigned int count = 0; 2440 int id; 2441 2442 for (id = 0; id < cap_ndoms(iommu->cap); id++) 2443 if (ida_exists(&iommu->domain_ida, id)) 2444 count++; 2445 2446 return sysfs_emit(buf, "%d\n", count); 2447 } 2448 static DEVICE_ATTR_RO(domains_used); 2449 2450 static struct attribute *intel_iommu_attrs[] = { 2451 &dev_attr_version.attr, 2452 &dev_attr_address.attr, 2453 &dev_attr_cap.attr, 2454 &dev_attr_ecap.attr, 2455 &dev_attr_domains_supported.attr, 2456 &dev_attr_domains_used.attr, 2457 NULL, 2458 }; 2459 2460 static struct attribute_group intel_iommu_group = { 2461 .name = "intel-iommu", 2462 .attrs = intel_iommu_attrs, 2463 }; 2464 2465 const struct attribute_group *intel_iommu_groups[] = { 2466 &intel_iommu_group, 2467 NULL, 2468 }; 2469 2470 static bool has_external_pci(void) 2471 { 2472 struct pci_dev *pdev = NULL; 2473 2474 for_each_pci_dev(pdev) 2475 if (pdev->external_facing) { 2476 pci_dev_put(pdev); 2477 return true; 2478 } 2479 2480 return false; 2481 } 2482 2483 static int __init platform_optin_force_iommu(void) 2484 { 2485 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci()) 2486 return 0; 2487 2488 if (no_iommu || dmar_disabled) 2489 pr_info("Intel-IOMMU force enabled 
due to platform opt in\n"); 2490 2491 /* 2492 * If Intel-IOMMU is disabled by default, we will apply identity 2493 * map for all devices except those marked as being untrusted. 2494 */ 2495 if (dmar_disabled) 2496 iommu_set_default_passthrough(false); 2497 2498 dmar_disabled = 0; 2499 no_iommu = 0; 2500 2501 return 1; 2502 } 2503 2504 static int __init probe_acpi_namespace_devices(void) 2505 { 2506 struct dmar_drhd_unit *drhd; 2507 /* To avoid a -Wunused-but-set-variable warning. */ 2508 struct intel_iommu *iommu __maybe_unused; 2509 struct device *dev; 2510 int i, ret = 0; 2511 2512 for_each_active_iommu(iommu, drhd) { 2513 for_each_active_dev_scope(drhd->devices, 2514 drhd->devices_cnt, i, dev) { 2515 struct acpi_device_physical_node *pn; 2516 struct acpi_device *adev; 2517 2518 if (dev->bus != &acpi_bus_type) 2519 continue; 2520 2521 up_read(&dmar_global_lock); 2522 adev = to_acpi_device(dev); 2523 mutex_lock(&adev->physical_node_lock); 2524 list_for_each_entry(pn, 2525 &adev->physical_node_list, node) { 2526 ret = iommu_probe_device(pn->dev); 2527 if (ret) 2528 break; 2529 } 2530 mutex_unlock(&adev->physical_node_lock); 2531 down_read(&dmar_global_lock); 2532 2533 if (ret) 2534 return ret; 2535 } 2536 } 2537 2538 return 0; 2539 } 2540 2541 static __init int tboot_force_iommu(void) 2542 { 2543 if (!tboot_enabled()) 2544 return 0; 2545 2546 if (no_iommu || dmar_disabled) 2547 pr_warn("Forcing Intel-IOMMU to enabled\n"); 2548 2549 dmar_disabled = 0; 2550 no_iommu = 0; 2551 2552 return 1; 2553 } 2554 2555 int __init intel_iommu_init(void) 2556 { 2557 int ret = -ENODEV; 2558 struct dmar_drhd_unit *drhd; 2559 struct intel_iommu *iommu; 2560 2561 /* 2562 * Intel IOMMU is required for a TXT/tboot launch or platform 2563 * opt in, so enforce that. 2564 */ 2565 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || 2566 platform_optin_force_iommu(); 2567 2568 down_write(&dmar_global_lock); 2569 if (dmar_table_init()) { 2570 if (force_on) 2571 panic("tboot: Failed to initialize DMAR table\n"); 2572 goto out_free_dmar; 2573 } 2574 2575 if (dmar_dev_scope_init() < 0) { 2576 if (force_on) 2577 panic("tboot: Failed to initialize DMAR device scope\n"); 2578 goto out_free_dmar; 2579 } 2580 2581 up_write(&dmar_global_lock); 2582 2583 /* 2584 * The bus notifier takes the dmar_global_lock, so lockdep will 2585 * complain later when we register it under the lock. 2586 */ 2587 dmar_register_bus_notifier(); 2588 2589 down_write(&dmar_global_lock); 2590 2591 if (!no_iommu) 2592 intel_iommu_debugfs_init(); 2593 2594 if (no_iommu || dmar_disabled) { 2595 /* 2596 * We exit the function here to ensure IOMMU's remapping and 2597 * mempool aren't setup, which means that the IOMMU's PMRs 2598 * won't be disabled via the call to init_dmars(). So disable 2599 * it explicitly here. The PMRs were setup by tboot prior to 2600 * calling SENTER, but the kernel is expected to reset/tear 2601 * down the PMRs. 
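		 * Clearing is limited to the intel_iommu_tboot_noforce case
		 * below on the assumption that only a measured launch leaves
		 * PMRs programmed while the user still asked for the IOMMU
		 * to stay disabled.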
2602 */ 2603 if (intel_iommu_tboot_noforce) { 2604 for_each_iommu(iommu, drhd) 2605 iommu_disable_protect_mem_regions(iommu); 2606 } 2607 2608 /* 2609 * Make sure the IOMMUs are switched off, even when we 2610 * boot into a kexec kernel and the previous kernel left 2611 * them enabled 2612 */ 2613 intel_disable_iommus(); 2614 goto out_free_dmar; 2615 } 2616 2617 if (list_empty(&dmar_rmrr_units)) 2618 pr_info("No RMRR found\n"); 2619 2620 if (list_empty(&dmar_atsr_units)) 2621 pr_info("No ATSR found\n"); 2622 2623 if (list_empty(&dmar_satc_units)) 2624 pr_info("No SATC found\n"); 2625 2626 init_no_remapping_devices(); 2627 2628 ret = init_dmars(); 2629 if (ret) { 2630 if (force_on) 2631 panic("tboot: Failed to initialize DMARs\n"); 2632 pr_err("Initialization failed\n"); 2633 goto out_free_dmar; 2634 } 2635 up_write(&dmar_global_lock); 2636 2637 init_iommu_pm_ops(); 2638 2639 down_read(&dmar_global_lock); 2640 for_each_active_iommu(iommu, drhd) { 2641 /* 2642 * The flush queue implementation does not perform 2643 * page-selective invalidations that are required for efficient 2644 * TLB flushes in virtual environments. The benefit of batching 2645 * is likely to be much lower than the overhead of synchronizing 2646 * the virtual and physical IOMMU page-tables. 2647 */ 2648 if (cap_caching_mode(iommu->cap) && 2649 !first_level_by_default(iommu)) { 2650 pr_info_once("IOMMU batching disallowed due to virtualization\n"); 2651 iommu_set_dma_strict(); 2652 } 2653 iommu_device_sysfs_add(&iommu->iommu, NULL, 2654 intel_iommu_groups, 2655 "%s", iommu->name); 2656 /* 2657 * The iommu device probe is protected by the iommu_probe_device_lock. 2658 * Release the dmar_global_lock before entering the device probe path 2659 * to avoid unnecessary lock order splat. 2660 */ 2661 up_read(&dmar_global_lock); 2662 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); 2663 down_read(&dmar_global_lock); 2664 2665 iommu_pmu_register(iommu); 2666 } 2667 2668 if (probe_acpi_namespace_devices()) 2669 pr_warn("ACPI name space devices didn't probe correctly\n"); 2670 2671 /* Finally, we enable the DMA remapping hardware. */ 2672 for_each_iommu(iommu, drhd) { 2673 if (!drhd->ignored && !translation_pre_enabled(iommu)) 2674 iommu_enable_translation(iommu); 2675 2676 iommu_disable_protect_mem_regions(iommu); 2677 } 2678 up_read(&dmar_global_lock); 2679 2680 pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); 2681 2682 intel_iommu_enabled = 1; 2683 2684 return 0; 2685 2686 out_free_dmar: 2687 intel_iommu_free_dmars(); 2688 up_write(&dmar_global_lock); 2689 return ret; 2690 } 2691 2692 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque) 2693 { 2694 struct device_domain_info *info = opaque; 2695 2696 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff); 2697 return 0; 2698 } 2699 2700 /* 2701 * NB - intel-iommu lacks any sort of reference counting for the users of 2702 * dependent devices. If multiple endpoints have intersecting dependent 2703 * devices, unbinding the driver from any one of them will possibly leave 2704 * the others unable to operate. 
2705 */ 2706 static void domain_context_clear(struct device_domain_info *info) 2707 { 2708 if (!dev_is_pci(info->dev)) { 2709 domain_context_clear_one(info, info->bus, info->devfn); 2710 return; 2711 } 2712 2713 pci_for_each_dma_alias(to_pci_dev(info->dev), 2714 &domain_context_clear_one_cb, info); 2715 iommu_disable_pci_ats(info); 2716 } 2717 2718 /* 2719 * Clear the page table pointer in context or pasid table entries so that 2720 * all DMA requests without PASID from the device are blocked. If the page 2721 * table has been set, clean up the data structures. 2722 */ 2723 void device_block_translation(struct device *dev) 2724 { 2725 struct device_domain_info *info = dev_iommu_priv_get(dev); 2726 struct intel_iommu *iommu = info->iommu; 2727 unsigned long flags; 2728 2729 /* Device in DMA blocking state. Noting to do. */ 2730 if (!info->domain_attached) 2731 return; 2732 2733 if (info->domain) 2734 cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); 2735 2736 if (!dev_is_real_dma_subdevice(dev)) { 2737 if (sm_supported(iommu)) 2738 intel_pasid_tear_down_entry(iommu, dev, 2739 IOMMU_NO_PASID, false); 2740 else 2741 domain_context_clear(info); 2742 } 2743 2744 /* Device now in DMA blocking state. */ 2745 info->domain_attached = false; 2746 2747 if (!info->domain) 2748 return; 2749 2750 spin_lock_irqsave(&info->domain->lock, flags); 2751 list_del(&info->link); 2752 spin_unlock_irqrestore(&info->domain->lock, flags); 2753 2754 domain_detach_iommu(info->domain, iommu); 2755 info->domain = NULL; 2756 } 2757 2758 static int blocking_domain_attach_dev(struct iommu_domain *domain, 2759 struct device *dev, 2760 struct iommu_domain *old) 2761 { 2762 struct device_domain_info *info = dev_iommu_priv_get(dev); 2763 2764 iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev); 2765 device_block_translation(dev); 2766 return 0; 2767 } 2768 2769 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, 2770 struct device *dev, ioasid_t pasid, 2771 struct iommu_domain *old); 2772 2773 static struct iommu_domain blocking_domain = { 2774 .type = IOMMU_DOMAIN_BLOCKED, 2775 .ops = &(const struct iommu_domain_ops) { 2776 .attach_dev = blocking_domain_attach_dev, 2777 .set_dev_pasid = blocking_domain_set_dev_pasid, 2778 } 2779 }; 2780 2781 static struct dmar_domain *paging_domain_alloc(void) 2782 { 2783 struct dmar_domain *domain; 2784 2785 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 2786 if (!domain) 2787 return ERR_PTR(-ENOMEM); 2788 2789 INIT_LIST_HEAD(&domain->devices); 2790 INIT_LIST_HEAD(&domain->dev_pasids); 2791 INIT_LIST_HEAD(&domain->cache_tags); 2792 spin_lock_init(&domain->lock); 2793 spin_lock_init(&domain->cache_lock); 2794 xa_init(&domain->iommu_array); 2795 INIT_LIST_HEAD(&domain->s1_domains); 2796 spin_lock_init(&domain->s1_lock); 2797 2798 return domain; 2799 } 2800 2801 static unsigned int compute_vasz_lg2_fs(struct intel_iommu *iommu, 2802 unsigned int *top_level) 2803 { 2804 unsigned int mgaw = cap_mgaw(iommu->cap); 2805 2806 /* 2807 * Spec 3.6 First-Stage Translation: 2808 * 2809 * Software must limit addresses to less than the minimum of MGAW 2810 * and the lower canonical address width implied by FSPM (i.e., 2811 * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level). 
2812 */ 2813 if (mgaw > 48 && cap_fl5lp_support(iommu->cap)) { 2814 *top_level = 4; 2815 return min(57, mgaw); 2816 } 2817 2818 /* Four level is always supported */ 2819 *top_level = 3; 2820 return min(48, mgaw); 2821 } 2822 2823 static struct iommu_domain * 2824 intel_iommu_domain_alloc_first_stage(struct device *dev, 2825 struct intel_iommu *iommu, u32 flags) 2826 { 2827 struct pt_iommu_x86_64_cfg cfg = {}; 2828 struct dmar_domain *dmar_domain; 2829 int ret; 2830 2831 if (flags & ~IOMMU_HWPT_ALLOC_PASID) 2832 return ERR_PTR(-EOPNOTSUPP); 2833 2834 /* Only SL is available in legacy mode */ 2835 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) 2836 return ERR_PTR(-EOPNOTSUPP); 2837 2838 dmar_domain = paging_domain_alloc(); 2839 if (IS_ERR(dmar_domain)) 2840 return ERR_CAST(dmar_domain); 2841 2842 cfg.common.hw_max_vasz_lg2 = 2843 compute_vasz_lg2_fs(iommu, &cfg.top_level); 2844 cfg.common.hw_max_oasz_lg2 = 52; 2845 cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) | 2846 BIT(PT_FEAT_FLUSH_RANGE); 2847 /* First stage always uses scalable mode */ 2848 if (!ecap_smpwc(iommu->ecap)) 2849 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT); 2850 dmar_domain->iommu.iommu_device = dev; 2851 dmar_domain->iommu.nid = dev_to_node(dev); 2852 dmar_domain->domain.ops = &intel_fs_paging_domain_ops; 2853 /* 2854 * iotlb sync for map is only needed for legacy implementations that 2855 * explicitly require flushing internal write buffers to ensure memory 2856 * coherence. 2857 */ 2858 if (rwbf_required(iommu)) 2859 dmar_domain->iotlb_sync_map = true; 2860 2861 ret = pt_iommu_x86_64_init(&dmar_domain->fspt, &cfg, GFP_KERNEL); 2862 if (ret) { 2863 kfree(dmar_domain); 2864 return ERR_PTR(ret); 2865 } 2866 2867 if (!cap_fl1gp_support(iommu->cap)) 2868 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G; 2869 if (!intel_iommu_superpage) 2870 dmar_domain->domain.pgsize_bitmap = SZ_4K; 2871 2872 return &dmar_domain->domain; 2873 } 2874 2875 static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu, 2876 unsigned int *top_level) 2877 { 2878 unsigned int sagaw = cap_sagaw(iommu->cap); 2879 unsigned int mgaw = cap_mgaw(iommu->cap); 2880 2881 /* 2882 * Find the largest table size that both the mgaw and sagaw support. 2883 * This sets the valid range of IOVA and the top starting level. 2884 * Some HW may only support a 4 or 5 level walk but must limit IOVA to 2885 * 3 levels. 
2886 */ 2887 if (mgaw > 48 && sagaw >= BIT(3)) { 2888 *top_level = 4; 2889 return min(57, mgaw); 2890 } else if (mgaw > 39 && sagaw >= BIT(2)) { 2891 *top_level = 3 + ffs(sagaw >> 3); 2892 return min(48, mgaw); 2893 } else if (mgaw > 30 && sagaw >= BIT(1)) { 2894 *top_level = 2 + ffs(sagaw >> 2); 2895 return min(39, mgaw); 2896 } 2897 return 0; 2898 } 2899 2900 static const struct iommu_dirty_ops intel_second_stage_dirty_ops = { 2901 IOMMU_PT_DIRTY_OPS(vtdss), 2902 .set_dirty_tracking = intel_iommu_set_dirty_tracking, 2903 }; 2904 2905 static struct iommu_domain * 2906 intel_iommu_domain_alloc_second_stage(struct device *dev, 2907 struct intel_iommu *iommu, u32 flags) 2908 { 2909 struct pt_iommu_vtdss_cfg cfg = {}; 2910 struct dmar_domain *dmar_domain; 2911 unsigned int sslps; 2912 int ret; 2913 2914 if (flags & 2915 (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | 2916 IOMMU_HWPT_ALLOC_PASID))) 2917 return ERR_PTR(-EOPNOTSUPP); 2918 2919 if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && 2920 !nested_supported(iommu)) || 2921 ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && 2922 !ssads_supported(iommu))) 2923 return ERR_PTR(-EOPNOTSUPP); 2924 2925 /* Legacy mode always supports second stage */ 2926 if (sm_supported(iommu) && !ecap_slts(iommu->ecap)) 2927 return ERR_PTR(-EOPNOTSUPP); 2928 2929 dmar_domain = paging_domain_alloc(); 2930 if (IS_ERR(dmar_domain)) 2931 return ERR_CAST(dmar_domain); 2932 2933 cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level); 2934 cfg.common.hw_max_oasz_lg2 = 52; 2935 cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE); 2936 2937 /* 2938 * Read-only mapping is disallowed on the domain which serves as the 2939 * parent in a nested configuration, due to HW errata 2940 * (ERRATA_772415_SPR17) 2941 */ 2942 if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) 2943 cfg.common.features |= BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE); 2944 2945 if (!iommu_paging_structure_coherency(iommu)) 2946 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT); 2947 dmar_domain->iommu.iommu_device = dev; 2948 dmar_domain->iommu.nid = dev_to_node(dev); 2949 dmar_domain->domain.ops = &intel_ss_paging_domain_ops; 2950 dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; 2951 2952 if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) 2953 dmar_domain->domain.dirty_ops = &intel_second_stage_dirty_ops; 2954 2955 ret = pt_iommu_vtdss_init(&dmar_domain->sspt, &cfg, GFP_KERNEL); 2956 if (ret) { 2957 kfree(dmar_domain); 2958 return ERR_PTR(ret); 2959 } 2960 2961 /* Adjust the supported page sizes to HW capability */ 2962 sslps = cap_super_page_val(iommu->cap); 2963 if (!(sslps & BIT(0))) 2964 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_2M; 2965 if (!(sslps & BIT(1))) 2966 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G; 2967 if (!intel_iommu_superpage) 2968 dmar_domain->domain.pgsize_bitmap = SZ_4K; 2969 2970 /* 2971 * Besides the internal write buffer flush, the caching mode used for 2972 * legacy nested translation (which utilizes shadowing page tables) 2973 * also requires iotlb sync on map. 
2974 */ 2975 if (rwbf_required(iommu) || cap_caching_mode(iommu->cap)) 2976 dmar_domain->iotlb_sync_map = true; 2977 2978 return &dmar_domain->domain; 2979 } 2980 2981 static struct iommu_domain * 2982 intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, 2983 const struct iommu_user_data *user_data) 2984 { 2985 struct device_domain_info *info = dev_iommu_priv_get(dev); 2986 struct intel_iommu *iommu = info->iommu; 2987 struct iommu_domain *domain; 2988 2989 if (user_data) 2990 return ERR_PTR(-EOPNOTSUPP); 2991 2992 /* Prefer first stage if possible by default. */ 2993 domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags); 2994 if (domain != ERR_PTR(-EOPNOTSUPP)) 2995 return domain; 2996 return intel_iommu_domain_alloc_second_stage(dev, iommu, flags); 2997 } 2998 2999 static void intel_iommu_domain_free(struct iommu_domain *domain) 3000 { 3001 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3002 3003 if (WARN_ON(dmar_domain->nested_parent && 3004 !list_empty(&dmar_domain->s1_domains))) 3005 return; 3006 3007 if (WARN_ON(!list_empty(&dmar_domain->devices))) 3008 return; 3009 3010 pt_iommu_deinit(&dmar_domain->iommu); 3011 3012 kfree(dmar_domain->qi_batch); 3013 kfree(dmar_domain); 3014 } 3015 3016 static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain, 3017 struct intel_iommu *iommu) 3018 { 3019 if (WARN_ON(dmar_domain->domain.dirty_ops || 3020 dmar_domain->nested_parent)) 3021 return -EINVAL; 3022 3023 /* Only SL is available in legacy mode */ 3024 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) 3025 return -EINVAL; 3026 3027 if (!ecap_smpwc(iommu->ecap) && 3028 !(dmar_domain->fspt.x86_64_pt.common.features & 3029 BIT(PT_FEAT_DMA_INCOHERENT))) 3030 return -EINVAL; 3031 3032 /* Supports the number of table levels */ 3033 if (!cap_fl5lp_support(iommu->cap) && 3034 dmar_domain->fspt.x86_64_pt.common.max_vasz_lg2 > 48) 3035 return -EINVAL; 3036 3037 /* Same page size support */ 3038 if (!cap_fl1gp_support(iommu->cap) && 3039 (dmar_domain->domain.pgsize_bitmap & SZ_1G)) 3040 return -EINVAL; 3041 3042 /* iotlb sync on map requirement */ 3043 if ((rwbf_required(iommu)) && !dmar_domain->iotlb_sync_map) 3044 return -EINVAL; 3045 3046 return 0; 3047 } 3048 3049 static int 3050 paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain, 3051 struct intel_iommu *iommu) 3052 { 3053 unsigned int vasz_lg2 = dmar_domain->sspt.vtdss_pt.common.max_vasz_lg2; 3054 unsigned int sslps = cap_super_page_val(iommu->cap); 3055 struct pt_iommu_vtdss_hw_info pt_info; 3056 3057 pt_iommu_vtdss_hw_info(&dmar_domain->sspt, &pt_info); 3058 3059 if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu)) 3060 return -EINVAL; 3061 if (dmar_domain->nested_parent && !nested_supported(iommu)) 3062 return -EINVAL; 3063 3064 /* Legacy mode always supports second stage */ 3065 if (sm_supported(iommu) && !ecap_slts(iommu->ecap)) 3066 return -EINVAL; 3067 3068 if (!iommu_paging_structure_coherency(iommu) && 3069 !(dmar_domain->sspt.vtdss_pt.common.features & 3070 BIT(PT_FEAT_DMA_INCOHERENT))) 3071 return -EINVAL; 3072 3073 /* Address width falls within the capability */ 3074 if (cap_mgaw(iommu->cap) < vasz_lg2) 3075 return -EINVAL; 3076 3077 /* Page table level is supported. 
*/ 3078 if (!(cap_sagaw(iommu->cap) & BIT(pt_info.aw))) 3079 return -EINVAL; 3080 3081 /* Same page size support */ 3082 if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M)) 3083 return -EINVAL; 3084 if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G)) 3085 return -EINVAL; 3086 3087 /* iotlb sync on map requirement */ 3088 if ((rwbf_required(iommu) || cap_caching_mode(iommu->cap)) && 3089 !dmar_domain->iotlb_sync_map) 3090 return -EINVAL; 3091 3092 /* 3093 * FIXME this is locked wrong, it needs to be under the 3094 * dmar_domain->lock 3095 */ 3096 if ((dmar_domain->sspt.vtdss_pt.common.features & 3097 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE)) && 3098 !ecap_sc_support(iommu->ecap)) 3099 return -EINVAL; 3100 return 0; 3101 } 3102 3103 int paging_domain_compatible(struct iommu_domain *domain, struct device *dev) 3104 { 3105 struct device_domain_info *info = dev_iommu_priv_get(dev); 3106 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3107 struct intel_iommu *iommu = info->iommu; 3108 int ret = -EINVAL; 3109 3110 if (intel_domain_is_fs_paging(dmar_domain)) 3111 ret = paging_domain_compatible_first_stage(dmar_domain, iommu); 3112 else if (intel_domain_is_ss_paging(dmar_domain)) 3113 ret = paging_domain_compatible_second_stage(dmar_domain, iommu); 3114 else if (WARN_ON(true)) 3115 ret = -EINVAL; 3116 if (ret) 3117 return ret; 3118 3119 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && 3120 context_copied(iommu, info->bus, info->devfn)) 3121 return intel_pasid_setup_sm_context(dev); 3122 3123 return 0; 3124 } 3125 3126 static int intel_iommu_attach_device(struct iommu_domain *domain, 3127 struct device *dev, 3128 struct iommu_domain *old) 3129 { 3130 int ret; 3131 3132 device_block_translation(dev); 3133 3134 ret = paging_domain_compatible(domain, dev); 3135 if (ret) 3136 return ret; 3137 3138 ret = iopf_for_domain_set(domain, dev); 3139 if (ret) 3140 return ret; 3141 3142 ret = dmar_domain_attach_device(to_dmar_domain(domain), dev); 3143 if (ret) 3144 iopf_for_domain_remove(domain, dev); 3145 3146 return ret; 3147 } 3148 3149 static void intel_iommu_tlb_sync(struct iommu_domain *domain, 3150 struct iommu_iotlb_gather *gather) 3151 { 3152 cache_tag_flush_range(to_dmar_domain(domain), gather->start, 3153 gather->end, 3154 iommu_pages_list_empty(&gather->freelist)); 3155 iommu_put_pages_list(&gather->freelist); 3156 } 3157 3158 static bool domain_support_force_snooping(struct dmar_domain *domain) 3159 { 3160 struct device_domain_info *info; 3161 bool support = true; 3162 3163 assert_spin_locked(&domain->lock); 3164 list_for_each_entry(info, &domain->devices, link) { 3165 if (!ecap_sc_support(info->iommu->ecap)) { 3166 support = false; 3167 break; 3168 } 3169 } 3170 3171 return support; 3172 } 3173 3174 static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain) 3175 { 3176 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3177 struct device_domain_info *info; 3178 3179 guard(spinlock_irqsave)(&dmar_domain->lock); 3180 3181 if (dmar_domain->force_snooping) 3182 return true; 3183 3184 if (!domain_support_force_snooping(dmar_domain)) 3185 return false; 3186 3187 dmar_domain->force_snooping = true; 3188 list_for_each_entry(info, &dmar_domain->devices, link) 3189 intel_pasid_setup_page_snoop_control(info->iommu, info->dev, 3190 IOMMU_NO_PASID); 3191 return true; 3192 } 3193 3194 static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain) 3195 { 3196 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 
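	/*
	 * Hold the domain lock so that the device list cannot change while
	 * domain_support_force_snooping() walks it checking ecap_sc on
	 * every attached IOMMU.
	 */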
3197 3198 guard(spinlock_irqsave)(&dmar_domain->lock); 3199 if (!domain_support_force_snooping(dmar_domain)) 3200 return false; 3201 3202 /* 3203 * Second level page table supports per-PTE snoop control. The 3204 * iommu_map() interface will handle this by setting SNP bit. 3205 */ 3206 dmar_domain->sspt.vtdss_pt.common.features |= 3207 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE); 3208 dmar_domain->force_snooping = true; 3209 return true; 3210 } 3211 3212 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) 3213 { 3214 struct device_domain_info *info = dev_iommu_priv_get(dev); 3215 3216 switch (cap) { 3217 case IOMMU_CAP_CACHE_COHERENCY: 3218 case IOMMU_CAP_DEFERRED_FLUSH: 3219 return true; 3220 case IOMMU_CAP_PRE_BOOT_PROTECTION: 3221 return dmar_platform_optin(); 3222 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: 3223 return ecap_sc_support(info->iommu->ecap); 3224 case IOMMU_CAP_DIRTY_TRACKING: 3225 return ssads_supported(info->iommu); 3226 default: 3227 return false; 3228 } 3229 } 3230 3231 static struct iommu_device *intel_iommu_probe_device(struct device *dev) 3232 { 3233 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; 3234 struct device_domain_info *info; 3235 struct intel_iommu *iommu; 3236 u8 bus, devfn; 3237 int ret; 3238 3239 iommu = device_lookup_iommu(dev, &bus, &devfn); 3240 if (!iommu || !iommu->iommu.ops) 3241 return ERR_PTR(-ENODEV); 3242 3243 info = kzalloc(sizeof(*info), GFP_KERNEL); 3244 if (!info) 3245 return ERR_PTR(-ENOMEM); 3246 3247 if (dev_is_real_dma_subdevice(dev)) { 3248 info->bus = pdev->bus->number; 3249 info->devfn = pdev->devfn; 3250 info->segment = pci_domain_nr(pdev->bus); 3251 } else { 3252 info->bus = bus; 3253 info->devfn = devfn; 3254 info->segment = iommu->segment; 3255 } 3256 3257 info->dev = dev; 3258 info->iommu = iommu; 3259 if (dev_is_pci(dev)) { 3260 if (ecap_dev_iotlb_support(iommu->ecap) && 3261 pci_ats_supported(pdev) && 3262 dmar_ats_supported(pdev, iommu)) { 3263 info->ats_supported = 1; 3264 info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); 3265 3266 /* 3267 * For IOMMU that supports device IOTLB throttling 3268 * (DIT), we assign PFSID to the invalidation desc 3269 * of a VF such that IOMMU HW can gauge queue depth 3270 * at PF level. If DIT is not set, PFSID will be 3271 * treated as reserved, which should be set to 0. 
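			 * (pci_physfn() resolves a VF to its PF and returns
			 * a non-VF device unchanged, so in the latter case
			 * the PFSID is simply the device's own requester ID.)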
3272 */ 3273 if (ecap_dit(iommu->ecap)) 3274 info->pfsid = pci_dev_id(pci_physfn(pdev)); 3275 info->ats_qdep = pci_ats_queue_depth(pdev); 3276 } 3277 if (sm_supported(iommu)) { 3278 if (pasid_supported(iommu)) { 3279 int features = pci_pasid_features(pdev); 3280 3281 if (features >= 0) 3282 info->pasid_supported = features | 1; 3283 } 3284 3285 if (info->ats_supported && ecap_prs(iommu->ecap) && 3286 ecap_pds(iommu->ecap) && pci_pri_supported(pdev)) 3287 info->pri_supported = 1; 3288 } 3289 } 3290 3291 dev_iommu_priv_set(dev, info); 3292 if (pdev && pci_ats_supported(pdev)) { 3293 pci_prepare_ats(pdev, VTD_PAGE_SHIFT); 3294 ret = device_rbtree_insert(iommu, info); 3295 if (ret) 3296 goto free; 3297 } 3298 3299 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { 3300 ret = intel_pasid_alloc_table(dev); 3301 if (ret) { 3302 dev_err(dev, "PASID table allocation failed\n"); 3303 goto clear_rbtree; 3304 } 3305 3306 if (!context_copied(iommu, info->bus, info->devfn)) { 3307 ret = intel_pasid_setup_sm_context(dev); 3308 if (ret) 3309 goto free_table; 3310 } 3311 } 3312 3313 intel_iommu_debugfs_create_dev(info); 3314 3315 return &iommu->iommu; 3316 free_table: 3317 intel_pasid_free_table(dev); 3318 clear_rbtree: 3319 device_rbtree_remove(info); 3320 free: 3321 kfree(info); 3322 3323 return ERR_PTR(ret); 3324 } 3325 3326 static void intel_iommu_probe_finalize(struct device *dev) 3327 { 3328 struct device_domain_info *info = dev_iommu_priv_get(dev); 3329 struct intel_iommu *iommu = info->iommu; 3330 3331 /* 3332 * The PCIe spec, in its wisdom, declares that the behaviour of the 3333 * device is undefined if you enable PASID support after ATS support. 3334 * So always enable PASID support on devices which have it, even if 3335 * we can't yet know if we're ever going to use it. 3336 */ 3337 if (info->pasid_supported && 3338 !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) 3339 info->pasid_enabled = 1; 3340 3341 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { 3342 iommu_enable_pci_ats(info); 3343 /* Assign a DEVTLB cache tag to the default domain. 
*/ 3344 if (info->ats_enabled && info->domain) { 3345 u16 did = domain_id_iommu(info->domain, iommu); 3346 3347 if (cache_tag_assign(info->domain, did, dev, 3348 IOMMU_NO_PASID, CACHE_TAG_DEVTLB)) 3349 iommu_disable_pci_ats(info); 3350 } 3351 } 3352 iommu_enable_pci_pri(info); 3353 } 3354 3355 static void intel_iommu_release_device(struct device *dev) 3356 { 3357 struct device_domain_info *info = dev_iommu_priv_get(dev); 3358 struct intel_iommu *iommu = info->iommu; 3359 3360 iommu_disable_pci_pri(info); 3361 iommu_disable_pci_ats(info); 3362 3363 if (info->pasid_enabled) { 3364 pci_disable_pasid(to_pci_dev(dev)); 3365 info->pasid_enabled = 0; 3366 } 3367 3368 mutex_lock(&iommu->iopf_lock); 3369 if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev))) 3370 device_rbtree_remove(info); 3371 mutex_unlock(&iommu->iopf_lock); 3372 3373 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && 3374 !context_copied(iommu, info->bus, info->devfn)) 3375 intel_pasid_teardown_sm_context(dev); 3376 3377 intel_pasid_free_table(dev); 3378 intel_iommu_debugfs_remove_dev(info); 3379 kfree(info); 3380 } 3381 3382 static void intel_iommu_get_resv_regions(struct device *device, 3383 struct list_head *head) 3384 { 3385 int prot = DMA_PTE_READ | DMA_PTE_WRITE; 3386 struct iommu_resv_region *reg; 3387 struct dmar_rmrr_unit *rmrr; 3388 struct device *i_dev; 3389 int i; 3390 3391 rcu_read_lock(); 3392 for_each_rmrr_units(rmrr) { 3393 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 3394 i, i_dev) { 3395 struct iommu_resv_region *resv; 3396 enum iommu_resv_type type; 3397 size_t length; 3398 3399 if (i_dev != device && 3400 !is_downstream_to_pci_bridge(device, i_dev)) 3401 continue; 3402 3403 length = rmrr->end_address - rmrr->base_address + 1; 3404 3405 type = device_rmrr_is_relaxable(device) ? 3406 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT; 3407 3408 resv = iommu_alloc_resv_region(rmrr->base_address, 3409 length, prot, type, 3410 GFP_ATOMIC); 3411 if (!resv) 3412 break; 3413 3414 list_add_tail(&resv->list, head); 3415 } 3416 } 3417 rcu_read_unlock(); 3418 3419 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA 3420 if (dev_is_pci(device)) { 3421 struct pci_dev *pdev = to_pci_dev(device); 3422 3423 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { 3424 reg = iommu_alloc_resv_region(0, 1UL << 24, prot, 3425 IOMMU_RESV_DIRECT_RELAXABLE, 3426 GFP_KERNEL); 3427 if (reg) 3428 list_add_tail(®->list, head); 3429 } 3430 } 3431 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */ 3432 3433 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, 3434 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, 3435 0, IOMMU_RESV_MSI, GFP_KERNEL); 3436 if (!reg) 3437 return; 3438 list_add_tail(®->list, head); 3439 } 3440 3441 static struct iommu_group *intel_iommu_device_group(struct device *dev) 3442 { 3443 if (dev_is_pci(dev)) 3444 return pci_device_group(dev); 3445 return generic_device_group(dev); 3446 } 3447 3448 int intel_iommu_enable_iopf(struct device *dev) 3449 { 3450 struct device_domain_info *info = dev_iommu_priv_get(dev); 3451 struct intel_iommu *iommu = info->iommu; 3452 int ret; 3453 3454 if (!info->pri_enabled) 3455 return -ENODEV; 3456 3457 /* pri_enabled is protected by the group mutex. 
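	 * So is iopf_refcount; the assert below documents that both fields
	 * are only touched with that mutex held.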
*/ 3458 iommu_group_mutex_assert(dev); 3459 if (info->iopf_refcount) { 3460 info->iopf_refcount++; 3461 return 0; 3462 } 3463 3464 ret = iopf_queue_add_device(iommu->iopf_queue, dev); 3465 if (ret) 3466 return ret; 3467 3468 info->iopf_refcount = 1; 3469 3470 return 0; 3471 } 3472 3473 void intel_iommu_disable_iopf(struct device *dev) 3474 { 3475 struct device_domain_info *info = dev_iommu_priv_get(dev); 3476 struct intel_iommu *iommu = info->iommu; 3477 3478 if (WARN_ON(!info->pri_enabled || !info->iopf_refcount)) 3479 return; 3480 3481 iommu_group_mutex_assert(dev); 3482 if (--info->iopf_refcount) 3483 return; 3484 3485 iopf_queue_remove_device(iommu->iopf_queue, dev); 3486 } 3487 3488 static bool intel_iommu_is_attach_deferred(struct device *dev) 3489 { 3490 struct device_domain_info *info = dev_iommu_priv_get(dev); 3491 3492 return translation_pre_enabled(info->iommu) && !info->domain; 3493 } 3494 3495 /* 3496 * Check that the device does not live on an external facing PCI port that is 3497 * marked as untrusted. Such devices should not be able to apply quirks and 3498 * thus not be able to bypass the IOMMU restrictions. 3499 */ 3500 static bool risky_device(struct pci_dev *pdev) 3501 { 3502 if (pdev->untrusted) { 3503 pci_info(pdev, 3504 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n", 3505 pdev->vendor, pdev->device); 3506 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n"); 3507 return true; 3508 } 3509 return false; 3510 } 3511 3512 static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, 3513 unsigned long iova, size_t size) 3514 { 3515 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3516 3517 if (dmar_domain->iotlb_sync_map) 3518 cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1); 3519 3520 return 0; 3521 } 3522 3523 void domain_remove_dev_pasid(struct iommu_domain *domain, 3524 struct device *dev, ioasid_t pasid) 3525 { 3526 struct device_domain_info *info = dev_iommu_priv_get(dev); 3527 struct dev_pasid_info *curr, *dev_pasid = NULL; 3528 struct intel_iommu *iommu = info->iommu; 3529 struct dmar_domain *dmar_domain; 3530 unsigned long flags; 3531 3532 if (!domain) 3533 return; 3534 3535 /* Identity domain has no meta data for pasid. 
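	 * No dev_pasid_info is ever allocated for it (see
	 * identity_domain_set_dev_pasid()), so there is nothing to unlink
	 * or free here.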
*/ 3536 if (domain->type == IOMMU_DOMAIN_IDENTITY) 3537 return; 3538 3539 dmar_domain = to_dmar_domain(domain); 3540 spin_lock_irqsave(&dmar_domain->lock, flags); 3541 list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { 3542 if (curr->dev == dev && curr->pasid == pasid) { 3543 list_del(&curr->link_domain); 3544 dev_pasid = curr; 3545 break; 3546 } 3547 } 3548 spin_unlock_irqrestore(&dmar_domain->lock, flags); 3549 3550 cache_tag_unassign_domain(dmar_domain, dev, pasid); 3551 domain_detach_iommu(dmar_domain, iommu); 3552 if (!WARN_ON_ONCE(!dev_pasid)) { 3553 intel_iommu_debugfs_remove_dev_pasid(dev_pasid); 3554 kfree(dev_pasid); 3555 } 3556 } 3557 3558 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, 3559 struct device *dev, ioasid_t pasid, 3560 struct iommu_domain *old) 3561 { 3562 struct device_domain_info *info = dev_iommu_priv_get(dev); 3563 3564 intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); 3565 iopf_for_domain_remove(old, dev); 3566 domain_remove_dev_pasid(old, dev, pasid); 3567 3568 return 0; 3569 } 3570 3571 struct dev_pasid_info * 3572 domain_add_dev_pasid(struct iommu_domain *domain, 3573 struct device *dev, ioasid_t pasid) 3574 { 3575 struct device_domain_info *info = dev_iommu_priv_get(dev); 3576 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3577 struct intel_iommu *iommu = info->iommu; 3578 struct dev_pasid_info *dev_pasid; 3579 unsigned long flags; 3580 int ret; 3581 3582 dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); 3583 if (!dev_pasid) 3584 return ERR_PTR(-ENOMEM); 3585 3586 ret = domain_attach_iommu(dmar_domain, iommu); 3587 if (ret) 3588 goto out_free; 3589 3590 ret = cache_tag_assign_domain(dmar_domain, dev, pasid); 3591 if (ret) 3592 goto out_detach_iommu; 3593 3594 dev_pasid->dev = dev; 3595 dev_pasid->pasid = pasid; 3596 spin_lock_irqsave(&dmar_domain->lock, flags); 3597 list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); 3598 spin_unlock_irqrestore(&dmar_domain->lock, flags); 3599 3600 return dev_pasid; 3601 out_detach_iommu: 3602 domain_detach_iommu(dmar_domain, iommu); 3603 out_free: 3604 kfree(dev_pasid); 3605 return ERR_PTR(ret); 3606 } 3607 3608 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, 3609 struct device *dev, ioasid_t pasid, 3610 struct iommu_domain *old) 3611 { 3612 struct device_domain_info *info = dev_iommu_priv_get(dev); 3613 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3614 struct intel_iommu *iommu = info->iommu; 3615 struct dev_pasid_info *dev_pasid; 3616 int ret; 3617 3618 if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING))) 3619 return -EINVAL; 3620 3621 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) 3622 return -EOPNOTSUPP; 3623 3624 if (domain->dirty_ops) 3625 return -EINVAL; 3626 3627 if (context_copied(iommu, info->bus, info->devfn)) 3628 return -EBUSY; 3629 3630 ret = paging_domain_compatible(domain, dev); 3631 if (ret) 3632 return ret; 3633 3634 dev_pasid = domain_add_dev_pasid(domain, dev, pasid); 3635 if (IS_ERR(dev_pasid)) 3636 return PTR_ERR(dev_pasid); 3637 3638 ret = iopf_for_domain_replace(domain, old, dev); 3639 if (ret) 3640 goto out_remove_dev_pasid; 3641 3642 if (intel_domain_is_fs_paging(dmar_domain)) 3643 ret = domain_setup_first_level(iommu, dmar_domain, 3644 dev, pasid, old); 3645 else if (intel_domain_is_ss_paging(dmar_domain)) 3646 ret = domain_setup_second_level(iommu, dmar_domain, 3647 dev, pasid, old); 3648 else if (WARN_ON(true)) 3649 ret = -EINVAL; 3650 3651 if (ret) 3652 goto 
out_unwind_iopf; 3653 3654 domain_remove_dev_pasid(old, dev, pasid); 3655 3656 intel_iommu_debugfs_create_dev_pasid(dev_pasid); 3657 3658 return 0; 3659 3660 out_unwind_iopf: 3661 iopf_for_domain_replace(old, domain, dev); 3662 out_remove_dev_pasid: 3663 domain_remove_dev_pasid(domain, dev, pasid); 3664 return ret; 3665 } 3666 3667 static void *intel_iommu_hw_info(struct device *dev, u32 *length, 3668 enum iommu_hw_info_type *type) 3669 { 3670 struct device_domain_info *info = dev_iommu_priv_get(dev); 3671 struct intel_iommu *iommu = info->iommu; 3672 struct iommu_hw_info_vtd *vtd; 3673 3674 if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && 3675 *type != IOMMU_HW_INFO_TYPE_INTEL_VTD) 3676 return ERR_PTR(-EOPNOTSUPP); 3677 3678 vtd = kzalloc(sizeof(*vtd), GFP_KERNEL); 3679 if (!vtd) 3680 return ERR_PTR(-ENOMEM); 3681 3682 vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17; 3683 vtd->cap_reg = iommu->cap; 3684 vtd->ecap_reg = iommu->ecap; 3685 *length = sizeof(*vtd); 3686 *type = IOMMU_HW_INFO_TYPE_INTEL_VTD; 3687 return vtd; 3688 } 3689 3690 /* 3691 * Set dirty tracking for the device list of a domain. The caller must 3692 * hold the domain->lock when calling it. 3693 */ 3694 static int device_set_dirty_tracking(struct list_head *devices, bool enable) 3695 { 3696 struct device_domain_info *info; 3697 int ret = 0; 3698 3699 list_for_each_entry(info, devices, link) { 3700 ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, 3701 IOMMU_NO_PASID, enable); 3702 if (ret) 3703 break; 3704 } 3705 3706 return ret; 3707 } 3708 3709 static int parent_domain_set_dirty_tracking(struct dmar_domain *domain, 3710 bool enable) 3711 { 3712 struct dmar_domain *s1_domain; 3713 unsigned long flags; 3714 int ret; 3715 3716 spin_lock(&domain->s1_lock); 3717 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { 3718 spin_lock_irqsave(&s1_domain->lock, flags); 3719 ret = device_set_dirty_tracking(&s1_domain->devices, enable); 3720 spin_unlock_irqrestore(&s1_domain->lock, flags); 3721 if (ret) 3722 goto err_unwind; 3723 } 3724 spin_unlock(&domain->s1_lock); 3725 return 0; 3726 3727 err_unwind: 3728 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { 3729 spin_lock_irqsave(&s1_domain->lock, flags); 3730 device_set_dirty_tracking(&s1_domain->devices, 3731 domain->dirty_tracking); 3732 spin_unlock_irqrestore(&s1_domain->lock, flags); 3733 } 3734 spin_unlock(&domain->s1_lock); 3735 return ret; 3736 } 3737 3738 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, 3739 bool enable) 3740 { 3741 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3742 int ret; 3743 3744 spin_lock(&dmar_domain->lock); 3745 if (dmar_domain->dirty_tracking == enable) 3746 goto out_unlock; 3747 3748 ret = device_set_dirty_tracking(&dmar_domain->devices, enable); 3749 if (ret) 3750 goto err_unwind; 3751 3752 if (dmar_domain->nested_parent) { 3753 ret = parent_domain_set_dirty_tracking(dmar_domain, enable); 3754 if (ret) 3755 goto err_unwind; 3756 } 3757 3758 dmar_domain->dirty_tracking = enable; 3759 out_unlock: 3760 spin_unlock(&dmar_domain->lock); 3761 3762 return 0; 3763 3764 err_unwind: 3765 device_set_dirty_tracking(&dmar_domain->devices, 3766 dmar_domain->dirty_tracking); 3767 spin_unlock(&dmar_domain->lock); 3768 return ret; 3769 } 3770 3771 static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn) 3772 { 3773 struct device_domain_info *info = dev_iommu_priv_get(dev); 3774 struct intel_iommu *iommu = info->iommu; 3775 struct context_entry *context; 3776 3777 
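	/*
	 * iommu->lock serializes this context-table update against any
	 * concurrent context programming on the same IOMMU.
	 */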
spin_lock(&iommu->lock); 3778 context = iommu_context_addr(iommu, bus, devfn, 1); 3779 if (!context) { 3780 spin_unlock(&iommu->lock); 3781 return -ENOMEM; 3782 } 3783 3784 if (context_present(context) && !context_copied(iommu, bus, devfn)) { 3785 spin_unlock(&iommu->lock); 3786 return 0; 3787 } 3788 3789 copied_context_tear_down(iommu, context, bus, devfn); 3790 context_clear_entry(context); 3791 context_set_domain_id(context, FLPT_DEFAULT_DID); 3792 3793 /* 3794 * In pass through mode, AW must be programmed to indicate the largest 3795 * AGAW value supported by hardware. And ASR is ignored by hardware. 3796 */ 3797 context_set_address_width(context, iommu->msagaw); 3798 context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH); 3799 context_set_fault_enable(context); 3800 context_set_present(context); 3801 if (!ecap_coherent(iommu->ecap)) 3802 clflush_cache_range(context, sizeof(*context)); 3803 context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn); 3804 spin_unlock(&iommu->lock); 3805 3806 return 0; 3807 } 3808 3809 static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data) 3810 { 3811 struct device *dev = data; 3812 3813 return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); 3814 } 3815 3816 static int device_setup_pass_through(struct device *dev) 3817 { 3818 struct device_domain_info *info = dev_iommu_priv_get(dev); 3819 3820 if (!dev_is_pci(dev)) 3821 return context_setup_pass_through(dev, info->bus, info->devfn); 3822 3823 return pci_for_each_dma_alias(to_pci_dev(dev), 3824 context_setup_pass_through_cb, dev); 3825 } 3826 3827 static int identity_domain_attach_dev(struct iommu_domain *domain, 3828 struct device *dev, 3829 struct iommu_domain *old) 3830 { 3831 struct device_domain_info *info = dev_iommu_priv_get(dev); 3832 struct intel_iommu *iommu = info->iommu; 3833 int ret; 3834 3835 device_block_translation(dev); 3836 3837 if (dev_is_real_dma_subdevice(dev)) 3838 return 0; 3839 3840 /* 3841 * No PRI support with the global identity domain. No need to enable or 3842 * disable PRI in this path as the iommu has been put in the blocking 3843 * state. 
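	 * Below, scalable mode programs pass-through via the PASID table,
	 * while legacy mode writes CONTEXT_TT_PASS_THROUGH context entries
	 * for every DMA alias of the device.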
3844 */ 3845 if (sm_supported(iommu)) 3846 ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); 3847 else 3848 ret = device_setup_pass_through(dev); 3849 3850 if (!ret) 3851 info->domain_attached = true; 3852 3853 return ret; 3854 } 3855 3856 static int identity_domain_set_dev_pasid(struct iommu_domain *domain, 3857 struct device *dev, ioasid_t pasid, 3858 struct iommu_domain *old) 3859 { 3860 struct device_domain_info *info = dev_iommu_priv_get(dev); 3861 struct intel_iommu *iommu = info->iommu; 3862 int ret; 3863 3864 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) 3865 return -EOPNOTSUPP; 3866 3867 ret = iopf_for_domain_replace(domain, old, dev); 3868 if (ret) 3869 return ret; 3870 3871 ret = domain_setup_passthrough(iommu, dev, pasid, old); 3872 if (ret) { 3873 iopf_for_domain_replace(old, domain, dev); 3874 return ret; 3875 } 3876 3877 domain_remove_dev_pasid(old, dev, pasid); 3878 return 0; 3879 } 3880 3881 static struct iommu_domain identity_domain = { 3882 .type = IOMMU_DOMAIN_IDENTITY, 3883 .ops = &(const struct iommu_domain_ops) { 3884 .attach_dev = identity_domain_attach_dev, 3885 .set_dev_pasid = identity_domain_set_dev_pasid, 3886 }, 3887 }; 3888 3889 const struct iommu_domain_ops intel_fs_paging_domain_ops = { 3890 IOMMU_PT_DOMAIN_OPS(x86_64), 3891 .attach_dev = intel_iommu_attach_device, 3892 .set_dev_pasid = intel_iommu_set_dev_pasid, 3893 .iotlb_sync_map = intel_iommu_iotlb_sync_map, 3894 .flush_iotlb_all = intel_flush_iotlb_all, 3895 .iotlb_sync = intel_iommu_tlb_sync, 3896 .free = intel_iommu_domain_free, 3897 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs, 3898 }; 3899 3900 const struct iommu_domain_ops intel_ss_paging_domain_ops = { 3901 IOMMU_PT_DOMAIN_OPS(vtdss), 3902 .attach_dev = intel_iommu_attach_device, 3903 .set_dev_pasid = intel_iommu_set_dev_pasid, 3904 .iotlb_sync_map = intel_iommu_iotlb_sync_map, 3905 .flush_iotlb_all = intel_flush_iotlb_all, 3906 .iotlb_sync = intel_iommu_tlb_sync, 3907 .free = intel_iommu_domain_free, 3908 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss, 3909 }; 3910 3911 const struct iommu_ops intel_iommu_ops = { 3912 .blocked_domain = &blocking_domain, 3913 .release_domain = &blocking_domain, 3914 .identity_domain = &identity_domain, 3915 .capable = intel_iommu_capable, 3916 .hw_info = intel_iommu_hw_info, 3917 .domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags, 3918 .domain_alloc_sva = intel_svm_domain_alloc, 3919 .domain_alloc_nested = intel_iommu_domain_alloc_nested, 3920 .probe_device = intel_iommu_probe_device, 3921 .probe_finalize = intel_iommu_probe_finalize, 3922 .release_device = intel_iommu_release_device, 3923 .get_resv_regions = intel_iommu_get_resv_regions, 3924 .device_group = intel_iommu_device_group, 3925 .is_attach_deferred = intel_iommu_is_attach_deferred, 3926 .def_domain_type = device_def_domain_type, 3927 .page_response = intel_iommu_page_response, 3928 }; 3929 3930 static void quirk_iommu_igfx(struct pci_dev *dev) 3931 { 3932 if (risky_device(dev)) 3933 return; 3934 3935 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); 3936 disable_igfx_iommu = 1; 3937 } 3938 3939 /* G4x/GM45 integrated gfx dmar support is totally busted. 
*/ 3940 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); 3941 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); 3942 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx); 3943 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx); 3944 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); 3945 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); 3946 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); 3947 3948 /* QM57/QS57 integrated gfx malfunctions with dmar */ 3949 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); 3950 3951 /* Broadwell igfx malfunctions with dmar */ 3952 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); 3953 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); 3954 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx); 3955 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx); 3956 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx); 3957 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx); 3958 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx); 3959 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx); 3960 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx); 3961 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx); 3962 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx); 3963 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx); 3964 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx); 3965 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx); 3966 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx); 3967 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx); 3968 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx); 3969 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx); 3970 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx); 3971 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx); 3972 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx); 3973 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); 3974 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); 3975 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); 3976 3977 static void quirk_iommu_rwbf(struct pci_dev *dev) 3978 { 3979 if (risky_device(dev)) 3980 return; 3981 3982 /* 3983 * Mobile 4 Series Chipset neglects to set RWBF capability, 3984 * but needs it. Same seems to hold for the desktop versions. 
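	 * Setting rwbf_quirk makes rwbf_required() true for every IOMMU in
	 * the system, which in turn forces iotlb_sync_map on newly
	 * allocated paging domains.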
	pci_info(dev, "Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (risky_device(dev))
		return;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		disable_igfx_iommu = 1;
	} else if (!disable_igfx_iommu) {
		/* we have to ensure the gfx device is idle before we flush */
		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
		iommu_set_dma_strict();
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);

static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
{
	unsigned short ver;

	if (!IS_GFX_DEVICE(dev))
		return;

	ver = (dev->device >> 8) & 0xff;
	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
	    ver != 0x9a && ver != 0xa7 && ver != 0x7d)
		return;

	if (risky_device(dev))
		return;

	pci_info(dev, "Skip IOMMU disabling for graphics\n");
	iommu_skip_te_disable = 1;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
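/*
 * Illustrative sketch, not part of the driver: decoding the graphics
 * stolen-memory size field (bits 11:8) from a raw GGC value, using the
 * GGC_* bits defined above as read by quirk_calpella_no_shadow_gtt().
 * The helper name is hypothetical.
 */
static inline const char *ggc_memory_size_str(unsigned short ggc)
{
	/* Mask off everything but the memory-size field. */
	switch (ggc & GGC_MEMORY_SIZE_MASK) {
	case GGC_MEMORY_SIZE_NONE:	return "none";
	case GGC_MEMORY_SIZE_1M:	return "1M";
	case GGC_MEMORY_SIZE_2M:	return "2M";
	case GGC_MEMORY_SIZE_2M_VT:	return "2M, VT enabled";
	case GGC_MEMORY_SIZE_3M_VT:	return "3M, VT enabled";
	case GGC_MEMORY_SIZE_4M_VT:	return "4M, VT enabled";
	default:			return "unknown";
	}
}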
/*
 * On Tylersburg chipsets, some BIOSes have been known to enable the
 * ISOCH DMAR unit for the Azalia sound device, but not give it any
 * TLB entries, which causes it to deadlock. Check for that. We do
 * this in a function called from init_dmars(), instead of in a PCI
 * quirk, because we don't want to print the obnoxious "BIOS broken"
 * message if VT-d is actually disabled.
 */
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/*
	 * System Management Registers. Might be hidden, in which case
	 * we can't do the sanity check. But that's OK, because the
	 * known-broken BIOSes _don't_ actually hide it, so far.
	 */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries. */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
		vtisochctrl);
}

/*
 * Here we deal with a device TLB defect where the device may inadvertently
 * issue an ATS invalidation completion before posted writes that were
 * initiated with a translated address matching the invalidation address
 * range, violating the invalidation completion ordering. Therefore, any
 * use case that cannot guarantee DMA is stopped before unmap is vulnerable
 * to this defect. In other words, any dTLB invalidation initiated not
 * under the control of the trusted/privileged host device driver must use
 * this quirk.
 *
 * Device TLBs are invalidated under the following six conditions:
 * 1. Device driver does DMA API unmap IOVA
 * 2. Device driver unbinds a PASID from a process, sva_unbind_device()
 * 3. PASID is torn down, after PASID cache is flushed. e.g. process
 *    exit_mmap() due to crash
 * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
 *    VM has to free pages that were unmapped
 * 5. Userspace driver unmaps a DMA buffer
 * 6. Cache invalidation in vSVA usage (upcoming)
 *
 * For #1 and #2, device drivers are responsible for stopping DMA traffic
 * before unmap/unbind. For #3, the iommu driver gets an mmu_notifier to
 * invalidate the TLB the same way as a normal user unmap, which will use
 * this quirk. The dTLB invalidation after a PASID cache flush does not
 * need this quirk.
 *
 * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
 */
void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
			       unsigned long address, unsigned long mask,
			       u32 pasid, u16 qdep)
{
	u16 sid;

	if (likely(!info->dtlb_extra_inval))
		return;

	sid = PCI_DEVID(info->bus, info->devfn);
	if (pasid == IOMMU_NO_PASID) {
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				   qdep, address, mask);
	} else {
		qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
					 pasid, qdep, address, mask);
	}
}
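/*
 * Illustrative sketch, not part of the driver: a device IOTLB flush path
 * issues the regular invalidation first and then calls
 * quirk_extra_dev_tlb_flush(), which is a no-op unless
 * info->dtlb_extra_inval is set. The function name and the addr/mask/qdep
 * values a caller would pass are hypothetical.
 */
static inline void example_flush_dev_iotlb(struct device_domain_info *info,
					   u64 addr, unsigned int mask,
					   u16 qdep)
{
	u16 sid = PCI_DEVID(info->bus, info->devfn);

	/* Regular device IOTLB invalidation ... */
	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask);
	/* ... followed by the extra invalidation required by the quirk. */
	quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep);
}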
#define ecmd_get_status_code(res)	(((res) & 0xff) >> 1)

/*
 * Submit a command to the enhanced command interface. The valid enhanced
 * command descriptions are defined in Table 47 of the VT-d spec. A given
 * hardware implementation may support some but not all commands, which
 * can be determined by checking the Enhanced Command Capability Register.
 *
 * Return values:
 *  - 0: Command completed successfully;
 *  - Negative: software error value;
 *  - Nonzero positive: failure status code defined in Table 48.
 */
int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
{
	unsigned long flags;
	u64 res;
	int ret;

	if (!cap_ecmds(iommu->cap))
		return -ENODEV;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
	if (res & DMA_ECMD_ECRSP_IP) {
		ret = -EBUSY;
		goto err;
	}

	/*
	 * Unconditionally write operand B, because
	 * - writing the register has no side effect if the ecmd doesn't
	 *   require an operand B;
	 * - this is not invoked in any critical path, so the extra MMIO
	 *   write raises no performance concern.
	 */
	dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
	dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));

	IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
		      !(res & DMA_ECMD_ECRSP_IP), res);

	if (res & DMA_ECMD_ECRSP_IP) {
		ret = -ETIMEDOUT;
		goto err;
	}

	ret = ecmd_get_status_code(res);
err:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	return ret;
}

MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
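/*
 * Illustrative sketch, not part of the driver: calling ecmd_submit_sync()
 * and separating software errors from hardware failure status codes. The
 * function name and the way the status code is mapped to -EIO are
 * hypothetical; a real caller would act on the Table 48 status itself.
 */
static inline int example_submit_ecmd(struct intel_iommu *iommu, u8 cmd)
{
	/* Operands A and B are unused in this sketch. */
	int ret = ecmd_submit_sync(iommu, cmd, 0, 0);

	if (ret < 0)	/* software error: -ENODEV, -EBUSY or -ETIMEDOUT */
		return ret;

	if (ret > 0) {	/* hardware failure status code (Table 48) */
		pr_warn("ecmd 0x%x failed with status 0x%x\n", cmd, ret);
		return -EIO;
	}

	return 0;
}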