1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright © 2006-2014 Intel Corporation. 4 * 5 * Authors: David Woodhouse <dwmw2@infradead.org>, 6 * Ashok Raj <ashok.raj@intel.com>, 7 * Shaohua Li <shaohua.li@intel.com>, 8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, 9 * Fenghua Yu <fenghua.yu@intel.com> 10 * Joerg Roedel <jroedel@suse.de> 11 */ 12 13 #define pr_fmt(fmt) "DMAR: " fmt 14 #define dev_fmt(fmt) pr_fmt(fmt) 15 16 #include <linux/crash_dump.h> 17 #include <linux/dma-direct.h> 18 #include <linux/dmi.h> 19 #include <linux/memory.h> 20 #include <linux/pci.h> 21 #include <linux/pci-ats.h> 22 #include <linux/spinlock.h> 23 #include <linux/syscore_ops.h> 24 #include <linux/tboot.h> 25 #include <uapi/linux/iommufd.h> 26 27 #include "iommu.h" 28 #include "../dma-iommu.h" 29 #include "../irq_remapping.h" 30 #include "../iommu-pages.h" 31 #include "pasid.h" 32 #include "perfmon.h" 33 34 #define ROOT_SIZE VTD_PAGE_SIZE 35 #define CONTEXT_SIZE VTD_PAGE_SIZE 36 37 #define IS_GFX_DEVICE(pdev) pci_is_display(pdev) 38 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) 39 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) 40 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) 41 42 #define IOAPIC_RANGE_START (0xfee00000) 43 #define IOAPIC_RANGE_END (0xfeefffff) 44 #define IOVA_START_ADDR (0x1000) 45 46 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57 47 48 static void __init check_tylersburg_isoch(void); 49 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, 50 bool enable); 51 static int rwbf_quirk; 52 53 #define rwbf_required(iommu) (rwbf_quirk || cap_rwbf((iommu)->cap)) 54 55 /* 56 * set to 1 to panic kernel if can't successfully enable VT-d 57 * (used when kernel is launched w/ TXT) 58 */ 59 static int force_on = 0; 60 static int intel_iommu_tboot_noforce; 61 static int no_platform_optin; 62 63 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 64 65 /* 66 * Take a root_entry and return the Lower Context Table Pointer (LCTP) 67 * if marked present. 68 */ 69 static phys_addr_t root_entry_lctp(struct root_entry *re) 70 { 71 if (!(re->lo & 1)) 72 return 0; 73 74 return re->lo & VTD_PAGE_MASK; 75 } 76 77 /* 78 * Take a root_entry and return the Upper Context Table Pointer (UCTP) 79 * if marked present. 80 */ 81 static phys_addr_t root_entry_uctp(struct root_entry *re) 82 { 83 if (!(re->hi & 1)) 84 return 0; 85 86 return re->hi & VTD_PAGE_MASK; 87 } 88 89 static int device_rid_cmp_key(const void *key, const struct rb_node *node) 90 { 91 struct device_domain_info *info = 92 rb_entry(node, struct device_domain_info, node); 93 const u16 *rid_lhs = key; 94 95 if (*rid_lhs < PCI_DEVID(info->bus, info->devfn)) 96 return -1; 97 98 if (*rid_lhs > PCI_DEVID(info->bus, info->devfn)) 99 return 1; 100 101 return 0; 102 } 103 104 static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs) 105 { 106 struct device_domain_info *info = 107 rb_entry(lhs, struct device_domain_info, node); 108 u16 key = PCI_DEVID(info->bus, info->devfn); 109 110 return device_rid_cmp_key(&key, rhs); 111 } 112 113 /* 114 * Looks up an IOMMU-probed device using its source ID. 115 * 116 * Returns the pointer to the device if there is a match. Otherwise, 117 * returns NULL. 118 * 119 * Note that this helper doesn't guarantee that the device won't be 120 * released by the iommu subsystem after being returned. 
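 *
 * A minimal usage sketch (illustrative only; the calling context and the
 * bus/devfn values are assumed, not taken from this file):
 *
 *	u16 sid = PCI_DEVID(bus, devfn);
 *	struct device *dev = device_rbtree_find(iommu, sid);
 *
 *	if (!dev)
 *		return;		(no IOMMU-probed device owns that source ID)
 *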
The caller 121 * should use its own synchronization mechanism to avoid the device 122 * being released during its use if its possibly the case. 123 */ 124 struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid) 125 { 126 struct device_domain_info *info = NULL; 127 struct rb_node *node; 128 unsigned long flags; 129 130 spin_lock_irqsave(&iommu->device_rbtree_lock, flags); 131 node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key); 132 if (node) 133 info = rb_entry(node, struct device_domain_info, node); 134 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); 135 136 return info ? info->dev : NULL; 137 } 138 139 static int device_rbtree_insert(struct intel_iommu *iommu, 140 struct device_domain_info *info) 141 { 142 struct rb_node *curr; 143 unsigned long flags; 144 145 spin_lock_irqsave(&iommu->device_rbtree_lock, flags); 146 curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp); 147 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); 148 if (WARN_ON(curr)) 149 return -EEXIST; 150 151 return 0; 152 } 153 154 static void device_rbtree_remove(struct device_domain_info *info) 155 { 156 struct intel_iommu *iommu = info->iommu; 157 unsigned long flags; 158 159 spin_lock_irqsave(&iommu->device_rbtree_lock, flags); 160 rb_erase(&info->node, &iommu->device_rbtree); 161 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); 162 } 163 164 struct dmar_rmrr_unit { 165 struct list_head list; /* list of rmrr units */ 166 struct acpi_dmar_header *hdr; /* ACPI header */ 167 u64 base_address; /* reserved base address*/ 168 u64 end_address; /* reserved end address */ 169 struct dmar_dev_scope *devices; /* target devices */ 170 int devices_cnt; /* target device count */ 171 }; 172 173 struct dmar_atsr_unit { 174 struct list_head list; /* list of ATSR units */ 175 struct acpi_dmar_header *hdr; /* ACPI header */ 176 struct dmar_dev_scope *devices; /* target devices */ 177 int devices_cnt; /* target device count */ 178 u8 include_all:1; /* include all ports */ 179 }; 180 181 struct dmar_satc_unit { 182 struct list_head list; /* list of SATC units */ 183 struct acpi_dmar_header *hdr; /* ACPI header */ 184 struct dmar_dev_scope *devices; /* target devices */ 185 struct intel_iommu *iommu; /* the corresponding iommu */ 186 int devices_cnt; /* target device count */ 187 u8 atc_required:1; /* ATS is required */ 188 }; 189 190 static LIST_HEAD(dmar_atsr_units); 191 static LIST_HEAD(dmar_rmrr_units); 192 static LIST_HEAD(dmar_satc_units); 193 194 #define for_each_rmrr_units(rmrr) \ 195 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 196 197 static void intel_iommu_domain_free(struct iommu_domain *domain); 198 199 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); 200 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); 201 202 int intel_iommu_enabled = 0; 203 EXPORT_SYMBOL_GPL(intel_iommu_enabled); 204 205 static int intel_iommu_superpage = 1; 206 static int iommu_identity_mapping; 207 static int iommu_skip_te_disable; 208 static int disable_igfx_iommu; 209 210 #define IDENTMAP_AZALIA 4 211 212 const struct iommu_ops intel_iommu_ops; 213 214 static bool translation_pre_enabled(struct intel_iommu *iommu) 215 { 216 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED); 217 } 218 219 static void clear_translation_pre_enabled(struct intel_iommu *iommu) 220 { 221 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED; 222 } 223 224 static void init_translation_status(struct intel_iommu *iommu) 225 { 226 u32 gsts; 227 228 gsts = 
readl(iommu->reg + DMAR_GSTS_REG); 229 if (gsts & DMA_GSTS_TES) 230 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED; 231 } 232 233 static int __init intel_iommu_setup(char *str) 234 { 235 if (!str) 236 return -EINVAL; 237 238 while (*str) { 239 if (!strncmp(str, "on", 2)) { 240 dmar_disabled = 0; 241 pr_info("IOMMU enabled\n"); 242 } else if (!strncmp(str, "off", 3)) { 243 dmar_disabled = 1; 244 no_platform_optin = 1; 245 pr_info("IOMMU disabled\n"); 246 } else if (!strncmp(str, "igfx_off", 8)) { 247 disable_igfx_iommu = 1; 248 pr_info("Disable GFX device mapping\n"); 249 } else if (!strncmp(str, "forcedac", 8)) { 250 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); 251 iommu_dma_forcedac = true; 252 } else if (!strncmp(str, "strict", 6)) { 253 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n"); 254 iommu_set_dma_strict(); 255 } else if (!strncmp(str, "sp_off", 6)) { 256 pr_info("Disable supported super page\n"); 257 intel_iommu_superpage = 0; 258 } else if (!strncmp(str, "sm_on", 5)) { 259 pr_info("Enable scalable mode if hardware supports\n"); 260 intel_iommu_sm = 1; 261 } else if (!strncmp(str, "sm_off", 6)) { 262 pr_info("Scalable mode is disallowed\n"); 263 intel_iommu_sm = 0; 264 } else if (!strncmp(str, "tboot_noforce", 13)) { 265 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); 266 intel_iommu_tboot_noforce = 1; 267 } else { 268 pr_notice("Unknown option - '%s'\n", str); 269 } 270 271 str += strcspn(str, ","); 272 while (*str == ',') 273 str++; 274 } 275 276 return 1; 277 } 278 __setup("intel_iommu=", intel_iommu_setup); 279 280 /* 281 * Calculate the Supported Adjusted Guest Address Widths of an IOMMU. 282 * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of 283 * the returned SAGAW. 284 */ 285 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu) 286 { 287 unsigned long fl_sagaw, sl_sagaw; 288 289 fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0); 290 sl_sagaw = cap_sagaw(iommu->cap); 291 292 /* Second level only. */ 293 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) 294 return sl_sagaw; 295 296 /* First level only. */ 297 if (!ecap_slts(iommu->ecap)) 298 return fl_sagaw; 299 300 return fl_sagaw & sl_sagaw; 301 } 302 303 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) 304 { 305 unsigned long sagaw; 306 int agaw; 307 308 sagaw = __iommu_calculate_sagaw(iommu); 309 for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) { 310 if (test_bit(agaw, &sagaw)) 311 break; 312 } 313 314 return agaw; 315 } 316 317 /* 318 * Calculate max SAGAW for each iommu. 319 */ 320 int iommu_calculate_max_sagaw(struct intel_iommu *iommu) 321 { 322 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH); 323 } 324 325 /* 326 * calculate agaw for each iommu. 327 * "SAGAW" may be different across iommus, use a default agaw, and 328 * get a supported less agaw for iommus that don't support the default agaw. 329 */ 330 int iommu_calculate_agaw(struct intel_iommu *iommu) 331 { 332 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); 333 } 334 335 static bool iommu_paging_structure_coherency(struct intel_iommu *iommu) 336 { 337 return sm_supported(iommu) ? 
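	/*
	 * Worked example for __iommu_calculate_agaw() above (an illustrative
	 * aside; the capability values are assumed rather than read from
	 * hardware): with DEFAULT_DOMAIN_ADDRESS_WIDTH = 57, width_to_agaw(57)
	 * is 3 under the usual 30 + 9 * agaw encoding from the spec.  If the
	 * combined SAGAW only has BIT(2) set (48-bit, 4-level tables), the loop
	 * walks down from agaw 3 and settles on agaw 2, i.e. a 48-bit width.
	 */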
338 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap); 339 } 340 341 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, 342 u8 devfn, int alloc) 343 { 344 struct root_entry *root = &iommu->root_entry[bus]; 345 struct context_entry *context; 346 u64 *entry; 347 348 /* 349 * Except that the caller requested to allocate a new entry, 350 * returning a copied context entry makes no sense. 351 */ 352 if (!alloc && context_copied(iommu, bus, devfn)) 353 return NULL; 354 355 entry = &root->lo; 356 if (sm_supported(iommu)) { 357 if (devfn >= 0x80) { 358 devfn -= 0x80; 359 entry = &root->hi; 360 } 361 devfn *= 2; 362 } 363 if (*entry & 1) 364 context = phys_to_virt(*entry & VTD_PAGE_MASK); 365 else { 366 unsigned long phy_addr; 367 if (!alloc) 368 return NULL; 369 370 context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, 371 SZ_4K); 372 if (!context) 373 return NULL; 374 375 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); 376 phy_addr = virt_to_phys((void *)context); 377 *entry = phy_addr | 1; 378 __iommu_flush_cache(iommu, entry, sizeof(*entry)); 379 } 380 return &context[devfn]; 381 } 382 383 /** 384 * is_downstream_to_pci_bridge - test if a device belongs to the PCI 385 * sub-hierarchy of a candidate PCI-PCI bridge 386 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy 387 * @bridge: the candidate PCI-PCI bridge 388 * 389 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false. 390 */ 391 static bool 392 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge) 393 { 394 struct pci_dev *pdev, *pbridge; 395 396 if (!dev_is_pci(dev) || !dev_is_pci(bridge)) 397 return false; 398 399 pdev = to_pci_dev(dev); 400 pbridge = to_pci_dev(bridge); 401 402 if (pbridge->subordinate && 403 pbridge->subordinate->number <= pdev->bus->number && 404 pbridge->subordinate->busn_res.end >= pdev->bus->number) 405 return true; 406 407 return false; 408 } 409 410 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev) 411 { 412 struct dmar_drhd_unit *drhd; 413 u32 vtbar; 414 int rc; 415 416 /* We know that this device on this chipset has its own IOMMU. 417 * If we find it under a different IOMMU, then the BIOS is lying 418 * to us. Hope that the IOMMU for this device is actually 419 * disabled, and it needs no translation... 
	 */
	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
	if (rc) {
		/* "can't" happen */
		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
		return false;
	}
	vtbar &= 0xffff0000;

	/* we know that this iommu should be at offset 0xa000 from vtbar */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
		return true;
	}

	return false;
}

static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
{
	if (!iommu || iommu->drhd->ignored)
		return true;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
		    pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
		    quirk_ioat_snb_local_iommu(pdev))
			return true;
	}

	return false;
}

static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct pci_dev *pdev = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	u16 segment = 0;
	int i;

	if (!dev)
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = pci_real_dma_dev(to_pci_dev(dev));

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches.
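				 *
				 * A hedged sketch with assumed example IDs (a VF at
				 * 0000:03:10.2 whose PF is 0000:03:00.0; the values are
				 * illustrative only):
				 *
				 *	pdev    = pci_real_dma_dev(to_pci_dev(dev));  - the VF itself
				 *	pf_pdev = pci_physfn(pdev);                   - PF, matched in the scope table
				 *	*bus    = pdev->bus->number;                  - 0x03
				 *	*devfn  = pdev->devfn;                        - PCI_DEVFN(0x10, 2)
				 *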
*/ 494 if (pdev && pdev->is_virtfn) 495 goto got_pdev; 496 497 if (bus && devfn) { 498 *bus = drhd->devices[i].bus; 499 *devfn = drhd->devices[i].devfn; 500 } 501 goto out; 502 } 503 504 if (is_downstream_to_pci_bridge(dev, tmp)) 505 goto got_pdev; 506 } 507 508 if (pdev && drhd->include_all) { 509 got_pdev: 510 if (bus && devfn) { 511 *bus = pdev->bus->number; 512 *devfn = pdev->devfn; 513 } 514 goto out; 515 } 516 } 517 iommu = NULL; 518 out: 519 if (iommu_is_dummy(iommu, dev)) 520 iommu = NULL; 521 522 rcu_read_unlock(); 523 524 return iommu; 525 } 526 527 static void free_context_table(struct intel_iommu *iommu) 528 { 529 struct context_entry *context; 530 int i; 531 532 if (!iommu->root_entry) 533 return; 534 535 for (i = 0; i < ROOT_ENTRY_NR; i++) { 536 context = iommu_context_addr(iommu, i, 0, 0); 537 if (context) 538 iommu_free_pages(context); 539 540 if (!sm_supported(iommu)) 541 continue; 542 543 context = iommu_context_addr(iommu, i, 0x80, 0); 544 if (context) 545 iommu_free_pages(context); 546 } 547 548 iommu_free_pages(iommu->root_entry); 549 iommu->root_entry = NULL; 550 } 551 552 #ifdef CONFIG_DMAR_DEBUG 553 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, 554 u8 bus, u8 devfn, struct dma_pte *parent, int level) 555 { 556 struct dma_pte *pte; 557 int offset; 558 559 while (1) { 560 offset = pfn_level_offset(pfn, level); 561 pte = &parent[offset]; 562 563 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val); 564 565 if (!dma_pte_present(pte)) { 566 pr_info("page table not present at level %d\n", level - 1); 567 break; 568 } 569 570 if (level == 1 || dma_pte_superpage(pte)) 571 break; 572 573 parent = phys_to_virt(dma_pte_addr(pte)); 574 level--; 575 } 576 } 577 578 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, 579 unsigned long long addr, u32 pasid) 580 { 581 struct pasid_dir_entry *dir, *pde; 582 struct pasid_entry *entries, *pte; 583 struct context_entry *ctx_entry; 584 struct root_entry *rt_entry; 585 int i, dir_index, index, level; 586 u8 devfn = source_id & 0xff; 587 u8 bus = source_id >> 8; 588 struct dma_pte *pgtable; 589 590 pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr); 591 592 /* root entry dump */ 593 if (!iommu->root_entry) { 594 pr_info("root table is not present\n"); 595 return; 596 } 597 rt_entry = &iommu->root_entry[bus]; 598 599 if (sm_supported(iommu)) 600 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n", 601 rt_entry->hi, rt_entry->lo); 602 else 603 pr_info("root entry: 0x%016llx", rt_entry->lo); 604 605 /* context entry dump */ 606 ctx_entry = iommu_context_addr(iommu, bus, devfn, 0); 607 if (!ctx_entry) { 608 pr_info("context table is not present\n"); 609 return; 610 } 611 612 pr_info("context entry: hi 0x%016llx, low 0x%016llx\n", 613 ctx_entry->hi, ctx_entry->lo); 614 615 /* legacy mode does not require PASID entries */ 616 if (!sm_supported(iommu)) { 617 if (!context_present(ctx_entry)) { 618 pr_info("legacy mode page table is not present\n"); 619 return; 620 } 621 level = agaw_to_level(ctx_entry->hi & 7); 622 pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK); 623 goto pgtable_walk; 624 } 625 626 if (!context_present(ctx_entry)) { 627 pr_info("pasid directory table is not present\n"); 628 return; 629 } 630 631 /* get the pointer to pasid directory entry */ 632 dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK); 633 634 /* For request-without-pasid, get the pasid from context entry */ 635 if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) 636 pasid = 
IOMMU_NO_PASID; 637 638 dir_index = pasid >> PASID_PDE_SHIFT; 639 pde = &dir[dir_index]; 640 pr_info("pasid dir entry: 0x%016llx\n", pde->val); 641 642 /* get the pointer to the pasid table entry */ 643 entries = get_pasid_table_from_pde(pde); 644 if (!entries) { 645 pr_info("pasid table is not present\n"); 646 return; 647 } 648 index = pasid & PASID_PTE_MASK; 649 pte = &entries[index]; 650 for (i = 0; i < ARRAY_SIZE(pte->val); i++) 651 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]); 652 653 if (!pasid_pte_is_present(pte)) { 654 pr_info("scalable mode page table is not present\n"); 655 return; 656 } 657 658 if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) { 659 level = pte->val[2] & BIT_ULL(2) ? 5 : 4; 660 pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK); 661 } else { 662 level = agaw_to_level((pte->val[0] >> 2) & 0x7); 663 pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK); 664 } 665 666 pgtable_walk: 667 pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level); 668 } 669 #endif 670 671 /* iommu handling */ 672 static int iommu_alloc_root_entry(struct intel_iommu *iommu) 673 { 674 struct root_entry *root; 675 676 root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K); 677 if (!root) { 678 pr_err("Allocating root entry for %s failed\n", 679 iommu->name); 680 return -ENOMEM; 681 } 682 683 __iommu_flush_cache(iommu, root, ROOT_SIZE); 684 iommu->root_entry = root; 685 686 return 0; 687 } 688 689 static void iommu_set_root_entry(struct intel_iommu *iommu) 690 { 691 u64 addr; 692 u32 sts; 693 unsigned long flag; 694 695 addr = virt_to_phys(iommu->root_entry); 696 if (sm_supported(iommu)) 697 addr |= DMA_RTADDR_SMT; 698 699 raw_spin_lock_irqsave(&iommu->register_lock, flag); 700 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); 701 702 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); 703 704 /* Make sure hardware complete it */ 705 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 706 readl, (sts & DMA_GSTS_RTPS), sts); 707 708 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 709 710 /* 711 * Hardware invalidates all DMA remapping hardware translation 712 * caches as part of SRTP flow. 
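	 *
	 * For reference, later callers in this file (init_iommu_hw() and
	 * intel_iommu_add()) use this helper in roughly the following order;
	 * a condensed sketch, not a verbatim copy:
	 *
	 *	iommu_flush_write_buffer(iommu);
	 *	iommu_set_root_entry(iommu);
	 *	iommu_enable_translation(iommu);
	 *	iommu_disable_protect_mem_regions(iommu);
	 *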
713 */ 714 if (cap_esrtps(iommu->cap)) 715 return; 716 717 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); 718 if (sm_supported(iommu)) 719 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0); 720 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 721 } 722 723 void iommu_flush_write_buffer(struct intel_iommu *iommu) 724 { 725 u32 val; 726 unsigned long flag; 727 728 if (!rwbf_quirk && !cap_rwbf(iommu->cap)) 729 return; 730 731 raw_spin_lock_irqsave(&iommu->register_lock, flag); 732 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG); 733 734 /* Make sure hardware complete it */ 735 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 736 readl, (!(val & DMA_GSTS_WBFS)), val); 737 738 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 739 } 740 741 /* return value determine if we need a write buffer flush */ 742 static void __iommu_flush_context(struct intel_iommu *iommu, 743 u16 did, u16 source_id, u8 function_mask, 744 u64 type) 745 { 746 u64 val = 0; 747 unsigned long flag; 748 749 switch (type) { 750 case DMA_CCMD_GLOBAL_INVL: 751 val = DMA_CCMD_GLOBAL_INVL; 752 break; 753 case DMA_CCMD_DOMAIN_INVL: 754 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); 755 break; 756 case DMA_CCMD_DEVICE_INVL: 757 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) 758 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); 759 break; 760 default: 761 pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n", 762 iommu->name, type); 763 return; 764 } 765 val |= DMA_CCMD_ICC; 766 767 raw_spin_lock_irqsave(&iommu->register_lock, flag); 768 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); 769 770 /* Make sure hardware complete it */ 771 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, 772 dmar_readq, (!(val & DMA_CCMD_ICC)), val); 773 774 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 775 } 776 777 void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, 778 unsigned int size_order, u64 type) 779 { 780 int tlb_offset = ecap_iotlb_offset(iommu->ecap); 781 u64 val = 0, val_iva = 0; 782 unsigned long flag; 783 784 switch (type) { 785 case DMA_TLB_GLOBAL_FLUSH: 786 /* global flush doesn't need set IVA_REG */ 787 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; 788 break; 789 case DMA_TLB_DSI_FLUSH: 790 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 791 break; 792 case DMA_TLB_PSI_FLUSH: 793 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 794 /* IH bit is passed in as part of address */ 795 val_iva = size_order | addr; 796 break; 797 default: 798 pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n", 799 iommu->name, type); 800 return; 801 } 802 803 if (cap_write_drain(iommu->cap)) 804 val |= DMA_TLB_WRITE_DRAIN; 805 806 raw_spin_lock_irqsave(&iommu->register_lock, flag); 807 /* Note: Only uses first TLB reg currently */ 808 if (val_iva) 809 dmar_writeq(iommu->reg + tlb_offset, val_iva); 810 dmar_writeq(iommu->reg + tlb_offset + 8, val); 811 812 /* Make sure hardware complete it */ 813 IOMMU_WAIT_OP(iommu, tlb_offset + 8, 814 dmar_readq, (!(val & DMA_TLB_IVT)), val); 815 816 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 817 818 /* check IOTLB invalidation granularity */ 819 if (DMA_TLB_IAIG(val) == 0) 820 pr_err("Flush IOTLB failed\n"); 821 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) 822 pr_debug("TLB flush request %Lx, actual %Lx\n", 823 (unsigned long long)DMA_TLB_IIRG(type), 824 (unsigned long long)DMA_TLB_IAIG(val)); 825 } 826 827 static struct device_domain_info * 828 domain_lookup_dev_info(struct dmar_domain *domain, 829 struct intel_iommu *iommu, u8 
bus, u8 devfn) 830 { 831 struct device_domain_info *info; 832 unsigned long flags; 833 834 spin_lock_irqsave(&domain->lock, flags); 835 list_for_each_entry(info, &domain->devices, link) { 836 if (info->iommu == iommu && info->bus == bus && 837 info->devfn == devfn) { 838 spin_unlock_irqrestore(&domain->lock, flags); 839 return info; 840 } 841 } 842 spin_unlock_irqrestore(&domain->lock, flags); 843 844 return NULL; 845 } 846 847 /* 848 * The extra devTLB flush quirk impacts those QAT devices with PCI device 849 * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() 850 * check because it applies only to the built-in QAT devices and it doesn't 851 * grant additional privileges. 852 */ 853 #define BUGGY_QAT_DEVID_MASK 0x4940 854 static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) 855 { 856 if (pdev->vendor != PCI_VENDOR_ID_INTEL) 857 return false; 858 859 if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK) 860 return false; 861 862 return true; 863 } 864 865 static void iommu_enable_pci_ats(struct device_domain_info *info) 866 { 867 struct pci_dev *pdev; 868 869 if (!info->ats_supported) 870 return; 871 872 pdev = to_pci_dev(info->dev); 873 if (!pci_ats_page_aligned(pdev)) 874 return; 875 876 if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) 877 info->ats_enabled = 1; 878 } 879 880 static void iommu_disable_pci_ats(struct device_domain_info *info) 881 { 882 if (!info->ats_enabled) 883 return; 884 885 pci_disable_ats(to_pci_dev(info->dev)); 886 info->ats_enabled = 0; 887 } 888 889 static void iommu_enable_pci_pri(struct device_domain_info *info) 890 { 891 struct pci_dev *pdev; 892 893 if (!info->ats_enabled || !info->pri_supported) 894 return; 895 896 pdev = to_pci_dev(info->dev); 897 /* PASID is required in PRG Response Message. */ 898 if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) 899 return; 900 901 if (pci_reset_pri(pdev)) 902 return; 903 904 if (!pci_enable_pri(pdev, PRQ_DEPTH)) 905 info->pri_enabled = 1; 906 } 907 908 static void iommu_disable_pci_pri(struct device_domain_info *info) 909 { 910 if (!info->pri_enabled) 911 return; 912 913 if (WARN_ON(info->iopf_refcount)) 914 iopf_queue_remove_device(info->iommu->iopf_queue, info->dev); 915 916 pci_disable_pri(to_pci_dev(info->dev)); 917 info->pri_enabled = 0; 918 } 919 920 static void intel_flush_iotlb_all(struct iommu_domain *domain) 921 { 922 cache_tag_flush_all(to_dmar_domain(domain)); 923 } 924 925 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) 926 { 927 u32 pmen; 928 unsigned long flags; 929 930 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap)) 931 return; 932 933 raw_spin_lock_irqsave(&iommu->register_lock, flags); 934 pmen = readl(iommu->reg + DMAR_PMEN_REG); 935 pmen &= ~DMA_PMEN_EPM; 936 writel(pmen, iommu->reg + DMAR_PMEN_REG); 937 938 /* wait for the protected region status bit to clear */ 939 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, 940 readl, !(pmen & DMA_PMEN_PRS), pmen); 941 942 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 943 } 944 945 static void iommu_enable_translation(struct intel_iommu *iommu) 946 { 947 u32 sts; 948 unsigned long flags; 949 950 raw_spin_lock_irqsave(&iommu->register_lock, flags); 951 iommu->gcmd |= DMA_GCMD_TE; 952 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 953 954 /* Make sure hardware complete it */ 955 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 956 readl, (sts & DMA_GSTS_TES), sts); 957 958 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 959 } 960 961 static void iommu_disable_translation(struct intel_iommu *iommu) 962 
{ 963 u32 sts; 964 unsigned long flag; 965 966 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated && 967 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap))) 968 return; 969 970 raw_spin_lock_irqsave(&iommu->register_lock, flag); 971 iommu->gcmd &= ~DMA_GCMD_TE; 972 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 973 974 /* Make sure hardware complete it */ 975 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 976 readl, (!(sts & DMA_GSTS_TES)), sts); 977 978 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 979 } 980 981 static void disable_dmar_iommu(struct intel_iommu *iommu) 982 { 983 /* 984 * All iommu domains must have been detached from the devices, 985 * hence there should be no domain IDs in use. 986 */ 987 if (WARN_ON(!ida_is_empty(&iommu->domain_ida))) 988 return; 989 990 if (iommu->gcmd & DMA_GCMD_TE) 991 iommu_disable_translation(iommu); 992 } 993 994 static void free_dmar_iommu(struct intel_iommu *iommu) 995 { 996 if (iommu->copied_tables) { 997 bitmap_free(iommu->copied_tables); 998 iommu->copied_tables = NULL; 999 } 1000 1001 /* free context mapping */ 1002 free_context_table(iommu); 1003 1004 if (ecap_prs(iommu->ecap)) 1005 intel_iommu_finish_prq(iommu); 1006 } 1007 1008 /* 1009 * Check and return whether first level is used by default for 1010 * DMA translation. 1011 */ 1012 static bool first_level_by_default(struct intel_iommu *iommu) 1013 { 1014 /* Only SL is available in legacy mode */ 1015 if (!sm_supported(iommu)) 1016 return false; 1017 1018 /* Only level (either FL or SL) is available, just use it */ 1019 if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap)) 1020 return ecap_flts(iommu->ecap); 1021 1022 return true; 1023 } 1024 1025 int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) 1026 { 1027 struct iommu_domain_info *info, *curr; 1028 int num, ret = -ENOSPC; 1029 1030 if (domain->domain.type == IOMMU_DOMAIN_SVA) 1031 return 0; 1032 1033 info = kzalloc(sizeof(*info), GFP_KERNEL); 1034 if (!info) 1035 return -ENOMEM; 1036 1037 guard(mutex)(&iommu->did_lock); 1038 curr = xa_load(&domain->iommu_array, iommu->seq_id); 1039 if (curr) { 1040 curr->refcnt++; 1041 kfree(info); 1042 return 0; 1043 } 1044 1045 num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID, 1046 cap_ndoms(iommu->cap) - 1, GFP_KERNEL); 1047 if (num < 0) { 1048 pr_err("%s: No free domain ids\n", iommu->name); 1049 goto err_unlock; 1050 } 1051 1052 info->refcnt = 1; 1053 info->did = num; 1054 info->iommu = iommu; 1055 curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id, 1056 NULL, info, GFP_KERNEL); 1057 if (curr) { 1058 ret = xa_err(curr) ? : -EBUSY; 1059 goto err_clear; 1060 } 1061 1062 return 0; 1063 1064 err_clear: 1065 ida_free(&iommu->domain_ida, info->did); 1066 err_unlock: 1067 kfree(info); 1068 return ret; 1069 } 1070 1071 void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) 1072 { 1073 struct iommu_domain_info *info; 1074 1075 if (domain->domain.type == IOMMU_DOMAIN_SVA) 1076 return; 1077 1078 guard(mutex)(&iommu->did_lock); 1079 info = xa_load(&domain->iommu_array, iommu->seq_id); 1080 if (--info->refcnt == 0) { 1081 ida_free(&iommu->domain_ida, info->did); 1082 xa_erase(&domain->iommu_array, iommu->seq_id); 1083 kfree(info); 1084 } 1085 } 1086 1087 /* 1088 * For kdump cases, old valid entries may be cached due to the 1089 * in-flight DMA and copied pgtable, but there is no unmapping 1090 * behaviour for them, thus we need an explicit cache flush for 1091 * the newly-mapped device. 
For kdump, at this point, the device 1092 * is supposed to finish reset at its driver probe stage, so no 1093 * in-flight DMA will exist, and we don't need to worry anymore 1094 * hereafter. 1095 */ 1096 static void copied_context_tear_down(struct intel_iommu *iommu, 1097 struct context_entry *context, 1098 u8 bus, u8 devfn) 1099 { 1100 u16 did_old; 1101 1102 if (!context_copied(iommu, bus, devfn)) 1103 return; 1104 1105 assert_spin_locked(&iommu->lock); 1106 1107 did_old = context_domain_id(context); 1108 context_clear_entry(context); 1109 1110 if (did_old < cap_ndoms(iommu->cap)) { 1111 iommu->flush.flush_context(iommu, did_old, 1112 PCI_DEVID(bus, devfn), 1113 DMA_CCMD_MASK_NOBIT, 1114 DMA_CCMD_DEVICE_INVL); 1115 iommu->flush.flush_iotlb(iommu, did_old, 0, 0, 1116 DMA_TLB_DSI_FLUSH); 1117 } 1118 1119 clear_context_copied(iommu, bus, devfn); 1120 } 1121 1122 /* 1123 * It's a non-present to present mapping. If hardware doesn't cache 1124 * non-present entry we only need to flush the write-buffer. If the 1125 * _does_ cache non-present entries, then it does so in the special 1126 * domain #0, which we have to flush: 1127 */ 1128 static void context_present_cache_flush(struct intel_iommu *iommu, u16 did, 1129 u8 bus, u8 devfn) 1130 { 1131 if (cap_caching_mode(iommu->cap)) { 1132 iommu->flush.flush_context(iommu, 0, 1133 PCI_DEVID(bus, devfn), 1134 DMA_CCMD_MASK_NOBIT, 1135 DMA_CCMD_DEVICE_INVL); 1136 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); 1137 } else { 1138 iommu_flush_write_buffer(iommu); 1139 } 1140 } 1141 1142 static int domain_context_mapping_one(struct dmar_domain *domain, 1143 struct intel_iommu *iommu, 1144 u8 bus, u8 devfn) 1145 { 1146 struct device_domain_info *info = 1147 domain_lookup_dev_info(domain, iommu, bus, devfn); 1148 u16 did = domain_id_iommu(domain, iommu); 1149 int translation = CONTEXT_TT_MULTI_LEVEL; 1150 struct pt_iommu_vtdss_hw_info pt_info; 1151 struct context_entry *context; 1152 int ret; 1153 1154 if (WARN_ON(!intel_domain_is_ss_paging(domain))) 1155 return -EINVAL; 1156 1157 pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info); 1158 1159 pr_debug("Set context mapping for %02x:%02x.%d\n", 1160 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1161 1162 spin_lock(&iommu->lock); 1163 ret = -ENOMEM; 1164 context = iommu_context_addr(iommu, bus, devfn, 1); 1165 if (!context) 1166 goto out_unlock; 1167 1168 ret = 0; 1169 if (context_present(context) && !context_copied(iommu, bus, devfn)) 1170 goto out_unlock; 1171 1172 copied_context_tear_down(iommu, context, bus, devfn); 1173 context_clear_entry(context); 1174 context_set_domain_id(context, did); 1175 1176 if (info && info->ats_supported) 1177 translation = CONTEXT_TT_DEV_IOTLB; 1178 else 1179 translation = CONTEXT_TT_MULTI_LEVEL; 1180 1181 context_set_address_root(context, pt_info.ssptptr); 1182 context_set_address_width(context, pt_info.aw); 1183 context_set_translation_type(context, translation); 1184 context_set_fault_enable(context); 1185 context_set_present(context); 1186 if (!ecap_coherent(iommu->ecap)) 1187 clflush_cache_range(context, sizeof(*context)); 1188 context_present_cache_flush(iommu, did, bus, devfn); 1189 ret = 0; 1190 1191 out_unlock: 1192 spin_unlock(&iommu->lock); 1193 1194 return ret; 1195 } 1196 1197 static int domain_context_mapping_cb(struct pci_dev *pdev, 1198 u16 alias, void *opaque) 1199 { 1200 struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev); 1201 struct intel_iommu *iommu = info->iommu; 1202 struct dmar_domain *domain = opaque; 1203 1204 return 
domain_context_mapping_one(domain, iommu, 1205 PCI_BUS_NUM(alias), alias & 0xff); 1206 } 1207 1208 static int 1209 domain_context_mapping(struct dmar_domain *domain, struct device *dev) 1210 { 1211 struct device_domain_info *info = dev_iommu_priv_get(dev); 1212 struct intel_iommu *iommu = info->iommu; 1213 u8 bus = info->bus, devfn = info->devfn; 1214 int ret; 1215 1216 if (!dev_is_pci(dev)) 1217 return domain_context_mapping_one(domain, iommu, bus, devfn); 1218 1219 ret = pci_for_each_dma_alias(to_pci_dev(dev), 1220 domain_context_mapping_cb, domain); 1221 if (ret) 1222 return ret; 1223 1224 iommu_enable_pci_ats(info); 1225 1226 return 0; 1227 } 1228 1229 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn) 1230 { 1231 struct intel_iommu *iommu = info->iommu; 1232 struct context_entry *context; 1233 u16 did; 1234 1235 spin_lock(&iommu->lock); 1236 context = iommu_context_addr(iommu, bus, devfn, 0); 1237 if (!context) { 1238 spin_unlock(&iommu->lock); 1239 return; 1240 } 1241 1242 did = context_domain_id(context); 1243 context_clear_present(context); 1244 __iommu_flush_cache(iommu, context, sizeof(*context)); 1245 spin_unlock(&iommu->lock); 1246 intel_context_flush_no_pasid(info, context, did); 1247 context_clear_entry(context); 1248 __iommu_flush_cache(iommu, context, sizeof(*context)); 1249 } 1250 1251 int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, 1252 ioasid_t pasid, u16 did, phys_addr_t fsptptr, 1253 int flags, struct iommu_domain *old) 1254 { 1255 if (old) 1256 intel_pasid_tear_down_entry(iommu, dev, pasid, false); 1257 1258 return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, did, flags); 1259 } 1260 1261 static int domain_setup_second_level(struct intel_iommu *iommu, 1262 struct dmar_domain *domain, 1263 struct device *dev, ioasid_t pasid, 1264 struct iommu_domain *old) 1265 { 1266 if (old) 1267 intel_pasid_tear_down_entry(iommu, dev, pasid, false); 1268 1269 return intel_pasid_setup_second_level(iommu, domain, dev, pasid); 1270 } 1271 1272 static int domain_setup_passthrough(struct intel_iommu *iommu, 1273 struct device *dev, ioasid_t pasid, 1274 struct iommu_domain *old) 1275 { 1276 if (old) 1277 intel_pasid_tear_down_entry(iommu, dev, pasid, false); 1278 1279 return intel_pasid_setup_pass_through(iommu, dev, pasid); 1280 } 1281 1282 static int domain_setup_first_level(struct intel_iommu *iommu, 1283 struct dmar_domain *domain, 1284 struct device *dev, 1285 u32 pasid, struct iommu_domain *old) 1286 { 1287 struct pt_iommu_x86_64_hw_info pt_info; 1288 unsigned int flags = 0; 1289 1290 pt_iommu_x86_64_hw_info(&domain->fspt, &pt_info); 1291 if (WARN_ON(pt_info.levels != 4 && pt_info.levels != 5)) 1292 return -EINVAL; 1293 1294 if (pt_info.levels == 5) 1295 flags |= PASID_FLAG_FL5LP; 1296 1297 if (domain->force_snooping) 1298 flags |= PASID_FLAG_PAGE_SNOOP; 1299 1300 if (!(domain->fspt.x86_64_pt.common.features & 1301 BIT(PT_FEAT_DMA_INCOHERENT))) 1302 flags |= PASID_FLAG_PWSNP; 1303 1304 return __domain_setup_first_level(iommu, dev, pasid, 1305 domain_id_iommu(domain, iommu), 1306 pt_info.gcr3_pt, flags, old); 1307 } 1308 1309 static int dmar_domain_attach_device(struct dmar_domain *domain, 1310 struct device *dev) 1311 { 1312 struct device_domain_info *info = dev_iommu_priv_get(dev); 1313 struct intel_iommu *iommu = info->iommu; 1314 unsigned long flags; 1315 int ret; 1316 1317 ret = domain_attach_iommu(domain, iommu); 1318 if (ret) 1319 return ret; 1320 1321 info->domain = domain; 1322 
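	/*
	 * At this point domain_attach_iommu() above has reserved a per-IOMMU
	 * domain ID, which domain_id_iommu(domain, iommu) returns for the
	 * paths below.  A condensed sketch of the branch that follows:
	 *
	 *	!sm_supported(iommu)              ->  legacy context-table mapping
	 *	intel_domain_is_fs_paging(domain) ->  first-level PASID entry
	 *	intel_domain_is_ss_paging(domain) ->  second-level PASID entry
	 *
	 * where the scalable-mode paths both target the device's IOMMU_NO_PASID
	 * entry.
	 */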
	info->domain_attached = true;
	spin_lock_irqsave(&domain->lock, flags);
	list_add(&info->link, &domain->devices);
	spin_unlock_irqrestore(&domain->lock, flags);

	if (dev_is_real_dma_subdevice(dev))
		return 0;

	if (!sm_supported(iommu))
		ret = domain_context_mapping(domain, dev);
	else if (intel_domain_is_fs_paging(domain))
		ret = domain_setup_first_level(iommu, domain, dev,
					       IOMMU_NO_PASID, NULL);
	else if (intel_domain_is_ss_paging(domain))
		ret = domain_setup_second_level(iommu, domain, dev,
						IOMMU_NO_PASID, NULL);
	else if (WARN_ON(true))
		ret = -EINVAL;

	if (ret)
		goto out_block_translation;

	ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
	if (ret)
		goto out_block_translation;

	return 0;

out_block_translation:
	device_block_translation(dev);
	return ret;
}

/**
 * device_rmrr_is_relaxable - Test whether the RMRR of this device
 * is relaxable (i.e. is allowed to be not enforced under some conditions)
 * @dev: device handle
 *
 * We assume that PCI USB devices with RMRRs have them largely
 * for historical reasons and that the RMRR space is not actively used post
 * boot. This exclusion may change if vendors begin to abuse it.
 *
 * The same exception is made for graphics devices, with the requirement that
 * any use of the RMRR regions will be torn down before assigning the device
 * to a guest.
 *
 * Return: true if the RMRR is relaxable, false otherwise
 */
static bool device_rmrr_is_relaxable(struct device *dev)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(dev))
		return false;

	pdev = to_pci_dev(dev);
	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
		return true;
	else
		return false;
}

static int device_def_domain_type(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;

	/*
	 * Hardware does not support the passthrough translation mode.
	 * Always use a dynamic mapping domain.
	 */
	if (!ecap_pass_through(iommu->ecap))
		return IOMMU_DOMAIN_DMA;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
			return IOMMU_DOMAIN_IDENTITY;
	}

	return 0;
}

static void intel_iommu_init_qi(struct intel_iommu *iommu)
{
	/*
	 * Start from the sane iommu hardware state.
	 * If the queued invalidation is already initialized by us
	 * (for example, while enabling interrupt-remapping) then
	 * we got the things already rolling from a sane state.
	 */
	if (!iommu->qi) {
		/*
		 * Clear any previous faults.
		 */
		dmar_fault(-1, iommu);
		/*
		 * Disable queued invalidation if supported and already enabled
		 * before OS handover.
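		 *
		 * Whichever mode dmar_enable_qi() ends up selecting below, the
		 * rest of this file only invalidates through the indirect
		 * hooks; a minimal usage sketch (did and sid are placeholders):
		 *
		 *	iommu->flush.flush_context(iommu, did, sid,
		 *				   DMA_CCMD_MASK_NOBIT,
		 *				   DMA_CCMD_DEVICE_INVL);
		 *	iommu->flush.flush_iotlb(iommu, did, 0, 0,
		 *				 DMA_TLB_DSI_FLUSH);
		 *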
1422 */ 1423 dmar_disable_qi(iommu); 1424 } 1425 1426 if (dmar_enable_qi(iommu)) { 1427 /* 1428 * Queued Invalidate not enabled, use Register Based Invalidate 1429 */ 1430 iommu->flush.flush_context = __iommu_flush_context; 1431 iommu->flush.flush_iotlb = __iommu_flush_iotlb; 1432 pr_info("%s: Using Register based invalidation\n", 1433 iommu->name); 1434 } else { 1435 iommu->flush.flush_context = qi_flush_context; 1436 iommu->flush.flush_iotlb = qi_flush_iotlb; 1437 pr_info("%s: Using Queued invalidation\n", iommu->name); 1438 } 1439 } 1440 1441 static int copy_context_table(struct intel_iommu *iommu, 1442 struct root_entry *old_re, 1443 struct context_entry **tbl, 1444 int bus, bool ext) 1445 { 1446 int tbl_idx, pos = 0, idx, devfn, ret = 0, did; 1447 struct context_entry *new_ce = NULL, ce; 1448 struct context_entry *old_ce = NULL; 1449 struct root_entry re; 1450 phys_addr_t old_ce_phys; 1451 1452 tbl_idx = ext ? bus * 2 : bus; 1453 memcpy(&re, old_re, sizeof(re)); 1454 1455 for (devfn = 0; devfn < 256; devfn++) { 1456 /* First calculate the correct index */ 1457 idx = (ext ? devfn * 2 : devfn) % 256; 1458 1459 if (idx == 0) { 1460 /* First save what we may have and clean up */ 1461 if (new_ce) { 1462 tbl[tbl_idx] = new_ce; 1463 __iommu_flush_cache(iommu, new_ce, 1464 VTD_PAGE_SIZE); 1465 pos = 1; 1466 } 1467 1468 if (old_ce) 1469 memunmap(old_ce); 1470 1471 ret = 0; 1472 if (devfn < 0x80) 1473 old_ce_phys = root_entry_lctp(&re); 1474 else 1475 old_ce_phys = root_entry_uctp(&re); 1476 1477 if (!old_ce_phys) { 1478 if (ext && devfn == 0) { 1479 /* No LCTP, try UCTP */ 1480 devfn = 0x7f; 1481 continue; 1482 } else { 1483 goto out; 1484 } 1485 } 1486 1487 ret = -ENOMEM; 1488 old_ce = memremap(old_ce_phys, PAGE_SIZE, 1489 MEMREMAP_WB); 1490 if (!old_ce) 1491 goto out; 1492 1493 new_ce = iommu_alloc_pages_node_sz(iommu->node, 1494 GFP_KERNEL, SZ_4K); 1495 if (!new_ce) 1496 goto out_unmap; 1497 1498 ret = 0; 1499 } 1500 1501 /* Now copy the context entry */ 1502 memcpy(&ce, old_ce + idx, sizeof(ce)); 1503 1504 if (!context_present(&ce)) 1505 continue; 1506 1507 did = context_domain_id(&ce); 1508 if (did >= 0 && did < cap_ndoms(iommu->cap)) 1509 ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL); 1510 1511 set_context_copied(iommu, bus, devfn); 1512 new_ce[idx] = ce; 1513 } 1514 1515 tbl[tbl_idx + pos] = new_ce; 1516 1517 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE); 1518 1519 out_unmap: 1520 memunmap(old_ce); 1521 1522 out: 1523 return ret; 1524 } 1525 1526 static int copy_translation_tables(struct intel_iommu *iommu) 1527 { 1528 struct context_entry **ctxt_tbls; 1529 struct root_entry *old_rt; 1530 phys_addr_t old_rt_phys; 1531 int ctxt_table_entries; 1532 u64 rtaddr_reg; 1533 int bus, ret; 1534 bool new_ext, ext; 1535 1536 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG); 1537 ext = !!(rtaddr_reg & DMA_RTADDR_SMT); 1538 new_ext = !!sm_supported(iommu); 1539 1540 /* 1541 * The RTT bit can only be changed when translation is disabled, 1542 * but disabling translation means to open a window for data 1543 * corruption. So bail out and don't copy anything if we would 1544 * have to change the bit. 
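	 *
	 * As a side note on copy_context_table() above: in the extended
	 * (scalable root table) case the indices work out as, for an assumed
	 * bus 0x02 and devfn 0x85 (illustrative values):
	 *
	 *	tbl_idx = ext ? bus * 2 : bus;                 - 0x04
	 *	idx     = (ext ? devfn * 2 : devfn) % 256;     - 0x0a
	 *
	 * so devfn values of 0x80 and above wrap into the table referenced by
	 * the upper context-table pointer, which is why the loop jumps ahead to
	 * devfn 0x80 (via the devfn = 0x7f; continue path) when no lower
	 * context-table pointer is present.
	 *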
1545 */ 1546 if (new_ext != ext) 1547 return -EINVAL; 1548 1549 iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL); 1550 if (!iommu->copied_tables) 1551 return -ENOMEM; 1552 1553 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK; 1554 if (!old_rt_phys) 1555 return -EINVAL; 1556 1557 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB); 1558 if (!old_rt) 1559 return -ENOMEM; 1560 1561 /* This is too big for the stack - allocate it from slab */ 1562 ctxt_table_entries = ext ? 512 : 256; 1563 ret = -ENOMEM; 1564 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL); 1565 if (!ctxt_tbls) 1566 goto out_unmap; 1567 1568 for (bus = 0; bus < 256; bus++) { 1569 ret = copy_context_table(iommu, &old_rt[bus], 1570 ctxt_tbls, bus, ext); 1571 if (ret) { 1572 pr_err("%s: Failed to copy context table for bus %d\n", 1573 iommu->name, bus); 1574 continue; 1575 } 1576 } 1577 1578 spin_lock(&iommu->lock); 1579 1580 /* Context tables are copied, now write them to the root_entry table */ 1581 for (bus = 0; bus < 256; bus++) { 1582 int idx = ext ? bus * 2 : bus; 1583 u64 val; 1584 1585 if (ctxt_tbls[idx]) { 1586 val = virt_to_phys(ctxt_tbls[idx]) | 1; 1587 iommu->root_entry[bus].lo = val; 1588 } 1589 1590 if (!ext || !ctxt_tbls[idx + 1]) 1591 continue; 1592 1593 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1; 1594 iommu->root_entry[bus].hi = val; 1595 } 1596 1597 spin_unlock(&iommu->lock); 1598 1599 kfree(ctxt_tbls); 1600 1601 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE); 1602 1603 ret = 0; 1604 1605 out_unmap: 1606 memunmap(old_rt); 1607 1608 return ret; 1609 } 1610 1611 static int __init init_dmars(void) 1612 { 1613 struct dmar_drhd_unit *drhd; 1614 struct intel_iommu *iommu; 1615 int ret; 1616 1617 for_each_iommu(iommu, drhd) { 1618 if (drhd->ignored) { 1619 iommu_disable_translation(iommu); 1620 continue; 1621 } 1622 1623 /* 1624 * Find the max pasid size of all IOMMU's in the system. 1625 * We need to ensure the system pasid table is no bigger 1626 * than the smallest supported. 1627 */ 1628 if (pasid_supported(iommu)) { 1629 u32 temp = 2 << ecap_pss(iommu->ecap); 1630 1631 intel_pasid_max_id = min_t(u32, temp, 1632 intel_pasid_max_id); 1633 } 1634 1635 intel_iommu_init_qi(iommu); 1636 init_translation_status(iommu); 1637 1638 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 1639 iommu_disable_translation(iommu); 1640 clear_translation_pre_enabled(iommu); 1641 pr_warn("Translation was enabled for %s but we are not in kdump mode\n", 1642 iommu->name); 1643 } 1644 1645 /* 1646 * TBD: 1647 * we could share the same root & context tables 1648 * among all IOMMU's. Need to Split it later. 1649 */ 1650 ret = iommu_alloc_root_entry(iommu); 1651 if (ret) 1652 goto free_iommu; 1653 1654 if (translation_pre_enabled(iommu)) { 1655 pr_info("Translation already enabled - trying to copy translation structures\n"); 1656 1657 ret = copy_translation_tables(iommu); 1658 if (ret) { 1659 /* 1660 * We found the IOMMU with translation 1661 * enabled - but failed to copy over the 1662 * old root-entry table. Try to proceed 1663 * by disabling translation now and 1664 * allocating a clean root-entry table. 1665 * This might cause DMAR faults, but 1666 * probably the dump will still succeed. 
1667 */ 1668 pr_err("Failed to copy translation tables from previous kernel for %s\n", 1669 iommu->name); 1670 iommu_disable_translation(iommu); 1671 clear_translation_pre_enabled(iommu); 1672 } else { 1673 pr_info("Copied translation tables from previous kernel for %s\n", 1674 iommu->name); 1675 } 1676 } 1677 1678 intel_svm_check(iommu); 1679 } 1680 1681 /* 1682 * Now that qi is enabled on all iommus, set the root entry and flush 1683 * caches. This is required on some Intel X58 chipsets, otherwise the 1684 * flush_context function will loop forever and the boot hangs. 1685 */ 1686 for_each_active_iommu(iommu, drhd) { 1687 iommu_flush_write_buffer(iommu); 1688 iommu_set_root_entry(iommu); 1689 } 1690 1691 check_tylersburg_isoch(); 1692 1693 /* 1694 * for each drhd 1695 * enable fault log 1696 * global invalidate context cache 1697 * global invalidate iotlb 1698 * enable translation 1699 */ 1700 for_each_iommu(iommu, drhd) { 1701 if (drhd->ignored) { 1702 /* 1703 * we always have to disable PMRs or DMA may fail on 1704 * this device 1705 */ 1706 if (force_on) 1707 iommu_disable_protect_mem_regions(iommu); 1708 continue; 1709 } 1710 1711 iommu_flush_write_buffer(iommu); 1712 1713 if (ecap_prs(iommu->ecap)) { 1714 /* 1715 * Call dmar_alloc_hwirq() with dmar_global_lock held, 1716 * could cause possible lock race condition. 1717 */ 1718 up_write(&dmar_global_lock); 1719 ret = intel_iommu_enable_prq(iommu); 1720 down_write(&dmar_global_lock); 1721 if (ret) 1722 goto free_iommu; 1723 } 1724 1725 ret = dmar_set_interrupt(iommu); 1726 if (ret) 1727 goto free_iommu; 1728 } 1729 1730 return 0; 1731 1732 free_iommu: 1733 for_each_active_iommu(iommu, drhd) { 1734 disable_dmar_iommu(iommu); 1735 free_dmar_iommu(iommu); 1736 } 1737 1738 return ret; 1739 } 1740 1741 static void __init init_no_remapping_devices(void) 1742 { 1743 struct dmar_drhd_unit *drhd; 1744 struct device *dev; 1745 int i; 1746 1747 for_each_drhd_unit(drhd) { 1748 if (!drhd->include_all) { 1749 for_each_active_dev_scope(drhd->devices, 1750 drhd->devices_cnt, i, dev) 1751 break; 1752 /* ignore DMAR unit if no devices exist */ 1753 if (i == drhd->devices_cnt) 1754 drhd->ignored = 1; 1755 } 1756 } 1757 1758 for_each_active_drhd_unit(drhd) { 1759 if (drhd->include_all) 1760 continue; 1761 1762 for_each_active_dev_scope(drhd->devices, 1763 drhd->devices_cnt, i, dev) 1764 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev))) 1765 break; 1766 if (i < drhd->devices_cnt) 1767 continue; 1768 1769 /* This IOMMU has *only* gfx devices. 
Either bypass it or 1770 set the gfx_mapped flag, as appropriate */ 1771 drhd->gfx_dedicated = 1; 1772 if (disable_igfx_iommu) 1773 drhd->ignored = 1; 1774 } 1775 } 1776 1777 #ifdef CONFIG_SUSPEND 1778 static int init_iommu_hw(void) 1779 { 1780 struct dmar_drhd_unit *drhd; 1781 struct intel_iommu *iommu = NULL; 1782 int ret; 1783 1784 for_each_active_iommu(iommu, drhd) { 1785 if (iommu->qi) { 1786 ret = dmar_reenable_qi(iommu); 1787 if (ret) 1788 return ret; 1789 } 1790 } 1791 1792 for_each_iommu(iommu, drhd) { 1793 if (drhd->ignored) { 1794 /* 1795 * we always have to disable PMRs or DMA may fail on 1796 * this device 1797 */ 1798 if (force_on) 1799 iommu_disable_protect_mem_regions(iommu); 1800 continue; 1801 } 1802 1803 iommu_flush_write_buffer(iommu); 1804 iommu_set_root_entry(iommu); 1805 iommu_enable_translation(iommu); 1806 iommu_disable_protect_mem_regions(iommu); 1807 } 1808 1809 return 0; 1810 } 1811 1812 static void iommu_flush_all(void) 1813 { 1814 struct dmar_drhd_unit *drhd; 1815 struct intel_iommu *iommu; 1816 1817 for_each_active_iommu(iommu, drhd) { 1818 iommu->flush.flush_context(iommu, 0, 0, 0, 1819 DMA_CCMD_GLOBAL_INVL); 1820 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 1821 DMA_TLB_GLOBAL_FLUSH); 1822 } 1823 } 1824 1825 static int iommu_suspend(void *data) 1826 { 1827 struct dmar_drhd_unit *drhd; 1828 struct intel_iommu *iommu = NULL; 1829 unsigned long flag; 1830 1831 iommu_flush_all(); 1832 1833 for_each_active_iommu(iommu, drhd) { 1834 iommu_disable_translation(iommu); 1835 1836 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1837 1838 iommu->iommu_state[SR_DMAR_FECTL_REG] = 1839 readl(iommu->reg + DMAR_FECTL_REG); 1840 iommu->iommu_state[SR_DMAR_FEDATA_REG] = 1841 readl(iommu->reg + DMAR_FEDATA_REG); 1842 iommu->iommu_state[SR_DMAR_FEADDR_REG] = 1843 readl(iommu->reg + DMAR_FEADDR_REG); 1844 iommu->iommu_state[SR_DMAR_FEUADDR_REG] = 1845 readl(iommu->reg + DMAR_FEUADDR_REG); 1846 1847 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1848 } 1849 return 0; 1850 } 1851 1852 static void iommu_resume(void *data) 1853 { 1854 struct dmar_drhd_unit *drhd; 1855 struct intel_iommu *iommu = NULL; 1856 unsigned long flag; 1857 1858 if (init_iommu_hw()) { 1859 if (force_on) 1860 panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); 1861 else 1862 WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); 1863 return; 1864 } 1865 1866 for_each_active_iommu(iommu, drhd) { 1867 1868 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1869 1870 writel(iommu->iommu_state[SR_DMAR_FECTL_REG], 1871 iommu->reg + DMAR_FECTL_REG); 1872 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG], 1873 iommu->reg + DMAR_FEDATA_REG); 1874 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG], 1875 iommu->reg + DMAR_FEADDR_REG); 1876 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG], 1877 iommu->reg + DMAR_FEUADDR_REG); 1878 1879 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1880 } 1881 } 1882 1883 static const struct syscore_ops iommu_syscore_ops = { 1884 .resume = iommu_resume, 1885 .suspend = iommu_suspend, 1886 }; 1887 1888 static struct syscore iommu_syscore = { 1889 .ops = &iommu_syscore_ops, 1890 }; 1891 1892 static void __init init_iommu_pm_ops(void) 1893 { 1894 register_syscore(&iommu_syscore); 1895 } 1896 1897 #else 1898 static inline void init_iommu_pm_ops(void) {} 1899 #endif /* CONFIG_PM */ 1900 1901 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) 1902 { 1903 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) || 1904 !IS_ALIGNED(rmrr->end_address + 1, 
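	    /*
	     * e.g. an assumed RMRR of [0x000e0000, 0x000effff] passes the
	     * alignment and ordering checks here: both bounds are page aligned
	     * (end + 1 is 0x000f0000) and end is above base, while a report
	     * like [0x1000, 0x0fff] would be rejected.
	     */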
PAGE_SIZE) || 1905 rmrr->end_address <= rmrr->base_address || 1906 arch_rmrr_sanity_check(rmrr)) 1907 return -EINVAL; 1908 1909 return 0; 1910 } 1911 1912 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) 1913 { 1914 struct acpi_dmar_reserved_memory *rmrr; 1915 struct dmar_rmrr_unit *rmrru; 1916 1917 rmrr = (struct acpi_dmar_reserved_memory *)header; 1918 if (rmrr_sanity_check(rmrr)) { 1919 pr_warn(FW_BUG 1920 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n" 1921 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 1922 rmrr->base_address, rmrr->end_address, 1923 dmi_get_system_info(DMI_BIOS_VENDOR), 1924 dmi_get_system_info(DMI_BIOS_VERSION), 1925 dmi_get_system_info(DMI_PRODUCT_VERSION)); 1926 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 1927 } 1928 1929 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); 1930 if (!rmrru) 1931 goto out; 1932 1933 rmrru->hdr = header; 1934 1935 rmrru->base_address = rmrr->base_address; 1936 rmrru->end_address = rmrr->end_address; 1937 1938 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), 1939 ((void *)rmrr) + rmrr->header.length, 1940 &rmrru->devices_cnt); 1941 if (rmrru->devices_cnt && rmrru->devices == NULL) 1942 goto free_rmrru; 1943 1944 list_add(&rmrru->list, &dmar_rmrr_units); 1945 1946 return 0; 1947 free_rmrru: 1948 kfree(rmrru); 1949 out: 1950 return -ENOMEM; 1951 } 1952 1953 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr) 1954 { 1955 struct dmar_atsr_unit *atsru; 1956 struct acpi_dmar_atsr *tmp; 1957 1958 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list, 1959 dmar_rcu_check()) { 1960 tmp = (struct acpi_dmar_atsr *)atsru->hdr; 1961 if (atsr->segment != tmp->segment) 1962 continue; 1963 if (atsr->header.length != tmp->header.length) 1964 continue; 1965 if (memcmp(atsr, tmp, atsr->header.length) == 0) 1966 return atsru; 1967 } 1968 1969 return NULL; 1970 } 1971 1972 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg) 1973 { 1974 struct acpi_dmar_atsr *atsr; 1975 struct dmar_atsr_unit *atsru; 1976 1977 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) 1978 return 0; 1979 1980 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 1981 atsru = dmar_find_atsr(atsr); 1982 if (atsru) 1983 return 0; 1984 1985 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL); 1986 if (!atsru) 1987 return -ENOMEM; 1988 1989 /* 1990 * If memory is allocated from slab by ACPI _DSM method, we need to 1991 * copy the memory content because the memory buffer will be freed 1992 * on return. 
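	 *
	 * The copy sits immediately after the tracking structure itself, in a
	 * single allocation of sizeof(*atsru) + hdr->length bytes:
	 *
	 *	+-----------------------+------------------------------+
	 *	| struct dmar_atsr_unit | copy of the ACPI ATSR header |
	 *	+-----------------------+------------------------------+
	 *	^ atsru                  ^ atsru->hdr == (void *)(atsru + 1)
	 *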
1993 */ 1994 atsru->hdr = (void *)(atsru + 1); 1995 memcpy(atsru->hdr, hdr, hdr->length); 1996 atsru->include_all = atsr->flags & 0x1; 1997 if (!atsru->include_all) { 1998 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1), 1999 (void *)atsr + atsr->header.length, 2000 &atsru->devices_cnt); 2001 if (atsru->devices_cnt && atsru->devices == NULL) { 2002 kfree(atsru); 2003 return -ENOMEM; 2004 } 2005 } 2006 2007 list_add_rcu(&atsru->list, &dmar_atsr_units); 2008 2009 return 0; 2010 } 2011 2012 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru) 2013 { 2014 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); 2015 kfree(atsru); 2016 } 2017 2018 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg) 2019 { 2020 struct acpi_dmar_atsr *atsr; 2021 struct dmar_atsr_unit *atsru; 2022 2023 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 2024 atsru = dmar_find_atsr(atsr); 2025 if (atsru) { 2026 list_del_rcu(&atsru->list); 2027 synchronize_rcu(); 2028 intel_iommu_free_atsr(atsru); 2029 } 2030 2031 return 0; 2032 } 2033 2034 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) 2035 { 2036 int i; 2037 struct device *dev; 2038 struct acpi_dmar_atsr *atsr; 2039 struct dmar_atsr_unit *atsru; 2040 2041 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 2042 atsru = dmar_find_atsr(atsr); 2043 if (!atsru) 2044 return 0; 2045 2046 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) { 2047 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt, 2048 i, dev) 2049 return -EBUSY; 2050 } 2051 2052 return 0; 2053 } 2054 2055 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc) 2056 { 2057 struct dmar_satc_unit *satcu; 2058 struct acpi_dmar_satc *tmp; 2059 2060 list_for_each_entry_rcu(satcu, &dmar_satc_units, list, 2061 dmar_rcu_check()) { 2062 tmp = (struct acpi_dmar_satc *)satcu->hdr; 2063 if (satc->segment != tmp->segment) 2064 continue; 2065 if (satc->header.length != tmp->header.length) 2066 continue; 2067 if (memcmp(satc, tmp, satc->header.length) == 0) 2068 return satcu; 2069 } 2070 2071 return NULL; 2072 } 2073 2074 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg) 2075 { 2076 struct acpi_dmar_satc *satc; 2077 struct dmar_satc_unit *satcu; 2078 2079 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) 2080 return 0; 2081 2082 satc = container_of(hdr, struct acpi_dmar_satc, header); 2083 satcu = dmar_find_satc(satc); 2084 if (satcu) 2085 return 0; 2086 2087 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL); 2088 if (!satcu) 2089 return -ENOMEM; 2090 2091 satcu->hdr = (void *)(satcu + 1); 2092 memcpy(satcu->hdr, hdr, hdr->length); 2093 satcu->atc_required = satc->flags & 0x1; 2094 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1), 2095 (void *)satc + satc->header.length, 2096 &satcu->devices_cnt); 2097 if (satcu->devices_cnt && !satcu->devices) { 2098 kfree(satcu); 2099 return -ENOMEM; 2100 } 2101 list_add_rcu(&satcu->list, &dmar_satc_units); 2102 2103 return 0; 2104 } 2105 2106 static int intel_iommu_add(struct dmar_drhd_unit *dmaru) 2107 { 2108 struct intel_iommu *iommu = dmaru->iommu; 2109 int ret; 2110 2111 /* 2112 * Disable translation if already enabled prior to OS handover. 
2113 */ 2114 if (iommu->gcmd & DMA_GCMD_TE) 2115 iommu_disable_translation(iommu); 2116 2117 ret = iommu_alloc_root_entry(iommu); 2118 if (ret) 2119 goto out; 2120 2121 intel_svm_check(iommu); 2122 2123 if (dmaru->ignored) { 2124 /* 2125 * we always have to disable PMRs or DMA may fail on this device 2126 */ 2127 if (force_on) 2128 iommu_disable_protect_mem_regions(iommu); 2129 return 0; 2130 } 2131 2132 intel_iommu_init_qi(iommu); 2133 iommu_flush_write_buffer(iommu); 2134 2135 if (ecap_prs(iommu->ecap)) { 2136 ret = intel_iommu_enable_prq(iommu); 2137 if (ret) 2138 goto disable_iommu; 2139 } 2140 2141 ret = dmar_set_interrupt(iommu); 2142 if (ret) 2143 goto disable_iommu; 2144 2145 iommu_set_root_entry(iommu); 2146 iommu_enable_translation(iommu); 2147 2148 iommu_disable_protect_mem_regions(iommu); 2149 return 0; 2150 2151 disable_iommu: 2152 disable_dmar_iommu(iommu); 2153 out: 2154 free_dmar_iommu(iommu); 2155 return ret; 2156 } 2157 2158 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert) 2159 { 2160 int ret = 0; 2161 struct intel_iommu *iommu = dmaru->iommu; 2162 2163 if (!intel_iommu_enabled) 2164 return 0; 2165 if (iommu == NULL) 2166 return -EINVAL; 2167 2168 if (insert) { 2169 ret = intel_iommu_add(dmaru); 2170 } else { 2171 disable_dmar_iommu(iommu); 2172 free_dmar_iommu(iommu); 2173 } 2174 2175 return ret; 2176 } 2177 2178 static void intel_iommu_free_dmars(void) 2179 { 2180 struct dmar_rmrr_unit *rmrru, *rmrr_n; 2181 struct dmar_atsr_unit *atsru, *atsr_n; 2182 struct dmar_satc_unit *satcu, *satc_n; 2183 2184 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { 2185 list_del(&rmrru->list); 2186 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); 2187 kfree(rmrru); 2188 } 2189 2190 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { 2191 list_del(&atsru->list); 2192 intel_iommu_free_atsr(atsru); 2193 } 2194 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) { 2195 list_del(&satcu->list); 2196 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt); 2197 kfree(satcu); 2198 } 2199 } 2200 2201 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev) 2202 { 2203 struct dmar_satc_unit *satcu; 2204 struct acpi_dmar_satc *satc; 2205 struct device *tmp; 2206 int i; 2207 2208 rcu_read_lock(); 2209 2210 list_for_each_entry_rcu(satcu, &dmar_satc_units, list) { 2211 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); 2212 if (satc->segment != pci_domain_nr(dev->bus)) 2213 continue; 2214 for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp) 2215 if (to_pci_dev(tmp) == dev) 2216 goto out; 2217 } 2218 satcu = NULL; 2219 out: 2220 rcu_read_unlock(); 2221 return satcu; 2222 } 2223 2224 static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu) 2225 { 2226 struct pci_dev *bridge = NULL; 2227 struct dmar_atsr_unit *atsru; 2228 struct dmar_satc_unit *satcu; 2229 struct acpi_dmar_atsr *atsr; 2230 bool supported = true; 2231 struct pci_bus *bus; 2232 struct device *tmp; 2233 int i; 2234 2235 dev = pci_physfn(dev); 2236 satcu = dmar_find_matched_satc_unit(dev); 2237 if (satcu) 2238 /* 2239 * This device supports ATS as it is in SATC table. 2240 * When IOMMU is in legacy mode, enabling ATS is done 2241 * automatically by HW for the device that requires 2242 * ATS, hence OS should not enable this device ATS 2243 * to avoid duplicated TLB invalidation. 
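 * Hence ATS is reported as usable here only when the IOMMU is in scalable
 * mode or the SATC entry does not mandate ATS.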
2244 */ 2245 return !(satcu->atc_required && !sm_supported(iommu)); 2246 2247 for (bus = dev->bus; bus; bus = bus->parent) { 2248 bridge = bus->self; 2249 /* If it's an integrated device, allow ATS */ 2250 if (!bridge) 2251 return true; 2252 /* Connected via non-PCIe: no ATS */ 2253 if (!pci_is_pcie(bridge) || 2254 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) 2255 return false; 2256 /* If we found the root port, look it up in the ATSR */ 2257 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) 2258 break; 2259 } 2260 2261 rcu_read_lock(); 2262 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { 2263 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 2264 if (atsr->segment != pci_domain_nr(dev->bus)) 2265 continue; 2266 2267 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) 2268 if (tmp == &bridge->dev) 2269 goto out; 2270 2271 if (atsru->include_all) 2272 goto out; 2273 } 2274 supported = false; 2275 out: 2276 rcu_read_unlock(); 2277 2278 return supported; 2279 } 2280 2281 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) 2282 { 2283 int ret; 2284 struct dmar_rmrr_unit *rmrru; 2285 struct dmar_atsr_unit *atsru; 2286 struct dmar_satc_unit *satcu; 2287 struct acpi_dmar_atsr *atsr; 2288 struct acpi_dmar_reserved_memory *rmrr; 2289 struct acpi_dmar_satc *satc; 2290 2291 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING) 2292 return 0; 2293 2294 list_for_each_entry(rmrru, &dmar_rmrr_units, list) { 2295 rmrr = container_of(rmrru->hdr, 2296 struct acpi_dmar_reserved_memory, header); 2297 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 2298 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), 2299 ((void *)rmrr) + rmrr->header.length, 2300 rmrr->segment, rmrru->devices, 2301 rmrru->devices_cnt); 2302 if (ret < 0) 2303 return ret; 2304 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 2305 dmar_remove_dev_scope(info, rmrr->segment, 2306 rmrru->devices, rmrru->devices_cnt); 2307 } 2308 } 2309 2310 list_for_each_entry(atsru, &dmar_atsr_units, list) { 2311 if (atsru->include_all) 2312 continue; 2313 2314 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 2315 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 2316 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), 2317 (void *)atsr + atsr->header.length, 2318 atsr->segment, atsru->devices, 2319 atsru->devices_cnt); 2320 if (ret > 0) 2321 break; 2322 else if (ret < 0) 2323 return ret; 2324 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 2325 if (dmar_remove_dev_scope(info, atsr->segment, 2326 atsru->devices, atsru->devices_cnt)) 2327 break; 2328 } 2329 } 2330 list_for_each_entry(satcu, &dmar_satc_units, list) { 2331 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); 2332 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 2333 ret = dmar_insert_dev_scope(info, (void *)(satc + 1), 2334 (void *)satc + satc->header.length, 2335 satc->segment, satcu->devices, 2336 satcu->devices_cnt); 2337 if (ret > 0) 2338 break; 2339 else if (ret < 0) 2340 return ret; 2341 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 2342 if (dmar_remove_dev_scope(info, satc->segment, 2343 satcu->devices, satcu->devices_cnt)) 2344 break; 2345 } 2346 } 2347 2348 return 0; 2349 } 2350 2351 static void intel_disable_iommus(void) 2352 { 2353 struct intel_iommu *iommu = NULL; 2354 struct dmar_drhd_unit *drhd; 2355 2356 for_each_iommu(iommu, drhd) 2357 iommu_disable_translation(iommu); 2358 } 2359 2360 void intel_iommu_shutdown(void) 2361 { 2362 struct dmar_drhd_unit *drhd; 2363 struct intel_iommu 
*iommu = NULL; 2364 2365 if (no_iommu || dmar_disabled) 2366 return; 2367 2368 /* 2369 * All other CPUs were brought down, hotplug interrupts were disabled, 2370 * no lock and RCU checking needed anymore 2371 */ 2372 list_for_each_entry(drhd, &dmar_drhd_units, list) { 2373 iommu = drhd->iommu; 2374 2375 /* Disable PMRs explicitly here. */ 2376 iommu_disable_protect_mem_regions(iommu); 2377 2378 /* Make sure the IOMMUs are switched off */ 2379 iommu_disable_translation(iommu); 2380 } 2381 } 2382 2383 static struct intel_iommu *dev_to_intel_iommu(struct device *dev) 2384 { 2385 struct iommu_device *iommu_dev = dev_to_iommu_device(dev); 2386 2387 return container_of(iommu_dev, struct intel_iommu, iommu); 2388 } 2389 2390 static ssize_t version_show(struct device *dev, 2391 struct device_attribute *attr, char *buf) 2392 { 2393 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2394 u32 ver = readl(iommu->reg + DMAR_VER_REG); 2395 return sysfs_emit(buf, "%d:%d\n", 2396 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); 2397 } 2398 static DEVICE_ATTR_RO(version); 2399 2400 static ssize_t address_show(struct device *dev, 2401 struct device_attribute *attr, char *buf) 2402 { 2403 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2404 return sysfs_emit(buf, "%llx\n", iommu->reg_phys); 2405 } 2406 static DEVICE_ATTR_RO(address); 2407 2408 static ssize_t cap_show(struct device *dev, 2409 struct device_attribute *attr, char *buf) 2410 { 2411 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2412 return sysfs_emit(buf, "%llx\n", iommu->cap); 2413 } 2414 static DEVICE_ATTR_RO(cap); 2415 2416 static ssize_t ecap_show(struct device *dev, 2417 struct device_attribute *attr, char *buf) 2418 { 2419 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2420 return sysfs_emit(buf, "%llx\n", iommu->ecap); 2421 } 2422 static DEVICE_ATTR_RO(ecap); 2423 2424 static ssize_t domains_supported_show(struct device *dev, 2425 struct device_attribute *attr, char *buf) 2426 { 2427 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2428 return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap)); 2429 } 2430 static DEVICE_ATTR_RO(domains_supported); 2431 2432 static ssize_t domains_used_show(struct device *dev, 2433 struct device_attribute *attr, char *buf) 2434 { 2435 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2436 unsigned int count = 0; 2437 int id; 2438 2439 for (id = 0; id < cap_ndoms(iommu->cap); id++) 2440 if (ida_exists(&iommu->domain_ida, id)) 2441 count++; 2442 2443 return sysfs_emit(buf, "%d\n", count); 2444 } 2445 static DEVICE_ATTR_RO(domains_used); 2446 2447 static struct attribute *intel_iommu_attrs[] = { 2448 &dev_attr_version.attr, 2449 &dev_attr_address.attr, 2450 &dev_attr_cap.attr, 2451 &dev_attr_ecap.attr, 2452 &dev_attr_domains_supported.attr, 2453 &dev_attr_domains_used.attr, 2454 NULL, 2455 }; 2456 2457 static struct attribute_group intel_iommu_group = { 2458 .name = "intel-iommu", 2459 .attrs = intel_iommu_attrs, 2460 }; 2461 2462 const struct attribute_group *intel_iommu_groups[] = { 2463 &intel_iommu_group, 2464 NULL, 2465 }; 2466 2467 static bool has_external_pci(void) 2468 { 2469 struct pci_dev *pdev = NULL; 2470 2471 for_each_pci_dev(pdev) 2472 if (pdev->external_facing) { 2473 pci_dev_put(pdev); 2474 return true; 2475 } 2476 2477 return false; 2478 } 2479 2480 static int __init platform_optin_force_iommu(void) 2481 { 2482 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci()) 2483 return 0; 2484 2485 if (no_iommu || dmar_disabled) 2486 pr_info("Intel-IOMMU force enabled 
due to platform opt in\n"); 2487 2488 /* 2489 * If Intel-IOMMU is disabled by default, we will apply identity 2490 * map for all devices except those marked as being untrusted. 2491 */ 2492 if (dmar_disabled) 2493 iommu_set_default_passthrough(false); 2494 2495 dmar_disabled = 0; 2496 no_iommu = 0; 2497 2498 return 1; 2499 } 2500 2501 static int __init probe_acpi_namespace_devices(void) 2502 { 2503 struct dmar_drhd_unit *drhd; 2504 /* To avoid a -Wunused-but-set-variable warning. */ 2505 struct intel_iommu *iommu __maybe_unused; 2506 struct device *dev; 2507 int i, ret = 0; 2508 2509 for_each_active_iommu(iommu, drhd) { 2510 for_each_active_dev_scope(drhd->devices, 2511 drhd->devices_cnt, i, dev) { 2512 struct acpi_device_physical_node *pn; 2513 struct acpi_device *adev; 2514 2515 if (dev->bus != &acpi_bus_type) 2516 continue; 2517 2518 up_read(&dmar_global_lock); 2519 adev = to_acpi_device(dev); 2520 mutex_lock(&adev->physical_node_lock); 2521 list_for_each_entry(pn, 2522 &adev->physical_node_list, node) { 2523 ret = iommu_probe_device(pn->dev); 2524 if (ret) 2525 break; 2526 } 2527 mutex_unlock(&adev->physical_node_lock); 2528 down_read(&dmar_global_lock); 2529 2530 if (ret) 2531 return ret; 2532 } 2533 } 2534 2535 return 0; 2536 } 2537 2538 static __init int tboot_force_iommu(void) 2539 { 2540 if (!tboot_enabled()) 2541 return 0; 2542 2543 if (no_iommu || dmar_disabled) 2544 pr_warn("Forcing Intel-IOMMU to enabled\n"); 2545 2546 dmar_disabled = 0; 2547 no_iommu = 0; 2548 2549 return 1; 2550 } 2551 2552 int __init intel_iommu_init(void) 2553 { 2554 int ret = -ENODEV; 2555 struct dmar_drhd_unit *drhd; 2556 struct intel_iommu *iommu; 2557 2558 /* 2559 * Intel IOMMU is required for a TXT/tboot launch or platform 2560 * opt in, so enforce that. 2561 */ 2562 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || 2563 platform_optin_force_iommu(); 2564 2565 down_write(&dmar_global_lock); 2566 if (dmar_table_init()) { 2567 if (force_on) 2568 panic("tboot: Failed to initialize DMAR table\n"); 2569 goto out_free_dmar; 2570 } 2571 2572 if (dmar_dev_scope_init() < 0) { 2573 if (force_on) 2574 panic("tboot: Failed to initialize DMAR device scope\n"); 2575 goto out_free_dmar; 2576 } 2577 2578 up_write(&dmar_global_lock); 2579 2580 /* 2581 * The bus notifier takes the dmar_global_lock, so lockdep will 2582 * complain later when we register it under the lock. 2583 */ 2584 dmar_register_bus_notifier(); 2585 2586 down_write(&dmar_global_lock); 2587 2588 if (!no_iommu) 2589 intel_iommu_debugfs_init(); 2590 2591 if (no_iommu || dmar_disabled) { 2592 /* 2593 * We exit the function here to ensure IOMMU's remapping and 2594 * mempool aren't setup, which means that the IOMMU's PMRs 2595 * won't be disabled via the call to init_dmars(). So disable 2596 * it explicitly here. The PMRs were setup by tboot prior to 2597 * calling SENTER, but the kernel is expected to reset/tear 2598 * down the PMRs. 
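 * Hence the explicit iommu_disable_protect_mem_regions() pass below for the
 * intel_iommu_tboot_noforce case.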
2599 */ 2600 if (intel_iommu_tboot_noforce) { 2601 for_each_iommu(iommu, drhd) 2602 iommu_disable_protect_mem_regions(iommu); 2603 } 2604 2605 /* 2606 * Make sure the IOMMUs are switched off, even when we 2607 * boot into a kexec kernel and the previous kernel left 2608 * them enabled 2609 */ 2610 intel_disable_iommus(); 2611 goto out_free_dmar; 2612 } 2613 2614 if (list_empty(&dmar_rmrr_units)) 2615 pr_info("No RMRR found\n"); 2616 2617 if (list_empty(&dmar_atsr_units)) 2618 pr_info("No ATSR found\n"); 2619 2620 if (list_empty(&dmar_satc_units)) 2621 pr_info("No SATC found\n"); 2622 2623 init_no_remapping_devices(); 2624 2625 ret = init_dmars(); 2626 if (ret) { 2627 if (force_on) 2628 panic("tboot: Failed to initialize DMARs\n"); 2629 pr_err("Initialization failed\n"); 2630 goto out_free_dmar; 2631 } 2632 up_write(&dmar_global_lock); 2633 2634 init_iommu_pm_ops(); 2635 2636 down_read(&dmar_global_lock); 2637 for_each_active_iommu(iommu, drhd) { 2638 /* 2639 * The flush queue implementation does not perform 2640 * page-selective invalidations that are required for efficient 2641 * TLB flushes in virtual environments. The benefit of batching 2642 * is likely to be much lower than the overhead of synchronizing 2643 * the virtual and physical IOMMU page-tables. 2644 */ 2645 if (cap_caching_mode(iommu->cap) && 2646 !first_level_by_default(iommu)) { 2647 pr_info_once("IOMMU batching disallowed due to virtualization\n"); 2648 iommu_set_dma_strict(); 2649 } 2650 iommu_device_sysfs_add(&iommu->iommu, NULL, 2651 intel_iommu_groups, 2652 "%s", iommu->name); 2653 /* 2654 * The iommu device probe is protected by the iommu_probe_device_lock. 2655 * Release the dmar_global_lock before entering the device probe path 2656 * to avoid unnecessary lock order splat. 2657 */ 2658 up_read(&dmar_global_lock); 2659 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); 2660 down_read(&dmar_global_lock); 2661 2662 iommu_pmu_register(iommu); 2663 } 2664 2665 if (probe_acpi_namespace_devices()) 2666 pr_warn("ACPI name space devices didn't probe correctly\n"); 2667 2668 /* Finally, we enable the DMA remapping hardware. */ 2669 for_each_iommu(iommu, drhd) { 2670 if (!drhd->ignored && !translation_pre_enabled(iommu)) 2671 iommu_enable_translation(iommu); 2672 2673 iommu_disable_protect_mem_regions(iommu); 2674 } 2675 up_read(&dmar_global_lock); 2676 2677 pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); 2678 2679 intel_iommu_enabled = 1; 2680 2681 return 0; 2682 2683 out_free_dmar: 2684 intel_iommu_free_dmars(); 2685 up_write(&dmar_global_lock); 2686 return ret; 2687 } 2688 2689 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque) 2690 { 2691 struct device_domain_info *info = opaque; 2692 2693 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff); 2694 return 0; 2695 } 2696 2697 /* 2698 * NB - intel-iommu lacks any sort of reference counting for the users of 2699 * dependent devices. If multiple endpoints have intersecting dependent 2700 * devices, unbinding the driver from any one of them will possibly leave 2701 * the others unable to operate. 
2702 */ 2703 static void domain_context_clear(struct device_domain_info *info) 2704 { 2705 if (!dev_is_pci(info->dev)) { 2706 domain_context_clear_one(info, info->bus, info->devfn); 2707 return; 2708 } 2709 2710 pci_for_each_dma_alias(to_pci_dev(info->dev), 2711 &domain_context_clear_one_cb, info); 2712 iommu_disable_pci_ats(info); 2713 } 2714 2715 /* 2716 * Clear the page table pointer in context or pasid table entries so that 2717 * all DMA requests without PASID from the device are blocked. If the page 2718 * table has been set, clean up the data structures. 2719 */ 2720 void device_block_translation(struct device *dev) 2721 { 2722 struct device_domain_info *info = dev_iommu_priv_get(dev); 2723 struct intel_iommu *iommu = info->iommu; 2724 unsigned long flags; 2725 2726 /* Device in DMA blocking state. Nothing to do. */ 2727 if (!info->domain_attached) 2728 return; 2729 2730 if (info->domain) 2731 cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); 2732 2733 if (!dev_is_real_dma_subdevice(dev)) { 2734 if (sm_supported(iommu)) 2735 intel_pasid_tear_down_entry(iommu, dev, 2736 IOMMU_NO_PASID, false); 2737 else 2738 domain_context_clear(info); 2739 } 2740 2741 /* Device now in DMA blocking state. */ 2742 info->domain_attached = false; 2743 2744 if (!info->domain) 2745 return; 2746 2747 spin_lock_irqsave(&info->domain->lock, flags); 2748 list_del(&info->link); 2749 spin_unlock_irqrestore(&info->domain->lock, flags); 2750 2751 domain_detach_iommu(info->domain, iommu); 2752 info->domain = NULL; 2753 } 2754 2755 static int blocking_domain_attach_dev(struct iommu_domain *domain, 2756 struct device *dev, 2757 struct iommu_domain *old) 2758 { 2759 struct device_domain_info *info = dev_iommu_priv_get(dev); 2760 2761 iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev); 2762 device_block_translation(dev); 2763 return 0; 2764 } 2765 2766 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, 2767 struct device *dev, ioasid_t pasid, 2768 struct iommu_domain *old); 2769 2770 static struct iommu_domain blocking_domain = { 2771 .type = IOMMU_DOMAIN_BLOCKED, 2772 .ops = &(const struct iommu_domain_ops) { 2773 .attach_dev = blocking_domain_attach_dev, 2774 .set_dev_pasid = blocking_domain_set_dev_pasid, 2775 } 2776 }; 2777 2778 static struct dmar_domain *paging_domain_alloc(void) 2779 { 2780 struct dmar_domain *domain; 2781 2782 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 2783 if (!domain) 2784 return ERR_PTR(-ENOMEM); 2785 2786 INIT_LIST_HEAD(&domain->devices); 2787 INIT_LIST_HEAD(&domain->dev_pasids); 2788 INIT_LIST_HEAD(&domain->cache_tags); 2789 spin_lock_init(&domain->lock); 2790 spin_lock_init(&domain->cache_lock); 2791 xa_init(&domain->iommu_array); 2792 INIT_LIST_HEAD(&domain->s1_domains); 2793 spin_lock_init(&domain->s1_lock); 2794 2795 return domain; 2796 } 2797 2798 static unsigned int compute_vasz_lg2_fs(struct intel_iommu *iommu, 2799 unsigned int *top_level) 2800 { 2801 unsigned int mgaw = cap_mgaw(iommu->cap); 2802 2803 /* 2804 * Spec 3.6 First-Stage Translation: 2805 * 2806 * Software must limit addresses to less than the minimum of MGAW 2807 * and the lower canonical address width implied by FSPM (i.e., 2808 * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level).
2809 */ 2810 if (mgaw > 48 && cap_fl5lp_support(iommu->cap)) { 2811 *top_level = 4; 2812 return min(57, mgaw); 2813 } 2814 2815 /* Four level is always supported */ 2816 *top_level = 3; 2817 return min(48, mgaw); 2818 } 2819 2820 static struct iommu_domain * 2821 intel_iommu_domain_alloc_first_stage(struct device *dev, 2822 struct intel_iommu *iommu, u32 flags) 2823 { 2824 struct pt_iommu_x86_64_cfg cfg = {}; 2825 struct dmar_domain *dmar_domain; 2826 int ret; 2827 2828 if (flags & ~IOMMU_HWPT_ALLOC_PASID) 2829 return ERR_PTR(-EOPNOTSUPP); 2830 2831 /* Only SL is available in legacy mode */ 2832 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) 2833 return ERR_PTR(-EOPNOTSUPP); 2834 2835 dmar_domain = paging_domain_alloc(); 2836 if (IS_ERR(dmar_domain)) 2837 return ERR_CAST(dmar_domain); 2838 2839 cfg.common.hw_max_vasz_lg2 = 2840 compute_vasz_lg2_fs(iommu, &cfg.top_level); 2841 cfg.common.hw_max_oasz_lg2 = 52; 2842 cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) | 2843 BIT(PT_FEAT_FLUSH_RANGE); 2844 /* First stage always uses scalable mode */ 2845 if (!ecap_smpwc(iommu->ecap)) 2846 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT); 2847 dmar_domain->iommu.iommu_device = dev; 2848 dmar_domain->iommu.nid = dev_to_node(dev); 2849 dmar_domain->domain.ops = &intel_fs_paging_domain_ops; 2850 /* 2851 * iotlb sync for map is only needed for legacy implementations that 2852 * explicitly require flushing internal write buffers to ensure memory 2853 * coherence. 2854 */ 2855 if (rwbf_required(iommu)) 2856 dmar_domain->iotlb_sync_map = true; 2857 2858 ret = pt_iommu_x86_64_init(&dmar_domain->fspt, &cfg, GFP_KERNEL); 2859 if (ret) { 2860 kfree(dmar_domain); 2861 return ERR_PTR(ret); 2862 } 2863 2864 if (!cap_fl1gp_support(iommu->cap)) 2865 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G; 2866 if (!intel_iommu_superpage) 2867 dmar_domain->domain.pgsize_bitmap = SZ_4K; 2868 2869 return &dmar_domain->domain; 2870 } 2871 2872 static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu, 2873 unsigned int *top_level) 2874 { 2875 unsigned int sagaw = cap_sagaw(iommu->cap); 2876 unsigned int mgaw = cap_mgaw(iommu->cap); 2877 2878 /* 2879 * Find the largest table size that both the mgaw and sagaw support. 2880 * This sets the valid range of IOVA and the top starting level. 2881 * Some HW may only support a 4 or 5 level walk but must limit IOVA to 2882 * 3 levels. 
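 * SAGAW bits 1/2/3 advertise 3/4/5-level tables (39/48/57-bit input
 * addresses); the checks below pick the largest one that MGAW can also cover.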
2883 */ 2884 if (mgaw > 48 && sagaw >= BIT(3)) { 2885 *top_level = 4; 2886 return min(57, mgaw); 2887 } else if (mgaw > 39 && sagaw >= BIT(2)) { 2888 *top_level = 3 + ffs(sagaw >> 3); 2889 return min(48, mgaw); 2890 } else if (mgaw > 30 && sagaw >= BIT(1)) { 2891 *top_level = 2 + ffs(sagaw >> 2); 2892 return min(39, mgaw); 2893 } 2894 return 0; 2895 } 2896 2897 static const struct iommu_dirty_ops intel_second_stage_dirty_ops = { 2898 IOMMU_PT_DIRTY_OPS(vtdss), 2899 .set_dirty_tracking = intel_iommu_set_dirty_tracking, 2900 }; 2901 2902 static struct iommu_domain * 2903 intel_iommu_domain_alloc_second_stage(struct device *dev, 2904 struct intel_iommu *iommu, u32 flags) 2905 { 2906 struct pt_iommu_vtdss_cfg cfg = {}; 2907 struct dmar_domain *dmar_domain; 2908 unsigned int sslps; 2909 int ret; 2910 2911 if (flags & 2912 (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | 2913 IOMMU_HWPT_ALLOC_PASID))) 2914 return ERR_PTR(-EOPNOTSUPP); 2915 2916 if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && 2917 !nested_supported(iommu)) || 2918 ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && 2919 !ssads_supported(iommu))) 2920 return ERR_PTR(-EOPNOTSUPP); 2921 2922 /* Legacy mode always supports second stage */ 2923 if (sm_supported(iommu) && !ecap_slts(iommu->ecap)) 2924 return ERR_PTR(-EOPNOTSUPP); 2925 2926 dmar_domain = paging_domain_alloc(); 2927 if (IS_ERR(dmar_domain)) 2928 return ERR_CAST(dmar_domain); 2929 2930 cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level); 2931 cfg.common.hw_max_oasz_lg2 = 52; 2932 cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE); 2933 2934 /* 2935 * Read-only mapping is disallowed on the domain which serves as the 2936 * parent in a nested configuration, due to HW errata 2937 * (ERRATA_772415_SPR17) 2938 */ 2939 if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) 2940 cfg.common.features |= BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE); 2941 2942 if (!iommu_paging_structure_coherency(iommu)) 2943 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT); 2944 dmar_domain->iommu.iommu_device = dev; 2945 dmar_domain->iommu.nid = dev_to_node(dev); 2946 dmar_domain->domain.ops = &intel_ss_paging_domain_ops; 2947 dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; 2948 2949 if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) 2950 dmar_domain->domain.dirty_ops = &intel_second_stage_dirty_ops; 2951 2952 ret = pt_iommu_vtdss_init(&dmar_domain->sspt, &cfg, GFP_KERNEL); 2953 if (ret) { 2954 kfree(dmar_domain); 2955 return ERR_PTR(ret); 2956 } 2957 2958 /* Adjust the supported page sizes to HW capability */ 2959 sslps = cap_super_page_val(iommu->cap); 2960 if (!(sslps & BIT(0))) 2961 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_2M; 2962 if (!(sslps & BIT(1))) 2963 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G; 2964 if (!intel_iommu_superpage) 2965 dmar_domain->domain.pgsize_bitmap = SZ_4K; 2966 2967 /* 2968 * Besides the internal write buffer flush, the caching mode used for 2969 * legacy nested translation (which utilizes shadowing page tables) 2970 * also requires iotlb sync on map. 
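 * With caching mode the hardware (or the hypervisor shadowing these tables)
 * may cache not-present entries, so newly created mappings must be
 * invalidated explicitly before first use.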
2971 */ 2972 if (rwbf_required(iommu) || cap_caching_mode(iommu->cap)) 2973 dmar_domain->iotlb_sync_map = true; 2974 2975 return &dmar_domain->domain; 2976 } 2977 2978 static struct iommu_domain * 2979 intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, 2980 const struct iommu_user_data *user_data) 2981 { 2982 struct device_domain_info *info = dev_iommu_priv_get(dev); 2983 struct intel_iommu *iommu = info->iommu; 2984 struct iommu_domain *domain; 2985 2986 if (user_data) 2987 return ERR_PTR(-EOPNOTSUPP); 2988 2989 /* Prefer first stage if possible by default. */ 2990 domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags); 2991 if (domain != ERR_PTR(-EOPNOTSUPP)) 2992 return domain; 2993 return intel_iommu_domain_alloc_second_stage(dev, iommu, flags); 2994 } 2995 2996 static void intel_iommu_domain_free(struct iommu_domain *domain) 2997 { 2998 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 2999 3000 if (WARN_ON(dmar_domain->nested_parent && 3001 !list_empty(&dmar_domain->s1_domains))) 3002 return; 3003 3004 if (WARN_ON(!list_empty(&dmar_domain->devices))) 3005 return; 3006 3007 pt_iommu_deinit(&dmar_domain->iommu); 3008 3009 kfree(dmar_domain->qi_batch); 3010 kfree(dmar_domain); 3011 } 3012 3013 static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain, 3014 struct intel_iommu *iommu) 3015 { 3016 if (WARN_ON(dmar_domain->domain.dirty_ops || 3017 dmar_domain->nested_parent)) 3018 return -EINVAL; 3019 3020 /* Only SL is available in legacy mode */ 3021 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) 3022 return -EINVAL; 3023 3024 if (!ecap_smpwc(iommu->ecap) && 3025 !(dmar_domain->fspt.x86_64_pt.common.features & 3026 BIT(PT_FEAT_DMA_INCOHERENT))) 3027 return -EINVAL; 3028 3029 /* Supports the number of table levels */ 3030 if (!cap_fl5lp_support(iommu->cap) && 3031 dmar_domain->fspt.x86_64_pt.common.max_vasz_lg2 > 48) 3032 return -EINVAL; 3033 3034 /* Same page size support */ 3035 if (!cap_fl1gp_support(iommu->cap) && 3036 (dmar_domain->domain.pgsize_bitmap & SZ_1G)) 3037 return -EINVAL; 3038 3039 /* iotlb sync on map requirement */ 3040 if ((rwbf_required(iommu)) && !dmar_domain->iotlb_sync_map) 3041 return -EINVAL; 3042 3043 return 0; 3044 } 3045 3046 static int 3047 paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain, 3048 struct intel_iommu *iommu) 3049 { 3050 unsigned int vasz_lg2 = dmar_domain->sspt.vtdss_pt.common.max_vasz_lg2; 3051 unsigned int sslps = cap_super_page_val(iommu->cap); 3052 struct pt_iommu_vtdss_hw_info pt_info; 3053 3054 pt_iommu_vtdss_hw_info(&dmar_domain->sspt, &pt_info); 3055 3056 if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu)) 3057 return -EINVAL; 3058 if (dmar_domain->nested_parent && !nested_supported(iommu)) 3059 return -EINVAL; 3060 3061 /* Legacy mode always supports second stage */ 3062 if (sm_supported(iommu) && !ecap_slts(iommu->ecap)) 3063 return -EINVAL; 3064 3065 if (!iommu_paging_structure_coherency(iommu) && 3066 !(dmar_domain->sspt.vtdss_pt.common.features & 3067 BIT(PT_FEAT_DMA_INCOHERENT))) 3068 return -EINVAL; 3069 3070 /* Address width falls within the capability */ 3071 if (cap_mgaw(iommu->cap) < vasz_lg2) 3072 return -EINVAL; 3073 3074 /* Page table level is supported. 
*/ 3075 if (!(cap_sagaw(iommu->cap) & BIT(pt_info.aw))) 3076 return -EINVAL; 3077 3078 /* Same page size support */ 3079 if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M)) 3080 return -EINVAL; 3081 if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G)) 3082 return -EINVAL; 3083 3084 /* iotlb sync on map requirement */ 3085 if ((rwbf_required(iommu) || cap_caching_mode(iommu->cap)) && 3086 !dmar_domain->iotlb_sync_map) 3087 return -EINVAL; 3088 3089 /* 3090 * FIXME this is locked wrong, it needs to be under the 3091 * dmar_domain->lock 3092 */ 3093 if ((dmar_domain->sspt.vtdss_pt.common.features & 3094 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE)) && 3095 !ecap_sc_support(iommu->ecap)) 3096 return -EINVAL; 3097 return 0; 3098 } 3099 3100 int paging_domain_compatible(struct iommu_domain *domain, struct device *dev) 3101 { 3102 struct device_domain_info *info = dev_iommu_priv_get(dev); 3103 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3104 struct intel_iommu *iommu = info->iommu; 3105 int ret = -EINVAL; 3106 3107 if (intel_domain_is_fs_paging(dmar_domain)) 3108 ret = paging_domain_compatible_first_stage(dmar_domain, iommu); 3109 else if (intel_domain_is_ss_paging(dmar_domain)) 3110 ret = paging_domain_compatible_second_stage(dmar_domain, iommu); 3111 else if (WARN_ON(true)) 3112 ret = -EINVAL; 3113 if (ret) 3114 return ret; 3115 3116 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && 3117 context_copied(iommu, info->bus, info->devfn)) 3118 return intel_pasid_setup_sm_context(dev); 3119 3120 return 0; 3121 } 3122 3123 static int intel_iommu_attach_device(struct iommu_domain *domain, 3124 struct device *dev, 3125 struct iommu_domain *old) 3126 { 3127 int ret; 3128 3129 device_block_translation(dev); 3130 3131 ret = paging_domain_compatible(domain, dev); 3132 if (ret) 3133 return ret; 3134 3135 ret = iopf_for_domain_set(domain, dev); 3136 if (ret) 3137 return ret; 3138 3139 ret = dmar_domain_attach_device(to_dmar_domain(domain), dev); 3140 if (ret) 3141 iopf_for_domain_remove(domain, dev); 3142 3143 return ret; 3144 } 3145 3146 static void intel_iommu_tlb_sync(struct iommu_domain *domain, 3147 struct iommu_iotlb_gather *gather) 3148 { 3149 cache_tag_flush_range(to_dmar_domain(domain), gather->start, 3150 gather->end, 3151 iommu_pages_list_empty(&gather->freelist)); 3152 iommu_put_pages_list(&gather->freelist); 3153 } 3154 3155 static bool domain_support_force_snooping(struct dmar_domain *domain) 3156 { 3157 struct device_domain_info *info; 3158 bool support = true; 3159 3160 assert_spin_locked(&domain->lock); 3161 list_for_each_entry(info, &domain->devices, link) { 3162 if (!ecap_sc_support(info->iommu->ecap)) { 3163 support = false; 3164 break; 3165 } 3166 } 3167 3168 return support; 3169 } 3170 3171 static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain) 3172 { 3173 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3174 struct device_domain_info *info; 3175 3176 guard(spinlock_irqsave)(&dmar_domain->lock); 3177 3178 if (dmar_domain->force_snooping) 3179 return true; 3180 3181 if (!domain_support_force_snooping(dmar_domain)) 3182 return false; 3183 3184 dmar_domain->force_snooping = true; 3185 list_for_each_entry(info, &dmar_domain->devices, link) 3186 intel_pasid_setup_page_snoop_control(info->iommu, info->dev, 3187 IOMMU_NO_PASID); 3188 return true; 3189 } 3190 3191 static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain) 3192 { 3193 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 
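/*
 * guard() keeps dmar_domain->lock held for the rest of this function so the
 * device list stays stable while force-snooping support is checked.
 */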
3194 3195 guard(spinlock_irqsave)(&dmar_domain->lock); 3196 if (!domain_support_force_snooping(dmar_domain)) 3197 return false; 3198 3199 /* 3200 * Second level page table supports per-PTE snoop control. The 3201 * iommu_map() interface will handle this by setting SNP bit. 3202 */ 3203 dmar_domain->sspt.vtdss_pt.common.features |= 3204 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE); 3205 dmar_domain->force_snooping = true; 3206 return true; 3207 } 3208 3209 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) 3210 { 3211 struct device_domain_info *info = dev_iommu_priv_get(dev); 3212 3213 switch (cap) { 3214 case IOMMU_CAP_CACHE_COHERENCY: 3215 case IOMMU_CAP_DEFERRED_FLUSH: 3216 return true; 3217 case IOMMU_CAP_PRE_BOOT_PROTECTION: 3218 return dmar_platform_optin(); 3219 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: 3220 return ecap_sc_support(info->iommu->ecap); 3221 case IOMMU_CAP_DIRTY_TRACKING: 3222 return ssads_supported(info->iommu); 3223 default: 3224 return false; 3225 } 3226 } 3227 3228 static struct iommu_device *intel_iommu_probe_device(struct device *dev) 3229 { 3230 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; 3231 struct device_domain_info *info; 3232 struct intel_iommu *iommu; 3233 u8 bus, devfn; 3234 int ret; 3235 3236 iommu = device_lookup_iommu(dev, &bus, &devfn); 3237 if (!iommu || !iommu->iommu.ops) 3238 return ERR_PTR(-ENODEV); 3239 3240 info = kzalloc(sizeof(*info), GFP_KERNEL); 3241 if (!info) 3242 return ERR_PTR(-ENOMEM); 3243 3244 if (dev_is_real_dma_subdevice(dev)) { 3245 info->bus = pdev->bus->number; 3246 info->devfn = pdev->devfn; 3247 info->segment = pci_domain_nr(pdev->bus); 3248 } else { 3249 info->bus = bus; 3250 info->devfn = devfn; 3251 info->segment = iommu->segment; 3252 } 3253 3254 info->dev = dev; 3255 info->iommu = iommu; 3256 if (dev_is_pci(dev)) { 3257 if (ecap_dev_iotlb_support(iommu->ecap) && 3258 pci_ats_supported(pdev) && 3259 dmar_ats_supported(pdev, iommu)) { 3260 info->ats_supported = 1; 3261 info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); 3262 3263 /* 3264 * For IOMMU that supports device IOTLB throttling 3265 * (DIT), we assign PFSID to the invalidation desc 3266 * of a VF such that IOMMU HW can gauge queue depth 3267 * at PF level. If DIT is not set, PFSID will be 3268 * treated as reserved, which should be set to 0. 
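 * pci_dev_id(pci_physfn(pdev)) below provides that PF source ID (PFSID).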
3269 */ 3270 if (ecap_dit(iommu->ecap)) 3271 info->pfsid = pci_dev_id(pci_physfn(pdev)); 3272 info->ats_qdep = pci_ats_queue_depth(pdev); 3273 } 3274 if (sm_supported(iommu)) { 3275 if (pasid_supported(iommu)) { 3276 int features = pci_pasid_features(pdev); 3277 3278 if (features >= 0) 3279 info->pasid_supported = features | 1; 3280 } 3281 3282 if (info->ats_supported && ecap_prs(iommu->ecap) && 3283 ecap_pds(iommu->ecap) && pci_pri_supported(pdev)) 3284 info->pri_supported = 1; 3285 } 3286 } 3287 3288 dev_iommu_priv_set(dev, info); 3289 if (pdev && pci_ats_supported(pdev)) { 3290 pci_prepare_ats(pdev, VTD_PAGE_SHIFT); 3291 ret = device_rbtree_insert(iommu, info); 3292 if (ret) 3293 goto free; 3294 } 3295 3296 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { 3297 ret = intel_pasid_alloc_table(dev); 3298 if (ret) { 3299 dev_err(dev, "PASID table allocation failed\n"); 3300 goto clear_rbtree; 3301 } 3302 3303 if (!context_copied(iommu, info->bus, info->devfn)) { 3304 ret = intel_pasid_setup_sm_context(dev); 3305 if (ret) 3306 goto free_table; 3307 } 3308 } 3309 3310 intel_iommu_debugfs_create_dev(info); 3311 3312 return &iommu->iommu; 3313 free_table: 3314 intel_pasid_free_table(dev); 3315 clear_rbtree: 3316 device_rbtree_remove(info); 3317 free: 3318 kfree(info); 3319 3320 return ERR_PTR(ret); 3321 } 3322 3323 static void intel_iommu_probe_finalize(struct device *dev) 3324 { 3325 struct device_domain_info *info = dev_iommu_priv_get(dev); 3326 struct intel_iommu *iommu = info->iommu; 3327 3328 /* 3329 * The PCIe spec, in its wisdom, declares that the behaviour of the 3330 * device is undefined if you enable PASID support after ATS support. 3331 * So always enable PASID support on devices which have it, even if 3332 * we can't yet know if we're ever going to use it. 3333 */ 3334 if (info->pasid_supported && 3335 !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) 3336 info->pasid_enabled = 1; 3337 3338 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { 3339 iommu_enable_pci_ats(info); 3340 /* Assign a DEVTLB cache tag to the default domain. 
*/ 3341 if (info->ats_enabled && info->domain) { 3342 u16 did = domain_id_iommu(info->domain, iommu); 3343 3344 if (cache_tag_assign(info->domain, did, dev, 3345 IOMMU_NO_PASID, CACHE_TAG_DEVTLB)) 3346 iommu_disable_pci_ats(info); 3347 } 3348 } 3349 iommu_enable_pci_pri(info); 3350 } 3351 3352 static void intel_iommu_release_device(struct device *dev) 3353 { 3354 struct device_domain_info *info = dev_iommu_priv_get(dev); 3355 struct intel_iommu *iommu = info->iommu; 3356 3357 iommu_disable_pci_pri(info); 3358 iommu_disable_pci_ats(info); 3359 3360 if (info->pasid_enabled) { 3361 pci_disable_pasid(to_pci_dev(dev)); 3362 info->pasid_enabled = 0; 3363 } 3364 3365 mutex_lock(&iommu->iopf_lock); 3366 if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev))) 3367 device_rbtree_remove(info); 3368 mutex_unlock(&iommu->iopf_lock); 3369 3370 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && 3371 !context_copied(iommu, info->bus, info->devfn)) 3372 intel_pasid_teardown_sm_context(dev); 3373 3374 intel_pasid_free_table(dev); 3375 intel_iommu_debugfs_remove_dev(info); 3376 kfree(info); 3377 } 3378 3379 static void intel_iommu_get_resv_regions(struct device *device, 3380 struct list_head *head) 3381 { 3382 int prot = DMA_PTE_READ | DMA_PTE_WRITE; 3383 struct iommu_resv_region *reg; 3384 struct dmar_rmrr_unit *rmrr; 3385 struct device *i_dev; 3386 int i; 3387 3388 rcu_read_lock(); 3389 for_each_rmrr_units(rmrr) { 3390 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 3391 i, i_dev) { 3392 struct iommu_resv_region *resv; 3393 enum iommu_resv_type type; 3394 size_t length; 3395 3396 if (i_dev != device && 3397 !is_downstream_to_pci_bridge(device, i_dev)) 3398 continue; 3399 3400 length = rmrr->end_address - rmrr->base_address + 1; 3401 3402 type = device_rmrr_is_relaxable(device) ? 3403 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT; 3404 3405 resv = iommu_alloc_resv_region(rmrr->base_address, 3406 length, prot, type, 3407 GFP_ATOMIC); 3408 if (!resv) 3409 break; 3410 3411 list_add_tail(&resv->list, head); 3412 } 3413 } 3414 rcu_read_unlock(); 3415 3416 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA 3417 if (dev_is_pci(device)) { 3418 struct pci_dev *pdev = to_pci_dev(device); 3419 3420 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { 3421 reg = iommu_alloc_resv_region(0, 1UL << 24, prot, 3422 IOMMU_RESV_DIRECT_RELAXABLE, 3423 GFP_KERNEL); 3424 if (reg) 3425 list_add_tail(&reg->list, head); 3426 } 3427 } 3428 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */ 3429 3430 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, 3431 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, 3432 0, IOMMU_RESV_MSI, GFP_KERNEL); 3433 if (!reg) 3434 return; 3435 list_add_tail(&reg->list, head); 3436 } 3437 3438 static struct iommu_group *intel_iommu_device_group(struct device *dev) 3439 { 3440 if (dev_is_pci(dev)) 3441 return pci_device_group(dev); 3442 return generic_device_group(dev); 3443 } 3444 3445 int intel_iommu_enable_iopf(struct device *dev) 3446 { 3447 struct device_domain_info *info = dev_iommu_priv_get(dev); 3448 struct intel_iommu *iommu = info->iommu; 3449 int ret; 3450 3451 if (!info->pri_enabled) 3452 return -ENODEV; 3453 3454 /* pri_enabled is protected by the group mutex.
*/ 3455 iommu_group_mutex_assert(dev); 3456 if (info->iopf_refcount) { 3457 info->iopf_refcount++; 3458 return 0; 3459 } 3460 3461 ret = iopf_queue_add_device(iommu->iopf_queue, dev); 3462 if (ret) 3463 return ret; 3464 3465 info->iopf_refcount = 1; 3466 3467 return 0; 3468 } 3469 3470 void intel_iommu_disable_iopf(struct device *dev) 3471 { 3472 struct device_domain_info *info = dev_iommu_priv_get(dev); 3473 struct intel_iommu *iommu = info->iommu; 3474 3475 if (WARN_ON(!info->pri_enabled || !info->iopf_refcount)) 3476 return; 3477 3478 iommu_group_mutex_assert(dev); 3479 if (--info->iopf_refcount) 3480 return; 3481 3482 iopf_queue_remove_device(iommu->iopf_queue, dev); 3483 } 3484 3485 static bool intel_iommu_is_attach_deferred(struct device *dev) 3486 { 3487 struct device_domain_info *info = dev_iommu_priv_get(dev); 3488 3489 return translation_pre_enabled(info->iommu) && !info->domain; 3490 } 3491 3492 /* 3493 * Check that the device does not live on an external facing PCI port that is 3494 * marked as untrusted. Such devices should not be able to apply quirks and 3495 * thus not be able to bypass the IOMMU restrictions. 3496 */ 3497 static bool risky_device(struct pci_dev *pdev) 3498 { 3499 if (pdev->untrusted) { 3500 pci_info(pdev, 3501 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n", 3502 pdev->vendor, pdev->device); 3503 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n"); 3504 return true; 3505 } 3506 return false; 3507 } 3508 3509 static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, 3510 unsigned long iova, size_t size) 3511 { 3512 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3513 3514 if (dmar_domain->iotlb_sync_map) 3515 cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1); 3516 3517 return 0; 3518 } 3519 3520 void domain_remove_dev_pasid(struct iommu_domain *domain, 3521 struct device *dev, ioasid_t pasid) 3522 { 3523 struct device_domain_info *info = dev_iommu_priv_get(dev); 3524 struct dev_pasid_info *curr, *dev_pasid = NULL; 3525 struct intel_iommu *iommu = info->iommu; 3526 struct dmar_domain *dmar_domain; 3527 unsigned long flags; 3528 3529 if (!domain) 3530 return; 3531 3532 /* Identity domain has no meta data for pasid. 
*/ 3533 if (domain->type == IOMMU_DOMAIN_IDENTITY) 3534 return; 3535 3536 dmar_domain = to_dmar_domain(domain); 3537 spin_lock_irqsave(&dmar_domain->lock, flags); 3538 list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { 3539 if (curr->dev == dev && curr->pasid == pasid) { 3540 list_del(&curr->link_domain); 3541 dev_pasid = curr; 3542 break; 3543 } 3544 } 3545 spin_unlock_irqrestore(&dmar_domain->lock, flags); 3546 3547 cache_tag_unassign_domain(dmar_domain, dev, pasid); 3548 domain_detach_iommu(dmar_domain, iommu); 3549 if (!WARN_ON_ONCE(!dev_pasid)) { 3550 intel_iommu_debugfs_remove_dev_pasid(dev_pasid); 3551 kfree(dev_pasid); 3552 } 3553 } 3554 3555 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, 3556 struct device *dev, ioasid_t pasid, 3557 struct iommu_domain *old) 3558 { 3559 struct device_domain_info *info = dev_iommu_priv_get(dev); 3560 3561 intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); 3562 iopf_for_domain_remove(old, dev); 3563 domain_remove_dev_pasid(old, dev, pasid); 3564 3565 return 0; 3566 } 3567 3568 struct dev_pasid_info * 3569 domain_add_dev_pasid(struct iommu_domain *domain, 3570 struct device *dev, ioasid_t pasid) 3571 { 3572 struct device_domain_info *info = dev_iommu_priv_get(dev); 3573 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3574 struct intel_iommu *iommu = info->iommu; 3575 struct dev_pasid_info *dev_pasid; 3576 unsigned long flags; 3577 int ret; 3578 3579 dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); 3580 if (!dev_pasid) 3581 return ERR_PTR(-ENOMEM); 3582 3583 ret = domain_attach_iommu(dmar_domain, iommu); 3584 if (ret) 3585 goto out_free; 3586 3587 ret = cache_tag_assign_domain(dmar_domain, dev, pasid); 3588 if (ret) 3589 goto out_detach_iommu; 3590 3591 dev_pasid->dev = dev; 3592 dev_pasid->pasid = pasid; 3593 spin_lock_irqsave(&dmar_domain->lock, flags); 3594 list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); 3595 spin_unlock_irqrestore(&dmar_domain->lock, flags); 3596 3597 return dev_pasid; 3598 out_detach_iommu: 3599 domain_detach_iommu(dmar_domain, iommu); 3600 out_free: 3601 kfree(dev_pasid); 3602 return ERR_PTR(ret); 3603 } 3604 3605 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, 3606 struct device *dev, ioasid_t pasid, 3607 struct iommu_domain *old) 3608 { 3609 struct device_domain_info *info = dev_iommu_priv_get(dev); 3610 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3611 struct intel_iommu *iommu = info->iommu; 3612 struct dev_pasid_info *dev_pasid; 3613 int ret; 3614 3615 if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING))) 3616 return -EINVAL; 3617 3618 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) 3619 return -EOPNOTSUPP; 3620 3621 if (domain->dirty_ops) 3622 return -EINVAL; 3623 3624 if (context_copied(iommu, info->bus, info->devfn)) 3625 return -EBUSY; 3626 3627 ret = paging_domain_compatible(domain, dev); 3628 if (ret) 3629 return ret; 3630 3631 dev_pasid = domain_add_dev_pasid(domain, dev, pasid); 3632 if (IS_ERR(dev_pasid)) 3633 return PTR_ERR(dev_pasid); 3634 3635 ret = iopf_for_domain_replace(domain, old, dev); 3636 if (ret) 3637 goto out_remove_dev_pasid; 3638 3639 if (intel_domain_is_fs_paging(dmar_domain)) 3640 ret = domain_setup_first_level(iommu, dmar_domain, 3641 dev, pasid, old); 3642 else if (intel_domain_is_ss_paging(dmar_domain)) 3643 ret = domain_setup_second_level(iommu, dmar_domain, 3644 dev, pasid, old); 3645 else if (WARN_ON(true)) 3646 ret = -EINVAL; 3647 3648 if (ret) 3649 goto 
out_unwind_iopf; 3650 3651 domain_remove_dev_pasid(old, dev, pasid); 3652 3653 intel_iommu_debugfs_create_dev_pasid(dev_pasid); 3654 3655 return 0; 3656 3657 out_unwind_iopf: 3658 iopf_for_domain_replace(old, domain, dev); 3659 out_remove_dev_pasid: 3660 domain_remove_dev_pasid(domain, dev, pasid); 3661 return ret; 3662 } 3663 3664 static void *intel_iommu_hw_info(struct device *dev, u32 *length, 3665 enum iommu_hw_info_type *type) 3666 { 3667 struct device_domain_info *info = dev_iommu_priv_get(dev); 3668 struct intel_iommu *iommu = info->iommu; 3669 struct iommu_hw_info_vtd *vtd; 3670 3671 if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && 3672 *type != IOMMU_HW_INFO_TYPE_INTEL_VTD) 3673 return ERR_PTR(-EOPNOTSUPP); 3674 3675 vtd = kzalloc(sizeof(*vtd), GFP_KERNEL); 3676 if (!vtd) 3677 return ERR_PTR(-ENOMEM); 3678 3679 vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17; 3680 vtd->cap_reg = iommu->cap; 3681 vtd->ecap_reg = iommu->ecap; 3682 *length = sizeof(*vtd); 3683 *type = IOMMU_HW_INFO_TYPE_INTEL_VTD; 3684 return vtd; 3685 } 3686 3687 /* 3688 * Set dirty tracking for the device list of a domain. The caller must 3689 * hold the domain->lock when calling it. 3690 */ 3691 static int device_set_dirty_tracking(struct list_head *devices, bool enable) 3692 { 3693 struct device_domain_info *info; 3694 int ret = 0; 3695 3696 list_for_each_entry(info, devices, link) { 3697 ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, 3698 IOMMU_NO_PASID, enable); 3699 if (ret) 3700 break; 3701 } 3702 3703 return ret; 3704 } 3705 3706 static int parent_domain_set_dirty_tracking(struct dmar_domain *domain, 3707 bool enable) 3708 { 3709 struct dmar_domain *s1_domain; 3710 unsigned long flags; 3711 int ret; 3712 3713 spin_lock(&domain->s1_lock); 3714 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { 3715 spin_lock_irqsave(&s1_domain->lock, flags); 3716 ret = device_set_dirty_tracking(&s1_domain->devices, enable); 3717 spin_unlock_irqrestore(&s1_domain->lock, flags); 3718 if (ret) 3719 goto err_unwind; 3720 } 3721 spin_unlock(&domain->s1_lock); 3722 return 0; 3723 3724 err_unwind: 3725 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { 3726 spin_lock_irqsave(&s1_domain->lock, flags); 3727 device_set_dirty_tracking(&s1_domain->devices, 3728 domain->dirty_tracking); 3729 spin_unlock_irqrestore(&s1_domain->lock, flags); 3730 } 3731 spin_unlock(&domain->s1_lock); 3732 return ret; 3733 } 3734 3735 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, 3736 bool enable) 3737 { 3738 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3739 int ret; 3740 3741 spin_lock(&dmar_domain->lock); 3742 if (dmar_domain->dirty_tracking == enable) 3743 goto out_unlock; 3744 3745 ret = device_set_dirty_tracking(&dmar_domain->devices, enable); 3746 if (ret) 3747 goto err_unwind; 3748 3749 if (dmar_domain->nested_parent) { 3750 ret = parent_domain_set_dirty_tracking(dmar_domain, enable); 3751 if (ret) 3752 goto err_unwind; 3753 } 3754 3755 dmar_domain->dirty_tracking = enable; 3756 out_unlock: 3757 spin_unlock(&dmar_domain->lock); 3758 3759 return 0; 3760 3761 err_unwind: 3762 device_set_dirty_tracking(&dmar_domain->devices, 3763 dmar_domain->dirty_tracking); 3764 spin_unlock(&dmar_domain->lock); 3765 return ret; 3766 } 3767 3768 static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn) 3769 { 3770 struct device_domain_info *info = dev_iommu_priv_get(dev); 3771 struct intel_iommu *iommu = info->iommu; 3772 struct context_entry *context; 3773 3774 
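/*
 * Install a pass-through context entry for this bus/devfn under iommu->lock:
 * tear down any entry copied from the previous kernel, point the entry at
 * FLPT_DEFAULT_DID with CONTEXT_TT_PASS_THROUGH, and flush the relevant
 * caches once the entry is marked present.
 */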
spin_lock(&iommu->lock); 3775 context = iommu_context_addr(iommu, bus, devfn, 1); 3776 if (!context) { 3777 spin_unlock(&iommu->lock); 3778 return -ENOMEM; 3779 } 3780 3781 if (context_present(context) && !context_copied(iommu, bus, devfn)) { 3782 spin_unlock(&iommu->lock); 3783 return 0; 3784 } 3785 3786 copied_context_tear_down(iommu, context, bus, devfn); 3787 context_clear_entry(context); 3788 context_set_domain_id(context, FLPT_DEFAULT_DID); 3789 3790 /* 3791 * In pass through mode, AW must be programmed to indicate the largest 3792 * AGAW value supported by hardware. And ASR is ignored by hardware. 3793 */ 3794 context_set_address_width(context, iommu->msagaw); 3795 context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH); 3796 context_set_fault_enable(context); 3797 context_set_present(context); 3798 if (!ecap_coherent(iommu->ecap)) 3799 clflush_cache_range(context, sizeof(*context)); 3800 context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn); 3801 spin_unlock(&iommu->lock); 3802 3803 return 0; 3804 } 3805 3806 static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data) 3807 { 3808 struct device *dev = data; 3809 3810 return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); 3811 } 3812 3813 static int device_setup_pass_through(struct device *dev) 3814 { 3815 struct device_domain_info *info = dev_iommu_priv_get(dev); 3816 3817 if (!dev_is_pci(dev)) 3818 return context_setup_pass_through(dev, info->bus, info->devfn); 3819 3820 return pci_for_each_dma_alias(to_pci_dev(dev), 3821 context_setup_pass_through_cb, dev); 3822 } 3823 3824 static int identity_domain_attach_dev(struct iommu_domain *domain, 3825 struct device *dev, 3826 struct iommu_domain *old) 3827 { 3828 struct device_domain_info *info = dev_iommu_priv_get(dev); 3829 struct intel_iommu *iommu = info->iommu; 3830 int ret; 3831 3832 device_block_translation(dev); 3833 3834 if (dev_is_real_dma_subdevice(dev)) 3835 return 0; 3836 3837 /* 3838 * No PRI support with the global identity domain. No need to enable or 3839 * disable PRI in this path as the iommu has been put in the blocking 3840 * state. 
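 * device_block_translation() above has already detached whatever domain was
 * previously attached, so only the pass-through context or PASID entry needs
 * to be installed here.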
3841 */ 3842 if (sm_supported(iommu)) 3843 ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); 3844 else 3845 ret = device_setup_pass_through(dev); 3846 3847 if (!ret) 3848 info->domain_attached = true; 3849 3850 return ret; 3851 } 3852 3853 static int identity_domain_set_dev_pasid(struct iommu_domain *domain, 3854 struct device *dev, ioasid_t pasid, 3855 struct iommu_domain *old) 3856 { 3857 struct device_domain_info *info = dev_iommu_priv_get(dev); 3858 struct intel_iommu *iommu = info->iommu; 3859 int ret; 3860 3861 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) 3862 return -EOPNOTSUPP; 3863 3864 ret = iopf_for_domain_replace(domain, old, dev); 3865 if (ret) 3866 return ret; 3867 3868 ret = domain_setup_passthrough(iommu, dev, pasid, old); 3869 if (ret) { 3870 iopf_for_domain_replace(old, domain, dev); 3871 return ret; 3872 } 3873 3874 domain_remove_dev_pasid(old, dev, pasid); 3875 return 0; 3876 } 3877 3878 static struct iommu_domain identity_domain = { 3879 .type = IOMMU_DOMAIN_IDENTITY, 3880 .ops = &(const struct iommu_domain_ops) { 3881 .attach_dev = identity_domain_attach_dev, 3882 .set_dev_pasid = identity_domain_set_dev_pasid, 3883 }, 3884 }; 3885 3886 const struct iommu_domain_ops intel_fs_paging_domain_ops = { 3887 IOMMU_PT_DOMAIN_OPS(x86_64), 3888 .attach_dev = intel_iommu_attach_device, 3889 .set_dev_pasid = intel_iommu_set_dev_pasid, 3890 .iotlb_sync_map = intel_iommu_iotlb_sync_map, 3891 .flush_iotlb_all = intel_flush_iotlb_all, 3892 .iotlb_sync = intel_iommu_tlb_sync, 3893 .free = intel_iommu_domain_free, 3894 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs, 3895 }; 3896 3897 const struct iommu_domain_ops intel_ss_paging_domain_ops = { 3898 IOMMU_PT_DOMAIN_OPS(vtdss), 3899 .attach_dev = intel_iommu_attach_device, 3900 .set_dev_pasid = intel_iommu_set_dev_pasid, 3901 .iotlb_sync_map = intel_iommu_iotlb_sync_map, 3902 .flush_iotlb_all = intel_flush_iotlb_all, 3903 .iotlb_sync = intel_iommu_tlb_sync, 3904 .free = intel_iommu_domain_free, 3905 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss, 3906 }; 3907 3908 const struct iommu_ops intel_iommu_ops = { 3909 .blocked_domain = &blocking_domain, 3910 .release_domain = &blocking_domain, 3911 .identity_domain = &identity_domain, 3912 .capable = intel_iommu_capable, 3913 .hw_info = intel_iommu_hw_info, 3914 .domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags, 3915 .domain_alloc_sva = intel_svm_domain_alloc, 3916 .domain_alloc_nested = intel_iommu_domain_alloc_nested, 3917 .probe_device = intel_iommu_probe_device, 3918 .probe_finalize = intel_iommu_probe_finalize, 3919 .release_device = intel_iommu_release_device, 3920 .get_resv_regions = intel_iommu_get_resv_regions, 3921 .device_group = intel_iommu_device_group, 3922 .is_attach_deferred = intel_iommu_is_attach_deferred, 3923 .def_domain_type = device_def_domain_type, 3924 .page_response = intel_iommu_page_response, 3925 }; 3926 3927 static void quirk_iommu_igfx(struct pci_dev *dev) 3928 { 3929 if (risky_device(dev)) 3930 return; 3931 3932 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); 3933 disable_igfx_iommu = 1; 3934 } 3935 3936 /* G4x/GM45 integrated gfx dmar support is totally busted. 
*/ 3937 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); 3938 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); 3939 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx); 3940 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx); 3941 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); 3942 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); 3943 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); 3944 3945 /* QM57/QS57 integrated gfx malfunctions with dmar */ 3946 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); 3947 3948 /* Broadwell igfx malfunctions with dmar */ 3949 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); 3950 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); 3951 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx); 3952 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx); 3953 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx); 3954 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx); 3955 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx); 3956 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx); 3957 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx); 3958 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx); 3959 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx); 3960 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx); 3961 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx); 3962 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx); 3963 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx); 3964 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx); 3965 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx); 3966 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx); 3967 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx); 3968 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx); 3969 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx); 3970 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); 3971 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); 3972 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); 3973 3974 static void quirk_iommu_rwbf(struct pci_dev *dev) 3975 { 3976 if (risky_device(dev)) 3977 return; 3978 3979 /* 3980 * Mobile 4 Series Chipset neglects to set RWBF capability, 3981 * but needs it. Same seems to hold for the desktop versions. 
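 * Setting rwbf_quirk below makes the driver flush the write buffer on these
 * IOMMUs even though they do not advertise the RWBF capability.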
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK (0xf << 8)
#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
#define GGC_MEMORY_SIZE_1M (0x1 << 8)
#define GGC_MEMORY_SIZE_2M (0x3 << 8)
#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)

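/*
 * The quirk below inspects the graphics memory allocation encoded in the
 * GGC register. Bits 11:8 (GGC_MEMORY_SIZE_MASK) report how much graphics
 * memory the BIOS set aside, and only the *_VT encodings (those with
 * GGC_MEMORY_VT_ENABLED, bit 11, set) indicate that room for a VT-d shadow
 * GTT was reserved as well. For example, GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
 * has the VT bit set, while plain GGC_MEMORY_SIZE_2M (0x3 << 8) does not;
 * in the latter case the IOMMU has to be disabled for graphics.
 */
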
static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (risky_device(dev))
		return;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		disable_igfx_iommu = 1;
	} else if (!disable_igfx_iommu) {
		/* we have to ensure the gfx device is idle before we flush */
		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
		iommu_set_dma_strict();
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);

static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
{
	unsigned short ver;

	if (!IS_GFX_DEVICE(dev))
		return;

	ver = (dev->device >> 8) & 0xff;
	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
	    ver != 0x9a && ver != 0xa7 && ver != 0x7d)
		return;

	if (risky_device(dev))
		return;

	pci_info(dev, "Skip IOMMU disabling for graphics\n");
	iommu_skip_te_disable = 1;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);

/*
 * On Tylersburg chipsets, some BIOSes have been known to enable the
 * ISOCH DMAR unit for the Azalia sound device, but not give it any
 * TLB entries, which causes it to deadlock. Check for that. We do
 * this in a function called from init_dmars(), instead of in a PCI
 * quirk, because we don't want to print the obnoxious "BIOS broken"
 * message if VT-d is actually disabled.
 */
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/*
	 * System Management Registers. Might be hidden, in which case
	 * we can't do the sanity check. But that's OK, because the
	 * known-broken BIOSes _don't_ actually hide it, so far.
	 */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
		vtisochctrl);
}

/*
 * Here we deal with a device TLB defect where the device may inadvertently
 * issue an ATS invalidation completion before posted writes that were
 * initiated with translated addresses utilizing translations matching the
 * invalidation address range, violating the invalidation completion
 * ordering.
 * Therefore, any use case that cannot guarantee DMA is stopped before unmap
 * is vulnerable to this defect. In other words, any dTLB invalidation that
 * is not initiated under the control of the trusted/privileged host device
 * driver must use this quirk.
 * Device TLBs are invalidated under the following six conditions:
 * 1. Device driver does DMA API unmap IOVA
 * 2. Device driver unbinds a PASID from a process, sva_unbind_device()
 * 3. PASID is torn down, after PASID cache is flushed. e.g. process
 *    exit_mmap() due to crash
 * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
 *    VM has to free pages that were unmapped
 * 5. Userspace driver unmaps a DMA buffer
 * 6. Cache invalidation in vSVA usage (upcoming)
 *
 * For #1 and #2, device drivers are responsible for stopping DMA traffic
 * before unmap/unbind. For #3, the iommu driver gets an mmu_notifier to
 * invalidate the TLB the same way as a normal user unmap, which will use
 * this quirk. The dTLB invalidation after a PASID cache flush does not
 * need this quirk.
 *
 * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
 */
void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
			       unsigned long address, unsigned long mask,
			       u32 pasid, u16 qdep)
{
	u16 sid;

	if (likely(!info->dtlb_extra_inval))
		return;

	sid = PCI_DEVID(info->bus, info->devfn);
	if (pasid == IOMMU_NO_PASID) {
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				   qdep, address, mask);
	} else {
		qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
					 pasid, qdep, address, mask);
	}
}

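/*
 * Callers are expected to pair this with the normal device IOTLB
 * invalidation; a sketch (not an actual call site in this file, reusing
 * the names from the function above):
 *
 *	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask);
 *	quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep);
 *
 * i.e. the quirk repeats the invalidation for devices flagged with
 * dtlb_extra_inval and is a no-op for everything else.
 */
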
#define ecmd_get_status_code(res) (((res) & 0xff) >> 1)

/*
 * Function to submit a command to the enhanced command interface. The
 * valid enhanced command descriptions are defined in Table 47 of the
 * VT-d spec. The VT-d hardware implementation may support some but not
 * all commands, which can be determined by checking the Enhanced
 * Command Capability Register.
 *
 * Return values:
 *  - 0: Command successful without any error;
 *  - Negative: software error value;
 *  - Nonzero positive: failure status code defined in Table 48.
 */
int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
{
	unsigned long flags;
	u64 res;
	int ret;

	if (!cap_ecmds(iommu->cap))
		return -ENODEV;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
	if (res & DMA_ECMD_ECRSP_IP) {
		ret = -EBUSY;
		goto err;
	}

	/*
	 * Unconditionally write the operand B, because
	 * - There is no side effect if an ecmd doesn't require an
	 *   operand B, but we set the register to some value.
	 * - It's not invoked in any critical path. The extra MMIO
	 *   write doesn't introduce any performance concern.
	 */
	dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
	dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));

	IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
		      !(res & DMA_ECMD_ECRSP_IP), res);

	if (res & DMA_ECMD_ECRSP_IP) {
		ret = -ETIMEDOUT;
		goto err;
	}

	ret = ecmd_get_status_code(res);
err:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	return ret;
}

MODULE_IMPORT_NS("GENERIC_PT_IOMMU");