1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright © 2006-2014 Intel Corporation. 4 * 5 * Authors: David Woodhouse <dwmw2@infradead.org>, 6 * Ashok Raj <ashok.raj@intel.com>, 7 * Shaohua Li <shaohua.li@intel.com>, 8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, 9 * Fenghua Yu <fenghua.yu@intel.com> 10 * Joerg Roedel <jroedel@suse.de> 11 */ 12 13 #define pr_fmt(fmt) "DMAR: " fmt 14 #define dev_fmt(fmt) pr_fmt(fmt) 15 16 #include <linux/crash_dump.h> 17 #include <linux/dma-direct.h> 18 #include <linux/dmi.h> 19 #include <linux/memory.h> 20 #include <linux/pci.h> 21 #include <linux/pci-ats.h> 22 #include <linux/spinlock.h> 23 #include <linux/syscore_ops.h> 24 #include <linux/tboot.h> 25 #include <uapi/linux/iommufd.h> 26 27 #include "iommu.h" 28 #include "../dma-iommu.h" 29 #include "../irq_remapping.h" 30 #include "../iommu-pages.h" 31 #include "pasid.h" 32 #include "perfmon.h" 33 34 #define ROOT_SIZE VTD_PAGE_SIZE 35 #define CONTEXT_SIZE VTD_PAGE_SIZE 36 37 #define IS_GFX_DEVICE(pdev) pci_is_display(pdev) 38 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) 39 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) 40 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) 41 42 #define IOAPIC_RANGE_START (0xfee00000) 43 #define IOAPIC_RANGE_END (0xfeefffff) 44 #define IOVA_START_ADDR (0x1000) 45 46 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57 47 48 static void __init check_tylersburg_isoch(void); 49 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, 50 bool enable); 51 static int rwbf_quirk; 52 53 #define rwbf_required(iommu) (rwbf_quirk || cap_rwbf((iommu)->cap)) 54 55 /* 56 * set to 1 to panic kernel if can't successfully enable VT-d 57 * (used when kernel is launched w/ TXT) 58 */ 59 static int force_on = 0; 60 static int intel_iommu_tboot_noforce; 61 static int no_platform_optin; 62 63 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 64 65 /* 66 * 
Take a root_entry and return the Lower Context Table Pointer (LCTP) 67 * if marked present. 68 */ 69 static phys_addr_t root_entry_lctp(struct root_entry *re) 70 { 71 if (!(re->lo & 1)) 72 return 0; 73 74 return re->lo & VTD_PAGE_MASK; 75 } 76 77 /* 78 * Take a root_entry and return the Upper Context Table Pointer (UCTP) 79 * if marked present. 80 */ 81 static phys_addr_t root_entry_uctp(struct root_entry *re) 82 { 83 if (!(re->hi & 1)) 84 return 0; 85 86 return re->hi & VTD_PAGE_MASK; 87 } 88 89 static int device_rid_cmp_key(const void *key, const struct rb_node *node) 90 { 91 struct device_domain_info *info = 92 rb_entry(node, struct device_domain_info, node); 93 const u16 *rid_lhs = key; 94 95 if (*rid_lhs < PCI_DEVID(info->bus, info->devfn)) 96 return -1; 97 98 if (*rid_lhs > PCI_DEVID(info->bus, info->devfn)) 99 return 1; 100 101 return 0; 102 } 103 104 static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs) 105 { 106 struct device_domain_info *info = 107 rb_entry(lhs, struct device_domain_info, node); 108 u16 key = PCI_DEVID(info->bus, info->devfn); 109 110 return device_rid_cmp_key(&key, rhs); 111 } 112 113 /* 114 * Looks up an IOMMU-probed device using its source ID. 115 * 116 * Returns the pointer to the device if there is a match. Otherwise, 117 * returns NULL. 118 * 119 * Note that this helper doesn't guarantee that the device won't be 120 * released by the iommu subsystem after being returned. The caller 121 * should use its own synchronization mechanism to avoid the device 122 * being released during its use if its possibly the case. 
123 */ 124 struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid) 125 { 126 struct device_domain_info *info = NULL; 127 struct rb_node *node; 128 unsigned long flags; 129 130 spin_lock_irqsave(&iommu->device_rbtree_lock, flags); 131 node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key); 132 if (node) 133 info = rb_entry(node, struct device_domain_info, node); 134 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); 135 136 return info ? info->dev : NULL; 137 } 138 139 static int device_rbtree_insert(struct intel_iommu *iommu, 140 struct device_domain_info *info) 141 { 142 struct rb_node *curr; 143 unsigned long flags; 144 145 spin_lock_irqsave(&iommu->device_rbtree_lock, flags); 146 curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp); 147 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); 148 if (WARN_ON(curr)) 149 return -EEXIST; 150 151 return 0; 152 } 153 154 static void device_rbtree_remove(struct device_domain_info *info) 155 { 156 struct intel_iommu *iommu = info->iommu; 157 unsigned long flags; 158 159 spin_lock_irqsave(&iommu->device_rbtree_lock, flags); 160 if (!RB_EMPTY_NODE(&info->node)) { 161 rb_erase(&info->node, &iommu->device_rbtree); 162 RB_CLEAR_NODE(&info->node); 163 } 164 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); 165 } 166 167 struct dmar_rmrr_unit { 168 struct list_head list; /* list of rmrr units */ 169 struct acpi_dmar_header *hdr; /* ACPI header */ 170 u64 base_address; /* reserved base address*/ 171 u64 end_address; /* reserved end address */ 172 struct dmar_dev_scope *devices; /* target devices */ 173 int devices_cnt; /* target device count */ 174 }; 175 176 struct dmar_atsr_unit { 177 struct list_head list; /* list of ATSR units */ 178 struct acpi_dmar_header *hdr; /* ACPI header */ 179 struct dmar_dev_scope *devices; /* target devices */ 180 int devices_cnt; /* target device count */ 181 u8 include_all:1; /* include all ports */ 182 }; 183 184 struct dmar_satc_unit 
{ 185 struct list_head list; /* list of SATC units */ 186 struct acpi_dmar_header *hdr; /* ACPI header */ 187 struct dmar_dev_scope *devices; /* target devices */ 188 struct intel_iommu *iommu; /* the corresponding iommu */ 189 int devices_cnt; /* target device count */ 190 u8 atc_required:1; /* ATS is required */ 191 }; 192 193 static LIST_HEAD(dmar_atsr_units); 194 static LIST_HEAD(dmar_rmrr_units); 195 static LIST_HEAD(dmar_satc_units); 196 197 #define for_each_rmrr_units(rmrr) \ 198 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 199 200 static void intel_iommu_domain_free(struct iommu_domain *domain); 201 202 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON); 203 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON); 204 205 int intel_iommu_enabled = 0; 206 EXPORT_SYMBOL_GPL(intel_iommu_enabled); 207 208 static int intel_iommu_superpage = 1; 209 static int iommu_identity_mapping; 210 static int iommu_skip_te_disable; 211 static int disable_igfx_iommu; 212 213 #define IDENTMAP_AZALIA 4 214 215 const struct iommu_ops intel_iommu_ops; 216 217 static bool translation_pre_enabled(struct intel_iommu *iommu) 218 { 219 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED); 220 } 221 222 static void clear_translation_pre_enabled(struct intel_iommu *iommu) 223 { 224 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED; 225 } 226 227 static void init_translation_status(struct intel_iommu *iommu) 228 { 229 u32 gsts; 230 231 gsts = readl(iommu->reg + DMAR_GSTS_REG); 232 if (gsts & DMA_GSTS_TES) 233 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED; 234 } 235 236 static int __init intel_iommu_setup(char *str) 237 { 238 if (!str) 239 return -EINVAL; 240 241 while (*str) { 242 if (!strncmp(str, "on", 2)) { 243 dmar_disabled = 0; 244 pr_info("IOMMU enabled\n"); 245 } else if (!strncmp(str, "off", 3)) { 246 dmar_disabled = 1; 247 no_platform_optin = 1; 248 pr_info("IOMMU disabled\n"); 249 } else if (!strncmp(str, "igfx_off", 8)) { 250 disable_igfx_iommu = 1; 
251 pr_info("Disable GFX device mapping\n"); 252 } else if (!strncmp(str, "forcedac", 8)) { 253 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); 254 iommu_dma_forcedac = true; 255 } else if (!strncmp(str, "strict", 6)) { 256 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n"); 257 iommu_set_dma_strict(); 258 } else if (!strncmp(str, "sp_off", 6)) { 259 pr_info("Disable supported super page\n"); 260 intel_iommu_superpage = 0; 261 } else if (!strncmp(str, "sm_on", 5)) { 262 pr_info("Enable scalable mode if hardware supports\n"); 263 intel_iommu_sm = 1; 264 } else if (!strncmp(str, "sm_off", 6)) { 265 pr_info("Scalable mode is disallowed\n"); 266 intel_iommu_sm = 0; 267 } else if (!strncmp(str, "tboot_noforce", 13)) { 268 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); 269 intel_iommu_tboot_noforce = 1; 270 } else { 271 pr_notice("Unknown option - '%s'\n", str); 272 } 273 274 str += strcspn(str, ","); 275 while (*str == ',') 276 str++; 277 } 278 279 return 1; 280 } 281 __setup("intel_iommu=", intel_iommu_setup); 282 283 /* 284 * Calculate the Supported Adjusted Guest Address Widths of an IOMMU. 285 * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of 286 * the returned SAGAW. 287 */ 288 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu) 289 { 290 unsigned long fl_sagaw, sl_sagaw; 291 292 fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0); 293 sl_sagaw = cap_sagaw(iommu->cap); 294 295 /* Second level only. */ 296 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) 297 return sl_sagaw; 298 299 /* First level only. 
*/ 300 if (!ecap_slts(iommu->ecap)) 301 return fl_sagaw; 302 303 return fl_sagaw & sl_sagaw; 304 } 305 306 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) 307 { 308 unsigned long sagaw; 309 int agaw; 310 311 sagaw = __iommu_calculate_sagaw(iommu); 312 for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) { 313 if (test_bit(agaw, &sagaw)) 314 break; 315 } 316 317 return agaw; 318 } 319 320 /* 321 * Calculate max SAGAW for each iommu. 322 */ 323 int iommu_calculate_max_sagaw(struct intel_iommu *iommu) 324 { 325 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH); 326 } 327 328 /* 329 * calculate agaw for each iommu. 330 * "SAGAW" may be different across iommus, use a default agaw, and 331 * get a supported less agaw for iommus that don't support the default agaw. 332 */ 333 int iommu_calculate_agaw(struct intel_iommu *iommu) 334 { 335 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); 336 } 337 338 static bool iommu_paging_structure_coherency(struct intel_iommu *iommu) 339 { 340 return sm_supported(iommu) ? 341 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap); 342 } 343 344 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, 345 u8 devfn, int alloc) 346 { 347 struct root_entry *root = &iommu->root_entry[bus]; 348 struct context_entry *context; 349 u64 *entry; 350 351 /* 352 * Except that the caller requested to allocate a new entry, 353 * returning a copied context entry makes no sense. 
354 */ 355 if (!alloc && context_copied(iommu, bus, devfn)) 356 return NULL; 357 358 entry = &root->lo; 359 if (sm_supported(iommu)) { 360 if (devfn >= 0x80) { 361 devfn -= 0x80; 362 entry = &root->hi; 363 } 364 devfn *= 2; 365 } 366 if (*entry & 1) 367 context = phys_to_virt(*entry & VTD_PAGE_MASK); 368 else { 369 unsigned long phy_addr; 370 if (!alloc) 371 return NULL; 372 373 context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, 374 SZ_4K); 375 if (!context) 376 return NULL; 377 378 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); 379 phy_addr = virt_to_phys((void *)context); 380 *entry = phy_addr | 1; 381 __iommu_flush_cache(iommu, entry, sizeof(*entry)); 382 } 383 return &context[devfn]; 384 } 385 386 /** 387 * is_downstream_to_pci_bridge - test if a device belongs to the PCI 388 * sub-hierarchy of a candidate PCI-PCI bridge 389 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy 390 * @bridge: the candidate PCI-PCI bridge 391 * 392 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false. 393 */ 394 static bool 395 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge) 396 { 397 struct pci_dev *pdev, *pbridge; 398 399 if (!dev_is_pci(dev) || !dev_is_pci(bridge)) 400 return false; 401 402 pdev = to_pci_dev(dev); 403 pbridge = to_pci_dev(bridge); 404 405 if (pbridge->subordinate && 406 pbridge->subordinate->number <= pdev->bus->number && 407 pbridge->subordinate->busn_res.end >= pdev->bus->number) 408 return true; 409 410 return false; 411 } 412 413 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev) 414 { 415 struct dmar_drhd_unit *drhd; 416 u32 vtbar; 417 int rc; 418 419 /* We know that this device on this chipset has its own IOMMU. 420 * If we find it under a different IOMMU, then the BIOS is lying 421 * to us. Hope that the IOMMU for this device is actually 422 * disabled, and it needs no translation... 
423 */ 424 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); 425 if (rc) { 426 /* "can't" happen */ 427 dev_info(&pdev->dev, "failed to run vt-d quirk\n"); 428 return false; 429 } 430 vtbar &= 0xffff0000; 431 432 /* we know that the this iommu should be at offset 0xa000 from vtbar */ 433 drhd = dmar_find_matched_drhd_unit(pdev); 434 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { 435 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); 436 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 437 return true; 438 } 439 440 return false; 441 } 442 443 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev) 444 { 445 if (!iommu || iommu->drhd->ignored) 446 return true; 447 448 if (dev_is_pci(dev)) { 449 struct pci_dev *pdev = to_pci_dev(dev); 450 451 if (pdev->vendor == PCI_VENDOR_ID_INTEL && 452 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB && 453 quirk_ioat_snb_local_iommu(pdev)) 454 return true; 455 } 456 457 return false; 458 } 459 460 static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn) 461 { 462 struct dmar_drhd_unit *drhd = NULL; 463 struct pci_dev *pdev = NULL; 464 struct intel_iommu *iommu; 465 struct device *tmp; 466 u16 segment = 0; 467 int i; 468 469 if (!dev) 470 return NULL; 471 472 if (dev_is_pci(dev)) { 473 struct pci_dev *pf_pdev; 474 475 pdev = pci_real_dma_dev(to_pci_dev(dev)); 476 477 /* VFs aren't listed in scope tables; we need to look up 478 * the PF instead to find the IOMMU. 
*/ 479 pf_pdev = pci_physfn(pdev); 480 dev = &pf_pdev->dev; 481 segment = pci_domain_nr(pdev->bus); 482 } else if (has_acpi_companion(dev)) 483 dev = &ACPI_COMPANION(dev)->dev; 484 485 rcu_read_lock(); 486 for_each_iommu(iommu, drhd) { 487 if (pdev && segment != drhd->segment) 488 continue; 489 490 for_each_active_dev_scope(drhd->devices, 491 drhd->devices_cnt, i, tmp) { 492 if (tmp == dev) { 493 /* For a VF use its original BDF# not that of the PF 494 * which we used for the IOMMU lookup. Strictly speaking 495 * we could do this for all PCI devices; we only need to 496 * get the BDF# from the scope table for ACPI matches. */ 497 if (pdev && pdev->is_virtfn) 498 goto got_pdev; 499 500 if (bus && devfn) { 501 *bus = drhd->devices[i].bus; 502 *devfn = drhd->devices[i].devfn; 503 } 504 goto out; 505 } 506 507 if (is_downstream_to_pci_bridge(dev, tmp)) 508 goto got_pdev; 509 } 510 511 if (pdev && drhd->include_all) { 512 got_pdev: 513 if (bus && devfn) { 514 *bus = pdev->bus->number; 515 *devfn = pdev->devfn; 516 } 517 goto out; 518 } 519 } 520 iommu = NULL; 521 out: 522 if (iommu_is_dummy(iommu, dev)) 523 iommu = NULL; 524 525 rcu_read_unlock(); 526 527 return iommu; 528 } 529 530 static void free_context_table(struct intel_iommu *iommu) 531 { 532 struct context_entry *context; 533 int i; 534 535 if (!iommu->root_entry) 536 return; 537 538 for (i = 0; i < ROOT_ENTRY_NR; i++) { 539 context = iommu_context_addr(iommu, i, 0, 0); 540 if (context) 541 iommu_free_pages(context); 542 543 if (!sm_supported(iommu)) 544 continue; 545 546 context = iommu_context_addr(iommu, i, 0x80, 0); 547 if (context) 548 iommu_free_pages(context); 549 } 550 551 iommu_free_pages(iommu->root_entry); 552 iommu->root_entry = NULL; 553 } 554 555 #ifdef CONFIG_DMAR_DEBUG 556 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, 557 u8 bus, u8 devfn, struct dma_pte *parent, int level) 558 { 559 struct dma_pte *pte; 560 int offset; 561 562 while (1) { 563 offset = 
pfn_level_offset(pfn, level); 564 pte = &parent[offset]; 565 566 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val); 567 568 if (!dma_pte_present(pte)) { 569 pr_info("page table not present at level %d\n", level - 1); 570 break; 571 } 572 573 if (level == 1 || dma_pte_superpage(pte)) 574 break; 575 576 parent = phys_to_virt(dma_pte_addr(pte)); 577 level--; 578 } 579 } 580 581 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, 582 unsigned long long addr, u32 pasid) 583 { 584 struct pasid_dir_entry *dir, *pde; 585 struct pasid_entry *entries, *pte; 586 struct context_entry *ctx_entry; 587 struct root_entry *rt_entry; 588 int i, dir_index, index, level; 589 u8 devfn = source_id & 0xff; 590 u8 bus = source_id >> 8; 591 struct dma_pte *pgtable; 592 593 pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr); 594 595 /* root entry dump */ 596 if (!iommu->root_entry) { 597 pr_info("root table is not present\n"); 598 return; 599 } 600 rt_entry = &iommu->root_entry[bus]; 601 602 if (sm_supported(iommu)) 603 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n", 604 rt_entry->hi, rt_entry->lo); 605 else 606 pr_info("root entry: 0x%016llx", rt_entry->lo); 607 608 /* context entry dump */ 609 ctx_entry = iommu_context_addr(iommu, bus, devfn, 0); 610 if (!ctx_entry) { 611 pr_info("context table is not present\n"); 612 return; 613 } 614 615 pr_info("context entry: hi 0x%016llx, low 0x%016llx\n", 616 ctx_entry->hi, ctx_entry->lo); 617 618 /* legacy mode does not require PASID entries */ 619 if (!sm_supported(iommu)) { 620 if (!context_present(ctx_entry)) { 621 pr_info("legacy mode page table is not present\n"); 622 return; 623 } 624 level = agaw_to_level(ctx_entry->hi & 7); 625 pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK); 626 goto pgtable_walk; 627 } 628 629 if (!context_present(ctx_entry)) { 630 pr_info("pasid directory table is not present\n"); 631 return; 632 } 633 634 /* get the pointer to pasid directory 
entry */ 635 dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK); 636 637 /* For request-without-pasid, get the pasid from context entry */ 638 if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) 639 pasid = IOMMU_NO_PASID; 640 641 dir_index = pasid >> PASID_PDE_SHIFT; 642 pde = &dir[dir_index]; 643 pr_info("pasid dir entry: 0x%016llx\n", pde->val); 644 645 /* get the pointer to the pasid table entry */ 646 entries = get_pasid_table_from_pde(pde); 647 if (!entries) { 648 pr_info("pasid table is not present\n"); 649 return; 650 } 651 index = pasid & PASID_PTE_MASK; 652 pte = &entries[index]; 653 for (i = 0; i < ARRAY_SIZE(pte->val); i++) 654 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]); 655 656 if (!pasid_pte_is_present(pte)) { 657 pr_info("scalable mode page table is not present\n"); 658 return; 659 } 660 661 if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) { 662 level = pte->val[2] & BIT_ULL(2) ? 5 : 4; 663 pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK); 664 } else { 665 level = agaw_to_level((pte->val[0] >> 2) & 0x7); 666 pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK); 667 } 668 669 pgtable_walk: 670 pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level); 671 } 672 #endif 673 674 /* iommu handling */ 675 static int iommu_alloc_root_entry(struct intel_iommu *iommu) 676 { 677 struct root_entry *root; 678 679 root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K); 680 if (!root) { 681 pr_err("Allocating root entry for %s failed\n", 682 iommu->name); 683 return -ENOMEM; 684 } 685 686 __iommu_flush_cache(iommu, root, ROOT_SIZE); 687 iommu->root_entry = root; 688 689 return 0; 690 } 691 692 static void iommu_set_root_entry(struct intel_iommu *iommu) 693 { 694 u64 addr; 695 u32 sts; 696 unsigned long flag; 697 698 addr = virt_to_phys(iommu->root_entry); 699 if (sm_supported(iommu)) 700 addr |= DMA_RTADDR_SMT; 701 702 raw_spin_lock_irqsave(&iommu->register_lock, flag); 703 writeq(addr, iommu->reg + 
DMAR_RTADDR_REG); 704 705 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); 706 707 /* Make sure hardware complete it */ 708 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 709 readl, (sts & DMA_GSTS_RTPS), sts); 710 711 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 712 713 /* 714 * Hardware invalidates all DMA remapping hardware translation 715 * caches as part of SRTP flow. 716 */ 717 if (cap_esrtps(iommu->cap)) 718 return; 719 720 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); 721 if (sm_supported(iommu)) 722 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0); 723 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 724 } 725 726 void iommu_flush_write_buffer(struct intel_iommu *iommu) 727 { 728 u32 val; 729 unsigned long flag; 730 731 if (!rwbf_quirk && !cap_rwbf(iommu->cap)) 732 return; 733 734 raw_spin_lock_irqsave(&iommu->register_lock, flag); 735 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG); 736 737 /* Make sure hardware complete it */ 738 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 739 readl, (!(val & DMA_GSTS_WBFS)), val); 740 741 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 742 } 743 744 /* return value determine if we need a write buffer flush */ 745 static void __iommu_flush_context(struct intel_iommu *iommu, 746 u16 did, u16 source_id, u8 function_mask, 747 u64 type) 748 { 749 u64 val = 0; 750 unsigned long flag; 751 752 switch (type) { 753 case DMA_CCMD_GLOBAL_INVL: 754 val = DMA_CCMD_GLOBAL_INVL; 755 break; 756 case DMA_CCMD_DOMAIN_INVL: 757 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); 758 break; 759 case DMA_CCMD_DEVICE_INVL: 760 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) 761 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); 762 break; 763 default: 764 pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n", 765 iommu->name, type); 766 return; 767 } 768 val |= DMA_CCMD_ICC; 769 770 raw_spin_lock_irqsave(&iommu->register_lock, flag); 771 writeq(val, iommu->reg + 
DMAR_CCMD_REG); 772 773 /* Make sure hardware complete it */ 774 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, 775 readq, (!(val & DMA_CCMD_ICC)), val); 776 777 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 778 } 779 780 void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, 781 unsigned int size_order, u64 type) 782 { 783 int tlb_offset = ecap_iotlb_offset(iommu->ecap); 784 u64 val = 0, val_iva = 0; 785 unsigned long flag; 786 787 switch (type) { 788 case DMA_TLB_GLOBAL_FLUSH: 789 /* global flush doesn't need set IVA_REG */ 790 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; 791 break; 792 case DMA_TLB_DSI_FLUSH: 793 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 794 break; 795 case DMA_TLB_PSI_FLUSH: 796 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 797 /* IH bit is passed in as part of address */ 798 val_iva = size_order | addr; 799 break; 800 default: 801 pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n", 802 iommu->name, type); 803 return; 804 } 805 806 if (cap_write_drain(iommu->cap)) 807 val |= DMA_TLB_WRITE_DRAIN; 808 809 raw_spin_lock_irqsave(&iommu->register_lock, flag); 810 /* Note: Only uses first TLB reg currently */ 811 if (val_iva) 812 writeq(val_iva, iommu->reg + tlb_offset); 813 writeq(val, iommu->reg + tlb_offset + 8); 814 815 /* Make sure hardware complete it */ 816 IOMMU_WAIT_OP(iommu, tlb_offset + 8, 817 readq, (!(val & DMA_TLB_IVT)), val); 818 819 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 820 821 /* check IOTLB invalidation granularity */ 822 if (DMA_TLB_IAIG(val) == 0) 823 pr_err("Flush IOTLB failed\n"); 824 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) 825 pr_debug("TLB flush request %Lx, actual %Lx\n", 826 (unsigned long long)DMA_TLB_IIRG(type), 827 (unsigned long long)DMA_TLB_IAIG(val)); 828 } 829 830 static struct device_domain_info * 831 domain_lookup_dev_info(struct dmar_domain *domain, 832 struct intel_iommu *iommu, u8 bus, u8 devfn) 833 { 834 struct device_domain_info *info; 835 unsigned 
long flags; 836 837 spin_lock_irqsave(&domain->lock, flags); 838 list_for_each_entry(info, &domain->devices, link) { 839 if (info->iommu == iommu && info->bus == bus && 840 info->devfn == devfn) { 841 spin_unlock_irqrestore(&domain->lock, flags); 842 return info; 843 } 844 } 845 spin_unlock_irqrestore(&domain->lock, flags); 846 847 return NULL; 848 } 849 850 /* 851 * The extra devTLB flush quirk impacts those QAT devices with PCI device 852 * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() 853 * check because it applies only to the built-in QAT devices and it doesn't 854 * grant additional privileges. 855 */ 856 #define BUGGY_QAT_DEVID_MASK 0x4940 857 static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) 858 { 859 if (pdev->vendor != PCI_VENDOR_ID_INTEL) 860 return false; 861 862 if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK) 863 return false; 864 865 return true; 866 } 867 868 static void iommu_enable_pci_ats(struct device_domain_info *info) 869 { 870 struct pci_dev *pdev; 871 872 if (!info->ats_supported) 873 return; 874 875 pdev = to_pci_dev(info->dev); 876 if (!pci_ats_page_aligned(pdev)) 877 return; 878 879 if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) 880 info->ats_enabled = 1; 881 } 882 883 static void iommu_disable_pci_ats(struct device_domain_info *info) 884 { 885 if (!info->ats_enabled) 886 return; 887 888 pci_disable_ats(to_pci_dev(info->dev)); 889 info->ats_enabled = 0; 890 } 891 892 static void iommu_enable_pci_pri(struct device_domain_info *info) 893 { 894 struct pci_dev *pdev; 895 896 if (!info->ats_enabled || !info->pri_supported) 897 return; 898 899 pdev = to_pci_dev(info->dev); 900 /* PASID is required in PRG Response Message. 
*/ 901 if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) 902 return; 903 904 if (pci_reset_pri(pdev)) 905 return; 906 907 if (!pci_enable_pri(pdev, PRQ_DEPTH)) 908 info->pri_enabled = 1; 909 } 910 911 static void iommu_disable_pci_pri(struct device_domain_info *info) 912 { 913 if (!info->pri_enabled) 914 return; 915 916 if (WARN_ON(info->iopf_refcount)) 917 iopf_queue_remove_device(info->iommu->iopf_queue, info->dev); 918 919 pci_disable_pri(to_pci_dev(info->dev)); 920 info->pri_enabled = 0; 921 } 922 923 static void intel_flush_iotlb_all(struct iommu_domain *domain) 924 { 925 cache_tag_flush_all(to_dmar_domain(domain)); 926 } 927 928 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) 929 { 930 u32 pmen; 931 unsigned long flags; 932 933 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap)) 934 return; 935 936 raw_spin_lock_irqsave(&iommu->register_lock, flags); 937 pmen = readl(iommu->reg + DMAR_PMEN_REG); 938 pmen &= ~DMA_PMEN_EPM; 939 writel(pmen, iommu->reg + DMAR_PMEN_REG); 940 941 /* wait for the protected region status bit to clear */ 942 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, 943 readl, !(pmen & DMA_PMEN_PRS), pmen); 944 945 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 946 } 947 948 static void iommu_enable_translation(struct intel_iommu *iommu) 949 { 950 u32 sts; 951 unsigned long flags; 952 953 raw_spin_lock_irqsave(&iommu->register_lock, flags); 954 iommu->gcmd |= DMA_GCMD_TE; 955 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 956 957 /* Make sure hardware complete it */ 958 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 959 readl, (sts & DMA_GSTS_TES), sts); 960 961 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 962 } 963 964 static void iommu_disable_translation(struct intel_iommu *iommu) 965 { 966 u32 sts; 967 unsigned long flag; 968 969 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated && 970 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap))) 971 return; 972 973 
raw_spin_lock_irqsave(&iommu->register_lock, flag); 974 iommu->gcmd &= ~DMA_GCMD_TE; 975 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 976 977 /* Make sure hardware complete it */ 978 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 979 readl, (!(sts & DMA_GSTS_TES)), sts); 980 981 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 982 } 983 984 static void disable_dmar_iommu(struct intel_iommu *iommu) 985 { 986 /* 987 * All iommu domains must have been detached from the devices, 988 * hence there should be no domain IDs in use. 989 */ 990 if (WARN_ON(!ida_is_empty(&iommu->domain_ida))) 991 return; 992 993 if (iommu->gcmd & DMA_GCMD_TE) 994 iommu_disable_translation(iommu); 995 } 996 997 static void free_dmar_iommu(struct intel_iommu *iommu) 998 { 999 if (iommu->copied_tables) { 1000 bitmap_free(iommu->copied_tables); 1001 iommu->copied_tables = NULL; 1002 } 1003 1004 /* free context mapping */ 1005 free_context_table(iommu); 1006 1007 if (ecap_prs(iommu->ecap)) 1008 intel_iommu_finish_prq(iommu); 1009 } 1010 1011 /* 1012 * Check and return whether first level is used by default for 1013 * DMA translation. 
1014 */ 1015 static bool first_level_by_default(struct intel_iommu *iommu) 1016 { 1017 /* Only SL is available in legacy mode */ 1018 if (!sm_supported(iommu)) 1019 return false; 1020 1021 /* Only level (either FL or SL) is available, just use it */ 1022 if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap)) 1023 return ecap_flts(iommu->ecap); 1024 1025 return true; 1026 } 1027 1028 int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) 1029 { 1030 struct iommu_domain_info *info, *curr; 1031 int num, ret = -ENOSPC; 1032 1033 if (domain->domain.type == IOMMU_DOMAIN_SVA) 1034 return 0; 1035 1036 info = kzalloc_obj(*info); 1037 if (!info) 1038 return -ENOMEM; 1039 1040 guard(mutex)(&iommu->did_lock); 1041 curr = xa_load(&domain->iommu_array, iommu->seq_id); 1042 if (curr) { 1043 curr->refcnt++; 1044 kfree(info); 1045 return 0; 1046 } 1047 1048 num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID, 1049 cap_ndoms(iommu->cap) - 1, GFP_KERNEL); 1050 if (num < 0) { 1051 pr_err("%s: No free domain ids\n", iommu->name); 1052 goto err_unlock; 1053 } 1054 1055 info->refcnt = 1; 1056 info->did = num; 1057 info->iommu = iommu; 1058 curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id, 1059 NULL, info, GFP_KERNEL); 1060 if (curr) { 1061 ret = xa_err(curr) ? 
: -EBUSY; 1062 goto err_clear; 1063 } 1064 1065 return 0; 1066 1067 err_clear: 1068 ida_free(&iommu->domain_ida, info->did); 1069 err_unlock: 1070 kfree(info); 1071 return ret; 1072 } 1073 1074 void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) 1075 { 1076 struct iommu_domain_info *info; 1077 1078 if (domain->domain.type == IOMMU_DOMAIN_SVA) 1079 return; 1080 1081 guard(mutex)(&iommu->did_lock); 1082 info = xa_load(&domain->iommu_array, iommu->seq_id); 1083 if (--info->refcnt == 0) { 1084 ida_free(&iommu->domain_ida, info->did); 1085 xa_erase(&domain->iommu_array, iommu->seq_id); 1086 kfree(info); 1087 } 1088 } 1089 1090 /* 1091 * For kdump cases, old valid entries may be cached due to the 1092 * in-flight DMA and copied pgtable, but there is no unmapping 1093 * behaviour for them, thus we need an explicit cache flush for 1094 * the newly-mapped device. For kdump, at this point, the device 1095 * is supposed to finish reset at its driver probe stage, so no 1096 * in-flight DMA will exist, and we don't need to worry anymore 1097 * hereafter. 1098 */ 1099 static void copied_context_tear_down(struct intel_iommu *iommu, 1100 struct context_entry *context, 1101 u8 bus, u8 devfn) 1102 { 1103 u16 did_old; 1104 1105 if (!context_copied(iommu, bus, devfn)) 1106 return; 1107 1108 assert_spin_locked(&iommu->lock); 1109 1110 did_old = context_domain_id(context); 1111 context_clear_entry(context); 1112 1113 if (did_old < cap_ndoms(iommu->cap)) { 1114 iommu->flush.flush_context(iommu, did_old, 1115 PCI_DEVID(bus, devfn), 1116 DMA_CCMD_MASK_NOBIT, 1117 DMA_CCMD_DEVICE_INVL); 1118 iommu->flush.flush_iotlb(iommu, did_old, 0, 0, 1119 DMA_TLB_DSI_FLUSH); 1120 } 1121 1122 clear_context_copied(iommu, bus, devfn); 1123 } 1124 1125 /* 1126 * It's a non-present to present mapping. If hardware doesn't cache 1127 * non-present entry we only need to flush the write-buffer. 
If the 1128 * _does_ cache non-present entries, then it does so in the special 1129 * domain #0, which we have to flush: 1130 */ 1131 static void context_present_cache_flush(struct intel_iommu *iommu, u16 did, 1132 u8 bus, u8 devfn) 1133 { 1134 if (cap_caching_mode(iommu->cap)) { 1135 iommu->flush.flush_context(iommu, 0, 1136 PCI_DEVID(bus, devfn), 1137 DMA_CCMD_MASK_NOBIT, 1138 DMA_CCMD_DEVICE_INVL); 1139 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); 1140 } else { 1141 iommu_flush_write_buffer(iommu); 1142 } 1143 } 1144 1145 static int domain_context_mapping_one(struct dmar_domain *domain, 1146 struct intel_iommu *iommu, 1147 u8 bus, u8 devfn) 1148 { 1149 struct device_domain_info *info = 1150 domain_lookup_dev_info(domain, iommu, bus, devfn); 1151 u16 did = domain_id_iommu(domain, iommu); 1152 int translation = CONTEXT_TT_MULTI_LEVEL; 1153 struct pt_iommu_vtdss_hw_info pt_info; 1154 struct context_entry *context; 1155 int ret; 1156 1157 if (WARN_ON(!intel_domain_is_ss_paging(domain))) 1158 return -EINVAL; 1159 1160 pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info); 1161 1162 pr_debug("Set context mapping for %02x:%02x.%d\n", 1163 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1164 1165 spin_lock(&iommu->lock); 1166 ret = -ENOMEM; 1167 context = iommu_context_addr(iommu, bus, devfn, 1); 1168 if (!context) 1169 goto out_unlock; 1170 1171 ret = 0; 1172 if (context_present(context) && !context_copied(iommu, bus, devfn)) 1173 goto out_unlock; 1174 1175 copied_context_tear_down(iommu, context, bus, devfn); 1176 context_clear_entry(context); 1177 context_set_domain_id(context, did); 1178 1179 if (info && info->ats_supported) 1180 translation = CONTEXT_TT_DEV_IOTLB; 1181 else 1182 translation = CONTEXT_TT_MULTI_LEVEL; 1183 1184 context_set_address_root(context, pt_info.ssptptr); 1185 context_set_address_width(context, pt_info.aw); 1186 context_set_translation_type(context, translation); 1187 context_set_fault_enable(context); 1188 context_set_present(context); 
1189 if (!ecap_coherent(iommu->ecap)) 1190 clflush_cache_range(context, sizeof(*context)); 1191 context_present_cache_flush(iommu, did, bus, devfn); 1192 ret = 0; 1193 1194 out_unlock: 1195 spin_unlock(&iommu->lock); 1196 1197 return ret; 1198 } 1199 1200 static int domain_context_mapping_cb(struct pci_dev *pdev, 1201 u16 alias, void *opaque) 1202 { 1203 struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev); 1204 struct intel_iommu *iommu = info->iommu; 1205 struct dmar_domain *domain = opaque; 1206 1207 return domain_context_mapping_one(domain, iommu, 1208 PCI_BUS_NUM(alias), alias & 0xff); 1209 } 1210 1211 static int 1212 domain_context_mapping(struct dmar_domain *domain, struct device *dev) 1213 { 1214 struct device_domain_info *info = dev_iommu_priv_get(dev); 1215 struct intel_iommu *iommu = info->iommu; 1216 u8 bus = info->bus, devfn = info->devfn; 1217 int ret; 1218 1219 if (!dev_is_pci(dev)) 1220 return domain_context_mapping_one(domain, iommu, bus, devfn); 1221 1222 ret = pci_for_each_dma_alias(to_pci_dev(dev), 1223 domain_context_mapping_cb, domain); 1224 if (ret) 1225 return ret; 1226 1227 iommu_enable_pci_ats(info); 1228 1229 return 0; 1230 } 1231 1232 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn) 1233 { 1234 struct intel_iommu *iommu = info->iommu; 1235 struct context_entry *context; 1236 u16 did; 1237 1238 spin_lock(&iommu->lock); 1239 context = iommu_context_addr(iommu, bus, devfn, 0); 1240 if (!context) { 1241 spin_unlock(&iommu->lock); 1242 return; 1243 } 1244 1245 did = context_domain_id(context); 1246 context_clear_present(context); 1247 __iommu_flush_cache(iommu, context, sizeof(*context)); 1248 spin_unlock(&iommu->lock); 1249 intel_context_flush_no_pasid(info, context, did); 1250 context_clear_entry(context); 1251 __iommu_flush_cache(iommu, context, sizeof(*context)); 1252 } 1253 1254 int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, 1255 ioasid_t pasid, u16 did, 
phys_addr_t fsptptr, 1256 int flags, struct iommu_domain *old) 1257 { 1258 if (old) 1259 intel_pasid_tear_down_entry(iommu, dev, pasid, false); 1260 1261 return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, did, flags); 1262 } 1263 1264 static int domain_setup_second_level(struct intel_iommu *iommu, 1265 struct dmar_domain *domain, 1266 struct device *dev, ioasid_t pasid, 1267 struct iommu_domain *old) 1268 { 1269 if (old) 1270 intel_pasid_tear_down_entry(iommu, dev, pasid, false); 1271 1272 return intel_pasid_setup_second_level(iommu, domain, dev, pasid); 1273 } 1274 1275 static int domain_setup_passthrough(struct intel_iommu *iommu, 1276 struct device *dev, ioasid_t pasid, 1277 struct iommu_domain *old) 1278 { 1279 if (old) 1280 intel_pasid_tear_down_entry(iommu, dev, pasid, false); 1281 1282 return intel_pasid_setup_pass_through(iommu, dev, pasid); 1283 } 1284 1285 static int domain_setup_first_level(struct intel_iommu *iommu, 1286 struct dmar_domain *domain, 1287 struct device *dev, 1288 u32 pasid, struct iommu_domain *old) 1289 { 1290 struct pt_iommu_x86_64_hw_info pt_info; 1291 unsigned int flags = 0; 1292 1293 pt_iommu_x86_64_hw_info(&domain->fspt, &pt_info); 1294 if (WARN_ON(pt_info.levels != 4 && pt_info.levels != 5)) 1295 return -EINVAL; 1296 1297 if (pt_info.levels == 5) 1298 flags |= PASID_FLAG_FL5LP; 1299 1300 if (domain->force_snooping) 1301 flags |= PASID_FLAG_PAGE_SNOOP; 1302 1303 if (!(domain->fspt.x86_64_pt.common.features & 1304 BIT(PT_FEAT_DMA_INCOHERENT))) 1305 flags |= PASID_FLAG_PWSNP; 1306 1307 return __domain_setup_first_level(iommu, dev, pasid, 1308 domain_id_iommu(domain, iommu), 1309 pt_info.gcr3_pt, flags, old); 1310 } 1311 1312 static int dmar_domain_attach_device(struct dmar_domain *domain, 1313 struct device *dev) 1314 { 1315 struct device_domain_info *info = dev_iommu_priv_get(dev); 1316 struct intel_iommu *iommu = info->iommu; 1317 unsigned long flags; 1318 int ret; 1319 1320 ret = domain_attach_iommu(domain, iommu); 
1321 if (ret) 1322 return ret; 1323 1324 info->domain = domain; 1325 info->domain_attached = true; 1326 spin_lock_irqsave(&domain->lock, flags); 1327 list_add(&info->link, &domain->devices); 1328 spin_unlock_irqrestore(&domain->lock, flags); 1329 1330 if (dev_is_real_dma_subdevice(dev)) 1331 return 0; 1332 1333 if (!sm_supported(iommu)) 1334 ret = domain_context_mapping(domain, dev); 1335 else if (intel_domain_is_fs_paging(domain)) 1336 ret = domain_setup_first_level(iommu, domain, dev, 1337 IOMMU_NO_PASID, NULL); 1338 else if (intel_domain_is_ss_paging(domain)) 1339 ret = domain_setup_second_level(iommu, domain, dev, 1340 IOMMU_NO_PASID, NULL); 1341 else if (WARN_ON(true)) 1342 ret = -EINVAL; 1343 1344 if (ret) 1345 goto out_block_translation; 1346 1347 ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); 1348 if (ret) 1349 goto out_block_translation; 1350 1351 return 0; 1352 1353 out_block_translation: 1354 device_block_translation(dev); 1355 return ret; 1356 } 1357 1358 /** 1359 * device_rmrr_is_relaxable - Test whether the RMRR of this device 1360 * is relaxable (ie. is allowed to be not enforced under some conditions) 1361 * @dev: device handle 1362 * 1363 * We assume that PCI USB devices with RMRRs have them largely 1364 * for historical reasons and that the RMRR space is not actively used post 1365 * boot. This exclusion may change if vendors begin to abuse it. 1366 * 1367 * The same exception is made for graphics devices, with the requirement that 1368 * any use of the RMRR regions will be torn down before assigning the device 1369 * to a guest. 
1370 * 1371 * Return: true if the RMRR is relaxable, false otherwise 1372 */ 1373 static bool device_rmrr_is_relaxable(struct device *dev) 1374 { 1375 struct pci_dev *pdev; 1376 1377 if (!dev_is_pci(dev)) 1378 return false; 1379 1380 pdev = to_pci_dev(dev); 1381 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) 1382 return true; 1383 else 1384 return false; 1385 } 1386 1387 static int device_def_domain_type(struct device *dev) 1388 { 1389 struct device_domain_info *info = dev_iommu_priv_get(dev); 1390 struct intel_iommu *iommu = info->iommu; 1391 1392 /* 1393 * Hardware does not support the passthrough translation mode. 1394 * Always use a dynamaic mapping domain. 1395 */ 1396 if (!ecap_pass_through(iommu->ecap)) 1397 return IOMMU_DOMAIN_DMA; 1398 1399 if (dev_is_pci(dev)) { 1400 struct pci_dev *pdev = to_pci_dev(dev); 1401 1402 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) 1403 return IOMMU_DOMAIN_IDENTITY; 1404 } 1405 1406 return 0; 1407 } 1408 1409 static void intel_iommu_init_qi(struct intel_iommu *iommu) 1410 { 1411 /* 1412 * Start from the sane iommu hardware state. 1413 * If the queued invalidation is already initialized by us 1414 * (for example, while enabling interrupt-remapping) then 1415 * we got the things already rolling from a sane state. 1416 */ 1417 if (!iommu->qi) { 1418 /* 1419 * Clear any previous faults. 1420 */ 1421 dmar_fault(-1, iommu); 1422 /* 1423 * Disable queued invalidation if supported and already enabled 1424 * before OS handover. 
1425 */ 1426 dmar_disable_qi(iommu); 1427 } 1428 1429 if (dmar_enable_qi(iommu)) { 1430 /* 1431 * Queued Invalidate not enabled, use Register Based Invalidate 1432 */ 1433 iommu->flush.flush_context = __iommu_flush_context; 1434 iommu->flush.flush_iotlb = __iommu_flush_iotlb; 1435 pr_info("%s: Using Register based invalidation\n", 1436 iommu->name); 1437 } else { 1438 iommu->flush.flush_context = qi_flush_context; 1439 iommu->flush.flush_iotlb = qi_flush_iotlb; 1440 pr_info("%s: Using Queued invalidation\n", iommu->name); 1441 } 1442 } 1443 1444 static int copy_context_table(struct intel_iommu *iommu, 1445 struct root_entry *old_re, 1446 struct context_entry **tbl, 1447 int bus, bool ext) 1448 { 1449 int tbl_idx, pos = 0, idx, devfn, ret = 0, did; 1450 struct context_entry *new_ce = NULL, ce; 1451 struct context_entry *old_ce = NULL; 1452 struct root_entry re; 1453 phys_addr_t old_ce_phys; 1454 1455 tbl_idx = ext ? bus * 2 : bus; 1456 memcpy(&re, old_re, sizeof(re)); 1457 1458 for (devfn = 0; devfn < 256; devfn++) { 1459 /* First calculate the correct index */ 1460 idx = (ext ? 
devfn * 2 : devfn) % 256; 1461 1462 if (idx == 0) { 1463 /* First save what we may have and clean up */ 1464 if (new_ce) { 1465 tbl[tbl_idx] = new_ce; 1466 __iommu_flush_cache(iommu, new_ce, 1467 VTD_PAGE_SIZE); 1468 pos = 1; 1469 } 1470 1471 if (old_ce) 1472 memunmap(old_ce); 1473 1474 ret = 0; 1475 if (devfn < 0x80) 1476 old_ce_phys = root_entry_lctp(&re); 1477 else 1478 old_ce_phys = root_entry_uctp(&re); 1479 1480 if (!old_ce_phys) { 1481 if (ext && devfn == 0) { 1482 /* No LCTP, try UCTP */ 1483 devfn = 0x7f; 1484 continue; 1485 } else { 1486 goto out; 1487 } 1488 } 1489 1490 ret = -ENOMEM; 1491 old_ce = memremap(old_ce_phys, PAGE_SIZE, 1492 MEMREMAP_WB); 1493 if (!old_ce) 1494 goto out; 1495 1496 new_ce = iommu_alloc_pages_node_sz(iommu->node, 1497 GFP_KERNEL, SZ_4K); 1498 if (!new_ce) 1499 goto out_unmap; 1500 1501 ret = 0; 1502 } 1503 1504 /* Now copy the context entry */ 1505 memcpy(&ce, old_ce + idx, sizeof(ce)); 1506 1507 if (!context_present(&ce)) 1508 continue; 1509 1510 did = context_domain_id(&ce); 1511 if (did >= 0 && did < cap_ndoms(iommu->cap)) 1512 ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL); 1513 1514 set_context_copied(iommu, bus, devfn); 1515 new_ce[idx] = ce; 1516 } 1517 1518 tbl[tbl_idx + pos] = new_ce; 1519 1520 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE); 1521 1522 out_unmap: 1523 memunmap(old_ce); 1524 1525 out: 1526 return ret; 1527 } 1528 1529 static int copy_translation_tables(struct intel_iommu *iommu) 1530 { 1531 struct context_entry **ctxt_tbls; 1532 struct root_entry *old_rt; 1533 phys_addr_t old_rt_phys; 1534 int ctxt_table_entries; 1535 u64 rtaddr_reg; 1536 int bus, ret; 1537 bool new_ext, ext; 1538 1539 rtaddr_reg = readq(iommu->reg + DMAR_RTADDR_REG); 1540 ext = !!(rtaddr_reg & DMA_RTADDR_SMT); 1541 new_ext = !!sm_supported(iommu); 1542 1543 /* 1544 * The RTT bit can only be changed when translation is disabled, 1545 * but disabling translation means to open a window for data 1546 * corruption. 
So bail out and don't copy anything if we would 1547 * have to change the bit. 1548 */ 1549 if (new_ext != ext) 1550 return -EINVAL; 1551 1552 iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL); 1553 if (!iommu->copied_tables) 1554 return -ENOMEM; 1555 1556 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK; 1557 if (!old_rt_phys) 1558 return -EINVAL; 1559 1560 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB); 1561 if (!old_rt) 1562 return -ENOMEM; 1563 1564 /* This is too big for the stack - allocate it from slab */ 1565 ctxt_table_entries = ext ? 512 : 256; 1566 ret = -ENOMEM; 1567 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL); 1568 if (!ctxt_tbls) 1569 goto out_unmap; 1570 1571 for (bus = 0; bus < 256; bus++) { 1572 ret = copy_context_table(iommu, &old_rt[bus], 1573 ctxt_tbls, bus, ext); 1574 if (ret) { 1575 pr_err("%s: Failed to copy context table for bus %d\n", 1576 iommu->name, bus); 1577 continue; 1578 } 1579 } 1580 1581 spin_lock(&iommu->lock); 1582 1583 /* Context tables are copied, now write them to the root_entry table */ 1584 for (bus = 0; bus < 256; bus++) { 1585 int idx = ext ? 
bus * 2 : bus; 1586 u64 val; 1587 1588 if (ctxt_tbls[idx]) { 1589 val = virt_to_phys(ctxt_tbls[idx]) | 1; 1590 iommu->root_entry[bus].lo = val; 1591 } 1592 1593 if (!ext || !ctxt_tbls[idx + 1]) 1594 continue; 1595 1596 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1; 1597 iommu->root_entry[bus].hi = val; 1598 } 1599 1600 spin_unlock(&iommu->lock); 1601 1602 kfree(ctxt_tbls); 1603 1604 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE); 1605 1606 ret = 0; 1607 1608 out_unmap: 1609 memunmap(old_rt); 1610 1611 return ret; 1612 } 1613 1614 static int __init init_dmars(void) 1615 { 1616 struct dmar_drhd_unit *drhd; 1617 struct intel_iommu *iommu; 1618 int ret; 1619 1620 for_each_iommu(iommu, drhd) { 1621 if (drhd->ignored) { 1622 iommu_disable_translation(iommu); 1623 continue; 1624 } 1625 1626 /* 1627 * Find the max pasid size of all IOMMU's in the system. 1628 * We need to ensure the system pasid table is no bigger 1629 * than the smallest supported. 1630 */ 1631 if (pasid_supported(iommu)) { 1632 u32 temp = 2 << ecap_pss(iommu->ecap); 1633 1634 intel_pasid_max_id = min_t(u32, temp, 1635 intel_pasid_max_id); 1636 } 1637 1638 intel_iommu_init_qi(iommu); 1639 init_translation_status(iommu); 1640 1641 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 1642 iommu_disable_translation(iommu); 1643 clear_translation_pre_enabled(iommu); 1644 pr_warn("Translation was enabled for %s but we are not in kdump mode\n", 1645 iommu->name); 1646 } 1647 1648 /* 1649 * TBD: 1650 * we could share the same root & context tables 1651 * among all IOMMU's. Need to Split it later. 1652 */ 1653 ret = iommu_alloc_root_entry(iommu); 1654 if (ret) 1655 goto free_iommu; 1656 1657 if (translation_pre_enabled(iommu)) { 1658 pr_info("Translation already enabled - trying to copy translation structures\n"); 1659 1660 ret = copy_translation_tables(iommu); 1661 if (ret) { 1662 /* 1663 * We found the IOMMU with translation 1664 * enabled - but failed to copy over the 1665 * old root-entry table. 
Try to proceed 1666 * by disabling translation now and 1667 * allocating a clean root-entry table. 1668 * This might cause DMAR faults, but 1669 * probably the dump will still succeed. 1670 */ 1671 pr_err("Failed to copy translation tables from previous kernel for %s\n", 1672 iommu->name); 1673 iommu_disable_translation(iommu); 1674 clear_translation_pre_enabled(iommu); 1675 } else { 1676 pr_info("Copied translation tables from previous kernel for %s\n", 1677 iommu->name); 1678 } 1679 } 1680 1681 intel_svm_check(iommu); 1682 } 1683 1684 /* 1685 * Now that qi is enabled on all iommus, set the root entry and flush 1686 * caches. This is required on some Intel X58 chipsets, otherwise the 1687 * flush_context function will loop forever and the boot hangs. 1688 */ 1689 for_each_active_iommu(iommu, drhd) { 1690 iommu_flush_write_buffer(iommu); 1691 iommu_set_root_entry(iommu); 1692 } 1693 1694 check_tylersburg_isoch(); 1695 1696 /* 1697 * for each drhd 1698 * enable fault log 1699 * global invalidate context cache 1700 * global invalidate iotlb 1701 * enable translation 1702 */ 1703 for_each_iommu(iommu, drhd) { 1704 if (drhd->ignored) { 1705 /* 1706 * we always have to disable PMRs or DMA may fail on 1707 * this device 1708 */ 1709 if (force_on) 1710 iommu_disable_protect_mem_regions(iommu); 1711 continue; 1712 } 1713 1714 iommu_flush_write_buffer(iommu); 1715 1716 if (ecap_prs(iommu->ecap)) { 1717 /* 1718 * Call dmar_alloc_hwirq() with dmar_global_lock held, 1719 * could cause possible lock race condition. 
1720 */ 1721 up_write(&dmar_global_lock); 1722 ret = intel_iommu_enable_prq(iommu); 1723 down_write(&dmar_global_lock); 1724 if (ret) 1725 goto free_iommu; 1726 } 1727 1728 ret = dmar_set_interrupt(iommu); 1729 if (ret) 1730 goto free_iommu; 1731 } 1732 1733 return 0; 1734 1735 free_iommu: 1736 for_each_active_iommu(iommu, drhd) { 1737 disable_dmar_iommu(iommu); 1738 free_dmar_iommu(iommu); 1739 } 1740 1741 return ret; 1742 } 1743 1744 static void __init init_no_remapping_devices(void) 1745 { 1746 struct dmar_drhd_unit *drhd; 1747 struct device *dev; 1748 int i; 1749 1750 for_each_drhd_unit(drhd) { 1751 if (!drhd->include_all) { 1752 for_each_active_dev_scope(drhd->devices, 1753 drhd->devices_cnt, i, dev) 1754 break; 1755 /* ignore DMAR unit if no devices exist */ 1756 if (i == drhd->devices_cnt) 1757 drhd->ignored = 1; 1758 } 1759 } 1760 1761 for_each_active_drhd_unit(drhd) { 1762 if (drhd->include_all) 1763 continue; 1764 1765 for_each_active_dev_scope(drhd->devices, 1766 drhd->devices_cnt, i, dev) 1767 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev))) 1768 break; 1769 if (i < drhd->devices_cnt) 1770 continue; 1771 1772 /* This IOMMU has *only* gfx devices. 
Either bypass it or 1773 set the gfx_mapped flag, as appropriate */ 1774 drhd->gfx_dedicated = 1; 1775 if (disable_igfx_iommu) 1776 drhd->ignored = 1; 1777 } 1778 } 1779 1780 #ifdef CONFIG_SUSPEND 1781 static int init_iommu_hw(void) 1782 { 1783 struct dmar_drhd_unit *drhd; 1784 struct intel_iommu *iommu = NULL; 1785 int ret; 1786 1787 for_each_active_iommu(iommu, drhd) { 1788 if (iommu->qi) { 1789 ret = dmar_reenable_qi(iommu); 1790 if (ret) 1791 return ret; 1792 } 1793 } 1794 1795 for_each_iommu(iommu, drhd) { 1796 if (drhd->ignored) { 1797 /* 1798 * we always have to disable PMRs or DMA may fail on 1799 * this device 1800 */ 1801 if (force_on) 1802 iommu_disable_protect_mem_regions(iommu); 1803 continue; 1804 } 1805 1806 iommu_flush_write_buffer(iommu); 1807 iommu_set_root_entry(iommu); 1808 iommu_enable_translation(iommu); 1809 iommu_disable_protect_mem_regions(iommu); 1810 } 1811 1812 return 0; 1813 } 1814 1815 static void iommu_flush_all(void) 1816 { 1817 struct dmar_drhd_unit *drhd; 1818 struct intel_iommu *iommu; 1819 1820 for_each_active_iommu(iommu, drhd) { 1821 iommu->flush.flush_context(iommu, 0, 0, 0, 1822 DMA_CCMD_GLOBAL_INVL); 1823 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 1824 DMA_TLB_GLOBAL_FLUSH); 1825 } 1826 } 1827 1828 static int iommu_suspend(void *data) 1829 { 1830 struct dmar_drhd_unit *drhd; 1831 struct intel_iommu *iommu = NULL; 1832 unsigned long flag; 1833 1834 iommu_flush_all(); 1835 1836 for_each_active_iommu(iommu, drhd) { 1837 iommu_disable_translation(iommu); 1838 1839 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1840 1841 iommu->iommu_state[SR_DMAR_FECTL_REG] = 1842 readl(iommu->reg + DMAR_FECTL_REG); 1843 iommu->iommu_state[SR_DMAR_FEDATA_REG] = 1844 readl(iommu->reg + DMAR_FEDATA_REG); 1845 iommu->iommu_state[SR_DMAR_FEADDR_REG] = 1846 readl(iommu->reg + DMAR_FEADDR_REG); 1847 iommu->iommu_state[SR_DMAR_FEUADDR_REG] = 1848 readl(iommu->reg + DMAR_FEUADDR_REG); 1849 1850 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 
1851 } 1852 return 0; 1853 } 1854 1855 static void iommu_resume(void *data) 1856 { 1857 struct dmar_drhd_unit *drhd; 1858 struct intel_iommu *iommu = NULL; 1859 unsigned long flag; 1860 1861 if (init_iommu_hw()) { 1862 if (force_on) 1863 panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); 1864 else 1865 WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); 1866 return; 1867 } 1868 1869 for_each_active_iommu(iommu, drhd) { 1870 1871 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1872 1873 writel(iommu->iommu_state[SR_DMAR_FECTL_REG], 1874 iommu->reg + DMAR_FECTL_REG); 1875 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG], 1876 iommu->reg + DMAR_FEDATA_REG); 1877 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG], 1878 iommu->reg + DMAR_FEADDR_REG); 1879 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG], 1880 iommu->reg + DMAR_FEUADDR_REG); 1881 1882 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1883 } 1884 } 1885 1886 static const struct syscore_ops iommu_syscore_ops = { 1887 .resume = iommu_resume, 1888 .suspend = iommu_suspend, 1889 }; 1890 1891 static struct syscore iommu_syscore = { 1892 .ops = &iommu_syscore_ops, 1893 }; 1894 1895 static void __init init_iommu_pm_ops(void) 1896 { 1897 register_syscore(&iommu_syscore); 1898 } 1899 1900 #else 1901 static inline void init_iommu_pm_ops(void) {} 1902 #endif /* CONFIG_PM */ 1903 1904 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) 1905 { 1906 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) || 1907 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) || 1908 rmrr->end_address <= rmrr->base_address || 1909 arch_rmrr_sanity_check(rmrr)) 1910 return -EINVAL; 1911 1912 return 0; 1913 } 1914 1915 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) 1916 { 1917 struct acpi_dmar_reserved_memory *rmrr; 1918 struct dmar_rmrr_unit *rmrru; 1919 1920 rmrr = (struct acpi_dmar_reserved_memory *)header; 1921 if (rmrr_sanity_check(rmrr)) { 1922 pr_warn(FW_BUG 1923 "Your BIOS is 
broken; bad RMRR [%#018Lx-%#018Lx]\n" 1924 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 1925 rmrr->base_address, rmrr->end_address, 1926 dmi_get_system_info(DMI_BIOS_VENDOR), 1927 dmi_get_system_info(DMI_BIOS_VERSION), 1928 dmi_get_system_info(DMI_PRODUCT_VERSION)); 1929 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 1930 } 1931 1932 rmrru = kzalloc_obj(*rmrru); 1933 if (!rmrru) 1934 goto out; 1935 1936 rmrru->hdr = header; 1937 1938 rmrru->base_address = rmrr->base_address; 1939 rmrru->end_address = rmrr->end_address; 1940 1941 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), 1942 ((void *)rmrr) + rmrr->header.length, 1943 &rmrru->devices_cnt); 1944 if (rmrru->devices_cnt && rmrru->devices == NULL) 1945 goto free_rmrru; 1946 1947 list_add(&rmrru->list, &dmar_rmrr_units); 1948 1949 return 0; 1950 free_rmrru: 1951 kfree(rmrru); 1952 out: 1953 return -ENOMEM; 1954 } 1955 1956 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr) 1957 { 1958 struct dmar_atsr_unit *atsru; 1959 struct acpi_dmar_atsr *tmp; 1960 1961 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list, 1962 dmar_rcu_check()) { 1963 tmp = (struct acpi_dmar_atsr *)atsru->hdr; 1964 if (atsr->segment != tmp->segment) 1965 continue; 1966 if (atsr->header.length != tmp->header.length) 1967 continue; 1968 if (memcmp(atsr, tmp, atsr->header.length) == 0) 1969 return atsru; 1970 } 1971 1972 return NULL; 1973 } 1974 1975 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg) 1976 { 1977 struct acpi_dmar_atsr *atsr; 1978 struct dmar_atsr_unit *atsru; 1979 1980 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) 1981 return 0; 1982 1983 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 1984 atsru = dmar_find_atsr(atsr); 1985 if (atsru) 1986 return 0; 1987 1988 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL); 1989 if (!atsru) 1990 return -ENOMEM; 1991 1992 /* 1993 * If memory is allocated from slab by ACPI _DSM method, we need to 1994 * 
copy the memory content because the memory buffer will be freed 1995 * on return. 1996 */ 1997 atsru->hdr = (void *)(atsru + 1); 1998 memcpy(atsru->hdr, hdr, hdr->length); 1999 atsru->include_all = atsr->flags & 0x1; 2000 if (!atsru->include_all) { 2001 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1), 2002 (void *)atsr + atsr->header.length, 2003 &atsru->devices_cnt); 2004 if (atsru->devices_cnt && atsru->devices == NULL) { 2005 kfree(atsru); 2006 return -ENOMEM; 2007 } 2008 } 2009 2010 list_add_rcu(&atsru->list, &dmar_atsr_units); 2011 2012 return 0; 2013 } 2014 2015 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru) 2016 { 2017 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); 2018 kfree(atsru); 2019 } 2020 2021 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg) 2022 { 2023 struct acpi_dmar_atsr *atsr; 2024 struct dmar_atsr_unit *atsru; 2025 2026 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 2027 atsru = dmar_find_atsr(atsr); 2028 if (atsru) { 2029 list_del_rcu(&atsru->list); 2030 synchronize_rcu(); 2031 intel_iommu_free_atsr(atsru); 2032 } 2033 2034 return 0; 2035 } 2036 2037 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) 2038 { 2039 int i; 2040 struct device *dev; 2041 struct acpi_dmar_atsr *atsr; 2042 struct dmar_atsr_unit *atsru; 2043 2044 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 2045 atsru = dmar_find_atsr(atsr); 2046 if (!atsru) 2047 return 0; 2048 2049 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) { 2050 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt, 2051 i, dev) 2052 return -EBUSY; 2053 } 2054 2055 return 0; 2056 } 2057 2058 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc) 2059 { 2060 struct dmar_satc_unit *satcu; 2061 struct acpi_dmar_satc *tmp; 2062 2063 list_for_each_entry_rcu(satcu, &dmar_satc_units, list, 2064 dmar_rcu_check()) { 2065 tmp = (struct acpi_dmar_satc *)satcu->hdr; 2066 if (satc->segment != 
tmp->segment) 2067 continue; 2068 if (satc->header.length != tmp->header.length) 2069 continue; 2070 if (memcmp(satc, tmp, satc->header.length) == 0) 2071 return satcu; 2072 } 2073 2074 return NULL; 2075 } 2076 2077 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg) 2078 { 2079 struct acpi_dmar_satc *satc; 2080 struct dmar_satc_unit *satcu; 2081 2082 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) 2083 return 0; 2084 2085 satc = container_of(hdr, struct acpi_dmar_satc, header); 2086 satcu = dmar_find_satc(satc); 2087 if (satcu) 2088 return 0; 2089 2090 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL); 2091 if (!satcu) 2092 return -ENOMEM; 2093 2094 satcu->hdr = (void *)(satcu + 1); 2095 memcpy(satcu->hdr, hdr, hdr->length); 2096 satcu->atc_required = satc->flags & 0x1; 2097 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1), 2098 (void *)satc + satc->header.length, 2099 &satcu->devices_cnt); 2100 if (satcu->devices_cnt && !satcu->devices) { 2101 kfree(satcu); 2102 return -ENOMEM; 2103 } 2104 list_add_rcu(&satcu->list, &dmar_satc_units); 2105 2106 return 0; 2107 } 2108 2109 static int intel_iommu_add(struct dmar_drhd_unit *dmaru) 2110 { 2111 struct intel_iommu *iommu = dmaru->iommu; 2112 int ret; 2113 2114 /* 2115 * Disable translation if already enabled prior to OS handover. 
2116 */ 2117 if (iommu->gcmd & DMA_GCMD_TE) 2118 iommu_disable_translation(iommu); 2119 2120 ret = iommu_alloc_root_entry(iommu); 2121 if (ret) 2122 goto out; 2123 2124 intel_svm_check(iommu); 2125 2126 if (dmaru->ignored) { 2127 /* 2128 * we always have to disable PMRs or DMA may fail on this device 2129 */ 2130 if (force_on) 2131 iommu_disable_protect_mem_regions(iommu); 2132 return 0; 2133 } 2134 2135 intel_iommu_init_qi(iommu); 2136 iommu_flush_write_buffer(iommu); 2137 2138 if (ecap_prs(iommu->ecap)) { 2139 ret = intel_iommu_enable_prq(iommu); 2140 if (ret) 2141 goto disable_iommu; 2142 } 2143 2144 ret = dmar_set_interrupt(iommu); 2145 if (ret) 2146 goto disable_iommu; 2147 2148 iommu_set_root_entry(iommu); 2149 iommu_enable_translation(iommu); 2150 2151 iommu_disable_protect_mem_regions(iommu); 2152 return 0; 2153 2154 disable_iommu: 2155 disable_dmar_iommu(iommu); 2156 out: 2157 free_dmar_iommu(iommu); 2158 return ret; 2159 } 2160 2161 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert) 2162 { 2163 int ret = 0; 2164 struct intel_iommu *iommu = dmaru->iommu; 2165 2166 if (!intel_iommu_enabled) 2167 return 0; 2168 if (iommu == NULL) 2169 return -EINVAL; 2170 2171 if (insert) { 2172 ret = intel_iommu_add(dmaru); 2173 } else { 2174 disable_dmar_iommu(iommu); 2175 free_dmar_iommu(iommu); 2176 } 2177 2178 return ret; 2179 } 2180 2181 static void intel_iommu_free_dmars(void) 2182 { 2183 struct dmar_rmrr_unit *rmrru, *rmrr_n; 2184 struct dmar_atsr_unit *atsru, *atsr_n; 2185 struct dmar_satc_unit *satcu, *satc_n; 2186 2187 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { 2188 list_del(&rmrru->list); 2189 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); 2190 kfree(rmrru); 2191 } 2192 2193 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { 2194 list_del(&atsru->list); 2195 intel_iommu_free_atsr(atsru); 2196 } 2197 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) { 2198 list_del(&satcu->list); 2199 
dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt); 2200 kfree(satcu); 2201 } 2202 } 2203 2204 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev) 2205 { 2206 struct dmar_satc_unit *satcu; 2207 struct acpi_dmar_satc *satc; 2208 struct device *tmp; 2209 int i; 2210 2211 rcu_read_lock(); 2212 2213 list_for_each_entry_rcu(satcu, &dmar_satc_units, list) { 2214 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); 2215 if (satc->segment != pci_domain_nr(dev->bus)) 2216 continue; 2217 for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp) 2218 if (to_pci_dev(tmp) == dev) 2219 goto out; 2220 } 2221 satcu = NULL; 2222 out: 2223 rcu_read_unlock(); 2224 return satcu; 2225 } 2226 2227 static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu) 2228 { 2229 struct pci_dev *bridge = NULL; 2230 struct dmar_atsr_unit *atsru; 2231 struct dmar_satc_unit *satcu; 2232 struct acpi_dmar_atsr *atsr; 2233 bool supported = true; 2234 struct pci_bus *bus; 2235 struct device *tmp; 2236 int i; 2237 2238 dev = pci_physfn(dev); 2239 satcu = dmar_find_matched_satc_unit(dev); 2240 if (satcu) 2241 /* 2242 * This device supports ATS as it is in SATC table. 2243 * When IOMMU is in legacy mode, enabling ATS is done 2244 * automatically by HW for the device that requires 2245 * ATS, hence OS should not enable this device ATS 2246 * to avoid duplicated TLB invalidation. 
2247 */ 2248 return !(satcu->atc_required && !sm_supported(iommu)); 2249 2250 for (bus = dev->bus; bus; bus = bus->parent) { 2251 bridge = bus->self; 2252 /* If it's an integrated device, allow ATS */ 2253 if (!bridge) 2254 return true; 2255 /* Connected via non-PCIe: no ATS */ 2256 if (!pci_is_pcie(bridge) || 2257 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) 2258 return false; 2259 /* If we found the root port, look it up in the ATSR */ 2260 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) 2261 break; 2262 } 2263 2264 rcu_read_lock(); 2265 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { 2266 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 2267 if (atsr->segment != pci_domain_nr(dev->bus)) 2268 continue; 2269 2270 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) 2271 if (tmp == &bridge->dev) 2272 goto out; 2273 2274 if (atsru->include_all) 2275 goto out; 2276 } 2277 supported = false; 2278 out: 2279 rcu_read_unlock(); 2280 2281 return supported; 2282 } 2283 2284 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) 2285 { 2286 int ret; 2287 struct dmar_rmrr_unit *rmrru; 2288 struct dmar_atsr_unit *atsru; 2289 struct dmar_satc_unit *satcu; 2290 struct acpi_dmar_atsr *atsr; 2291 struct acpi_dmar_reserved_memory *rmrr; 2292 struct acpi_dmar_satc *satc; 2293 2294 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING) 2295 return 0; 2296 2297 list_for_each_entry(rmrru, &dmar_rmrr_units, list) { 2298 rmrr = container_of(rmrru->hdr, 2299 struct acpi_dmar_reserved_memory, header); 2300 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 2301 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), 2302 ((void *)rmrr) + rmrr->header.length, 2303 rmrr->segment, rmrru->devices, 2304 rmrru->devices_cnt); 2305 if (ret < 0) 2306 return ret; 2307 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 2308 dmar_remove_dev_scope(info, rmrr->segment, 2309 rmrru->devices, rmrru->devices_cnt); 2310 } 2311 } 2312 2313 
list_for_each_entry(atsru, &dmar_atsr_units, list) { 2314 if (atsru->include_all) 2315 continue; 2316 2317 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 2318 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 2319 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), 2320 (void *)atsr + atsr->header.length, 2321 atsr->segment, atsru->devices, 2322 atsru->devices_cnt); 2323 if (ret > 0) 2324 break; 2325 else if (ret < 0) 2326 return ret; 2327 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 2328 if (dmar_remove_dev_scope(info, atsr->segment, 2329 atsru->devices, atsru->devices_cnt)) 2330 break; 2331 } 2332 } 2333 list_for_each_entry(satcu, &dmar_satc_units, list) { 2334 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header); 2335 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 2336 ret = dmar_insert_dev_scope(info, (void *)(satc + 1), 2337 (void *)satc + satc->header.length, 2338 satc->segment, satcu->devices, 2339 satcu->devices_cnt); 2340 if (ret > 0) 2341 break; 2342 else if (ret < 0) 2343 return ret; 2344 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { 2345 if (dmar_remove_dev_scope(info, satc->segment, 2346 satcu->devices, satcu->devices_cnt)) 2347 break; 2348 } 2349 } 2350 2351 return 0; 2352 } 2353 2354 static void intel_disable_iommus(void) 2355 { 2356 struct intel_iommu *iommu = NULL; 2357 struct dmar_drhd_unit *drhd; 2358 2359 for_each_iommu(iommu, drhd) 2360 iommu_disable_translation(iommu); 2361 } 2362 2363 void intel_iommu_shutdown(void) 2364 { 2365 struct dmar_drhd_unit *drhd; 2366 struct intel_iommu *iommu = NULL; 2367 2368 if (no_iommu || dmar_disabled) 2369 return; 2370 2371 /* 2372 * All other CPUs were brought down, hotplug interrupts were disabled, 2373 * no lock and RCU checking needed anymore 2374 */ 2375 list_for_each_entry(drhd, &dmar_drhd_units, list) { 2376 iommu = drhd->iommu; 2377 2378 /* Disable PMRs explicitly here. 
*/ 2379 iommu_disable_protect_mem_regions(iommu); 2380 2381 /* Make sure the IOMMUs are switched off */ 2382 iommu_disable_translation(iommu); 2383 } 2384 } 2385 2386 static struct intel_iommu *dev_to_intel_iommu(struct device *dev) 2387 { 2388 struct iommu_device *iommu_dev = dev_to_iommu_device(dev); 2389 2390 return container_of(iommu_dev, struct intel_iommu, iommu); 2391 } 2392 2393 static ssize_t version_show(struct device *dev, 2394 struct device_attribute *attr, char *buf) 2395 { 2396 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2397 u32 ver = readl(iommu->reg + DMAR_VER_REG); 2398 return sysfs_emit(buf, "%d:%d\n", 2399 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); 2400 } 2401 static DEVICE_ATTR_RO(version); 2402 2403 static ssize_t address_show(struct device *dev, 2404 struct device_attribute *attr, char *buf) 2405 { 2406 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2407 return sysfs_emit(buf, "%llx\n", iommu->reg_phys); 2408 } 2409 static DEVICE_ATTR_RO(address); 2410 2411 static ssize_t cap_show(struct device *dev, 2412 struct device_attribute *attr, char *buf) 2413 { 2414 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2415 return sysfs_emit(buf, "%llx\n", iommu->cap); 2416 } 2417 static DEVICE_ATTR_RO(cap); 2418 2419 static ssize_t ecap_show(struct device *dev, 2420 struct device_attribute *attr, char *buf) 2421 { 2422 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2423 return sysfs_emit(buf, "%llx\n", iommu->ecap); 2424 } 2425 static DEVICE_ATTR_RO(ecap); 2426 2427 static ssize_t domains_supported_show(struct device *dev, 2428 struct device_attribute *attr, char *buf) 2429 { 2430 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2431 return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap)); 2432 } 2433 static DEVICE_ATTR_RO(domains_supported); 2434 2435 static ssize_t domains_used_show(struct device *dev, 2436 struct device_attribute *attr, char *buf) 2437 { 2438 struct intel_iommu *iommu = dev_to_intel_iommu(dev); 2439 
unsigned int count = 0; 2440 int id; 2441 2442 for (id = 0; id < cap_ndoms(iommu->cap); id++) 2443 if (ida_exists(&iommu->domain_ida, id)) 2444 count++; 2445 2446 return sysfs_emit(buf, "%d\n", count); 2447 } 2448 static DEVICE_ATTR_RO(domains_used); 2449 2450 static struct attribute *intel_iommu_attrs[] = { 2451 &dev_attr_version.attr, 2452 &dev_attr_address.attr, 2453 &dev_attr_cap.attr, 2454 &dev_attr_ecap.attr, 2455 &dev_attr_domains_supported.attr, 2456 &dev_attr_domains_used.attr, 2457 NULL, 2458 }; 2459 2460 static struct attribute_group intel_iommu_group = { 2461 .name = "intel-iommu", 2462 .attrs = intel_iommu_attrs, 2463 }; 2464 2465 const struct attribute_group *intel_iommu_groups[] = { 2466 &intel_iommu_group, 2467 NULL, 2468 }; 2469 2470 static bool has_external_pci(void) 2471 { 2472 struct pci_dev *pdev = NULL; 2473 2474 for_each_pci_dev(pdev) 2475 if (pdev->external_facing) { 2476 pci_dev_put(pdev); 2477 return true; 2478 } 2479 2480 return false; 2481 } 2482 2483 static int __init platform_optin_force_iommu(void) 2484 { 2485 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci()) 2486 return 0; 2487 2488 if (no_iommu || dmar_disabled) 2489 pr_info("Intel-IOMMU force enabled due to platform opt in\n"); 2490 2491 /* 2492 * If Intel-IOMMU is disabled by default, we will apply identity 2493 * map for all devices except those marked as being untrusted. 2494 */ 2495 if (dmar_disabled) 2496 iommu_set_default_passthrough(false); 2497 2498 dmar_disabled = 0; 2499 no_iommu = 0; 2500 2501 return 1; 2502 } 2503 2504 static int __init probe_acpi_namespace_devices(void) 2505 { 2506 struct dmar_drhd_unit *drhd; 2507 /* To avoid a -Wunused-but-set-variable warning. 
*/ 2508 struct intel_iommu *iommu __maybe_unused; 2509 struct device *dev; 2510 int i, ret = 0; 2511 2512 for_each_active_iommu(iommu, drhd) { 2513 for_each_active_dev_scope(drhd->devices, 2514 drhd->devices_cnt, i, dev) { 2515 struct acpi_device_physical_node *pn; 2516 struct acpi_device *adev; 2517 2518 if (dev->bus != &acpi_bus_type) 2519 continue; 2520 2521 up_read(&dmar_global_lock); 2522 adev = to_acpi_device(dev); 2523 mutex_lock(&adev->physical_node_lock); 2524 list_for_each_entry(pn, 2525 &adev->physical_node_list, node) { 2526 ret = iommu_probe_device(pn->dev); 2527 if (ret) 2528 break; 2529 } 2530 mutex_unlock(&adev->physical_node_lock); 2531 down_read(&dmar_global_lock); 2532 2533 if (ret) 2534 return ret; 2535 } 2536 } 2537 2538 return 0; 2539 } 2540 2541 static __init int tboot_force_iommu(void) 2542 { 2543 if (!tboot_enabled()) 2544 return 0; 2545 2546 if (no_iommu || dmar_disabled) 2547 pr_warn("Forcing Intel-IOMMU to enabled\n"); 2548 2549 dmar_disabled = 0; 2550 no_iommu = 0; 2551 2552 return 1; 2553 } 2554 2555 int __init intel_iommu_init(void) 2556 { 2557 int ret = -ENODEV; 2558 struct dmar_drhd_unit *drhd; 2559 struct intel_iommu *iommu; 2560 2561 /* 2562 * Intel IOMMU is required for a TXT/tboot launch or platform 2563 * opt in, so enforce that. 2564 */ 2565 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || 2566 platform_optin_force_iommu(); 2567 2568 down_write(&dmar_global_lock); 2569 if (dmar_table_init()) { 2570 if (force_on) 2571 panic("tboot: Failed to initialize DMAR table\n"); 2572 goto out_free_dmar; 2573 } 2574 2575 if (dmar_dev_scope_init() < 0) { 2576 if (force_on) 2577 panic("tboot: Failed to initialize DMAR device scope\n"); 2578 goto out_free_dmar; 2579 } 2580 2581 up_write(&dmar_global_lock); 2582 2583 /* 2584 * The bus notifier takes the dmar_global_lock, so lockdep will 2585 * complain later when we register it under the lock. 
2586 */ 2587 dmar_register_bus_notifier(); 2588 2589 down_write(&dmar_global_lock); 2590 2591 if (!no_iommu) 2592 intel_iommu_debugfs_init(); 2593 2594 if (no_iommu || dmar_disabled) { 2595 /* 2596 * We exit the function here to ensure IOMMU's remapping and 2597 * mempool aren't setup, which means that the IOMMU's PMRs 2598 * won't be disabled via the call to init_dmars(). So disable 2599 * it explicitly here. The PMRs were setup by tboot prior to 2600 * calling SENTER, but the kernel is expected to reset/tear 2601 * down the PMRs. 2602 */ 2603 if (intel_iommu_tboot_noforce) { 2604 for_each_iommu(iommu, drhd) 2605 iommu_disable_protect_mem_regions(iommu); 2606 } 2607 2608 /* 2609 * Make sure the IOMMUs are switched off, even when we 2610 * boot into a kexec kernel and the previous kernel left 2611 * them enabled 2612 */ 2613 intel_disable_iommus(); 2614 goto out_free_dmar; 2615 } 2616 2617 if (list_empty(&dmar_rmrr_units)) 2618 pr_info("No RMRR found\n"); 2619 2620 if (list_empty(&dmar_atsr_units)) 2621 pr_info("No ATSR found\n"); 2622 2623 if (list_empty(&dmar_satc_units)) 2624 pr_info("No SATC found\n"); 2625 2626 init_no_remapping_devices(); 2627 2628 ret = init_dmars(); 2629 if (ret) { 2630 if (force_on) 2631 panic("tboot: Failed to initialize DMARs\n"); 2632 pr_err("Initialization failed\n"); 2633 goto out_free_dmar; 2634 } 2635 up_write(&dmar_global_lock); 2636 2637 init_iommu_pm_ops(); 2638 2639 down_read(&dmar_global_lock); 2640 for_each_active_iommu(iommu, drhd) { 2641 /* 2642 * The flush queue implementation does not perform 2643 * page-selective invalidations that are required for efficient 2644 * TLB flushes in virtual environments. The benefit of batching 2645 * is likely to be much lower than the overhead of synchronizing 2646 * the virtual and physical IOMMU page-tables. 
2647 */ 2648 if (cap_caching_mode(iommu->cap) && 2649 !first_level_by_default(iommu)) { 2650 pr_info_once("IOMMU batching disallowed due to virtualization\n"); 2651 iommu_set_dma_strict(); 2652 } 2653 iommu_device_sysfs_add(&iommu->iommu, NULL, 2654 intel_iommu_groups, 2655 "%s", iommu->name); 2656 /* 2657 * The iommu device probe is protected by the iommu_probe_device_lock. 2658 * Release the dmar_global_lock before entering the device probe path 2659 * to avoid unnecessary lock order splat. 2660 */ 2661 up_read(&dmar_global_lock); 2662 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); 2663 down_read(&dmar_global_lock); 2664 2665 iommu_pmu_register(iommu); 2666 } 2667 2668 if (probe_acpi_namespace_devices()) 2669 pr_warn("ACPI name space devices didn't probe correctly\n"); 2670 2671 /* Finally, we enable the DMA remapping hardware. */ 2672 for_each_iommu(iommu, drhd) { 2673 if (!drhd->ignored && !translation_pre_enabled(iommu)) 2674 iommu_enable_translation(iommu); 2675 2676 iommu_disable_protect_mem_regions(iommu); 2677 } 2678 up_read(&dmar_global_lock); 2679 2680 pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); 2681 2682 intel_iommu_enabled = 1; 2683 2684 return 0; 2685 2686 out_free_dmar: 2687 intel_iommu_free_dmars(); 2688 up_write(&dmar_global_lock); 2689 return ret; 2690 } 2691 2692 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque) 2693 { 2694 struct device_domain_info *info = opaque; 2695 2696 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff); 2697 return 0; 2698 } 2699 2700 /* 2701 * NB - intel-iommu lacks any sort of reference counting for the users of 2702 * dependent devices. If multiple endpoints have intersecting dependent 2703 * devices, unbinding the driver from any one of them will possibly leave 2704 * the others unable to operate. 
2705 */ 2706 static void domain_context_clear(struct device_domain_info *info) 2707 { 2708 if (!dev_is_pci(info->dev)) { 2709 domain_context_clear_one(info, info->bus, info->devfn); 2710 return; 2711 } 2712 2713 pci_for_each_dma_alias(to_pci_dev(info->dev), 2714 &domain_context_clear_one_cb, info); 2715 iommu_disable_pci_ats(info); 2716 } 2717 2718 /* 2719 * Clear the page table pointer in context or pasid table entries so that 2720 * all DMA requests without PASID from the device are blocked. If the page 2721 * table has been set, clean up the data structures. 2722 */ 2723 void device_block_translation(struct device *dev) 2724 { 2725 struct device_domain_info *info = dev_iommu_priv_get(dev); 2726 struct intel_iommu *iommu = info->iommu; 2727 unsigned long flags; 2728 2729 /* Device in DMA blocking state. Noting to do. */ 2730 if (!info->domain_attached) 2731 return; 2732 2733 if (info->domain) 2734 cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); 2735 2736 if (!dev_is_real_dma_subdevice(dev)) { 2737 if (sm_supported(iommu)) 2738 intel_pasid_tear_down_entry(iommu, dev, 2739 IOMMU_NO_PASID, false); 2740 else 2741 domain_context_clear(info); 2742 } 2743 2744 /* Device now in DMA blocking state. */ 2745 info->domain_attached = false; 2746 2747 if (!info->domain) 2748 return; 2749 2750 spin_lock_irqsave(&info->domain->lock, flags); 2751 list_del(&info->link); 2752 spin_unlock_irqrestore(&info->domain->lock, flags); 2753 2754 domain_detach_iommu(info->domain, iommu); 2755 info->domain = NULL; 2756 } 2757 2758 static int blocking_domain_attach_dev(struct iommu_domain *domain, 2759 struct device *dev, 2760 struct iommu_domain *old) 2761 { 2762 struct device_domain_info *info = dev_iommu_priv_get(dev); 2763 2764 iopf_for_domain_remove(info->domain ? 
&info->domain->domain : NULL, dev); 2765 device_block_translation(dev); 2766 return 0; 2767 } 2768 2769 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, 2770 struct device *dev, ioasid_t pasid, 2771 struct iommu_domain *old); 2772 2773 static struct iommu_domain blocking_domain = { 2774 .type = IOMMU_DOMAIN_BLOCKED, 2775 .ops = &(const struct iommu_domain_ops) { 2776 .attach_dev = blocking_domain_attach_dev, 2777 .set_dev_pasid = blocking_domain_set_dev_pasid, 2778 } 2779 }; 2780 2781 static struct dmar_domain *paging_domain_alloc(void) 2782 { 2783 struct dmar_domain *domain; 2784 2785 domain = kzalloc_obj(*domain); 2786 if (!domain) 2787 return ERR_PTR(-ENOMEM); 2788 2789 INIT_LIST_HEAD(&domain->devices); 2790 INIT_LIST_HEAD(&domain->dev_pasids); 2791 INIT_LIST_HEAD(&domain->cache_tags); 2792 spin_lock_init(&domain->lock); 2793 spin_lock_init(&domain->cache_lock); 2794 xa_init(&domain->iommu_array); 2795 INIT_LIST_HEAD(&domain->s1_domains); 2796 spin_lock_init(&domain->s1_lock); 2797 2798 return domain; 2799 } 2800 2801 static unsigned int compute_vasz_lg2_fs(struct intel_iommu *iommu, 2802 unsigned int *top_level) 2803 { 2804 unsigned int mgaw = cap_mgaw(iommu->cap); 2805 2806 /* 2807 * Spec 3.6 First-Stage Translation: 2808 * 2809 * Software must limit addresses to less than the minimum of MGAW 2810 * and the lower canonical address width implied by FSPM (i.e., 2811 * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level). 
2812 */ 2813 if (mgaw > 48 && cap_fl5lp_support(iommu->cap)) { 2814 *top_level = 4; 2815 return min(57, mgaw); 2816 } 2817 2818 /* Four level is always supported */ 2819 *top_level = 3; 2820 return min(48, mgaw); 2821 } 2822 2823 static struct iommu_domain * 2824 intel_iommu_domain_alloc_first_stage(struct device *dev, 2825 struct intel_iommu *iommu, u32 flags) 2826 { 2827 struct pt_iommu_x86_64_cfg cfg = {}; 2828 struct dmar_domain *dmar_domain; 2829 int ret; 2830 2831 if (flags & ~IOMMU_HWPT_ALLOC_PASID) 2832 return ERR_PTR(-EOPNOTSUPP); 2833 2834 /* Only SL is available in legacy mode */ 2835 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) 2836 return ERR_PTR(-EOPNOTSUPP); 2837 2838 dmar_domain = paging_domain_alloc(); 2839 if (IS_ERR(dmar_domain)) 2840 return ERR_CAST(dmar_domain); 2841 2842 cfg.common.hw_max_vasz_lg2 = 2843 compute_vasz_lg2_fs(iommu, &cfg.top_level); 2844 cfg.common.hw_max_oasz_lg2 = 52; 2845 cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) | 2846 BIT(PT_FEAT_FLUSH_RANGE); 2847 /* First stage always uses scalable mode */ 2848 if (!ecap_smpwc(iommu->ecap)) 2849 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT); 2850 dmar_domain->iommu.iommu_device = dev; 2851 dmar_domain->iommu.nid = dev_to_node(dev); 2852 dmar_domain->domain.ops = &intel_fs_paging_domain_ops; 2853 /* 2854 * iotlb sync for map is only needed for legacy implementations that 2855 * explicitly require flushing internal write buffers to ensure memory 2856 * coherence. 
2857 */ 2858 if (rwbf_required(iommu)) 2859 dmar_domain->iotlb_sync_map = true; 2860 2861 ret = pt_iommu_x86_64_init(&dmar_domain->fspt, &cfg, GFP_KERNEL); 2862 if (ret) { 2863 kfree(dmar_domain); 2864 return ERR_PTR(ret); 2865 } 2866 2867 if (!cap_fl1gp_support(iommu->cap)) 2868 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G; 2869 if (!intel_iommu_superpage) 2870 dmar_domain->domain.pgsize_bitmap = SZ_4K; 2871 2872 return &dmar_domain->domain; 2873 } 2874 2875 static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu, 2876 unsigned int *top_level) 2877 { 2878 unsigned int sagaw = cap_sagaw(iommu->cap); 2879 unsigned int mgaw = cap_mgaw(iommu->cap); 2880 2881 /* 2882 * Find the largest table size that both the mgaw and sagaw support. 2883 * This sets the valid range of IOVA and the top starting level. 2884 * Some HW may only support a 4 or 5 level walk but must limit IOVA to 2885 * 3 levels. 2886 */ 2887 if (mgaw > 48 && sagaw >= BIT(3)) { 2888 *top_level = 4; 2889 return min(57, mgaw); 2890 } else if (mgaw > 39 && sagaw >= BIT(2)) { 2891 *top_level = 3 + ffs(sagaw >> 3); 2892 return min(48, mgaw); 2893 } else if (mgaw > 30 && sagaw >= BIT(1)) { 2894 *top_level = 2 + ffs(sagaw >> 2); 2895 return min(39, mgaw); 2896 } 2897 return 0; 2898 } 2899 2900 static const struct iommu_dirty_ops intel_second_stage_dirty_ops = { 2901 IOMMU_PT_DIRTY_OPS(vtdss), 2902 .set_dirty_tracking = intel_iommu_set_dirty_tracking, 2903 }; 2904 2905 static struct iommu_domain * 2906 intel_iommu_domain_alloc_second_stage(struct device *dev, 2907 struct intel_iommu *iommu, u32 flags) 2908 { 2909 struct pt_iommu_vtdss_cfg cfg = {}; 2910 struct dmar_domain *dmar_domain; 2911 unsigned int sslps; 2912 int ret; 2913 2914 if (flags & 2915 (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | 2916 IOMMU_HWPT_ALLOC_PASID))) 2917 return ERR_PTR(-EOPNOTSUPP); 2918 2919 if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && 2920 !nested_supported(iommu)) || 2921 ((flags & 
IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && 2922 !ssads_supported(iommu))) 2923 return ERR_PTR(-EOPNOTSUPP); 2924 2925 /* Legacy mode always supports second stage */ 2926 if (sm_supported(iommu) && !ecap_slts(iommu->ecap)) 2927 return ERR_PTR(-EOPNOTSUPP); 2928 2929 dmar_domain = paging_domain_alloc(); 2930 if (IS_ERR(dmar_domain)) 2931 return ERR_CAST(dmar_domain); 2932 2933 cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level); 2934 cfg.common.hw_max_oasz_lg2 = 52; 2935 cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE); 2936 2937 /* 2938 * Read-only mapping is disallowed on the domain which serves as the 2939 * parent in a nested configuration, due to HW errata 2940 * (ERRATA_772415_SPR17) 2941 */ 2942 if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) 2943 cfg.common.features |= BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE); 2944 2945 if (!iommu_paging_structure_coherency(iommu)) 2946 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT); 2947 dmar_domain->iommu.iommu_device = dev; 2948 dmar_domain->iommu.nid = dev_to_node(dev); 2949 dmar_domain->domain.ops = &intel_ss_paging_domain_ops; 2950 dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; 2951 2952 if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) 2953 dmar_domain->domain.dirty_ops = &intel_second_stage_dirty_ops; 2954 2955 ret = pt_iommu_vtdss_init(&dmar_domain->sspt, &cfg, GFP_KERNEL); 2956 if (ret) { 2957 kfree(dmar_domain); 2958 return ERR_PTR(ret); 2959 } 2960 2961 /* Adjust the supported page sizes to HW capability */ 2962 sslps = cap_super_page_val(iommu->cap); 2963 if (!(sslps & BIT(0))) 2964 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_2M; 2965 if (!(sslps & BIT(1))) 2966 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G; 2967 if (!intel_iommu_superpage) 2968 dmar_domain->domain.pgsize_bitmap = SZ_4K; 2969 2970 /* 2971 * Besides the internal write buffer flush, the caching mode used for 2972 * legacy nested translation (which utilizes shadowing page tables) 2973 * also requires iotlb sync on map. 
2974 */ 2975 if (rwbf_required(iommu) || cap_caching_mode(iommu->cap)) 2976 dmar_domain->iotlb_sync_map = true; 2977 2978 return &dmar_domain->domain; 2979 } 2980 2981 static struct iommu_domain * 2982 intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, 2983 const struct iommu_user_data *user_data) 2984 { 2985 struct device_domain_info *info = dev_iommu_priv_get(dev); 2986 struct intel_iommu *iommu = info->iommu; 2987 struct iommu_domain *domain; 2988 2989 if (user_data) 2990 return ERR_PTR(-EOPNOTSUPP); 2991 2992 /* Prefer first stage if possible by default. */ 2993 domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags); 2994 if (domain != ERR_PTR(-EOPNOTSUPP)) 2995 return domain; 2996 return intel_iommu_domain_alloc_second_stage(dev, iommu, flags); 2997 } 2998 2999 static void intel_iommu_domain_free(struct iommu_domain *domain) 3000 { 3001 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3002 3003 if (WARN_ON(dmar_domain->nested_parent && 3004 !list_empty(&dmar_domain->s1_domains))) 3005 return; 3006 3007 if (WARN_ON(!list_empty(&dmar_domain->devices))) 3008 return; 3009 3010 pt_iommu_deinit(&dmar_domain->iommu); 3011 3012 kfree(dmar_domain->qi_batch); 3013 kfree(dmar_domain); 3014 } 3015 3016 static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain, 3017 struct intel_iommu *iommu) 3018 { 3019 if (WARN_ON(dmar_domain->domain.dirty_ops || 3020 dmar_domain->nested_parent)) 3021 return -EINVAL; 3022 3023 /* Only SL is available in legacy mode */ 3024 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) 3025 return -EINVAL; 3026 3027 if (!ecap_smpwc(iommu->ecap) && 3028 !(dmar_domain->fspt.x86_64_pt.common.features & 3029 BIT(PT_FEAT_DMA_INCOHERENT))) 3030 return -EINVAL; 3031 3032 /* Supports the number of table levels */ 3033 if (!cap_fl5lp_support(iommu->cap) && 3034 dmar_domain->fspt.x86_64_pt.common.max_vasz_lg2 > 48) 3035 return -EINVAL; 3036 3037 /* Same page size support */ 3038 if 
(!cap_fl1gp_support(iommu->cap) && 3039 (dmar_domain->domain.pgsize_bitmap & SZ_1G)) 3040 return -EINVAL; 3041 3042 /* iotlb sync on map requirement */ 3043 if ((rwbf_required(iommu)) && !dmar_domain->iotlb_sync_map) 3044 return -EINVAL; 3045 3046 return 0; 3047 } 3048 3049 static int 3050 paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain, 3051 struct intel_iommu *iommu) 3052 { 3053 unsigned int vasz_lg2 = dmar_domain->sspt.vtdss_pt.common.max_vasz_lg2; 3054 unsigned int sslps = cap_super_page_val(iommu->cap); 3055 struct pt_iommu_vtdss_hw_info pt_info; 3056 3057 pt_iommu_vtdss_hw_info(&dmar_domain->sspt, &pt_info); 3058 3059 if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu)) 3060 return -EINVAL; 3061 if (dmar_domain->nested_parent && !nested_supported(iommu)) 3062 return -EINVAL; 3063 3064 /* Legacy mode always supports second stage */ 3065 if (sm_supported(iommu) && !ecap_slts(iommu->ecap)) 3066 return -EINVAL; 3067 3068 if (!iommu_paging_structure_coherency(iommu) && 3069 !(dmar_domain->sspt.vtdss_pt.common.features & 3070 BIT(PT_FEAT_DMA_INCOHERENT))) 3071 return -EINVAL; 3072 3073 /* Address width falls within the capability */ 3074 if (cap_mgaw(iommu->cap) < vasz_lg2) 3075 return -EINVAL; 3076 3077 /* Page table level is supported. 
*/ 3078 if (!(cap_sagaw(iommu->cap) & BIT(pt_info.aw))) 3079 return -EINVAL; 3080 3081 /* Same page size support */ 3082 if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M)) 3083 return -EINVAL; 3084 if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G)) 3085 return -EINVAL; 3086 3087 /* iotlb sync on map requirement */ 3088 if ((rwbf_required(iommu) || cap_caching_mode(iommu->cap)) && 3089 !dmar_domain->iotlb_sync_map) 3090 return -EINVAL; 3091 3092 /* 3093 * FIXME this is locked wrong, it needs to be under the 3094 * dmar_domain->lock 3095 */ 3096 if ((dmar_domain->sspt.vtdss_pt.common.features & 3097 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE)) && 3098 !ecap_sc_support(iommu->ecap)) 3099 return -EINVAL; 3100 return 0; 3101 } 3102 3103 int paging_domain_compatible(struct iommu_domain *domain, struct device *dev) 3104 { 3105 struct device_domain_info *info = dev_iommu_priv_get(dev); 3106 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3107 struct intel_iommu *iommu = info->iommu; 3108 int ret = -EINVAL; 3109 3110 if (intel_domain_is_fs_paging(dmar_domain)) 3111 ret = paging_domain_compatible_first_stage(dmar_domain, iommu); 3112 else if (intel_domain_is_ss_paging(dmar_domain)) 3113 ret = paging_domain_compatible_second_stage(dmar_domain, iommu); 3114 else if (WARN_ON(true)) 3115 ret = -EINVAL; 3116 if (ret) 3117 return ret; 3118 3119 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && 3120 context_copied(iommu, info->bus, info->devfn)) 3121 return intel_pasid_setup_sm_context(dev); 3122 3123 return 0; 3124 } 3125 3126 static int intel_iommu_attach_device(struct iommu_domain *domain, 3127 struct device *dev, 3128 struct iommu_domain *old) 3129 { 3130 int ret; 3131 3132 device_block_translation(dev); 3133 3134 ret = paging_domain_compatible(domain, dev); 3135 if (ret) 3136 return ret; 3137 3138 ret = iopf_for_domain_set(domain, dev); 3139 if (ret) 3140 return ret; 3141 3142 ret = 
dmar_domain_attach_device(to_dmar_domain(domain), dev); 3143 if (ret) 3144 iopf_for_domain_remove(domain, dev); 3145 3146 return ret; 3147 } 3148 3149 static void intel_iommu_tlb_sync(struct iommu_domain *domain, 3150 struct iommu_iotlb_gather *gather) 3151 { 3152 cache_tag_flush_range(to_dmar_domain(domain), gather->start, 3153 gather->end, 3154 iommu_pages_list_empty(&gather->freelist)); 3155 iommu_put_pages_list(&gather->freelist); 3156 } 3157 3158 static bool domain_support_force_snooping(struct dmar_domain *domain) 3159 { 3160 struct device_domain_info *info; 3161 bool support = true; 3162 3163 assert_spin_locked(&domain->lock); 3164 list_for_each_entry(info, &domain->devices, link) { 3165 if (!ecap_sc_support(info->iommu->ecap)) { 3166 support = false; 3167 break; 3168 } 3169 } 3170 3171 return support; 3172 } 3173 3174 static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain) 3175 { 3176 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3177 struct device_domain_info *info; 3178 3179 guard(spinlock_irqsave)(&dmar_domain->lock); 3180 3181 if (dmar_domain->force_snooping) 3182 return true; 3183 3184 if (!domain_support_force_snooping(dmar_domain)) 3185 return false; 3186 3187 dmar_domain->force_snooping = true; 3188 list_for_each_entry(info, &dmar_domain->devices, link) 3189 intel_pasid_setup_page_snoop_control(info->iommu, info->dev, 3190 IOMMU_NO_PASID); 3191 return true; 3192 } 3193 3194 static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain) 3195 { 3196 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3197 3198 guard(spinlock_irqsave)(&dmar_domain->lock); 3199 if (!domain_support_force_snooping(dmar_domain)) 3200 return false; 3201 3202 /* 3203 * Second level page table supports per-PTE snoop control. The 3204 * iommu_map() interface will handle this by setting SNP bit. 
3205 */ 3206 dmar_domain->sspt.vtdss_pt.common.features |= 3207 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE); 3208 dmar_domain->force_snooping = true; 3209 return true; 3210 } 3211 3212 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) 3213 { 3214 struct device_domain_info *info = dev_iommu_priv_get(dev); 3215 3216 switch (cap) { 3217 case IOMMU_CAP_CACHE_COHERENCY: 3218 return true; 3219 case IOMMU_CAP_PRE_BOOT_PROTECTION: 3220 return dmar_platform_optin(); 3221 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: 3222 return ecap_sc_support(info->iommu->ecap); 3223 case IOMMU_CAP_DIRTY_TRACKING: 3224 return ssads_supported(info->iommu); 3225 case IOMMU_CAP_PCI_ATS_SUPPORTED: 3226 return info->ats_supported; 3227 default: 3228 return false; 3229 } 3230 } 3231 3232 static struct iommu_device *intel_iommu_probe_device(struct device *dev) 3233 { 3234 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; 3235 struct device_domain_info *info; 3236 struct intel_iommu *iommu; 3237 u8 bus, devfn; 3238 int ret; 3239 3240 iommu = device_lookup_iommu(dev, &bus, &devfn); 3241 if (!iommu || !iommu->iommu.ops) 3242 return ERR_PTR(-ENODEV); 3243 3244 info = kzalloc_obj(*info); 3245 if (!info) 3246 return ERR_PTR(-ENOMEM); 3247 3248 if (dev_is_real_dma_subdevice(dev)) { 3249 info->bus = pdev->bus->number; 3250 info->devfn = pdev->devfn; 3251 info->segment = pci_domain_nr(pdev->bus); 3252 } else { 3253 info->bus = bus; 3254 info->devfn = devfn; 3255 info->segment = iommu->segment; 3256 } 3257 3258 info->dev = dev; 3259 info->iommu = iommu; 3260 RB_CLEAR_NODE(&info->node); 3261 if (dev_is_pci(dev)) { 3262 if (ecap_dev_iotlb_support(iommu->ecap) && 3263 pci_ats_supported(pdev) && 3264 dmar_ats_supported(pdev, iommu)) { 3265 info->ats_supported = 1; 3266 info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); 3267 3268 /* 3269 * For IOMMU that supports device IOTLB throttling 3270 * (DIT), we assign PFSID to the invalidation desc 3271 * of a VF such that IOMMU HW can 
gauge queue depth 3272 * at PF level. If DIT is not set, PFSID will be 3273 * treated as reserved, which should be set to 0. 3274 */ 3275 if (ecap_dit(iommu->ecap)) 3276 info->pfsid = pci_dev_id(pci_physfn(pdev)); 3277 info->ats_qdep = pci_ats_queue_depth(pdev); 3278 } 3279 if (sm_supported(iommu)) { 3280 if (pasid_supported(iommu)) { 3281 int features = pci_pasid_features(pdev); 3282 3283 if (features >= 0) 3284 info->pasid_supported = features | 1; 3285 } 3286 3287 if (info->ats_supported && ecap_prs(iommu->ecap) && 3288 ecap_pds(iommu->ecap) && pci_pri_supported(pdev)) 3289 info->pri_supported = 1; 3290 } 3291 } 3292 3293 dev_iommu_priv_set(dev, info); 3294 if (pdev && pci_ats_supported(pdev)) { 3295 pci_prepare_ats(pdev, VTD_PAGE_SHIFT); 3296 ret = device_rbtree_insert(iommu, info); 3297 if (ret) 3298 goto free; 3299 } 3300 3301 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { 3302 ret = intel_pasid_alloc_table(dev); 3303 if (ret) { 3304 dev_err(dev, "PASID table allocation failed\n"); 3305 goto clear_rbtree; 3306 } 3307 3308 if (!context_copied(iommu, info->bus, info->devfn)) { 3309 ret = intel_pasid_setup_sm_context(dev); 3310 if (ret) 3311 goto free_table; 3312 } 3313 } 3314 3315 intel_iommu_debugfs_create_dev(info); 3316 3317 return &iommu->iommu; 3318 free_table: 3319 intel_pasid_free_table(dev); 3320 clear_rbtree: 3321 device_rbtree_remove(info); 3322 free: 3323 kfree(info); 3324 3325 return ERR_PTR(ret); 3326 } 3327 3328 static void intel_iommu_probe_finalize(struct device *dev) 3329 { 3330 struct device_domain_info *info = dev_iommu_priv_get(dev); 3331 struct intel_iommu *iommu = info->iommu; 3332 3333 /* 3334 * The PCIe spec, in its wisdom, declares that the behaviour of the 3335 * device is undefined if you enable PASID support after ATS support. 3336 * So always enable PASID support on devices which have it, even if 3337 * we can't yet know if we're ever going to use it. 
3338 */ 3339 if (info->pasid_supported && 3340 !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) 3341 info->pasid_enabled = 1; 3342 3343 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { 3344 iommu_enable_pci_ats(info); 3345 /* Assign a DEVTLB cache tag to the default domain. */ 3346 if (info->ats_enabled && info->domain) { 3347 u16 did = domain_id_iommu(info->domain, iommu); 3348 3349 if (cache_tag_assign(info->domain, did, dev, 3350 IOMMU_NO_PASID, CACHE_TAG_DEVTLB)) 3351 iommu_disable_pci_ats(info); 3352 } 3353 } 3354 iommu_enable_pci_pri(info); 3355 } 3356 3357 static void intel_iommu_release_device(struct device *dev) 3358 { 3359 struct device_domain_info *info = dev_iommu_priv_get(dev); 3360 struct intel_iommu *iommu = info->iommu; 3361 3362 iommu_disable_pci_pri(info); 3363 iommu_disable_pci_ats(info); 3364 3365 if (info->pasid_enabled) { 3366 pci_disable_pasid(to_pci_dev(dev)); 3367 info->pasid_enabled = 0; 3368 } 3369 3370 mutex_lock(&iommu->iopf_lock); 3371 if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev))) 3372 device_rbtree_remove(info); 3373 mutex_unlock(&iommu->iopf_lock); 3374 3375 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) && 3376 !context_copied(iommu, info->bus, info->devfn)) 3377 intel_pasid_teardown_sm_context(dev); 3378 3379 intel_pasid_free_table(dev); 3380 intel_iommu_debugfs_remove_dev(info); 3381 kfree(info); 3382 } 3383 3384 static void intel_iommu_get_resv_regions(struct device *device, 3385 struct list_head *head) 3386 { 3387 int prot = DMA_PTE_READ | DMA_PTE_WRITE; 3388 struct iommu_resv_region *reg; 3389 struct dmar_rmrr_unit *rmrr; 3390 struct device *i_dev; 3391 int i; 3392 3393 rcu_read_lock(); 3394 for_each_rmrr_units(rmrr) { 3395 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 3396 i, i_dev) { 3397 struct iommu_resv_region *resv; 3398 enum iommu_resv_type type; 3399 size_t length; 3400 3401 if (i_dev != device && 3402 !is_downstream_to_pci_bridge(device, i_dev)) 3403 
continue; 3404 3405 length = rmrr->end_address - rmrr->base_address + 1; 3406 3407 type = device_rmrr_is_relaxable(device) ? 3408 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT; 3409 3410 resv = iommu_alloc_resv_region(rmrr->base_address, 3411 length, prot, type, 3412 GFP_ATOMIC); 3413 if (!resv) 3414 break; 3415 3416 list_add_tail(&resv->list, head); 3417 } 3418 } 3419 rcu_read_unlock(); 3420 3421 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA 3422 if (dev_is_pci(device)) { 3423 struct pci_dev *pdev = to_pci_dev(device); 3424 3425 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) { 3426 reg = iommu_alloc_resv_region(0, 1UL << 24, prot, 3427 IOMMU_RESV_DIRECT_RELAXABLE, 3428 GFP_KERNEL); 3429 if (reg) 3430 list_add_tail(®->list, head); 3431 } 3432 } 3433 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */ 3434 3435 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, 3436 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, 3437 0, IOMMU_RESV_MSI, GFP_KERNEL); 3438 if (!reg) 3439 return; 3440 list_add_tail(®->list, head); 3441 } 3442 3443 static struct iommu_group *intel_iommu_device_group(struct device *dev) 3444 { 3445 if (dev_is_pci(dev)) 3446 return pci_device_group(dev); 3447 return generic_device_group(dev); 3448 } 3449 3450 int intel_iommu_enable_iopf(struct device *dev) 3451 { 3452 struct device_domain_info *info = dev_iommu_priv_get(dev); 3453 struct intel_iommu *iommu = info->iommu; 3454 int ret; 3455 3456 if (!info->pri_enabled) 3457 return -ENODEV; 3458 3459 /* pri_enabled is protected by the group mutex. 
*/ 3460 iommu_group_mutex_assert(dev); 3461 if (info->iopf_refcount) { 3462 info->iopf_refcount++; 3463 return 0; 3464 } 3465 3466 ret = iopf_queue_add_device(iommu->iopf_queue, dev); 3467 if (ret) 3468 return ret; 3469 3470 info->iopf_refcount = 1; 3471 3472 return 0; 3473 } 3474 3475 void intel_iommu_disable_iopf(struct device *dev) 3476 { 3477 struct device_domain_info *info = dev_iommu_priv_get(dev); 3478 struct intel_iommu *iommu = info->iommu; 3479 3480 if (WARN_ON(!info->pri_enabled || !info->iopf_refcount)) 3481 return; 3482 3483 iommu_group_mutex_assert(dev); 3484 if (--info->iopf_refcount) 3485 return; 3486 3487 iopf_queue_remove_device(iommu->iopf_queue, dev); 3488 } 3489 3490 static bool intel_iommu_is_attach_deferred(struct device *dev) 3491 { 3492 struct device_domain_info *info = dev_iommu_priv_get(dev); 3493 3494 return translation_pre_enabled(info->iommu) && !info->domain; 3495 } 3496 3497 /* 3498 * Check that the device does not live on an external facing PCI port that is 3499 * marked as untrusted. Such devices should not be able to apply quirks and 3500 * thus not be able to bypass the IOMMU restrictions. 
3501 */ 3502 static bool risky_device(struct pci_dev *pdev) 3503 { 3504 if (pdev->untrusted) { 3505 pci_info(pdev, 3506 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n", 3507 pdev->vendor, pdev->device); 3508 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n"); 3509 return true; 3510 } 3511 return false; 3512 } 3513 3514 static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, 3515 unsigned long iova, size_t size) 3516 { 3517 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3518 3519 if (dmar_domain->iotlb_sync_map) 3520 cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1); 3521 3522 return 0; 3523 } 3524 3525 void domain_remove_dev_pasid(struct iommu_domain *domain, 3526 struct device *dev, ioasid_t pasid) 3527 { 3528 struct device_domain_info *info = dev_iommu_priv_get(dev); 3529 struct dev_pasid_info *curr, *dev_pasid = NULL; 3530 struct intel_iommu *iommu = info->iommu; 3531 struct dmar_domain *dmar_domain; 3532 unsigned long flags; 3533 3534 if (!domain) 3535 return; 3536 3537 /* Identity domain and blocked domain have no meta data for pasid. 
*/ 3538 if (domain->type == IOMMU_DOMAIN_IDENTITY || domain->type == IOMMU_DOMAIN_BLOCKED) 3539 return; 3540 3541 dmar_domain = to_dmar_domain(domain); 3542 spin_lock_irqsave(&dmar_domain->lock, flags); 3543 list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { 3544 if (curr->dev == dev && curr->pasid == pasid) { 3545 list_del(&curr->link_domain); 3546 dev_pasid = curr; 3547 break; 3548 } 3549 } 3550 spin_unlock_irqrestore(&dmar_domain->lock, flags); 3551 3552 if (WARN_ON_ONCE(!dev_pasid)) 3553 return; 3554 3555 cache_tag_unassign_domain(dmar_domain, dev, pasid); 3556 domain_detach_iommu(dmar_domain, iommu); 3557 intel_iommu_debugfs_remove_dev_pasid(dev_pasid); 3558 kfree(dev_pasid); 3559 } 3560 3561 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, 3562 struct device *dev, ioasid_t pasid, 3563 struct iommu_domain *old) 3564 { 3565 struct device_domain_info *info = dev_iommu_priv_get(dev); 3566 3567 intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); 3568 iopf_for_domain_remove(old, dev); 3569 domain_remove_dev_pasid(old, dev, pasid); 3570 3571 return 0; 3572 } 3573 3574 struct dev_pasid_info * 3575 domain_add_dev_pasid(struct iommu_domain *domain, 3576 struct device *dev, ioasid_t pasid) 3577 { 3578 struct device_domain_info *info = dev_iommu_priv_get(dev); 3579 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3580 struct intel_iommu *iommu = info->iommu; 3581 struct dev_pasid_info *dev_pasid; 3582 unsigned long flags; 3583 int ret; 3584 3585 dev_pasid = kzalloc_obj(*dev_pasid); 3586 if (!dev_pasid) 3587 return ERR_PTR(-ENOMEM); 3588 3589 ret = domain_attach_iommu(dmar_domain, iommu); 3590 if (ret) 3591 goto out_free; 3592 3593 ret = cache_tag_assign_domain(dmar_domain, dev, pasid); 3594 if (ret) 3595 goto out_detach_iommu; 3596 3597 dev_pasid->dev = dev; 3598 dev_pasid->pasid = pasid; 3599 spin_lock_irqsave(&dmar_domain->lock, flags); 3600 list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); 3601 
spin_unlock_irqrestore(&dmar_domain->lock, flags); 3602 3603 return dev_pasid; 3604 out_detach_iommu: 3605 domain_detach_iommu(dmar_domain, iommu); 3606 out_free: 3607 kfree(dev_pasid); 3608 return ERR_PTR(ret); 3609 } 3610 3611 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, 3612 struct device *dev, ioasid_t pasid, 3613 struct iommu_domain *old) 3614 { 3615 struct device_domain_info *info = dev_iommu_priv_get(dev); 3616 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3617 struct intel_iommu *iommu = info->iommu; 3618 struct dev_pasid_info *dev_pasid; 3619 int ret; 3620 3621 if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING))) 3622 return -EINVAL; 3623 3624 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) 3625 return -EOPNOTSUPP; 3626 3627 if (context_copied(iommu, info->bus, info->devfn)) 3628 return -EBUSY; 3629 3630 ret = paging_domain_compatible(domain, dev); 3631 if (ret) 3632 return ret; 3633 3634 dev_pasid = domain_add_dev_pasid(domain, dev, pasid); 3635 if (IS_ERR(dev_pasid)) 3636 return PTR_ERR(dev_pasid); 3637 3638 ret = iopf_for_domain_replace(domain, old, dev); 3639 if (ret) 3640 goto out_remove_dev_pasid; 3641 3642 if (intel_domain_is_fs_paging(dmar_domain)) 3643 ret = domain_setup_first_level(iommu, dmar_domain, 3644 dev, pasid, old); 3645 else if (intel_domain_is_ss_paging(dmar_domain)) 3646 ret = domain_setup_second_level(iommu, dmar_domain, 3647 dev, pasid, old); 3648 else if (WARN_ON(true)) 3649 ret = -EINVAL; 3650 3651 if (ret) 3652 goto out_unwind_iopf; 3653 3654 domain_remove_dev_pasid(old, dev, pasid); 3655 3656 intel_iommu_debugfs_create_dev_pasid(dev_pasid); 3657 3658 return 0; 3659 3660 out_unwind_iopf: 3661 iopf_for_domain_replace(old, domain, dev); 3662 out_remove_dev_pasid: 3663 domain_remove_dev_pasid(domain, dev, pasid); 3664 return ret; 3665 } 3666 3667 static void *intel_iommu_hw_info(struct device *dev, u32 *length, 3668 enum iommu_hw_info_type *type) 3669 { 3670 struct 
device_domain_info *info = dev_iommu_priv_get(dev); 3671 struct intel_iommu *iommu = info->iommu; 3672 struct iommu_hw_info_vtd *vtd; 3673 3674 if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && 3675 *type != IOMMU_HW_INFO_TYPE_INTEL_VTD) 3676 return ERR_PTR(-EOPNOTSUPP); 3677 3678 vtd = kzalloc_obj(*vtd); 3679 if (!vtd) 3680 return ERR_PTR(-ENOMEM); 3681 3682 vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17; 3683 vtd->cap_reg = iommu->cap; 3684 vtd->ecap_reg = iommu->ecap; 3685 *length = sizeof(*vtd); 3686 *type = IOMMU_HW_INFO_TYPE_INTEL_VTD; 3687 return vtd; 3688 } 3689 3690 /* Set dirty tracking for the devices that the domain has been attached. */ 3691 static int domain_set_dirty_tracking(struct dmar_domain *domain, bool enable) 3692 { 3693 struct device_domain_info *info; 3694 struct dev_pasid_info *dev_pasid; 3695 int ret = 0; 3696 3697 lockdep_assert_held(&domain->lock); 3698 3699 list_for_each_entry(info, &domain->devices, link) { 3700 ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, 3701 IOMMU_NO_PASID, enable); 3702 if (ret) 3703 return ret; 3704 } 3705 3706 list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { 3707 info = dev_iommu_priv_get(dev_pasid->dev); 3708 ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, 3709 dev_pasid->pasid, enable); 3710 if (ret) 3711 break; 3712 } 3713 3714 return ret; 3715 } 3716 3717 static int parent_domain_set_dirty_tracking(struct dmar_domain *domain, 3718 bool enable) 3719 { 3720 struct dmar_domain *s1_domain; 3721 unsigned long flags; 3722 int ret; 3723 3724 spin_lock(&domain->s1_lock); 3725 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { 3726 spin_lock_irqsave(&s1_domain->lock, flags); 3727 ret = domain_set_dirty_tracking(s1_domain, enable); 3728 spin_unlock_irqrestore(&s1_domain->lock, flags); 3729 if (ret) 3730 goto err_unwind; 3731 } 3732 spin_unlock(&domain->s1_lock); 3733 return 0; 3734 3735 err_unwind: 3736 list_for_each_entry(s1_domain, &domain->s1_domains, 
s2_link) { 3737 spin_lock_irqsave(&s1_domain->lock, flags); 3738 domain_set_dirty_tracking(s1_domain, domain->dirty_tracking); 3739 spin_unlock_irqrestore(&s1_domain->lock, flags); 3740 } 3741 spin_unlock(&domain->s1_lock); 3742 return ret; 3743 } 3744 3745 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, 3746 bool enable) 3747 { 3748 struct dmar_domain *dmar_domain = to_dmar_domain(domain); 3749 int ret; 3750 3751 spin_lock(&dmar_domain->lock); 3752 if (dmar_domain->dirty_tracking == enable) 3753 goto out_unlock; 3754 3755 ret = domain_set_dirty_tracking(dmar_domain, enable); 3756 if (ret) 3757 goto err_unwind; 3758 3759 if (dmar_domain->nested_parent) { 3760 ret = parent_domain_set_dirty_tracking(dmar_domain, enable); 3761 if (ret) 3762 goto err_unwind; 3763 } 3764 3765 dmar_domain->dirty_tracking = enable; 3766 out_unlock: 3767 spin_unlock(&dmar_domain->lock); 3768 3769 return 0; 3770 3771 err_unwind: 3772 domain_set_dirty_tracking(dmar_domain, dmar_domain->dirty_tracking); 3773 spin_unlock(&dmar_domain->lock); 3774 return ret; 3775 } 3776 3777 static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn) 3778 { 3779 struct device_domain_info *info = dev_iommu_priv_get(dev); 3780 struct intel_iommu *iommu = info->iommu; 3781 struct context_entry *context; 3782 3783 spin_lock(&iommu->lock); 3784 context = iommu_context_addr(iommu, bus, devfn, 1); 3785 if (!context) { 3786 spin_unlock(&iommu->lock); 3787 return -ENOMEM; 3788 } 3789 3790 if (context_present(context) && !context_copied(iommu, bus, devfn)) { 3791 spin_unlock(&iommu->lock); 3792 return 0; 3793 } 3794 3795 copied_context_tear_down(iommu, context, bus, devfn); 3796 context_clear_entry(context); 3797 context_set_domain_id(context, FLPT_DEFAULT_DID); 3798 3799 /* 3800 * In pass through mode, AW must be programmed to indicate the largest 3801 * AGAW value supported by hardware. And ASR is ignored by hardware. 
3802 */ 3803 context_set_address_width(context, iommu->msagaw); 3804 context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH); 3805 context_set_fault_enable(context); 3806 context_set_present(context); 3807 if (!ecap_coherent(iommu->ecap)) 3808 clflush_cache_range(context, sizeof(*context)); 3809 context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn); 3810 spin_unlock(&iommu->lock); 3811 3812 return 0; 3813 } 3814 3815 static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data) 3816 { 3817 struct device *dev = data; 3818 3819 return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); 3820 } 3821 3822 static int device_setup_pass_through(struct device *dev) 3823 { 3824 struct device_domain_info *info = dev_iommu_priv_get(dev); 3825 3826 if (!dev_is_pci(dev)) 3827 return context_setup_pass_through(dev, info->bus, info->devfn); 3828 3829 return pci_for_each_dma_alias(to_pci_dev(dev), 3830 context_setup_pass_through_cb, dev); 3831 } 3832 3833 static int identity_domain_attach_dev(struct iommu_domain *domain, 3834 struct device *dev, 3835 struct iommu_domain *old) 3836 { 3837 struct device_domain_info *info = dev_iommu_priv_get(dev); 3838 struct intel_iommu *iommu = info->iommu; 3839 int ret; 3840 3841 device_block_translation(dev); 3842 3843 if (dev_is_real_dma_subdevice(dev)) 3844 return 0; 3845 3846 /* 3847 * No PRI support with the global identity domain. No need to enable or 3848 * disable PRI in this path as the iommu has been put in the blocking 3849 * state. 
3850 */ 3851 if (sm_supported(iommu)) 3852 ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); 3853 else 3854 ret = device_setup_pass_through(dev); 3855 3856 if (!ret) 3857 info->domain_attached = true; 3858 3859 return ret; 3860 } 3861 3862 static int identity_domain_set_dev_pasid(struct iommu_domain *domain, 3863 struct device *dev, ioasid_t pasid, 3864 struct iommu_domain *old) 3865 { 3866 struct device_domain_info *info = dev_iommu_priv_get(dev); 3867 struct intel_iommu *iommu = info->iommu; 3868 int ret; 3869 3870 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) 3871 return -EOPNOTSUPP; 3872 3873 ret = iopf_for_domain_replace(domain, old, dev); 3874 if (ret) 3875 return ret; 3876 3877 ret = domain_setup_passthrough(iommu, dev, pasid, old); 3878 if (ret) { 3879 iopf_for_domain_replace(old, domain, dev); 3880 return ret; 3881 } 3882 3883 domain_remove_dev_pasid(old, dev, pasid); 3884 return 0; 3885 } 3886 3887 static struct iommu_domain identity_domain = { 3888 .type = IOMMU_DOMAIN_IDENTITY, 3889 .ops = &(const struct iommu_domain_ops) { 3890 .attach_dev = identity_domain_attach_dev, 3891 .set_dev_pasid = identity_domain_set_dev_pasid, 3892 }, 3893 }; 3894 3895 const struct iommu_domain_ops intel_fs_paging_domain_ops = { 3896 IOMMU_PT_DOMAIN_OPS(x86_64), 3897 .attach_dev = intel_iommu_attach_device, 3898 .set_dev_pasid = intel_iommu_set_dev_pasid, 3899 .iotlb_sync_map = intel_iommu_iotlb_sync_map, 3900 .flush_iotlb_all = intel_flush_iotlb_all, 3901 .iotlb_sync = intel_iommu_tlb_sync, 3902 .free = intel_iommu_domain_free, 3903 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs, 3904 }; 3905 3906 const struct iommu_domain_ops intel_ss_paging_domain_ops = { 3907 IOMMU_PT_DOMAIN_OPS(vtdss), 3908 .attach_dev = intel_iommu_attach_device, 3909 .set_dev_pasid = intel_iommu_set_dev_pasid, 3910 .iotlb_sync_map = intel_iommu_iotlb_sync_map, 3911 .flush_iotlb_all = intel_flush_iotlb_all, 3912 .iotlb_sync = intel_iommu_tlb_sync, 
3913 .free = intel_iommu_domain_free, 3914 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss, 3915 }; 3916 3917 const struct iommu_ops intel_iommu_ops = { 3918 .blocked_domain = &blocking_domain, 3919 .release_domain = &blocking_domain, 3920 .identity_domain = &identity_domain, 3921 .capable = intel_iommu_capable, 3922 .hw_info = intel_iommu_hw_info, 3923 .domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags, 3924 .domain_alloc_sva = intel_svm_domain_alloc, 3925 .domain_alloc_nested = intel_iommu_domain_alloc_nested, 3926 .probe_device = intel_iommu_probe_device, 3927 .probe_finalize = intel_iommu_probe_finalize, 3928 .release_device = intel_iommu_release_device, 3929 .get_resv_regions = intel_iommu_get_resv_regions, 3930 .device_group = intel_iommu_device_group, 3931 .is_attach_deferred = intel_iommu_is_attach_deferred, 3932 .def_domain_type = device_def_domain_type, 3933 .page_response = intel_iommu_page_response, 3934 }; 3935 3936 static void quirk_iommu_igfx(struct pci_dev *dev) 3937 { 3938 if (risky_device(dev)) 3939 return; 3940 3941 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); 3942 disable_igfx_iommu = 1; 3943 } 3944 3945 /* Q35 integrated gfx dmar support is totally busted. */ 3946 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x29b2, quirk_iommu_igfx); 3947 3948 /* G4x/GM45 integrated gfx dmar support is totally busted. 
*/
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);

/* QM57/QS57 integrated gfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx);

/*
 * Broadwell igfx malfunctions with dmar. Each device ID below gets the
 * same header fixup, quirk_iommu_igfx.
 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx); 3978 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx); 3979 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx); 3980 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx); 3981 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx); 3982 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); 3983 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); 3984 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); 3985 3986 static void quirk_iommu_rwbf(struct pci_dev *dev) 3987 { 3988 if (risky_device(dev)) 3989 return; 3990 3991 /* 3992 * Mobile 4 Series Chipset neglects to set RWBF capability, 3993 * but needs it. Same seems to hold for the desktop versions. 3994 */ 3995 pci_info(dev, "Forcing write-buffer flush capability\n"); 3996 rwbf_quirk = 1; 3997 } 3998 3999 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); 4000 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf); 4001 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf); 4002 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf); 4003 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf); 4004 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf); 4005 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf); 4006 4007 #define GGC 0x52 4008 #define GGC_MEMORY_SIZE_MASK (0xf << 8) 4009 #define GGC_MEMORY_SIZE_NONE (0x0 << 8) 4010 #define GGC_MEMORY_SIZE_1M (0x1 << 8) 4011 #define GGC_MEMORY_SIZE_2M (0x3 << 8) 4012 #define GGC_MEMORY_VT_ENABLED (0x8 << 8) 4013 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8) 4014 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8) 4015 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8) 4016 4017 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) 4018 { 4019 unsigned 
short ggc; 4020 4021 if (risky_device(dev)) 4022 return; 4023 4024 if (pci_read_config_word(dev, GGC, &ggc)) 4025 return; 4026 4027 if (!(ggc & GGC_MEMORY_VT_ENABLED)) { 4028 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); 4029 disable_igfx_iommu = 1; 4030 } else if (!disable_igfx_iommu) { 4031 /* we have to ensure the gfx device is idle before we flush */ 4032 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); 4033 iommu_set_dma_strict(); 4034 } 4035 } 4036 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); 4037 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); 4038 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); 4039 4040 static void quirk_igfx_skip_te_disable(struct pci_dev *dev) 4041 { 4042 unsigned short ver; 4043 4044 if (!IS_GFX_DEVICE(dev)) 4045 return; 4046 4047 ver = (dev->device >> 8) & 0xff; 4048 if (ver != 0x45 && ver != 0x46 && ver != 0x4c && 4049 ver != 0x4e && ver != 0x8a && ver != 0x98 && 4050 ver != 0x9a && ver != 0xa7 && ver != 0x7d) 4051 return; 4052 4053 if (risky_device(dev)) 4054 return; 4055 4056 pci_info(dev, "Skip IOMMU disabling for graphics\n"); 4057 iommu_skip_te_disable = 1; 4058 } 4059 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable); 4060 4061 /* On Tylersburg chipsets, some BIOSes have been known to enable the 4062 ISOCH DMAR unit for the Azalia sound device, but not give it any 4063 TLB entries, which causes it to deadlock. Check for that. We do 4064 this in a function called from init_dmars(), instead of in a PCI 4065 quirk, because we don't want to print the obnoxious "BIOS broken" 4066 message if VT-d is actually disabled. 4067 */ 4068 static void __init check_tylersburg_isoch(void) 4069 { 4070 struct pci_dev *pdev; 4071 uint32_t vtisochctrl; 4072 4073 /* If there's no Azalia in the system anyway, forget it. 
*/ 4074 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL); 4075 if (!pdev) 4076 return; 4077 4078 if (risky_device(pdev)) { 4079 pci_dev_put(pdev); 4080 return; 4081 } 4082 4083 pci_dev_put(pdev); 4084 4085 /* System Management Registers. Might be hidden, in which case 4086 we can't do the sanity check. But that's OK, because the 4087 known-broken BIOSes _don't_ actually hide it, so far. */ 4088 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL); 4089 if (!pdev) 4090 return; 4091 4092 if (risky_device(pdev)) { 4093 pci_dev_put(pdev); 4094 return; 4095 } 4096 4097 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) { 4098 pci_dev_put(pdev); 4099 return; 4100 } 4101 4102 pci_dev_put(pdev); 4103 4104 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */ 4105 if (vtisochctrl & 1) 4106 return; 4107 4108 /* Drop all bits other than the number of TLB entries */ 4109 vtisochctrl &= 0x1c; 4110 4111 /* If we have the recommended number of TLB entries (16), fine. */ 4112 if (vtisochctrl == 0x10) 4113 return; 4114 4115 /* Zero TLB entries? You get to ride the short bus to school. */ 4116 if (!vtisochctrl) { 4117 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n" 4118 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 4119 dmi_get_system_info(DMI_BIOS_VENDOR), 4120 dmi_get_system_info(DMI_BIOS_VERSION), 4121 dmi_get_system_info(DMI_PRODUCT_VERSION)); 4122 iommu_identity_mapping |= IDENTMAP_AZALIA; 4123 return; 4124 } 4125 4126 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", 4127 vtisochctrl); 4128 } 4129 4130 /* 4131 * Here we deal with a device TLB defect where device may inadvertently issue ATS 4132 * invalidation completion before posted writes initiated with translated address 4133 * that utilized translations matching the invalidation address range, violating 4134 * the invalidation completion ordering. 
4135 * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is 4136 * vulnerable to this defect. In other words, any dTLB invalidation initiated not 4137 * under the control of the trusted/privileged host device driver must use this 4138 * quirk. 4139 * Device TLBs are invalidated under the following six conditions: 4140 * 1. Device driver does DMA API unmap IOVA 4141 * 2. Device driver unbind a PASID from a process, sva_unbind_device() 4142 * 3. PASID is torn down, after PASID cache is flushed. e.g. process 4143 * exit_mmap() due to crash 4144 * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where 4145 * VM has to free pages that were unmapped 4146 * 5. Userspace driver unmaps a DMA buffer 4147 * 6. Cache invalidation in vSVA usage (upcoming) 4148 * 4149 * For #1 and #2, device drivers are responsible for stopping DMA traffic 4150 * before unmap/unbind. For #3, iommu driver gets mmu_notifier to 4151 * invalidate TLB the same way as normal user unmap which will use this quirk. 4152 * The dTLB invalidation after PASID cache flush does not need this quirk. 4153 * 4154 * As a reminder, #6 will *NEED* this quirk as we enable nested translation. 4155 */ 4156 void quirk_extra_dev_tlb_flush(struct device_domain_info *info, 4157 unsigned long address, unsigned long mask, 4158 u32 pasid, u16 qdep) 4159 { 4160 u16 sid; 4161 4162 if (likely(!info->dtlb_extra_inval)) 4163 return; 4164 4165 sid = PCI_DEVID(info->bus, info->devfn); 4166 if (pasid == IOMMU_NO_PASID) { 4167 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, 4168 qdep, address, mask); 4169 } else { 4170 qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid, 4171 pasid, qdep, address, mask); 4172 } 4173 } 4174 4175 #define ecmd_get_status_code(res) (((res) & 0xff) >> 1) 4176 4177 /* 4178 * Function to submit a command to the enhanced command interface. The 4179 * valid enhanced command descriptions are defined in Table 47 of the 4180 * VT-d spec. 
The VT-d hardware implementation may support some but not 4181 * all commands, which can be determined by checking the Enhanced 4182 * Command Capability Register. 4183 * 4184 * Return values: 4185 * - 0: Command successful without any error; 4186 * - Negative: software error value; 4187 * - Nonzero positive: failure status code defined in Table 48. 4188 */ 4189 int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob) 4190 { 4191 unsigned long flags; 4192 u64 res; 4193 int ret; 4194 4195 if (!cap_ecmds(iommu->cap)) 4196 return -ENODEV; 4197 4198 raw_spin_lock_irqsave(&iommu->register_lock, flags); 4199 4200 res = readq(iommu->reg + DMAR_ECRSP_REG); 4201 if (res & DMA_ECMD_ECRSP_IP) { 4202 ret = -EBUSY; 4203 goto err; 4204 } 4205 4206 /* 4207 * Unconditionally write the operand B, because 4208 * - There is no side effect if an ecmd doesn't require an 4209 * operand B, but we set the register to some value. 4210 * - It's not invoked in any critical path. The extra MMIO 4211 * write doesn't bring any performance concerns. 4212 */ 4213 writeq(ob, iommu->reg + DMAR_ECEO_REG); 4214 writeq(ecmd | (oa << DMA_ECMD_OA_SHIFT), iommu->reg + DMAR_ECMD_REG); 4215 4216 IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, readq, 4217 !(res & DMA_ECMD_ECRSP_IP), res); 4218 4219 if (res & DMA_ECMD_ECRSP_IP) { 4220 ret = -ETIMEDOUT; 4221 goto err; 4222 } 4223 4224 ret = ecmd_get_status_code(res); 4225 err: 4226 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 4227 4228 return ret; 4229 } 4230 4231 MODULE_IMPORT_NS("GENERIC_PT_IOMMU"); 4232