// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "../iommu-pages.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
u32 intel_pasid_max_id = PASID_MAX;

/*
 * Per device pasid table management:
 */

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	u32 max_pasid = 0;
	int order, size;

	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return -ENODEV;
	if (WARN_ON(info->pasid_table))
		return -EEXIST;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL,
					1 << (order + PAGE_SHIFT));
	if (!dir) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = dir;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);

	return 0;
}

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		iommu_free_pages(table);
	}

	iommu_free_pages(pasid_table->table);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

static int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
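	/*
	 * A PASID is resolved in two steps: the upper bits select a PASID
	 * directory entry and the low PASID_PDE_SHIFT bits select a PASID
	 * table entry within the 4 KiB leaf page that the directory entry
	 * points to. For example, with 64-byte scalable-mode PASID entries
	 * (64 entries per leaf page), PASID 0x1234 resolves to dir_index
	 * 0x48 and index 0x34.
	 */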
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		u64 tmp;

		entries = iommu_alloc_pages_node_sz(info->iommu->node,
						    GFP_ATOMIC, SZ_4K);
		if (!entries)
			return NULL;

		if (!ecap_coherent(info->iommu->ecap))
			clflush_cache_range(entries, VTD_PAGE_SIZE);

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation, so there is no race with free and clear.
		 * However, another thread might populate this entry while
		 * we are preparing it; if the cmpxchg loses, free our page
		 * and retry with theirs.
		 */
		tmp = 0ULL;
		if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
				   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			iommu_free_pages(entries);
			goto retry;
		}
		if (!ecap_coherent(info->iommu->ecap))
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
	}

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->ats_enabled)
		return;

	if (!pci_device_is_present(to_pci_dev(dev)))
		return;

	sid = PCI_DEVID(info->bus, info->devfn);
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * PASID 0 (IOMMU_NO_PASID) indicates RID2PASID, i.e. DMA requests
	 * without PASID, so a devTLB flush without PASID should be used.
	 * For a non-zero PASID under SVA usage, the device could do DMA
	 * with multiple PASIDs; it is more efficient to flush only the
	 * devTLB entries specific to the PASID.
	 */
	if (pasid == IOMMU_NO_PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	if (!pasid_pte_is_present(pte)) {
		if (!pasid_pte_is_fault_disabled(pte)) {
			WARN_ON(READ_ONCE(pte->val[0]) != 0);
			spin_unlock(&iommu->lock);
			return;
		}

		/*
		 * When a PASID is used for SVA by a device, it's possible
		 * that the pasid entry is non-present with the Fault
		 * Processing Disabled bit set. Clear the pasid entry and
		 * drain the PRQ for the PASID before returning.
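		 * (The Fault Processing Disabled bit keeps hardware from
		 * reporting the faults that in-flight DMA would otherwise
		 * raise while the teardown completes.)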
		 */
		pasid_clear_entry(pte);
		spin_unlock(&iommu->lock);
		intel_iommu_drain_pasid_prq(dev, pasid);

		return;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	pasid_clear_present(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (!fault_ignore)
		intel_iommu_drain_pasid_prq(dev, pasid);
}

/*
 * This function flushes the cache for a newly set-up pasid table entry.
 * The caller should not modify pasid table entries that are in use.
 */
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * This function is supposed to be used after the caller updates the
 * fields of a pasid table entry, except for the SSADE and P bits. It
 * does the following:
 * - Flush the cacheline if needed
 * - Flush the caches per Table 28 "Guidance to Software for Invalidations"
 *   of VT-d spec 5.0.
 */
static void intel_pasid_flush_present(struct intel_iommu *iommu,
				      struct device *dev,
				      u32 pasid, u16 did,
				      struct pasid_entry *pte)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * Table 28 "Guidance to Software for Invalidations" of VT-d spec
	 * 5.0 gives the following guidance for cache invalidation:
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

/*
 * Set up the scalable mode pasid table entry for first-level only
 * translation type.
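 * With this type, the IOMMU walks only the first-stage page table, which
 * uses the same format as the CPU's paging structures.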
 */
static void pasid_pte_config_first_level(struct intel_iommu *iommu,
					 struct pasid_entry *pte,
					 phys_addr_t fsptptr, u16 did,
					 int flags)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	/* Set up the first level page table pointer: */
	pasid_set_flptr(pte, fsptptr);

	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, flags & PASID_FLAG_PWSNP);

	/* Set up Present and PASID Granular Translation Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
}

int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev,
				  phys_addr_t fsptptr, u32 pasid, u16 did,
				  int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_first_level(iommu, pte, fsptptr, did, flags);

	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for second-level only translation
 * type.
 */
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
					  struct pasid_entry *pte,
					  struct dmar_domain *domain, u16 did)
{
	struct pt_iommu_vtdss_hw_info pt_info;

	lockdep_assert_held(&iommu->lock);

	pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);
	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pt_info.ssptptr);
	pasid_set_address_width(pte, pt_info.aw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !(domain->sspt.vtdss_pt.common.features &
				    BIT(PT_FEAT_DMA_INCOHERENT)));
	if (domain->dirty_tracking)
		pasid_set_ssade(pte);

	pasid_set_present(pte);
}

int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_second_level(iommu, pte, domain, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set up dirty tracking on a second-level only or nested translation type.
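 * Dirty tracking is controlled through the SSADE bit: when it is set, the
 * IOMMU records accessed/dirty bits in the second-stage page-table entries
 * as DMA writes proceed.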
 */
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
				     struct device *dev, u32 pasid,
				     bool enabled)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
	    pgtt != PASID_ENTRY_PGTT_NESTED) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev,
			"Dirty tracking not supported on translation type %d\n",
			pgtt);
		return -EOPNOTSUPP;
	}

	if (pasid_get_ssade(pte) == enabled) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (enabled)
		pasid_set_ssade(pte);
	else
		pasid_clear_ssade(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * From VT-d spec Table 25 "Guidance to Software for Invalidations":
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 *   If (PGTT=SS or Nested)
	 *    - Domain-selective IOTLB invalidation
	 *   Else
	 *    - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
					  struct pasid_entry *pte, u16 did)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_present(pte);
}

int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_pass_through(iommu, pte, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set the page snoop control for a pasid entry which has been set up.
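 * Setting PGSNP makes DMA accesses through this PASID snoop the processor
 * caches even if the device sets the no-snoop attribute; it is used when a
 * domain starts enforcing cache coherency after devices have been attached.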
 */
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, did, pte);
}

static void pasid_pte_config_nestd(struct intel_iommu *iommu,
				   struct pasid_entry *pte,
				   struct iommu_hwpt_vtd_s1 *s1_cfg,
				   struct dmar_domain *s2_domain,
				   u16 did)
{
	struct pt_iommu_vtdss_hw_info pt_info;

	lockdep_assert_held(&iommu->lock);

	pt_iommu_vtdss_hw_info(&s2_domain->sspt, &pt_info);

	pasid_clear_entry(pte);

	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
		pasid_set_flpm(pte, 1);

	pasid_set_flptr(pte, s1_cfg->pgtbl_addr);

	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
		pasid_set_sre(pte);
		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
			pasid_set_wpe(pte);
	}

	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
		pasid_set_eafe(pte);

	if (s2_domain->force_snooping)
		pasid_set_pgsnp(pte);

	pasid_set_slptr(pte, pt_info.ssptptr);
	pasid_set_fault_enable(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, pt_info.aw);
	pasid_set_page_snoop(pte, !(s2_domain->sspt.vtdss_pt.common.features &
				    BIT(PT_FEAT_DMA_INCOHERENT)));
	if (s2_domain->dirty_tracking)
		pasid_set_ssade(pte);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
}

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * @iommu: IOMMU which the device belongs to
 * @dev: Device to be set up for translation
 * @pasid: PASID to be programmed in the device PASID table
 * @domain: User stage-1 domain nested on a stage-2 domain
 *
 * This is used for nested translation. The input domain should be of
 * nested type, nested on a parent domain with the 'is_nested_parent'
 * flag set.
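 *
 * Return: 0 on success, or a negative errno value on failure.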
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     u32 pasid, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}
	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_nestd(iommu, pte, s1_cfg, s2_domain, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Interfaces to set up or tear down a pasid table in the scalable-mode
 * context table entry:
 */

static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;
	u16 did;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, false);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_no_pasid(info, context, did);
}

static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev == &pdev->dev)
		device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);

	return 0;
}

void intel_pasid_teardown_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev)) {
		device_pasid_table_teardown(dev, info->bus, info->devfn);
		return;
	}

	pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
}

/*
 * Get the PASID directory size for a scalable mode context entry.
 * A value of X in the PDTS field of a scalable mode context entry
 * indicates a PASID directory with 2^(X + 7) entries.
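 * For example, with 64 PASID entries per directory entry, a table with
 * max_pasid = 2^16 has 1024 = 2^10 directory entries, so pds = 10 below
 * and the PDTS value is 10 - 7 = 3, i.e. 2^(3 + 7) = 1024 entries.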
 */
static unsigned long context_get_sm_pds(struct pasid_table *table)
{
	unsigned long pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}

static int context_entry_set_pasid_table(struct context_entry *context,
					 struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct pasid_table *table = info->pasid_table;
	struct intel_iommu *iommu = info->iommu;
	unsigned long pds;

	context_clear_entry(context);

	pds = context_get_sm_pds(table);
	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);

	if (info->ats_supported)
		context_set_sm_dte(context);
	if (info->pasid_supported)
		context_set_pasid(context);
	if (info->pri_supported)
		context_set_sm_pre(context);

	context_set_fault_enable(context);
	context_set_present(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	return 0;
}

static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, true);
	if (!context) {
		spin_unlock(&iommu->lock);
		return -ENOMEM;
	}

	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (context_copied(iommu, bus, devfn)) {
		context_clear_present(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * For kdump cases, old valid entries may be cached due to
		 * the in-flight DMA and copied pgtable, but there is no
		 * unmapping behaviour for them, thus we need explicit cache
		 * flushes for all affected domain IDs and PASIDs used in
		 * the copied PASID table. Given that we have no idea about
		 * which domain IDs and PASIDs were used in the copied tables,
		 * upgrade them to global PASID and IOTLB cache invalidation.
		 */
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * At this point, the device is supposed to have finished
		 * reset at its driver probe stage, so no in-flight DMA
		 * will exist, and we don't need to worry anymore hereafter.
		 */
		clear_context_copied(iommu, bus, devfn);
	}

	context_entry_set_pasid_table(context, dev);
	spin_unlock(&iommu->lock);

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries, we don't need to flush the caches.
	 * If it does cache non-present entries, then it does so in the
	 * special domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
	}

	return 0;
}

static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev != &pdev->dev)
		return 0;

	return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
}

/*
 * Set the device's PASID table in its context table entry.
 *
 * The PASID table is set in the context entries of both the device itself
 * and its alias requester IDs for DMA.
 */
int intel_pasid_setup_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev))
		return device_pasid_table_setup(dev, info->bus, info->devfn);

	return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
}

/*
 * Global Device-TLB invalidation following changes in a context entry which
 * was present.
 */
static void __context_flush_dev_iotlb(struct device_domain_info *info)
{
	if (!info->ats_enabled)
		return;

	/*
	 * Skip the dev-IOTLB flush for inaccessible PCIe devices to prevent
	 * the Intel IOMMU from waiting indefinitely for an ATS invalidation
	 * that cannot complete.
	 */
	if (!pci_device_is_present(to_pci_dev(info->dev)))
		return;

	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);

	/*
	 * There is no guarantee that the device DMA is stopped when it reaches
	 * here. Therefore, always attempt the extra device TLB invalidation
	 * quirk. The impact on performance is acceptable since this is not a
	 * performance-critical path.
	 */
	quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
				  info->ats_qdep);
}

/*
 * Cache invalidations after a change in a context table entry that was
 * present, per the VT-d spec, Section 6.5.3.3 (Guidance to Software for
 * Invalidations). This helper can only be used when the IOMMU is working
 * in legacy mode, or when the IOMMU is in scalable mode but all PASID
 * table entries of the device are non-present.
 */
void intel_context_flush_no_pasid(struct device_domain_info *info,
				  struct context_entry *context, u16 did)
{
	struct intel_iommu *iommu = info->iommu;

	/*
	 * Device-selective context-cache invalidation. The Domain-ID field
	 * of the Context-cache Invalidate Descriptor is ignored by hardware
	 * when operating in scalable mode. Therefore the @did value doesn't
	 * matter in scalable mode.
	 */
	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);

	/*
	 * For legacy mode:
	 * - Domain-selective IOTLB invalidation
	 * - Global Device-TLB invalidation to all affected functions
	 */
	if (!sm_supported(iommu)) {
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		__context_flush_dev_iotlb(info);

		return;
	}

	__context_flush_dev_iotlb(info);
}