// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"

/*
 * Intel IOMMU system-wide PASID name space:
 */
u32 intel_pasid_max_id = PASID_MAX;

/*
 * Per device pasid table management:
 */

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct page *pages;
	u32 max_pasid = 0;
	int order, size;

	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return -ENODEV;
	if (WARN_ON(info->pasid_table))
		return -EEXIST;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);

	return 0;
}

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

static int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
		if (!entries)
			return NULL;

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation, so there is no race with free and clear.
		 * However, this entry might be populated by another thread
		 * while we are preparing it; if it was, use theirs and retry.
		 */
		if (cmpxchg64(&dir[dir_index].val, 0ULL,
			      (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			free_pgtable_page(entries);
			goto retry;
		}
		if (!ecap_coherent(info->iommu->ecap)) {
			clflush_cache_range(entries, VTD_PAGE_SIZE);
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
		}
	}

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->ats_enabled)
		return;

	if (pci_dev_is_disconnected(to_pci_dev(dev)))
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * When PASID 0 is used, it indicates RID2PASID (a DMA request w/o
	 * PASID), and a devTLB flush w/o PASID should be used. For a non-zero
	 * PASID under SVA usage, the device could do DMA with multiple
	 * PASIDs, so it is more efficient to flush the devTLB specific to
	 * the PASID.
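	 *
	 * Both flushes below pass address 0 with a mask of
	 * 64 - VTD_PAGE_SHIFT, i.e. the entire address range cached for the
	 * target function (and PASID) is invalidated.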
	 */
	if (pasid == IOMMU_NO_PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

/*
 * This function flushes the cache for a newly set up pasid table entry.
 * Callers should not modify in-use pasid table entries.
 */
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * Set up the scalable mode pasid table entry for first-level only
 * translation type.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  u32 pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));

	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_nxe(pte);

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Skip the top levels of the page tables for an IOMMU whose agaw is
 * smaller than the domain's. This is unnecessary for PT mode.
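 *
 * For example, if the domain uses a 4-level table but the IOMMU only
 * supports 3-level walks, the hardware is pointed at the next-level
 * table referenced by the first top-level entry.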
 */
static int iommu_skip_agaw(struct dmar_domain *domain,
			   struct intel_iommu *iommu,
			   struct dma_pte **pgd)
{
	int agaw;

	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		*pgd = phys_to_virt(dma_pte_addr(*pgd));
		if (!dma_pte_present(*pgd))
			return -EINVAL;
	}

	return agaw;
}

/*
 * Set up the scalable mode pasid entry for second-level only translation type.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err(dev, "Invalid domain page table\n");
		return -EINVAL;
	}

	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (domain->dirty_tracking)
		pasid_set_ssade(pte);

	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set up dirty tracking on a second-level only or nested translation type.
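 *
 * SSADE only affects second-stage page-table walks, which is why the
 * entry must currently use second-level only or nested translation.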
 */
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
				     struct device *dev, u32 pasid,
				     bool enabled)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
	    pgtt != PASID_ENTRY_PGTT_NESTED) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev,
			"Dirty tracking not supported on translation type %d\n",
			pgtt);
		return -EOPNOTSUPP;
	}

	if (pasid_get_ssade(pte) == enabled) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (enabled)
		pasid_set_ssade(pte);
	else
		pasid_clear_ssade(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 *   If (PGTT=SS or Nested)
	 *    - Domain-selective IOTLB invalidation
	 *   Else
	 *    - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set the page snoop control for a pasid entry which has been set up.
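 *
 * Unlike the setup helpers above, this modifies an entry that may already
 * be live, so the explicit invalidation sequence below is used instead of
 * pasid_flush_caches().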
 */
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * VT-d spec 3.4, table 23 gives the guidance for cache invalidation:
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * @iommu:      IOMMU which the device belongs to
 * @dev:        Device to be set up for translation
 * @pasid:      PASID to be programmed in the device PASID table
 * @domain:     User stage-1 domain nested on a stage-2 domain
 *
 * This is used for nested translation. The input domain should be of
 * nested type, nested on a parent domain with the 'is_nested_parent'
 * flag set.
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     u32 pasid, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	pgd_t *s1_gpgd = (pgd_t *)(uintptr_t)domain->s1_pgtbl;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct dma_pte *pgd = s2_domain->pgd;
	struct pasid_entry *pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}
	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);

	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
		pasid_set_flpm(pte, 1);

	pasid_set_flptr(pte, (uintptr_t)s1_gpgd);

	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
		pasid_set_sre(pte);
		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
			pasid_set_wpe(pte);
	}

	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
		pasid_set_eafe(pte);

	if (s2_domain->force_snooping)
		pasid_set_pgsnp(pte);

	pasid_set_slptr(pte, virt_to_phys(pgd));
	pasid_set_fault_enable(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, s2_domain->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (s2_domain->dirty_tracking)
		pasid_set_ssade(pte);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Interfaces to set up or tear down a pasid table in the scalable-mode
 * context table entry:
 */

static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, false);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);

	/*
	 * Cache invalidation for changes to a scalable-mode context table
	 * entry.
	 *
	 * Section 6.5.3.3 of the VT-d spec:
	 * - Device-selective context-cache invalidation;
	 * - Domain-selective PASID-cache invalidation to affected domains
	 *   (can be skipped if all PASID entries were not-present);
	 * - Domain-selective IOTLB invalidation to affected domains;
	 * - Global Device-TLB invalidation to affected functions.
	 *
	 * The iommu has been parked in the blocking state. All domains have
	 * been detached from the device or PASID. The PASID and IOTLB caches
	 * have been invalidated during the domain detach path.
	 */
	iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn),
				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
	devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
}

static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev == &pdev->dev)
		device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);

	return 0;
}

void intel_pasid_teardown_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev)) {
		device_pasid_table_teardown(dev, info->bus, info->devfn);
		return;
	}

	pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
}

/*
 * Get the PASID directory size for a scalable mode context entry.
 * A value of X in the PDTS field of a scalable mode context entry
 * indicates a PASID directory with 2^(X + 7) entries.
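 *
 * For example, a PASID table with 2^15 entries has 2^9 directory
 * entries, so this helper returns 2 (2^(2 + 7) = 2^9).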
 */
static unsigned long context_get_sm_pds(struct pasid_table *table)
{
	unsigned long pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}

static int context_entry_set_pasid_table(struct context_entry *context,
					 struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct pasid_table *table = info->pasid_table;
	struct intel_iommu *iommu = info->iommu;
	unsigned long pds;

	context_clear_entry(context);

	pds = context_get_sm_pds(table);
	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);

	if (info->ats_supported)
		context_set_sm_dte(context);
	if (info->pri_supported)
		context_set_sm_pre(context);
	if (info->pasid_supported)
		context_set_pasid(context);

	context_set_fault_enable(context);
	context_set_present(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	return 0;
}

static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, true);
	if (!context) {
		spin_unlock(&iommu->lock);
		return -ENOMEM;
	}

	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (context_copied(iommu, bus, devfn)) {
		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * For kdump cases, old valid entries may be cached due to
		 * the in-flight DMA and copied pgtable, but there is no
		 * unmapping behaviour for them, thus we need explicit cache
		 * flushes for all affected domain IDs and PASIDs used in
		 * the copied PASID table. Given that we have no idea about
		 * which domain IDs and PASIDs were used in the copied tables,
		 * upgrade them to global PASID and IOTLB cache invalidation.
		 */
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

		/*
		 * At this point, the device is supposed to have finished its
		 * reset at the driver probe stage, so no in-flight DMA will
		 * exist, and we don't need to worry about it hereafter.
		 */
		clear_context_copied(iommu, bus, devfn);
	}

	context_entry_set_pasid_table(context, dev);
	spin_unlock(&iommu->lock);

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries, we don't need to flush the caches. If it
	 * does cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
	}

	return 0;
}

static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev != &pdev->dev)
		return 0;

	return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
}

/*
 * Set the device's PASID table in its context table entry.
 *
 * The PASID table is set in the context entries of both the device
 * itself and its DMA alias requester IDs.
 */
int intel_pasid_setup_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev))
		return device_pasid_table_setup(dev, info->bus, info->devfn);

	return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
}