// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "../iommu-pages.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
u32 intel_pasid_max_id = PASID_MAX;

/*
 * Per device pasid table management:
 */

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	u32 max_pasid = 0;
	int order, size;

	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return -ENODEV;
	if (WARN_ON(info->pasid_table))
		return -EEXIST;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
	if (!dir) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = dir;
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);

	return 0;
}

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		iommu_free_page(table);
	}

	iommu_free_pages(pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

static int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
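	/*
	 * The PASID table is two-level: the upper bits of the PASID select
	 * a directory entry, and the low PASID_PDE_SHIFT bits select a
	 * pasid entry in the leaf table that the directory entry points to.
	 */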
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		u64 tmp;

		entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
		if (!entries)
			return NULL;

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation, so there is no need to worry about a race
		 * with free and clear. However, this entry might be
		 * populated by others while we are preparing it. Use
		 * theirs with a retry.
		 */
		tmp = 0ULL;
		if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
				   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			iommu_free_page(entries);
			goto retry;
		}
		if (!ecap_coherent(info->iommu->ecap)) {
			clflush_cache_range(entries, VTD_PAGE_SIZE);
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
		}
	}

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->ats_enabled)
		return;

	if (pci_dev_is_disconnected(to_pci_dev(dev)))
		return;

	sid = PCI_DEVID(info->bus, info->devfn);
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * When PASID 0 is used, it indicates RID2PASID (a DMA request w/o
	 * PASID), so a devTLB flush w/o PASID should be used. For a non-zero
	 * PASID under SVA usage, the device could do DMA with multiple
	 * PASIDs; it is more efficient to flush the devTLB specific to the
	 * PASID.
	 */
	if (pasid == IOMMU_NO_PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	if (!pasid_pte_is_present(pte)) {
		if (!pasid_pte_is_fault_disabled(pte)) {
			WARN_ON(READ_ONCE(pte->val[0]) != 0);
			spin_unlock(&iommu->lock);
			return;
		}

		/*
		 * When a PASID is used for SVA by a device, it's possible
		 * that the pasid entry is non-present with the Fault
		 * Processing Disabled bit set. Clear the pasid entry and
		 * drain the PRQ for the PASID before return.
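		 * Draining makes sure that no page requests for this PASID
		 * are left pending in the page request queue once the entry
		 * is gone.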
		 */
		pasid_clear_entry(pte);
		spin_unlock(&iommu->lock);
		intel_iommu_drain_pasid_prq(dev, pasid);

		return;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
	if (!fault_ignore)
		intel_iommu_drain_pasid_prq(dev, pasid);
}

/*
 * This function flushes the caches for a newly set up pasid table entry.
 * Callers of it should not modify in-use pasid table entries.
 */
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * This function is supposed to be used after the caller updates the fields
 * of a pasid table entry, except for the SSADE and P bits. It does the
 * following:
 * - Flush the cacheline if needed
 * - Flush the caches per Table 28 "Guidance to Software for Invalidations"
 *   of the VT-d spec 5.0.
 */
static void intel_pasid_flush_present(struct intel_iommu *iommu,
				      struct device *dev,
				      u32 pasid, u16 did,
				      struct pasid_entry *pte)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * VT-d spec 5.0, Table 28 gives the following guidance for cache
	 * invalidation:
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

/*
 * Set up the scalable mode pasid table entry for first-level only
 * translation type.
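 * With this type, DMA requests are translated only through the
 * first-level page table that is programmed into the entry below.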
 */
static void pasid_pte_config_first_level(struct intel_iommu *iommu,
					 struct pasid_entry *pte,
					 pgd_t *pgd, u16 did, int flags)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));

	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
}

int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  u32 pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_first_level(iommu, pte, pgd, did, flags);

	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_first_level(struct intel_iommu *iommu,
				    struct device *dev, pgd_t *pgd,
				    u32 pasid, u16 did, u16 old_did,
				    int flags)
{
	struct pasid_entry *pte, new_pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_first_level(iommu, &new_pte, pgd, did, flags);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for second-level only
 * translation type.
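 * With this type, DMA requests are translated only through the
 * second-level (IOVA) page table of the domain.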
 */
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
					  struct pasid_entry *pte,
					  u64 pgd_val, int agaw, u16 did,
					  bool dirty_tracking)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (dirty_tracking)
		pasid_set_ssade(pte);

	pasid_set_present(pte);
}

int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
				      did, domain->dirty_tracking);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_second_level(struct intel_iommu *iommu,
				     struct dmar_domain *domain,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
				      domain->agaw, did,
				      domain->dirty_tracking);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set up dirty tracking on a second-level only or nested translation type.
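 * Dirty tracking is controlled through the SSADE bit, which lets hardware
 * update the access/dirty flags in the second-level paging entries.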
 */
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
				     struct device *dev, u32 pasid,
				     bool enabled)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
	    pgtt != PASID_ENTRY_PGTT_NESTED) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev,
			"Dirty tracking not supported on translation type %d\n",
			pgtt);
		return -EOPNOTSUPP;
	}

	if (pasid_get_ssade(pte) == enabled) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (enabled)
		pasid_set_ssade(pte);
	else
		pasid_clear_ssade(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 *   If (PGTT=SS or Nested)
	 *    - Domain-selective IOTLB invalidation
	 *   Else
	 *    - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
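 * With this type, DMA addresses are not translated; requests pass through
 * with their addresses unmodified.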
 */
static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
					  struct pasid_entry *pte, u16 did)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_present(pte);
}

int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_pass_through(iommu, pte, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	u16 did = FLPT_DEFAULT_DID;

	pasid_pte_config_pass_through(iommu, &new_pte, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set the page snoop control for a pasid entry which has been set up.
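 * Setting the PGSNP bit forces DMA accesses through this pasid entry to be
 * snooped; it is used when the domain enforces snooping of the device's
 * DMA (force_snooping).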
 */
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, did, pte);
}

static void pasid_pte_config_nestd(struct intel_iommu *iommu,
				   struct pasid_entry *pte,
				   struct iommu_hwpt_vtd_s1 *s1_cfg,
				   struct dmar_domain *s2_domain,
				   u16 did)
{
	struct dma_pte *pgd = s2_domain->pgd;

	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
		pasid_set_flpm(pte, 1);

	pasid_set_flptr(pte, s1_cfg->pgtbl_addr);

	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
		pasid_set_sre(pte);
		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
			pasid_set_wpe(pte);
	}

	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
		pasid_set_eafe(pte);

	if (s2_domain->force_snooping)
		pasid_set_pgsnp(pte);

	pasid_set_slptr(pte, virt_to_phys(pgd));
	pasid_set_fault_enable(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, s2_domain->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (s2_domain->dirty_tracking)
		pasid_set_ssade(pte);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
}

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * @iommu: IOMMU which the device belongs to
 * @dev: Device to be set up for translation
 * @pasid: PASID to be programmed in the device PASID table
 * @domain: User stage-1 domain nested on a stage-2 domain
 *
 * This is used for nested translation. The input domain should be of
 * nested type, nested on a parent domain with the 'is_nested_parent'
 * flag set.
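 *
 * Return: 0 on success, or a negative error code on failure.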
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     u32 pasid, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}
	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_nestd(iommu, pte, s1_cfg, s2_domain, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_nested(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid,
			       u16 old_did, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte, new_pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_nestd(iommu, &new_pte, s1_cfg, s2_domain, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Interfaces to set up or tear down a pasid table in the scalable-mode
 * context table entry:
 */

static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;
	u16 did;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, false);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_present(info, context, did, false);
}

static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev == &pdev->dev)
		device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);

	return 0;
}

void intel_pasid_teardown_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev)) {
		device_pasid_table_teardown(dev, info->bus, info->devfn);
		return;
	}

	pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
}

/*
 * Get the PASID directory size for a scalable mode context entry.
 * A value of X in the PDTS field of a scalable mode context entry
 * indicates a PASID directory with 2^(X + 7) entries.
 */
static unsigned long context_get_sm_pds(struct pasid_table *table)
{
	unsigned long pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}

static int context_entry_set_pasid_table(struct context_entry *context,
					 struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct pasid_table *table = info->pasid_table;
	struct intel_iommu *iommu = info->iommu;
	unsigned long pds;

	context_clear_entry(context);

	pds = context_get_sm_pds(table);
	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);

	if (info->ats_supported)
		context_set_sm_dte(context);
	if (info->pasid_supported)
		context_set_pasid(context);

	context_set_fault_enable(context);
	context_set_present(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	return 0;
}

static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, true);
	if (!context) {
		spin_unlock(&iommu->lock);
		return -ENOMEM;
	}

	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (context_copied(iommu, bus, devfn)) {
		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * For kdump cases, old valid entries may be cached due to
		 * the in-flight DMA and copied pgtable, but there is no
		 * unmapping behaviour for them, thus we need explicit cache
		 * flushes for all affected domain IDs and PASIDs used in
		 * the copied PASID table.
		 * Given that we have no idea about which domain IDs and
		 * PASIDs were used in the copied tables, upgrade them to
		 * global PASID and IOTLB cache invalidation.
		 */
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

		/*
		 * At this point, the device is supposed to have finished
		 * reset at its driver probe stage, so no in-flight DMA will
		 * exist, and we don't need to worry about it hereafter.
		 */
		clear_context_copied(iommu, bus, devfn);
	}

	context_entry_set_pasid_table(context, dev);
	spin_unlock(&iommu->lock);

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries we don't need to flush the caches. If it does
	 * cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
	}

	return 0;
}

static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev != &pdev->dev)
		return 0;

	return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
}

/*
 * Set the device's PASID table to its context table entry.
 *
 * The PASID table is set to the context entries of both the device itself
 * and its alias requester ID for DMA.
 */
int intel_pasid_setup_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev))
		return device_pasid_table_setup(dev, info->bus, info->devfn);

	return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
}

/*
 * Global Device-TLB invalidation following changes in a context entry which
 * was present.
 */
static void __context_flush_dev_iotlb(struct device_domain_info *info)
{
	if (!info->ats_enabled)
		return;

	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);

	/*
	 * There is no guarantee that the device DMA is stopped when it
	 * reaches here. Therefore, always attempt the extra device TLB
	 * invalidation quirk. The impact on performance is acceptable
	 * since this is not a performance-critical path.
	 */
	quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
				  info->ats_qdep);
}

/*
 * Cache invalidations after a change in a context table entry that was
 * present, according to Spec 6.5.3.3 (Guidance to Software for
 * Invalidations). If the IOMMU is in scalable mode and all PASID table
 * entries of the device were non-present, set @flush_domains to false;
 * otherwise, set it to true.
 */
void intel_context_flush_present(struct device_domain_info *info,
				 struct context_entry *context,
				 u16 did, bool flush_domains)
{
	struct intel_iommu *iommu = info->iommu;
	struct pasid_entry *pte;
	int i;

	/*
	 * Device-selective context-cache invalidation.
	 * The Domain-ID field of the Context-cache Invalidate Descriptor is
	 * ignored by hardware when operating in scalable mode. Therefore
	 * the @did value doesn't matter in scalable mode.
	 */
	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);

	/*
	 * For legacy mode:
	 * - Domain-selective IOTLB invalidation
	 * - Global Device-TLB invalidation to all affected functions
	 */
	if (!sm_supported(iommu)) {
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		__context_flush_dev_iotlb(info);

		return;
	}

	/*
	 * For scalable mode:
	 * - Domain-selective PASID-cache invalidation to affected domains
	 * - Domain-selective IOTLB invalidation to affected domains
	 * - Global Device-TLB invalidation to affected functions
	 */
	if (flush_domains) {
		/*
		 * If the IOMMU is running in scalable mode and there might
		 * be potential PASID translations, the caller should hold
		 * the lock to ensure that context changes and cache flushes
		 * are atomic.
		 */
		assert_spin_locked(&iommu->lock);
		for (i = 0; i < info->pasid_table->max_pasid; i++) {
			pte = intel_pasid_get_entry(info->dev, i);
			if (!pte || !pasid_pte_is_present(pte))
				continue;

			did = pasid_get_domain_id(pte);
			qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0);
			iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		}
	}

	__context_flush_dev_iotlb(info);
}