// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "../iommu-pages.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
u32 intel_pasid_max_id = PASID_MAX;

/*
 * Per device pasid table management:
 */

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	u32 max_pasid = 0;
	int order, size;

	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return -ENODEV;
	if (WARN_ON(info->pasid_table))
		return -EEXIST;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	/* Directory size in bytes: one 8-byte entry per 2^PASID_PDE_SHIFT PASIDs. */
	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
	if (!dir) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = dir;
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);

	return 0;
}

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		iommu_free_page(table);
	}

	iommu_free_pages(pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

static int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		u64 tmp;

		entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
		if (!entries)
			return NULL;

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation. No worry about the race with free and
		 * clear. However, this entry might be populated by others
		 * while we are preparing it. If so, use theirs and retry.
		 */
		tmp = 0ULL;
		if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
				   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			iommu_free_page(entries);
			goto retry;
		}
		if (!ecap_coherent(info->iommu->ecap)) {
			clflush_cache_range(entries, VTD_PAGE_SIZE);
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
		}
	}

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->ats_enabled)
		return;

	if (pci_dev_is_disconnected(to_pci_dev(dev)))
		return;

	sid = PCI_DEVID(info->bus, info->devfn);
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * PASID 0 (IOMMU_NO_PASID) indicates RID2PASID, i.e. a DMA request
	 * without PASID, so a devTLB flush without PASID should be used.
	 * For a non-zero PASID under SVA usage, the device could do DMA
	 * with multiple PASIDs; it is more efficient to flush only the
	 * devTLB entries specific to the PASID.
	 */
	if (pasid == IOMMU_NO_PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
	intel_iommu_drain_pasid_prq(dev, pasid);
}

/*
 * This function flushes the cache for a newly set-up pasid table entry.
 * The caller should not modify in-use pasid table entries.
 */
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * This function is supposed to be used after the caller has updated all
 * fields of a pasid table entry except for the SSADE and P bits. It does
 * the following:
 * - Flush the cacheline if needed
 * - Flush the caches per Table 28 "Guidance to Software for Invalidations"
 *   of the VT-d spec 5.0.
 */
static void intel_pasid_flush_present(struct intel_iommu *iommu,
				      struct device *dev,
				      u32 pasid, u16 did,
				      struct pasid_entry *pte)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * Table 28 "Guidance to Software for Invalidations" of the VT-d
	 * spec 5.0 requires the following cache invalidations:
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

/*
 * Set up the scalable mode pasid table entry for first-level-only
 * translation type.
 */
static void pasid_pte_config_first_level(struct intel_iommu *iommu,
					 struct pasid_entry *pte,
					 pgd_t *pgd, u16 did, int flags)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));

	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
}

int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  u32 pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_first_level(iommu, pte, pgd, did, flags);

	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_first_level(struct intel_iommu *iommu,
				    struct device *dev, pgd_t *pgd,
				    u32 pasid, u16 did, u16 old_did,
				    int flags)
{
	struct pasid_entry *pte, new_pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_first_level(iommu, &new_pte, pgd, did, flags);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for second-level-only
 * translation type.
 */
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
					  struct pasid_entry *pte,
					  u64 pgd_val, int agaw, u16 did,
					  bool dirty_tracking)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (dirty_tracking)
		pasid_set_ssade(pte);

	pasid_set_present(pte);
}

int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
				      did, domain->dirty_tracking);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_second_level(struct intel_iommu *iommu,
				     struct dmar_domain *domain,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
				      domain->agaw, did,
				      domain->dirty_tracking);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set up dirty tracking on a second-level-only or nested translation type.
 */
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
				     struct device *dev, u32 pasid,
				     bool enabled)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
	    pgtt != PASID_ENTRY_PGTT_NESTED) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev,
			"Dirty tracking not supported on translation type %d\n",
			pgtt);
		return -EOPNOTSUPP;
	}

	if (pasid_get_ssade(pte) == enabled) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (enabled)
		pasid_set_ssade(pte);
	else
		pasid_clear_ssade(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 *   If (PGTT=SS or Nested)
	 *    - Domain-selective IOTLB invalidation
	 *   Else
	 *    - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
					  struct pasid_entry *pte, u16 did)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_present(pte);
}

int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_pass_through(iommu, pte, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	u16 did = FLPT_DEFAULT_DID;

	pasid_pte_config_pass_through(iommu, &new_pte, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set the page snoop control for a pasid entry which has been set up.
 */
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, did, pte);
}

static void pasid_pte_config_nested(struct intel_iommu *iommu,
				    struct pasid_entry *pte,
				    struct iommu_hwpt_vtd_s1 *s1_cfg,
				    struct dmar_domain *s2_domain,
				    u16 did)
{
	struct dma_pte *pgd = s2_domain->pgd;

	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
		pasid_set_flpm(pte, 1);

	pasid_set_flptr(pte, s1_cfg->pgtbl_addr);

	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
		pasid_set_sre(pte);
		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
			pasid_set_wpe(pte);
	}

	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
		pasid_set_eafe(pte);

	if (s2_domain->force_snooping)
		pasid_set_pgsnp(pte);

	pasid_set_slptr(pte, virt_to_phys(pgd));
	pasid_set_fault_enable(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, s2_domain->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (s2_domain->dirty_tracking)
		pasid_set_ssade(pte);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
}

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * @iommu: IOMMU which the device belongs to
 * @dev: Device to be set up for translation
 * @pasid: PASID to be programmed in the device PASID table
 * @domain: User stage-1 domain nested on a stage-2 domain
 *
 * This is used for nested translation. The input domain should be of
 * nested type and nested on a parent with the 'is_nested_parent' flag
 * set.
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     u32 pasid, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}
	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_nested(iommu, pte, s1_cfg, s2_domain, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_nested(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid,
			       u16 old_did, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte, new_pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_nested(iommu, &new_pte, s1_cfg, s2_domain, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Interfaces to setup or teardown a pasid table to the scalable-mode
 * context table entry:
 */

static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;
	u16 did;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, false);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_present(info, context, did, false);
}

static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev == &pdev->dev)
		device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);

	return 0;
}

void intel_pasid_teardown_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev)) {
		device_pasid_table_teardown(dev, info->bus, info->devfn);
		return;
	}

	pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
}

/*
 * Get the PASID directory size for scalable mode context entry.
 * Value of X in the PDTS field of a scalable mode context entry
 * indicates PASID directory with 2^(X + 7) entries.
 */
static unsigned long context_get_sm_pds(struct pasid_table *table)
{
	unsigned long pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}

static int context_entry_set_pasid_table(struct context_entry *context,
					 struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct pasid_table *table = info->pasid_table;
	struct intel_iommu *iommu = info->iommu;
	unsigned long pds;

	context_clear_entry(context);

	pds = context_get_sm_pds(table);
	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);

	if (info->ats_supported)
		context_set_sm_dte(context);
	if (info->pasid_supported)
		context_set_pasid(context);

	context_set_fault_enable(context);
	context_set_present(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	return 0;
}

static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, true);
	if (!context) {
		spin_unlock(&iommu->lock);
		return -ENOMEM;
	}

	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (context_copied(iommu, bus, devfn)) {
		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * For kdump cases, old valid entries may be cached due to
		 * the in-flight DMA and copied pgtable, but there is no
		 * unmapping behaviour for them, thus we need explicit cache
		 * flushes for all affected domain IDs and PASIDs used in
		 * the copied PASID table. Given that we have no idea about
		 * which domain IDs and PASIDs were used in the copied tables,
		 * upgrade them to global PASID and IOTLB cache invalidation.
		 */
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

		/*
		 * At this point, the device is supposed to have finished its
		 * reset at the driver probe stage, so no in-flight DMA will
		 * exist, and we don't need to worry about it hereafter.
		 */
		clear_context_copied(iommu, bus, devfn);
	}

	context_entry_set_pasid_table(context, dev);
	spin_unlock(&iommu->lock);

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries, we don't need to flush the caches. If it does
	 * cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
	}

	return 0;
}

static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev != &pdev->dev)
		return 0;

	return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
}

/*
 * Set the device's PASID table to its context table entry.
 *
 * The PASID table is set to the context entries of both the device itself
 * and its alias requester ID for DMA.
 */
int intel_pasid_setup_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev))
		return device_pasid_table_setup(dev, info->bus, info->devfn);

	return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
}

/*
 * Global Device-TLB invalidation following changes in a context entry which
 * was present.
 */
static void __context_flush_dev_iotlb(struct device_domain_info *info)
{
	if (!info->ats_enabled)
		return;

	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);

	/*
	 * There is no guarantee that the device DMA is stopped when it reaches
	 * here. Therefore, always attempt the extra device TLB invalidation
	 * quirk. The impact on performance is acceptable since this is not a
	 * performance-critical path.
	 */
	quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
				  info->ats_qdep);
}

/*
 * Cache invalidations after a change in a context table entry that was
 * present, according to Section 6.5.3.3 of the VT-d spec (Guidance to
 * Software for Invalidations). If the IOMMU is in scalable mode and all
 * PASID table entries of the device were non-present, pass flush_domains
 * as false; otherwise, pass true.
 */
void intel_context_flush_present(struct device_domain_info *info,
				 struct context_entry *context,
				 u16 did, bool flush_domains)
{
	struct intel_iommu *iommu = info->iommu;
	struct pasid_entry *pte;
	int i;

	/*
	 * Device-selective context-cache invalidation. The Domain-ID field
	 * of the Context-cache Invalidate Descriptor is ignored by hardware
	 * when operating in scalable mode, so the @did value doesn't matter
	 * there.
	 */
	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);

	/*
	 * For legacy mode:
	 * - Domain-selective IOTLB invalidation
	 * - Global Device-TLB invalidation to all affected functions
	 */
	if (!sm_supported(iommu)) {
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		__context_flush_dev_iotlb(info);

		return;
	}

	/*
	 * For scalable mode:
	 * - Domain-selective PASID-cache invalidation to affected domains
	 * - Domain-selective IOTLB invalidation to affected domains
	 * - Global Device-TLB invalidation to affected functions
	 */
	if (flush_domains) {
		/*
		 * If the IOMMU is running in scalable mode and there might
		 * be potential PASID translations, the caller should hold
		 * the lock to ensure that context changes and cache flushes
		 * are atomic.
		 */
		assert_spin_locked(&iommu->lock);
		for (i = 0; i < info->pasid_table->max_pasid; i++) {
			pte = intel_pasid_get_entry(info->dev, i);
			if (!pte || !pasid_pte_is_present(pte))
				continue;

			did = pasid_get_domain_id(pte);
			qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0);
			iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		}
	}

	__context_flush_dev_iotlb(info);
}
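
/*
 * Example (illustrative sketch only, not part of the driver): how a
 * hypothetical caller might install a first-level translation for a PASID
 * and later tear it down, using the interfaces defined above. The
 * example_attach() name and the way dev, pgd, pasid and did are obtained
 * are assumptions for illustration; real callers get them from the domain
 * attach paths elsewhere in this driver.
 *
 *	static int example_attach(struct intel_iommu *iommu, struct device *dev,
 *				  pgd_t *pgd, u32 pasid, u16 did)
 *	{
 *		int ret;
 *
 *		// Program the PASID entry for first-level-only translation.
 *		ret = intel_pasid_setup_first_level(iommu, dev, pgd, pasid,
 *						    did, 0);
 *		if (ret)
 *			return ret;
 *
 *		// ... DMA with @pasid happens here ...
 *
 *		// Clear the entry and flush the relevant caches.
 *		intel_pasid_tear_down_entry(iommu, dev, pasid, false);
 *		return 0;
 *	}
 */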