// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt) "DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "../iommu-pages.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
u32 intel_pasid_max_id = PASID_MAX;

/*
 * Per device pasid table management:
 */

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	u32 max_pasid = 0;
	int order, size;

	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return -ENODEV;
	if (WARN_ON(info->pasid_table))
		return -EEXIST;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
	if (!dir) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = dir;
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);

	return 0;
}

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		iommu_free_page(table);
	}

	iommu_free_pages(pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

static int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}
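
/*
 * Return the PASID table entry for @pasid. The PASID table is a two-level
 * structure: a directory page indexed by the upper PASID bits and leaf
 * pages indexed by the lower bits. Leaf pages are allocated on demand
 * with GFP_ATOMIC (callers may hold iommu->lock) and are never freed
 * afterwards. Returns NULL if the device has no PASID table, @pasid is
 * out of range, or the allocation fails.
 */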
static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		u64 tmp;

		entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
		if (!entries)
			return NULL;

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation. No worry about the race with free and
		 * clear. However, this entry might be populated by others
		 * while we are preparing it. Use theirs with a retry.
		 */
		tmp = 0ULL;
		if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
				   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			iommu_free_page(entries);
			goto retry;
		}
		if (!ecap_coherent(info->iommu->ecap)) {
			clflush_cache_range(entries, VTD_PAGE_SIZE);
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
		}
	}

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}
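
/*
 * Invalidate the device TLB (ATS) entries cached by @dev for @pasid.
 * This is a no-op if ATS is not enabled on the device or the device
 * has been disconnected from the bus.
 */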
static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->ats_enabled)
		return;

	if (pci_dev_is_disconnected(to_pci_dev(dev)))
		return;

	sid = PCI_DEVID(info->bus, info->devfn);
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * PASID 0 (RID2PASID) indicates a DMA request without PASID, so a
	 * devTLB flush without PASID is used. For a non-zero PASID under
	 * SVA usage, the device could do DMA with multiple PASIDs, so it
	 * is more efficient to flush the devTLB entries specific to the
	 * PASID.
	 */
	if (pasid == IOMMU_NO_PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
	if (!fault_ignore)
		intel_iommu_drain_pasid_prq(dev, pasid);
}

/*
 * This function flushes cache for a newly setup pasid table entry.
 * Caller of it should not modify the in-use pasid table entries.
 */
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * This function is supposed to be used after the caller updates the
 * fields of a pasid table entry, except for the SSADE and P bits. It:
 * - Flushes the cacheline if needed
 * - Flushes the caches per Table 28 "Guidance to Software for
 *   Invalidations" of the VT-d spec 5.0.
 */
static void intel_pasid_flush_present(struct intel_iommu *iommu,
				      struct device *dev,
				      u32 pasid, u16 did,
				      struct pasid_entry *pte)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * VT-d spec 5.0, Table 28 gives the following guidance for cache
	 * invalidation:
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
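
/*
 * Most of the translation-type setup paths below follow a common
 * pattern: a pasid_pte_config_*() helper fills in a pasid entry, the
 * intel_pasid_setup_*() interface installs it into a previously
 * non-present slot and flushes caches with pasid_flush_caches(), and
 * the intel_pasid_replace_*() interface overwrites a present entry and
 * flushes caches with intel_pasid_flush_present().
 */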

/*
 * Set up the scalable mode pasid table entry for first only
 * translation type.
 */
static void pasid_pte_config_first_level(struct intel_iommu *iommu,
					 struct pasid_entry *pte,
					 pgd_t *pgd, u16 did, int flags)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));

	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
}

int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  u32 pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_first_level(iommu, pte, pgd, did, flags);

	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_first_level(struct intel_iommu *iommu,
				    struct device *dev, pgd_t *pgd,
				    u32 pasid, u16 did, u16 old_did,
				    int flags)
{
	struct pasid_entry *pte, new_pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_first_level(iommu, &new_pte, pgd, did, flags);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for second only translation type.
 */
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
					  struct pasid_entry *pte,
					  u64 pgd_val, int agaw, u16 did,
					  bool dirty_tracking)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (dirty_tracking)
		pasid_set_ssade(pte);

	pasid_set_present(pte);
}

int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
				      did, domain->dirty_tracking);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_second_level(struct intel_iommu *iommu,
				     struct dmar_domain *domain,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
				      domain->agaw, did,
				      domain->dirty_tracking);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set up dirty tracking on a second only or nested translation type.
 */
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
				     struct device *dev, u32 pasid,
				     bool enabled)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
	    pgtt != PASID_ENTRY_PGTT_NESTED) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev,
			"Dirty tracking not supported on translation type %d\n",
			pgtt);
		return -EOPNOTSUPP;
	}

	if (pasid_get_ssade(pte) == enabled) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (enabled)
		pasid_set_ssade(pte);
	else
		pasid_clear_ssade(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 *   If (PGTT=SS or Nested)
	 *    - Domain-selective IOTLB invalidation
	 *   Else
	 *    - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
					  struct pasid_entry *pte, u16 did)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_present(pte);
}

int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_pass_through(iommu, pte, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
				     struct device *dev, u16 old_did,
				     u32 pasid)
{
	struct pasid_entry *pte, new_pte;
	u16 did = FLPT_DEFAULT_DID;

	pasid_pte_config_pass_through(iommu, &new_pte, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Set the page snoop control for a pasid entry which has been set up.
 */
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, did, pte);
}

static void pasid_pte_config_nested(struct intel_iommu *iommu,
				    struct pasid_entry *pte,
				    struct iommu_hwpt_vtd_s1 *s1_cfg,
				    struct dmar_domain *s2_domain,
				    u16 did)
{
	struct dma_pte *pgd = s2_domain->pgd;

	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);

	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
		pasid_set_flpm(pte, 1);

	pasid_set_flptr(pte, s1_cfg->pgtbl_addr);

	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
		pasid_set_sre(pte);
		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
			pasid_set_wpe(pte);
	}

	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
		pasid_set_eafe(pte);

	if (s2_domain->force_snooping)
		pasid_set_pgsnp(pte);

	pasid_set_slptr(pte, virt_to_phys(pgd));
	pasid_set_fault_enable(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, s2_domain->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (s2_domain->dirty_tracking)
		pasid_set_ssade(pte);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
}

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * @iommu: IOMMU which the device belongs to
 * @dev: Device to be set up for translation
 * @pasid: PASID to be programmed in the device PASID table
 * @domain: User stage-1 domain nested on a stage-2 domain
 *
 * This is used for nested translation. The input domain should be
 * nested type and nested on a parent with 'is_nested_parent' flag
 * set.
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     u32 pasid, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}
	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_nested(iommu, pte, s1_cfg, s2_domain, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

int intel_pasid_replace_nested(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid,
			       u16 old_did, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte, new_pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	pasid_pte_config_nested(iommu, &new_pte, s1_cfg, s2_domain, did);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (!pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EINVAL;
	}

	WARN_ON(old_did != pasid_get_domain_id(pte));

	*pte = new_pte;
	spin_unlock(&iommu->lock);

	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
	intel_iommu_drain_pasid_prq(dev, pasid);

	return 0;
}

/*
 * Interfaces to set up or tear down a pasid table to the scalable-mode
 * context table entry:
 */
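
/*
 * Clear the scalable-mode context entry for @bus/@devfn and flush the
 * caches that may hold it, following the guidance for changes to a
 * present context entry.
 */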
static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;
	u16 did;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, false);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_present(info, context, did, false);
}

static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev == &pdev->dev)
		device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);

	return 0;
}

void intel_pasid_teardown_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev)) {
		device_pasid_table_teardown(dev, info->bus, info->devfn);
		return;
	}

	pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
}

/*
 * Get the PASID directory size for a scalable mode context entry.
 * A value of X in the PDTS field of a scalable mode context entry
 * indicates a PASID directory with 2^(X + 7) entries.
 */
static unsigned long context_get_sm_pds(struct pasid_table *table)
{
	unsigned long pds, max_pde;

	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
	if (pds < 7)
		return 0;

	return pds - 7;
}

static int context_entry_set_pasid_table(struct context_entry *context,
					 struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct pasid_table *table = info->pasid_table;
	struct intel_iommu *iommu = info->iommu;
	unsigned long pds;

	context_clear_entry(context);

	pds = context_get_sm_pds(table);
	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);

	if (info->ats_supported)
		context_set_sm_dte(context);
	if (info->pasid_supported)
		context_set_pasid(context);

	context_set_fault_enable(context);
	context_set_present(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	return 0;
}
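
/*
 * Install the device's PASID table into the context entry for
 * @bus/@devfn. A context entry copied from a previous kernel (kdump) is
 * cleared and the caches are flushed before the new entry is written.
 */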
static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, true);
	if (!context) {
		spin_unlock(&iommu->lock);
		return -ENOMEM;
	}

	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (context_copied(iommu, bus, devfn)) {
		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * For kdump cases, old valid entries may be cached due to
		 * the in-flight DMA and copied pgtable, but there is no
		 * unmapping behaviour for them, thus we need explicit cache
		 * flushes for all affected domain IDs and PASIDs used in
		 * the copied PASID table. Given that we have no idea about
		 * which domain IDs and PASIDs were used in the copied tables,
		 * upgrade them to global PASID and IOTLB cache invalidation.
		 */
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

		/*
		 * At this point, the device is supposed to have finished
		 * reset at its driver probe stage, so no in-flight DMA will
		 * exist, and we don't need to worry about it anymore.
		 */
		clear_context_copied(iommu, bus, devfn);
	}

	context_entry_set_pasid_table(context, dev);
	spin_unlock(&iommu->lock);

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries, we don't need to flush the caches. If it
	 * does cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
	}

	return 0;
}

static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev != &pdev->dev)
		return 0;

	return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
}

/*
 * Set the device's PASID table to its context table entry.
 *
 * The PASID table is set to the context entries of both the device
 * itself and its alias requester ID for DMA.
 */
int intel_pasid_setup_sm_context(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev))
		return device_pasid_table_setup(dev, info->bus, info->devfn);

	return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
}

/*
 * Global Device-TLB invalidation following changes in a context entry which
 * was present.
 */
static void __context_flush_dev_iotlb(struct device_domain_info *info)
{
	if (!info->ats_enabled)
		return;

	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);

	/*
	 * There is no guarantee that the device DMA is stopped when it reaches
	 * here. Therefore, always attempt the extra device TLB invalidation
	 * quirk. The impact on performance is acceptable since this is not a
	 * performance-critical path.
	 */
	quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
				  info->ats_qdep);
}

/*
 * Cache invalidations after a change in a context table entry that was
 * present, according to Spec 6.5.3.3 (Guidance to Software for
 * Invalidations). If the IOMMU is in scalable mode and all PASID table
 * entries of the device were non-present, set flush_domains to false;
 * otherwise, set it to true.
 */
void intel_context_flush_present(struct device_domain_info *info,
				 struct context_entry *context,
				 u16 did, bool flush_domains)
{
	struct intel_iommu *iommu = info->iommu;
	struct pasid_entry *pte;
	int i;

	/*
	 * Device-selective context-cache invalidation. The Domain-ID field
	 * of the Context-cache Invalidate Descriptor is ignored by hardware
	 * when operating in scalable mode. Therefore the @did value doesn't
	 * matter in scalable mode.
	 */
	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);

	/*
	 * For legacy mode:
	 * - Domain-selective IOTLB invalidation
	 * - Global Device-TLB invalidation to all affected functions
	 */
	if (!sm_supported(iommu)) {
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		__context_flush_dev_iotlb(info);

		return;
	}

	/*
	 * For scalable mode:
	 * - Domain-selective PASID-cache invalidation to affected domains
	 * - Domain-selective IOTLB invalidation to affected domains
	 * - Global Device-TLB invalidation to affected functions
	 */
	if (flush_domains) {
		/*
		 * If the IOMMU is running in scalable mode and there might
		 * be potential PASID translations, the caller should hold
		 * the lock to ensure that context changes and cache flushes
		 * are atomic.
		 */
		assert_spin_locked(&iommu->lock);
		for (i = 0; i < info->pasid_table->max_pasid; i++) {
			pte = intel_pasid_get_entry(info->dev, i);
			if (!pte || !pasid_pte_is_present(pte))
				continue;

			did = pasid_get_domain_id(pte);
			qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0);
			iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
		}
	}

	__context_flush_dev_iotlb(info);
}