// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva-lib.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)

static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
	return xa_alloc(&pasid_private_array, &pasid, priv,
			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
	xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
	return xa_load(&pasid_private_array, pasid);
}

static struct intel_svm_dev *
svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->sid == sid) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->dev == dev) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

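/*
 * Set up the page request queue (PRQ) for an IOMMU: allocate the queue
 * pages and the PRQ interrupt, create the I/O page fault (IOPF) queue, and
 * program the queue head, tail and address registers. Undone by
 * intel_svm_finish_prq().
 */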
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct iopf_queue *iopfq;
	struct page *pages;
	int irq, ret;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
		goto free_prq;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
		 "dmar%d-iopfq", iommu->seq_id);
	iopfq = iopf_queue_alloc(iommu->iopfq_name);
	if (!iopfq) {
		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
		ret = -ENOMEM;
		goto free_hwirq;
	}
	iommu->iopf_queue = iopfq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		goto free_iopfq;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	init_completion(&iommu->prq_complete);

	return 0;

free_iopfq:
	iopf_queue_free(iommu->iopf_queue);
	iommu->iopf_queue = NULL;
free_hwirq:
	dmar_free_hwirq(irq);
	iommu->pr_irq = 0;
free_prq:
	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	if (iommu->pr_irq) {
		free_irq(iommu->pr_irq, iommu);
		dmar_free_hwirq(iommu->pr_irq);
		iommu->pr_irq = 0;
	}

	if (iommu->iopf_queue) {
		iopf_queue_free(iommu->iopf_queue);
		iommu->iopf_queue = NULL;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}

	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

static void __flush_svm_range_dev(struct intel_svm *svm,
				  struct intel_svm_dev *sdev,
				  unsigned long address,
				  unsigned long pages, int ih)
{
	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

	if (WARN_ON(!pages))
		return;

	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
	if (info->ats_enabled)
		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
					 svm->pasid, sdev->qdep, address,
					 order_base_2(pages));
}

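/*
 * Flush a range for one device. The device IOTLB invalidation encodes the
 * region as an address plus a power-of-two mask, so the requested range is
 * carved into naturally aligned power-of-two windows and
 * __flush_svm_range_dev() is called once per window.
 */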
static void intel_flush_svm_range_dev(struct intel_svm *svm,
				      struct intel_svm_dev *sdev,
				      unsigned long address,
				      unsigned long pages, int ih)
{
	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
	unsigned long start = ALIGN_DOWN(address, align);
	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

	while (start < end) {
		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
		start += align;
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				  unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

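/*
 * mmu_notifier invalidate_range callback: keep the IOTLB and device TLBs in
 * sync with the CPU page tables by flushing the affected range for every
 * device bound to this shared PASID.
 */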
/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *sdev;

	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared. And __mmu_notifier_release() will delete us from
	 * the list of notifiers so that our invalidate_range() callback doesn't
	 * get called when the page tables are cleared. So we need to protect
	 * against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
					    svm->pasid, true);
	rcu_read_unlock();

}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);

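/*
 * Resolve @pasid to its intel_svm and the intel_svm_dev belonging to @dev.
 * On a successful (zero) return, *rsvm and/or *rsdev may still be NULL when
 * no such binding exists; a non-zero return indicates invalid input or
 * inconsistent internal state.
 */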
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
			     struct intel_svm **rsvm,
			     struct intel_svm_dev **rsdev)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm;

	/* The caller should hold the pasid_mutex lock */
	if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
		return -EINVAL;

	if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
		return -EINVAL;

	svm = pasid_private_find(pasid);
	if (IS_ERR(svm))
		return PTR_ERR(svm);

	if (!svm)
		goto out;

	/*
	 * If we found svm for the PASID, there must be at least one device
	 * bond.
	 */
	if (WARN_ON(list_empty(&svm->devs)))
		return -EINVAL;
	sdev = svm_lookup_device_by_dev(svm, dev);

out:
	*rsvm = svm;
	*rsdev = sdev;

	return 0;
}

static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
				 unsigned int flags)
{
	ioasid_t max_pasid = dev_is_pci(dev) ?
			pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;

	return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
}

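/*
 * Bind @mm to @dev under PASID mm->pasid: find or create the intel_svm for
 * this mm (registering the MMU notifier for non-supervisor binds), attach an
 * intel_svm_dev for the device and program its first-level PASID table
 * entry. Called with pasid_mutex held.
 */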
static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
					   struct device *dev,
					   struct mm_struct *mm,
					   unsigned int flags)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	unsigned long iflags, sflags;
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	int ret = 0;

	svm = pasid_private_find(mm->pasid);
	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm)
			return ERR_PTR(-ENOMEM);

		svm->pasid = mm->pasid;
		svm->mm = mm;
		svm->flags = flags;
		INIT_LIST_HEAD_RCU(&svm->devs);

		if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
			svm->notifier.ops = &intel_mmuops;
			ret = mmu_notifier_register(&svm->notifier, mm);
			if (ret) {
				kfree(svm);
				return ERR_PTR(ret);
			}
		}

		ret = pasid_private_add(svm->pasid, svm);
		if (ret) {
			if (svm->notifier.ops)
				mmu_notifier_unregister(&svm->notifier, mm);
			kfree(svm);
			return ERR_PTR(ret);
		}
	}

	/* Find the matching device in svm list */
	sdev = svm_lookup_device_by_dev(svm, dev);
	if (sdev) {
		sdev->users++;
		goto success;
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto free_svm;
	}

	sdev->dev = dev;
	sdev->iommu = iommu;
	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	sdev->users = 1;
	sdev->pasid = svm->pasid;
	sdev->sva.dev = dev;
	init_rcu_head(&sdev->rcu);
	if (info->ats_enabled) {
		sdev->dev_iotlb = 1;
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Setup the pasid table: */
	sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
			PASID_FLAG_SUPERVISOR_MODE : 0;
	sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
	spin_lock_irqsave(&iommu->lock, iflags);
	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
					    FLPT_DEFAULT_DID, sflags);
	spin_unlock_irqrestore(&iommu->lock, iflags);

	if (ret)
		goto free_sdev;

	list_add_rcu(&sdev->list, &svm->devs);
success:
	return &sdev->sva;

free_sdev:
	kfree(sdev);
free_svm:
	if (list_empty(&svm->devs)) {
		if (svm->notifier.ops)
			mmu_notifier_unregister(&svm->notifier, mm);
		pasid_private_remove(mm->pasid);
		kfree(svm);
	}

	return ERR_PTR(ret);
}

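/*
 * Tear down one device binding for @pasid: drop the sdev reference and, on
 * the last unbind, clear the PASID table entry, drain in-flight page
 * requests and free the per-device and (when no devices remain) per-mm
 * state.
 */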
/* Caller must hold pasid_mutex */
static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_iommu *iommu;
	struct intel_svm *svm;
	struct mm_struct *mm;
	int ret = -EINVAL;

	iommu = device_to_iommu(dev, NULL, NULL);
	if (!iommu)
		goto out;

	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
	if (ret)
		goto out;
	mm = svm->mm;

	if (sdev) {
		sdev->users--;
		if (!sdev->users) {
			list_del_rcu(&sdev->list);
			/* Flush the PASID cache and IOTLB for this device.
			 * Note that we do depend on the hardware *not* using
			 * the PASID any more. Just as we depend on other
			 * devices never using PASIDs that they have no right
			 * to use. We have a *shared* PASID table, because it's
			 * large and has to be physically contiguous. So it's
			 * hard to be as defensive as we might like. */
			intel_pasid_tear_down_entry(iommu, dev,
						    svm->pasid, false);
			intel_svm_drain_prq(dev, svm->pasid);
			kfree_rcu(sdev, rcu);

			if (list_empty(&svm->devs)) {
				if (svm->notifier.ops)
					mmu_notifier_unregister(&svm->notifier, mm);
				pasid_private_remove(svm->pasid);
				/* We mandate that no page faults may be outstanding
				 * for the PASID when intel_svm_unbind_mm() is called.
				 * If that is not obeyed, subtle errors will happen.
				 * Let's make them less subtle... */
				memset(svm, 0x6b, sizeof(*svm));
				kfree(svm);
			}
		}
	}
out:
	return ret;
}

/* Page request queue descriptor */
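/*
 * The bit-field layout below mirrors the 32-byte page request descriptor
 * that hardware writes into the page request queue (see the "Page Request
 * Descriptor" section of the VT-d specification). priv_data carries the
 * requester's private data, which must be echoed back in the page group
 * response.
 */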
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

static bool is_canonical_address(u64 addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long saddr = (long) addr;

	return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_svm_drain_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then follow the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
static void intel_svm_drain_prq(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct qi_desc desc[3];
	struct pci_dev *pdev;
	int head, tail;
	u16 sid, did;
	int qdep;

	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return;

	if (!info->pri_enabled)
		return;

	iommu = info->iommu;
	domain = info->domain;
	pdev = to_pci_dev(dev);
	sid = PCI_DEVID(info->bus, info->devfn);
	did = domain->iommu_did[iommu->seq_id];
	qdep = pci_ats_queue_depth(pdev);

	/*
	 * Check and wait until all pending page requests in the queue are
	 * handled by the prq handling thread.
	 */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct page_req_dsc *req;

		req = &iommu->prq[head / sizeof(*req)];
		if (!req->pasid_present || req->pasid != pasid) {
			head = (head + sizeof(*req)) & PRQ_RING_MASK;
			continue;
		}

		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}

	/*
	 * A work in IO page fault workqueue may try to lock pasid_mutex now.
	 * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for
	 * all works in the workqueue to finish may cause deadlock.
	 *
	 * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
	 * Unlock it to allow the works to be handled while waiting for
	 * them to finish.
	 */
	lockdep_assert_held(&pasid_mutex);
	mutex_unlock(&pasid_mutex);
	iopf_queue_flush_dev(dev);
	mutex_lock(&pasid_mutex);

	/*
	 * Perform steps described in VT-d spec CH7.10 to drain page
	 * requests and responses in hardware.
	 */
	memset(desc, 0, sizeof(desc));
	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_FENCE |
			QI_IWD_TYPE;
	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
			QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
			QI_DEV_EIOTLB_SID(sid) |
			QI_DEV_EIOTLB_QDEP(qdep) |
			QI_DEIOTLB_TYPE |
			QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
	reinit_completion(&iommu->prq_complete);
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		wait_for_completion(&iommu->prq_complete);
		goto qi_retry;
	}
}

static int prq_to_iommu_prot(struct page_req_dsc *req)
{
	int prot = 0;

	if (req->rd_req)
		prot |= IOMMU_FAULT_PERM_READ;
	if (req->wr_req)
		prot |= IOMMU_FAULT_PERM_WRITE;
	if (req->exe_req)
		prot |= IOMMU_FAULT_PERM_EXEC;
	if (req->pm_req)
		prot |= IOMMU_FAULT_PERM_PRIV;

	return prot;
}

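/*
 * Translate a page request descriptor into a generic iommu_fault_event and
 * report it through iommu_report_device_fault(), which forwards it to the
 * registered fault handler (for SVA-capable devices, the IOPF queue that
 * resolves the fault and triggers a page response).
 */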
static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
				struct page_req_dsc *desc)
{
	struct iommu_fault_event event;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	/* Fill in event data for device specific processing */
	memset(&event, 0, sizeof(struct iommu_fault_event));
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
	event.fault.prm.pasid = desc->pasid;
	event.fault.prm.grpid = desc->prg_index;
	event.fault.prm.perm = prq_to_iommu_prot(desc);

	if (desc->lpig)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (desc->pasid_present) {
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
	}
	if (desc->priv_data_present) {
		/*
		 * Set the last-page-in-group bit: when private data is
		 * present, a page response is required just as it is for
		 * LPIG. iommu_report_device_fault() doesn't understand this
		 * vendor-specific requirement, thus we set last_page as a
		 * workaround.
		 */
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
		event.fault.prm.private_data[0] = desc->priv_data[0];
		event.fault.prm.private_data[1] = desc->priv_data[1];
	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
		/*
		 * If the private data fields are not used by hardware, use
		 * them to monitor the prq handling latency.
		 */
		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
	}

	return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
				 struct page_req_dsc *req, int result)
{
	struct qi_desc desc;

	pr_err("%s: Invalid page request: %08llx %08llx\n",
	       iommu->name, ((unsigned long long *)req)[0],
	       ((unsigned long long *)req)[1]);

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must
	 * respond with page group response if private data
	 * is present (PDP) or last page in group (LPIG) bit
	 * is set. This is an additional VT-d feature beyond
	 * PCI ATS spec.
	 */
	if (!req->lpig && !req->priv_data_present)
		return;

	desc.qw0 = QI_PGRP_PASID(req->pasid) |
			QI_PGRP_DID(req->rid) |
			QI_PGRP_PASID_P(req->pasid_present) |
			QI_PGRP_PDP(req->priv_data_present) |
			QI_PGRP_RESP_CODE(result) |
			QI_PGRP_RESP_TYPE;
	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
			QI_PGRP_LPIG(req->lpig);

	if (req->priv_data_present) {
		desc.qw2 = req->priv_data[0];
		desc.qw3 = req->priv_data[1];
	} else {
		desc.qw2 = 0;
		desc.qw3 = 0;
	}

	qi_submit_sync(iommu, &desc, 1, 0);
}

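/*
 * Threaded handler for the page request interrupt: walk the descriptors
 * between the queue head and tail, validate each request, report it as an
 * I/O page fault (or answer malformed requests with QI_RESP_INVALID), then
 * advance the head register and clear any pending overflow condition.
 */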
static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_iommu *iommu = d;
	struct intel_svm *svm = NULL;
	struct page_req_dsc *req;
	int head, tail, handled;
	u64 address;

	/*
	 * Clear PPR bit before reading head/tail registers, to ensure that
	 * we get a new interrupt if needed.
	 */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	handled = (head != tail);
	while (head != tail) {
		req = &iommu->prq[head / sizeof(*req)];
		address = (u64)req->addr << VTD_PAGE_SHIFT;

		if (unlikely(!req->pasid_present)) {
			pr_err("IOMMU: %s: Page request without PASID\n",
			       iommu->name);
bad_req:
			svm = NULL;
			sdev = NULL;
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
			goto prq_advance;
		}

		if (unlikely(!is_canonical_address(address))) {
			pr_err("IOMMU: %s: Address is not canonical\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->exe_req && req->rd_req)) {
			pr_err("IOMMU: %s: Execution request not supported\n",
			       iommu->name);
			goto bad_req;
		}

		/* Drop Stop Marker message. No need for a response. */
		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
			goto prq_advance;

		if (!svm || svm->pasid != req->pasid) {
			/*
			 * It can't go away, because the driver is not permitted
			 * to unbind the mm while any page faults are outstanding.
			 */
			svm = pasid_private_find(req->pasid);
			if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE))
				goto bad_req;
		}

		if (!sdev || sdev->sid != req->rid) {
			sdev = svm_lookup_device_by_sid(svm, req->rid);
			if (!sdev)
				goto bad_req;
		}

		sdev->prq_seq_number++;

		/*
		 * If prq is to be handled outside iommu driver via receiver of
		 * the fault notifiers, we skip the page response here.
		 */
		if (intel_svm_prq_report(iommu, sdev->dev, req))
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);

		trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1,
				 req->priv_data[0], req->priv_data[1],
				 sdev->prq_seq_number);
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	/*
	 * Clear the page request overflow bit and wake up all threads that
	 * are waiting for the completion of this handling.
	 */
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
				    iommu->name);
		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
		if (head == tail) {
			iopf_queue_discard_partial(iommu->iopf_queue);
			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
					    iommu->name);
		}
	}

	if (!completion_done(&iommu->prq_complete))
		complete(&iommu->prq_complete);

	return IRQ_RETVAL(handled);
}

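/*
 * Entry point used by the IOMMU core's SVA API: validate any
 * SVM_FLAG_SUPERVISOR_MODE request, allocate a PASID for the mm and perform
 * the actual bind under pasid_mutex.
 *
 * A rough consumer-side sketch (hypothetical device driver, going through
 * the generic SVA wrappers that reach this driver via its iommu_ops):
 *
 *	handle = iommu_sva_bind_device(dev, current->mm, NULL);
 *	if (IS_ERR(handle))
 *		return PTR_ERR(handle);
 *	pasid = iommu_sva_get_pasid(handle);
 *	... program the pasid into the device, run DMA, then ...
 *	iommu_sva_unbind_device(handle);
 */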
struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
	unsigned int flags = 0;
	struct iommu_sva *sva;
	int ret;

	if (drvdata)
		flags = *(unsigned int *)drvdata;

	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			dev_err(dev, "%s: Supervisor PASID not supported\n",
				iommu->name);
			return ERR_PTR(-EOPNOTSUPP);
		}

		if (mm) {
			dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
				iommu->name);
			return ERR_PTR(-EINVAL);
		}

		mm = &init_mm;
	}

	mutex_lock(&pasid_mutex);
	ret = intel_svm_alloc_pasid(dev, mm, flags);
	if (ret) {
		mutex_unlock(&pasid_mutex);
		return ERR_PTR(ret);
	}

	sva = intel_svm_bind_mm(iommu, dev, mm, flags);
	mutex_unlock(&pasid_mutex);

	return sva;
}

void intel_svm_unbind(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev = to_intel_svm_dev(sva);

	mutex_lock(&pasid_mutex);
	intel_svm_unbind_mm(sdev->dev, sdev->pasid);
	mutex_unlock(&pasid_mutex);
}

u32 intel_svm_get_pasid(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev;
	u32 pasid;

	mutex_lock(&pasid_mutex);
	sdev = to_intel_svm_dev(sva);
	pasid = sdev->pasid;
	mutex_unlock(&pasid_mutex);

	return pasid;
}

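/*
 * Complete a previously reported page request: build a page group response
 * descriptor from the original fault (PASID, group index, private data) and
 * the response code supplied by the fault consumer, then submit it through
 * the invalidation queue. A response is only sent when the request had LPIG
 * or private data set, per the VT-d requirement noted below.
 */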
int intel_svm_page_response(struct device *dev,
			    struct iommu_fault_event *evt,
			    struct iommu_page_response *msg)
{
	struct iommu_fault_page_request *prm;
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm = NULL;
	struct intel_iommu *iommu;
	bool private_present;
	bool pasid_present;
	bool last_page;
	u8 bus, devfn;
	int ret = 0;
	u16 sid;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!msg || !evt)
		return -EINVAL;

	mutex_lock(&pasid_mutex);

	prm = &evt->fault.prm;
	sid = PCI_DEVID(bus, devfn);
	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

	if (!pasid_present) {
		ret = -EINVAL;
		goto out;
	}

	if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
		ret = -EINVAL;
		goto out;
	}

	ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
	if (ret || !sdev) {
		ret = -ENODEV;
		goto out;
	}

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must respond
	 * with page group response if private data is present (PDP)
	 * or last page in group (LPIG) bit is set. This is an
	 * additional VT-d requirement beyond PCI ATS spec.
	 */
	if (last_page || private_present) {
		struct qi_desc desc;

		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
				QI_PGRP_PASID_P(pasid_present) |
				QI_PGRP_PDP(private_present) |
				QI_PGRP_RESP_CODE(msg->code) |
				QI_PGRP_RESP_TYPE;
		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
		desc.qw2 = 0;
		desc.qw3 = 0;

		if (private_present) {
			desc.qw2 = prm->private_data[0];
			desc.qw3 = prm->private_data[1];
		} else if (prm->private_data[0]) {
			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
				ktime_to_ns(ktime_get()) - prm->private_data[0]);
		}

		qi_submit_sync(iommu, &desc, 1, 0);
	}
out:
	mutex_unlock(&pasid_mutex);
	return ret;
}