1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright © 2015 Intel Corporation. 4 * 5 * Authors: David Woodhouse <dwmw2@infradead.org> 6 */ 7 8 #include <linux/mmu_notifier.h> 9 #include <linux/sched.h> 10 #include <linux/sched/mm.h> 11 #include <linux/slab.h> 12 #include <linux/intel-svm.h> 13 #include <linux/rculist.h> 14 #include <linux/pci.h> 15 #include <linux/pci-ats.h> 16 #include <linux/dmar.h> 17 #include <linux/interrupt.h> 18 #include <linux/mm_types.h> 19 #include <linux/xarray.h> 20 #include <linux/ioasid.h> 21 #include <asm/page.h> 22 #include <asm/fpu/api.h> 23 24 #include "iommu.h" 25 #include "pasid.h" 26 #include "perf.h" 27 #include "../iommu-sva-lib.h" 28 #include "trace.h" 29 30 static irqreturn_t prq_event_thread(int irq, void *d); 31 static void intel_svm_drain_prq(struct device *dev, u32 pasid); 32 #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) 33 34 static DEFINE_XARRAY_ALLOC(pasid_private_array); 35 static int pasid_private_add(ioasid_t pasid, void *priv) 36 { 37 return xa_alloc(&pasid_private_array, &pasid, priv, 38 XA_LIMIT(pasid, pasid), GFP_ATOMIC); 39 } 40 41 static void pasid_private_remove(ioasid_t pasid) 42 { 43 xa_erase(&pasid_private_array, pasid); 44 } 45 46 static void *pasid_private_find(ioasid_t pasid) 47 { 48 return xa_load(&pasid_private_array, pasid); 49 } 50 51 static struct intel_svm_dev * 52 svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid) 53 { 54 struct intel_svm_dev *sdev = NULL, *t; 55 56 rcu_read_lock(); 57 list_for_each_entry_rcu(t, &svm->devs, list) { 58 if (t->sid == sid) { 59 sdev = t; 60 break; 61 } 62 } 63 rcu_read_unlock(); 64 65 return sdev; 66 } 67 68 static struct intel_svm_dev * 69 svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) 70 { 71 struct intel_svm_dev *sdev = NULL, *t; 72 73 rcu_read_lock(); 74 list_for_each_entry_rcu(t, &svm->devs, list) { 75 if (t->dev == dev) { 76 sdev = t; 77 break; 78 } 79 } 80 rcu_read_unlock(); 81 82 return sdev; 83 } 84 85 int intel_svm_enable_prq(struct intel_iommu *iommu) 86 { 87 struct iopf_queue *iopfq; 88 struct page *pages; 89 int irq, ret; 90 91 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); 92 if (!pages) { 93 pr_warn("IOMMU: %s: Failed to allocate page request queue\n", 94 iommu->name); 95 return -ENOMEM; 96 } 97 iommu->prq = page_address(pages); 98 99 irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu); 100 if (irq <= 0) { 101 pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", 102 iommu->name); 103 ret = -EINVAL; 104 goto free_prq; 105 } 106 iommu->pr_irq = irq; 107 108 snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), 109 "dmar%d-iopfq", iommu->seq_id); 110 iopfq = iopf_queue_alloc(iommu->iopfq_name); 111 if (!iopfq) { 112 pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name); 113 ret = -ENOMEM; 114 goto free_hwirq; 115 } 116 iommu->iopf_queue = iopfq; 117 118 snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); 119 120 ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, 121 iommu->prq_name, iommu); 122 if (ret) { 123 pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", 124 iommu->name); 125 goto free_iopfq; 126 } 127 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); 128 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); 129 dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER); 130 131 init_completion(&iommu->prq_complete); 132 133 return 0; 134 135 free_iopfq: 136 iopf_queue_free(iommu->iopf_queue); 137 iommu->iopf_queue = NULL; 138 free_hwirq: 139 dmar_free_hwirq(irq); 140 iommu->pr_irq = 0; 141 free_prq: 142 free_pages((unsigned long)iommu->prq, PRQ_ORDER); 143 iommu->prq = NULL; 144 145 return ret; 146 } 147 148 int intel_svm_finish_prq(struct intel_iommu *iommu) 149 { 150 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); 151 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); 152 dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); 153 154 if (iommu->pr_irq) { 155 free_irq(iommu->pr_irq, iommu); 156 dmar_free_hwirq(iommu->pr_irq); 157 iommu->pr_irq = 0; 158 } 159 160 if (iommu->iopf_queue) { 161 iopf_queue_free(iommu->iopf_queue); 162 iommu->iopf_queue = NULL; 163 } 164 165 free_pages((unsigned long)iommu->prq, PRQ_ORDER); 166 iommu->prq = NULL; 167 168 return 0; 169 } 170 171 void intel_svm_check(struct intel_iommu *iommu) 172 { 173 if (!pasid_supported(iommu)) 174 return; 175 176 if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && 177 !cap_fl1gp_support(iommu->cap)) { 178 pr_err("%s SVM disabled, incompatible 1GB page capability\n", 179 iommu->name); 180 return; 181 } 182 183 if (cpu_feature_enabled(X86_FEATURE_LA57) && 184 !cap_5lp_support(iommu->cap)) { 185 pr_err("%s SVM disabled, incompatible paging mode\n", 186 iommu->name); 187 return; 188 } 189 190 iommu->flags |= VTD_FLAG_SVM_CAPABLE; 191 } 192 193 static void __flush_svm_range_dev(struct intel_svm *svm, 194 struct intel_svm_dev *sdev, 195 unsigned long address, 196 unsigned long pages, int ih) 197 { 198 struct device_domain_info *info = dev_iommu_priv_get(sdev->dev); 199 200 if (WARN_ON(!pages)) 201 return; 202 203 qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); 204 if (info->ats_enabled) 205 qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, 206 svm->pasid, sdev->qdep, address, 207 order_base_2(pages)); 208 } 209 210 static void intel_flush_svm_range_dev(struct intel_svm *svm, 211 struct intel_svm_dev *sdev, 212 unsigned long address, 213 unsigned long pages, int ih) 214 { 215 unsigned long shift = ilog2(__roundup_pow_of_two(pages)); 216 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift)); 217 unsigned long start = ALIGN_DOWN(address, align); 218 unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align); 219 220 while (start < end) { 221 __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih); 222 start += align; 223 } 224 } 225 226 static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, 227 unsigned long pages, int ih) 228 { 229 struct intel_svm_dev *sdev; 230 231 rcu_read_lock(); 232 list_for_each_entry_rcu(sdev, &svm->devs, list) 233 intel_flush_svm_range_dev(svm, sdev, address, pages, ih); 234 rcu_read_unlock(); 235 } 236 237 /* Pages have been freed at this point */ 238 static void intel_invalidate_range(struct mmu_notifier *mn, 239 struct mm_struct *mm, 240 unsigned long start, unsigned long end) 241 { 242 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); 243 244 intel_flush_svm_range(svm, start, 245 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0); 246 } 247 248 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) 249 { 250 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); 251 struct intel_svm_dev *sdev; 252 253 /* This might end up being called from exit_mmap(), *before* the page 254 * tables are cleared. And __mmu_notifier_release() will delete us from 255 * the list of notifiers so that our invalidate_range() callback doesn't 256 * get called when the page tables are cleared. So we need to protect 257 * against hardware accessing those page tables. 258 * 259 * We do it by clearing the entry in the PASID table and then flushing 260 * the IOTLB and the PASID table caches. This might upset hardware; 261 * perhaps we'll want to point the PASID to a dummy PGD (like the zero 262 * page) so that we end up taking a fault that the hardware really 263 * *has* to handle gracefully without affecting other processes. 264 */ 265 rcu_read_lock(); 266 list_for_each_entry_rcu(sdev, &svm->devs, list) 267 intel_pasid_tear_down_entry(sdev->iommu, sdev->dev, 268 svm->pasid, true); 269 rcu_read_unlock(); 270 271 } 272 273 static const struct mmu_notifier_ops intel_mmuops = { 274 .release = intel_mm_release, 275 .invalidate_range = intel_invalidate_range, 276 }; 277 278 static DEFINE_MUTEX(pasid_mutex); 279 280 static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, 281 struct intel_svm **rsvm, 282 struct intel_svm_dev **rsdev) 283 { 284 struct intel_svm_dev *sdev = NULL; 285 struct intel_svm *svm; 286 287 /* The caller should hold the pasid_mutex lock */ 288 if (WARN_ON(!mutex_is_locked(&pasid_mutex))) 289 return -EINVAL; 290 291 if (pasid == INVALID_IOASID || pasid >= PASID_MAX) 292 return -EINVAL; 293 294 svm = pasid_private_find(pasid); 295 if (IS_ERR(svm)) 296 return PTR_ERR(svm); 297 298 if (!svm) 299 goto out; 300 301 /* 302 * If we found svm for the PASID, there must be at least one device 303 * bond. 304 */ 305 if (WARN_ON(list_empty(&svm->devs))) 306 return -EINVAL; 307 sdev = svm_lookup_device_by_dev(svm, dev); 308 309 out: 310 *rsvm = svm; 311 *rsdev = sdev; 312 313 return 0; 314 } 315 316 static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, 317 unsigned int flags) 318 { 319 ioasid_t max_pasid = dev_is_pci(dev) ? 320 pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; 321 322 return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); 323 } 324 325 static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, 326 struct device *dev, 327 struct mm_struct *mm, 328 unsigned int flags) 329 { 330 struct device_domain_info *info = dev_iommu_priv_get(dev); 331 struct intel_svm_dev *sdev; 332 struct intel_svm *svm; 333 unsigned long sflags; 334 int ret = 0; 335 336 svm = pasid_private_find(mm->pasid); 337 if (!svm) { 338 svm = kzalloc(sizeof(*svm), GFP_KERNEL); 339 if (!svm) 340 return ERR_PTR(-ENOMEM); 341 342 svm->pasid = mm->pasid; 343 svm->mm = mm; 344 svm->flags = flags; 345 INIT_LIST_HEAD_RCU(&svm->devs); 346 347 if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { 348 svm->notifier.ops = &intel_mmuops; 349 ret = mmu_notifier_register(&svm->notifier, mm); 350 if (ret) { 351 kfree(svm); 352 return ERR_PTR(ret); 353 } 354 } 355 356 ret = pasid_private_add(svm->pasid, svm); 357 if (ret) { 358 if (svm->notifier.ops) 359 mmu_notifier_unregister(&svm->notifier, mm); 360 kfree(svm); 361 return ERR_PTR(ret); 362 } 363 } 364 365 /* Find the matching device in svm list */ 366 sdev = svm_lookup_device_by_dev(svm, dev); 367 if (sdev) { 368 sdev->users++; 369 goto success; 370 } 371 372 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); 373 if (!sdev) { 374 ret = -ENOMEM; 375 goto free_svm; 376 } 377 378 sdev->dev = dev; 379 sdev->iommu = iommu; 380 sdev->did = FLPT_DEFAULT_DID; 381 sdev->sid = PCI_DEVID(info->bus, info->devfn); 382 sdev->users = 1; 383 sdev->pasid = svm->pasid; 384 sdev->sva.dev = dev; 385 init_rcu_head(&sdev->rcu); 386 if (info->ats_enabled) { 387 sdev->dev_iotlb = 1; 388 sdev->qdep = info->ats_qdep; 389 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) 390 sdev->qdep = 0; 391 } 392 393 /* Setup the pasid table: */ 394 sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? 395 PASID_FLAG_SUPERVISOR_MODE : 0; 396 sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; 397 ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, 398 FLPT_DEFAULT_DID, sflags); 399 if (ret) 400 goto free_sdev; 401 402 list_add_rcu(&sdev->list, &svm->devs); 403 success: 404 return &sdev->sva; 405 406 free_sdev: 407 kfree(sdev); 408 free_svm: 409 if (list_empty(&svm->devs)) { 410 if (svm->notifier.ops) 411 mmu_notifier_unregister(&svm->notifier, mm); 412 pasid_private_remove(mm->pasid); 413 kfree(svm); 414 } 415 416 return ERR_PTR(ret); 417 } 418 419 /* Caller must hold pasid_mutex */ 420 static int intel_svm_unbind_mm(struct device *dev, u32 pasid) 421 { 422 struct intel_svm_dev *sdev; 423 struct intel_iommu *iommu; 424 struct intel_svm *svm; 425 struct mm_struct *mm; 426 int ret = -EINVAL; 427 428 iommu = device_to_iommu(dev, NULL, NULL); 429 if (!iommu) 430 goto out; 431 432 ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); 433 if (ret) 434 goto out; 435 mm = svm->mm; 436 437 if (sdev) { 438 sdev->users--; 439 if (!sdev->users) { 440 list_del_rcu(&sdev->list); 441 /* Flush the PASID cache and IOTLB for this device. 442 * Note that we do depend on the hardware *not* using 443 * the PASID any more. Just as we depend on other 444 * devices never using PASIDs that they have no right 445 * to use. We have a *shared* PASID table, because it's 446 * large and has to be physically contiguous. So it's 447 * hard to be as defensive as we might like. */ 448 intel_pasid_tear_down_entry(iommu, dev, 449 svm->pasid, false); 450 intel_svm_drain_prq(dev, svm->pasid); 451 kfree_rcu(sdev, rcu); 452 453 if (list_empty(&svm->devs)) { 454 if (svm->notifier.ops) 455 mmu_notifier_unregister(&svm->notifier, mm); 456 pasid_private_remove(svm->pasid); 457 /* We mandate that no page faults may be outstanding 458 * for the PASID when intel_svm_unbind_mm() is called. 459 * If that is not obeyed, subtle errors will happen. 460 * Let's make them less subtle... */ 461 memset(svm, 0x6b, sizeof(*svm)); 462 kfree(svm); 463 } 464 } 465 } 466 out: 467 return ret; 468 } 469 470 /* Page request queue descriptor */ 471 struct page_req_dsc { 472 union { 473 struct { 474 u64 type:8; 475 u64 pasid_present:1; 476 u64 priv_data_present:1; 477 u64 rsvd:6; 478 u64 rid:16; 479 u64 pasid:20; 480 u64 exe_req:1; 481 u64 pm_req:1; 482 u64 rsvd2:10; 483 }; 484 u64 qw_0; 485 }; 486 union { 487 struct { 488 u64 rd_req:1; 489 u64 wr_req:1; 490 u64 lpig:1; 491 u64 prg_index:9; 492 u64 addr:52; 493 }; 494 u64 qw_1; 495 }; 496 u64 priv_data[2]; 497 }; 498 499 static bool is_canonical_address(u64 addr) 500 { 501 int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); 502 long saddr = (long) addr; 503 504 return (((saddr << shift) >> shift) == saddr); 505 } 506 507 /** 508 * intel_svm_drain_prq - Drain page requests and responses for a pasid 509 * @dev: target device 510 * @pasid: pasid for draining 511 * 512 * Drain all pending page requests and responses related to @pasid in both 513 * software and hardware. This is supposed to be called after the device 514 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB 515 * and DevTLB have been invalidated. 516 * 517 * It waits until all pending page requests for @pasid in the page fault 518 * queue are completed by the prq handling thread. Then follow the steps 519 * described in VT-d spec CH7.10 to drain all page requests and page 520 * responses pending in the hardware. 521 */ 522 static void intel_svm_drain_prq(struct device *dev, u32 pasid) 523 { 524 struct device_domain_info *info; 525 struct dmar_domain *domain; 526 struct intel_iommu *iommu; 527 struct qi_desc desc[3]; 528 struct pci_dev *pdev; 529 int head, tail; 530 u16 sid, did; 531 int qdep; 532 533 info = dev_iommu_priv_get(dev); 534 if (WARN_ON(!info || !dev_is_pci(dev))) 535 return; 536 537 if (!info->pri_enabled) 538 return; 539 540 iommu = info->iommu; 541 domain = info->domain; 542 pdev = to_pci_dev(dev); 543 sid = PCI_DEVID(info->bus, info->devfn); 544 did = domain_id_iommu(domain, iommu); 545 qdep = pci_ats_queue_depth(pdev); 546 547 /* 548 * Check and wait until all pending page requests in the queue are 549 * handled by the prq handling thread. 550 */ 551 prq_retry: 552 reinit_completion(&iommu->prq_complete); 553 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; 554 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; 555 while (head != tail) { 556 struct page_req_dsc *req; 557 558 req = &iommu->prq[head / sizeof(*req)]; 559 if (!req->pasid_present || req->pasid != pasid) { 560 head = (head + sizeof(*req)) & PRQ_RING_MASK; 561 continue; 562 } 563 564 wait_for_completion(&iommu->prq_complete); 565 goto prq_retry; 566 } 567 568 /* 569 * A work in IO page fault workqueue may try to lock pasid_mutex now. 570 * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for 571 * all works in the workqueue to finish may cause deadlock. 572 * 573 * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev(). 574 * Unlock it to allow the works to be handled while waiting for 575 * them to finish. 576 */ 577 lockdep_assert_held(&pasid_mutex); 578 mutex_unlock(&pasid_mutex); 579 iopf_queue_flush_dev(dev); 580 mutex_lock(&pasid_mutex); 581 582 /* 583 * Perform steps described in VT-d spec CH7.10 to drain page 584 * requests and responses in hardware. 585 */ 586 memset(desc, 0, sizeof(desc)); 587 desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) | 588 QI_IWD_FENCE | 589 QI_IWD_TYPE; 590 desc[1].qw0 = QI_EIOTLB_PASID(pasid) | 591 QI_EIOTLB_DID(did) | 592 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | 593 QI_EIOTLB_TYPE; 594 desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) | 595 QI_DEV_EIOTLB_SID(sid) | 596 QI_DEV_EIOTLB_QDEP(qdep) | 597 QI_DEIOTLB_TYPE | 598 QI_DEV_IOTLB_PFSID(info->pfsid); 599 qi_retry: 600 reinit_completion(&iommu->prq_complete); 601 qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN); 602 if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { 603 wait_for_completion(&iommu->prq_complete); 604 goto qi_retry; 605 } 606 } 607 608 static int prq_to_iommu_prot(struct page_req_dsc *req) 609 { 610 int prot = 0; 611 612 if (req->rd_req) 613 prot |= IOMMU_FAULT_PERM_READ; 614 if (req->wr_req) 615 prot |= IOMMU_FAULT_PERM_WRITE; 616 if (req->exe_req) 617 prot |= IOMMU_FAULT_PERM_EXEC; 618 if (req->pm_req) 619 prot |= IOMMU_FAULT_PERM_PRIV; 620 621 return prot; 622 } 623 624 static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, 625 struct page_req_dsc *desc) 626 { 627 struct iommu_fault_event event; 628 629 if (!dev || !dev_is_pci(dev)) 630 return -ENODEV; 631 632 /* Fill in event data for device specific processing */ 633 memset(&event, 0, sizeof(struct iommu_fault_event)); 634 event.fault.type = IOMMU_FAULT_PAGE_REQ; 635 event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT; 636 event.fault.prm.pasid = desc->pasid; 637 event.fault.prm.grpid = desc->prg_index; 638 event.fault.prm.perm = prq_to_iommu_prot(desc); 639 640 if (desc->lpig) 641 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; 642 if (desc->pasid_present) { 643 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; 644 event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; 645 } 646 if (desc->priv_data_present) { 647 /* 648 * Set last page in group bit if private data is present, 649 * page response is required as it does for LPIG. 650 * iommu_report_device_fault() doesn't understand this vendor 651 * specific requirement thus we set last_page as a workaround. 652 */ 653 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; 654 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; 655 event.fault.prm.private_data[0] = desc->priv_data[0]; 656 event.fault.prm.private_data[1] = desc->priv_data[1]; 657 } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) { 658 /* 659 * If the private data fields are not used by hardware, use it 660 * to monitor the prq handle latency. 661 */ 662 event.fault.prm.private_data[0] = ktime_to_ns(ktime_get()); 663 } 664 665 return iommu_report_device_fault(dev, &event); 666 } 667 668 static void handle_bad_prq_event(struct intel_iommu *iommu, 669 struct page_req_dsc *req, int result) 670 { 671 struct qi_desc desc; 672 673 pr_err("%s: Invalid page request: %08llx %08llx\n", 674 iommu->name, ((unsigned long long *)req)[0], 675 ((unsigned long long *)req)[1]); 676 677 /* 678 * Per VT-d spec. v3.0 ch7.7, system software must 679 * respond with page group response if private data 680 * is present (PDP) or last page in group (LPIG) bit 681 * is set. This is an additional VT-d feature beyond 682 * PCI ATS spec. 683 */ 684 if (!req->lpig && !req->priv_data_present) 685 return; 686 687 desc.qw0 = QI_PGRP_PASID(req->pasid) | 688 QI_PGRP_DID(req->rid) | 689 QI_PGRP_PASID_P(req->pasid_present) | 690 QI_PGRP_PDP(req->priv_data_present) | 691 QI_PGRP_RESP_CODE(result) | 692 QI_PGRP_RESP_TYPE; 693 desc.qw1 = QI_PGRP_IDX(req->prg_index) | 694 QI_PGRP_LPIG(req->lpig); 695 696 if (req->priv_data_present) { 697 desc.qw2 = req->priv_data[0]; 698 desc.qw3 = req->priv_data[1]; 699 } else { 700 desc.qw2 = 0; 701 desc.qw3 = 0; 702 } 703 704 qi_submit_sync(iommu, &desc, 1, 0); 705 } 706 707 static irqreturn_t prq_event_thread(int irq, void *d) 708 { 709 struct intel_svm_dev *sdev = NULL; 710 struct intel_iommu *iommu = d; 711 struct intel_svm *svm = NULL; 712 struct page_req_dsc *req; 713 int head, tail, handled; 714 u64 address; 715 716 /* 717 * Clear PPR bit before reading head/tail registers, to ensure that 718 * we get a new interrupt if needed. 719 */ 720 writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG); 721 722 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; 723 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; 724 handled = (head != tail); 725 while (head != tail) { 726 req = &iommu->prq[head / sizeof(*req)]; 727 address = (u64)req->addr << VTD_PAGE_SHIFT; 728 729 if (unlikely(!req->pasid_present)) { 730 pr_err("IOMMU: %s: Page request without PASID\n", 731 iommu->name); 732 bad_req: 733 svm = NULL; 734 sdev = NULL; 735 handle_bad_prq_event(iommu, req, QI_RESP_INVALID); 736 goto prq_advance; 737 } 738 739 if (unlikely(!is_canonical_address(address))) { 740 pr_err("IOMMU: %s: Address is not canonical\n", 741 iommu->name); 742 goto bad_req; 743 } 744 745 if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) { 746 pr_err("IOMMU: %s: Page request in Privilege Mode\n", 747 iommu->name); 748 goto bad_req; 749 } 750 751 if (unlikely(req->exe_req && req->rd_req)) { 752 pr_err("IOMMU: %s: Execution request not supported\n", 753 iommu->name); 754 goto bad_req; 755 } 756 757 /* Drop Stop Marker message. No need for a response. */ 758 if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) 759 goto prq_advance; 760 761 if (!svm || svm->pasid != req->pasid) { 762 /* 763 * It can't go away, because the driver is not permitted 764 * to unbind the mm while any page faults are outstanding. 765 */ 766 svm = pasid_private_find(req->pasid); 767 if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE)) 768 goto bad_req; 769 } 770 771 if (!sdev || sdev->sid != req->rid) { 772 sdev = svm_lookup_device_by_sid(svm, req->rid); 773 if (!sdev) 774 goto bad_req; 775 } 776 777 sdev->prq_seq_number++; 778 779 /* 780 * If prq is to be handled outside iommu driver via receiver of 781 * the fault notifiers, we skip the page response here. 782 */ 783 if (intel_svm_prq_report(iommu, sdev->dev, req)) 784 handle_bad_prq_event(iommu, req, QI_RESP_INVALID); 785 786 trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1, 787 req->priv_data[0], req->priv_data[1], 788 sdev->prq_seq_number); 789 prq_advance: 790 head = (head + sizeof(*req)) & PRQ_RING_MASK; 791 } 792 793 dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); 794 795 /* 796 * Clear the page request overflow bit and wake up all threads that 797 * are waiting for the completion of this handling. 798 */ 799 if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { 800 pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n", 801 iommu->name); 802 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; 803 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; 804 if (head == tail) { 805 iopf_queue_discard_partial(iommu->iopf_queue); 806 writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); 807 pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", 808 iommu->name); 809 } 810 } 811 812 if (!completion_done(&iommu->prq_complete)) 813 complete(&iommu->prq_complete); 814 815 return IRQ_RETVAL(handled); 816 } 817 818 struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) 819 { 820 struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); 821 unsigned int flags = 0; 822 struct iommu_sva *sva; 823 int ret; 824 825 if (drvdata) 826 flags = *(unsigned int *)drvdata; 827 828 if (flags & SVM_FLAG_SUPERVISOR_MODE) { 829 if (!ecap_srs(iommu->ecap)) { 830 dev_err(dev, "%s: Supervisor PASID not supported\n", 831 iommu->name); 832 return ERR_PTR(-EOPNOTSUPP); 833 } 834 835 if (mm) { 836 dev_err(dev, "%s: Supervisor PASID with user provided mm\n", 837 iommu->name); 838 return ERR_PTR(-EINVAL); 839 } 840 841 mm = &init_mm; 842 } 843 844 mutex_lock(&pasid_mutex); 845 ret = intel_svm_alloc_pasid(dev, mm, flags); 846 if (ret) { 847 mutex_unlock(&pasid_mutex); 848 return ERR_PTR(ret); 849 } 850 851 sva = intel_svm_bind_mm(iommu, dev, mm, flags); 852 mutex_unlock(&pasid_mutex); 853 854 return sva; 855 } 856 857 void intel_svm_unbind(struct iommu_sva *sva) 858 { 859 struct intel_svm_dev *sdev = to_intel_svm_dev(sva); 860 861 mutex_lock(&pasid_mutex); 862 intel_svm_unbind_mm(sdev->dev, sdev->pasid); 863 mutex_unlock(&pasid_mutex); 864 } 865 866 u32 intel_svm_get_pasid(struct iommu_sva *sva) 867 { 868 struct intel_svm_dev *sdev; 869 u32 pasid; 870 871 mutex_lock(&pasid_mutex); 872 sdev = to_intel_svm_dev(sva); 873 pasid = sdev->pasid; 874 mutex_unlock(&pasid_mutex); 875 876 return pasid; 877 } 878 879 int intel_svm_page_response(struct device *dev, 880 struct iommu_fault_event *evt, 881 struct iommu_page_response *msg) 882 { 883 struct iommu_fault_page_request *prm; 884 struct intel_svm_dev *sdev = NULL; 885 struct intel_svm *svm = NULL; 886 struct intel_iommu *iommu; 887 bool private_present; 888 bool pasid_present; 889 bool last_page; 890 u8 bus, devfn; 891 int ret = 0; 892 u16 sid; 893 894 if (!dev || !dev_is_pci(dev)) 895 return -ENODEV; 896 897 iommu = device_to_iommu(dev, &bus, &devfn); 898 if (!iommu) 899 return -ENODEV; 900 901 if (!msg || !evt) 902 return -EINVAL; 903 904 mutex_lock(&pasid_mutex); 905 906 prm = &evt->fault.prm; 907 sid = PCI_DEVID(bus, devfn); 908 pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; 909 private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; 910 last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; 911 912 if (!pasid_present) { 913 ret = -EINVAL; 914 goto out; 915 } 916 917 if (prm->pasid == 0 || prm->pasid >= PASID_MAX) { 918 ret = -EINVAL; 919 goto out; 920 } 921 922 ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev); 923 if (ret || !sdev) { 924 ret = -ENODEV; 925 goto out; 926 } 927 928 /* 929 * Per VT-d spec. v3.0 ch7.7, system software must respond 930 * with page group response if private data is present (PDP) 931 * or last page in group (LPIG) bit is set. This is an 932 * additional VT-d requirement beyond PCI ATS spec. 933 */ 934 if (last_page || private_present) { 935 struct qi_desc desc; 936 937 desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) | 938 QI_PGRP_PASID_P(pasid_present) | 939 QI_PGRP_PDP(private_present) | 940 QI_PGRP_RESP_CODE(msg->code) | 941 QI_PGRP_RESP_TYPE; 942 desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page); 943 desc.qw2 = 0; 944 desc.qw3 = 0; 945 946 if (private_present) { 947 desc.qw2 = prm->private_data[0]; 948 desc.qw3 = prm->private_data[1]; 949 } else if (prm->private_data[0]) { 950 dmar_latency_update(iommu, DMAR_LATENCY_PRQ, 951 ktime_to_ns(ktime_get()) - prm->private_data[0]); 952 } 953 954 qi_submit_sync(iommu, &desc, 1, 0); 955 } 956 out: 957 mutex_unlock(&pasid_mutex); 958 return ret; 959 } 960