xref: /linux/drivers/iommu/intel/svm.c (revision 6af91e3d2cfc8bb579b1aa2d22cd91f8c34acdf6)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2015 Intel Corporation.
4  *
5  * Authors: David Woodhouse <dwmw2@infradead.org>
6  */
7 
8 #include <linux/mmu_notifier.h>
9 #include <linux/sched.h>
10 #include <linux/sched/mm.h>
11 #include <linux/slab.h>
12 #include <linux/rculist.h>
13 #include <linux/pci.h>
14 #include <linux/pci-ats.h>
15 #include <linux/dmar.h>
16 #include <linux/interrupt.h>
17 #include <linux/mm_types.h>
18 #include <linux/xarray.h>
19 #include <asm/page.h>
20 #include <asm/fpu/api.h>
21 
22 #include "iommu.h"
23 #include "pasid.h"
24 #include "perf.h"
25 #include "../iommu-pages.h"
26 #include "trace.h"
27 
28 static irqreturn_t prq_event_thread(int irq, void *d);
29 
30 int intel_svm_enable_prq(struct intel_iommu *iommu)
31 {
32 	struct iopf_queue *iopfq;
33 	int irq, ret;
34 
35 	iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
36 	if (!iommu->prq) {
37 		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
38 			iommu->name);
39 		return -ENOMEM;
40 	}
41 
42 	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
43 	if (irq <= 0) {
44 		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
45 		       iommu->name);
46 		ret = -EINVAL;
47 		goto free_prq;
48 	}
49 	iommu->pr_irq = irq;
50 
51 	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
52 		 "dmar%d-iopfq", iommu->seq_id);
53 	iopfq = iopf_queue_alloc(iommu->iopfq_name);
54 	if (!iopfq) {
55 		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
56 		ret = -ENOMEM;
57 		goto free_hwirq;
58 	}
59 	iommu->iopf_queue = iopfq;
60 
61 	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
62 
63 	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
64 				   iommu->prq_name, iommu);
65 	if (ret) {
66 		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
67 		       iommu->name);
68 		goto free_iopfq;
69 	}
70 	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
71 	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
72 	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
73 
74 	init_completion(&iommu->prq_complete);
75 
76 	return 0;
77 
78 free_iopfq:
79 	iopf_queue_free(iommu->iopf_queue);
80 	iommu->iopf_queue = NULL;
81 free_hwirq:
82 	dmar_free_hwirq(irq);
83 	iommu->pr_irq = 0;
84 free_prq:
85 	iommu_free_pages(iommu->prq, PRQ_ORDER);
86 	iommu->prq = NULL;
87 
88 	return ret;
89 }
90 
91 int intel_svm_finish_prq(struct intel_iommu *iommu)
92 {
93 	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
94 	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
95 	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
96 
97 	if (iommu->pr_irq) {
98 		free_irq(iommu->pr_irq, iommu);
99 		dmar_free_hwirq(iommu->pr_irq);
100 		iommu->pr_irq = 0;
101 	}
102 
103 	if (iommu->iopf_queue) {
104 		iopf_queue_free(iommu->iopf_queue);
105 		iommu->iopf_queue = NULL;
106 	}
107 
108 	iommu_free_pages(iommu->prq, PRQ_ORDER);
109 	iommu->prq = NULL;
110 
111 	return 0;
112 }
113 
114 void intel_svm_check(struct intel_iommu *iommu)
115 {
116 	if (!pasid_supported(iommu))
117 		return;
118 
119 	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
120 	    !cap_fl1gp_support(iommu->cap)) {
121 		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
122 		       iommu->name);
123 		return;
124 	}
125 
126 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
127 	    !cap_fl5lp_support(iommu->cap)) {
128 		pr_err("%s SVM disabled, incompatible paging mode\n",
129 		       iommu->name);
130 		return;
131 	}
132 
133 	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
134 }
135 
136 /* Pages have been freed at this point */
137 static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
138 					struct mm_struct *mm,
139 					unsigned long start, unsigned long end)
140 {
141 	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
142 
143 	if (start == 0 && end == ULONG_MAX) {
144 		cache_tag_flush_all(domain);
145 		return;
146 	}
147 
148 	/*
149 	 * The mm_types defines vm_end as the first byte after the end address,
150 	 * different from IOMMU subsystem using the last address of an address
151 	 * range.
152 	 */
153 	cache_tag_flush_range(domain, start, end - 1, 0);
154 }
155 
156 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
157 {
158 	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
159 	struct dev_pasid_info *dev_pasid;
160 	struct device_domain_info *info;
161 	unsigned long flags;
162 
163 	/* This might end up being called from exit_mmap(), *before* the page
164 	 * tables are cleared. And __mmu_notifier_release() will delete us from
165 	 * the list of notifiers so that our invalidate_range() callback doesn't
166 	 * get called when the page tables are cleared. So we need to protect
167 	 * against hardware accessing those page tables.
168 	 *
169 	 * We do it by clearing the entry in the PASID table and then flushing
170 	 * the IOTLB and the PASID table caches. This might upset hardware;
171 	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
172 	 * page) so that we end up taking a fault that the hardware really
173 	 * *has* to handle gracefully without affecting other processes.
174 	 */
175 	spin_lock_irqsave(&domain->lock, flags);
176 	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
177 		info = dev_iommu_priv_get(dev_pasid->dev);
178 		intel_pasid_tear_down_entry(info->iommu, dev_pasid->dev,
179 					    dev_pasid->pasid, true);
180 	}
181 	spin_unlock_irqrestore(&domain->lock, flags);
182 
183 }
184 
185 static void intel_mm_free_notifier(struct mmu_notifier *mn)
186 {
187 	kfree(container_of(mn, struct dmar_domain, notifier));
188 }
189 
190 static const struct mmu_notifier_ops intel_mmuops = {
191 	.release = intel_mm_release,
192 	.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
193 	.free_notifier = intel_mm_free_notifier,
194 };
195 
196 static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
197 				   struct device *dev, ioasid_t pasid)
198 {
199 	struct device_domain_info *info = dev_iommu_priv_get(dev);
200 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
201 	struct intel_iommu *iommu = info->iommu;
202 	struct mm_struct *mm = domain->mm;
203 	struct dev_pasid_info *dev_pasid;
204 	unsigned long sflags;
205 	unsigned long flags;
206 	int ret = 0;
207 
208 	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
209 	if (!dev_pasid)
210 		return -ENOMEM;
211 
212 	dev_pasid->dev = dev;
213 	dev_pasid->pasid = pasid;
214 
215 	ret = cache_tag_assign_domain(to_dmar_domain(domain), dev, pasid);
216 	if (ret)
217 		goto free_dev_pasid;
218 
219 	/* Setup the pasid table: */
220 	sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
221 	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
222 					    FLPT_DEFAULT_DID, sflags);
223 	if (ret)
224 		goto unassign_tag;
225 
226 	spin_lock_irqsave(&dmar_domain->lock, flags);
227 	list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
228 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
229 
230 	return 0;
231 
232 unassign_tag:
233 	cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid);
234 free_dev_pasid:
235 	kfree(dev_pasid);
236 
237 	return ret;
238 }
239 
240 /* Page request queue descriptor */
241 struct page_req_dsc {
242 	union {
243 		struct {
244 			u64 type:8;
245 			u64 pasid_present:1;
246 			u64 rsvd:7;
247 			u64 rid:16;
248 			u64 pasid:20;
249 			u64 exe_req:1;
250 			u64 pm_req:1;
251 			u64 rsvd2:10;
252 		};
253 		u64 qw_0;
254 	};
255 	union {
256 		struct {
257 			u64 rd_req:1;
258 			u64 wr_req:1;
259 			u64 lpig:1;
260 			u64 prg_index:9;
261 			u64 addr:52;
262 		};
263 		u64 qw_1;
264 	};
265 	u64 qw_2;
266 	u64 qw_3;
267 };
268 
269 static bool is_canonical_address(u64 addr)
270 {
271 	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
272 	long saddr = (long) addr;
273 
274 	return (((saddr << shift) >> shift) == saddr);
275 }
276 
277 /**
278  * intel_drain_pasid_prq - Drain page requests and responses for a pasid
279  * @dev: target device
280  * @pasid: pasid for draining
281  *
282  * Drain all pending page requests and responses related to @pasid in both
283  * software and hardware. This is supposed to be called after the device
284  * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
285  * and DevTLB have been invalidated.
286  *
287  * It waits until all pending page requests for @pasid in the page fault
288  * queue are completed by the prq handling thread. Then follow the steps
289  * described in VT-d spec CH7.10 to drain all page requests and page
290  * responses pending in the hardware.
291  */
292 void intel_drain_pasid_prq(struct device *dev, u32 pasid)
293 {
294 	struct device_domain_info *info;
295 	struct dmar_domain *domain;
296 	struct intel_iommu *iommu;
297 	struct qi_desc desc[3];
298 	struct pci_dev *pdev;
299 	int head, tail;
300 	u16 sid, did;
301 	int qdep;
302 
303 	info = dev_iommu_priv_get(dev);
304 	if (WARN_ON(!info || !dev_is_pci(dev)))
305 		return;
306 
307 	if (!info->pri_enabled)
308 		return;
309 
310 	iommu = info->iommu;
311 	domain = info->domain;
312 	pdev = to_pci_dev(dev);
313 	sid = PCI_DEVID(info->bus, info->devfn);
314 	did = domain_id_iommu(domain, iommu);
315 	qdep = pci_ats_queue_depth(pdev);
316 
317 	/*
318 	 * Check and wait until all pending page requests in the queue are
319 	 * handled by the prq handling thread.
320 	 */
321 prq_retry:
322 	reinit_completion(&iommu->prq_complete);
323 	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
324 	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
325 	while (head != tail) {
326 		struct page_req_dsc *req;
327 
328 		req = &iommu->prq[head / sizeof(*req)];
329 		if (!req->pasid_present || req->pasid != pasid) {
330 			head = (head + sizeof(*req)) & PRQ_RING_MASK;
331 			continue;
332 		}
333 
334 		wait_for_completion(&iommu->prq_complete);
335 		goto prq_retry;
336 	}
337 
338 	iopf_queue_flush_dev(dev);
339 
340 	/*
341 	 * Perform steps described in VT-d spec CH7.10 to drain page
342 	 * requests and responses in hardware.
343 	 */
344 	memset(desc, 0, sizeof(desc));
345 	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
346 			QI_IWD_FENCE |
347 			QI_IWD_TYPE;
348 	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
349 			QI_EIOTLB_DID(did) |
350 			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
351 			QI_EIOTLB_TYPE;
352 	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
353 			QI_DEV_EIOTLB_SID(sid) |
354 			QI_DEV_EIOTLB_QDEP(qdep) |
355 			QI_DEIOTLB_TYPE |
356 			QI_DEV_IOTLB_PFSID(info->pfsid);
357 qi_retry:
358 	reinit_completion(&iommu->prq_complete);
359 	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
360 	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
361 		wait_for_completion(&iommu->prq_complete);
362 		goto qi_retry;
363 	}
364 }
365 
366 static int prq_to_iommu_prot(struct page_req_dsc *req)
367 {
368 	int prot = 0;
369 
370 	if (req->rd_req)
371 		prot |= IOMMU_FAULT_PERM_READ;
372 	if (req->wr_req)
373 		prot |= IOMMU_FAULT_PERM_WRITE;
374 	if (req->exe_req)
375 		prot |= IOMMU_FAULT_PERM_EXEC;
376 	if (req->pm_req)
377 		prot |= IOMMU_FAULT_PERM_PRIV;
378 
379 	return prot;
380 }
381 
382 static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
383 				 struct page_req_dsc *desc)
384 {
385 	struct iopf_fault event = { };
386 
387 	/* Fill in event data for device specific processing */
388 	event.fault.type = IOMMU_FAULT_PAGE_REQ;
389 	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
390 	event.fault.prm.pasid = desc->pasid;
391 	event.fault.prm.grpid = desc->prg_index;
392 	event.fault.prm.perm = prq_to_iommu_prot(desc);
393 
394 	if (desc->lpig)
395 		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
396 	if (desc->pasid_present) {
397 		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
398 		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
399 	}
400 
401 	iommu_report_device_fault(dev, &event);
402 }
403 
404 static void handle_bad_prq_event(struct intel_iommu *iommu,
405 				 struct page_req_dsc *req, int result)
406 {
407 	struct qi_desc desc = { };
408 
409 	pr_err("%s: Invalid page request: %08llx %08llx\n",
410 	       iommu->name, ((unsigned long long *)req)[0],
411 	       ((unsigned long long *)req)[1]);
412 
413 	if (!req->lpig)
414 		return;
415 
416 	desc.qw0 = QI_PGRP_PASID(req->pasid) |
417 			QI_PGRP_DID(req->rid) |
418 			QI_PGRP_PASID_P(req->pasid_present) |
419 			QI_PGRP_RESP_CODE(result) |
420 			QI_PGRP_RESP_TYPE;
421 	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
422 			QI_PGRP_LPIG(req->lpig);
423 
424 	qi_submit_sync(iommu, &desc, 1, 0);
425 }
426 
427 static irqreturn_t prq_event_thread(int irq, void *d)
428 {
429 	struct intel_iommu *iommu = d;
430 	struct page_req_dsc *req;
431 	int head, tail, handled;
432 	struct device *dev;
433 	u64 address;
434 
435 	/*
436 	 * Clear PPR bit before reading head/tail registers, to ensure that
437 	 * we get a new interrupt if needed.
438 	 */
439 	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
440 
441 	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
442 	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
443 	handled = (head != tail);
444 	while (head != tail) {
445 		req = &iommu->prq[head / sizeof(*req)];
446 		address = (u64)req->addr << VTD_PAGE_SHIFT;
447 
448 		if (unlikely(!req->pasid_present)) {
449 			pr_err("IOMMU: %s: Page request without PASID\n",
450 			       iommu->name);
451 bad_req:
452 			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
453 			goto prq_advance;
454 		}
455 
456 		if (unlikely(!is_canonical_address(address))) {
457 			pr_err("IOMMU: %s: Address is not canonical\n",
458 			       iommu->name);
459 			goto bad_req;
460 		}
461 
462 		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
463 			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
464 			       iommu->name);
465 			goto bad_req;
466 		}
467 
468 		if (unlikely(req->exe_req && req->rd_req)) {
469 			pr_err("IOMMU: %s: Execution request not supported\n",
470 			       iommu->name);
471 			goto bad_req;
472 		}
473 
474 		/* Drop Stop Marker message. No need for a response. */
475 		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
476 			goto prq_advance;
477 
478 		/*
479 		 * If prq is to be handled outside iommu driver via receiver of
480 		 * the fault notifiers, we skip the page response here.
481 		 */
482 		mutex_lock(&iommu->iopf_lock);
483 		dev = device_rbtree_find(iommu, req->rid);
484 		if (!dev) {
485 			mutex_unlock(&iommu->iopf_lock);
486 			goto bad_req;
487 		}
488 
489 		intel_svm_prq_report(iommu, dev, req);
490 		trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
491 				 req->qw_2, req->qw_3,
492 				 iommu->prq_seq_number++);
493 		mutex_unlock(&iommu->iopf_lock);
494 prq_advance:
495 		head = (head + sizeof(*req)) & PRQ_RING_MASK;
496 	}
497 
498 	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
499 
500 	/*
501 	 * Clear the page request overflow bit and wake up all threads that
502 	 * are waiting for the completion of this handling.
503 	 */
504 	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
505 		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
506 				    iommu->name);
507 		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
508 		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
509 		if (head == tail) {
510 			iopf_queue_discard_partial(iommu->iopf_queue);
511 			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
512 			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
513 					    iommu->name);
514 		}
515 	}
516 
517 	if (!completion_done(&iommu->prq_complete))
518 		complete(&iommu->prq_complete);
519 
520 	return IRQ_RETVAL(handled);
521 }
522 
523 void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
524 			     struct iommu_page_response *msg)
525 {
526 	struct device_domain_info *info = dev_iommu_priv_get(dev);
527 	struct intel_iommu *iommu = info->iommu;
528 	u8 bus = info->bus, devfn = info->devfn;
529 	struct iommu_fault_page_request *prm;
530 	struct qi_desc desc;
531 	bool pasid_present;
532 	bool last_page;
533 	u16 sid;
534 
535 	prm = &evt->fault.prm;
536 	sid = PCI_DEVID(bus, devfn);
537 	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
538 	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
539 
540 	desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
541 			QI_PGRP_PASID_P(pasid_present) |
542 			QI_PGRP_RESP_CODE(msg->code) |
543 			QI_PGRP_RESP_TYPE;
544 	desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
545 	desc.qw2 = 0;
546 	desc.qw3 = 0;
547 
548 	qi_submit_sync(iommu, &desc, 1, 0);
549 }
550 
551 static void intel_svm_domain_free(struct iommu_domain *domain)
552 {
553 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
554 
555 	/* dmar_domain free is deferred to the mmu free_notifier callback. */
556 	mmu_notifier_put(&dmar_domain->notifier);
557 }
558 
559 static const struct iommu_domain_ops intel_svm_domain_ops = {
560 	.set_dev_pasid		= intel_svm_set_dev_pasid,
561 	.free			= intel_svm_domain_free
562 };
563 
564 struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
565 					    struct mm_struct *mm)
566 {
567 	struct dmar_domain *domain;
568 	int ret;
569 
570 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
571 	if (!domain)
572 		return ERR_PTR(-ENOMEM);
573 
574 	domain->domain.ops = &intel_svm_domain_ops;
575 	domain->use_first_level = true;
576 	INIT_LIST_HEAD(&domain->dev_pasids);
577 	INIT_LIST_HEAD(&domain->cache_tags);
578 	spin_lock_init(&domain->cache_lock);
579 	spin_lock_init(&domain->lock);
580 
581 	domain->notifier.ops = &intel_mmuops;
582 	ret = mmu_notifier_register(&domain->notifier, mm);
583 	if (ret) {
584 		kfree(domain);
585 		return ERR_PTR(ret);
586 	}
587 
588 	return &domain->domain;
589 }
590