xref: /linux/drivers/iommu/intel/svm.c (revision c532de5a67a70f8533d495f8f2aaa9a0491c3ad0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2015 Intel Corporation.
4  *
5  * Authors: David Woodhouse <dwmw2@infradead.org>
6  */
7 
8 #include <linux/mmu_notifier.h>
9 #include <linux/sched.h>
10 #include <linux/sched/mm.h>
11 #include <linux/slab.h>
12 #include <linux/rculist.h>
13 #include <linux/pci.h>
14 #include <linux/pci-ats.h>
15 #include <linux/dmar.h>
16 #include <linux/interrupt.h>
17 #include <linux/mm_types.h>
18 #include <linux/xarray.h>
19 #include <asm/page.h>
20 #include <asm/fpu/api.h>
21 
22 #include "iommu.h"
23 #include "pasid.h"
24 #include "perf.h"
25 #include "../iommu-pages.h"
26 #include "trace.h"
27 
/*
 * Forward declaration of the threaded page-request interrupt handler. It is
 * defined further down in this file, but intel_svm_enable_prq() hands it to
 * request_threaded_irq() before that definition is reached.
 */
28 static irqreturn_t prq_event_thread(int irq, void *d);
29 
30 int intel_svm_enable_prq(struct intel_iommu *iommu)
31 {
32 	struct iopf_queue *iopfq;
33 	int irq, ret;
34 
35 	iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
36 	if (!iommu->prq) {
37 		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
38 			iommu->name);
39 		return -ENOMEM;
40 	}
41 
42 	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
43 	if (irq <= 0) {
44 		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
45 		       iommu->name);
46 		ret = -EINVAL;
47 		goto free_prq;
48 	}
49 	iommu->pr_irq = irq;
50 
51 	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
52 		 "dmar%d-iopfq", iommu->seq_id);
53 	iopfq = iopf_queue_alloc(iommu->iopfq_name);
54 	if (!iopfq) {
55 		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
56 		ret = -ENOMEM;
57 		goto free_hwirq;
58 	}
59 	iommu->iopf_queue = iopfq;
60 
61 	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
62 
63 	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
64 				   iommu->prq_name, iommu);
65 	if (ret) {
66 		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
67 		       iommu->name);
68 		goto free_iopfq;
69 	}
70 	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
71 	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
72 	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
73 
74 	init_completion(&iommu->prq_complete);
75 
76 	return 0;
77 
78 free_iopfq:
79 	iopf_queue_free(iommu->iopf_queue);
80 	iommu->iopf_queue = NULL;
81 free_hwirq:
82 	dmar_free_hwirq(irq);
83 	iommu->pr_irq = 0;
84 free_prq:
85 	iommu_free_pages(iommu->prq, PRQ_ORDER);
86 	iommu->prq = NULL;
87 
88 	return ret;
89 }
90 
91 int intel_svm_finish_prq(struct intel_iommu *iommu)
92 {
93 	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
94 	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
95 	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
96 
97 	if (iommu->pr_irq) {
98 		free_irq(iommu->pr_irq, iommu);
99 		dmar_free_hwirq(iommu->pr_irq);
100 		iommu->pr_irq = 0;
101 	}
102 
103 	if (iommu->iopf_queue) {
104 		iopf_queue_free(iommu->iopf_queue);
105 		iommu->iopf_queue = NULL;
106 	}
107 
108 	iommu_free_pages(iommu->prq, PRQ_ORDER);
109 	iommu->prq = NULL;
110 
111 	return 0;
112 }
113 
114 void intel_svm_check(struct intel_iommu *iommu)
115 {
116 	if (!pasid_supported(iommu))
117 		return;
118 
119 	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
120 	    !cap_fl1gp_support(iommu->cap)) {
121 		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
122 		       iommu->name);
123 		return;
124 	}
125 
126 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
127 	    !cap_fl5lp_support(iommu->cap)) {
128 		pr_err("%s SVM disabled, incompatible paging mode\n",
129 		       iommu->name);
130 		return;
131 	}
132 
133 	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
134 }
135 
136 /* Pages have been freed at this point */
137 static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
138 					struct mm_struct *mm,
139 					unsigned long start, unsigned long end)
140 {
141 	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
142 
143 	if (start == 0 && end == ULONG_MAX) {
144 		cache_tag_flush_all(domain);
145 		return;
146 	}
147 
148 	/*
149 	 * The mm_types defines vm_end as the first byte after the end address,
150 	 * different from IOMMU subsystem using the last address of an address
151 	 * range.
152 	 */
153 	cache_tag_flush_range(domain, start, end - 1, 0);
154 }
155 
156 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
157 {
158 	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
159 	struct dev_pasid_info *dev_pasid;
160 	struct device_domain_info *info;
161 	unsigned long flags;
162 
163 	/* This might end up being called from exit_mmap(), *before* the page
164 	 * tables are cleared. And __mmu_notifier_release() will delete us from
165 	 * the list of notifiers so that our invalidate_range() callback doesn't
166 	 * get called when the page tables are cleared. So we need to protect
167 	 * against hardware accessing those page tables.
168 	 *
169 	 * We do it by clearing the entry in the PASID table and then flushing
170 	 * the IOTLB and the PASID table caches. This might upset hardware;
171 	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
172 	 * page) so that we end up taking a fault that the hardware really
173 	 * *has* to handle gracefully without affecting other processes.
174 	 */
175 	spin_lock_irqsave(&domain->lock, flags);
176 	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
177 		info = dev_iommu_priv_get(dev_pasid->dev);
178 		intel_pasid_tear_down_entry(info->iommu, dev_pasid->dev,
179 					    dev_pasid->pasid, true);
180 	}
181 	spin_unlock_irqrestore(&domain->lock, flags);
182 
183 }
184 
185 static void intel_mm_free_notifier(struct mmu_notifier *mn)
186 {
187 	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
188 
189 	kfree(domain->qi_batch);
190 	kfree(domain);
191 }
192 
193 static const struct mmu_notifier_ops intel_mmuops = {
194 	.release = intel_mm_release,
195 	.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
196 	.free_notifier = intel_mm_free_notifier,
197 };
198 
199 static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
200 				   struct device *dev, ioasid_t pasid)
201 {
202 	struct device_domain_info *info = dev_iommu_priv_get(dev);
203 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
204 	struct intel_iommu *iommu = info->iommu;
205 	struct mm_struct *mm = domain->mm;
206 	struct dev_pasid_info *dev_pasid;
207 	unsigned long sflags;
208 	unsigned long flags;
209 	int ret = 0;
210 
211 	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
212 	if (!dev_pasid)
213 		return -ENOMEM;
214 
215 	dev_pasid->dev = dev;
216 	dev_pasid->pasid = pasid;
217 
218 	ret = cache_tag_assign_domain(to_dmar_domain(domain), dev, pasid);
219 	if (ret)
220 		goto free_dev_pasid;
221 
222 	/* Setup the pasid table: */
223 	sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
224 	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
225 					    FLPT_DEFAULT_DID, sflags);
226 	if (ret)
227 		goto unassign_tag;
228 
229 	spin_lock_irqsave(&dmar_domain->lock, flags);
230 	list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
231 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
232 
233 	return 0;
234 
235 unassign_tag:
236 	cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid);
237 free_dev_pasid:
238 	kfree(dev_pasid);
239 
240 	return ret;
241 }
242 
243 /* Page request queue descriptor */
/*
 * One entry of the page request queue that prq_event_thread() walks. The
 * notes on the individual fields describe how this file uses them.
 */
244 struct page_req_dsc {
245 	union {
246 		struct {
247 			u64 type:8;		/* not examined by the code in this file */
248 			u64 pasid_present:1;	/* set when @pasid is valid */
249 			u64 rsvd:7;
250 			u64 rid:16;		/* source id, used to look up the device */
251 			u64 pasid:20;
252 			u64 exe_req:1;		/* reported as IOMMU_FAULT_PERM_EXEC */
253 			u64 pm_req:1;		/* reported as IOMMU_FAULT_PERM_PRIV */
254 			u64 rsvd2:10;
255 		};
256 		u64 qw_0;			/* raw view of the bitfields above */
257 	};
258 	union {
259 		struct {
260 			u64 rd_req:1;		/* reported as IOMMU_FAULT_PERM_READ */
261 			u64 wr_req:1;		/* reported as IOMMU_FAULT_PERM_WRITE */
262 			u64 lpig:1;		/* reported as ..._PAGE_REQUEST_LAST_PAGE */
263 			u64 prg_index:9;	/* reported as the fault group id */
264 			u64 addr:52;		/* address >> VTD_PAGE_SHIFT */
265 		};
266 		u64 qw_1;			/* raw view of the bitfields above */
267 	};
268 	u64 qw_2;				/* only passed on to trace_prq_report() */
269 	u64 qw_3;				/* only passed on to trace_prq_report() */
270 };
271 
272 static bool is_canonical_address(u64 addr)
273 {
274 	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
275 	long saddr = (long) addr;
276 
277 	return (((saddr << shift) >> shift) == saddr);
278 }
279 
280 /**
281  * intel_drain_pasid_prq - Drain page requests and responses for a pasid
282  * @dev: target device
283  * @pasid: pasid for draining
284  *
285  * Drain all pending page requests and responses related to @pasid in both
286  * software and hardware. This is supposed to be called after the device
287  * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
288  * and DevTLB have been invalidated.
289  *
290  * It waits until all pending page requests for @pasid in the page fault
291  * queue are completed by the prq handling thread. Then follow the steps
292  * described in VT-d spec CH7.10 to drain all page requests and page
293  * responses pending in the hardware.
294  */
295 void intel_drain_pasid_prq(struct device *dev, u32 pasid)
296 {
297 	struct device_domain_info *info;
298 	struct dmar_domain *domain;
299 	struct intel_iommu *iommu;
300 	struct qi_desc desc[3];
301 	struct pci_dev *pdev;
302 	int head, tail;
303 	u16 sid, did;
304 	int qdep;
305 
306 	info = dev_iommu_priv_get(dev);
307 	if (WARN_ON(!info || !dev_is_pci(dev)))
308 		return;
309 
310 	if (!info->pri_enabled)
311 		return;
312 
313 	iommu = info->iommu;
314 	domain = info->domain;
315 	pdev = to_pci_dev(dev);
316 	sid = PCI_DEVID(info->bus, info->devfn);
317 	did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;
318 	qdep = pci_ats_queue_depth(pdev);
319 
320 	/*
321 	 * Check and wait until all pending page requests in the queue are
322 	 * handled by the prq handling thread.
323 	 */
324 prq_retry:
325 	reinit_completion(&iommu->prq_complete);
326 	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
327 	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
328 	while (head != tail) {
329 		struct page_req_dsc *req;
330 
331 		req = &iommu->prq[head / sizeof(*req)];
332 		if (!req->pasid_present || req->pasid != pasid) {
333 			head = (head + sizeof(*req)) & PRQ_RING_MASK;
334 			continue;
335 		}
336 
337 		wait_for_completion(&iommu->prq_complete);
338 		goto prq_retry;
339 	}
340 
341 	iopf_queue_flush_dev(dev);
342 
343 	/*
344 	 * Perform steps described in VT-d spec CH7.10 to drain page
345 	 * requests and responses in hardware.
346 	 */
347 	memset(desc, 0, sizeof(desc));
348 	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
349 			QI_IWD_FENCE |
350 			QI_IWD_TYPE;
351 	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
352 			QI_EIOTLB_DID(did) |
353 			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
354 			QI_EIOTLB_TYPE;
355 	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
356 			QI_DEV_EIOTLB_SID(sid) |
357 			QI_DEV_EIOTLB_QDEP(qdep) |
358 			QI_DEIOTLB_TYPE |
359 			QI_DEV_IOTLB_PFSID(info->pfsid);
360 qi_retry:
361 	reinit_completion(&iommu->prq_complete);
362 	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
363 	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
364 		wait_for_completion(&iommu->prq_complete);
365 		goto qi_retry;
366 	}
367 }
368 
369 static int prq_to_iommu_prot(struct page_req_dsc *req)
370 {
371 	int prot = 0;
372 
373 	if (req->rd_req)
374 		prot |= IOMMU_FAULT_PERM_READ;
375 	if (req->wr_req)
376 		prot |= IOMMU_FAULT_PERM_WRITE;
377 	if (req->exe_req)
378 		prot |= IOMMU_FAULT_PERM_EXEC;
379 	if (req->pm_req)
380 		prot |= IOMMU_FAULT_PERM_PRIV;
381 
382 	return prot;
383 }
384 
385 static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
386 				 struct page_req_dsc *desc)
387 {
388 	struct iopf_fault event = { };
389 
390 	/* Fill in event data for device specific processing */
391 	event.fault.type = IOMMU_FAULT_PAGE_REQ;
392 	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
393 	event.fault.prm.pasid = desc->pasid;
394 	event.fault.prm.grpid = desc->prg_index;
395 	event.fault.prm.perm = prq_to_iommu_prot(desc);
396 
397 	if (desc->lpig)
398 		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
399 	if (desc->pasid_present) {
400 		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
401 		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
402 	}
403 
404 	iommu_report_device_fault(dev, &event);
405 }
406 
407 static void handle_bad_prq_event(struct intel_iommu *iommu,
408 				 struct page_req_dsc *req, int result)
409 {
410 	struct qi_desc desc = { };
411 
412 	pr_err("%s: Invalid page request: %08llx %08llx\n",
413 	       iommu->name, ((unsigned long long *)req)[0],
414 	       ((unsigned long long *)req)[1]);
415 
416 	if (!req->lpig)
417 		return;
418 
419 	desc.qw0 = QI_PGRP_PASID(req->pasid) |
420 			QI_PGRP_DID(req->rid) |
421 			QI_PGRP_PASID_P(req->pasid_present) |
422 			QI_PGRP_RESP_CODE(result) |
423 			QI_PGRP_RESP_TYPE;
424 	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
425 			QI_PGRP_LPIG(req->lpig);
426 
427 	qi_submit_sync(iommu, &desc, 1, 0);
428 }
429 
430 static irqreturn_t prq_event_thread(int irq, void *d)
431 {
432 	struct intel_iommu *iommu = d;
433 	struct page_req_dsc *req;
434 	int head, tail, handled;
435 	struct device *dev;
436 	u64 address;
437 
438 	/*
439 	 * Clear PPR bit before reading head/tail registers, to ensure that
440 	 * we get a new interrupt if needed.
441 	 */
442 	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
443 
444 	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
445 	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
446 	handled = (head != tail);
447 	while (head != tail) {
448 		req = &iommu->prq[head / sizeof(*req)];
449 		address = (u64)req->addr << VTD_PAGE_SHIFT;
450 
451 		if (unlikely(!req->pasid_present)) {
452 			pr_err("IOMMU: %s: Page request without PASID\n",
453 			       iommu->name);
454 bad_req:
455 			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
456 			goto prq_advance;
457 		}
458 
459 		if (unlikely(!is_canonical_address(address))) {
460 			pr_err("IOMMU: %s: Address is not canonical\n",
461 			       iommu->name);
462 			goto bad_req;
463 		}
464 
465 		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
466 			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
467 			       iommu->name);
468 			goto bad_req;
469 		}
470 
471 		if (unlikely(req->exe_req && req->rd_req)) {
472 			pr_err("IOMMU: %s: Execution request not supported\n",
473 			       iommu->name);
474 			goto bad_req;
475 		}
476 
477 		/* Drop Stop Marker message. No need for a response. */
478 		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
479 			goto prq_advance;
480 
481 		/*
482 		 * If prq is to be handled outside iommu driver via receiver of
483 		 * the fault notifiers, we skip the page response here.
484 		 */
485 		mutex_lock(&iommu->iopf_lock);
486 		dev = device_rbtree_find(iommu, req->rid);
487 		if (!dev) {
488 			mutex_unlock(&iommu->iopf_lock);
489 			goto bad_req;
490 		}
491 
492 		intel_svm_prq_report(iommu, dev, req);
493 		trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
494 				 req->qw_2, req->qw_3,
495 				 iommu->prq_seq_number++);
496 		mutex_unlock(&iommu->iopf_lock);
497 prq_advance:
498 		head = (head + sizeof(*req)) & PRQ_RING_MASK;
499 	}
500 
501 	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
502 
503 	/*
504 	 * Clear the page request overflow bit and wake up all threads that
505 	 * are waiting for the completion of this handling.
506 	 */
507 	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
508 		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
509 				    iommu->name);
510 		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
511 		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
512 		if (head == tail) {
513 			iopf_queue_discard_partial(iommu->iopf_queue);
514 			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
515 			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
516 					    iommu->name);
517 		}
518 	}
519 
520 	if (!completion_done(&iommu->prq_complete))
521 		complete(&iommu->prq_complete);
522 
523 	return IRQ_RETVAL(handled);
524 }
525 
526 void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
527 			     struct iommu_page_response *msg)
528 {
529 	struct device_domain_info *info = dev_iommu_priv_get(dev);
530 	struct intel_iommu *iommu = info->iommu;
531 	u8 bus = info->bus, devfn = info->devfn;
532 	struct iommu_fault_page_request *prm;
533 	struct qi_desc desc;
534 	bool pasid_present;
535 	bool last_page;
536 	u16 sid;
537 
538 	prm = &evt->fault.prm;
539 	sid = PCI_DEVID(bus, devfn);
540 	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
541 	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
542 
543 	desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
544 			QI_PGRP_PASID_P(pasid_present) |
545 			QI_PGRP_RESP_CODE(msg->code) |
546 			QI_PGRP_RESP_TYPE;
547 	desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
548 	desc.qw2 = 0;
549 	desc.qw3 = 0;
550 
551 	qi_submit_sync(iommu, &desc, 1, 0);
552 }
553 
554 static void intel_svm_domain_free(struct iommu_domain *domain)
555 {
556 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
557 
558 	/* dmar_domain free is deferred to the mmu free_notifier callback. */
559 	mmu_notifier_put(&dmar_domain->notifier);
560 }
561 
562 static const struct iommu_domain_ops intel_svm_domain_ops = {
563 	.set_dev_pasid		= intel_svm_set_dev_pasid,
564 	.free			= intel_svm_domain_free
565 };
566 
567 struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
568 					    struct mm_struct *mm)
569 {
570 	struct dmar_domain *domain;
571 	int ret;
572 
573 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
574 	if (!domain)
575 		return ERR_PTR(-ENOMEM);
576 
577 	domain->domain.ops = &intel_svm_domain_ops;
578 	domain->use_first_level = true;
579 	INIT_LIST_HEAD(&domain->dev_pasids);
580 	INIT_LIST_HEAD(&domain->cache_tags);
581 	spin_lock_init(&domain->cache_lock);
582 	spin_lock_init(&domain->lock);
583 
584 	domain->notifier.ops = &intel_mmuops;
585 	ret = mmu_notifier_register(&domain->notifier, mm);
586 	if (ret) {
587 		kfree(domain);
588 		return ERR_PTR(ret);
589 	}
590 
591 	return &domain->domain;
592 }
593