// SPDX-License-Identifier: GPL-2.0
/*
 * pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
u32 intel_pasid_max_id = PASID_MAX;

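/*
 * Allocate a PASID through the Virtual Command Register interface: the
 * allocation request is written to DMAR_VCMD_REG and the result (or
 * error status) is read back from DMAR_VCRSP_REG.
 */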
int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
{
	unsigned long flags;
	u8 status_code;
	int ret = 0;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		*pasid = VCMD_VRSP_RESULT_PASID(res);
		break;
	case VCMD_VRSP_SC_NO_PASID_AVAIL:
		pr_info("IOMMU: %s: No PASID available\n", iommu->name);
		ret = -ENOSPC;
		break;
	default:
		ret = -ENODEV;
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}

	return ret;
}

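/*
 * Free a PASID previously allocated through the Virtual Command Register
 * interface. Failures are only logged; there is nothing more the caller
 * could do about them.
 */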
void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid)
{
	unsigned long flags;
	u8 status_code;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG,
		    VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		break;
	case VCMD_VRSP_SC_INVALID_PASID:
		pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
		break;
	default:
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}
}

/*
 * Per device pasid table management:
 */

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct page *pages;
	u32 max_pasid = 0;
	int order, size;

	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return -ENODEV;
	if (WARN_ON(info->pasid_table))
		return -EEXIST;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);

	return 0;
}

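/*
 * Free the PASID table of @dev: release every PASID table page referenced
 * by a present directory entry, then the directory itself.
 */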
void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

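/* Return the PASID table attached to @dev, or NULL if there is none. */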
struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

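/*
 * Return the number of PASIDs covered by the PASID table of @dev, or 0 if
 * the device has no PASID table.
 */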
static int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

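/*
 * Return the PASID table entry of @dev for @pasid, allocating the covering
 * PASID table page on demand. Returns NULL on failure.
 */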
static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
		if (!entries)
			return NULL;

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation, so there is no race with free and clear.
		 * However, this entry might be populated by others while
		 * we are preparing it. If so, use theirs and retry.
		 */
		if (cmpxchg64(&dir[dir_index].val, 0ULL,
			      (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			free_pgtable_page(entries);
			goto retry;
		}
		if (!ecap_coherent(info->iommu->ecap)) {
			clflush_cache_range(entries, VTD_PAGE_SIZE);
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
		}
	}

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], 0);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

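/*
 * Clear the entry but keep the FPD (Fault Processing Disable) bit set, so
 * that faults against this PASID are suppressed rather than reported.
 */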
static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], PASID_PTE_FPD);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

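/*
 * Clear the PASID table entry of @dev for @pasid. When @fault_ignore is
 * true and the entry is present, leave FPD set so that faults against the
 * torn-down PASID are not reported.
 */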
static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

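/* Read-modify-write helper for a single 64-bit word of a PASID entry. */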
static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 old;

	old = READ_ONCE(*ptr);
	WRITE_ONCE(*ptr, (old & ~mask) | bits);
}

static inline u64 pasid_get_bits(u64 *ptr)
{
	return READ_ONCE(*ptr);
}

/*
 * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
 * PASID entry.
 */
static inline void
pasid_set_domain_id(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
}

/*
 * Get domain ID value of a scalable mode PASID entry.
 */
static inline u16
pasid_get_domain_id(struct pasid_entry *pe)
{
	return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
}

/*
 * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_slptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
}

/*
 * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
 * entry.
 */
static inline void
pasid_set_address_width(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
}

/*
 * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_translation_type(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
}

/*
 * Enable fault processing by clearing the FPD(Fault Processing
 * Disable) field (Bit 1) of a scalable mode PASID entry.
 */
static inline void pasid_set_fault_enable(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 1, 0);
}

/*
 * Enable second level A/D bits by setting the SLADE (Second Level
 * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_ssade(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9);
}

/*
 * Disable second level A/D bits by clearing the SLADE (Second Level
 * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
 * entry.
 */
static inline void pasid_clear_ssade(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 9, 0);
}

/*
 * Check whether the SLADE (Second Level Access Dirty Enable) field
 * (Bit 9) of a scalable mode PASID entry is set.
 */
static inline bool pasid_get_ssade(struct pasid_entry *pe)
{
	return pasid_get_bits(&pe->val[0]) & (1 << 9);
}

/*
 * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
 * scalable mode PASID entry.
 */
static inline void pasid_set_sre(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 0, 1);
}

/*
 * Setup the WPE(Write Protect Enable) field (Bit 132) of a
 * scalable mode PASID entry.
 */
static inline void pasid_set_wpe(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 4, 1 << 4);
}

/*
 * Setup the P(Present) field (Bit 0) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_present(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 0, 1);
}

/*
 * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
{
	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
}

/*
 * Setup the No Execute Enable bit (Bit 133) of a scalable mode PASID
 * entry. It is required when the XD bit of a first level page table
 * entry is about to be set.
 */
static inline void pasid_set_nxe(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5);
}

/*
 * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode
 * PASID entry.
 */
static inline void
pasid_set_pgsnp(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24);
}

/*
 * Setup the First Level Page table Pointer field (Bit 140~191)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_flptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
}

/*
 * Setup the First Level Paging Mode field (Bit 130~131) of a
 * scalable mode PASID entry.
 */
static inline void
pasid_set_flpm(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}

/*
 * Setup the Extended Access Flag Enable (EAFE) field (Bit 135)
 * of a scalable mode PASID entry.
 */
static inline void pasid_set_eafe(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
}

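/*
 * Issue a PASID-selective PASID-cache invalidation for @pasid within
 * domain @did and wait for it to complete.
 */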
static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

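/*
 * Invalidate the device TLB of @dev after a PASID entry change. No-op if
 * ATS is not enabled on the device.
 */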
static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * When PASID 0 is used, it indicates RID2PASID (DMA request w/o
	 * PASID), so a devTLB flush w/o PASID should be used. For a
	 * non-zero PASID under SVA usage, the device could do DMA with
	 * multiple PASIDs, and it is more efficient to flush the devTLB
	 * specific to the PASID.
	 */
	if (pasid == IOMMU_NO_PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

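/*
 * Tear down the PASID table entry of @dev for @pasid and invalidate the
 * PASID cache, the IOTLB and, outside of caching mode, the device TLB.
 */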
void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

/*
 * This function flushes the caches for a newly set up PASID table entry.
 * Callers must not modify in-use PASID table entries.
 */
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * Set up the scalable mode pasid table entry for first level only
 * translation type.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  u32 pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));

	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_nxe(pte);

	/* Setup Present and PASID Granular Translation Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Skip the top levels of a domain's page table for an IOMMU whose agaw
 * is smaller than the domain's agaw. Unnecessary for PT mode.
 */
static inline int iommu_skip_agaw(struct dmar_domain *domain,
				  struct intel_iommu *iommu,
				  struct dma_pte **pgd)
{
	int agaw;

	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		*pgd = phys_to_virt(dma_pte_addr(*pgd));
		if (!dma_pte_present(*pgd))
			return -EINVAL;
	}

	return agaw;
}

/*
 * Set up the scalable mode pasid entry for second level only
 * translation type.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err(dev, "Invalid domain page table\n");
		return -EINVAL;
	}

	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	if (domain->dirty_tracking)
		pasid_set_ssade(pte);

	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set up dirty tracking on a second level only or nested translation type.
 */
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
				     struct dmar_domain *domain,
				     struct device *dev, u32 pasid,
				     bool enabled)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	did = domain_id_iommu(domain, iommu);
	pgtt = pasid_pte_get_pgtt(pte);
	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
	    pgtt != PASID_ENTRY_PGTT_NESTED) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev,
			"Dirty tracking not supported on translation type %d\n",
			pgtt);
		return -EOPNOTSUPP;
	}

	if (pasid_get_ssade(pte) == enabled) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (enabled)
		pasid_set_ssade(pte);
	else
		pasid_clear_ssade(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 *   If (PGTT=SS or Nested)
	 *    - Domain-selective IOTLB invalidation
	 *   Else
	 *    - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set the page snoop control for a pasid entry which has been set up.
 */
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * VT-d spec 3.4 table 23 gives guidance for cache invalidation:
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * @iommu:      IOMMU which the device belongs to
 * @dev:        Device to be set up for translation
 * @pasid:      PASID to be programmed in the device PASID table
 * @domain:     User stage-1 domain nested on a stage-2 domain
 *
 * This is used for nested translation. The input domain should be
 * nested type and nested on a parent domain with the 'is_nested_parent'
 * flag set.
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     u32 pasid, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	pgd_t *s1_gpgd = (pgd_t *)(uintptr_t)domain->s1_pgtbl;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct dma_pte *pgd = s2_domain->pgd;
	struct pasid_entry *pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}
	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);

	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
		pasid_set_flpm(pte, 1);

	pasid_set_flptr(pte, (uintptr_t)s1_gpgd);

	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
		pasid_set_sre(pte);
		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
			pasid_set_wpe(pte);
	}

	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
		pasid_set_eafe(pte);

	if (s2_domain->force_snooping)
		pasid_set_pgsnp(pte);

	pasid_set_slptr(pte, virt_to_phys(pgd));
	pasid_set_fault_enable(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, s2_domain->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
937