1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * intel-pasid.c - PASID idr, table and entry manipulation
4 *
5 * Copyright (C) 2018 Intel Corporation
6 *
7 * Author: Lu Baolu <baolu.lu@linux.intel.com>
8 */
9
10 #define pr_fmt(fmt) "DMAR: " fmt
11
12 #include <linux/bitops.h>
13 #include <linux/cpufeature.h>
14 #include <linux/dmar.h>
15 #include <linux/iommu.h>
16 #include <linux/memory.h>
17 #include <linux/pci.h>
18 #include <linux/pci-ats.h>
19 #include <linux/spinlock.h>
20
21 #include "iommu.h"
22 #include "pasid.h"
23 #include "../iommu-pages.h"
24
25 /*
26 * Intel IOMMU system wide PASID name space:
27 */
28 u32 intel_pasid_max_id = PASID_MAX;
29
30 /*
31 * Per device pasid table management:
32 */
33
34 /*
35 * Allocate a pasid table for @dev. It should be called in a
36 * single-thread context.
37 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	u32 max_pasid = 0;
	int order, size;

	/* GFP_KERNEL allocations below; caller must be in sleepable context. */
	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return -ENODEV;
	if (WARN_ON(info->pasid_table))
		return -EEXIST;

	pasid_table = kzalloc_obj(*pasid_table);
	if (!pasid_table)
		return -ENOMEM;

	/* Cap the PASID space at what both the device and driver support. */
	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	/*
	 * Directory size in bytes: one 8-byte directory entry per
	 * 2^PASID_PDE_SHIFT PASIDs, hence the (PASID_PDE_SHIFT - 3) shift.
	 */
	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL,
					1 << (order + PAGE_SHIFT));
	if (!dir) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = dir;
	/* Inverse of the size math above: 2^(order + PAGE_SHIFT + 3) PASIDs. */
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	/* Make the zeroed directory visible to a non-coherent IOMMU. */
	if (!ecap_coherent(info->iommu->ecap))
		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);

	return 0;
}
79
void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	/* Detach the table from the device before tearing it down. */
	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		/*
		 * NOTE(review): get_pasid_table_from_pde() presumably returns
		 * NULL for a non-present PDE and iommu_free_pages(NULL) is
		 * presumably a no-op — confirm against their definitions.
		 */
		table = get_pasid_table_from_pde(&dir[i]);
		iommu_free_pages(table);
	}

	/* Finally release the directory itself and the bookkeeping struct. */
	iommu_free_pages(pasid_table->table);
	kfree(pasid_table);
}
106
intel_pasid_get_table(struct device * dev)107 struct pasid_table *intel_pasid_get_table(struct device *dev)
108 {
109 struct device_domain_info *info;
110
111 info = dev_iommu_priv_get(dev);
112 if (!info)
113 return NULL;
114
115 return info->pasid_table;
116 }
117
intel_pasid_get_dev_max_id(struct device * dev)118 static int intel_pasid_get_dev_max_id(struct device *dev)
119 {
120 struct device_domain_info *info;
121
122 info = dev_iommu_priv_get(dev);
123 if (!info || !info->pasid_table)
124 return 0;
125
126 return info->pasid_table->max_pasid;
127 }
128
static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev_iommu_priv_get(dev);
	/* Split the PASID into directory index and leaf-table index. */
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		u64 tmp;

		/* GFP_ATOMIC: callers in this file hold iommu->lock. */
		entries = iommu_alloc_pages_node_sz(info->iommu->node,
						    GFP_ATOMIC, SZ_4K);
		if (!entries)
			return NULL;

		if (!ecap_coherent(info->iommu->ecap))
			clflush_cache_range(entries, VTD_PAGE_SIZE);

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation. No worry about the race with free and
		 * clear. However, this entry might be populated by others
		 * while we are preparing it. Use theirs with a retry.
		 */
		tmp = 0ULL;
		if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
				   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			/* Lost the race: drop ours and pick up the winner's. */
			iommu_free_pages(entries);
			goto retry;
		}
		if (!ecap_coherent(info->iommu->ecap))
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
	}

	return &entries[index];
}
177
178 /*
179 * Interfaces for PASID table entry manipulation:
180 */
181 static void
intel_pasid_clear_entry(struct device * dev,u32 pasid,bool fault_ignore)182 intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
183 {
184 struct pasid_entry *pe;
185
186 pe = intel_pasid_get_entry(dev, pasid);
187 if (WARN_ON(!pe))
188 return;
189
190 if (fault_ignore && pasid_pte_is_present(pe))
191 pasid_clear_entry_with_fpd(pe);
192 else
193 pasid_clear_entry(pe);
194 }
195
196 static void
pasid_cache_invalidation_with_pasid(struct intel_iommu * iommu,u16 did,u32 pasid)197 pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
198 u16 did, u32 pasid)
199 {
200 struct qi_desc desc;
201
202 desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
203 QI_PC_PASID(pasid) | QI_PC_TYPE;
204 desc.qw1 = 0;
205 desc.qw2 = 0;
206 desc.qw3 = 0;
207
208 qi_submit_sync(iommu, &desc, 1, 0);
209 }
210
211 static void
devtlb_invalidation_with_pasid(struct intel_iommu * iommu,struct device * dev,u32 pasid)212 devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
213 struct device *dev, u32 pasid)
214 {
215 struct device_domain_info *info;
216 u16 sid, qdep, pfsid;
217
218 info = dev_iommu_priv_get(dev);
219 if (!info || !info->ats_enabled)
220 return;
221
222 if (!pci_device_is_present(to_pci_dev(dev)))
223 return;
224
225 sid = PCI_DEVID(info->bus, info->devfn);
226 qdep = info->ats_qdep;
227 pfsid = info->pfsid;
228
229 /*
230 * When PASID 0 is used, it indicates RID2PASID(DMA request w/o PASID),
231 * devTLB flush w/o PASID should be used. For non-zero PASID under
232 * SVA usage, device could do DMA with multiple PASIDs. It is more
233 * efficient to flush devTLB specific to the PASID.
234 */
235 if (pasid == IOMMU_NO_PASID)
236 qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
237 else
238 qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
239 }
240
void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	if (!pasid_pte_is_present(pte)) {
		if (!pasid_pte_is_fault_disabled(pte)) {
			/* Non-present without FPD: entry must be fully zero. */
			WARN_ON(READ_ONCE(pte->val[0]) != 0);
			spin_unlock(&iommu->lock);
			return;
		}

		/*
		 * When a PASID is used for SVA by a device, it's possible
		 * that the pasid entry is non-present with the Fault
		 * Processing Disabled bit set. Clear the pasid entry and
		 * drain the PRQ for the PASID before return.
		 */
		pasid_clear_entry(pte);
		spin_unlock(&iommu->lock);
		intel_iommu_drain_pasid_prq(dev, pasid);

		return;
	}

	/* Snapshot DID/PGTT before clearing; the flushes below need them. */
	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	pasid_clear_present(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	/*
	 * Pass-through and first-level-only entries take a PASID-based
	 * IOTLB invalidation; everything else gets a domain-selective one.
	 */
	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
	/* Fully clear the entry now (FPD retained when @fault_ignore). */
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (!fault_ignore)
		intel_iommu_drain_pasid_prq(dev, pasid);
}
297
298 /*
299 * This function flushes cache for a newly setup pasid table entry.
300 * Caller of it should not modify the in-use pasid table entries.
301 */
/*
 * Flush caches after a new PASID table entry has been installed. Must not
 * be used on an in-use entry that is being modified.
 */
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (!cap_caching_mode(iommu->cap)) {
		iommu_flush_write_buffer(iommu);
		return;
	}

	/* Caching-mode hardware may cache the previous non-present entry. */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
}
316
317 /*
318 * This function is supposed to be used after caller updates the fields
319 * except for the SSADE and P bit of a pasid table entry. It does the
320 * below:
321 * - Flush cacheline if needed
322 * - Flush the caches per Table 28 ”Guidance to Software for Invalidations“
323 * of VT-d spec 5.0.
324 */
static void intel_pasid_flush_present(struct intel_iommu *iommu,
				      struct device *dev,
				      u32 pasid, u16 did,
				      struct pasid_entry *pte)
{
	/* Push the modified entry out of the CPU cache if needed. */
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * VT-d spec 5.0 table28 states guides for cache invalidation:
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	/* devtlb_invalidation_with_pasid() picks the right form per PASID. */
	devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
349
350 /*
351 * Set up the scalable mode pasid table entry for first only
352 * translation type.
353 */
static void pasid_pte_config_first_level(struct intel_iommu *iommu,
					 struct pasid_entry *pte,
					 phys_addr_t fsptptr, u16 did,
					 int flags)
{
	lockdep_assert_held(&iommu->lock);

	/* Start from a clean slate before programming the fields. */
	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, fsptptr);

	/* 5-level paging mode for the first-level table, when requested. */
	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, flags & PASID_FLAG_PWSNP);

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	/* Present is set last, after all other fields are programmed. */
	pasid_set_present(pte);
}
380
/*
 * Install a first-level-only PASID table entry for @pasid on @dev.
 * Returns 0 on success, -EINVAL when the hardware lacks the required
 * capability, -ENODEV when no entry can be obtained, -EBUSY when the
 * entry is already present.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev,
				  phys_addr_t fsptptr, u32 pasid, u16 did,
				  int flags)
{
	struct pasid_entry *pte;
	int ret;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		ret = -ENODEV;
		goto err_unlock;
	}
	if (pasid_pte_is_present(pte)) {
		ret = -EBUSY;
		goto err_unlock;
	}

	pasid_pte_config_first_level(iommu, pte, fsptptr, did, flags);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);
	return 0;

err_unlock:
	spin_unlock(&iommu->lock);
	return ret;
}
419
420 /*
421 * Set up the scalable mode pasid entry for second only translation type.
422 */
static void pasid_pte_config_second_level(struct intel_iommu *iommu,
					  struct pasid_entry *pte,
					  struct dmar_domain *domain, u16 did)
{
	struct pt_iommu_vtdss_hw_info pt_info;

	lockdep_assert_held(&iommu->lock);

	/* Pull the second-stage table pointer and address width from pt. */
	pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);
	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pt_info.ssptptr);
	pasid_set_address_width(pte, pt_info.aw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	/* Page-walk snoop only when the page table is DMA coherent. */
	pasid_set_page_snoop(pte, !(domain->sspt.vtdss_pt.common.features &
				    BIT(PT_FEAT_DMA_INCOHERENT)));
	if (domain->dirty_tracking)
		pasid_set_ssade(pte);

	/* Present is set last, after all other fields are programmed. */
	pasid_set_present(pte);
}
445
/*
 * Install a second-level-only PASID table entry for @pasid on @dev, mapping
 * it to @domain. Returns 0 on success or a negative errno.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	int ret = 0;
	u16 did;

	/*
	 * Bail out right away when the hardware does not implement
	 * second level translation.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte)
		ret = -ENODEV;
	else if (pasid_pte_is_present(pte))
		ret = -EBUSY;

	if (ret) {
		spin_unlock(&iommu->lock);
		return ret;
	}

	pasid_pte_config_second_level(iommu, pte, domain, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
485
486 /*
487 * Set up dirty tracking on a second only or nested translation type.
488 */
int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
				     struct device *dev, u32 pasid,
				     bool enabled)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	/* Dirty tracking only exists for second-stage and nested entries. */
	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
	    pgtt != PASID_ENTRY_PGTT_NESTED) {
		spin_unlock(&iommu->lock);
		dev_err_ratelimited(
			dev,
			"Dirty tracking not supported on translation type %d\n",
			pgtt);
		return -EOPNOTSUPP;
	}

	/* Already in the requested state: nothing to program or flush. */
	if (pasid_get_ssade(pte) == enabled) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (enabled)
		pasid_set_ssade(pte);
	else
		pasid_clear_ssade(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
	 *
	 * - PASID-selective-within-Domain PASID-cache invalidation
	 *   If (PGTT=SS or Nested)
	 *    - Domain-selective IOTLB invalidation
	 *   Else
	 *    - PASID-selective PASID-based IOTLB invalidation
	 * - If (pasid is RID_PASID)
	 *    - Global Device-TLB invalidation to affected functions
	 *   Else
	 *    - PASID-based Device-TLB invalidation (with S=1 and
	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	/* PGTT is SS or Nested here, so a domain-selective flush suffices. */
	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	devtlb_invalidation_with_pasid(iommu, dev, pasid);

	return 0;
}
554
555 /*
556 * Set up the scalable mode pasid entry for passthrough translation type.
557 */
static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
					  struct pasid_entry *pte, u16 did)
{
	lockdep_assert_held(&iommu->lock);

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	/* Snoop control follows the SMPWC capability bit of this IOMMU. */
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
	/* Present is set last, after all other fields are programmed. */
	pasid_set_present(pte);
}
571
/*
 * Install a pass-through PASID table entry for @pasid on @dev using the
 * default first-level pass-through domain ID.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;
	int ret = -ENODEV;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte)
		goto err_unlock;

	ret = -EBUSY;
	if (pasid_pte_is_present(pte))
		goto err_unlock;

	pasid_pte_config_pass_through(iommu, pte, did);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);
	return 0;

err_unlock:
	spin_unlock(&iommu->lock);
	return ret;
}
597
598 /*
599 * Set the page snoop control for a pasid entry which has been set up.
600 */
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	/* Only valid on an entry that has already been set up. */
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	/* The entry was present; invalidate per the present-entry rules. */
	intel_pasid_flush_present(iommu, dev, pasid, did, pte);
}
620
/*
 * Program @pte for nested translation: stage-1 config from user space
 * (@s1_cfg) layered on the stage-2 tables of @s2_domain.
 * NOTE(review): "nestd" looks like a typo for "nested"; kept as-is to
 * avoid churn at the call site.
 */
static void pasid_pte_config_nestd(struct intel_iommu *iommu,
				   struct pasid_entry *pte,
				   struct iommu_hwpt_vtd_s1 *s1_cfg,
				   struct dmar_domain *s2_domain,
				   u16 did)
{
	struct pt_iommu_vtdss_hw_info pt_info;

	lockdep_assert_held(&iommu->lock);

	/* Stage-2 table pointer and address width come from the pt layer. */
	pt_iommu_vtdss_hw_info(&s2_domain->sspt, &pt_info);

	pasid_clear_entry(pte);

	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
		pasid_set_flpm(pte, 1);

	/* First-level table pointer is supplied by the user stage-1 config. */
	pasid_set_flptr(pte, s1_cfg->pgtbl_addr);

	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
		pasid_set_sre(pte);
		/* WPE only has a corresponding setter when SRE is set here. */
		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
			pasid_set_wpe(pte);
	}

	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
		pasid_set_eafe(pte);

	if (s2_domain->force_snooping)
		pasid_set_pgsnp(pte);

	pasid_set_slptr(pte, pt_info.ssptptr);
	pasid_set_fault_enable(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, pt_info.aw);
	pasid_set_page_snoop(pte, !(s2_domain->sspt.vtdss_pt.common.features &
				    BIT(PT_FEAT_DMA_INCOHERENT)));
	if (s2_domain->dirty_tracking)
		pasid_set_ssade(pte);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	/* Present is set last, after all other fields are programmed. */
	pasid_set_present(pte);
}
663
664 /**
665 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
666 * @iommu: IOMMU which the device belong to
667 * @dev: Device to be set up for translation
668 * @pasid: PASID to be programmed in the device PASID table
669 * @domain: User stage-1 domain nested on a stage-2 domain
670 *
671 * This is used for nested translation. The input domain should be
672 * nested type and nested on a parent with 'is_nested_parent' flag
673 * set.
674 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     u32 pasid, struct dmar_domain *domain)
{
	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
	struct dmar_domain *s2_domain = domain->s2_domain;
	u16 did = domain_id_iommu(domain, iommu);
	struct pasid_entry *pte;

	/* Address width should match the address width supported by hardware */
	switch (s1_cfg->addr_width) {
	case ADDR_WIDTH_4LEVEL:
		break;
	case ADDR_WIDTH_5LEVEL:
		if (!cap_fl5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}
		break;
	default:
		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
				    s1_cfg->addr_width);
		return -EINVAL;
	}

	/* User-requested SRE needs supervisor-request hardware support. */
	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
		pr_err_ratelimited("No supervisor request support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	/* Likewise EAFE needs the extended-access-flag capability. */
	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
		pr_err_ratelimited("No extended access flag support on %s\n",
				   iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}
	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_pte_config_nestd(iommu, pte, s1_cfg, s2_domain, did);
	spin_unlock(&iommu->lock);

	/* New (non-present -> present) entry: flush per setup rules. */
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
730
731 /*
732 * Interfaces to setup or teardown a pasid table to the scalable-mode
733 * context table entry:
734 */
735
/*
 * Clear the scalable-mode context entry at (@bus, @devfn) that points to
 * @dev's PASID table, then perform the required cache invalidations.
 */
static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;
	u16 did;

	spin_lock(&iommu->lock);
	/* alloc=false: a missing context entry means nothing to tear down. */
	context = iommu_context_addr(iommu, bus, devfn, false);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	/* Record the DID before clearing; the flush below needs it. */
	did = context_domain_id(context);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_no_pasid(info, context, did);
}
756
/* pci_for_each_dma_alias() callback: tear down only for the device itself. */
static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev != &pdev->dev)
		return 0;

	device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);
	return 0;
}
766
intel_pasid_teardown_sm_context(struct device * dev)767 void intel_pasid_teardown_sm_context(struct device *dev)
768 {
769 struct device_domain_info *info = dev_iommu_priv_get(dev);
770
771 if (!dev_is_pci(dev)) {
772 device_pasid_table_teardown(dev, info->bus, info->devfn);
773 return;
774 }
775
776 pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
777 }
778
779 /*
780 * Get the PASID directory size for scalable mode context entry.
781 * Value of X in the PDTS field of a scalable mode context entry
782 * indicates PASID directory with 2^(X + 7) entries.
783 */
context_get_sm_pds(struct pasid_table * table)784 static unsigned long context_get_sm_pds(struct pasid_table *table)
785 {
786 unsigned long pds, max_pde;
787
788 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
789 pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
790 if (pds < 7)
791 return 0;
792
793 return pds - 7;
794 }
795
/*
 * Program a scalable-mode context entry to point at @dev's PASID table
 * and enable the per-device features the platform advertises.
 */
static int context_entry_set_pasid_table(struct context_entry *context,
					 struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct pasid_table *table = info->pasid_table;
	struct intel_iommu *iommu = info->iommu;
	unsigned long pds;

	context_clear_entry(context);

	/* Low qword: PASID directory pointer plus its encoded size. */
	pds = context_get_sm_pds(table);
	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);

	/* Device TLB, PASID and page-request enables follow device caps. */
	if (info->ats_supported)
		context_set_sm_dte(context);
	if (info->pasid_supported)
		context_set_pasid(context);
	if (info->pri_supported)
		context_set_sm_pre(context);

	context_set_fault_enable(context);
	context_set_present(context);
	/* Ensure the hardware observes the completed entry. */
	__iommu_flush_cache(iommu, context, sizeof(*context));

	return 0;
}
823
/*
 * Install @dev's PASID table into the context entry at (@bus, @devfn).
 * Handles the kdump case where a context entry copied from the previous
 * kernel must be retired with full cache invalidation first.
 */
static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, true);
	if (!context) {
		spin_unlock(&iommu->lock);
		return -ENOMEM;
	}

	/* Already programmed by this kernel: nothing to do. */
	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	if (context_copied(iommu, bus, devfn)) {
		/* Stop the hardware using the old entry before flushing. */
		context_clear_present(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * For kdump cases, old valid entries may be cached due to
		 * the in-flight DMA and copied pgtable, but there is no
		 * unmapping behaviour for them, thus we need explicit cache
		 * flushes for all affected domain IDs and PASIDs used in
		 * the copied PASID table. Given that we have no idea about
		 * which domain IDs and PASIDs were used in the copied tables,
		 * upgrade them to global PASID and IOTLB cache invalidation.
		 */
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));

		/*
		 * At this point, the device is supposed to finish reset at
		 * its driver probe stage, so no in-flight DMA will exist,
		 * and we don't need to worry anymore hereafter.
		 */
		clear_context_copied(iommu, bus, devfn);
	}

	context_entry_set_pasid_table(context, dev);
	spin_unlock(&iommu->lock);

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entry we don't need to flush the caches. If it does
	 * cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
	}

	return 0;
}
893
/* pci_for_each_dma_alias() callback: program only for the device itself. */
static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev == &pdev->dev)
		return device_pasid_table_setup(dev, PCI_BUS_NUM(alias),
						alias & 0xff);

	return 0;
}
903
904 /*
905 * Set the device's PASID table to its context table entry.
906 *
907 * The PASID table is set to the context entries of both device itself
908 * and its alias requester ID for DMA.
909 */
intel_pasid_setup_sm_context(struct device * dev)910 int intel_pasid_setup_sm_context(struct device *dev)
911 {
912 struct device_domain_info *info = dev_iommu_priv_get(dev);
913
914 if (!dev_is_pci(dev))
915 return device_pasid_table_setup(dev, info->bus, info->devfn);
916
917 return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
918 }
919
920 /*
921 * Global Device-TLB invalidation following changes in a context entry which
922 * was present.
923 */
static void __context_flush_dev_iotlb(struct device_domain_info *info)
{
	/* Nothing to flush when ATS was never enabled on this device. */
	if (!info->ats_enabled)
		return;

	/*
	 * Skip dev-IOTLB flush for inaccessible PCIe devices to prevent the
	 * Intel IOMMU from waiting indefinitely for an ATS invalidation that
	 * cannot complete.
	 */
	if (!pci_device_is_present(to_pci_dev(info->dev)))
		return;

	/* Global flush: entire address range, no PASID. */
	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);

	/*
	 * There is no guarantee that the device DMA is stopped when it reaches
	 * here. Therefore, always attempt the extra device TLB invalidation
	 * quirk. The impact on performance is acceptable since this is not a
	 * performance-critical path.
	 */
	quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
				  info->ats_qdep);
}
949
950 /*
951 * Cache invalidations after change in a context table entry that was present
952 * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations).
953 * This helper can only be used when IOMMU is working in the legacy mode or
954 * IOMMU is in scalable mode but all PASID table entries of the device are
955 * non-present.
956 */
void intel_context_flush_no_pasid(struct device_domain_info *info,
				  struct context_entry *context, u16 did)
{
	struct intel_iommu *iommu = info->iommu;

	/*
	 * Device-selective context-cache invalidation. The Domain-ID field
	 * of the Context-cache Invalidate Descriptor is ignored by hardware
	 * when operating in scalable mode, so @did only matters in legacy
	 * mode.
	 */
	iommu->flush.flush_context(iommu, did,
				   PCI_DEVID(info->bus, info->devfn),
				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);

	/*
	 * Legacy mode additionally needs a domain-selective IOTLB
	 * invalidation before the global Device-TLB invalidation that both
	 * modes perform.
	 */
	if (!sm_supported(iommu))
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	__context_flush_dev_iotlb(info);
}
985