1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * intel-pasid.c - PASID idr, table and entry manipulation
4 *
5 * Copyright (C) 2018 Intel Corporation
6 *
7 * Author: Lu Baolu <baolu.lu@linux.intel.com>
8 */
9
10 #define pr_fmt(fmt) "DMAR: " fmt
11
12 #include <linux/bitops.h>
13 #include <linux/cpufeature.h>
14 #include <linux/dmar.h>
15 #include <linux/iommu.h>
16 #include <linux/memory.h>
17 #include <linux/pci.h>
18 #include <linux/pci-ats.h>
19 #include <linux/spinlock.h>
20
21 #include "iommu.h"
22 #include "pasid.h"
23 #include "../iommu-pages.h"
24
/*
 * Intel IOMMU system wide PASID name space: upper bound applied when a
 * device's PASID table is sized. Initialised to PASID_MAX.
 */
u32 intel_pasid_max_id = PASID_MAX;
29
30 /*
31 * Per device pasid table management:
32 */
33
34 /*
35 * Allocate a pasid table for @dev. It should be called in a
36 * single-thread context.
37 */
intel_pasid_alloc_table(struct device * dev)38 int intel_pasid_alloc_table(struct device *dev)
39 {
40 struct device_domain_info *info;
41 struct pasid_table *pasid_table;
42 struct pasid_dir_entry *dir;
43 u32 max_pasid = 0;
44 int order, size;
45
46 might_sleep();
47 info = dev_iommu_priv_get(dev);
48 if (WARN_ON(!info || !dev_is_pci(dev)))
49 return -ENODEV;
50 if (WARN_ON(info->pasid_table))
51 return -EEXIST;
52
53 pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
54 if (!pasid_table)
55 return -ENOMEM;
56
57 if (info->pasid_supported)
58 max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
59 intel_pasid_max_id);
60
61 size = max_pasid >> (PASID_PDE_SHIFT - 3);
62 order = size ? get_order(size) : 0;
63 dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
64 if (!dir) {
65 kfree(pasid_table);
66 return -ENOMEM;
67 }
68
69 pasid_table->table = dir;
70 pasid_table->order = order;
71 pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
72 info->pasid_table = pasid_table;
73
74 if (!ecap_coherent(info->iommu->ecap))
75 clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);
76
77 return 0;
78 }
79
intel_pasid_free_table(struct device * dev)80 void intel_pasid_free_table(struct device *dev)
81 {
82 struct device_domain_info *info;
83 struct pasid_table *pasid_table;
84 struct pasid_dir_entry *dir;
85 struct pasid_entry *table;
86 int i, max_pde;
87
88 info = dev_iommu_priv_get(dev);
89 if (!info || !dev_is_pci(dev) || !info->pasid_table)
90 return;
91
92 pasid_table = info->pasid_table;
93 info->pasid_table = NULL;
94
95 /* Free scalable mode PASID directory tables: */
96 dir = pasid_table->table;
97 max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
98 for (i = 0; i < max_pde; i++) {
99 table = get_pasid_table_from_pde(&dir[i]);
100 iommu_free_page(table);
101 }
102
103 iommu_free_pages(pasid_table->table, pasid_table->order);
104 kfree(pasid_table);
105 }
106
intel_pasid_get_table(struct device * dev)107 struct pasid_table *intel_pasid_get_table(struct device *dev)
108 {
109 struct device_domain_info *info;
110
111 info = dev_iommu_priv_get(dev);
112 if (!info)
113 return NULL;
114
115 return info->pasid_table;
116 }
117
intel_pasid_get_dev_max_id(struct device * dev)118 static int intel_pasid_get_dev_max_id(struct device *dev)
119 {
120 struct device_domain_info *info;
121
122 info = dev_iommu_priv_get(dev);
123 if (!info || !info->pasid_table)
124 return 0;
125
126 return info->pasid_table->max_pasid;
127 }
128
/*
 * Look up the PASID table entry for @pasid in @dev's PASID table.
 *
 * When the directory slot covering @pasid has no table yet, a page is
 * allocated with GFP_ATOMIC and installed into the slot with a cmpxchg.
 * If another context wins the race, its table is used instead.
 *
 * Return: pointer to the entry, or NULL when @dev has no PASID table,
 * @pasid is out of range for the table (both cases WARN), or the page
 * allocation fails.
 */
static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		u64 tmp;

		entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
		if (!entries)
			return NULL;

		/*
		 * Write the new table back to memory BEFORE the directory
		 * entry makes it reachable. Otherwise non-coherent hardware
		 * could follow the directory entry into a page whose
		 * contents are still only in the CPU cache.
		 */
		if (!ecap_coherent(info->iommu->ecap))
			clflush_cache_range(entries, VTD_PAGE_SIZE);

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation. No worry about the race with free and
		 * clear. However, this entry might be populated by others
		 * while we are preparing it. Use theirs with a retry.
		 */
		tmp = 0ULL;
		if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
				   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			iommu_free_page(entries);
			goto retry;
		}

		/* Now publish the directory entry itself to the hardware. */
		if (!ecap_coherent(info->iommu->ecap))
			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
	}

	return &entries[index];
}
175
176 /*
177 * Interfaces for PASID table entry manipulation:
178 */
179 static void
intel_pasid_clear_entry(struct device * dev,u32 pasid,bool fault_ignore)180 intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
181 {
182 struct pasid_entry *pe;
183
184 pe = intel_pasid_get_entry(dev, pasid);
185 if (WARN_ON(!pe))
186 return;
187
188 if (fault_ignore && pasid_pte_is_present(pe))
189 pasid_clear_entry_with_fpd(pe);
190 else
191 pasid_clear_entry(pe);
192 }
193
194 static void
pasid_cache_invalidation_with_pasid(struct intel_iommu * iommu,u16 did,u32 pasid)195 pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
196 u16 did, u32 pasid)
197 {
198 struct qi_desc desc;
199
200 desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
201 QI_PC_PASID(pasid) | QI_PC_TYPE;
202 desc.qw1 = 0;
203 desc.qw2 = 0;
204 desc.qw3 = 0;
205
206 qi_submit_sync(iommu, &desc, 1, 0);
207 }
208
209 static void
devtlb_invalidation_with_pasid(struct intel_iommu * iommu,struct device * dev,u32 pasid)210 devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
211 struct device *dev, u32 pasid)
212 {
213 struct device_domain_info *info;
214 u16 sid, qdep, pfsid;
215
216 info = dev_iommu_priv_get(dev);
217 if (!info || !info->ats_enabled)
218 return;
219
220 if (pci_dev_is_disconnected(to_pci_dev(dev)))
221 return;
222
223 sid = info->bus << 8 | info->devfn;
224 qdep = info->ats_qdep;
225 pfsid = info->pfsid;
226
227 /*
228 * When PASID 0 is used, it indicates RID2PASID(DMA request w/o PASID),
229 * devTLB flush w/o PASID should be used. For non-zero PASID under
230 * SVA usage, device could do DMA with multiple PASIDs. It is more
231 * efficient to flush devTLB specific to the PASID.
232 */
233 if (pasid == IOMMU_NO_PASID)
234 qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
235 else
236 qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
237 }
238
/*
 * Tear down the PASID table entry of @pasid for @dev and invalidate the
 * caches that may hold it. The entry is cleared under iommu->lock; the
 * cache flushes run after the lock is dropped. Returns silently when the
 * entry is not present (and WARNs when it cannot be looked up).
 */
void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *entry;
	u16 domain_id, xlate_type;

	spin_lock(&iommu->lock);
	entry = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!entry) || !pasid_pte_is_present(entry)) {
		spin_unlock(&iommu->lock);
		return;
	}

	/* Sample what the flushes need before the entry is wiped. */
	domain_id = pasid_get_domain_id(entry);
	xlate_type = pasid_pte_get_pgtt(entry);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(entry, sizeof(*entry));

	pasid_cache_invalidation_with_pasid(iommu, domain_id, pasid);

	switch (xlate_type) {
	case PASID_ENTRY_PGTT_PT:
	case PASID_ENTRY_PGTT_FL_ONLY:
		qi_flush_piotlb(iommu, domain_id, pasid, 0, -1, 0);
		break;
	default:
		iommu->flush.flush_iotlb(iommu, domain_id, 0, 0,
					 DMA_TLB_DSI_FLUSH);
		break;
	}

	devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
269
270 /*
271 * This function flushes cache for a newly setup pasid table entry.
272 * Caller of it should not modify the in-use pasid table entries.
273 */
pasid_flush_caches(struct intel_iommu * iommu,struct pasid_entry * pte,u32 pasid,u16 did)274 static void pasid_flush_caches(struct intel_iommu *iommu,
275 struct pasid_entry *pte,
276 u32 pasid, u16 did)
277 {
278 if (!ecap_coherent(iommu->ecap))
279 clflush_cache_range(pte, sizeof(*pte));
280
281 if (cap_caching_mode(iommu->cap)) {
282 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
283 qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
284 } else {
285 iommu_flush_write_buffer(iommu);
286 }
287 }
288
289 /*
290 * Set up the scalable mode pasid table entry for first only
291 * translation type.
292 */
intel_pasid_setup_first_level(struct intel_iommu * iommu,struct device * dev,pgd_t * pgd,u32 pasid,u16 did,int flags)293 int intel_pasid_setup_first_level(struct intel_iommu *iommu,
294 struct device *dev, pgd_t *pgd,
295 u32 pasid, u16 did, int flags)
296 {
297 struct pasid_entry *pte;
298
299 if (!ecap_flts(iommu->ecap)) {
300 pr_err("No first level translation support on %s\n",
301 iommu->name);
302 return -EINVAL;
303 }
304
305 if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
306 pr_err("No 5-level paging support for first-level on %s\n",
307 iommu->name);
308 return -EINVAL;
309 }
310
311 spin_lock(&iommu->lock);
312 pte = intel_pasid_get_entry(dev, pasid);
313 if (!pte) {
314 spin_unlock(&iommu->lock);
315 return -ENODEV;
316 }
317
318 if (pasid_pte_is_present(pte)) {
319 spin_unlock(&iommu->lock);
320 return -EBUSY;
321 }
322
323 pasid_clear_entry(pte);
324
325 /* Setup the first level page table pointer: */
326 pasid_set_flptr(pte, (u64)__pa(pgd));
327
328 if (flags & PASID_FLAG_FL5LP)
329 pasid_set_flpm(pte, 1);
330
331 if (flags & PASID_FLAG_PAGE_SNOOP)
332 pasid_set_pgsnp(pte);
333
334 pasid_set_domain_id(pte, did);
335 pasid_set_address_width(pte, iommu->agaw);
336 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
337
338 /* Setup Present and PASID Granular Transfer Type: */
339 pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
340 pasid_set_present(pte);
341 spin_unlock(&iommu->lock);
342
343 pasid_flush_caches(iommu, pte, pasid, did);
344
345 return 0;
346 }
347
348 /*
349 * Skip top levels of page tables for iommu which has less agaw
350 * than default. Unnecessary for PT mode.
351 */
iommu_skip_agaw(struct dmar_domain * domain,struct intel_iommu * iommu,struct dma_pte ** pgd)352 static int iommu_skip_agaw(struct dmar_domain *domain,
353 struct intel_iommu *iommu,
354 struct dma_pte **pgd)
355 {
356 int agaw;
357
358 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
359 *pgd = phys_to_virt(dma_pte_addr(*pgd));
360 if (!dma_pte_present(*pgd))
361 return -EINVAL;
362 }
363
364 return agaw;
365 }
366
367 /*
368 * Set up the scalable mode pasid entry for second only translation type.
369 */
intel_pasid_setup_second_level(struct intel_iommu * iommu,struct dmar_domain * domain,struct device * dev,u32 pasid)370 int intel_pasid_setup_second_level(struct intel_iommu *iommu,
371 struct dmar_domain *domain,
372 struct device *dev, u32 pasid)
373 {
374 struct pasid_entry *pte;
375 struct dma_pte *pgd;
376 u64 pgd_val;
377 int agaw;
378 u16 did;
379
380 /*
381 * If hardware advertises no support for second level
382 * translation, return directly.
383 */
384 if (!ecap_slts(iommu->ecap)) {
385 pr_err("No second level translation support on %s\n",
386 iommu->name);
387 return -EINVAL;
388 }
389
390 pgd = domain->pgd;
391 agaw = iommu_skip_agaw(domain, iommu, &pgd);
392 if (agaw < 0) {
393 dev_err(dev, "Invalid domain page table\n");
394 return -EINVAL;
395 }
396
397 pgd_val = virt_to_phys(pgd);
398 did = domain_id_iommu(domain, iommu);
399
400 spin_lock(&iommu->lock);
401 pte = intel_pasid_get_entry(dev, pasid);
402 if (!pte) {
403 spin_unlock(&iommu->lock);
404 return -ENODEV;
405 }
406
407 if (pasid_pte_is_present(pte)) {
408 spin_unlock(&iommu->lock);
409 return -EBUSY;
410 }
411
412 pasid_clear_entry(pte);
413 pasid_set_domain_id(pte, did);
414 pasid_set_slptr(pte, pgd_val);
415 pasid_set_address_width(pte, agaw);
416 pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
417 pasid_set_fault_enable(pte);
418 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
419 if (domain->dirty_tracking)
420 pasid_set_ssade(pte);
421
422 pasid_set_present(pte);
423 spin_unlock(&iommu->lock);
424
425 pasid_flush_caches(iommu, pte, pasid, did);
426
427 return 0;
428 }
429
430 /*
431 * Set up dirty tracking on a second only or nested translation type.
432 */
intel_pasid_setup_dirty_tracking(struct intel_iommu * iommu,struct device * dev,u32 pasid,bool enabled)433 int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
434 struct device *dev, u32 pasid,
435 bool enabled)
436 {
437 struct pasid_entry *pte;
438 u16 did, pgtt;
439
440 spin_lock(&iommu->lock);
441
442 pte = intel_pasid_get_entry(dev, pasid);
443 if (!pte) {
444 spin_unlock(&iommu->lock);
445 dev_err_ratelimited(
446 dev, "Failed to get pasid entry of PASID %d\n", pasid);
447 return -ENODEV;
448 }
449
450 did = pasid_get_domain_id(pte);
451 pgtt = pasid_pte_get_pgtt(pte);
452 if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
453 pgtt != PASID_ENTRY_PGTT_NESTED) {
454 spin_unlock(&iommu->lock);
455 dev_err_ratelimited(
456 dev,
457 "Dirty tracking not supported on translation type %d\n",
458 pgtt);
459 return -EOPNOTSUPP;
460 }
461
462 if (pasid_get_ssade(pte) == enabled) {
463 spin_unlock(&iommu->lock);
464 return 0;
465 }
466
467 if (enabled)
468 pasid_set_ssade(pte);
469 else
470 pasid_clear_ssade(pte);
471 spin_unlock(&iommu->lock);
472
473 if (!ecap_coherent(iommu->ecap))
474 clflush_cache_range(pte, sizeof(*pte));
475
476 /*
477 * From VT-d spec table 25 "Guidance to Software for Invalidations":
478 *
479 * - PASID-selective-within-Domain PASID-cache invalidation
480 * If (PGTT=SS or Nested)
481 * - Domain-selective IOTLB invalidation
482 * Else
483 * - PASID-selective PASID-based IOTLB invalidation
484 * - If (pasid is RID_PASID)
485 * - Global Device-TLB invalidation to affected functions
486 * Else
487 * - PASID-based Device-TLB invalidation (with S=1 and
488 * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
489 */
490 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
491
492 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
493
494 devtlb_invalidation_with_pasid(iommu, dev, pasid);
495
496 return 0;
497 }
498
499 /*
500 * Set up the scalable mode pasid entry for passthrough translation type.
501 */
intel_pasid_setup_pass_through(struct intel_iommu * iommu,struct device * dev,u32 pasid)502 int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
503 struct device *dev, u32 pasid)
504 {
505 u16 did = FLPT_DEFAULT_DID;
506 struct pasid_entry *pte;
507
508 spin_lock(&iommu->lock);
509 pte = intel_pasid_get_entry(dev, pasid);
510 if (!pte) {
511 spin_unlock(&iommu->lock);
512 return -ENODEV;
513 }
514
515 if (pasid_pte_is_present(pte)) {
516 spin_unlock(&iommu->lock);
517 return -EBUSY;
518 }
519
520 pasid_clear_entry(pte);
521 pasid_set_domain_id(pte, did);
522 pasid_set_address_width(pte, iommu->agaw);
523 pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
524 pasid_set_fault_enable(pte);
525 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
526 pasid_set_present(pte);
527 spin_unlock(&iommu->lock);
528
529 pasid_flush_caches(iommu, pte, pasid, did);
530
531 return 0;
532 }
533
534 /*
535 * Set the page snoop control for a pasid entry which has been set up.
536 */
intel_pasid_setup_page_snoop_control(struct intel_iommu * iommu,struct device * dev,u32 pasid)537 void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
538 struct device *dev, u32 pasid)
539 {
540 struct pasid_entry *pte;
541 u16 did;
542
543 spin_lock(&iommu->lock);
544 pte = intel_pasid_get_entry(dev, pasid);
545 if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
546 spin_unlock(&iommu->lock);
547 return;
548 }
549
550 pasid_set_pgsnp(pte);
551 did = pasid_get_domain_id(pte);
552 spin_unlock(&iommu->lock);
553
554 if (!ecap_coherent(iommu->ecap))
555 clflush_cache_range(pte, sizeof(*pte));
556
557 /*
558 * VT-d spec 3.4 table23 states guides for cache invalidation:
559 *
560 * - PASID-selective-within-Domain PASID-cache invalidation
561 * - PASID-selective PASID-based IOTLB invalidation
562 * - If (pasid is RID_PASID)
563 * - Global Device-TLB invalidation to affected functions
564 * Else
565 * - PASID-based Device-TLB invalidation (with S=1 and
566 * Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
567 */
568 pasid_cache_invalidation_with_pasid(iommu, did, pasid);
569 qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
570
571 devtlb_invalidation_with_pasid(iommu, dev, pasid);
572 }
573
574 /**
575 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
576 * @iommu: IOMMU which the device belong to
577 * @dev: Device to be set up for translation
578 * @pasid: PASID to be programmed in the device PASID table
579 * @domain: User stage-1 domain nested on a stage-2 domain
580 *
581 * This is used for nested translation. The input domain should be
582 * nested type and nested on a parent with 'is_nested_parent' flag
583 * set.
584 */
intel_pasid_setup_nested(struct intel_iommu * iommu,struct device * dev,u32 pasid,struct dmar_domain * domain)585 int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
586 u32 pasid, struct dmar_domain *domain)
587 {
588 struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
589 pgd_t *s1_gpgd = (pgd_t *)(uintptr_t)domain->s1_pgtbl;
590 struct dmar_domain *s2_domain = domain->s2_domain;
591 u16 did = domain_id_iommu(domain, iommu);
592 struct dma_pte *pgd = s2_domain->pgd;
593 struct pasid_entry *pte;
594
595 /* Address width should match the address width supported by hardware */
596 switch (s1_cfg->addr_width) {
597 case ADDR_WIDTH_4LEVEL:
598 break;
599 case ADDR_WIDTH_5LEVEL:
600 if (!cap_fl5lp_support(iommu->cap)) {
601 dev_err_ratelimited(dev,
602 "5-level paging not supported\n");
603 return -EINVAL;
604 }
605 break;
606 default:
607 dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
608 s1_cfg->addr_width);
609 return -EINVAL;
610 }
611
612 if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
613 pr_err_ratelimited("No supervisor request support on %s\n",
614 iommu->name);
615 return -EINVAL;
616 }
617
618 if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
619 pr_err_ratelimited("No extended access flag support on %s\n",
620 iommu->name);
621 return -EINVAL;
622 }
623
624 spin_lock(&iommu->lock);
625 pte = intel_pasid_get_entry(dev, pasid);
626 if (!pte) {
627 spin_unlock(&iommu->lock);
628 return -ENODEV;
629 }
630 if (pasid_pte_is_present(pte)) {
631 spin_unlock(&iommu->lock);
632 return -EBUSY;
633 }
634
635 pasid_clear_entry(pte);
636
637 if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
638 pasid_set_flpm(pte, 1);
639
640 pasid_set_flptr(pte, (uintptr_t)s1_gpgd);
641
642 if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
643 pasid_set_sre(pte);
644 if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
645 pasid_set_wpe(pte);
646 }
647
648 if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
649 pasid_set_eafe(pte);
650
651 if (s2_domain->force_snooping)
652 pasid_set_pgsnp(pte);
653
654 pasid_set_slptr(pte, virt_to_phys(pgd));
655 pasid_set_fault_enable(pte);
656 pasid_set_domain_id(pte, did);
657 pasid_set_address_width(pte, s2_domain->agaw);
658 pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
659 if (s2_domain->dirty_tracking)
660 pasid_set_ssade(pte);
661 pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
662 pasid_set_present(pte);
663 spin_unlock(&iommu->lock);
664
665 pasid_flush_caches(iommu, pte, pasid, did);
666
667 return 0;
668 }
669
670 /*
671 * Interfaces to setup or teardown a pasid table to the scalable-mode
672 * context table entry:
673 */
674
/*
 * Clear the context entry of @bus/@devfn on @dev's IOMMU and flush the
 * caches for it. The entry is cleared under iommu->lock; the cache
 * invalidation runs after the lock is released. Nothing is done when no
 * context entry exists.
 */
static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *ctx;
	u16 domain_id;

	spin_lock(&iommu->lock);

	/* Look up only; do not allocate a context table here. */
	ctx = iommu_context_addr(iommu, bus, devfn, false);
	if (ctx) {
		domain_id = context_domain_id(ctx);
		context_clear_entry(ctx);
		__iommu_flush_cache(iommu, ctx, sizeof(*ctx));
		spin_unlock(&iommu->lock);
		intel_context_flush_present(info, ctx, domain_id, false);
		return;
	}

	spin_unlock(&iommu->lock);
}
695
/*
 * pci_for_each_dma_alias() callback: tear down the context entry for
 * @alias, but only when the alias is reported for the device passed in
 * @data itself. Always returns 0 so the alias walk continues.
 */
static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev != &pdev->dev)
		return 0;

	device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);

	return 0;
}
705
intel_pasid_teardown_sm_context(struct device * dev)706 void intel_pasid_teardown_sm_context(struct device *dev)
707 {
708 struct device_domain_info *info = dev_iommu_priv_get(dev);
709
710 if (!dev_is_pci(dev)) {
711 device_pasid_table_teardown(dev, info->bus, info->devfn);
712 return;
713 }
714
715 pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
716 }
717
718 /*
719 * Get the PASID directory size for scalable mode context entry.
720 * Value of X in the PDTS field of a scalable mode context entry
721 * indicates PASID directory with 2^(X + 7) entries.
722 */
context_get_sm_pds(struct pasid_table * table)723 static unsigned long context_get_sm_pds(struct pasid_table *table)
724 {
725 unsigned long pds, max_pde;
726
727 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
728 pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
729 if (pds < 7)
730 return 0;
731
732 return pds - 7;
733 }
734
context_entry_set_pasid_table(struct context_entry * context,struct device * dev)735 static int context_entry_set_pasid_table(struct context_entry *context,
736 struct device *dev)
737 {
738 struct device_domain_info *info = dev_iommu_priv_get(dev);
739 struct pasid_table *table = info->pasid_table;
740 struct intel_iommu *iommu = info->iommu;
741 unsigned long pds;
742
743 context_clear_entry(context);
744
745 pds = context_get_sm_pds(table);
746 context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
747 context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
748
749 if (info->ats_supported)
750 context_set_sm_dte(context);
751 if (info->pasid_supported)
752 context_set_pasid(context);
753
754 context_set_fault_enable(context);
755 context_set_present(context);
756 __iommu_flush_cache(iommu, context, sizeof(*context));
757
758 return 0;
759 }
760
/*
 * Install @dev's PASID table into the context entry of @bus/@devfn.
 *
 * A context entry that is already present and was not copied from a
 * previous kernel is left alone. A copied entry is cleared and all
 * caches are flushed globally before the entry is rebuilt.
 *
 * Return: 0 on success or when nothing had to be done, -ENOMEM when the
 * context entry cannot be obtained.
 */
static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *ctx;
	int ret = 0;

	spin_lock(&iommu->lock);

	ctx = iommu_context_addr(iommu, bus, devfn, true);
	if (!ctx) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	if (context_present(ctx) && !context_copied(iommu, bus, devfn))
		goto out_unlock;

	if (context_copied(iommu, bus, devfn)) {
		context_clear_entry(ctx);
		__iommu_flush_cache(iommu, ctx, sizeof(*ctx));

		/*
		 * For kdump cases, old valid entries may be cached due to
		 * the in-flight DMA and copied pgtable, but there is no
		 * unmapping behaviour for them, thus we need explicit cache
		 * flushes for all affected domain IDs and PASIDs used in
		 * the copied PASID table. Given that we have no idea about
		 * which domain IDs and PASIDs were used in the copied tables,
		 * upgrade them to global PASID and IOTLB cache invalidation.
		 */
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

		/*
		 * At this point, the device is supposed to finish reset at
		 * its driver probe stage, so no in-flight DMA will exist,
		 * and we don't need to worry anymore hereafter.
		 */
		clear_context_copied(iommu, bus, devfn);
	}

	context_entry_set_pasid_table(ctx, dev);
	spin_unlock(&iommu->lock);

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entry we don't need to flush the caches. If it does
	 * cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
	}

	return 0;

out_unlock:
	spin_unlock(&iommu->lock);
	return ret;
}
827
/*
 * pci_for_each_dma_alias() callback: set up the context entry for @alias
 * when the alias is reported for the device passed in @data itself.
 * Aliases reported for other devices are skipped with a 0 return.
 */
static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	if (dev == &pdev->dev)
		return device_pasid_table_setup(dev, PCI_BUS_NUM(alias),
						alias & 0xff);

	return 0;
}
837
838 /*
839 * Set the device's PASID table to its context table entry.
840 *
841 * The PASID table is set to the context entries of both device itself
842 * and its alias requester ID for DMA.
843 */
intel_pasid_setup_sm_context(struct device * dev)844 int intel_pasid_setup_sm_context(struct device *dev)
845 {
846 struct device_domain_info *info = dev_iommu_priv_get(dev);
847
848 if (!dev_is_pci(dev))
849 return device_pasid_table_setup(dev, info->bus, info->devfn);
850
851 return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
852 }
853
854 /*
855 * Global Device-TLB invalidation following changes in a context entry which
856 * was present.
857 */
__context_flush_dev_iotlb(struct device_domain_info * info)858 static void __context_flush_dev_iotlb(struct device_domain_info *info)
859 {
860 if (!info->ats_enabled)
861 return;
862
863 qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
864 info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);
865
866 /*
867 * There is no guarantee that the device DMA is stopped when it reaches
868 * here. Therefore, always attempt the extra device TLB invalidation
869 * quirk. The impact on performance is acceptable since this is not a
870 * performance-critical path.
871 */
872 quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
873 info->ats_qdep);
874 }
875
876 /*
877 * Cache invalidations after change in a context table entry that was present
878 * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If
879 * IOMMU is in scalable mode and all PASID table entries of the device were
880 * non-present, set flush_domains to false. Otherwise, true.
881 */
intel_context_flush_present(struct device_domain_info * info,struct context_entry * context,u16 did,bool flush_domains)882 void intel_context_flush_present(struct device_domain_info *info,
883 struct context_entry *context,
884 u16 did, bool flush_domains)
885 {
886 struct intel_iommu *iommu = info->iommu;
887 struct pasid_entry *pte;
888 int i;
889
890 /*
891 * Device-selective context-cache invalidation. The Domain-ID field
892 * of the Context-cache Invalidate Descriptor is ignored by hardware
893 * when operating in scalable mode. Therefore the @did value doesn't
894 * matter in scalable mode.
895 */
896 iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
897 DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
898
899 /*
900 * For legacy mode:
901 * - Domain-selective IOTLB invalidation
902 * - Global Device-TLB invalidation to all affected functions
903 */
904 if (!sm_supported(iommu)) {
905 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
906 __context_flush_dev_iotlb(info);
907
908 return;
909 }
910
911 /*
912 * For scalable mode:
913 * - Domain-selective PASID-cache invalidation to affected domains
914 * - Domain-selective IOTLB invalidation to affected domains
915 * - Global Device-TLB invalidation to affected functions
916 */
917 if (flush_domains) {
918 /*
919 * If the IOMMU is running in scalable mode and there might
920 * be potential PASID translations, the caller should hold
921 * the lock to ensure that context changes and cache flushes
922 * are atomic.
923 */
924 assert_spin_locked(&iommu->lock);
925 for (i = 0; i < info->pasid_table->max_pasid; i++) {
926 pte = intel_pasid_get_entry(info->dev, i);
927 if (!pte || !pasid_pte_is_present(pte))
928 continue;
929
930 did = pasid_get_domain_id(pte);
931 qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0);
932 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
933 }
934 }
935
936 __context_flush_dev_iotlb(info);
937 }
938