1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright © 2006-2014 Intel Corporation.
4 *
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
10 * Joerg Roedel <jroedel@suse.de>
11 */
12
13 #define pr_fmt(fmt) "DMAR: " fmt
14 #define dev_fmt(fmt) pr_fmt(fmt)
15
16 #include <linux/crash_dump.h>
17 #include <linux/dma-direct.h>
18 #include <linux/dmi.h>
19 #include <linux/memory.h>
20 #include <linux/pci.h>
21 #include <linux/pci-ats.h>
22 #include <linux/spinlock.h>
23 #include <linux/syscore_ops.h>
24 #include <linux/tboot.h>
25 #include <uapi/linux/iommufd.h>
26
27 #include "iommu.h"
28 #include "../dma-iommu.h"
29 #include "../irq_remapping.h"
30 #include "../iommu-pages.h"
31 #include "pasid.h"
32 #include "perfmon.h"
33
34 #define ROOT_SIZE VTD_PAGE_SIZE
35 #define CONTEXT_SIZE VTD_PAGE_SIZE
36
37 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
38 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
39 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
40 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
41
42 #define IOAPIC_RANGE_START (0xfee00000)
43 #define IOAPIC_RANGE_END (0xfeefffff)
44 #define IOVA_START_ADDR (0x1000)
45
46 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
47
48 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
49 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
50
51 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
52 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
53 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
54 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
55 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
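/*
 * Illustrative worked example (editorial sketch, assuming VTD_PAGE_SHIFT == 12):
 * for the default 57-bit width, __DOMAIN_MAX_PFN(57) = (1ULL << 45) - 1 =
 * 0x1fffffffffff, which fits in an unsigned long on 64-bit builds, so
 * DOMAIN_MAX_PFN(57) returns it unchanged; on a 32-bit build the min_t()
 * clamps it to ULONG_MAX. DOMAIN_MAX_ADDR(57) shifts the PFN back up by
 * VTD_PAGE_SHIFT, giving the page-aligned base of the last page the domain
 * can cover.
 */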
56
57 static void __init check_tylersburg_isoch(void);
58 static int rwbf_quirk;
59
60 #define rwbf_required(iommu) (rwbf_quirk || cap_rwbf((iommu)->cap))
61
62 /*
63 * set to 1 to panic kernel if VT-d cannot be successfully enabled
64 * (used when kernel is launched w/ TXT)
65 */
66 static int force_on = 0;
67 static int intel_iommu_tboot_noforce;
68 static int no_platform_optin;
69
70 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
71
72 /*
73 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
74 * if marked present.
75 */
76 static phys_addr_t root_entry_lctp(struct root_entry *re)
77 {
78 if (!(re->lo & 1))
79 return 0;
80
81 return re->lo & VTD_PAGE_MASK;
82 }
83
84 /*
85 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
86 * if marked present.
87 */
88 static phys_addr_t root_entry_uctp(struct root_entry *re)
89 {
90 if (!(re->hi & 1))
91 return 0;
92
93 return re->hi & VTD_PAGE_MASK;
94 }
95
96 static int device_rid_cmp_key(const void *key, const struct rb_node *node)
97 {
98 struct device_domain_info *info =
99 rb_entry(node, struct device_domain_info, node);
100 const u16 *rid_lhs = key;
101
102 if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
103 return -1;
104
105 if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
106 return 1;
107
108 return 0;
109 }
110
111 static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
112 {
113 struct device_domain_info *info =
114 rb_entry(lhs, struct device_domain_info, node);
115 u16 key = PCI_DEVID(info->bus, info->devfn);
116
117 return device_rid_cmp_key(&key, rhs);
118 }
119
120 /*
121 * Looks up an IOMMU-probed device using its source ID.
122 *
123 * Returns the pointer to the device if there is a match. Otherwise,
124 * returns NULL.
125 *
126 * Note that this helper doesn't guarantee that the device won't be
127 * released by the iommu subsystem after being returned. The caller
128 * should use its own synchronization mechanism to avoid the device
129 * being released during its use if that is possibly the case.
130 */
131 struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid)
132 {
133 struct device_domain_info *info = NULL;
134 struct rb_node *node;
135 unsigned long flags;
136
137 spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
138 node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key);
139 if (node)
140 info = rb_entry(node, struct device_domain_info, node);
141 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
142
143 return info ? info->dev : NULL;
144 }
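/*
 * Minimal usage sketch (illustrative only, not part of the driver): a fault
 * handler holding a source ID from a fault record could resolve it to a
 * struct device like this, provided it supplies its own serialization
 * against device release as noted above. The 'fault' structure and its
 * fields are hypothetical placeholders.
 *
 *	struct device *dev = device_rbtree_find(iommu, fault->sid);
 *
 *	if (dev)
 *		dev_warn(dev, "DMA fault at 0x%llx\n", fault->addr);
 */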
145
146 static int device_rbtree_insert(struct intel_iommu *iommu,
147 struct device_domain_info *info)
148 {
149 struct rb_node *curr;
150 unsigned long flags;
151
152 spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
153 curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
154 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
155 if (WARN_ON(curr))
156 return -EEXIST;
157
158 return 0;
159 }
160
161 static void device_rbtree_remove(struct device_domain_info *info)
162 {
163 struct intel_iommu *iommu = info->iommu;
164 unsigned long flags;
165
166 spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
167 rb_erase(&info->node, &iommu->device_rbtree);
168 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
169 }
170
171 struct dmar_rmrr_unit {
172 struct list_head list; /* list of rmrr units */
173 struct acpi_dmar_header *hdr; /* ACPI header */
174 u64 base_address; /* reserved base address*/
175 u64 end_address; /* reserved end address */
176 struct dmar_dev_scope *devices; /* target devices */
177 int devices_cnt; /* target device count */
178 };
179
180 struct dmar_atsr_unit {
181 struct list_head list; /* list of ATSR units */
182 struct acpi_dmar_header *hdr; /* ACPI header */
183 struct dmar_dev_scope *devices; /* target devices */
184 int devices_cnt; /* target device count */
185 u8 include_all:1; /* include all ports */
186 };
187
188 struct dmar_satc_unit {
189 struct list_head list; /* list of SATC units */
190 struct acpi_dmar_header *hdr; /* ACPI header */
191 struct dmar_dev_scope *devices; /* target devices */
192 struct intel_iommu *iommu; /* the corresponding iommu */
193 int devices_cnt; /* target device count */
194 u8 atc_required:1; /* ATS is required */
195 };
196
197 static LIST_HEAD(dmar_atsr_units);
198 static LIST_HEAD(dmar_rmrr_units);
199 static LIST_HEAD(dmar_satc_units);
200
201 #define for_each_rmrr_units(rmrr) \
202 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
203
204 static void intel_iommu_domain_free(struct iommu_domain *domain);
205
206 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
207 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
208
209 int intel_iommu_enabled = 0;
210 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
211
212 static int intel_iommu_superpage = 1;
213 static int iommu_identity_mapping;
214 static int iommu_skip_te_disable;
215 static int disable_igfx_iommu;
216
217 #define IDENTMAP_AZALIA 4
218
219 const struct iommu_ops intel_iommu_ops;
220 static const struct iommu_dirty_ops intel_dirty_ops;
221
222 static bool translation_pre_enabled(struct intel_iommu *iommu)
223 {
224 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
225 }
226
227 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
228 {
229 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
230 }
231
232 static void init_translation_status(struct intel_iommu *iommu)
233 {
234 u32 gsts;
235
236 gsts = readl(iommu->reg + DMAR_GSTS_REG);
237 if (gsts & DMA_GSTS_TES)
238 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
239 }
240
241 static int __init intel_iommu_setup(char *str)
242 {
243 if (!str)
244 return -EINVAL;
245
246 while (*str) {
247 if (!strncmp(str, "on", 2)) {
248 dmar_disabled = 0;
249 pr_info("IOMMU enabled\n");
250 } else if (!strncmp(str, "off", 3)) {
251 dmar_disabled = 1;
252 no_platform_optin = 1;
253 pr_info("IOMMU disabled\n");
254 } else if (!strncmp(str, "igfx_off", 8)) {
255 disable_igfx_iommu = 1;
256 pr_info("Disable GFX device mapping\n");
257 } else if (!strncmp(str, "forcedac", 8)) {
258 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
259 iommu_dma_forcedac = true;
260 } else if (!strncmp(str, "strict", 6)) {
261 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
262 iommu_set_dma_strict();
263 } else if (!strncmp(str, "sp_off", 6)) {
264 pr_info("Disable supported super page\n");
265 intel_iommu_superpage = 0;
266 } else if (!strncmp(str, "sm_on", 5)) {
267 pr_info("Enable scalable mode if hardware supports\n");
268 intel_iommu_sm = 1;
269 } else if (!strncmp(str, "sm_off", 6)) {
270 pr_info("Scalable mode is disallowed\n");
271 intel_iommu_sm = 0;
272 } else if (!strncmp(str, "tboot_noforce", 13)) {
273 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
274 intel_iommu_tboot_noforce = 1;
275 } else {
276 pr_notice("Unknown option - '%s'\n", str);
277 }
278
279 str += strcspn(str, ",");
280 while (*str == ',')
281 str++;
282 }
283
284 return 1;
285 }
286 __setup("intel_iommu=", intel_iommu_setup);
287
288 static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn)
289 {
290 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
291
292 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
293 }
294
295 /*
296 * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
297 * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
298 * the returned SAGAW.
299 */
300 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
301 {
302 unsigned long fl_sagaw, sl_sagaw;
303
304 fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0);
305 sl_sagaw = cap_sagaw(iommu->cap);
306
307 /* Second level only. */
308 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
309 return sl_sagaw;
310
311 /* First level only. */
312 if (!ecap_slts(iommu->ecap))
313 return fl_sagaw;
314
315 return fl_sagaw & sl_sagaw;
316 }
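/*
 * Illustrative note on the encoding above (per the SAGAW field in the VT-d
 * spec section cited in the comment): bit 1 = 3-level (39-bit AGAW), bit 2 =
 * 4-level (48-bit AGAW), bit 3 = 5-level (57-bit AGAW). For example, a
 * scalable-mode IOMMU supporting both translation types, without 5-level
 * first-level support, and with cap_sagaw() == BIT(2) | BIT(3) has
 * fl_sagaw = BIT(2), so the intersection returned is BIT(2): only 4-level
 * paging can serve both table formats.
 */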
317
318 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
319 {
320 unsigned long sagaw;
321 int agaw;
322
323 sagaw = __iommu_calculate_sagaw(iommu);
324 for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
325 if (test_bit(agaw, &sagaw))
326 break;
327 }
328
329 return agaw;
330 }
331
332 /*
333 * Calculate max SAGAW for each iommu.
334 */
335 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
336 {
337 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
338 }
339
340 /*
341 * calculate agaw for each iommu.
342 * "SAGAW" may be different across iommus, use a default agaw, and
343 * fall back to a smaller supported agaw for iommus that don't support the default agaw.
344 */
345 int iommu_calculate_agaw(struct intel_iommu *iommu)
346 {
347 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
348 }
349
350 static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
351 {
352 return sm_supported(iommu) ?
353 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
354 }
355
356 /* Return the super pagesize bitmap if supported. */
357 static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
358 {
359 unsigned long bitmap = 0;
360
361 /*
362 * 1-level super page supports page size of 2MiB, 2-level super page
363 * supports page size of both 2MiB and 1GiB.
364 */
365 if (domain->iommu_superpage == 1)
366 bitmap |= SZ_2M;
367 else if (domain->iommu_superpage == 2)
368 bitmap |= SZ_2M | SZ_1G;
369
370 return bitmap;
371 }
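/*
 * Illustrative usage sketch (not a quote of this driver's exact code): a
 * helper like this is typically consumed when publishing the domain's
 * supported page sizes, e.g.
 *
 *	domain->domain.pgsize_bitmap =
 *		SZ_4K | domain_super_pgsize_bitmap(domain);
 *
 * so the IOMMU core only requests mappings on 4KiB/2MiB/1GiB boundaries the
 * hardware can actually honour.
 */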
372
373 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
374 u8 devfn, int alloc)
375 {
376 struct root_entry *root = &iommu->root_entry[bus];
377 struct context_entry *context;
378 u64 *entry;
379
380 /*
381 * Unless the caller requested to allocate a new entry,
382 * returning a copied context entry makes no sense.
383 */
384 if (!alloc && context_copied(iommu, bus, devfn))
385 return NULL;
386
387 entry = &root->lo;
388 if (sm_supported(iommu)) {
389 if (devfn >= 0x80) {
390 devfn -= 0x80;
391 entry = &root->hi;
392 }
393 devfn *= 2;
394 }
395 if (*entry & 1)
396 context = phys_to_virt(*entry & VTD_PAGE_MASK);
397 else {
398 unsigned long phy_addr;
399 if (!alloc)
400 return NULL;
401
402 context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
403 SZ_4K);
404 if (!context)
405 return NULL;
406
407 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
408 phy_addr = virt_to_phys((void *)context);
409 *entry = phy_addr | 1;
410 __iommu_flush_cache(iommu, entry, sizeof(*entry));
411 }
412 return &context[devfn];
413 }
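/*
 * Illustrative note (assumption based on the scalable-mode layout handled
 * above): each root entry covers one bus but is split in half. Devfns
 * 0x00-0x7f are reached through the lower context-table pointer (root->lo)
 * and devfns 0x80-0xff through the upper one (root->hi), and scalable-mode
 * context entries are 256 bits wide, hence the devfn is doubled when
 * indexing the table of 128-bit legacy-sized slots. For example, devfn 0x85
 * lands at slot (0x85 - 0x80) * 2 = 0x0a of the table referenced by root->hi.
 */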
414
415 /**
416 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
417 * sub-hierarchy of a candidate PCI-PCI bridge
418 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
419 * @bridge: the candidate PCI-PCI bridge
420 *
421 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
422 */
423 static bool
424 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
425 {
426 struct pci_dev *pdev, *pbridge;
427
428 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
429 return false;
430
431 pdev = to_pci_dev(dev);
432 pbridge = to_pci_dev(bridge);
433
434 if (pbridge->subordinate &&
435 pbridge->subordinate->number <= pdev->bus->number &&
436 pbridge->subordinate->busn_res.end >= pdev->bus->number)
437 return true;
438
439 return false;
440 }
441
442 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
443 {
444 struct dmar_drhd_unit *drhd;
445 u32 vtbar;
446 int rc;
447
448 /* We know that this device on this chipset has its own IOMMU.
449 * If we find it under a different IOMMU, then the BIOS is lying
450 * to us. Hope that the IOMMU for this device is actually
451 * disabled, and it needs no translation...
452 */
453 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
454 if (rc) {
455 /* "can't" happen */
456 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
457 return false;
458 }
459 vtbar &= 0xffff0000;
460
461 /* we know that this iommu should be at offset 0xa000 from vtbar */
462 drhd = dmar_find_matched_drhd_unit(pdev);
463 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
464 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
465 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
466 return true;
467 }
468
469 return false;
470 }
471
472 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
473 {
474 if (!iommu || iommu->drhd->ignored)
475 return true;
476
477 if (dev_is_pci(dev)) {
478 struct pci_dev *pdev = to_pci_dev(dev);
479
480 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
481 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
482 quirk_ioat_snb_local_iommu(pdev))
483 return true;
484 }
485
486 return false;
487 }
488
489 static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn)
490 {
491 struct dmar_drhd_unit *drhd = NULL;
492 struct pci_dev *pdev = NULL;
493 struct intel_iommu *iommu;
494 struct device *tmp;
495 u16 segment = 0;
496 int i;
497
498 if (!dev)
499 return NULL;
500
501 if (dev_is_pci(dev)) {
502 struct pci_dev *pf_pdev;
503
504 pdev = pci_real_dma_dev(to_pci_dev(dev));
505
506 /* VFs aren't listed in scope tables; we need to look up
507 * the PF instead to find the IOMMU. */
508 pf_pdev = pci_physfn(pdev);
509 dev = &pf_pdev->dev;
510 segment = pci_domain_nr(pdev->bus);
511 } else if (has_acpi_companion(dev))
512 dev = &ACPI_COMPANION(dev)->dev;
513
514 rcu_read_lock();
515 for_each_iommu(iommu, drhd) {
516 if (pdev && segment != drhd->segment)
517 continue;
518
519 for_each_active_dev_scope(drhd->devices,
520 drhd->devices_cnt, i, tmp) {
521 if (tmp == dev) {
522 /* For a VF use its original BDF# not that of the PF
523 * which we used for the IOMMU lookup. Strictly speaking
524 * we could do this for all PCI devices; we only need to
525 * get the BDF# from the scope table for ACPI matches. */
526 if (pdev && pdev->is_virtfn)
527 goto got_pdev;
528
529 if (bus && devfn) {
530 *bus = drhd->devices[i].bus;
531 *devfn = drhd->devices[i].devfn;
532 }
533 goto out;
534 }
535
536 if (is_downstream_to_pci_bridge(dev, tmp))
537 goto got_pdev;
538 }
539
540 if (pdev && drhd->include_all) {
541 got_pdev:
542 if (bus && devfn) {
543 *bus = pdev->bus->number;
544 *devfn = pdev->devfn;
545 }
546 goto out;
547 }
548 }
549 iommu = NULL;
550 out:
551 if (iommu_is_dummy(iommu, dev))
552 iommu = NULL;
553
554 rcu_read_unlock();
555
556 return iommu;
557 }
558
559 static void domain_flush_cache(struct dmar_domain *domain,
560 void *addr, int size)
561 {
562 if (!domain->iommu_coherency)
563 clflush_cache_range(addr, size);
564 }
565
566 static void free_context_table(struct intel_iommu *iommu)
567 {
568 struct context_entry *context;
569 int i;
570
571 if (!iommu->root_entry)
572 return;
573
574 for (i = 0; i < ROOT_ENTRY_NR; i++) {
575 context = iommu_context_addr(iommu, i, 0, 0);
576 if (context)
577 iommu_free_pages(context);
578
579 if (!sm_supported(iommu))
580 continue;
581
582 context = iommu_context_addr(iommu, i, 0x80, 0);
583 if (context)
584 iommu_free_pages(context);
585 }
586
587 iommu_free_pages(iommu->root_entry);
588 iommu->root_entry = NULL;
589 }
590
591 #ifdef CONFIG_DMAR_DEBUG
592 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
593 u8 bus, u8 devfn, struct dma_pte *parent, int level)
594 {
595 struct dma_pte *pte;
596 int offset;
597
598 while (1) {
599 offset = pfn_level_offset(pfn, level);
600 pte = &parent[offset];
601
602 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
603
604 if (!dma_pte_present(pte)) {
605 pr_info("page table not present at level %d\n", level - 1);
606 break;
607 }
608
609 if (level == 1 || dma_pte_superpage(pte))
610 break;
611
612 parent = phys_to_virt(dma_pte_addr(pte));
613 level--;
614 }
615 }
616
617 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
618 unsigned long long addr, u32 pasid)
619 {
620 struct pasid_dir_entry *dir, *pde;
621 struct pasid_entry *entries, *pte;
622 struct context_entry *ctx_entry;
623 struct root_entry *rt_entry;
624 int i, dir_index, index, level;
625 u8 devfn = source_id & 0xff;
626 u8 bus = source_id >> 8;
627 struct dma_pte *pgtable;
628
629 pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
630
631 /* root entry dump */
632 if (!iommu->root_entry) {
633 pr_info("root table is not present\n");
634 return;
635 }
636 rt_entry = &iommu->root_entry[bus];
637
638 if (sm_supported(iommu))
639 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
640 rt_entry->hi, rt_entry->lo);
641 else
642 pr_info("root entry: 0x%016llx", rt_entry->lo);
643
644 /* context entry dump */
645 ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
646 if (!ctx_entry) {
647 pr_info("context table is not present\n");
648 return;
649 }
650
651 pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
652 ctx_entry->hi, ctx_entry->lo);
653
654 /* legacy mode does not require PASID entries */
655 if (!sm_supported(iommu)) {
656 if (!context_present(ctx_entry)) {
657 pr_info("legacy mode page table is not present\n");
658 return;
659 }
660 level = agaw_to_level(ctx_entry->hi & 7);
661 pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
662 goto pgtable_walk;
663 }
664
665 if (!context_present(ctx_entry)) {
666 pr_info("pasid directory table is not present\n");
667 return;
668 }
669
670 /* get the pointer to pasid directory entry */
671 dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
672
673 /* For request-without-pasid, get the pasid from context entry */
674 if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
675 pasid = IOMMU_NO_PASID;
676
677 dir_index = pasid >> PASID_PDE_SHIFT;
678 pde = &dir[dir_index];
679 pr_info("pasid dir entry: 0x%016llx\n", pde->val);
680
681 /* get the pointer to the pasid table entry */
682 entries = get_pasid_table_from_pde(pde);
683 if (!entries) {
684 pr_info("pasid table is not present\n");
685 return;
686 }
687 index = pasid & PASID_PTE_MASK;
688 pte = &entries[index];
689 for (i = 0; i < ARRAY_SIZE(pte->val); i++)
690 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
691
692 if (!pasid_pte_is_present(pte)) {
693 pr_info("scalable mode page table is not present\n");
694 return;
695 }
696
697 if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
698 level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
699 pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
700 } else {
701 level = agaw_to_level((pte->val[0] >> 2) & 0x7);
702 pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
703 }
704
705 pgtable_walk:
706 pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
707 }
708 #endif
709
710 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
711 unsigned long pfn, int *target_level,
712 gfp_t gfp)
713 {
714 struct dma_pte *parent, *pte;
715 int level = agaw_to_level(domain->agaw);
716 int offset;
717
718 if (!domain_pfn_supported(domain, pfn))
719 /* Address beyond IOMMU's addressing capabilities. */
720 return NULL;
721
722 parent = domain->pgd;
723
724 while (1) {
725 void *tmp_page;
726
727 offset = pfn_level_offset(pfn, level);
728 pte = &parent[offset];
729 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
730 break;
731 if (level == *target_level)
732 break;
733
734 if (!dma_pte_present(pte)) {
735 uint64_t pteval, tmp;
736
737 tmp_page = iommu_alloc_pages_node_sz(domain->nid, gfp,
738 SZ_4K);
739
740 if (!tmp_page)
741 return NULL;
742
743 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
744 pteval = virt_to_phys(tmp_page) | DMA_PTE_READ |
745 DMA_PTE_WRITE;
746 if (domain->use_first_level)
747 pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
748
749 tmp = 0ULL;
750 if (!try_cmpxchg64(&pte->val, &tmp, pteval))
751 /* Someone else set it while we were thinking; use theirs. */
752 iommu_free_pages(tmp_page);
753 else
754 domain_flush_cache(domain, pte, sizeof(*pte));
755 }
756 if (level == 1)
757 break;
758
759 parent = phys_to_virt(dma_pte_addr(pte));
760 level--;
761 }
762
763 if (!*target_level)
764 *target_level = level;
765
766 return pte;
767 }
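/*
 * Illustrative worked example (assuming 4KiB base pages and the usual 9-bit
 * stride per level): in a 4-level table (48-bit AGAW), IOVA 0x7f1234500000
 * has pfn 0x7f1234500; pfn_level_offset() selects index (pfn >> 27) & 0x1ff
 * at level 4, (pfn >> 18) & 0x1ff at level 3, (pfn >> 9) & 0x1ff at level 2
 * and pfn & 0x1ff at level 1, allocating any missing intermediate tables on
 * the way down to the requested target level.
 */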
768
769 /* return address's pte at specific level */
770 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
771 unsigned long pfn,
772 int level, int *large_page)
773 {
774 struct dma_pte *parent, *pte;
775 int total = agaw_to_level(domain->agaw);
776 int offset;
777
778 parent = domain->pgd;
779 while (level <= total) {
780 offset = pfn_level_offset(pfn, total);
781 pte = &parent[offset];
782 if (level == total)
783 return pte;
784
785 if (!dma_pte_present(pte)) {
786 *large_page = total;
787 break;
788 }
789
790 if (dma_pte_superpage(pte)) {
791 *large_page = total;
792 return pte;
793 }
794
795 parent = phys_to_virt(dma_pte_addr(pte));
796 total--;
797 }
798 return NULL;
799 }
800
801 /* clear last level pte, a tlb flush should be followed */
802 static void dma_pte_clear_range(struct dmar_domain *domain,
803 unsigned long start_pfn,
804 unsigned long last_pfn)
805 {
806 unsigned int large_page;
807 struct dma_pte *first_pte, *pte;
808
809 if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
810 WARN_ON(start_pfn > last_pfn))
811 return;
812
813 /* we don't need lock here; nobody else touches the iova range */
814 do {
815 large_page = 1;
816 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
817 if (!pte) {
818 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
819 continue;
820 }
821 do {
822 dma_clear_pte(pte);
823 start_pfn += lvl_to_nr_pages(large_page);
824 pte++;
825 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
826
827 domain_flush_cache(domain, first_pte,
828 (void *)pte - (void *)first_pte);
829
830 } while (start_pfn && start_pfn <= last_pfn);
831 }
832
833 static void dma_pte_free_level(struct dmar_domain *domain, int level,
834 int retain_level, struct dma_pte *pte,
835 unsigned long pfn, unsigned long start_pfn,
836 unsigned long last_pfn)
837 {
838 pfn = max(start_pfn, pfn);
839 pte = &pte[pfn_level_offset(pfn, level)];
840
841 do {
842 unsigned long level_pfn;
843 struct dma_pte *level_pte;
844
845 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
846 goto next;
847
848 level_pfn = pfn & level_mask(level);
849 level_pte = phys_to_virt(dma_pte_addr(pte));
850
851 if (level > 2) {
852 dma_pte_free_level(domain, level - 1, retain_level,
853 level_pte, level_pfn, start_pfn,
854 last_pfn);
855 }
856
857 /*
858 * Free the page table if we're below the level we want to
859 * retain and the range covers the entire table.
860 */
861 if (level < retain_level && !(start_pfn > level_pfn ||
862 last_pfn < level_pfn + level_size(level) - 1)) {
863 dma_clear_pte(pte);
864 domain_flush_cache(domain, pte, sizeof(*pte));
865 iommu_free_pages(level_pte);
866 }
867 next:
868 pfn += level_size(level);
869 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
870 }
871
872 /*
873 * clear last level (leaf) ptes and free page table pages below the
874 * level we wish to keep intact.
875 */
876 static void dma_pte_free_pagetable(struct dmar_domain *domain,
877 unsigned long start_pfn,
878 unsigned long last_pfn,
879 int retain_level)
880 {
881 dma_pte_clear_range(domain, start_pfn, last_pfn);
882
883 /* We don't need lock here; nobody else touches the iova range */
884 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
885 domain->pgd, 0, start_pfn, last_pfn);
886
887 /* free pgd */
888 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
889 iommu_free_pages(domain->pgd);
890 domain->pgd = NULL;
891 }
892 }
893
894 /* When a page at a given level is being unlinked from its parent, we don't
895 need to *modify* it at all. All we need to do is make a list of all the
896 pages which can be freed just as soon as we've flushed the IOTLB and we
897 know the hardware page-walk will no longer touch them.
898 The 'pte' argument is the *parent* PTE, pointing to the page that is to
899 be freed. */
900 static void dma_pte_list_pagetables(struct dmar_domain *domain,
901 int level, struct dma_pte *parent_pte,
902 struct iommu_pages_list *freelist)
903 {
904 struct dma_pte *pte = phys_to_virt(dma_pte_addr(parent_pte));
905
906 iommu_pages_list_add(freelist, pte);
907
908 if (level == 1)
909 return;
910
911 do {
912 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
913 dma_pte_list_pagetables(domain, level - 1, pte, freelist);
914 pte++;
915 } while (!first_pte_in_page(pte));
916 }
917
918 static void dma_pte_clear_level(struct dmar_domain *domain, int level,
919 struct dma_pte *pte, unsigned long pfn,
920 unsigned long start_pfn, unsigned long last_pfn,
921 struct iommu_pages_list *freelist)
922 {
923 struct dma_pte *first_pte = NULL, *last_pte = NULL;
924
925 pfn = max(start_pfn, pfn);
926 pte = &pte[pfn_level_offset(pfn, level)];
927
928 do {
929 unsigned long level_pfn = pfn & level_mask(level);
930
931 if (!dma_pte_present(pte))
932 goto next;
933
934 /* If range covers entire pagetable, free it */
935 if (start_pfn <= level_pfn &&
936 last_pfn >= level_pfn + level_size(level) - 1) {
937 /* These subordinate page tables are going away entirely. Don't
938 bother to clear them; we're just going to *free* them. */
939 if (level > 1 && !dma_pte_superpage(pte))
940 dma_pte_list_pagetables(domain, level - 1, pte, freelist);
941
942 dma_clear_pte(pte);
943 if (!first_pte)
944 first_pte = pte;
945 last_pte = pte;
946 } else if (level > 1) {
947 /* Recurse down into a level that isn't *entirely* obsolete */
948 dma_pte_clear_level(domain, level - 1,
949 phys_to_virt(dma_pte_addr(pte)),
950 level_pfn, start_pfn, last_pfn,
951 freelist);
952 }
953 next:
954 pfn = level_pfn + level_size(level);
955 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
956
957 if (first_pte)
958 domain_flush_cache(domain, first_pte,
959 (void *)++last_pte - (void *)first_pte);
960 }
961
962 /* We can't just free the pages because the IOMMU may still be walking
963 the page tables, and may have cached the intermediate levels. The
964 pages can only be freed after the IOTLB flush has been done. */
965 static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
966 unsigned long last_pfn,
967 struct iommu_pages_list *freelist)
968 {
969 if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
970 WARN_ON(start_pfn > last_pfn))
971 return;
972
973 /* we don't need lock here; nobody else touches the iova range */
974 dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
975 domain->pgd, 0, start_pfn, last_pfn, freelist);
976
977 /* free pgd */
978 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
979 iommu_pages_list_add(freelist, domain->pgd);
980 domain->pgd = NULL;
981 }
982 }
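/*
 * Minimal call-pattern sketch (illustrative only; helper names are the
 * iommu-pages and cache-tag APIs assumed to be available in this tree):
 * callers are expected to gather the page-table pages first, flush the
 * IOTLB, and only then free them, roughly
 *
 *	struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
 *
 *	domain_unmap(domain, start_pfn, last_pfn, &freelist);
 *	cache_tag_flush_range(domain, start, end, 0);
 *	iommu_put_pages_list(&freelist);
 *
 * The exact flush helper each real caller uses varies; only the ordering
 * constraint from the comment above matters here.
 */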
983
984 /* iommu handling */
985 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
986 {
987 struct root_entry *root;
988
989 root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K);
990 if (!root) {
991 pr_err("Allocating root entry for %s failed\n",
992 iommu->name);
993 return -ENOMEM;
994 }
995
996 __iommu_flush_cache(iommu, root, ROOT_SIZE);
997 iommu->root_entry = root;
998
999 return 0;
1000 }
1001
1002 static void iommu_set_root_entry(struct intel_iommu *iommu)
1003 {
1004 u64 addr;
1005 u32 sts;
1006 unsigned long flag;
1007
1008 addr = virt_to_phys(iommu->root_entry);
1009 if (sm_supported(iommu))
1010 addr |= DMA_RTADDR_SMT;
1011
1012 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1013 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1014
1015 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1016
1017 /* Make sure hardware complete it */
1018 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1019 readl, (sts & DMA_GSTS_RTPS), sts);
1020
1021 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1022
1023 /*
1024 * Hardware invalidates all DMA remapping hardware translation
1025 * caches as part of SRTP flow.
1026 */
1027 if (cap_esrtps(iommu->cap))
1028 return;
1029
1030 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1031 if (sm_supported(iommu))
1032 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1033 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1034 }
1035
1036 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1037 {
1038 u32 val;
1039 unsigned long flag;
1040
1041 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1042 return;
1043
1044 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1045 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1046
1047 /* Make sure hardware complete it */
1048 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1049 readl, (!(val & DMA_GSTS_WBFS)), val);
1050
1051 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1052 }
1053
1054 /* return value determines if we need a write buffer flush */
1055 static void __iommu_flush_context(struct intel_iommu *iommu,
1056 u16 did, u16 source_id, u8 function_mask,
1057 u64 type)
1058 {
1059 u64 val = 0;
1060 unsigned long flag;
1061
1062 switch (type) {
1063 case DMA_CCMD_GLOBAL_INVL:
1064 val = DMA_CCMD_GLOBAL_INVL;
1065 break;
1066 case DMA_CCMD_DOMAIN_INVL:
1067 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1068 break;
1069 case DMA_CCMD_DEVICE_INVL:
1070 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1071 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1072 break;
1073 default:
1074 pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
1075 iommu->name, type);
1076 return;
1077 }
1078 val |= DMA_CCMD_ICC;
1079
1080 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1081 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1082
1083 /* Make sure hardware complete it */
1084 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1085 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1086
1087 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1088 }
1089
1090 void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1091 unsigned int size_order, u64 type)
1092 {
1093 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1094 u64 val = 0, val_iva = 0;
1095 unsigned long flag;
1096
1097 switch (type) {
1098 case DMA_TLB_GLOBAL_FLUSH:
1099 /* global flush doesn't need to set IVA_REG */
1100 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1101 break;
1102 case DMA_TLB_DSI_FLUSH:
1103 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1104 break;
1105 case DMA_TLB_PSI_FLUSH:
1106 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1107 /* IH bit is passed in as part of address */
1108 val_iva = size_order | addr;
1109 break;
1110 default:
1111 pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
1112 iommu->name, type);
1113 return;
1114 }
1115
1116 if (cap_write_drain(iommu->cap))
1117 val |= DMA_TLB_WRITE_DRAIN;
1118
1119 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1120 /* Note: Only uses first TLB reg currently */
1121 if (val_iva)
1122 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1123 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1124
1125 /* Make sure hardware complete it */
1126 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1127 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1128
1129 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1130
1131 /* check IOTLB invalidation granularity */
1132 if (DMA_TLB_IAIG(val) == 0)
1133 pr_err("Flush IOTLB failed\n");
1134 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1135 pr_debug("TLB flush request %Lx, actual %Lx\n",
1136 (unsigned long long)DMA_TLB_IIRG(type),
1137 (unsigned long long)DMA_TLB_IAIG(val));
1138 }
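/*
 * Illustrative note (editorial sketch of the register encoding used above):
 * for a page-selective (PSI) flush the caller packs an aligned power-of-two
 * range into addr/size_order. Invalidating 16 contiguous 4KiB pages at IOVA
 * 0x100000 would use addr = 0x100000 and size_order = 4 (2^4 pages); the
 * invalidation-hint (IH) bit is OR'ed into addr by callers that only changed
 * leaf entries and kept the intermediate page tables intact.
 */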
1139
1140 static struct device_domain_info *
1141 domain_lookup_dev_info(struct dmar_domain *domain,
1142 struct intel_iommu *iommu, u8 bus, u8 devfn)
1143 {
1144 struct device_domain_info *info;
1145 unsigned long flags;
1146
1147 spin_lock_irqsave(&domain->lock, flags);
1148 list_for_each_entry(info, &domain->devices, link) {
1149 if (info->iommu == iommu && info->bus == bus &&
1150 info->devfn == devfn) {
1151 spin_unlock_irqrestore(&domain->lock, flags);
1152 return info;
1153 }
1154 }
1155 spin_unlock_irqrestore(&domain->lock, flags);
1156
1157 return NULL;
1158 }
1159
1160 /*
1161 * The extra devTLB flush quirk impacts those QAT devices with PCI device
1162 * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
1163 * check because it applies only to the built-in QAT devices and it doesn't
1164 * grant additional privileges.
1165 */
1166 #define BUGGY_QAT_DEVID_MASK 0x4940
1167 static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
1168 {
1169 if (pdev->vendor != PCI_VENDOR_ID_INTEL)
1170 return false;
1171
1172 if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
1173 return false;
1174
1175 return true;
1176 }
1177
1178 static void iommu_enable_pci_ats(struct device_domain_info *info)
1179 {
1180 struct pci_dev *pdev;
1181
1182 if (!info->ats_supported)
1183 return;
1184
1185 pdev = to_pci_dev(info->dev);
1186 if (!pci_ats_page_aligned(pdev))
1187 return;
1188
1189 if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT))
1190 info->ats_enabled = 1;
1191 }
1192
1193 static void iommu_disable_pci_ats(struct device_domain_info *info)
1194 {
1195 if (!info->ats_enabled)
1196 return;
1197
1198 pci_disable_ats(to_pci_dev(info->dev));
1199 info->ats_enabled = 0;
1200 }
1201
1202 static void iommu_enable_pci_pri(struct device_domain_info *info)
1203 {
1204 struct pci_dev *pdev;
1205
1206 if (!info->ats_enabled || !info->pri_supported)
1207 return;
1208
1209 pdev = to_pci_dev(info->dev);
1210 /* PASID is required in PRG Response Message. */
1211 if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
1212 return;
1213
1214 if (pci_reset_pri(pdev))
1215 return;
1216
1217 if (!pci_enable_pri(pdev, PRQ_DEPTH))
1218 info->pri_enabled = 1;
1219 }
1220
1221 static void iommu_disable_pci_pri(struct device_domain_info *info)
1222 {
1223 if (!info->pri_enabled)
1224 return;
1225
1226 if (WARN_ON(info->iopf_refcount))
1227 iopf_queue_remove_device(info->iommu->iopf_queue, info->dev);
1228
1229 pci_disable_pri(to_pci_dev(info->dev));
1230 info->pri_enabled = 0;
1231 }
1232
1233 static void intel_flush_iotlb_all(struct iommu_domain *domain)
1234 {
1235 cache_tag_flush_all(to_dmar_domain(domain));
1236 }
1237
1238 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1239 {
1240 u32 pmen;
1241 unsigned long flags;
1242
1243 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1244 return;
1245
1246 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1247 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1248 pmen &= ~DMA_PMEN_EPM;
1249 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1250
1251 /* wait for the protected region status bit to clear */
1252 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1253 readl, !(pmen & DMA_PMEN_PRS), pmen);
1254
1255 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1256 }
1257
1258 static void iommu_enable_translation(struct intel_iommu *iommu)
1259 {
1260 u32 sts;
1261 unsigned long flags;
1262
1263 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1264 iommu->gcmd |= DMA_GCMD_TE;
1265 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1266
1267 /* Make sure hardware complete it */
1268 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1269 readl, (sts & DMA_GSTS_TES), sts);
1270
1271 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1272 }
1273
1274 static void iommu_disable_translation(struct intel_iommu *iommu)
1275 {
1276 u32 sts;
1277 unsigned long flag;
1278
1279 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1280 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1281 return;
1282
1283 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1284 iommu->gcmd &= ~DMA_GCMD_TE;
1285 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1286
1287 /* Make sure hardware complete it */
1288 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1289 readl, (!(sts & DMA_GSTS_TES)), sts);
1290
1291 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1292 }
1293
1294 static void disable_dmar_iommu(struct intel_iommu *iommu)
1295 {
1296 /*
1297 * All iommu domains must have been detached from the devices,
1298 * hence there should be no domain IDs in use.
1299 */
1300 if (WARN_ON(!ida_is_empty(&iommu->domain_ida)))
1301 return;
1302
1303 if (iommu->gcmd & DMA_GCMD_TE)
1304 iommu_disable_translation(iommu);
1305 }
1306
1307 static void free_dmar_iommu(struct intel_iommu *iommu)
1308 {
1309 if (iommu->copied_tables) {
1310 bitmap_free(iommu->copied_tables);
1311 iommu->copied_tables = NULL;
1312 }
1313
1314 /* free context mapping */
1315 free_context_table(iommu);
1316
1317 if (ecap_prs(iommu->ecap))
1318 intel_iommu_finish_prq(iommu);
1319 }
1320
1321 /*
1322 * Check and return whether first level is used by default for
1323 * DMA translation.
1324 */
1325 static bool first_level_by_default(struct intel_iommu *iommu)
1326 {
1327 /* Only SL is available in legacy mode */
1328 if (!sm_supported(iommu))
1329 return false;
1330
1331 /* Only one level (either FL or SL) is available, just use it */
1332 if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap))
1333 return ecap_flts(iommu->ecap);
1334
1335 return true;
1336 }
1337
1338 int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
1339 {
1340 struct iommu_domain_info *info, *curr;
1341 int num, ret = -ENOSPC;
1342
1343 if (domain->domain.type == IOMMU_DOMAIN_SVA)
1344 return 0;
1345
1346 info = kzalloc(sizeof(*info), GFP_KERNEL);
1347 if (!info)
1348 return -ENOMEM;
1349
1350 guard(mutex)(&iommu->did_lock);
1351 curr = xa_load(&domain->iommu_array, iommu->seq_id);
1352 if (curr) {
1353 curr->refcnt++;
1354 kfree(info);
1355 return 0;
1356 }
1357
1358 num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
1359 cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
1360 if (num < 0) {
1361 pr_err("%s: No free domain ids\n", iommu->name);
1362 goto err_unlock;
1363 }
1364
1365 info->refcnt = 1;
1366 info->did = num;
1367 info->iommu = iommu;
1368 curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
1369 NULL, info, GFP_KERNEL);
1370 if (curr) {
1371 ret = xa_err(curr) ? : -EBUSY;
1372 goto err_clear;
1373 }
1374
1375 return 0;
1376
1377 err_clear:
1378 ida_free(&iommu->domain_ida, info->did);
1379 err_unlock:
1380 kfree(info);
1381 return ret;
1382 }
1383
1384 void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
1385 {
1386 struct iommu_domain_info *info;
1387
1388 if (domain->domain.type == IOMMU_DOMAIN_SVA)
1389 return;
1390
1391 guard(mutex)(&iommu->did_lock);
1392 info = xa_load(&domain->iommu_array, iommu->seq_id);
1393 if (--info->refcnt == 0) {
1394 ida_free(&iommu->domain_ida, info->did);
1395 xa_erase(&domain->iommu_array, iommu->seq_id);
1396 kfree(info);
1397 }
1398 }
1399
1400 /*
1401 * For kdump cases, old valid entries may be cached due to the
1402 * in-flight DMA and copied pgtable, but there is no unmapping
1403 * behaviour for them, thus we need an explicit cache flush for
1404 * the newly-mapped device. For kdump, at this point, the device
1405 * is supposed to finish reset at its driver probe stage, so no
1406 * in-flight DMA will exist, and we don't need to worry anymore
1407 * hereafter.
1408 */
1409 static void copied_context_tear_down(struct intel_iommu *iommu,
1410 struct context_entry *context,
1411 u8 bus, u8 devfn)
1412 {
1413 u16 did_old;
1414
1415 if (!context_copied(iommu, bus, devfn))
1416 return;
1417
1418 assert_spin_locked(&iommu->lock);
1419
1420 did_old = context_domain_id(context);
1421 context_clear_entry(context);
1422
1423 if (did_old < cap_ndoms(iommu->cap)) {
1424 iommu->flush.flush_context(iommu, did_old,
1425 PCI_DEVID(bus, devfn),
1426 DMA_CCMD_MASK_NOBIT,
1427 DMA_CCMD_DEVICE_INVL);
1428 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
1429 DMA_TLB_DSI_FLUSH);
1430 }
1431
1432 clear_context_copied(iommu, bus, devfn);
1433 }
1434
1435 /*
1436 * It's a non-present to present mapping. If hardware doesn't cache
1437 * non-present entries we only need to flush the write-buffer. If it
1438 * _does_ cache non-present entries, then it does so in the special
1439 * domain #0, which we have to flush:
1440 */
1441 static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
1442 u8 bus, u8 devfn)
1443 {
1444 if (cap_caching_mode(iommu->cap)) {
1445 iommu->flush.flush_context(iommu, 0,
1446 PCI_DEVID(bus, devfn),
1447 DMA_CCMD_MASK_NOBIT,
1448 DMA_CCMD_DEVICE_INVL);
1449 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1450 } else {
1451 iommu_flush_write_buffer(iommu);
1452 }
1453 }
1454
1455 static int domain_context_mapping_one(struct dmar_domain *domain,
1456 struct intel_iommu *iommu,
1457 u8 bus, u8 devfn)
1458 {
1459 struct device_domain_info *info =
1460 domain_lookup_dev_info(domain, iommu, bus, devfn);
1461 u16 did = domain_id_iommu(domain, iommu);
1462 int translation = CONTEXT_TT_MULTI_LEVEL;
1463 struct dma_pte *pgd = domain->pgd;
1464 struct context_entry *context;
1465 int ret;
1466
1467 if (WARN_ON(!intel_domain_is_ss_paging(domain)))
1468 return -EINVAL;
1469
1470 pr_debug("Set context mapping for %02x:%02x.%d\n",
1471 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1472
1473 spin_lock(&iommu->lock);
1474 ret = -ENOMEM;
1475 context = iommu_context_addr(iommu, bus, devfn, 1);
1476 if (!context)
1477 goto out_unlock;
1478
1479 ret = 0;
1480 if (context_present(context) && !context_copied(iommu, bus, devfn))
1481 goto out_unlock;
1482
1483 copied_context_tear_down(iommu, context, bus, devfn);
1484 context_clear_entry(context);
1485 context_set_domain_id(context, did);
1486
1487 if (info && info->ats_supported)
1488 translation = CONTEXT_TT_DEV_IOTLB;
1489 else
1490 translation = CONTEXT_TT_MULTI_LEVEL;
1491
1492 context_set_address_root(context, virt_to_phys(pgd));
1493 context_set_address_width(context, domain->agaw);
1494 context_set_translation_type(context, translation);
1495 context_set_fault_enable(context);
1496 context_set_present(context);
1497 if (!ecap_coherent(iommu->ecap))
1498 clflush_cache_range(context, sizeof(*context));
1499 context_present_cache_flush(iommu, did, bus, devfn);
1500 ret = 0;
1501
1502 out_unlock:
1503 spin_unlock(&iommu->lock);
1504
1505 return ret;
1506 }
1507
1508 static int domain_context_mapping_cb(struct pci_dev *pdev,
1509 u16 alias, void *opaque)
1510 {
1511 struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev);
1512 struct intel_iommu *iommu = info->iommu;
1513 struct dmar_domain *domain = opaque;
1514
1515 return domain_context_mapping_one(domain, iommu,
1516 PCI_BUS_NUM(alias), alias & 0xff);
1517 }
1518
1519 static int
1520 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
1521 {
1522 struct device_domain_info *info = dev_iommu_priv_get(dev);
1523 struct intel_iommu *iommu = info->iommu;
1524 u8 bus = info->bus, devfn = info->devfn;
1525 int ret;
1526
1527 if (!dev_is_pci(dev))
1528 return domain_context_mapping_one(domain, iommu, bus, devfn);
1529
1530 ret = pci_for_each_dma_alias(to_pci_dev(dev),
1531 domain_context_mapping_cb, domain);
1532 if (ret)
1533 return ret;
1534
1535 iommu_enable_pci_ats(info);
1536
1537 return 0;
1538 }
1539
1540 /* Return largest possible superpage level for a given mapping */
1541 static int hardware_largepage_caps(struct dmar_domain *domain, unsigned long iov_pfn,
1542 unsigned long phy_pfn, unsigned long pages)
1543 {
1544 int support, level = 1;
1545 unsigned long pfnmerge;
1546
1547 support = domain->iommu_superpage;
1548
1549 /* To use a large page, the virtual *and* physical addresses
1550 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1551 of them will mean we have to use smaller pages. So just
1552 merge them and check both at once. */
1553 pfnmerge = iov_pfn | phy_pfn;
1554
1555 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1556 pages >>= VTD_STRIDE_SHIFT;
1557 if (!pages)
1558 break;
1559 pfnmerge >>= VTD_STRIDE_SHIFT;
1560 level++;
1561 support--;
1562 }
1563 return level;
1564 }
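/*
 * Illustrative worked example: with iommu_superpage == 2 (2MiB and 1GiB
 * capable), mapping iov_pfn 0x80000 to phy_pfn 0x40000 for 0x40000 pages
 * (1GiB) gives pfnmerge 0xc0000, whose low 18 bits are clear, so the loop
 * walks two strides and returns level 3 (a 1GiB superpage). If either
 * address were only 2MiB aligned, the second stride check would stop it at
 * level 2.
 */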
1565
1566 /*
1567 * Ensure that old small page tables are removed to make room for superpage(s).
1568 * We're going to add new large pages, so make sure we don't remove their parent
1569 * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
1570 */
1571 static void switch_to_super_page(struct dmar_domain *domain,
1572 unsigned long start_pfn,
1573 unsigned long end_pfn, int level)
1574 {
1575 unsigned long lvl_pages = lvl_to_nr_pages(level);
1576 struct dma_pte *pte = NULL;
1577
1578 while (start_pfn <= end_pfn) {
1579 if (!pte)
1580 pte = pfn_to_dma_pte(domain, start_pfn, &level,
1581 GFP_ATOMIC);
1582
1583 if (dma_pte_present(pte)) {
1584 dma_pte_free_pagetable(domain, start_pfn,
1585 start_pfn + lvl_pages - 1,
1586 level + 1);
1587
1588 cache_tag_flush_range(domain, start_pfn << VTD_PAGE_SHIFT,
1589 end_pfn << VTD_PAGE_SHIFT, 0);
1590 }
1591
1592 pte++;
1593 start_pfn += lvl_pages;
1594 if (first_pte_in_page(pte))
1595 pte = NULL;
1596 }
1597 }
1598
1599 static int
1600 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1601 unsigned long phys_pfn, unsigned long nr_pages, int prot,
1602 gfp_t gfp)
1603 {
1604 struct dma_pte *first_pte = NULL, *pte = NULL;
1605 unsigned int largepage_lvl = 0;
1606 unsigned long lvl_pages = 0;
1607 phys_addr_t pteval;
1608 u64 attr;
1609
1610 if (unlikely(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)))
1611 return -EINVAL;
1612
1613 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1614 return -EINVAL;
1615
1616 if (!(prot & DMA_PTE_WRITE) && domain->nested_parent) {
1617 pr_err_ratelimited("Read-only mapping is disallowed on the domain which serves as the parent in a nested configuration, due to HW errata (ERRATA_772415_SPR17)\n");
1618 return -EINVAL;
1619 }
1620
1621 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
1622 if (domain->use_first_level) {
1623 attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
1624 if (prot & DMA_PTE_WRITE)
1625 attr |= DMA_FL_PTE_DIRTY;
1626 }
1627
1628 domain->has_mappings = true;
1629
1630 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
1631
1632 while (nr_pages > 0) {
1633 uint64_t tmp;
1634
1635 if (!pte) {
1636 largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
1637 phys_pfn, nr_pages);
1638
1639 pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
1640 gfp);
1641 if (!pte)
1642 return -ENOMEM;
1643 first_pte = pte;
1644
1645 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1646
1647 /* It is a large page */
1648 if (largepage_lvl > 1) {
1649 unsigned long end_pfn;
1650 unsigned long pages_to_remove;
1651
1652 pteval |= DMA_PTE_LARGE_PAGE;
1653 pages_to_remove = min_t(unsigned long, nr_pages,
1654 nr_pte_to_next_page(pte) * lvl_pages);
1655 end_pfn = iov_pfn + pages_to_remove - 1;
1656 switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
1657 } else {
1658 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1659 }
1660
1661 }
1662 /* We don't need lock here, nobody else
1663 * touches the iova range
1664 */
1665 tmp = 0ULL;
1666 if (!try_cmpxchg64_local(&pte->val, &tmp, pteval)) {
1667 static int dumps = 5;
1668 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1669 iov_pfn, tmp, (unsigned long long)pteval);
1670 if (dumps) {
1671 dumps--;
1672 debug_dma_dump_mappings(NULL);
1673 }
1674 WARN_ON(1);
1675 }
1676
1677 nr_pages -= lvl_pages;
1678 iov_pfn += lvl_pages;
1679 phys_pfn += lvl_pages;
1680 pteval += lvl_pages * VTD_PAGE_SIZE;
1681
1682 /* If the next PTE would be the first in a new page, then we
1683 * need to flush the cache on the entries we've just written.
1684 * And then we'll need to recalculate 'pte', so clear it and
1685 * let it get set again in the if (!pte) block above.
1686 *
1687 * If we're done (!nr_pages) we need to flush the cache too.
1688 *
1689 * Also if we've been setting superpages, we may need to
1690 * recalculate 'pte' and switch back to smaller pages for the
1691 * end of the mapping, if the trailing size is not enough to
1692 * use another superpage (i.e. nr_pages < lvl_pages).
1693 */
1694 pte++;
1695 if (!nr_pages || first_pte_in_page(pte) ||
1696 (largepage_lvl > 1 && nr_pages < lvl_pages)) {
1697 domain_flush_cache(domain, first_pte,
1698 (void *)pte - (void *)first_pte);
1699 pte = NULL;
1700 }
1701 }
1702
1703 return 0;
1704 }
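/*
 * Minimal usage sketch (illustrative only): the iommu_ops map path boils
 * down to something like
 *
 *	ret = __domain_mapping(domain, iova >> VTD_PAGE_SHIFT,
 *			       paddr >> VTD_PAGE_SHIFT,
 *			       size >> VTD_PAGE_SHIFT,
 *			       DMA_PTE_READ | DMA_PTE_WRITE, GFP_KERNEL);
 *
 * where a 2MiB-aligned iova/paddr pair of at least 2MiB lets
 * hardware_largepage_caps() pick a level-2 superpage. The exact argument
 * plumbing in the real map callback differs; this only shows the contract.
 */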
1705
1706 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
1707 {
1708 struct intel_iommu *iommu = info->iommu;
1709 struct context_entry *context;
1710 u16 did;
1711
1712 spin_lock(&iommu->lock);
1713 context = iommu_context_addr(iommu, bus, devfn, 0);
1714 if (!context) {
1715 spin_unlock(&iommu->lock);
1716 return;
1717 }
1718
1719 did = context_domain_id(context);
1720 context_clear_entry(context);
1721 __iommu_flush_cache(iommu, context, sizeof(*context));
1722 spin_unlock(&iommu->lock);
1723 intel_context_flush_no_pasid(info, context, did);
1724 }
1725
1726 int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
1727 ioasid_t pasid, u16 did, phys_addr_t fsptptr,
1728 int flags, struct iommu_domain *old)
1729 {
1730 if (!old)
1731 return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid,
1732 did, flags);
1733 return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did,
1734 iommu_domain_did(old, iommu),
1735 flags);
1736 }
1737
1738 static int domain_setup_second_level(struct intel_iommu *iommu,
1739 struct dmar_domain *domain,
1740 struct device *dev, ioasid_t pasid,
1741 struct iommu_domain *old)
1742 {
1743 if (!old)
1744 return intel_pasid_setup_second_level(iommu, domain,
1745 dev, pasid);
1746 return intel_pasid_replace_second_level(iommu, domain, dev,
1747 iommu_domain_did(old, iommu),
1748 pasid);
1749 }
1750
1751 static int domain_setup_passthrough(struct intel_iommu *iommu,
1752 struct device *dev, ioasid_t pasid,
1753 struct iommu_domain *old)
1754 {
1755 if (!old)
1756 return intel_pasid_setup_pass_through(iommu, dev, pasid);
1757 return intel_pasid_replace_pass_through(iommu, dev,
1758 iommu_domain_did(old, iommu),
1759 pasid);
1760 }
1761
1762 static int domain_setup_first_level(struct intel_iommu *iommu,
1763 struct dmar_domain *domain,
1764 struct device *dev,
1765 u32 pasid, struct iommu_domain *old)
1766 {
1767 struct dma_pte *pgd = domain->pgd;
1768 int level, flags = 0;
1769
1770 level = agaw_to_level(domain->agaw);
1771 if (level != 4 && level != 5)
1772 return -EINVAL;
1773
1774 if (level == 5)
1775 flags |= PASID_FLAG_FL5LP;
1776
1777 if (domain->force_snooping)
1778 flags |= PASID_FLAG_PAGE_SNOOP;
1779
1780 return __domain_setup_first_level(iommu, dev, pasid,
1781 domain_id_iommu(domain, iommu),
1782 __pa(pgd), flags, old);
1783 }
1784
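/*
 * Attach a paging domain to @dev: join the domain's device list, then
 * program either the context entry (legacy mode) or the PASID-table
 * entry for IOMMU_NO_PASID (scalable mode), and assign a cache tag.
 * On any failure the device is put back into the blocking state.
 */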
1785 static int dmar_domain_attach_device(struct dmar_domain *domain,
1786 struct device *dev)
1787 {
1788 struct device_domain_info *info = dev_iommu_priv_get(dev);
1789 struct intel_iommu *iommu = info->iommu;
1790 unsigned long flags;
1791 int ret;
1792
1793 ret = domain_attach_iommu(domain, iommu);
1794 if (ret)
1795 return ret;
1796
1797 info->domain = domain;
1798 info->domain_attached = true;
1799 spin_lock_irqsave(&domain->lock, flags);
1800 list_add(&info->link, &domain->devices);
1801 spin_unlock_irqrestore(&domain->lock, flags);
1802
1803 if (dev_is_real_dma_subdevice(dev))
1804 return 0;
1805
1806 if (!sm_supported(iommu))
1807 ret = domain_context_mapping(domain, dev);
1808 else if (intel_domain_is_fs_paging(domain))
1809 ret = domain_setup_first_level(iommu, domain, dev,
1810 IOMMU_NO_PASID, NULL);
1811 else if (intel_domain_is_ss_paging(domain))
1812 ret = domain_setup_second_level(iommu, domain, dev,
1813 IOMMU_NO_PASID, NULL);
1814 else if (WARN_ON(true))
1815 ret = -EINVAL;
1816
1817 if (ret)
1818 goto out_block_translation;
1819
1820 ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
1821 if (ret)
1822 goto out_block_translation;
1823
1824 return 0;
1825
1826 out_block_translation:
1827 device_block_translation(dev);
1828 return ret;
1829 }
1830
1831 /**
1832 * device_rmrr_is_relaxable - Test whether the RMRR of this device
1833 * is relaxable (ie. is allowed to be not enforced under some conditions)
1834 * @dev: device handle
1835 *
1836 * We assume that PCI USB devices with RMRRs have them largely
1837 * for historical reasons and that the RMRR space is not actively used post
1838 * boot. This exclusion may change if vendors begin to abuse it.
1839 *
1840 * The same exception is made for graphics devices, with the requirement that
1841 * any use of the RMRR regions will be torn down before assigning the device
1842 * to a guest.
1843 *
1844 * Return: true if the RMRR is relaxable, false otherwise
1845 */
1846 static bool device_rmrr_is_relaxable(struct device *dev)
1847 {
1848 struct pci_dev *pdev;
1849
1850 if (!dev_is_pci(dev))
1851 return false;
1852
1853 pdev = to_pci_dev(dev);
1854 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
1855 return true;
1856 else
1857 return false;
1858 }
1859
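/*
 * Return the default domain type this device must use, or 0 if the core
 * iommu layer is free to choose: IOMMU_DOMAIN_DMA when the hardware has
 * no pass-through capability at all, or IOMMU_DOMAIN_IDENTITY for the
 * Azalia sound device when the IDENTMAP_AZALIA quirk is in effect.
 */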
1860 static int device_def_domain_type(struct device *dev)
1861 {
1862 struct device_domain_info *info = dev_iommu_priv_get(dev);
1863 struct intel_iommu *iommu = info->iommu;
1864
1865 /*
1866 * Hardware does not support the passthrough translation mode.
1867 * Always use a dynamic mapping domain.
1868 */
1869 if (!ecap_pass_through(iommu->ecap))
1870 return IOMMU_DOMAIN_DMA;
1871
1872 if (dev_is_pci(dev)) {
1873 struct pci_dev *pdev = to_pci_dev(dev);
1874
1875 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
1876 return IOMMU_DOMAIN_IDENTITY;
1877 }
1878
1879 return 0;
1880 }
1881
1882 static void intel_iommu_init_qi(struct intel_iommu *iommu)
1883 {
1884 /*
1885 * Start from a sane IOMMU hardware state.
1886 * If queued invalidation was already initialized by us (for
1887 * example, while enabling interrupt remapping), then things
1888 * are already rolling from a sane state.
1889 */
1890 if (!iommu->qi) {
1891 /*
1892 * Clear any previous faults.
1893 */
1894 dmar_fault(-1, iommu);
1895 /*
1896 * Disable queued invalidation if supported and already enabled
1897 * before OS handover.
1898 */
1899 dmar_disable_qi(iommu);
1900 }
1901
1902 if (dmar_enable_qi(iommu)) {
1903 /*
1904 * Queued Invalidate not enabled, use Register Based Invalidate
1905 */
1906 iommu->flush.flush_context = __iommu_flush_context;
1907 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1908 pr_info("%s: Using Register based invalidation\n",
1909 iommu->name);
1910 } else {
1911 iommu->flush.flush_context = qi_flush_context;
1912 iommu->flush.flush_iotlb = qi_flush_iotlb;
1913 pr_info("%s: Using Queued invalidation\n", iommu->name);
1914 }
1915 }
1916
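/*
 * Copy one bus's context table(s) from the kernel that ran before kdump.
 * With extended root entries ('ext') each bus has two context tables:
 * devfns 0x00-0x7f hang off the lower pointer (LCTP) and 0x80-0xff off
 * the upper pointer (UCTP), and each context entry occupies two slots,
 * hence tbl_idx = bus * 2 and idx = (devfn * 2) % 256. For example, in
 * extended mode devfn 0x81 lands at idx 2 of the upper table, and the
 * copied table is stored in tbl[bus * 2 + 1].
 */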
1917 static int copy_context_table(struct intel_iommu *iommu,
1918 struct root_entry *old_re,
1919 struct context_entry **tbl,
1920 int bus, bool ext)
1921 {
1922 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
1923 struct context_entry *new_ce = NULL, ce;
1924 struct context_entry *old_ce = NULL;
1925 struct root_entry re;
1926 phys_addr_t old_ce_phys;
1927
1928 tbl_idx = ext ? bus * 2 : bus;
1929 memcpy(&re, old_re, sizeof(re));
1930
1931 for (devfn = 0; devfn < 256; devfn++) {
1932 /* First calculate the correct index */
1933 idx = (ext ? devfn * 2 : devfn) % 256;
1934
1935 if (idx == 0) {
1936 /* First save what we may have and clean up */
1937 if (new_ce) {
1938 tbl[tbl_idx] = new_ce;
1939 __iommu_flush_cache(iommu, new_ce,
1940 VTD_PAGE_SIZE);
1941 pos = 1;
1942 }
1943
1944 if (old_ce)
1945 memunmap(old_ce);
1946
1947 ret = 0;
1948 if (devfn < 0x80)
1949 old_ce_phys = root_entry_lctp(&re);
1950 else
1951 old_ce_phys = root_entry_uctp(&re);
1952
1953 if (!old_ce_phys) {
1954 if (ext && devfn == 0) {
1955 /* No LCTP, try UCTP */
1956 devfn = 0x7f;
1957 continue;
1958 } else {
1959 goto out;
1960 }
1961 }
1962
1963 ret = -ENOMEM;
1964 old_ce = memremap(old_ce_phys, PAGE_SIZE,
1965 MEMREMAP_WB);
1966 if (!old_ce)
1967 goto out;
1968
1969 new_ce = iommu_alloc_pages_node_sz(iommu->node,
1970 GFP_KERNEL, SZ_4K);
1971 if (!new_ce)
1972 goto out_unmap;
1973
1974 ret = 0;
1975 }
1976
1977 /* Now copy the context entry */
1978 memcpy(&ce, old_ce + idx, sizeof(ce));
1979
1980 if (!context_present(&ce))
1981 continue;
1982
1983 did = context_domain_id(&ce);
1984 if (did >= 0 && did < cap_ndoms(iommu->cap))
1985 ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL);
1986
1987 set_context_copied(iommu, bus, devfn);
1988 new_ce[idx] = ce;
1989 }
1990
1991 tbl[tbl_idx + pos] = new_ce;
1992
1993 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
1994
1995 out_unmap:
1996 memunmap(old_ce);
1997
1998 out:
1999 return ret;
2000 }
2001
2002 static int copy_translation_tables(struct intel_iommu *iommu)
2003 {
2004 struct context_entry **ctxt_tbls;
2005 struct root_entry *old_rt;
2006 phys_addr_t old_rt_phys;
2007 int ctxt_table_entries;
2008 u64 rtaddr_reg;
2009 int bus, ret;
2010 bool new_ext, ext;
2011
2012 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2013 ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
2014 new_ext = !!sm_supported(iommu);
2015
2016 /*
2017 * The RTT bit can only be changed when translation is disabled,
2018 * but disabling translation means to open a window for data
2019 * corruption. So bail out and don't copy anything if we would
2020 * have to change the bit.
2021 */
2022 if (new_ext != ext)
2023 return -EINVAL;
2024
2025 iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
2026 if (!iommu->copied_tables)
2027 return -ENOMEM;
2028
2029 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2030 if (!old_rt_phys)
2031 return -EINVAL;
2032
2033 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
2034 if (!old_rt)
2035 return -ENOMEM;
2036
2037 /* This is too big for the stack - allocate it from slab */
2038 ctxt_table_entries = ext ? 512 : 256;
2039 ret = -ENOMEM;
2040 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
2041 if (!ctxt_tbls)
2042 goto out_unmap;
2043
2044 for (bus = 0; bus < 256; bus++) {
2045 ret = copy_context_table(iommu, &old_rt[bus],
2046 ctxt_tbls, bus, ext);
2047 if (ret) {
2048 pr_err("%s: Failed to copy context table for bus %d\n",
2049 iommu->name, bus);
2050 continue;
2051 }
2052 }
2053
2054 spin_lock(&iommu->lock);
2055
2056 /* Context tables are copied, now write them to the root_entry table */
2057 for (bus = 0; bus < 256; bus++) {
2058 int idx = ext ? bus * 2 : bus;
2059 u64 val;
2060
2061 if (ctxt_tbls[idx]) {
2062 val = virt_to_phys(ctxt_tbls[idx]) | 1;
2063 iommu->root_entry[bus].lo = val;
2064 }
2065
2066 if (!ext || !ctxt_tbls[idx + 1])
2067 continue;
2068
2069 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
2070 iommu->root_entry[bus].hi = val;
2071 }
2072
2073 spin_unlock(&iommu->lock);
2074
2075 kfree(ctxt_tbls);
2076
2077 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
2078
2079 ret = 0;
2080
2081 out_unmap:
2082 memunmap(old_rt);
2083
2084 return ret;
2085 }
2086
2087 static int __init init_dmars(void)
2088 {
2089 struct dmar_drhd_unit *drhd;
2090 struct intel_iommu *iommu;
2091 int ret;
2092
2093 for_each_iommu(iommu, drhd) {
2094 if (drhd->ignored) {
2095 iommu_disable_translation(iommu);
2096 continue;
2097 }
2098
2099 /*
2100 * Find the smallest maximum PASID size supported by any
2101 * IOMMU in the system; the system-wide PASID table must be
2102 * no bigger than the smallest supported size.
2103 */
2104 if (pasid_supported(iommu)) {
2105 u32 temp = 2 << ecap_pss(iommu->ecap);
2106
2107 intel_pasid_max_id = min_t(u32, temp,
2108 intel_pasid_max_id);
2109 }
2110
2111 intel_iommu_init_qi(iommu);
2112 init_translation_status(iommu);
2113
2114 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
2115 iommu_disable_translation(iommu);
2116 clear_translation_pre_enabled(iommu);
2117 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
2118 iommu->name);
2119 }
2120
2121 /*
2122 * TBD:
2123 * we could share the same root & context tables
2124 * among all IOMMU's. Need to Split it later.
2125 */
2126 ret = iommu_alloc_root_entry(iommu);
2127 if (ret)
2128 goto free_iommu;
2129
2130 if (translation_pre_enabled(iommu)) {
2131 pr_info("Translation already enabled - trying to copy translation structures\n");
2132
2133 ret = copy_translation_tables(iommu);
2134 if (ret) {
2135 /*
2136 * We found the IOMMU with translation
2137 * enabled - but failed to copy over the
2138 * old root-entry table. Try to proceed
2139 * by disabling translation now and
2140 * allocating a clean root-entry table.
2141 * This might cause DMAR faults, but
2142 * probably the dump will still succeed.
2143 */
2144 pr_err("Failed to copy translation tables from previous kernel for %s\n",
2145 iommu->name);
2146 iommu_disable_translation(iommu);
2147 clear_translation_pre_enabled(iommu);
2148 } else {
2149 pr_info("Copied translation tables from previous kernel for %s\n",
2150 iommu->name);
2151 }
2152 }
2153
2154 intel_svm_check(iommu);
2155 }
2156
2157 /*
2158 * Now that qi is enabled on all iommus, set the root entry and flush
2159 * caches. This is required on some Intel X58 chipsets, otherwise the
2160 * flush_context function will loop forever and the boot hangs.
2161 */
2162 for_each_active_iommu(iommu, drhd) {
2163 iommu_flush_write_buffer(iommu);
2164 iommu_set_root_entry(iommu);
2165 }
2166
2167 check_tylersburg_isoch();
2168
2169 /*
2170 * for each drhd
2171 * enable fault log
2172 * global invalidate context cache
2173 * global invalidate iotlb
2174 * enable translation
2175 */
2176 for_each_iommu(iommu, drhd) {
2177 if (drhd->ignored) {
2178 /*
2179 * we always have to disable PMRs or DMA may fail on
2180 * this device
2181 */
2182 if (force_on)
2183 iommu_disable_protect_mem_regions(iommu);
2184 continue;
2185 }
2186
2187 iommu_flush_write_buffer(iommu);
2188
2189 if (ecap_prs(iommu->ecap)) {
2190 /*
2191 * Calling dmar_alloc_hwirq() with dmar_global_lock held could
2192 * cause a lock ordering problem, so temporarily drop the lock.
2193 */
2194 up_write(&dmar_global_lock);
2195 ret = intel_iommu_enable_prq(iommu);
2196 down_write(&dmar_global_lock);
2197 if (ret)
2198 goto free_iommu;
2199 }
2200
2201 ret = dmar_set_interrupt(iommu);
2202 if (ret)
2203 goto free_iommu;
2204 }
2205
2206 return 0;
2207
2208 free_iommu:
2209 for_each_active_iommu(iommu, drhd) {
2210 disable_dmar_iommu(iommu);
2211 free_dmar_iommu(iommu);
2212 }
2213
2214 return ret;
2215 }
2216
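/*
 * Mark DMAR units that do not need DMA remapping as ignored: units with
 * an explicit device scope containing no present devices, and units that
 * cover nothing but graphics devices when disable_igfx_iommu is set.
 */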
2217 static void __init init_no_remapping_devices(void)
2218 {
2219 struct dmar_drhd_unit *drhd;
2220 struct device *dev;
2221 int i;
2222
2223 for_each_drhd_unit(drhd) {
2224 if (!drhd->include_all) {
2225 for_each_active_dev_scope(drhd->devices,
2226 drhd->devices_cnt, i, dev)
2227 break;
2228 /* ignore DMAR unit if no devices exist */
2229 if (i == drhd->devices_cnt)
2230 drhd->ignored = 1;
2231 }
2232 }
2233
2234 for_each_active_drhd_unit(drhd) {
2235 if (drhd->include_all)
2236 continue;
2237
2238 for_each_active_dev_scope(drhd->devices,
2239 drhd->devices_cnt, i, dev)
2240 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
2241 break;
2242 if (i < drhd->devices_cnt)
2243 continue;
2244
2245 /* This IOMMU has *only* gfx devices. Either bypass it or
2246 mark it as dedicated to graphics, as appropriate */
2247 drhd->gfx_dedicated = 1;
2248 if (disable_igfx_iommu)
2249 drhd->ignored = 1;
2250 }
2251 }
2252
2253 #ifdef CONFIG_SUSPEND
2254 static int init_iommu_hw(void)
2255 {
2256 struct dmar_drhd_unit *drhd;
2257 struct intel_iommu *iommu = NULL;
2258 int ret;
2259
2260 for_each_active_iommu(iommu, drhd) {
2261 if (iommu->qi) {
2262 ret = dmar_reenable_qi(iommu);
2263 if (ret)
2264 return ret;
2265 }
2266 }
2267
2268 for_each_iommu(iommu, drhd) {
2269 if (drhd->ignored) {
2270 /*
2271 * we always have to disable PMRs or DMA may fail on
2272 * this device
2273 */
2274 if (force_on)
2275 iommu_disable_protect_mem_regions(iommu);
2276 continue;
2277 }
2278
2279 iommu_flush_write_buffer(iommu);
2280 iommu_set_root_entry(iommu);
2281 iommu_enable_translation(iommu);
2282 iommu_disable_protect_mem_regions(iommu);
2283 }
2284
2285 return 0;
2286 }
2287
2288 static void iommu_flush_all(void)
2289 {
2290 struct dmar_drhd_unit *drhd;
2291 struct intel_iommu *iommu;
2292
2293 for_each_active_iommu(iommu, drhd) {
2294 iommu->flush.flush_context(iommu, 0, 0, 0,
2295 DMA_CCMD_GLOBAL_INVL);
2296 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2297 DMA_TLB_GLOBAL_FLUSH);
2298 }
2299 }
2300
2301 static int iommu_suspend(void)
2302 {
2303 struct dmar_drhd_unit *drhd;
2304 struct intel_iommu *iommu = NULL;
2305 unsigned long flag;
2306
2307 iommu_flush_all();
2308
2309 for_each_active_iommu(iommu, drhd) {
2310 iommu_disable_translation(iommu);
2311
2312 raw_spin_lock_irqsave(&iommu->register_lock, flag);
2313
2314 iommu->iommu_state[SR_DMAR_FECTL_REG] =
2315 readl(iommu->reg + DMAR_FECTL_REG);
2316 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
2317 readl(iommu->reg + DMAR_FEDATA_REG);
2318 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
2319 readl(iommu->reg + DMAR_FEADDR_REG);
2320 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
2321 readl(iommu->reg + DMAR_FEUADDR_REG);
2322
2323 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2324 }
2325 return 0;
2326 }
2327
2328 static void iommu_resume(void)
2329 {
2330 struct dmar_drhd_unit *drhd;
2331 struct intel_iommu *iommu = NULL;
2332 unsigned long flag;
2333
2334 if (init_iommu_hw()) {
2335 if (force_on)
2336 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
2337 else
2338 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
2339 return;
2340 }
2341
2342 for_each_active_iommu(iommu, drhd) {
2343
2344 raw_spin_lock_irqsave(&iommu->register_lock, flag);
2345
2346 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
2347 iommu->reg + DMAR_FECTL_REG);
2348 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
2349 iommu->reg + DMAR_FEDATA_REG);
2350 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
2351 iommu->reg + DMAR_FEADDR_REG);
2352 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
2353 iommu->reg + DMAR_FEUADDR_REG);
2354
2355 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2356 }
2357 }
2358
2359 static struct syscore_ops iommu_syscore_ops = {
2360 .resume = iommu_resume,
2361 .suspend = iommu_suspend,
2362 };
2363
2364 static void __init init_iommu_pm_ops(void)
2365 {
2366 register_syscore_ops(&iommu_syscore_ops);
2367 }
2368
2369 #else
2370 static inline void init_iommu_pm_ops(void) {}
2371 #endif /* CONFIG_SUSPEND */
2372
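/*
 * An RMRR must describe a non-empty, page-aligned range; end_address is
 * inclusive, so end_address + 1 must also be page aligned. For example,
 * a BIOS-reported range of [0x000e0000, 0x000effff] passes these checks
 * (subject to the additional arch_rmrr_sanity_check()), while
 * [0x000e0000, 0x000e07ff] would be rejected.
 */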
2373 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
2374 {
2375 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
2376 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
2377 rmrr->end_address <= rmrr->base_address ||
2378 arch_rmrr_sanity_check(rmrr))
2379 return -EINVAL;
2380
2381 return 0;
2382 }
2383
2384 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
2385 {
2386 struct acpi_dmar_reserved_memory *rmrr;
2387 struct dmar_rmrr_unit *rmrru;
2388
2389 rmrr = (struct acpi_dmar_reserved_memory *)header;
2390 if (rmrr_sanity_check(rmrr)) {
2391 pr_warn(FW_BUG
2392 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
2393 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2394 rmrr->base_address, rmrr->end_address,
2395 dmi_get_system_info(DMI_BIOS_VENDOR),
2396 dmi_get_system_info(DMI_BIOS_VERSION),
2397 dmi_get_system_info(DMI_PRODUCT_VERSION));
2398 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
2399 }
2400
2401 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
2402 if (!rmrru)
2403 goto out;
2404
2405 rmrru->hdr = header;
2406
2407 rmrru->base_address = rmrr->base_address;
2408 rmrru->end_address = rmrr->end_address;
2409
2410 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
2411 ((void *)rmrr) + rmrr->header.length,
2412 &rmrru->devices_cnt);
2413 if (rmrru->devices_cnt && rmrru->devices == NULL)
2414 goto free_rmrru;
2415
2416 list_add(&rmrru->list, &dmar_rmrr_units);
2417
2418 return 0;
2419 free_rmrru:
2420 kfree(rmrru);
2421 out:
2422 return -ENOMEM;
2423 }
2424
2425 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
2426 {
2427 struct dmar_atsr_unit *atsru;
2428 struct acpi_dmar_atsr *tmp;
2429
2430 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
2431 dmar_rcu_check()) {
2432 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
2433 if (atsr->segment != tmp->segment)
2434 continue;
2435 if (atsr->header.length != tmp->header.length)
2436 continue;
2437 if (memcmp(atsr, tmp, atsr->header.length) == 0)
2438 return atsru;
2439 }
2440
2441 return NULL;
2442 }
2443
2444 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2445 {
2446 struct acpi_dmar_atsr *atsr;
2447 struct dmar_atsr_unit *atsru;
2448
2449 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
2450 return 0;
2451
2452 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2453 atsru = dmar_find_atsr(atsr);
2454 if (atsru)
2455 return 0;
2456
2457 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
2458 if (!atsru)
2459 return -ENOMEM;
2460
2461 /*
2462 * If memory is allocated from slab by ACPI _DSM method, we need to
2463 * copy the memory content because the memory buffer will be freed
2464 * on return.
2465 */
2466 atsru->hdr = (void *)(atsru + 1);
2467 memcpy(atsru->hdr, hdr, hdr->length);
2468 atsru->include_all = atsr->flags & 0x1;
2469 if (!atsru->include_all) {
2470 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
2471 (void *)atsr + atsr->header.length,
2472 &atsru->devices_cnt);
2473 if (atsru->devices_cnt && atsru->devices == NULL) {
2474 kfree(atsru);
2475 return -ENOMEM;
2476 }
2477 }
2478
2479 list_add_rcu(&atsru->list, &dmar_atsr_units);
2480
2481 return 0;
2482 }
2483
2484 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
2485 {
2486 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
2487 kfree(atsru);
2488 }
2489
2490 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2491 {
2492 struct acpi_dmar_atsr *atsr;
2493 struct dmar_atsr_unit *atsru;
2494
2495 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2496 atsru = dmar_find_atsr(atsr);
2497 if (atsru) {
2498 list_del_rcu(&atsru->list);
2499 synchronize_rcu();
2500 intel_iommu_free_atsr(atsru);
2501 }
2502
2503 return 0;
2504 }
2505
2506 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2507 {
2508 int i;
2509 struct device *dev;
2510 struct acpi_dmar_atsr *atsr;
2511 struct dmar_atsr_unit *atsru;
2512
2513 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2514 atsru = dmar_find_atsr(atsr);
2515 if (!atsru)
2516 return 0;
2517
2518 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
2519 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
2520 i, dev)
2521 return -EBUSY;
2522 }
2523
2524 return 0;
2525 }
2526
2527 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
2528 {
2529 struct dmar_satc_unit *satcu;
2530 struct acpi_dmar_satc *tmp;
2531
2532 list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
2533 dmar_rcu_check()) {
2534 tmp = (struct acpi_dmar_satc *)satcu->hdr;
2535 if (satc->segment != tmp->segment)
2536 continue;
2537 if (satc->header.length != tmp->header.length)
2538 continue;
2539 if (memcmp(satc, tmp, satc->header.length) == 0)
2540 return satcu;
2541 }
2542
2543 return NULL;
2544 }
2545
2546 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
2547 {
2548 struct acpi_dmar_satc *satc;
2549 struct dmar_satc_unit *satcu;
2550
2551 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
2552 return 0;
2553
2554 satc = container_of(hdr, struct acpi_dmar_satc, header);
2555 satcu = dmar_find_satc(satc);
2556 if (satcu)
2557 return 0;
2558
2559 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
2560 if (!satcu)
2561 return -ENOMEM;
2562
2563 satcu->hdr = (void *)(satcu + 1);
2564 memcpy(satcu->hdr, hdr, hdr->length);
2565 satcu->atc_required = satc->flags & 0x1;
2566 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
2567 (void *)satc + satc->header.length,
2568 &satcu->devices_cnt);
2569 if (satcu->devices_cnt && !satcu->devices) {
2570 kfree(satcu);
2571 return -ENOMEM;
2572 }
2573 list_add_rcu(&satcu->list, &dmar_satc_units);
2574
2575 return 0;
2576 }
2577
2578 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
2579 {
2580 struct intel_iommu *iommu = dmaru->iommu;
2581 int ret;
2582
2583 /*
2584 * Disable translation if already enabled prior to OS handover.
2585 */
2586 if (iommu->gcmd & DMA_GCMD_TE)
2587 iommu_disable_translation(iommu);
2588
2589 ret = iommu_alloc_root_entry(iommu);
2590 if (ret)
2591 goto out;
2592
2593 intel_svm_check(iommu);
2594
2595 if (dmaru->ignored) {
2596 /*
2597 * we always have to disable PMRs or DMA may fail on this device
2598 */
2599 if (force_on)
2600 iommu_disable_protect_mem_regions(iommu);
2601 return 0;
2602 }
2603
2604 intel_iommu_init_qi(iommu);
2605 iommu_flush_write_buffer(iommu);
2606
2607 if (ecap_prs(iommu->ecap)) {
2608 ret = intel_iommu_enable_prq(iommu);
2609 if (ret)
2610 goto disable_iommu;
2611 }
2612
2613 ret = dmar_set_interrupt(iommu);
2614 if (ret)
2615 goto disable_iommu;
2616
2617 iommu_set_root_entry(iommu);
2618 iommu_enable_translation(iommu);
2619
2620 iommu_disable_protect_mem_regions(iommu);
2621 return 0;
2622
2623 disable_iommu:
2624 disable_dmar_iommu(iommu);
2625 out:
2626 free_dmar_iommu(iommu);
2627 return ret;
2628 }
2629
2630 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
2631 {
2632 int ret = 0;
2633 struct intel_iommu *iommu = dmaru->iommu;
2634
2635 if (!intel_iommu_enabled)
2636 return 0;
2637 if (iommu == NULL)
2638 return -EINVAL;
2639
2640 if (insert) {
2641 ret = intel_iommu_add(dmaru);
2642 } else {
2643 disable_dmar_iommu(iommu);
2644 free_dmar_iommu(iommu);
2645 }
2646
2647 return ret;
2648 }
2649
2650 static void intel_iommu_free_dmars(void)
2651 {
2652 struct dmar_rmrr_unit *rmrru, *rmrr_n;
2653 struct dmar_atsr_unit *atsru, *atsr_n;
2654 struct dmar_satc_unit *satcu, *satc_n;
2655
2656 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
2657 list_del(&rmrru->list);
2658 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
2659 kfree(rmrru);
2660 }
2661
2662 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
2663 list_del(&atsru->list);
2664 intel_iommu_free_atsr(atsru);
2665 }
2666 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
2667 list_del(&satcu->list);
2668 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
2669 kfree(satcu);
2670 }
2671 }
2672
2673 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
2674 {
2675 struct dmar_satc_unit *satcu;
2676 struct acpi_dmar_satc *satc;
2677 struct device *tmp;
2678 int i;
2679
2680 rcu_read_lock();
2681
2682 list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
2683 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2684 if (satc->segment != pci_domain_nr(dev->bus))
2685 continue;
2686 for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
2687 if (to_pci_dev(tmp) == dev)
2688 goto out;
2689 }
2690 satcu = NULL;
2691 out:
2692 rcu_read_unlock();
2693 return satcu;
2694 }
2695
2696 static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
2697 {
2698 struct pci_dev *bridge = NULL;
2699 struct dmar_atsr_unit *atsru;
2700 struct dmar_satc_unit *satcu;
2701 struct acpi_dmar_atsr *atsr;
2702 bool supported = true;
2703 struct pci_bus *bus;
2704 struct device *tmp;
2705 int i;
2706
2707 dev = pci_physfn(dev);
2708 satcu = dmar_find_matched_satc_unit(dev);
2709 if (satcu)
2710 /*
2711 * This device supports ATS because it is listed in a SATC
2712 * table. When the IOMMU is in legacy mode, hardware enables
2713 * ATS automatically for devices that require it, so the OS
2714 * must not enable ATS on the device as well, to avoid
2715 * duplicated TLB invalidations.
2716 */
2717 return !(satcu->atc_required && !sm_supported(iommu));
2718
2719 for (bus = dev->bus; bus; bus = bus->parent) {
2720 bridge = bus->self;
2721 /* If it's an integrated device, allow ATS */
2722 if (!bridge)
2723 return true;
2724 /* Connected via non-PCIe: no ATS */
2725 if (!pci_is_pcie(bridge) ||
2726 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
2727 return false;
2728 /* If we found the root port, look it up in the ATSR */
2729 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
2730 break;
2731 }
2732
2733 rcu_read_lock();
2734 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
2735 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2736 if (atsr->segment != pci_domain_nr(dev->bus))
2737 continue;
2738
2739 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
2740 if (tmp == &bridge->dev)
2741 goto out;
2742
2743 if (atsru->include_all)
2744 goto out;
2745 }
2746 supported = false;
2747 out:
2748 rcu_read_unlock();
2749
2750 return supported;
2751 }
2752
2753 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
2754 {
2755 int ret;
2756 struct dmar_rmrr_unit *rmrru;
2757 struct dmar_atsr_unit *atsru;
2758 struct dmar_satc_unit *satcu;
2759 struct acpi_dmar_atsr *atsr;
2760 struct acpi_dmar_reserved_memory *rmrr;
2761 struct acpi_dmar_satc *satc;
2762
2763 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
2764 return 0;
2765
2766 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
2767 rmrr = container_of(rmrru->hdr,
2768 struct acpi_dmar_reserved_memory, header);
2769 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2770 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
2771 ((void *)rmrr) + rmrr->header.length,
2772 rmrr->segment, rmrru->devices,
2773 rmrru->devices_cnt);
2774 if (ret < 0)
2775 return ret;
2776 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2777 dmar_remove_dev_scope(info, rmrr->segment,
2778 rmrru->devices, rmrru->devices_cnt);
2779 }
2780 }
2781
2782 list_for_each_entry(atsru, &dmar_atsr_units, list) {
2783 if (atsru->include_all)
2784 continue;
2785
2786 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2787 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2788 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
2789 (void *)atsr + atsr->header.length,
2790 atsr->segment, atsru->devices,
2791 atsru->devices_cnt);
2792 if (ret > 0)
2793 break;
2794 else if (ret < 0)
2795 return ret;
2796 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2797 if (dmar_remove_dev_scope(info, atsr->segment,
2798 atsru->devices, atsru->devices_cnt))
2799 break;
2800 }
2801 }
2802 list_for_each_entry(satcu, &dmar_satc_units, list) {
2803 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2804 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2805 ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
2806 (void *)satc + satc->header.length,
2807 satc->segment, satcu->devices,
2808 satcu->devices_cnt);
2809 if (ret > 0)
2810 break;
2811 else if (ret < 0)
2812 return ret;
2813 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2814 if (dmar_remove_dev_scope(info, satc->segment,
2815 satcu->devices, satcu->devices_cnt))
2816 break;
2817 }
2818 }
2819
2820 return 0;
2821 }
2822
2823 static void intel_disable_iommus(void)
2824 {
2825 struct intel_iommu *iommu = NULL;
2826 struct dmar_drhd_unit *drhd;
2827
2828 for_each_iommu(iommu, drhd)
2829 iommu_disable_translation(iommu);
2830 }
2831
2832 void intel_iommu_shutdown(void)
2833 {
2834 struct dmar_drhd_unit *drhd;
2835 struct intel_iommu *iommu = NULL;
2836
2837 if (no_iommu || dmar_disabled)
2838 return;
2839
2840 /*
2841 * All other CPUs were brought down, hotplug interrupts were disabled,
2842 * so no locking or RCU protection is needed anymore.
2843 */
2844 list_for_each_entry(drhd, &dmar_drhd_units, list) {
2845 iommu = drhd->iommu;
2846
2847 /* Disable PMRs explicitly here. */
2848 iommu_disable_protect_mem_regions(iommu);
2849
2850 /* Make sure the IOMMUs are switched off */
2851 iommu_disable_translation(iommu);
2852 }
2853 }
2854
2855 static struct intel_iommu *dev_to_intel_iommu(struct device *dev)
2856 {
2857 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
2858
2859 return container_of(iommu_dev, struct intel_iommu, iommu);
2860 }
2861
2862 static ssize_t version_show(struct device *dev,
2863 struct device_attribute *attr, char *buf)
2864 {
2865 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2866 u32 ver = readl(iommu->reg + DMAR_VER_REG);
2867 return sysfs_emit(buf, "%d:%d\n",
2868 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
2869 }
2870 static DEVICE_ATTR_RO(version);
2871
2872 static ssize_t address_show(struct device *dev,
2873 struct device_attribute *attr, char *buf)
2874 {
2875 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2876 return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
2877 }
2878 static DEVICE_ATTR_RO(address);
2879
2880 static ssize_t cap_show(struct device *dev,
2881 struct device_attribute *attr, char *buf)
2882 {
2883 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2884 return sysfs_emit(buf, "%llx\n", iommu->cap);
2885 }
2886 static DEVICE_ATTR_RO(cap);
2887
2888 static ssize_t ecap_show(struct device *dev,
2889 struct device_attribute *attr, char *buf)
2890 {
2891 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2892 return sysfs_emit(buf, "%llx\n", iommu->ecap);
2893 }
2894 static DEVICE_ATTR_RO(ecap);
2895
2896 static ssize_t domains_supported_show(struct device *dev,
2897 struct device_attribute *attr, char *buf)
2898 {
2899 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2900 return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap));
2901 }
2902 static DEVICE_ATTR_RO(domains_supported);
2903
2904 static ssize_t domains_used_show(struct device *dev,
2905 struct device_attribute *attr, char *buf)
2906 {
2907 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2908 unsigned int count = 0;
2909 int id;
2910
2911 for (id = 0; id < cap_ndoms(iommu->cap); id++)
2912 if (ida_exists(&iommu->domain_ida, id))
2913 count++;
2914
2915 return sysfs_emit(buf, "%d\n", count);
2916 }
2917 static DEVICE_ATTR_RO(domains_used);
2918
2919 static struct attribute *intel_iommu_attrs[] = {
2920 &dev_attr_version.attr,
2921 &dev_attr_address.attr,
2922 &dev_attr_cap.attr,
2923 &dev_attr_ecap.attr,
2924 &dev_attr_domains_supported.attr,
2925 &dev_attr_domains_used.attr,
2926 NULL,
2927 };
2928
2929 static struct attribute_group intel_iommu_group = {
2930 .name = "intel-iommu",
2931 .attrs = intel_iommu_attrs,
2932 };
2933
2934 const struct attribute_group *intel_iommu_groups[] = {
2935 &intel_iommu_group,
2936 NULL,
2937 };
2938
2939 static bool has_external_pci(void)
2940 {
2941 struct pci_dev *pdev = NULL;
2942
2943 for_each_pci_dev(pdev)
2944 if (pdev->external_facing) {
2945 pci_dev_put(pdev);
2946 return true;
2947 }
2948
2949 return false;
2950 }
2951
2952 static int __init platform_optin_force_iommu(void)
2953 {
2954 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
2955 return 0;
2956
2957 if (no_iommu || dmar_disabled)
2958 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
2959
2960 /*
2961 * If Intel-IOMMU is disabled by default, we will apply identity
2962 * map for all devices except those marked as being untrusted.
2963 */
2964 if (dmar_disabled)
2965 iommu_set_default_passthrough(false);
2966
2967 dmar_disabled = 0;
2968 no_iommu = 0;
2969
2970 return 1;
2971 }
2972
2973 static int __init probe_acpi_namespace_devices(void)
2974 {
2975 struct dmar_drhd_unit *drhd;
2976 /* To avoid a -Wunused-but-set-variable warning. */
2977 struct intel_iommu *iommu __maybe_unused;
2978 struct device *dev;
2979 int i, ret = 0;
2980
2981 for_each_active_iommu(iommu, drhd) {
2982 for_each_active_dev_scope(drhd->devices,
2983 drhd->devices_cnt, i, dev) {
2984 struct acpi_device_physical_node *pn;
2985 struct acpi_device *adev;
2986
2987 if (dev->bus != &acpi_bus_type)
2988 continue;
2989
2990 up_read(&dmar_global_lock);
2991 adev = to_acpi_device(dev);
2992 mutex_lock(&adev->physical_node_lock);
2993 list_for_each_entry(pn,
2994 &adev->physical_node_list, node) {
2995 ret = iommu_probe_device(pn->dev);
2996 if (ret)
2997 break;
2998 }
2999 mutex_unlock(&adev->physical_node_lock);
3000 down_read(&dmar_global_lock);
3001
3002 if (ret)
3003 return ret;
3004 }
3005 }
3006
3007 return 0;
3008 }
3009
3010 static __init int tboot_force_iommu(void)
3011 {
3012 if (!tboot_enabled())
3013 return 0;
3014
3015 if (no_iommu || dmar_disabled)
3016 pr_warn("Forcing Intel-IOMMU to enabled\n");
3017
3018 dmar_disabled = 0;
3019 no_iommu = 0;
3020
3021 return 1;
3022 }
3023
3024 int __init intel_iommu_init(void)
3025 {
3026 int ret = -ENODEV;
3027 struct dmar_drhd_unit *drhd;
3028 struct intel_iommu *iommu;
3029
3030 /*
3031 * Intel IOMMU is required for a TXT/tboot launch or platform
3032 * opt in, so enforce that.
3033 */
3034 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
3035 platform_optin_force_iommu();
3036
3037 down_write(&dmar_global_lock);
3038 if (dmar_table_init()) {
3039 if (force_on)
3040 panic("tboot: Failed to initialize DMAR table\n");
3041 goto out_free_dmar;
3042 }
3043
3044 if (dmar_dev_scope_init() < 0) {
3045 if (force_on)
3046 panic("tboot: Failed to initialize DMAR device scope\n");
3047 goto out_free_dmar;
3048 }
3049
3050 up_write(&dmar_global_lock);
3051
3052 /*
3053 * The bus notifier takes the dmar_global_lock, so lockdep would
3054 * complain if we registered it while still holding the lock.
3055 */
3056 dmar_register_bus_notifier();
3057
3058 down_write(&dmar_global_lock);
3059
3060 if (!no_iommu)
3061 intel_iommu_debugfs_init();
3062
3063 if (no_iommu || dmar_disabled) {
3064 /*
3065 * We exit the function here to ensure IOMMU's remapping and
3066 * mempool aren't setup, which means that the IOMMU's PMRs
3067 * won't be disabled via the call to init_dmars(). So disable
3068 * it explicitly here. The PMRs were setup by tboot prior to
3069 * calling SENTER, but the kernel is expected to reset/tear
3070 * down the PMRs.
3071 */
3072 if (intel_iommu_tboot_noforce) {
3073 for_each_iommu(iommu, drhd)
3074 iommu_disable_protect_mem_regions(iommu);
3075 }
3076
3077 /*
3078 * Make sure the IOMMUs are switched off, even when we
3079 * boot into a kexec kernel and the previous kernel left
3080 * them enabled
3081 */
3082 intel_disable_iommus();
3083 goto out_free_dmar;
3084 }
3085
3086 if (list_empty(&dmar_rmrr_units))
3087 pr_info("No RMRR found\n");
3088
3089 if (list_empty(&dmar_atsr_units))
3090 pr_info("No ATSR found\n");
3091
3092 if (list_empty(&dmar_satc_units))
3093 pr_info("No SATC found\n");
3094
3095 init_no_remapping_devices();
3096
3097 ret = init_dmars();
3098 if (ret) {
3099 if (force_on)
3100 panic("tboot: Failed to initialize DMARs\n");
3101 pr_err("Initialization failed\n");
3102 goto out_free_dmar;
3103 }
3104 up_write(&dmar_global_lock);
3105
3106 init_iommu_pm_ops();
3107
3108 down_read(&dmar_global_lock);
3109 for_each_active_iommu(iommu, drhd) {
3110 /*
3111 * The flush queue implementation does not perform
3112 * page-selective invalidations that are required for efficient
3113 * TLB flushes in virtual environments. The benefit of batching
3114 * is likely to be much lower than the overhead of synchronizing
3115 * the virtual and physical IOMMU page-tables.
3116 */
3117 if (cap_caching_mode(iommu->cap) &&
3118 !first_level_by_default(iommu)) {
3119 pr_info_once("IOMMU batching disallowed due to virtualization\n");
3120 iommu_set_dma_strict();
3121 }
3122 iommu_device_sysfs_add(&iommu->iommu, NULL,
3123 intel_iommu_groups,
3124 "%s", iommu->name);
3125 /*
3126 * The iommu device probe is protected by the iommu_probe_device_lock.
3127 * Release the dmar_global_lock before entering the device probe path
3128 * to avoid unnecessary lock order splat.
3129 */
3130 up_read(&dmar_global_lock);
3131 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
3132 down_read(&dmar_global_lock);
3133
3134 iommu_pmu_register(iommu);
3135 }
3136
3137 if (probe_acpi_namespace_devices())
3138 pr_warn("ACPI name space devices didn't probe correctly\n");
3139
3140 /* Finally, we enable the DMA remapping hardware. */
3141 for_each_iommu(iommu, drhd) {
3142 if (!drhd->ignored && !translation_pre_enabled(iommu))
3143 iommu_enable_translation(iommu);
3144
3145 iommu_disable_protect_mem_regions(iommu);
3146 }
3147 up_read(&dmar_global_lock);
3148
3149 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
3150
3151 intel_iommu_enabled = 1;
3152
3153 return 0;
3154
3155 out_free_dmar:
3156 intel_iommu_free_dmars();
3157 up_write(&dmar_global_lock);
3158 return ret;
3159 }
3160
3161 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
3162 {
3163 struct device_domain_info *info = opaque;
3164
3165 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
3166 return 0;
3167 }
3168
3169 /*
3170 * NB - intel-iommu lacks any sort of reference counting for the users of
3171 * dependent devices. If multiple endpoints have intersecting dependent
3172 * devices, unbinding the driver from any one of them will possibly leave
3173 * the others unable to operate.
3174 */
3175 static void domain_context_clear(struct device_domain_info *info)
3176 {
3177 if (!dev_is_pci(info->dev)) {
3178 domain_context_clear_one(info, info->bus, info->devfn);
3179 return;
3180 }
3181
3182 pci_for_each_dma_alias(to_pci_dev(info->dev),
3183 &domain_context_clear_one_cb, info);
3184 iommu_disable_pci_ats(info);
3185 }
3186
3187 /*
3188 * Clear the page table pointer in context or pasid table entries so that
3189 * all DMA requests without PASID from the device are blocked. If the page
3190 * table has been set, clean up the data structures.
3191 */
3192 void device_block_translation(struct device *dev)
3193 {
3194 struct device_domain_info *info = dev_iommu_priv_get(dev);
3195 struct intel_iommu *iommu = info->iommu;
3196 unsigned long flags;
3197
3198 /* Device is already in the DMA blocking state. Nothing to do. */
3199 if (!info->domain_attached)
3200 return;
3201
3202 if (info->domain)
3203 cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
3204
3205 if (!dev_is_real_dma_subdevice(dev)) {
3206 if (sm_supported(iommu))
3207 intel_pasid_tear_down_entry(iommu, dev,
3208 IOMMU_NO_PASID, false);
3209 else
3210 domain_context_clear(info);
3211 }
3212
3213 /* Device now in DMA blocking state. */
3214 info->domain_attached = false;
3215
3216 if (!info->domain)
3217 return;
3218
3219 spin_lock_irqsave(&info->domain->lock, flags);
3220 list_del(&info->link);
3221 spin_unlock_irqrestore(&info->domain->lock, flags);
3222
3223 domain_detach_iommu(info->domain, iommu);
3224 info->domain = NULL;
3225 }
3226
3227 static int blocking_domain_attach_dev(struct iommu_domain *domain,
3228 struct device *dev)
3229 {
3230 struct device_domain_info *info = dev_iommu_priv_get(dev);
3231
3232 iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev);
3233 device_block_translation(dev);
3234 return 0;
3235 }
3236
3237 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
3238 struct device *dev, ioasid_t pasid,
3239 struct iommu_domain *old);
3240
3241 static struct iommu_domain blocking_domain = {
3242 .type = IOMMU_DOMAIN_BLOCKED,
3243 .ops = &(const struct iommu_domain_ops) {
3244 .attach_dev = blocking_domain_attach_dev,
3245 .set_dev_pasid = blocking_domain_set_dev_pasid,
3246 }
3247 };
3248
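/*
 * Return how many superpage sizes a paging domain on this IOMMU may use:
 * 0 means 4KiB pages only, 1 adds 2MiB, 2 adds 1GiB. First-stage tables
 * always support 2MiB and add 1GiB only with the FL1GP capability;
 * second-stage support comes straight from the SLLPS capability field.
 */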
3249 static int iommu_superpage_capability(struct intel_iommu *iommu, bool first_stage)
3250 {
3251 if (!intel_iommu_superpage)
3252 return 0;
3253
3254 if (first_stage)
3255 return cap_fl1gp_support(iommu->cap) ? 2 : 1;
3256
3257 return fls(cap_super_page_val(iommu->cap));
3258 }
3259
3260 static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_stage)
3261 {
3262 struct device_domain_info *info = dev_iommu_priv_get(dev);
3263 struct intel_iommu *iommu = info->iommu;
3264 struct dmar_domain *domain;
3265 int addr_width;
3266
3267 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
3268 if (!domain)
3269 return ERR_PTR(-ENOMEM);
3270
3271 INIT_LIST_HEAD(&domain->devices);
3272 INIT_LIST_HEAD(&domain->dev_pasids);
3273 INIT_LIST_HEAD(&domain->cache_tags);
3274 spin_lock_init(&domain->lock);
3275 spin_lock_init(&domain->cache_lock);
3276 xa_init(&domain->iommu_array);
3277 INIT_LIST_HEAD(&domain->s1_domains);
3278 spin_lock_init(&domain->s1_lock);
3279
3280 domain->nid = dev_to_node(dev);
3281 domain->use_first_level = first_stage;
3282
3283 domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
3284
3285 /* calculate the address width */
3286 addr_width = agaw_to_width(iommu->agaw);
3287 if (addr_width > cap_mgaw(iommu->cap))
3288 addr_width = cap_mgaw(iommu->cap);
3289 domain->gaw = addr_width;
3290 domain->agaw = iommu->agaw;
3291 domain->max_addr = __DOMAIN_MAX_ADDR(addr_width);
3292
3293 /* iommu memory access coherency */
3294 domain->iommu_coherency = iommu_paging_structure_coherency(iommu);
3295
3296 /* pagesize bitmap */
3297 domain->domain.pgsize_bitmap = SZ_4K;
3298 domain->iommu_superpage = iommu_superpage_capability(iommu, first_stage);
3299 domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
3300
3301 /*
3302 * IOVA aperture: First-level translation restricts the input-address
3303 * to a canonical address (i.e., address bits 63:N have the same value
3304 * as address bit [N-1], where N is 48-bits with 4-level paging and
3305 * 57-bits with 5-level paging). Hence, skip bit [N-1].
3306 */
3307 domain->domain.geometry.force_aperture = true;
3308 domain->domain.geometry.aperture_start = 0;
3309 if (first_stage)
3310 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
3311 else
3312 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
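	/*
	 * E.g. with a typical 48-bit gaw: a second-stage domain gets an
	 * aperture of [0, 2^48 - 1], while a first-stage domain is clipped
	 * to [0, 2^47 - 1] so that all IOVAs stay canonical.
	 */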
3313
3314 /* always allocate the top pgd */
3315 domain->pgd = iommu_alloc_pages_node_sz(domain->nid, GFP_KERNEL, SZ_4K);
3316 if (!domain->pgd) {
3317 kfree(domain);
3318 return ERR_PTR(-ENOMEM);
3319 }
3320 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3321
3322 return domain;
3323 }
3324
3325 static struct iommu_domain *
3326 intel_iommu_domain_alloc_first_stage(struct device *dev,
3327 struct intel_iommu *iommu, u32 flags)
3328 {
3329 struct dmar_domain *dmar_domain;
3330
3331 if (flags & ~IOMMU_HWPT_ALLOC_PASID)
3332 return ERR_PTR(-EOPNOTSUPP);
3333
3334 /* Only SL is available in legacy mode */
3335 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
3336 return ERR_PTR(-EOPNOTSUPP);
3337
3338 dmar_domain = paging_domain_alloc(dev, true);
3339 if (IS_ERR(dmar_domain))
3340 return ERR_CAST(dmar_domain);
3341
3342 dmar_domain->domain.ops = &intel_fs_paging_domain_ops;
3343 /*
3344 * iotlb sync for map is only needed for legacy implementations that
3345 * explicitly require flushing internal write buffers to ensure memory
3346 * coherence.
3347 */
3348 if (rwbf_required(iommu))
3349 dmar_domain->iotlb_sync_map = true;
3350
3351 return &dmar_domain->domain;
3352 }
3353
3354 static struct iommu_domain *
3355 intel_iommu_domain_alloc_second_stage(struct device *dev,
3356 struct intel_iommu *iommu, u32 flags)
3357 {
3358 struct dmar_domain *dmar_domain;
3359
3360 if (flags &
3361 (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
3362 IOMMU_HWPT_ALLOC_PASID)))
3363 return ERR_PTR(-EOPNOTSUPP);
3364
3365 if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) &&
3366 !nested_supported(iommu)) ||
3367 ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
3368 !ssads_supported(iommu)))
3369 return ERR_PTR(-EOPNOTSUPP);
3370
3371 /* Legacy mode always supports second stage */
3372 if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
3373 return ERR_PTR(-EOPNOTSUPP);
3374
3375 dmar_domain = paging_domain_alloc(dev, false);
3376 if (IS_ERR(dmar_domain))
3377 return ERR_CAST(dmar_domain);
3378
3379 dmar_domain->domain.ops = &intel_ss_paging_domain_ops;
3380 dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
3381
3382 if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
3383 dmar_domain->domain.dirty_ops = &intel_dirty_ops;
3384
3385 /*
3386 * Besides the internal write buffer flush, the caching mode used for
3387 * legacy nested translation (which utilizes shadowing page tables)
3388 * also requires iotlb sync on map.
3389 */
3390 if (rwbf_required(iommu) || cap_caching_mode(iommu->cap))
3391 dmar_domain->iotlb_sync_map = true;
3392
3393 return &dmar_domain->domain;
3394 }
3395
3396 static struct iommu_domain *
3397 intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
3398 const struct iommu_user_data *user_data)
3399 {
3400 struct device_domain_info *info = dev_iommu_priv_get(dev);
3401 struct intel_iommu *iommu = info->iommu;
3402 struct iommu_domain *domain;
3403
3404 if (user_data)
3405 return ERR_PTR(-EOPNOTSUPP);
3406
3407 /* Prefer first stage if possible by default. */
3408 domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags);
3409 if (domain != ERR_PTR(-EOPNOTSUPP))
3410 return domain;
3411 return intel_iommu_domain_alloc_second_stage(dev, iommu, flags);
3412 }
3413
3414 static void intel_iommu_domain_free(struct iommu_domain *domain)
3415 {
3416 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3417
3418 if (WARN_ON(dmar_domain->nested_parent &&
3419 !list_empty(&dmar_domain->s1_domains)))
3420 return;
3421
3422 if (WARN_ON(!list_empty(&dmar_domain->devices)))
3423 return;
3424
3425 if (dmar_domain->pgd) {
3426 struct iommu_pages_list freelist =
3427 IOMMU_PAGES_LIST_INIT(freelist);
3428
3429 domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw),
3430 &freelist);
3431 iommu_put_pages_list(&freelist);
3432 }
3433
3434 kfree(dmar_domain->qi_batch);
3435 kfree(dmar_domain);
3436 }
3437
3438 static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain,
3439 struct intel_iommu *iommu)
3440 {
3441 if (WARN_ON(dmar_domain->domain.dirty_ops ||
3442 dmar_domain->nested_parent))
3443 return -EINVAL;
3444
3445 /* Only SL is available in legacy mode */
3446 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
3447 return -EINVAL;
3448
3449 /* Same page size support */
3450 if (!cap_fl1gp_support(iommu->cap) &&
3451 (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3452 return -EINVAL;
3453
3454 /* iotlb sync on map requirement */
3455 if ((rwbf_required(iommu)) && !dmar_domain->iotlb_sync_map)
3456 return -EINVAL;
3457
3458 return 0;
3459 }
3460
3461 static int
3462 paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain,
3463 struct intel_iommu *iommu)
3464 {
3465 unsigned int sslps = cap_super_page_val(iommu->cap);
3466
3467 if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu))
3468 return -EINVAL;
3469 if (dmar_domain->nested_parent && !nested_supported(iommu))
3470 return -EINVAL;
3471
3472 /* Legacy mode always supports second stage */
3473 if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
3474 return -EINVAL;
3475
3476 /* Same page size support */
3477 if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M))
3478 return -EINVAL;
3479 if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3480 return -EINVAL;
3481
3482 /* iotlb sync on map requirement */
3483 if ((rwbf_required(iommu) || cap_caching_mode(iommu->cap)) &&
3484 !dmar_domain->iotlb_sync_map)
3485 return -EINVAL;
3486
3487 return 0;
3488 }
3489
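/*
 * Check that an already-allocated paging domain can also be used behind
 * @dev's IOMMU: matching stage support, page-size, snooping and
 * page-walk coherency capabilities, and a sufficient address width. In
 * scalable mode, also restore the PASID-table context if the context
 * entry was copied over from a previous kernel.
 */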
3490 int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
3491 {
3492 struct device_domain_info *info = dev_iommu_priv_get(dev);
3493 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3494 struct intel_iommu *iommu = info->iommu;
3495 int ret = -EINVAL;
3496 int addr_width;
3497
3498 if (intel_domain_is_fs_paging(dmar_domain))
3499 ret = paging_domain_compatible_first_stage(dmar_domain, iommu);
3500 else if (intel_domain_is_ss_paging(dmar_domain))
3501 ret = paging_domain_compatible_second_stage(dmar_domain, iommu);
3502 else if (WARN_ON(true))
3503 ret = -EINVAL;
3504 if (ret)
3505 return ret;
3506
3507 /*
3508 * FIXME this is locked wrong, it needs to be under the
3509 * dmar_domain->lock
3510 */
3511 if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
3512 return -EINVAL;
3513
3514 if (dmar_domain->iommu_coherency !=
3515 iommu_paging_structure_coherency(iommu))
3516 return -EINVAL;
3517
3518
3519 /* check if this iommu agaw is sufficient for max mapped address */
3520 addr_width = agaw_to_width(iommu->agaw);
3521 if (addr_width > cap_mgaw(iommu->cap))
3522 addr_width = cap_mgaw(iommu->cap);
3523
3524 if (dmar_domain->gaw > addr_width || dmar_domain->agaw > iommu->agaw)
3525 return -EINVAL;
3526
3527 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3528 context_copied(iommu, info->bus, info->devfn))
3529 return intel_pasid_setup_sm_context(dev);
3530
3531 return 0;
3532 }
3533
3534 static int intel_iommu_attach_device(struct iommu_domain *domain,
3535 struct device *dev)
3536 {
3537 int ret;
3538
3539 device_block_translation(dev);
3540
3541 ret = paging_domain_compatible(domain, dev);
3542 if (ret)
3543 return ret;
3544
3545 ret = iopf_for_domain_set(domain, dev);
3546 if (ret)
3547 return ret;
3548
3549 ret = dmar_domain_attach_device(to_dmar_domain(domain), dev);
3550 if (ret)
3551 iopf_for_domain_remove(domain, dev);
3552
3553 return ret;
3554 }
3555
3556 static int intel_iommu_map(struct iommu_domain *domain,
3557 unsigned long iova, phys_addr_t hpa,
3558 size_t size, int iommu_prot, gfp_t gfp)
3559 {
3560 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3561 u64 max_addr;
3562 int prot = 0;
3563
3564 if (iommu_prot & IOMMU_READ)
3565 prot |= DMA_PTE_READ;
3566 if (iommu_prot & IOMMU_WRITE)
3567 prot |= DMA_PTE_WRITE;
3568 if (dmar_domain->set_pte_snp)
3569 prot |= DMA_PTE_SNP;
3570
3571 max_addr = iova + size;
3572 if (dmar_domain->max_addr < max_addr) {
3573 u64 end;
3574
3575 /* check if minimum agaw is sufficient for mapped address */
3576 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
3577 if (end < max_addr) {
3578 pr_err("%s: iommu width (%d) is not "
3579 "sufficient for the mapped address (%llx)\n",
3580 __func__, dmar_domain->gaw, max_addr);
3581 return -EFAULT;
3582 }
3583 dmar_domain->max_addr = max_addr;
3584 }
3585 /* Round up size to next multiple of PAGE_SIZE, if it and
3586 the low bits of hpa would take us onto the next page */
3587 size = aligned_nrpages(hpa, size);
3588 return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3589 hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
3590 }
3591
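/*
 * The iommu core splits a map request into calls that each use a single
 * page size drawn from the domain's pgsize_bitmap, so only 4KiB, 2MiB or
 * 1GiB can legitimately show up here, and iova/paddr must be aligned to
 * that size.
 */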
3592 static int intel_iommu_map_pages(struct iommu_domain *domain,
3593 unsigned long iova, phys_addr_t paddr,
3594 size_t pgsize, size_t pgcount,
3595 int prot, gfp_t gfp, size_t *mapped)
3596 {
3597 unsigned long pgshift = __ffs(pgsize);
3598 size_t size = pgcount << pgshift;
3599 int ret;
3600
3601 if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
3602 return -EINVAL;
3603
3604 if (!IS_ALIGNED(iova | paddr, pgsize))
3605 return -EINVAL;
3606
3607 ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
3608 if (!ret && mapped)
3609 *mapped = size;
3610
3611 return ret;
3612 }
3613
3614 static size_t intel_iommu_unmap(struct iommu_domain *domain,
3615 unsigned long iova, size_t size,
3616 struct iommu_iotlb_gather *gather)
3617 {
3618 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3619 unsigned long start_pfn, last_pfn;
3620 int level = 0;
3621
3622 /* Cope with horrid API which requires us to unmap more than the
3623 size argument if it happens to be a large-page mapping. */
3624 if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
3625 &level, GFP_ATOMIC)))
3626 return 0;
3627
3628 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
3629 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
3630
3631 start_pfn = iova >> VTD_PAGE_SHIFT;
3632 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
3633
3634 domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
3635
3636 if (dmar_domain->max_addr == iova + size)
3637 dmar_domain->max_addr = iova;
3638
3639 /*
3640 * We do not use page-selective IOTLB invalidation in flush queue,
3641 * so there is no need to track page and sync iotlb.
3642 */
3643 if (!iommu_iotlb_gather_queued(gather))
3644 iommu_iotlb_gather_add_page(domain, gather, iova, size);
3645
3646 return size;
3647 }
3648
3649 static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
3650 unsigned long iova,
3651 size_t pgsize, size_t pgcount,
3652 struct iommu_iotlb_gather *gather)
3653 {
3654 unsigned long pgshift = __ffs(pgsize);
3655 size_t size = pgcount << pgshift;
3656
3657 return intel_iommu_unmap(domain, iova, size, gather);
3658 }
3659
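/*
 * Flush the caches for the IOVA range accumulated in @gather and free
 * any page-table pages that were queued on the freelist during unmap.
 */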
3660 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
3661 struct iommu_iotlb_gather *gather)
3662 {
3663 cache_tag_flush_range(to_dmar_domain(domain), gather->start,
3664 gather->end,
3665 iommu_pages_list_empty(&gather->freelist));
3666 iommu_put_pages_list(&gather->freelist);
3667 }
3668
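/*
 * Walk the page table to the leaf entry covering @iova and add the
 * offset within the (possibly large) page it maps. Returns 0 if no
 * mapping is present.
 */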
3669 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3670 dma_addr_t iova)
3671 {
3672 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3673 struct dma_pte *pte;
3674 int level = 0;
3675 u64 phys = 0;
3676
3677 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
3678 GFP_ATOMIC);
3679 if (pte && dma_pte_present(pte))
3680 phys = dma_pte_addr(pte) +
3681 (iova & (BIT_MASK(level_to_offset_bits(level) +
3682 VTD_PAGE_SHIFT) - 1));
3683
3684 return phys;
3685 }
3686
3687 static bool domain_support_force_snooping(struct dmar_domain *domain)
3688 {
3689 struct device_domain_info *info;
3690 bool support = true;
3691
3692 assert_spin_locked(&domain->lock);
3693 list_for_each_entry(info, &domain->devices, link) {
3694 if (!ecap_sc_support(info->iommu->ecap)) {
3695 support = false;
3696 break;
3697 }
3698 }
3699
3700 return support;
3701 }
3702
3703 static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain)
3704 {
3705 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3706 struct device_domain_info *info;
3707
3708 guard(spinlock_irqsave)(&dmar_domain->lock);
3709
3710 if (dmar_domain->force_snooping)
3711 return true;
3712
3713 if (!domain_support_force_snooping(dmar_domain))
3714 return false;
3715
3716 dmar_domain->force_snooping = true;
3717 list_for_each_entry(info, &dmar_domain->devices, link)
3718 intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
3719 IOMMU_NO_PASID);
3720 return true;
3721 }
3722
3723 static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain)
3724 {
3725 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3726
3727 guard(spinlock_irqsave)(&dmar_domain->lock);
3728 if (!domain_support_force_snooping(dmar_domain) ||
3729 dmar_domain->has_mappings)
3730 return false;
3731
3732 /*
3733 * Second level page table supports per-PTE snoop control. The
3734 * iommu_map() interface will handle this by setting SNP bit.
3735 */
3736 dmar_domain->set_pte_snp = true;
3737 dmar_domain->force_snooping = true;
3738 return true;
3739 }
3740
3741 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
3742 {
3743 struct device_domain_info *info = dev_iommu_priv_get(dev);
3744
3745 switch (cap) {
3746 case IOMMU_CAP_CACHE_COHERENCY:
3747 case IOMMU_CAP_DEFERRED_FLUSH:
3748 return true;
3749 case IOMMU_CAP_PRE_BOOT_PROTECTION:
3750 return dmar_platform_optin();
3751 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
3752 return ecap_sc_support(info->iommu->ecap);
3753 case IOMMU_CAP_DIRTY_TRACKING:
3754 return ssads_supported(info->iommu);
3755 default:
3756 return false;
3757 }
3758 }
3759
3760 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
3761 {
3762 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
3763 struct device_domain_info *info;
3764 struct intel_iommu *iommu;
3765 u8 bus, devfn;
3766 int ret;
3767
3768 iommu = device_lookup_iommu(dev, &bus, &devfn);
3769 if (!iommu || !iommu->iommu.ops)
3770 return ERR_PTR(-ENODEV);
3771
3772 info = kzalloc(sizeof(*info), GFP_KERNEL);
3773 if (!info)
3774 return ERR_PTR(-ENOMEM);
3775
3776 if (dev_is_real_dma_subdevice(dev)) {
3777 info->bus = pdev->bus->number;
3778 info->devfn = pdev->devfn;
3779 info->segment = pci_domain_nr(pdev->bus);
3780 } else {
3781 info->bus = bus;
3782 info->devfn = devfn;
3783 info->segment = iommu->segment;
3784 }
3785
3786 info->dev = dev;
3787 info->iommu = iommu;
3788 if (dev_is_pci(dev)) {
3789 if (ecap_dev_iotlb_support(iommu->ecap) &&
3790 pci_ats_supported(pdev) &&
3791 dmar_ats_supported(pdev, iommu)) {
3792 info->ats_supported = 1;
3793 info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
3794
3795 /*
3796 * For an IOMMU that supports device IOTLB throttling
3797 * (DIT), we assign the PFSID to the invalidation descriptor
3798 * of a VF so that the IOMMU hardware can gauge queue depth
3799 * at the PF level. If DIT is not supported, the PFSID field
3800 * is treated as reserved and should be left as 0.
3801 */
3802 if (ecap_dit(iommu->ecap))
3803 info->pfsid = pci_dev_id(pci_physfn(pdev));
3804 info->ats_qdep = pci_ats_queue_depth(pdev);
3805 }
3806 if (sm_supported(iommu)) {
3807 if (pasid_supported(iommu)) {
3808 int features = pci_pasid_features(pdev);
3809
3810 if (features >= 0)
3811 info->pasid_supported = features | 1;
3812 }
3813
3814 if (info->ats_supported && ecap_prs(iommu->ecap) &&
3815 pci_pri_supported(pdev))
3816 info->pri_supported = 1;
3817 }
3818 }
3819
3820 dev_iommu_priv_set(dev, info);
3821 if (pdev && pci_ats_supported(pdev)) {
3822 pci_prepare_ats(pdev, VTD_PAGE_SHIFT);
3823 ret = device_rbtree_insert(iommu, info);
3824 if (ret)
3825 goto free;
3826 }
3827
3828 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3829 ret = intel_pasid_alloc_table(dev);
3830 if (ret) {
3831 dev_err(dev, "PASID table allocation failed\n");
3832 goto clear_rbtree;
3833 }
3834
3835 if (!context_copied(iommu, info->bus, info->devfn)) {
3836 ret = intel_pasid_setup_sm_context(dev);
3837 if (ret)
3838 goto free_table;
3839 }
3840 }
3841
3842 intel_iommu_debugfs_create_dev(info);
3843
3844 return &iommu->iommu;
3845 free_table:
3846 intel_pasid_free_table(dev);
3847 clear_rbtree:
3848 device_rbtree_remove(info);
3849 free:
3850 kfree(info);
3851
3852 return ERR_PTR(ret);
3853 }
3854
3855 static void intel_iommu_probe_finalize(struct device *dev)
3856 {
3857 struct device_domain_info *info = dev_iommu_priv_get(dev);
3858 struct intel_iommu *iommu = info->iommu;
3859
3860 /*
3861 * The PCIe spec, in its wisdom, declares that the behaviour of the
3862 * device is undefined if you enable PASID support after ATS support.
3863 * So always enable PASID support on devices which have it, even if
3864 * we can't yet know if we're ever going to use it.
3865 */
3866 if (info->pasid_supported &&
3867 !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
3868 info->pasid_enabled = 1;
3869
3870 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3871 iommu_enable_pci_ats(info);
3872 /* Assign a DEVTLB cache tag to the default domain. */
3873 if (info->ats_enabled && info->domain) {
3874 u16 did = domain_id_iommu(info->domain, iommu);
3875
3876 if (cache_tag_assign(info->domain, did, dev,
3877 IOMMU_NO_PASID, CACHE_TAG_DEVTLB))
3878 iommu_disable_pci_ats(info);
3879 }
3880 }
3881 iommu_enable_pci_pri(info);
3882 }
3883
3884 static void intel_iommu_release_device(struct device *dev)
3885 {
3886 struct device_domain_info *info = dev_iommu_priv_get(dev);
3887 struct intel_iommu *iommu = info->iommu;
3888
3889 iommu_disable_pci_pri(info);
3890 iommu_disable_pci_ats(info);
3891
3892 if (info->pasid_enabled) {
3893 pci_disable_pasid(to_pci_dev(dev));
3894 info->pasid_enabled = 0;
3895 }
3896
3897 mutex_lock(&iommu->iopf_lock);
3898 if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
3899 device_rbtree_remove(info);
3900 mutex_unlock(&iommu->iopf_lock);
3901
3902 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3903 !context_copied(iommu, info->bus, info->devfn))
3904 intel_pasid_teardown_sm_context(dev);
3905
3906 intel_pasid_free_table(dev);
3907 intel_iommu_debugfs_remove_dev(info);
3908 kfree(info);
3909 }
3910
3911 static void intel_iommu_get_resv_regions(struct device *device,
3912 struct list_head *head)
3913 {
3914 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
3915 struct iommu_resv_region *reg;
3916 struct dmar_rmrr_unit *rmrr;
3917 struct device *i_dev;
3918 int i;
3919
3920 rcu_read_lock();
3921 for_each_rmrr_units(rmrr) {
3922 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3923 i, i_dev) {
3924 struct iommu_resv_region *resv;
3925 enum iommu_resv_type type;
3926 size_t length;
3927
3928 if (i_dev != device &&
3929 !is_downstream_to_pci_bridge(device, i_dev))
3930 continue;
3931
3932 length = rmrr->end_address - rmrr->base_address + 1;
3933
3934 type = device_rmrr_is_relaxable(device) ?
3935 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
3936
3937 resv = iommu_alloc_resv_region(rmrr->base_address,
3938 length, prot, type,
3939 GFP_ATOMIC);
3940 if (!resv)
3941 break;
3942
3943 list_add_tail(&resv->list, head);
3944 }
3945 }
3946 rcu_read_unlock();
3947
3948 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
3949 if (dev_is_pci(device)) {
3950 struct pci_dev *pdev = to_pci_dev(device);
3951
3952 if (IS_ISA_DEVICE(pdev)) {
3953 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
3954 IOMMU_RESV_DIRECT_RELAXABLE,
3955 GFP_KERNEL);
3956 if (reg)
3957 list_add_tail(&reg->list, head);
3958 }
3959 }
3960 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
3961
3962 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
3963 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
3964 0, IOMMU_RESV_MSI, GFP_KERNEL);
3965 if (!reg)
3966 return;
3967 list_add_tail(&reg->list, head);
3968 }
3969
3970 static struct iommu_group *intel_iommu_device_group(struct device *dev)
3971 {
3972 if (dev_is_pci(dev))
3973 return pci_device_group(dev);
3974 return generic_device_group(dev);
3975 }
3976
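/*
 * Register the device with its IOMMU's I/O page fault queue. The
 * registration is reference counted so that multiple fault-capable
 * attachments share a single registration. PRI must already be enabled
 * on the device.
 */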
3977 int intel_iommu_enable_iopf(struct device *dev)
3978 {
3979 struct device_domain_info *info = dev_iommu_priv_get(dev);
3980 struct intel_iommu *iommu = info->iommu;
3981 int ret;
3982
3983 if (!info->pri_enabled)
3984 return -ENODEV;
3985
3986 /* pri_enabled is protected by the group mutex. */
3987 iommu_group_mutex_assert(dev);
3988 if (info->iopf_refcount) {
3989 info->iopf_refcount++;
3990 return 0;
3991 }
3992
3993 ret = iopf_queue_add_device(iommu->iopf_queue, dev);
3994 if (ret)
3995 return ret;
3996
3997 info->iopf_refcount = 1;
3998
3999 return 0;
4000 }
4001
4002 void intel_iommu_disable_iopf(struct device *dev)
4003 {
4004 struct device_domain_info *info = dev_iommu_priv_get(dev);
4005 struct intel_iommu *iommu = info->iommu;
4006
4007 if (WARN_ON(!info->pri_enabled || !info->iopf_refcount))
4008 return;
4009
4010 iommu_group_mutex_assert(dev);
4011 if (--info->iopf_refcount)
4012 return;
4013
4014 iopf_queue_remove_device(iommu->iopf_queue, dev);
4015 }
4016
4017 static bool intel_iommu_is_attach_deferred(struct device *dev)
4018 {
4019 struct device_domain_info *info = dev_iommu_priv_get(dev);
4020
4021 return translation_pre_enabled(info->iommu) && !info->domain;
4022 }
4023
4024 /*
4025 * Check that the device does not live on an external facing PCI port that is
4026 * marked as untrusted. Such devices should not be able to apply quirks and
4027 * thus not be able to bypass the IOMMU restrictions.
4028 */
4029 static bool risky_device(struct pci_dev *pdev)
4030 {
4031 if (pdev->untrusted) {
4032 pci_info(pdev,
4033 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
4034 pdev->vendor, pdev->device);
4035 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
4036 return true;
4037 }
4038 return false;
4039 }
4040
4041 static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
4042 unsigned long iova, size_t size)
4043 {
4044 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4045
4046 if (dmar_domain->iotlb_sync_map)
4047 cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1);
4048
4049 return 0;
4050 }
4051
4052 void domain_remove_dev_pasid(struct iommu_domain *domain,
4053 struct device *dev, ioasid_t pasid)
4054 {
4055 struct device_domain_info *info = dev_iommu_priv_get(dev);
4056 struct dev_pasid_info *curr, *dev_pasid = NULL;
4057 struct intel_iommu *iommu = info->iommu;
4058 struct dmar_domain *dmar_domain;
4059 unsigned long flags;
4060
4061 if (!domain)
4062 return;
4063
4064 /* Identity domain has no meta data for pasid. */
4065 if (domain->type == IOMMU_DOMAIN_IDENTITY)
4066 return;
4067
4068 dmar_domain = to_dmar_domain(domain);
4069 spin_lock_irqsave(&dmar_domain->lock, flags);
4070 list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
4071 if (curr->dev == dev && curr->pasid == pasid) {
4072 list_del(&curr->link_domain);
4073 dev_pasid = curr;
4074 break;
4075 }
4076 }
4077 spin_unlock_irqrestore(&dmar_domain->lock, flags);
4078
4079 cache_tag_unassign_domain(dmar_domain, dev, pasid);
4080 domain_detach_iommu(dmar_domain, iommu);
4081 if (!WARN_ON_ONCE(!dev_pasid)) {
4082 intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
4083 kfree(dev_pasid);
4084 }
4085 }
4086
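/*
 * Attaching the blocking domain to a PASID tears down the PASID table
 * entry, drops any IOPF reference taken for the old domain and removes
 * the old domain's bookkeeping for this PASID.
 */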
4087 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
4088 struct device *dev, ioasid_t pasid,
4089 struct iommu_domain *old)
4090 {
4091 struct device_domain_info *info = dev_iommu_priv_get(dev);
4092
4093 intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
4094 iopf_for_domain_remove(old, dev);
4095 domain_remove_dev_pasid(old, dev, pasid);
4096
4097 return 0;
4098 }
4099
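/*
 * Allocate the per-PASID bookkeeping for attaching @domain to @dev:
 * take a reference on the domain/IOMMU binding, assign cache tags for
 * the PASID and link the entry on the domain's dev_pasids list.
 */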
4100 struct dev_pasid_info *
4101 domain_add_dev_pasid(struct iommu_domain *domain,
4102 struct device *dev, ioasid_t pasid)
4103 {
4104 struct device_domain_info *info = dev_iommu_priv_get(dev);
4105 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4106 struct intel_iommu *iommu = info->iommu;
4107 struct dev_pasid_info *dev_pasid;
4108 unsigned long flags;
4109 int ret;
4110
4111 dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
4112 if (!dev_pasid)
4113 return ERR_PTR(-ENOMEM);
4114
4115 ret = domain_attach_iommu(dmar_domain, iommu);
4116 if (ret)
4117 goto out_free;
4118
4119 ret = cache_tag_assign_domain(dmar_domain, dev, pasid);
4120 if (ret)
4121 goto out_detach_iommu;
4122
4123 dev_pasid->dev = dev;
4124 dev_pasid->pasid = pasid;
4125 spin_lock_irqsave(&dmar_domain->lock, flags);
4126 list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
4127 spin_unlock_irqrestore(&dmar_domain->lock, flags);
4128
4129 return dev_pasid;
4130 out_detach_iommu:
4131 domain_detach_iommu(dmar_domain, iommu);
4132 out_free:
4133 kfree(dev_pasid);
4134 return ERR_PTR(ret);
4135 }
4136
4137 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
4138 struct device *dev, ioasid_t pasid,
4139 struct iommu_domain *old)
4140 {
4141 struct device_domain_info *info = dev_iommu_priv_get(dev);
4142 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4143 struct intel_iommu *iommu = info->iommu;
4144 struct dev_pasid_info *dev_pasid;
4145 int ret;
4146
4147 if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
4148 return -EINVAL;
4149
4150 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
4151 return -EOPNOTSUPP;
4152
4153 if (domain->dirty_ops)
4154 return -EINVAL;
4155
4156 if (context_copied(iommu, info->bus, info->devfn))
4157 return -EBUSY;
4158
4159 ret = paging_domain_compatible(domain, dev);
4160 if (ret)
4161 return ret;
4162
4163 dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
4164 if (IS_ERR(dev_pasid))
4165 return PTR_ERR(dev_pasid);
4166
4167 ret = iopf_for_domain_replace(domain, old, dev);
4168 if (ret)
4169 goto out_remove_dev_pasid;
4170
4171 if (intel_domain_is_fs_paging(dmar_domain))
4172 ret = domain_setup_first_level(iommu, dmar_domain,
4173 dev, pasid, old);
4174 else if (intel_domain_is_ss_paging(dmar_domain))
4175 ret = domain_setup_second_level(iommu, dmar_domain,
4176 dev, pasid, old);
4177 else if (WARN_ON(true))
4178 ret = -EINVAL;
4179
4180 if (ret)
4181 goto out_unwind_iopf;
4182
4183 domain_remove_dev_pasid(old, dev, pasid);
4184
4185 intel_iommu_debugfs_create_dev_pasid(dev_pasid);
4186
4187 return 0;
4188
4189 out_unwind_iopf:
4190 iopf_for_domain_replace(old, domain, dev);
4191 out_remove_dev_pasid:
4192 domain_remove_dev_pasid(domain, dev, pasid);
4193 return ret;
4194 }
4195
4196 static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
4197 {
4198 struct device_domain_info *info = dev_iommu_priv_get(dev);
4199 struct intel_iommu *iommu = info->iommu;
4200 struct iommu_hw_info_vtd *vtd;
4201
4202 vtd = kzalloc(sizeof(*vtd), GFP_KERNEL);
4203 if (!vtd)
4204 return ERR_PTR(-ENOMEM);
4205
4206 vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
4207 vtd->cap_reg = iommu->cap;
4208 vtd->ecap_reg = iommu->ecap;
4209 *length = sizeof(*vtd);
4210 *type = IOMMU_HW_INFO_TYPE_INTEL_VTD;
4211 return vtd;
4212 }
4213
4214 /*
4215 * Set dirty tracking for the device list of a domain. The caller must
4216 * hold the domain->lock when calling it.
4217 */
4218 static int device_set_dirty_tracking(struct list_head *devices, bool enable)
4219 {
4220 struct device_domain_info *info;
4221 int ret = 0;
4222
4223 list_for_each_entry(info, devices, link) {
4224 ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
4225 IOMMU_NO_PASID, enable);
4226 if (ret)
4227 break;
4228 }
4229
4230 return ret;
4231 }
4232
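/*
 * Propagate the dirty tracking state to all first-stage domains nested
 * on this parent domain, rolling back to the previous state if any
 * device fails to switch.
 */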
4233 static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
4234 bool enable)
4235 {
4236 struct dmar_domain *s1_domain;
4237 unsigned long flags;
4238 int ret;
4239
4240 spin_lock(&domain->s1_lock);
4241 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
4242 spin_lock_irqsave(&s1_domain->lock, flags);
4243 ret = device_set_dirty_tracking(&s1_domain->devices, enable);
4244 spin_unlock_irqrestore(&s1_domain->lock, flags);
4245 if (ret)
4246 goto err_unwind;
4247 }
4248 spin_unlock(&domain->s1_lock);
4249 return 0;
4250
4251 err_unwind:
4252 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
4253 spin_lock_irqsave(&s1_domain->lock, flags);
4254 device_set_dirty_tracking(&s1_domain->devices,
4255 domain->dirty_tracking);
4256 spin_unlock_irqrestore(&s1_domain->lock, flags);
4257 }
4258 spin_unlock(&domain->s1_lock);
4259 return ret;
4260 }
4261
4262 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
4263 bool enable)
4264 {
4265 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4266 int ret;
4267
4268 spin_lock(&dmar_domain->lock);
4269 if (dmar_domain->dirty_tracking == enable)
4270 goto out_unlock;
4271
4272 ret = device_set_dirty_tracking(&dmar_domain->devices, enable);
4273 if (ret)
4274 goto err_unwind;
4275
4276 if (dmar_domain->nested_parent) {
4277 ret = parent_domain_set_dirty_tracking(dmar_domain, enable);
4278 if (ret)
4279 goto err_unwind;
4280 }
4281
4282 dmar_domain->dirty_tracking = enable;
4283 out_unlock:
4284 spin_unlock(&dmar_domain->lock);
4285
4286 return 0;
4287
4288 err_unwind:
4289 device_set_dirty_tracking(&dmar_domain->devices,
4290 dmar_domain->dirty_tracking);
4291 spin_unlock(&dmar_domain->lock);
4292 return ret;
4293 }
4294
4295 static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
4296 unsigned long iova, size_t size,
4297 unsigned long flags,
4298 struct iommu_dirty_bitmap *dirty)
4299 {
4300 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4301 unsigned long end = iova + size - 1;
4302 unsigned long pgsize;
4303
4304 /*
4305 * IOMMUFD core calls into a dirty tracking disabled domain without an
4306 * IOVA bitmap set in order to clean dirty bits in all PTEs that might
4307 * have occurred when we stopped dirty tracking. This ensures that we
4308 * never inherit dirtied bits from a previous cycle.
4309 */
4310 if (!dmar_domain->dirty_tracking && dirty->bitmap)
4311 return -EINVAL;
4312
4313 do {
4314 struct dma_pte *pte;
4315 int lvl = 0;
4316
4317 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl,
4318 GFP_ATOMIC);
4319 pgsize = level_size(lvl) << VTD_PAGE_SHIFT;
4320 if (!pte || !dma_pte_present(pte)) {
4321 iova += pgsize;
4322 continue;
4323 }
4324
4325 if (dma_sl_pte_test_and_clear_dirty(pte, flags))
4326 iommu_dirty_bitmap_record(dirty, iova, pgsize);
4327 iova += pgsize;
4328 } while (iova < end);
4329
4330 return 0;
4331 }
4332
4333 static const struct iommu_dirty_ops intel_dirty_ops = {
4334 .set_dirty_tracking = intel_iommu_set_dirty_tracking,
4335 .read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
4336 };
4337
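/*
 * Program a legacy-mode context entry for pass-through translation with
 * FLPT_DEFAULT_DID as the domain ID. A context entry copied from a
 * previous kernel is torn down and reprogrammed here.
 */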
4338 static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
4339 {
4340 struct device_domain_info *info = dev_iommu_priv_get(dev);
4341 struct intel_iommu *iommu = info->iommu;
4342 struct context_entry *context;
4343
4344 spin_lock(&iommu->lock);
4345 context = iommu_context_addr(iommu, bus, devfn, 1);
4346 if (!context) {
4347 spin_unlock(&iommu->lock);
4348 return -ENOMEM;
4349 }
4350
4351 if (context_present(context) && !context_copied(iommu, bus, devfn)) {
4352 spin_unlock(&iommu->lock);
4353 return 0;
4354 }
4355
4356 copied_context_tear_down(iommu, context, bus, devfn);
4357 context_clear_entry(context);
4358 context_set_domain_id(context, FLPT_DEFAULT_DID);
4359
4360 /*
4361 * In pass through mode, AW must be programmed to indicate the largest
4362 * AGAW value supported by hardware. And ASR is ignored by hardware.
4363 */
4364 context_set_address_width(context, iommu->msagaw);
4365 context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH);
4366 context_set_fault_enable(context);
4367 context_set_present(context);
4368 if (!ecap_coherent(iommu->ecap))
4369 clflush_cache_range(context, sizeof(*context));
4370 context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn);
4371 spin_unlock(&iommu->lock);
4372
4373 return 0;
4374 }
4375
4376 static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data)
4377 {
4378 struct device *dev = data;
4379
4380 return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff);
4381 }
4382
4383 static int device_setup_pass_through(struct device *dev)
4384 {
4385 struct device_domain_info *info = dev_iommu_priv_get(dev);
4386
4387 if (!dev_is_pci(dev))
4388 return context_setup_pass_through(dev, info->bus, info->devfn);
4389
4390 return pci_for_each_dma_alias(to_pci_dev(dev),
4391 context_setup_pass_through_cb, dev);
4392 }
4393
4394 static int identity_domain_attach_dev(struct iommu_domain *domain, struct device *dev)
4395 {
4396 struct device_domain_info *info = dev_iommu_priv_get(dev);
4397 struct intel_iommu *iommu = info->iommu;
4398 int ret;
4399
4400 device_block_translation(dev);
4401
4402 if (dev_is_real_dma_subdevice(dev))
4403 return 0;
4404
4405 /*
4406 * No PRI support with the global identity domain. No need to enable or
4407 * disable PRI in this path as the device has been put in the blocking
4408 * state.
4409 */
4410 if (sm_supported(iommu))
4411 ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
4412 else
4413 ret = device_setup_pass_through(dev);
4414
4415 if (!ret)
4416 info->domain_attached = true;
4417
4418 return ret;
4419 }
4420
4421 static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
4422 struct device *dev, ioasid_t pasid,
4423 struct iommu_domain *old)
4424 {
4425 struct device_domain_info *info = dev_iommu_priv_get(dev);
4426 struct intel_iommu *iommu = info->iommu;
4427 int ret;
4428
4429 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
4430 return -EOPNOTSUPP;
4431
4432 ret = iopf_for_domain_replace(domain, old, dev);
4433 if (ret)
4434 return ret;
4435
4436 ret = domain_setup_passthrough(iommu, dev, pasid, old);
4437 if (ret) {
4438 iopf_for_domain_replace(old, domain, dev);
4439 return ret;
4440 }
4441
4442 domain_remove_dev_pasid(old, dev, pasid);
4443 return 0;
4444 }
4445
4446 static struct iommu_domain identity_domain = {
4447 .type = IOMMU_DOMAIN_IDENTITY,
4448 .ops = &(const struct iommu_domain_ops) {
4449 .attach_dev = identity_domain_attach_dev,
4450 .set_dev_pasid = identity_domain_set_dev_pasid,
4451 },
4452 };
4453
4454 const struct iommu_domain_ops intel_fs_paging_domain_ops = {
4455 .attach_dev = intel_iommu_attach_device,
4456 .set_dev_pasid = intel_iommu_set_dev_pasid,
4457 .map_pages = intel_iommu_map_pages,
4458 .unmap_pages = intel_iommu_unmap_pages,
4459 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
4460 .flush_iotlb_all = intel_flush_iotlb_all,
4461 .iotlb_sync = intel_iommu_tlb_sync,
4462 .iova_to_phys = intel_iommu_iova_to_phys,
4463 .free = intel_iommu_domain_free,
4464 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs,
4465 };
4466
4467 const struct iommu_domain_ops intel_ss_paging_domain_ops = {
4468 .attach_dev = intel_iommu_attach_device,
4469 .set_dev_pasid = intel_iommu_set_dev_pasid,
4470 .map_pages = intel_iommu_map_pages,
4471 .unmap_pages = intel_iommu_unmap_pages,
4472 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
4473 .flush_iotlb_all = intel_flush_iotlb_all,
4474 .iotlb_sync = intel_iommu_tlb_sync,
4475 .iova_to_phys = intel_iommu_iova_to_phys,
4476 .free = intel_iommu_domain_free,
4477 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss,
4478 };
4479
4480 const struct iommu_ops intel_iommu_ops = {
4481 .blocked_domain = &blocking_domain,
4482 .release_domain = &blocking_domain,
4483 .identity_domain = &identity_domain,
4484 .capable = intel_iommu_capable,
4485 .hw_info = intel_iommu_hw_info,
4486 .domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags,
4487 .domain_alloc_sva = intel_svm_domain_alloc,
4488 .domain_alloc_nested = intel_iommu_domain_alloc_nested,
4489 .probe_device = intel_iommu_probe_device,
4490 .probe_finalize = intel_iommu_probe_finalize,
4491 .release_device = intel_iommu_release_device,
4492 .get_resv_regions = intel_iommu_get_resv_regions,
4493 .device_group = intel_iommu_device_group,
4494 .is_attach_deferred = intel_iommu_is_attach_deferred,
4495 .def_domain_type = device_def_domain_type,
4496 .page_response = intel_iommu_page_response,
4497 };
4498
4499 static void quirk_iommu_igfx(struct pci_dev *dev)
4500 {
4501 if (risky_device(dev))
4502 return;
4503
4504 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
4505 disable_igfx_iommu = 1;
4506 }
4507
4508 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4509 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
4510 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
4511 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
4512 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
4513 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
4514 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
4515 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
4516
4517 /* QM57/QS57 integrated gfx malfunctions with dmar */
4518 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx);
4519
4520 /* Broadwell igfx malfunctions with dmar */
4521 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
4522 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
4523 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
4524 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
4525 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
4526 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
4527 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
4528 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
4529 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
4530 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
4531 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
4532 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
4533 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
4534 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
4535 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
4536 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
4537 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
4538 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
4539 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
4540 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
4541 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
4542 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
4543 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
4544 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
4545
4546 static void quirk_iommu_rwbf(struct pci_dev *dev)
4547 {
4548 if (risky_device(dev))
4549 return;
4550
4551 /*
4552 * Mobile 4 Series Chipset neglects to set RWBF capability,
4553 * but needs it. Same seems to hold for the desktop versions.
4554 */
4555 pci_info(dev, "Forcing write-buffer flush capability\n");
4556 rwbf_quirk = 1;
4557 }
4558
4559 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4560 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4561 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4562 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4563 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4564 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4565 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4566
4567 #define GGC 0x52
4568 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
4569 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4570 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
4571 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
4572 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4573 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4574 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4575 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4576
4577 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4578 {
4579 unsigned short ggc;
4580
4581 if (risky_device(dev))
4582 return;
4583
4584 if (pci_read_config_word(dev, GGC, &ggc))
4585 return;
4586
4587 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4588 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4589 disable_igfx_iommu = 1;
4590 } else if (!disable_igfx_iommu) {
4591 /* we have to ensure the gfx device is idle before we flush */
4592 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
4593 iommu_set_dma_strict();
4594 }
4595 }
4596 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4597 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4598 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4599
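/*
 * The upper byte of the PCI device ID identifies the affected integrated
 * graphics generations; for these, set iommu_skip_te_disable so that
 * translation is left enabled on the graphics-dedicated IOMMU.
 */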
4600 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
4601 {
4602 unsigned short ver;
4603
4604 if (!IS_GFX_DEVICE(dev))
4605 return;
4606
4607 ver = (dev->device >> 8) & 0xff;
4608 if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
4609 ver != 0x4e && ver != 0x8a && ver != 0x98 &&
4610 ver != 0x9a && ver != 0xa7 && ver != 0x7d)
4611 return;
4612
4613 if (risky_device(dev))
4614 return;
4615
4616 pci_info(dev, "Skip IOMMU disabling for graphics\n");
4617 iommu_skip_te_disable = 1;
4618 }
4619 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
4620
4621 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4622 ISOCH DMAR unit for the Azalia sound device, but not give it any
4623 TLB entries, which causes it to deadlock. Check for that. We do
4624 this in a function called from init_dmars(), instead of in a PCI
4625 quirk, because we don't want to print the obnoxious "BIOS broken"
4626 message if VT-d is actually disabled.
4627 */
4628 static void __init check_tylersburg_isoch(void)
4629 {
4630 struct pci_dev *pdev;
4631 uint32_t vtisochctrl;
4632
4633 /* If there's no Azalia in the system anyway, forget it. */
4634 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4635 if (!pdev)
4636 return;
4637
4638 if (risky_device(pdev)) {
4639 pci_dev_put(pdev);
4640 return;
4641 }
4642
4643 pci_dev_put(pdev);
4644
4645 /* System Management Registers. Might be hidden, in which case
4646 we can't do the sanity check. But that's OK, because the
4647 known-broken BIOSes _don't_ actually hide it, so far. */
4648 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4649 if (!pdev)
4650 return;
4651
4652 if (risky_device(pdev)) {
4653 pci_dev_put(pdev);
4654 return;
4655 }
4656
4657 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4658 pci_dev_put(pdev);
4659 return;
4660 }
4661
4662 pci_dev_put(pdev);
4663
4664 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4665 if (vtisochctrl & 1)
4666 return;
4667
4668 /* Drop all bits other than the number of TLB entries */
4669 vtisochctrl &= 0x1c;
4670
4671 /* If we have the recommended number of TLB entries (16), fine. */
4672 if (vtisochctrl == 0x10)
4673 return;
4674
4675 /* Zero TLB entries? You get to ride the short bus to school. */
4676 if (!vtisochctrl) {
4677 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4678 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4679 dmi_get_system_info(DMI_BIOS_VENDOR),
4680 dmi_get_system_info(DMI_BIOS_VERSION),
4681 dmi_get_system_info(DMI_PRODUCT_VERSION));
4682 iommu_identity_mapping |= IDENTMAP_AZALIA;
4683 return;
4684 }
4685
4686 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4687 vtisochctrl);
4688 }
4689
4690 /*
4691 * Here we deal with a device TLB defect where device may inadvertently issue ATS
4692 * invalidation completion before posted writes initiated with translated address
4693 * that utilized translations matching the invalidation address range, violating
4694 * the invalidation completion ordering.
4695 * Therefore, any use case that cannot guarantee DMA is stopped before unmap is
4696 * vulnerable to this defect. In other words, any dTLB invalidation initiated not
4697 * under the control of the trusted/privileged host device driver must use this
4698 * quirk.
4699 * Device TLBs are invalidated under the following six conditions:
4700 * 1. Device driver does DMA API unmap IOVA
4701 * 2. Device driver unbinds a PASID from a process, sva_unbind_device()
4702 * 3. PASID is torn down, after PASID cache is flushed. e.g. process
4703 * exit_mmap() due to crash
4704 * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
4705 * VM has to free pages that were unmapped
4706 * 5. Userspace driver unmaps a DMA buffer
4707 * 6. Cache invalidation in vSVA usage (upcoming)
4708 *
4709 * For #1 and #2, device drivers are responsible for stopping DMA traffic
4710 * before unmap/unbind. For #3, iommu driver gets mmu_notifier to
4711 * invalidate TLB the same way as normal user unmap which will use this quirk.
4712 * The dTLB invalidation after PASID cache flush does not need this quirk.
4713 *
4714 * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
4715 */
4716 void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
4717 unsigned long address, unsigned long mask,
4718 u32 pasid, u16 qdep)
4719 {
4720 u16 sid;
4721
4722 if (likely(!info->dtlb_extra_inval))
4723 return;
4724
4725 sid = PCI_DEVID(info->bus, info->devfn);
4726 if (pasid == IOMMU_NO_PASID) {
4727 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
4728 qdep, address, mask);
4729 } else {
4730 qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
4731 pasid, qdep, address, mask);
4732 }
4733 }
4734
4735 #define ecmd_get_status_code(res) (((res) & 0xff) >> 1)
4736
4737 /*
4738 * Function to submit a command to the enhanced command interface. The
4739 * valid enhanced command descriptions are defined in Table 47 of the
4740 * VT-d spec. The VT-d hardware implementation may support some but not
4741 * all commands, which can be determined by checking the Enhanced
4742 * Command Capability Register.
4743 *
4744 * Return values:
4745 * - 0: Command successful without any error;
4746 * - Negative: software error value;
4747 * - Nonzero positive: failure status code defined in Table 48.
4748 */
4749 int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
4750 {
4751 unsigned long flags;
4752 u64 res;
4753 int ret;
4754
4755 if (!cap_ecmds(iommu->cap))
4756 return -ENODEV;
4757
4758 raw_spin_lock_irqsave(&iommu->register_lock, flags);
4759
4760 res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
4761 if (res & DMA_ECMD_ECRSP_IP) {
4762 ret = -EBUSY;
4763 goto err;
4764 }
4765
4766 /*
4767 * Unconditionally write the operand B, because
4768 * - There is no side effect if an ecmd doesn't require an
4769 * operand B, but we set the register to some value.
4770 * - It's not invoked in any critical path. The extra MMIO
4771 * write doesn't bring any performance concerns.
4772 */
4773 dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
4774 dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));
4775
4776 IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
4777 !(res & DMA_ECMD_ECRSP_IP), res);
4778
4779 if (res & DMA_ECMD_ECRSP_IP) {
4780 ret = -ETIMEDOUT;
4781 goto err;
4782 }
4783
4784 ret = ecmd_get_status_code(res);
4785 err:
4786 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
4787
4788 return ret;
4789 }
4790