// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/crash_dump.h>
#include <linux/dma-direct.h>
#include <linux/dmi.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <uapi/linux/iommufd.h>

#include "iommu.h"
#include "../dma-iommu.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"
#include "pasid.h"
#include "perfmon.h"

#define ROOT_SIZE	VTD_PAGE_SIZE
#define CONTEXT_SIZE	VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) pci_is_display(pdev)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
static void __init check_tylersburg_isoch(void);
static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
					  bool enable);
static int rwbf_quirk;

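/*
 * A write buffer flush is needed when the IOMMU advertises the RWBF
 * capability, or when rwbf_quirk has been set by a chipset quirk.
 */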
#define rwbf_required(iommu)	(rwbf_quirk || cap_rwbf((iommu)->cap))

/*
 * Set to 1 to panic the kernel if VT-d cannot be enabled successfully
 * (used when the kernel is launched with TXT).
 */
static int force_on = 0;
static int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}

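/*
 * rbtree comparison helpers for the per-IOMMU device tree, which is
 * keyed by the PCI requester ID (bus number plus devfn).
 */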
static int device_rid_cmp_key(const void *key, const struct rb_node *node)
{
	struct device_domain_info *info =
		rb_entry(node, struct device_domain_info, node);
	const u16 *rid_lhs = key;

	if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
		return -1;

	if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
		return 1;

	return 0;
}

static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
{
	struct device_domain_info *info =
		rb_entry(lhs, struct device_domain_info, node);
	u16 key = PCI_DEVID(info->bus, info->devfn);

	return device_rid_cmp_key(&key, rhs);
}

/*
 * Looks up an IOMMU-probed device using its source ID.
 *
 * Returns the pointer to the device if there is a match. Otherwise,
 * returns NULL.
 *
 * Note that this helper doesn't guarantee that the device won't be
 * released by the iommu subsystem after being returned. The caller
 * should use its own synchronization mechanism to avoid the device
 * being released during its use if that is possibly the case.
 */
struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid)
{
	struct device_domain_info *info = NULL;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
	node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key);
	if (node)
		info = rb_entry(node, struct device_domain_info, node);
	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);

	return info ? info->dev : NULL;
}

static int device_rbtree_insert(struct intel_iommu *iommu,
				struct device_domain_info *info)
{
	struct rb_node *curr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
	curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
	if (WARN_ON(curr))
		return -EEXIST;

	return 0;
}

static void device_rbtree_remove(struct device_domain_info *info)
{
	struct intel_iommu *iommu = info->iommu;
	unsigned long flags;

	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
	rb_erase(&info->node, &iommu->device_rbtree);
	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
}

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	u64 base_address;		/* reserved base address */
	u64 end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

struct dmar_satc_unit {
	struct list_head list;		/* list of SATC units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	struct intel_iommu *iommu;	/* the corresponding iommu */
	int devices_cnt;		/* target device count */
	u8 atc_required:1;		/* ATS is required */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);
static LIST_HEAD(dmar_satc_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

static void intel_iommu_domain_free(struct iommu_domain *domain);

int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int iommu_skip_te_disable;
static int disable_igfx_iommu;

#define IDENTMAP_AZALIA		4

const struct iommu_ops intel_iommu_ops;

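/*
 * Helpers to track whether DMA translation was already enabled when
 * the kernel took over this IOMMU, e.g. left on by the firmware or by
 * a previous kernel in the kdump case.
 */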
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;

	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			no_platform_optin = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			disable_igfx_iommu = 1;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
			iommu_dma_forcedac = true;
		} else if (!strncmp(str, "strict", 6)) {
			pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
			iommu_set_dma_strict();
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "sm_on", 5)) {
			pr_info("Enable scalable mode if hardware supports\n");
			intel_iommu_sm = 1;
		} else if (!strncmp(str, "sm_off", 6)) {
			pr_info("Scalable mode is disallowed\n");
			intel_iommu_sm = 0;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
			intel_iommu_tboot_noforce = 1;
		} else {
			pr_notice("Unknown option - '%s'\n", str);
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}

	return 1;
}
__setup("intel_iommu=", intel_iommu_setup);

/*
 * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
 * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
 * the returned SAGAW.
 */
static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
{
	unsigned long fl_sagaw, sl_sagaw;

	fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0);
	sl_sagaw = cap_sagaw(iommu->cap);

	/* Second level only. */
	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
		return sl_sagaw;

	/* First level only. */
	if (!ecap_slts(iommu->ecap))
		return fl_sagaw;

	return fl_sagaw & sl_sagaw;
}

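/*
 * Find the widest supported AGAW that does not exceed max_gaw. Returns
 * a negative value when none of the hardware-supported widths fit.
 */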
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw;

	sagaw = __iommu_calculate_sagaw(iommu);
	for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate the AGAW for each iommu.
 * "SAGAW" may be different across iommus, so use a default AGAW, and
 * fall back to a smaller supported AGAW for iommus that don't support
 * the default one.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

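/*
 * Whether the hardware snoops CPU caches when it walks the paging
 * structures; if not, software must flush them to memory explicitly.
 */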
static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
{
	return sm_supported(iommu) ?
			ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
}

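/*
 * Return the context entry for the given bus/devfn, optionally
 * allocating the context table on demand. In scalable mode the lower
 * and upper halves of a root entry each cover 128 devfns, so devfn
 * selects between the two context-table pointers.
 */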
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	/*
	 * Unless the caller requested to allocate a new entry,
	 * returning a copied context entry makes no sense.
	 */
	if (!alloc && context_copied(iommu, bus, devfn))
		return NULL;

	entry = &root->lo;
	if (sm_supported(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
						    SZ_4K);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

/**
 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
 *				 sub-hierarchy of a candidate PCI-PCI bridge
 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
 * @bridge: the candidate PCI-PCI bridge
 *
 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
 */
static bool
is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
{
	struct pci_dev *pdev, *pbridge;

	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
		return false;

	pdev = to_pci_dev(dev);
	pbridge = to_pci_dev(bridge);

	if (pbridge->subordinate &&
	    pbridge->subordinate->number <= pdev->bus->number &&
	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
		return true;

	return false;
}

static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd;
	u32 vtbar;
	int rc;

	/* We know that this device on this chipset has its own IOMMU.
	 * If we find it under a different IOMMU, then the BIOS is lying
	 * to us. Hope that the IOMMU for this device is actually
	 * disabled, and it needs no translation...
	 */
	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
	if (rc) {
		/* "can't" happen */
		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
		return false;
	}
	vtbar &= 0xffff0000;

	/* we know that this iommu should be at offset 0xa000 from vtbar */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
		return true;
	}

	return false;
}

static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
{
	if (!iommu || iommu->drhd->ignored)
		return true;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
		    pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
		    quirk_ioat_snb_local_iommu(pdev))
			return true;
	}

	return false;
}

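/*
 * Walk the DRHD scope tables to find the IOMMU a device sits behind,
 * optionally reporting the bus/devfn to address it with. VFs are
 * looked up via their PF, ACPI devices via their companion.
 */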
static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct pci_dev *pdev = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	u16 segment = 0;
	int i;

	if (!dev)
		return NULL;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = pci_real_dma_dev(to_pci_dev(dev));

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);
		dev = &pf_pdev->dev;
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
				if (pdev && pdev->is_virtfn)
					goto got_pdev;

				if (bus && devfn) {
					*bus = drhd->devices[i].bus;
					*devfn = drhd->devices[i].devfn;
				}
				goto out;
			}

			if (is_downstream_to_pci_bridge(dev, tmp))
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
got_pdev:
			if (bus && devfn) {
				*bus = pdev->bus->number;
				*devfn = pdev->devfn;
			}
			goto out;
		}
	}
	iommu = NULL;
out:
	if (iommu_is_dummy(iommu, dev))
		iommu = NULL;

	rcu_read_unlock();

	return iommu;
}

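/*
 * Free all context tables referenced by the root table, including the
 * upper halves used in scalable mode, and then the root table itself.
 */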
static void free_context_table(struct intel_iommu *iommu)
{
	struct context_entry *context;
	int i;

	if (!iommu->root_entry)
		return;

	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			iommu_free_pages(context);

		if (!sm_supported(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			iommu_free_pages(context);
	}

	iommu_free_pages(iommu->root_entry);
	iommu->root_entry = NULL;
}

#ifdef CONFIG_DMAR_DEBUG
static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
			 u8 bus, u8 devfn, struct dma_pte *parent, int level)
{
	struct dma_pte *pte;
	int offset;

	while (1) {
		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];

		pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);

		if (!dma_pte_present(pte)) {
			pr_info("page table not present at level %d\n", level - 1);
			break;
		}

		if (level == 1 || dma_pte_superpage(pte))
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}
}

void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
			  unsigned long long addr, u32 pasid)
{
	struct pasid_dir_entry *dir, *pde;
	struct pasid_entry *entries, *pte;
	struct context_entry *ctx_entry;
	struct root_entry *rt_entry;
	int i, dir_index, index, level;
	u8 devfn = source_id & 0xff;
	u8 bus = source_id >> 8;
	struct dma_pte *pgtable;

	pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);

	/* root entry dump */
	if (!iommu->root_entry) {
		pr_info("root table is not present\n");
		return;
	}
	rt_entry = &iommu->root_entry[bus];

	if (sm_supported(iommu))
		pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
			rt_entry->hi, rt_entry->lo);
	else
		pr_info("root entry: 0x%016llx\n", rt_entry->lo);

	/* context entry dump */
	ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
	if (!ctx_entry) {
		pr_info("context table is not present\n");
		return;
	}

	pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
		ctx_entry->hi, ctx_entry->lo);

	/* legacy mode does not require PASID entries */
	if (!sm_supported(iommu)) {
		if (!context_present(ctx_entry)) {
			pr_info("legacy mode page table is not present\n");
			return;
		}
		level = agaw_to_level(ctx_entry->hi & 7);
		pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
		goto pgtable_walk;
	}

	if (!context_present(ctx_entry)) {
		pr_info("pasid directory table is not present\n");
		return;
	}

	/* get the pointer to pasid directory entry */
	dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);

	/* For request-without-pasid, get the pasid from context entry */
	if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
		pasid = IOMMU_NO_PASID;

	dir_index = pasid >> PASID_PDE_SHIFT;
	pde = &dir[dir_index];
	pr_info("pasid dir entry: 0x%016llx\n", pde->val);

	/* get the pointer to the pasid table entry */
	entries = get_pasid_table_from_pde(pde);
	if (!entries) {
		pr_info("pasid table is not present\n");
		return;
	}
	index = pasid & PASID_PTE_MASK;
	pte = &entries[index];
	for (i = 0; i < ARRAY_SIZE(pte->val); i++)
		pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);

	if (!pasid_pte_is_present(pte)) {
		pr_info("scalable mode page table is not present\n");
		return;
	}

	if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
		level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
		pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
	} else {
		level = agaw_to_level((pte->val[0] >> 2) & 0x7);
		pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
	}

pgtable_walk:
	pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
}
#endif

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;

	root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);
	iommu->root_entry = root;

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure the hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/*
	 * Hardware invalidates all DMA remapping hardware translation
	 * caches as part of SRTP flow.
	 */
	if (cap_esrtps(iommu->cap))
		return;

	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
	if (sm_supported(iommu))
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}

void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure the hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* Register-based invalidation of the context cache through CCMD_REG */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
			iommu->name, type);
		return;
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure the hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
			 unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* a global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
			iommu->name, type);
		return;
	}

	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure the hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}

static struct device_domain_info *
domain_lookup_dev_info(struct dmar_domain *domain,
		       struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&domain->lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			spin_unlock_irqrestore(&domain->lock, flags);
			return info;
		}
	}
	spin_unlock_irqrestore(&domain->lock, flags);

	return NULL;
}

/*
 * The extra devTLB flush quirk impacts those QAT devices with PCI device
 * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
 * check because it applies only to the built-in QAT devices and it doesn't
 * grant additional privileges.
 */
#define BUGGY_QAT_DEVID_MASK 0x4940
static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
{
	if (pdev->vendor != PCI_VENDOR_ID_INTEL)
		return false;

	if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
		return false;

	return true;
}

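/*
 * Enable PCI ATS on a device, but only if it guarantees page-aligned
 * ATS invalidation requests as VT-d requires.
 */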
static void iommu_enable_pci_ats(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	if (!info->ats_supported)
		return;

	pdev = to_pci_dev(info->dev);
	if (!pci_ats_page_aligned(pdev))
		return;

	if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT))
		info->ats_enabled = 1;
}

static void iommu_disable_pci_ats(struct device_domain_info *info)
{
	if (!info->ats_enabled)
		return;

	pci_disable_ats(to_pci_dev(info->dev));
	info->ats_enabled = 0;
}

static void iommu_enable_pci_pri(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	if (!info->ats_enabled || !info->pri_supported)
		return;

	pdev = to_pci_dev(info->dev);
	/* PASID is required in PRG Response Message. */
	if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
		return;

	if (pci_reset_pri(pdev))
		return;

	if (!pci_enable_pri(pdev, PRQ_DEPTH))
		info->pri_enabled = 1;
}

static void iommu_disable_pci_pri(struct device_domain_info *info)
{
	if (!info->pri_enabled)
		return;

	if (WARN_ON(info->iopf_refcount))
		iopf_queue_remove_device(info->iommu->iopf_queue, info->dev);

	pci_disable_pri(to_pci_dev(info->dev));
	info->pri_enabled = 0;
}

static void intel_flush_iotlb_all(struct iommu_domain *domain)
{
	cache_tag_flush_all(to_dmar_domain(domain));
}

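/*
 * Disable the protected low/high memory regions (PMRs), if supported,
 * so that DMA to those regions is not blocked once the OS controls
 * translation.
 */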
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		      readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure the hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
	    (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure the hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	/*
	 * All iommu domains must have been detached from the devices,
	 * hence there should be no domain IDs in use.
	 */
	if (WARN_ON(!ida_is_empty(&iommu->domain_ida)))
		return;

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}

static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if (iommu->copied_tables) {
		bitmap_free(iommu->copied_tables);
		iommu->copied_tables = NULL;
	}

	/* free context mapping */
	free_context_table(iommu);

	if (ecap_prs(iommu->ecap))
		intel_iommu_finish_prq(iommu);
}

/*
 * Check and return whether first level is used by default for
 * DMA translation.
 */
static bool first_level_by_default(struct intel_iommu *iommu)
{
	/* Only SL is available in legacy mode */
	if (!sm_supported(iommu))
		return false;

	/* Only one level (either FL or SL) is available, just use it */
	if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap))
		return ecap_flts(iommu->ecap);

	return true;
}

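/*
 * Attach a domain to an IOMMU unit: allocate a domain ID on that unit
 * on first attach, and refcount later attachments through the
 * domain's iommu_array.
 */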
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
	struct iommu_domain_info *info, *curr;
	int num, ret = -ENOSPC;

	if (domain->domain.type == IOMMU_DOMAIN_SVA)
		return 0;

	info = kzalloc_obj(*info);
	if (!info)
		return -ENOMEM;

	guard(mutex)(&iommu->did_lock);
	curr = xa_load(&domain->iommu_array, iommu->seq_id);
	if (curr) {
		curr->refcnt++;
		kfree(info);
		return 0;
	}

	num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
			      cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
	if (num < 0) {
		pr_err("%s: No free domain ids\n", iommu->name);
		goto err_unlock;
	}

	info->refcnt = 1;
	info->did = num;
	info->iommu = iommu;
	curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
			  NULL, info, GFP_KERNEL);
	if (curr) {
		ret = xa_err(curr) ? : -EBUSY;
		goto err_clear;
	}

	return 0;

err_clear:
	ida_free(&iommu->domain_ida, info->did);
err_unlock:
	kfree(info);
	return ret;
}

void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
	struct iommu_domain_info *info;

	if (domain->domain.type == IOMMU_DOMAIN_SVA)
		return;

	guard(mutex)(&iommu->did_lock);
	info = xa_load(&domain->iommu_array, iommu->seq_id);
	if (--info->refcnt == 0) {
		ida_free(&iommu->domain_ida, info->did);
		xa_erase(&domain->iommu_array, iommu->seq_id);
		kfree(info);
	}
}

/*
 * For kdump cases, old valid entries may be cached due to the
 * in-flight DMA and copied pgtable, but there is no unmapping
 * behaviour for them, thus we need an explicit cache flush for
 * the newly-mapped device. For kdump, at this point, the device
 * is supposed to finish reset at its driver probe stage, so no
 * in-flight DMA will exist, and we don't need to worry about it
 * hereafter.
 */
static void copied_context_tear_down(struct intel_iommu *iommu,
				     struct context_entry *context,
				     u8 bus, u8 devfn)
{
	u16 did_old;

	if (!context_copied(iommu, bus, devfn))
		return;

	assert_spin_locked(&iommu->lock);

	did_old = context_domain_id(context);
	context_clear_entry(context);

	if (did_old < cap_ndoms(iommu->cap)) {
		iommu->flush.flush_context(iommu, did_old,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
					 DMA_TLB_DSI_FLUSH);
	}

	clear_context_copied(iommu, bus, devfn);
}

/*
 * It's a non-present to present mapping. If hardware doesn't cache
 * non-present entries we only need to flush the write-buffer. If it
 * _does_ cache non-present entries, then it does so in the special
 * domain #0, which we have to flush:
 */
static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
					u8 bus, u8 devfn)
{
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   PCI_DEVID(bus, devfn),
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

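/*
 * Program a legacy-mode context entry for bus/devfn to point at the
 * domain's second-stage page table, using the device-TLB translation
 * type when the device supports ATS.
 */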
static int domain_context_mapping_one(struct dmar_domain *domain,
				      struct intel_iommu *iommu,
				      u8 bus, u8 devfn)
{
	struct device_domain_info *info =
			domain_lookup_dev_info(domain, iommu, bus, devfn);
	u16 did = domain_id_iommu(domain, iommu);
	int translation = CONTEXT_TT_MULTI_LEVEL;
	struct pt_iommu_vtdss_hw_info pt_info;
	struct context_entry *context;
	int ret;

	if (WARN_ON(!intel_domain_is_ss_paging(domain)))
		return -EINVAL;

	pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	spin_lock(&iommu->lock);
	ret = -ENOMEM;
	context = iommu_context_addr(iommu, bus, devfn, 1);
	if (!context)
		goto out_unlock;

	ret = 0;
	if (context_present(context) && !context_copied(iommu, bus, devfn))
		goto out_unlock;

	copied_context_tear_down(iommu, context, bus, devfn);
	context_clear_entry(context);
	context_set_domain_id(context, did);

	if (info && info->ats_supported)
		translation = CONTEXT_TT_DEV_IOTLB;
	else
		translation = CONTEXT_TT_MULTI_LEVEL;

	context_set_address_root(context, pt_info.ssptptr);
	context_set_address_width(context, pt_info.aw);
	context_set_translation_type(context, translation);
	context_set_fault_enable(context);
	context_set_present(context);
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(context, sizeof(*context));
	context_present_cache_flush(iommu, did, bus, devfn);
	ret = 0;

out_unlock:
	spin_unlock(&iommu->lock);

	return ret;
}

static int domain_context_mapping_cb(struct pci_dev *pdev,
				     u16 alias, void *opaque)
{
	struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev);
	struct intel_iommu *iommu = info->iommu;
	struct dmar_domain *domain = opaque;

	return domain_context_mapping_one(domain, iommu,
					  PCI_BUS_NUM(alias), alias & 0xff);
}

static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	u8 bus = info->bus, devfn = info->devfn;
	int ret;

	if (!dev_is_pci(dev))
		return domain_context_mapping_one(domain, iommu, bus, devfn);

	ret = pci_for_each_dma_alias(to_pci_dev(dev),
				     domain_context_mapping_cb, domain);
	if (ret)
		return ret;

	iommu_enable_pci_ats(info);

	return 0;
}

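/*
 * Tear down the context entry for bus/devfn: clear the present bit
 * first, flush the caches for the old domain ID, and only then clear
 * the rest of the entry.
 */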
static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
{
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;
	u16 did;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (!context) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = context_domain_id(context);
	context_clear_present(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
	spin_unlock(&iommu->lock);
	intel_context_flush_no_pasid(info, context, did);
	context_clear_entry(context);
	__iommu_flush_cache(iommu, context, sizeof(*context));
}

int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
			       ioasid_t pasid, u16 did, phys_addr_t fsptptr,
			       int flags, struct iommu_domain *old)
{
	if (old)
		intel_pasid_tear_down_entry(iommu, dev, pasid, false);

	return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, did, flags);
}

static int domain_setup_second_level(struct intel_iommu *iommu,
				     struct dmar_domain *domain,
				     struct device *dev, ioasid_t pasid,
				     struct iommu_domain *old)
{
	if (old)
		intel_pasid_tear_down_entry(iommu, dev, pasid, false);

	return intel_pasid_setup_second_level(iommu, domain, dev, pasid);
}

static int domain_setup_passthrough(struct intel_iommu *iommu,
				    struct device *dev, ioasid_t pasid,
				    struct iommu_domain *old)
{
	if (old)
		intel_pasid_tear_down_entry(iommu, dev, pasid, false);

	return intel_pasid_setup_pass_through(iommu, dev, pasid);
}

static int domain_setup_first_level(struct intel_iommu *iommu,
				    struct dmar_domain *domain,
				    struct device *dev,
				    u32 pasid, struct iommu_domain *old)
{
	struct pt_iommu_x86_64_hw_info pt_info;
	unsigned int flags = 0;

	pt_iommu_x86_64_hw_info(&domain->fspt, &pt_info);
	if (WARN_ON(pt_info.levels != 4 && pt_info.levels != 5))
		return -EINVAL;

	if (pt_info.levels == 5)
		flags |= PASID_FLAG_FL5LP;

	if (domain->force_snooping)
		flags |= PASID_FLAG_PAGE_SNOOP;

	if (!(domain->fspt.x86_64_pt.common.features &
	      BIT(PT_FEAT_DMA_INCOHERENT)))
		flags |= PASID_FLAG_PWSNP;

	return __domain_setup_first_level(iommu, dev, pasid,
					  domain_id_iommu(domain, iommu),
					  pt_info.gcr3_pt, flags, old);
}

static int dmar_domain_attach_device(struct dmar_domain *domain,
				     struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	unsigned long flags;
	int ret;

	ret = domain_attach_iommu(domain, iommu);
	if (ret)
		return ret;

	info->domain = domain;
	info->domain_attached = true;
	spin_lock_irqsave(&domain->lock, flags);
	list_add(&info->link, &domain->devices);
	spin_unlock_irqrestore(&domain->lock, flags);

	if (dev_is_real_dma_subdevice(dev))
		return 0;

	if (!sm_supported(iommu))
		ret = domain_context_mapping(domain, dev);
	else if (intel_domain_is_fs_paging(domain))
		ret = domain_setup_first_level(iommu, domain, dev,
					       IOMMU_NO_PASID, NULL);
	else if (intel_domain_is_ss_paging(domain))
		ret = domain_setup_second_level(iommu, domain, dev,
						IOMMU_NO_PASID, NULL);
	else if (WARN_ON(true))
		ret = -EINVAL;

	if (ret)
		goto out_block_translation;

	ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
	if (ret)
		goto out_block_translation;

	return 0;

out_block_translation:
	device_block_translation(dev);
	return ret;
}

/**
 * device_rmrr_is_relaxable - Test whether the RMRR of this device
 * is relaxable (ie. is allowed to be not enforced under some conditions)
 * @dev: device handle
 *
 * We assume that PCI USB devices with RMRRs have them largely
 * for historical reasons and that the RMRR space is not actively used post
 * boot. This exclusion may change if vendors begin to abuse it.
 *
 * The same exception is made for graphics devices, with the requirement that
 * any use of the RMRR regions will be torn down before assigning the device
 * to a guest.
 *
 * Return: true if the RMRR is relaxable, false otherwise
 */
static bool device_rmrr_is_relaxable(struct device *dev)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(dev))
		return false;

	pdev = to_pci_dev(dev);
	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
		return true;
	else
		return false;
}

static int device_def_domain_type(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;

	/*
	 * Hardware does not support the passthrough translation mode.
	 * Always use a dynamic mapping domain.
	 */
	if (!ecap_pass_through(iommu->ecap))
		return IOMMU_DOMAIN_DMA;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
			return IOMMU_DOMAIN_IDENTITY;
	}

	return 0;
}

static void intel_iommu_init_qi(struct intel_iommu *iommu)
{
	/*
	 * Start from the sane iommu hardware state.
	 * If the queued invalidation is already initialized by us
	 * (for example, while enabling interrupt-remapping) then
	 * we got the things already rolling from a sane state.
	 */
	if (!iommu->qi) {
		/*
		 * Clear any previous faults.
		 */
		dmar_fault(-1, iommu);
		/*
		 * Disable queued invalidation if supported and already enabled
		 * before OS handover.
		 */
		dmar_disable_qi(iommu);
	}

	if (dmar_enable_qi(iommu)) {
		/*
		 * Queued Invalidate not enabled, use Register Based Invalidate
		 */
		iommu->flush.flush_context = __iommu_flush_context;
		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
		pr_info("%s: Using Register based invalidation\n",
			iommu->name);
	} else {
		iommu->flush.flush_context = qi_flush_context;
		iommu->flush.flush_iotlb = qi_flush_iotlb;
		pr_info("%s: Using Queued invalidation\n", iommu->name);
	}
}

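/*
 * Copy one bus's context table(s) from the old kernel in the kdump
 * case. In extended (scalable) mode each context entry is twice as
 * large, so a bus needs two context tables, reached through the lower
 * and upper context-table pointers of its root entry.
 */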
static int copy_context_table(struct intel_iommu *iommu,
			      struct root_entry *old_re,
			      struct context_entry **tbl,
			      int bus, bool ext)
{
	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
	struct context_entry *new_ce = NULL, ce;
	struct context_entry *old_ce = NULL;
	struct root_entry re;
	phys_addr_t old_ce_phys;

	tbl_idx = ext ? bus * 2 : bus;
	memcpy(&re, old_re, sizeof(re));

	for (devfn = 0; devfn < 256; devfn++) {
		/* First calculate the correct index */
		idx = (ext ? devfn * 2 : devfn) % 256;

		if (idx == 0) {
			/* First save what we may have and clean up */
			if (new_ce) {
				tbl[tbl_idx] = new_ce;
				__iommu_flush_cache(iommu, new_ce,
						    VTD_PAGE_SIZE);
				pos = 1;
			}

			if (old_ce)
				memunmap(old_ce);

			ret = 0;
			if (devfn < 0x80)
				old_ce_phys = root_entry_lctp(&re);
			else
				old_ce_phys = root_entry_uctp(&re);

			if (!old_ce_phys) {
				if (ext && devfn == 0) {
					/* No LCTP, try UCTP */
					devfn = 0x7f;
					continue;
				} else {
					goto out;
				}
			}

			ret = -ENOMEM;
			old_ce = memremap(old_ce_phys, PAGE_SIZE,
					  MEMREMAP_WB);
			if (!old_ce)
				goto out;

			new_ce = iommu_alloc_pages_node_sz(iommu->node,
							   GFP_KERNEL, SZ_4K);
			if (!new_ce)
				goto out_unmap;

			ret = 0;
		}

		/* Now copy the context entry */
		memcpy(&ce, old_ce + idx, sizeof(ce));

		if (!context_present(&ce))
			continue;

		did = context_domain_id(&ce);
		if (did >= 0 && did < cap_ndoms(iommu->cap))
			ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL);

		set_context_copied(iommu, bus, devfn);
		new_ce[idx] = ce;
	}

	tbl[tbl_idx + pos] = new_ce;

	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);

out_unmap:
	memunmap(old_ce);

out:
	return ret;
}

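/*
 * Take over the root and context tables left by the previous kernel
 * (kdump case) so that in-flight DMA keeps working, bailing out when
 * the old and new root table formats differ.
 */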
static int copy_translation_tables(struct intel_iommu *iommu)
{
	struct context_entry **ctxt_tbls;
	struct root_entry *old_rt;
	phys_addr_t old_rt_phys;
	int ctxt_table_entries;
	u64 rtaddr_reg;
	int bus, ret;
	bool new_ext, ext;

	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
	ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
	new_ext = !!sm_supported(iommu);

	/*
	 * The RTT bit can only be changed when translation is disabled,
	 * but disabling translation means to open a window for data
	 * corruption. So bail out and don't copy anything if we would
	 * have to change the bit.
	 */
	if (new_ext != ext)
		return -EINVAL;

	iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
	if (!iommu->copied_tables)
		return -ENOMEM;

	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
	if (!old_rt_phys)
		return -EINVAL;

	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
	if (!old_rt)
		return -ENOMEM;

	/* This is too big for the stack - allocate it from slab */
	ctxt_table_entries = ext ? 512 : 256;
	ret = -ENOMEM;
	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
	if (!ctxt_tbls)
		goto out_unmap;

	for (bus = 0; bus < 256; bus++) {
		ret = copy_context_table(iommu, &old_rt[bus],
					 ctxt_tbls, bus, ext);
		if (ret) {
			pr_err("%s: Failed to copy context table for bus %d\n",
			       iommu->name, bus);
			continue;
		}
	}

	spin_lock(&iommu->lock);

	/* Context tables are copied, now write them to the root_entry table */
	for (bus = 0; bus < 256; bus++) {
		int idx = ext ? bus * 2 : bus;
		u64 val;

		if (ctxt_tbls[idx]) {
			val = virt_to_phys(ctxt_tbls[idx]) | 1;
			iommu->root_entry[bus].lo = val;
		}

		if (!ext || !ctxt_tbls[idx + 1])
			continue;

		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
		iommu->root_entry[bus].hi = val;
	}

	spin_unlock(&iommu->lock);

	kfree(ctxt_tbls);

	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);

	ret = 0;

out_unmap:
	memunmap(old_rt);

	return ret;
}

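/*
 * One-time initialization of all DMAR units: set up invalidation
 * queues, root entries, page request queues and fault interrupts,
 * copying over translation tables when taking over from a previous
 * kernel.
 */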
static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret;

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			iommu_disable_translation(iommu);
			continue;
		}

		/*
		 * Find the max pasid size of all IOMMU's in the system.
		 * We need to ensure the system pasid table is no bigger
		 * than the smallest supported.
		 */
		if (pasid_supported(iommu)) {
			u32 temp = 2 << ecap_pss(iommu->ecap);

			intel_pasid_max_id = min_t(u32, temp,
						   intel_pasid_max_id);
		}

		intel_iommu_init_qi(iommu);
		init_translation_status(iommu);

		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
			iommu_disable_translation(iommu);
			clear_translation_pre_enabled(iommu);
			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
				iommu->name);
		}

		/*
		 * TBD:
		 * We could share the same root & context tables
		 * among all IOMMU's. Need to split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret)
			goto free_iommu;

		if (translation_pre_enabled(iommu)) {
			pr_info("Translation already enabled - trying to copy translation structures\n");

			ret = copy_translation_tables(iommu);
			if (ret) {
				/*
				 * We found the IOMMU with translation
				 * enabled - but failed to copy over the
				 * old root-entry table. Try to proceed
				 * by disabling translation now and
				 * allocating a clean root-entry table.
				 * This might cause DMAR faults, but
				 * probably the dump will still succeed.
				 */
				pr_err("Failed to copy translation tables from previous kernel for %s\n",
				       iommu->name);
				iommu_disable_translation(iommu);
				clear_translation_pre_enabled(iommu);
			} else {
				pr_info("Copied translation tables from previous kernel for %s\n",
					iommu->name);
			}
		}

		intel_svm_check(iommu);
	}

	/*
	 * Now that qi is enabled on all iommus, set the root entry and flush
	 * caches. This is required on some Intel X58 chipsets, otherwise the
	 * flush_context function will loop forever and the boot hangs.
	 */
	for_each_active_iommu(iommu, drhd) {
		iommu_flush_write_buffer(iommu);
		iommu_set_root_entry(iommu);
	}

	check_tylersburg_isoch();

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

		if (ecap_prs(iommu->ecap)) {
			/*
			 * Calling dmar_alloc_hwirq() with dmar_global_lock
			 * held could cause a lock race condition.
			 */
			up_write(&dmar_global_lock);
			ret = intel_iommu_enable_prq(iommu);
			down_write(&dmar_global_lock);
			if (ret)
				goto free_iommu;
		}

		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto free_iommu;
	}

	return 0;

free_iommu:
	for_each_active_iommu(iommu, drhd) {
		disable_dmar_iommu(iommu);
		free_dmar_iommu(iommu);
	}

	return ret;
}

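/*
 * Mark DRHD units that have no devices in scope, or that cover only
 * graphics devices while the igfx IOMMU is disabled, as ignored.
 */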
init_no_remapping_devices(void)1741 static void __init init_no_remapping_devices(void)
1742 {
1743 struct dmar_drhd_unit *drhd;
1744 struct device *dev;
1745 int i;
1746
1747 for_each_drhd_unit(drhd) {
1748 if (!drhd->include_all) {
1749 for_each_active_dev_scope(drhd->devices,
1750 drhd->devices_cnt, i, dev)
1751 break;
1752 /* ignore DMAR unit if no devices exist */
1753 if (i == drhd->devices_cnt)
1754 drhd->ignored = 1;
1755 }
1756 }
1757
1758 for_each_active_drhd_unit(drhd) {
1759 if (drhd->include_all)
1760 continue;
1761
1762 for_each_active_dev_scope(drhd->devices,
1763 drhd->devices_cnt, i, dev)
1764 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
1765 break;
1766 if (i < drhd->devices_cnt)
1767 continue;
1768
1769 /* This IOMMU has *only* gfx devices. Either bypass it or
1770 set the gfx_mapped flag, as appropriate */
1771 drhd->gfx_dedicated = 1;
1772 if (disable_igfx_iommu)
1773 drhd->ignored = 1;
1774 }
1775 }
1776
1777 #ifdef CONFIG_SUSPEND
init_iommu_hw(void)1778 static int init_iommu_hw(void)
1779 {
1780 struct dmar_drhd_unit *drhd;
1781 struct intel_iommu *iommu = NULL;
1782 int ret;
1783
1784 for_each_active_iommu(iommu, drhd) {
1785 if (iommu->qi) {
1786 ret = dmar_reenable_qi(iommu);
1787 if (ret)
1788 return ret;
1789 }
1790 }
1791
1792 for_each_iommu(iommu, drhd) {
1793 if (drhd->ignored) {
1794 /*
1795 * we always have to disable PMRs or DMA may fail on
1796 * this device
1797 */
1798 if (force_on)
1799 iommu_disable_protect_mem_regions(iommu);
1800 continue;
1801 }
1802
1803 iommu_flush_write_buffer(iommu);
1804 iommu_set_root_entry(iommu);
1805 iommu_enable_translation(iommu);
1806 iommu_disable_protect_mem_regions(iommu);
1807 }
1808
1809 return 0;
1810 }
1811
iommu_flush_all(void)1812 static void iommu_flush_all(void)
1813 {
1814 struct dmar_drhd_unit *drhd;
1815 struct intel_iommu *iommu;
1816
1817 for_each_active_iommu(iommu, drhd) {
1818 iommu->flush.flush_context(iommu, 0, 0, 0,
1819 DMA_CCMD_GLOBAL_INVL);
1820 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1821 DMA_TLB_GLOBAL_FLUSH);
1822 }
1823 }
1824
iommu_suspend(void * data)1825 static int iommu_suspend(void *data)
1826 {
1827 struct dmar_drhd_unit *drhd;
1828 struct intel_iommu *iommu = NULL;
1829 unsigned long flag;
1830
1831 iommu_flush_all();
1832
1833 for_each_active_iommu(iommu, drhd) {
1834 iommu_disable_translation(iommu);
1835
1836 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1837
1838 iommu->iommu_state[SR_DMAR_FECTL_REG] =
1839 readl(iommu->reg + DMAR_FECTL_REG);
1840 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
1841 readl(iommu->reg + DMAR_FEDATA_REG);
1842 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
1843 readl(iommu->reg + DMAR_FEADDR_REG);
1844 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
1845 readl(iommu->reg + DMAR_FEUADDR_REG);
1846
1847 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1848 }
1849 return 0;
1850 }
1851
iommu_resume(void * data)1852 static void iommu_resume(void *data)
1853 {
1854 struct dmar_drhd_unit *drhd;
1855 struct intel_iommu *iommu = NULL;
1856 unsigned long flag;
1857
1858 if (init_iommu_hw()) {
1859 if (force_on)
1860 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
1861 else
1862 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
1863 return;
1864 }
1865
1866 for_each_active_iommu(iommu, drhd) {
1867
1868 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1869
1870 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
1871 iommu->reg + DMAR_FECTL_REG);
1872 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
1873 iommu->reg + DMAR_FEDATA_REG);
1874 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
1875 iommu->reg + DMAR_FEADDR_REG);
1876 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
1877 iommu->reg + DMAR_FEUADDR_REG);
1878
1879 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1880 }
1881 }
1882
1883 static const struct syscore_ops iommu_syscore_ops = {
1884 .resume = iommu_resume,
1885 .suspend = iommu_suspend,
1886 };
1887
1888 static struct syscore iommu_syscore = {
1889 .ops = &iommu_syscore_ops,
1890 };
1891
1892 static void __init init_iommu_pm_ops(void)
1893 {
1894 register_syscore(&iommu_syscore);
1895 }
1896
1897 #else
1898 static inline void init_iommu_pm_ops(void) {}
1899 #endif /* CONFIG_SUSPEND */
1900
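/*
 * An RMRR must describe a page-aligned, non-empty range: the base must
 * be PAGE_SIZE aligned, end + 1 must be PAGE_SIZE aligned, and the end
 * must lie above the base. For example, [0xed000, 0xedfff] passes the
 * check below, while [0xed000, 0xed7ff] or [0xee000, 0xedfff] fails.
 */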
1901 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
1902 {
1903 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
1904 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
1905 rmrr->end_address <= rmrr->base_address ||
1906 arch_rmrr_sanity_check(rmrr))
1907 return -EINVAL;
1908
1909 return 0;
1910 }
1911
1912 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
1913 {
1914 struct acpi_dmar_reserved_memory *rmrr;
1915 struct dmar_rmrr_unit *rmrru;
1916
1917 rmrr = (struct acpi_dmar_reserved_memory *)header;
1918 if (rmrr_sanity_check(rmrr)) {
1919 pr_warn(FW_BUG
1920 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
1921 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1922 rmrr->base_address, rmrr->end_address,
1923 dmi_get_system_info(DMI_BIOS_VENDOR),
1924 dmi_get_system_info(DMI_BIOS_VERSION),
1925 dmi_get_system_info(DMI_PRODUCT_VERSION));
1926 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
1927 }
1928
1929 rmrru = kzalloc_obj(*rmrru);
1930 if (!rmrru)
1931 goto out;
1932
1933 rmrru->hdr = header;
1934
1935 rmrru->base_address = rmrr->base_address;
1936 rmrru->end_address = rmrr->end_address;
1937
1938 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
1939 ((void *)rmrr) + rmrr->header.length,
1940 &rmrru->devices_cnt);
1941 if (rmrru->devices_cnt && rmrru->devices == NULL)
1942 goto free_rmrru;
1943
1944 list_add(&rmrru->list, &dmar_rmrr_units);
1945
1946 return 0;
1947 free_rmrru:
1948 kfree(rmrru);
1949 out:
1950 return -ENOMEM;
1951 }
1952
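/*
 * Look up a previously registered ATSR that matches @atsr byte for
 * byte (same segment, length and contents); the list walk is protected
 * by RCU or dmar_global_lock, per dmar_rcu_check().
 */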
1953 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
1954 {
1955 struct dmar_atsr_unit *atsru;
1956 struct acpi_dmar_atsr *tmp;
1957
1958 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
1959 dmar_rcu_check()) {
1960 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
1961 if (atsr->segment != tmp->segment)
1962 continue;
1963 if (atsr->header.length != tmp->header.length)
1964 continue;
1965 if (memcmp(atsr, tmp, atsr->header.length) == 0)
1966 return atsru;
1967 }
1968
1969 return NULL;
1970 }
1971
1972 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
1973 {
1974 struct acpi_dmar_atsr *atsr;
1975 struct dmar_atsr_unit *atsru;
1976
1977 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
1978 return 0;
1979
1980 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
1981 atsru = dmar_find_atsr(atsr);
1982 if (atsru)
1983 return 0;
1984
1985 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
1986 if (!atsru)
1987 return -ENOMEM;
1988
1989 /*
1990 * If the memory was allocated from the slab by an ACPI _DSM method,
1991 * we need to copy its contents because the buffer will be freed
1992 * on return.
1993 */
1994 atsru->hdr = (void *)(atsru + 1);
1995 memcpy(atsru->hdr, hdr, hdr->length);
1996 atsru->include_all = atsr->flags & 0x1;
1997 if (!atsru->include_all) {
1998 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
1999 (void *)atsr + atsr->header.length,
2000 &atsru->devices_cnt);
2001 if (atsru->devices_cnt && atsru->devices == NULL) {
2002 kfree(atsru);
2003 return -ENOMEM;
2004 }
2005 }
2006
2007 list_add_rcu(&atsru->list, &dmar_atsr_units);
2008
2009 return 0;
2010 }
2011
2012 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
2013 {
2014 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
2015 kfree(atsru);
2016 }
2017
2018 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2019 {
2020 struct acpi_dmar_atsr *atsr;
2021 struct dmar_atsr_unit *atsru;
2022
2023 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2024 atsru = dmar_find_atsr(atsr);
2025 if (atsru) {
2026 list_del_rcu(&atsru->list);
2027 synchronize_rcu();
2028 intel_iommu_free_atsr(atsru);
2029 }
2030
2031 return 0;
2032 }
2033
2034 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2035 {
2036 int i;
2037 struct device *dev;
2038 struct acpi_dmar_atsr *atsr;
2039 struct dmar_atsr_unit *atsru;
2040
2041 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2042 atsru = dmar_find_atsr(atsr);
2043 if (!atsru)
2044 return 0;
2045
2046 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
2047 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
2048 i, dev)
2049 return -EBUSY;
2050 }
2051
2052 return 0;
2053 }
2054
2055 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
2056 {
2057 struct dmar_satc_unit *satcu;
2058 struct acpi_dmar_satc *tmp;
2059
2060 list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
2061 dmar_rcu_check()) {
2062 tmp = (struct acpi_dmar_satc *)satcu->hdr;
2063 if (satc->segment != tmp->segment)
2064 continue;
2065 if (satc->header.length != tmp->header.length)
2066 continue;
2067 if (memcmp(satc, tmp, satc->header.length) == 0)
2068 return satcu;
2069 }
2070
2071 return NULL;
2072 }
2073
2074 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
2075 {
2076 struct acpi_dmar_satc *satc;
2077 struct dmar_satc_unit *satcu;
2078
2079 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
2080 return 0;
2081
2082 satc = container_of(hdr, struct acpi_dmar_satc, header);
2083 satcu = dmar_find_satc(satc);
2084 if (satcu)
2085 return 0;
2086
2087 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
2088 if (!satcu)
2089 return -ENOMEM;
2090
2091 satcu->hdr = (void *)(satcu + 1);
2092 memcpy(satcu->hdr, hdr, hdr->length);
2093 satcu->atc_required = satc->flags & 0x1;
2094 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
2095 (void *)satc + satc->header.length,
2096 &satcu->devices_cnt);
2097 if (satcu->devices_cnt && !satcu->devices) {
2098 kfree(satcu);
2099 return -ENOMEM;
2100 }
2101 list_add_rcu(&satcu->list, &dmar_satc_units);
2102
2103 return 0;
2104 }
2105
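/*
 * Bring up a hot-added DMAR unit: quiesce any translation left enabled
 * across the handover, allocate the root entry, set up queued
 * invalidation, the page request queue (when supported) and the fault
 * interrupt, then enable translation and disable the protected memory
 * regions.
 */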
2106 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
2107 {
2108 struct intel_iommu *iommu = dmaru->iommu;
2109 int ret;
2110
2111 /*
2112 * Disable translation if already enabled prior to OS handover.
2113 */
2114 if (iommu->gcmd & DMA_GCMD_TE)
2115 iommu_disable_translation(iommu);
2116
2117 ret = iommu_alloc_root_entry(iommu);
2118 if (ret)
2119 goto out;
2120
2121 intel_svm_check(iommu);
2122
2123 if (dmaru->ignored) {
2124 /*
2125 * We always have to disable PMRs or DMA may fail on this device.
2126 */
2127 if (force_on)
2128 iommu_disable_protect_mem_regions(iommu);
2129 return 0;
2130 }
2131
2132 intel_iommu_init_qi(iommu);
2133 iommu_flush_write_buffer(iommu);
2134
2135 if (ecap_prs(iommu->ecap)) {
2136 ret = intel_iommu_enable_prq(iommu);
2137 if (ret)
2138 goto disable_iommu;
2139 }
2140
2141 ret = dmar_set_interrupt(iommu);
2142 if (ret)
2143 goto disable_iommu;
2144
2145 iommu_set_root_entry(iommu);
2146 iommu_enable_translation(iommu);
2147
2148 iommu_disable_protect_mem_regions(iommu);
2149 return 0;
2150
2151 disable_iommu:
2152 disable_dmar_iommu(iommu);
2153 out:
2154 free_dmar_iommu(iommu);
2155 return ret;
2156 }
2157
2158 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
2159 {
2160 int ret = 0;
2161 struct intel_iommu *iommu = dmaru->iommu;
2162
2163 if (!intel_iommu_enabled)
2164 return 0;
2165 if (iommu == NULL)
2166 return -EINVAL;
2167
2168 if (insert) {
2169 ret = intel_iommu_add(dmaru);
2170 } else {
2171 disable_dmar_iommu(iommu);
2172 free_dmar_iommu(iommu);
2173 }
2174
2175 return ret;
2176 }
2177
2178 static void intel_iommu_free_dmars(void)
2179 {
2180 struct dmar_rmrr_unit *rmrru, *rmrr_n;
2181 struct dmar_atsr_unit *atsru, *atsr_n;
2182 struct dmar_satc_unit *satcu, *satc_n;
2183
2184 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
2185 list_del(&rmrru->list);
2186 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
2187 kfree(rmrru);
2188 }
2189
2190 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
2191 list_del(&atsru->list);
2192 intel_iommu_free_atsr(atsru);
2193 }
2194 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
2195 list_del(&satcu->list);
2196 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
2197 kfree(satcu);
2198 }
2199 }
2200
2201 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
2202 {
2203 struct dmar_satc_unit *satcu;
2204 struct acpi_dmar_satc *satc;
2205 struct device *tmp;
2206 int i;
2207
2208 rcu_read_lock();
2209
2210 list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
2211 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2212 if (satc->segment != pci_domain_nr(dev->bus))
2213 continue;
2214 for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
2215 if (to_pci_dev(tmp) == dev)
2216 goto out;
2217 }
2218 satcu = NULL;
2219 out:
2220 rcu_read_unlock();
2221 return satcu;
2222 }
2223
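/*
 * Decide whether ATS may be used for @dev: a SATC entry is
 * authoritative; otherwise walk up to the root port, denying ATS if
 * any hop is non-PCIe, and finally match the root port against the
 * ATSR device scopes (or an include-all ATSR) for the segment.
 */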
2224 static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
2225 {
2226 struct pci_dev *bridge = NULL;
2227 struct dmar_atsr_unit *atsru;
2228 struct dmar_satc_unit *satcu;
2229 struct acpi_dmar_atsr *atsr;
2230 bool supported = true;
2231 struct pci_bus *bus;
2232 struct device *tmp;
2233 int i;
2234
2235 dev = pci_physfn(dev);
2236 satcu = dmar_find_matched_satc_unit(dev);
2237 if (satcu)
2238 /*
2239 * This device supports ATS because it is listed in the
2240 * SATC table. When the IOMMU is in legacy mode, HW enables
2241 * ATS automatically for any device that requires it, so the
2242 * OS must not enable ATS on the device as well; otherwise
2243 * TLB invalidations would be duplicated.
2244 */
2245 return !(satcu->atc_required && !sm_supported(iommu));
2246
2247 for (bus = dev->bus; bus; bus = bus->parent) {
2248 bridge = bus->self;
2249 /* If it's an integrated device, allow ATS */
2250 if (!bridge)
2251 return true;
2252 /* Connected via non-PCIe: no ATS */
2253 if (!pci_is_pcie(bridge) ||
2254 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
2255 return false;
2256 /* If we found the root port, look it up in the ATSR */
2257 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
2258 break;
2259 }
2260
2261 rcu_read_lock();
2262 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
2263 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2264 if (atsr->segment != pci_domain_nr(dev->bus))
2265 continue;
2266
2267 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
2268 if (tmp == &bridge->dev)
2269 goto out;
2270
2271 if (atsru->include_all)
2272 goto out;
2273 }
2274 supported = false;
2275 out:
2276 rcu_read_unlock();
2277
2278 return supported;
2279 }
2280
2281 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
2282 {
2283 int ret;
2284 struct dmar_rmrr_unit *rmrru;
2285 struct dmar_atsr_unit *atsru;
2286 struct dmar_satc_unit *satcu;
2287 struct acpi_dmar_atsr *atsr;
2288 struct acpi_dmar_reserved_memory *rmrr;
2289 struct acpi_dmar_satc *satc;
2290
2291 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
2292 return 0;
2293
2294 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
2295 rmrr = container_of(rmrru->hdr,
2296 struct acpi_dmar_reserved_memory, header);
2297 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2298 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
2299 ((void *)rmrr) + rmrr->header.length,
2300 rmrr->segment, rmrru->devices,
2301 rmrru->devices_cnt);
2302 if (ret < 0)
2303 return ret;
2304 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2305 dmar_remove_dev_scope(info, rmrr->segment,
2306 rmrru->devices, rmrru->devices_cnt);
2307 }
2308 }
2309
2310 list_for_each_entry(atsru, &dmar_atsr_units, list) {
2311 if (atsru->include_all)
2312 continue;
2313
2314 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2315 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2316 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
2317 (void *)atsr + atsr->header.length,
2318 atsr->segment, atsru->devices,
2319 atsru->devices_cnt);
2320 if (ret > 0)
2321 break;
2322 else if (ret < 0)
2323 return ret;
2324 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2325 if (dmar_remove_dev_scope(info, atsr->segment,
2326 atsru->devices, atsru->devices_cnt))
2327 break;
2328 }
2329 }
2330 list_for_each_entry(satcu, &dmar_satc_units, list) {
2331 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2332 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2333 ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
2334 (void *)satc + satc->header.length,
2335 satc->segment, satcu->devices,
2336 satcu->devices_cnt);
2337 if (ret > 0)
2338 break;
2339 else if (ret < 0)
2340 return ret;
2341 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2342 if (dmar_remove_dev_scope(info, satc->segment,
2343 satcu->devices, satcu->devices_cnt))
2344 break;
2345 }
2346 }
2347
2348 return 0;
2349 }
2350
2351 static void intel_disable_iommus(void)
2352 {
2353 struct intel_iommu *iommu = NULL;
2354 struct dmar_drhd_unit *drhd;
2355
2356 for_each_iommu(iommu, drhd)
2357 iommu_disable_translation(iommu);
2358 }
2359
2360 void intel_iommu_shutdown(void)
2361 {
2362 struct dmar_drhd_unit *drhd;
2363 struct intel_iommu *iommu = NULL;
2364
2365 if (no_iommu || dmar_disabled)
2366 return;
2367
2368 /*
2369 * All other CPUs were brought down, hotplug interrupts were disabled,
2370 * so no locking or RCU checking is needed anymore.
2371 */
2372 list_for_each_entry(drhd, &dmar_drhd_units, list) {
2373 iommu = drhd->iommu;
2374
2375 /* Disable PMRs explicitly here. */
2376 iommu_disable_protect_mem_regions(iommu);
2377
2378 /* Make sure the IOMMUs are switched off */
2379 iommu_disable_translation(iommu);
2380 }
2381 }
2382
2383 static struct intel_iommu *dev_to_intel_iommu(struct device *dev)
2384 {
2385 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
2386
2387 return container_of(iommu_dev, struct intel_iommu, iommu);
2388 }
2389
2390 static ssize_t version_show(struct device *dev,
2391 struct device_attribute *attr, char *buf)
2392 {
2393 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2394 u32 ver = readl(iommu->reg + DMAR_VER_REG);
2395 return sysfs_emit(buf, "%d:%d\n",
2396 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
2397 }
2398 static DEVICE_ATTR_RO(version);
2399
2400 static ssize_t address_show(struct device *dev,
2401 struct device_attribute *attr, char *buf)
2402 {
2403 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2404 return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
2405 }
2406 static DEVICE_ATTR_RO(address);
2407
2408 static ssize_t cap_show(struct device *dev,
2409 struct device_attribute *attr, char *buf)
2410 {
2411 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2412 return sysfs_emit(buf, "%llx\n", iommu->cap);
2413 }
2414 static DEVICE_ATTR_RO(cap);
2415
2416 static ssize_t ecap_show(struct device *dev,
2417 struct device_attribute *attr, char *buf)
2418 {
2419 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2420 return sysfs_emit(buf, "%llx\n", iommu->ecap);
2421 }
2422 static DEVICE_ATTR_RO(ecap);
2423
2424 static ssize_t domains_supported_show(struct device *dev,
2425 struct device_attribute *attr, char *buf)
2426 {
2427 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2428 return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap));
2429 }
2430 static DEVICE_ATTR_RO(domains_supported);
2431
2432 static ssize_t domains_used_show(struct device *dev,
2433 struct device_attribute *attr, char *buf)
2434 {
2435 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2436 unsigned int count = 0;
2437 int id;
2438
2439 for (id = 0; id < cap_ndoms(iommu->cap); id++)
2440 if (ida_exists(&iommu->domain_ida, id))
2441 count++;
2442
2443 return sysfs_emit(buf, "%d\n", count);
2444 }
2445 static DEVICE_ATTR_RO(domains_used);
2446
2447 static struct attribute *intel_iommu_attrs[] = {
2448 &dev_attr_version.attr,
2449 &dev_attr_address.attr,
2450 &dev_attr_cap.attr,
2451 &dev_attr_ecap.attr,
2452 &dev_attr_domains_supported.attr,
2453 &dev_attr_domains_used.attr,
2454 NULL,
2455 };
2456
2457 static struct attribute_group intel_iommu_group = {
2458 .name = "intel-iommu",
2459 .attrs = intel_iommu_attrs,
2460 };
2461
2462 const struct attribute_group *intel_iommu_groups[] = {
2463 &intel_iommu_group,
2464 NULL,
2465 };
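/*
 * These attributes are exported through iommu_device_sysfs_add() in
 * intel_iommu_init() below. Assuming the usual "dmar<N>" unit naming,
 * they show up as, e.g.:
 *
 *   $ cat /sys/class/iommu/dmar0/intel-iommu/version
 *   1:0
 */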
2466
2467 static bool has_external_pci(void)
2468 {
2469 struct pci_dev *pdev = NULL;
2470
2471 for_each_pci_dev(pdev)
2472 if (pdev->external_facing) {
2473 pci_dev_put(pdev);
2474 return true;
2475 }
2476
2477 return false;
2478 }
2479
2480 static int __init platform_optin_force_iommu(void)
2481 {
2482 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
2483 return 0;
2484
2485 if (no_iommu || dmar_disabled)
2486 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
2487
2488 /*
2489 * If Intel-IOMMU is disabled by default, we will apply identity
2490 * map for all devices except those marked as being untrusted.
2491 */
2492 if (dmar_disabled)
2493 iommu_set_default_passthrough(false);
2494
2495 dmar_disabled = 0;
2496 no_iommu = 0;
2497
2498 return 1;
2499 }
2500
2501 static int __init probe_acpi_namespace_devices(void)
2502 {
2503 struct dmar_drhd_unit *drhd;
2504 /* To avoid a -Wunused-but-set-variable warning. */
2505 struct intel_iommu *iommu __maybe_unused;
2506 struct device *dev;
2507 int i, ret = 0;
2508
2509 for_each_active_iommu(iommu, drhd) {
2510 for_each_active_dev_scope(drhd->devices,
2511 drhd->devices_cnt, i, dev) {
2512 struct acpi_device_physical_node *pn;
2513 struct acpi_device *adev;
2514
2515 if (dev->bus != &acpi_bus_type)
2516 continue;
2517
2518 up_read(&dmar_global_lock);
2519 adev = to_acpi_device(dev);
2520 mutex_lock(&adev->physical_node_lock);
2521 list_for_each_entry(pn,
2522 &adev->physical_node_list, node) {
2523 ret = iommu_probe_device(pn->dev);
2524 if (ret)
2525 break;
2526 }
2527 mutex_unlock(&adev->physical_node_lock);
2528 down_read(&dmar_global_lock);
2529
2530 if (ret)
2531 return ret;
2532 }
2533 }
2534
2535 return 0;
2536 }
2537
2538 static __init int tboot_force_iommu(void)
2539 {
2540 if (!tboot_enabled())
2541 return 0;
2542
2543 if (no_iommu || dmar_disabled)
2544 pr_warn("Forcing Intel-IOMMU to enabled\n");
2545
2546 dmar_disabled = 0;
2547 no_iommu = 0;
2548
2549 return 1;
2550 }
2551
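/*
 * Main boot-time entry point: parse the DMAR table and device scopes,
 * initialize the remapping hardware via init_dmars(), register the
 * suspend/resume hooks and sysfs/iommu devices, and finally enable
 * translation on every unit that is neither ignored nor already
 * enabled by pre-boot firmware.
 */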
2552 int __init intel_iommu_init(void)
2553 {
2554 int ret = -ENODEV;
2555 struct dmar_drhd_unit *drhd;
2556 struct intel_iommu *iommu;
2557
2558 /*
2559 * Intel IOMMU is required for a TXT/tboot launch or platform
2560 * opt in, so enforce that.
2561 */
2562 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
2563 platform_optin_force_iommu();
2564
2565 down_write(&dmar_global_lock);
2566 if (dmar_table_init()) {
2567 if (force_on)
2568 panic("tboot: Failed to initialize DMAR table\n");
2569 goto out_free_dmar;
2570 }
2571
2572 if (dmar_dev_scope_init() < 0) {
2573 if (force_on)
2574 panic("tboot: Failed to initialize DMAR device scope\n");
2575 goto out_free_dmar;
2576 }
2577
2578 up_write(&dmar_global_lock);
2579
2580 /*
2581 * The bus notifier itself takes the dmar_global_lock, so lockdep
2582 * would complain if we registered it while still holding the lock.
2583 */
2584 dmar_register_bus_notifier();
2585
2586 down_write(&dmar_global_lock);
2587
2588 if (!no_iommu)
2589 intel_iommu_debugfs_init();
2590
2591 if (no_iommu || dmar_disabled) {
2592 /*
2593 * We exit the function here to ensure IOMMU's remapping and
2594 * mempool aren't set up, which means that the IOMMU's PMRs
2595 * won't be disabled via the call to init_dmars(). So disable
2596 * it explicitly here. The PMRs were set up by tboot prior to
2597 * calling SENTER, but the kernel is expected to reset/tear
2598 * down the PMRs.
2599 */
2600 if (intel_iommu_tboot_noforce) {
2601 for_each_iommu(iommu, drhd)
2602 iommu_disable_protect_mem_regions(iommu);
2603 }
2604
2605 /*
2606 * Make sure the IOMMUs are switched off, even when we
2607 * boot into a kexec kernel and the previous kernel left
2608 * them enabled
2609 */
2610 intel_disable_iommus();
2611 goto out_free_dmar;
2612 }
2613
2614 if (list_empty(&dmar_rmrr_units))
2615 pr_info("No RMRR found\n");
2616
2617 if (list_empty(&dmar_atsr_units))
2618 pr_info("No ATSR found\n");
2619
2620 if (list_empty(&dmar_satc_units))
2621 pr_info("No SATC found\n");
2622
2623 init_no_remapping_devices();
2624
2625 ret = init_dmars();
2626 if (ret) {
2627 if (force_on)
2628 panic("tboot: Failed to initialize DMARs\n");
2629 pr_err("Initialization failed\n");
2630 goto out_free_dmar;
2631 }
2632 up_write(&dmar_global_lock);
2633
2634 init_iommu_pm_ops();
2635
2636 down_read(&dmar_global_lock);
2637 for_each_active_iommu(iommu, drhd) {
2638 /*
2639 * The flush queue implementation does not perform
2640 * page-selective invalidations that are required for efficient
2641 * TLB flushes in virtual environments. The benefit of batching
2642 * is likely to be much lower than the overhead of synchronizing
2643 * the virtual and physical IOMMU page-tables.
2644 */
2645 if (cap_caching_mode(iommu->cap) &&
2646 !first_level_by_default(iommu)) {
2647 pr_info_once("IOMMU batching disallowed due to virtualization\n");
2648 iommu_set_dma_strict();
2649 }
2650 iommu_device_sysfs_add(&iommu->iommu, NULL,
2651 intel_iommu_groups,
2652 "%s", iommu->name);
2653 /*
2654 * The iommu device probe is protected by the iommu_probe_device_lock.
2655 * Release the dmar_global_lock before entering the device probe path
2656 * to avoid unnecessary lock order splat.
2657 */
2658 up_read(&dmar_global_lock);
2659 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
2660 down_read(&dmar_global_lock);
2661
2662 iommu_pmu_register(iommu);
2663 }
2664
2665 if (probe_acpi_namespace_devices())
2666 pr_warn("ACPI name space devices didn't probe correctly\n");
2667
2668 /* Finally, we enable the DMA remapping hardware. */
2669 for_each_iommu(iommu, drhd) {
2670 if (!drhd->ignored && !translation_pre_enabled(iommu))
2671 iommu_enable_translation(iommu);
2672
2673 iommu_disable_protect_mem_regions(iommu);
2674 }
2675 up_read(&dmar_global_lock);
2676
2677 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
2678
2679 intel_iommu_enabled = 1;
2680
2681 return 0;
2682
2683 out_free_dmar:
2684 intel_iommu_free_dmars();
2685 up_write(&dmar_global_lock);
2686 return ret;
2687 }
2688
2689 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
2690 {
2691 struct device_domain_info *info = opaque;
2692
2693 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
2694 return 0;
2695 }
2696
2697 /*
2698 * NB - intel-iommu lacks any sort of reference counting for the users of
2699 * dependent devices. If multiple endpoints have intersecting dependent
2700 * devices, unbinding the driver from any one of them will possibly leave
2701 * the others unable to operate.
2702 */
2703 static void domain_context_clear(struct device_domain_info *info)
2704 {
2705 if (!dev_is_pci(info->dev)) {
2706 domain_context_clear_one(info, info->bus, info->devfn);
2707 return;
2708 }
2709
2710 pci_for_each_dma_alias(to_pci_dev(info->dev),
2711 &domain_context_clear_one_cb, info);
2712 iommu_disable_pci_ats(info);
2713 }
2714
2715 /*
2716 * Clear the page table pointer in context or pasid table entries so that
2717 * all DMA requests without PASID from the device are blocked. If the page
2718 * table has been set, clean up the data structures.
2719 */
2720 void device_block_translation(struct device *dev)
2721 {
2722 struct device_domain_info *info = dev_iommu_priv_get(dev);
2723 struct intel_iommu *iommu = info->iommu;
2724 unsigned long flags;
2725
2726 /* Device in DMA blocking state. Nothing to do. */
2727 if (!info->domain_attached)
2728 return;
2729
2730 if (info->domain)
2731 cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
2732
2733 if (!dev_is_real_dma_subdevice(dev)) {
2734 if (sm_supported(iommu))
2735 intel_pasid_tear_down_entry(iommu, dev,
2736 IOMMU_NO_PASID, false);
2737 else
2738 domain_context_clear(info);
2739 }
2740
2741 /* Device now in DMA blocking state. */
2742 info->domain_attached = false;
2743
2744 if (!info->domain)
2745 return;
2746
2747 spin_lock_irqsave(&info->domain->lock, flags);
2748 list_del(&info->link);
2749 spin_unlock_irqrestore(&info->domain->lock, flags);
2750
2751 domain_detach_iommu(info->domain, iommu);
2752 info->domain = NULL;
2753 }
2754
2755 static int blocking_domain_attach_dev(struct iommu_domain *domain,
2756 struct device *dev,
2757 struct iommu_domain *old)
2758 {
2759 struct device_domain_info *info = dev_iommu_priv_get(dev);
2760
2761 iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev);
2762 device_block_translation(dev);
2763 return 0;
2764 }
2765
2766 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
2767 struct device *dev, ioasid_t pasid,
2768 struct iommu_domain *old);
2769
2770 static struct iommu_domain blocking_domain = {
2771 .type = IOMMU_DOMAIN_BLOCKED,
2772 .ops = &(const struct iommu_domain_ops) {
2773 .attach_dev = blocking_domain_attach_dev,
2774 .set_dev_pasid = blocking_domain_set_dev_pasid,
2775 }
2776 };
2777
2778 static struct dmar_domain *paging_domain_alloc(void)
2779 {
2780 struct dmar_domain *domain;
2781
2782 domain = kzalloc_obj(*domain);
2783 if (!domain)
2784 return ERR_PTR(-ENOMEM);
2785
2786 INIT_LIST_HEAD(&domain->devices);
2787 INIT_LIST_HEAD(&domain->dev_pasids);
2788 INIT_LIST_HEAD(&domain->cache_tags);
2789 spin_lock_init(&domain->lock);
2790 spin_lock_init(&domain->cache_lock);
2791 xa_init(&domain->iommu_array);
2792 INIT_LIST_HEAD(&domain->s1_domains);
2793 spin_lock_init(&domain->s1_lock);
2794
2795 return domain;
2796 }
2797
2798 static unsigned int compute_vasz_lg2_fs(struct intel_iommu *iommu,
2799 unsigned int *top_level)
2800 {
2801 unsigned int mgaw = cap_mgaw(iommu->cap);
2802
2803 /*
2804 * Spec 3.6 First-Stage Translation:
2805 *
2806 * Software must limit addresses to less than the minimum of MGAW
2807 * and the lower canonical address width implied by FSPM (i.e.,
2808 * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level).
2809 */
2810 if (mgaw > 48 && cap_fl5lp_support(iommu->cap)) {
2811 *top_level = 4;
2812 return min(57, mgaw);
2813 }
2814
2815 /* Four level is always supported */
2816 *top_level = 3;
2817 return min(48, mgaw);
2818 }
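/*
 * Worked example for the above: mgaw = 57 with FL5LP support yields
 * top_level = 4 and a 57-bit IOVA space, while mgaw = 48 (or missing
 * FL5LP support) falls back to top_level = 3 and min(48, mgaw) bits.
 */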
2819
2820 static struct iommu_domain *
2821 intel_iommu_domain_alloc_first_stage(struct device *dev,
2822 struct intel_iommu *iommu, u32 flags)
2823 {
2824 struct pt_iommu_x86_64_cfg cfg = {};
2825 struct dmar_domain *dmar_domain;
2826 int ret;
2827
2828 if (flags & ~IOMMU_HWPT_ALLOC_PASID)
2829 return ERR_PTR(-EOPNOTSUPP);
2830
2831 /* Only SL is available in legacy mode */
2832 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
2833 return ERR_PTR(-EOPNOTSUPP);
2834
2835 dmar_domain = paging_domain_alloc();
2836 if (IS_ERR(dmar_domain))
2837 return ERR_CAST(dmar_domain);
2838
2839 cfg.common.hw_max_vasz_lg2 =
2840 compute_vasz_lg2_fs(iommu, &cfg.top_level);
2841 cfg.common.hw_max_oasz_lg2 = 52;
2842 cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
2843 BIT(PT_FEAT_FLUSH_RANGE);
2844 /* First stage always uses scalable mode */
2845 if (!ecap_smpwc(iommu->ecap))
2846 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
2847 dmar_domain->iommu.iommu_device = dev;
2848 dmar_domain->iommu.nid = dev_to_node(dev);
2849 dmar_domain->domain.ops = &intel_fs_paging_domain_ops;
2850 /*
2851 * iotlb sync for map is only needed for legacy implementations that
2852 * explicitly require flushing internal write buffers to ensure memory
2853 * coherence.
2854 */
2855 if (rwbf_required(iommu))
2856 dmar_domain->iotlb_sync_map = true;
2857
2858 ret = pt_iommu_x86_64_init(&dmar_domain->fspt, &cfg, GFP_KERNEL);
2859 if (ret) {
2860 kfree(dmar_domain);
2861 return ERR_PTR(ret);
2862 }
2863
2864 if (!cap_fl1gp_support(iommu->cap))
2865 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
2866 if (!intel_iommu_superpage)
2867 dmar_domain->domain.pgsize_bitmap = SZ_4K;
2868
2869 return &dmar_domain->domain;
2870 }
2871
2872 static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu,
2873 unsigned int *top_level)
2874 {
2875 unsigned int sagaw = cap_sagaw(iommu->cap);
2876 unsigned int mgaw = cap_mgaw(iommu->cap);
2877
2878 /*
2879 * Find the largest table size that both the mgaw and sagaw support.
2880 * This sets the valid range of IOVA and the top starting level.
2881 * Some HW may only support a 4 or 5 level walk but must limit IOVA to
2882 * 3 levels.
2883 */
2884 if (mgaw > 48 && sagaw >= BIT(3)) {
2885 *top_level = 4;
2886 return min(57, mgaw);
2887 } else if (mgaw > 39 && sagaw >= BIT(2)) {
2888 *top_level = 3 + ffs(sagaw >> 3);
2889 return min(48, mgaw);
2890 } else if (mgaw > 30 && sagaw >= BIT(1)) {
2891 *top_level = 2 + ffs(sagaw >> 2);
2892 return min(39, mgaw);
2893 }
2894 return 0;
2895 }
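/*
 * Worked example for the above: mgaw = 48 with sagaw = BIT(3) (HW can
 * only do a 5-level walk) takes the middle branch: top_level =
 * 3 + ffs(BIT(3) >> 3) = 4, i.e. a 5-level walk whose IOVA space is
 * capped at min(48, mgaw) bits.
 */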
2896
2897 static const struct iommu_dirty_ops intel_second_stage_dirty_ops = {
2898 IOMMU_PT_DIRTY_OPS(vtdss),
2899 .set_dirty_tracking = intel_iommu_set_dirty_tracking,
2900 };
2901
2902 static struct iommu_domain *
2903 intel_iommu_domain_alloc_second_stage(struct device *dev,
2904 struct intel_iommu *iommu, u32 flags)
2905 {
2906 struct pt_iommu_vtdss_cfg cfg = {};
2907 struct dmar_domain *dmar_domain;
2908 unsigned int sslps;
2909 int ret;
2910
2911 if (flags &
2912 (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
2913 IOMMU_HWPT_ALLOC_PASID)))
2914 return ERR_PTR(-EOPNOTSUPP);
2915
2916 if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) &&
2917 !nested_supported(iommu)) ||
2918 ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
2919 !ssads_supported(iommu)))
2920 return ERR_PTR(-EOPNOTSUPP);
2921
2922 /* Legacy mode always supports second stage */
2923 if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
2924 return ERR_PTR(-EOPNOTSUPP);
2925
2926 dmar_domain = paging_domain_alloc();
2927 if (IS_ERR(dmar_domain))
2928 return ERR_CAST(dmar_domain);
2929
2930 cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level);
2931 cfg.common.hw_max_oasz_lg2 = 52;
2932 cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE);
2933
2934 /*
2935 * Read-only mapping is disallowed on the domain which serves as the
2936 * parent in a nested configuration, due to HW errata
2937 * (ERRATA_772415_SPR17)
2938 */
2939 if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)
2940 cfg.common.features |= BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE);
2941
2942 if (!iommu_paging_structure_coherency(iommu))
2943 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
2944 dmar_domain->iommu.iommu_device = dev;
2945 dmar_domain->iommu.nid = dev_to_node(dev);
2946 dmar_domain->domain.ops = &intel_ss_paging_domain_ops;
2947 dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
2948
2949 if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
2950 dmar_domain->domain.dirty_ops = &intel_second_stage_dirty_ops;
2951
2952 ret = pt_iommu_vtdss_init(&dmar_domain->sspt, &cfg, GFP_KERNEL);
2953 if (ret) {
2954 kfree(dmar_domain);
2955 return ERR_PTR(ret);
2956 }
2957
2958 /* Adjust the supported page sizes to HW capability */
2959 sslps = cap_super_page_val(iommu->cap);
2960 if (!(sslps & BIT(0)))
2961 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_2M;
2962 if (!(sslps & BIT(1)))
2963 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
2964 if (!intel_iommu_superpage)
2965 dmar_domain->domain.pgsize_bitmap = SZ_4K;
2966
2967 /*
2968 * Besides the internal write buffer flush, the caching mode used for
2969 * legacy nested translation (which utilizes shadow page tables)
2970 * also requires iotlb sync on map.
2971 */
2972 if (rwbf_required(iommu) || cap_caching_mode(iommu->cap))
2973 dmar_domain->iotlb_sync_map = true;
2974
2975 return &dmar_domain->domain;
2976 }
2977
2978 static struct iommu_domain *
2979 intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
2980 const struct iommu_user_data *user_data)
2981 {
2982 struct device_domain_info *info = dev_iommu_priv_get(dev);
2983 struct intel_iommu *iommu = info->iommu;
2984 struct iommu_domain *domain;
2985
2986 if (user_data)
2987 return ERR_PTR(-EOPNOTSUPP);
2988
2989 /* Prefer first stage if possible by default. */
2990 domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags);
2991 if (domain != ERR_PTR(-EOPNOTSUPP))
2992 return domain;
2993 return intel_iommu_domain_alloc_second_stage(dev, iommu, flags);
2994 }
2995
2996 static void intel_iommu_domain_free(struct iommu_domain *domain)
2997 {
2998 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
2999
3000 if (WARN_ON(dmar_domain->nested_parent &&
3001 !list_empty(&dmar_domain->s1_domains)))
3002 return;
3003
3004 if (WARN_ON(!list_empty(&dmar_domain->devices)))
3005 return;
3006
3007 pt_iommu_deinit(&dmar_domain->iommu);
3008
3009 kfree(dmar_domain->qi_batch);
3010 kfree(dmar_domain);
3011 }
3012
3013 static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain,
3014 struct intel_iommu *iommu)
3015 {
3016 if (WARN_ON(dmar_domain->domain.dirty_ops ||
3017 dmar_domain->nested_parent))
3018 return -EINVAL;
3019
3020 /* Only SL is available in legacy mode */
3021 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
3022 return -EINVAL;
3023
3024 if (!ecap_smpwc(iommu->ecap) &&
3025 !(dmar_domain->fspt.x86_64_pt.common.features &
3026 BIT(PT_FEAT_DMA_INCOHERENT)))
3027 return -EINVAL;
3028
3029 /* Supports the number of table levels */
3030 if (!cap_fl5lp_support(iommu->cap) &&
3031 dmar_domain->fspt.x86_64_pt.common.max_vasz_lg2 > 48)
3032 return -EINVAL;
3033
3034 /* Same page size support */
3035 if (!cap_fl1gp_support(iommu->cap) &&
3036 (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3037 return -EINVAL;
3038
3039 /* iotlb sync on map requirement */
3040 if (rwbf_required(iommu) && !dmar_domain->iotlb_sync_map)
3041 return -EINVAL;
3042
3043 return 0;
3044 }
3045
3046 static int
3047 paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain,
3048 struct intel_iommu *iommu)
3049 {
3050 unsigned int vasz_lg2 = dmar_domain->sspt.vtdss_pt.common.max_vasz_lg2;
3051 unsigned int sslps = cap_super_page_val(iommu->cap);
3052 struct pt_iommu_vtdss_hw_info pt_info;
3053
3054 pt_iommu_vtdss_hw_info(&dmar_domain->sspt, &pt_info);
3055
3056 if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu))
3057 return -EINVAL;
3058 if (dmar_domain->nested_parent && !nested_supported(iommu))
3059 return -EINVAL;
3060
3061 /* Legacy mode always supports second stage */
3062 if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
3063 return -EINVAL;
3064
3065 if (!iommu_paging_structure_coherency(iommu) &&
3066 !(dmar_domain->sspt.vtdss_pt.common.features &
3067 BIT(PT_FEAT_DMA_INCOHERENT)))
3068 return -EINVAL;
3069
3070 /* Address width falls within the capability */
3071 if (cap_mgaw(iommu->cap) < vasz_lg2)
3072 return -EINVAL;
3073
3074 /* Page table level is supported. */
3075 if (!(cap_sagaw(iommu->cap) & BIT(pt_info.aw)))
3076 return -EINVAL;
3077
3078 /* Same page size support */
3079 if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M))
3080 return -EINVAL;
3081 if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3082 return -EINVAL;
3083
3084 /* iotlb sync on map requirement */
3085 if ((rwbf_required(iommu) || cap_caching_mode(iommu->cap)) &&
3086 !dmar_domain->iotlb_sync_map)
3087 return -EINVAL;
3088
3089 /*
3090 * FIXME: this is locked incorrectly; the feature check needs to
3091 * run under dmar_domain->lock.
3092 */
3093 if ((dmar_domain->sspt.vtdss_pt.common.features &
3094 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE)) &&
3095 !ecap_sc_support(iommu->ecap))
3096 return -EINVAL;
3097 return 0;
3098 }
3099
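/*
 * Validate that @dev's IOMMU can host @domain: dispatch to the first-
 * or second-stage compatibility check above, then rebuild the
 * scalable-mode context if a context copied from the previous kernel
 * is still in place.
 */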
3100 int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
3101 {
3102 struct device_domain_info *info = dev_iommu_priv_get(dev);
3103 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3104 struct intel_iommu *iommu = info->iommu;
3105 int ret = -EINVAL;
3106
3107 if (intel_domain_is_fs_paging(dmar_domain))
3108 ret = paging_domain_compatible_first_stage(dmar_domain, iommu);
3109 else if (intel_domain_is_ss_paging(dmar_domain))
3110 ret = paging_domain_compatible_second_stage(dmar_domain, iommu);
3111 else if (WARN_ON(true))
3112 ret = -EINVAL;
3113 if (ret)
3114 return ret;
3115
3116 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3117 context_copied(iommu, info->bus, info->devfn))
3118 return intel_pasid_setup_sm_context(dev);
3119
3120 return 0;
3121 }
3122
3123 static int intel_iommu_attach_device(struct iommu_domain *domain,
3124 struct device *dev,
3125 struct iommu_domain *old)
3126 {
3127 int ret;
3128
3129 device_block_translation(dev);
3130
3131 ret = paging_domain_compatible(domain, dev);
3132 if (ret)
3133 return ret;
3134
3135 ret = iopf_for_domain_set(domain, dev);
3136 if (ret)
3137 return ret;
3138
3139 ret = dmar_domain_attach_device(to_dmar_domain(domain), dev);
3140 if (ret)
3141 iopf_for_domain_remove(domain, dev);
3142
3143 return ret;
3144 }
3145
3146 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
3147 struct iommu_iotlb_gather *gather)
3148 {
3149 cache_tag_flush_range(to_dmar_domain(domain), gather->start,
3150 gather->end,
3151 iommu_pages_list_empty(&gather->freelist));
3152 iommu_put_pages_list(&gather->freelist);
3153 }
3154
3155 static bool domain_support_force_snooping(struct dmar_domain *domain)
3156 {
3157 struct device_domain_info *info;
3158 bool support = true;
3159
3160 assert_spin_locked(&domain->lock);
3161 list_for_each_entry(info, &domain->devices, link) {
3162 if (!ecap_sc_support(info->iommu->ecap)) {
3163 support = false;
3164 break;
3165 }
3166 }
3167
3168 return support;
3169 }
3170
3171 static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain)
3172 {
3173 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3174 struct device_domain_info *info;
3175
3176 guard(spinlock_irqsave)(&dmar_domain->lock);
3177
3178 if (dmar_domain->force_snooping)
3179 return true;
3180
3181 if (!domain_support_force_snooping(dmar_domain))
3182 return false;
3183
3184 dmar_domain->force_snooping = true;
3185 list_for_each_entry(info, &dmar_domain->devices, link)
3186 intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
3187 IOMMU_NO_PASID);
3188 return true;
3189 }
3190
3191 static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain)
3192 {
3193 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3194
3195 guard(spinlock_irqsave)(&dmar_domain->lock);
3196 if (!domain_support_force_snooping(dmar_domain))
3197 return false;
3198
3199 /*
3200 * The second-level page table supports per-PTE snoop control; the
3201 * iommu_map() interface handles this by setting the SNP bit.
3202 */
3203 dmar_domain->sspt.vtdss_pt.common.features |=
3204 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE);
3205 dmar_domain->force_snooping = true;
3206 return true;
3207 }
3208
3209 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
3210 {
3211 struct device_domain_info *info = dev_iommu_priv_get(dev);
3212
3213 switch (cap) {
3214 case IOMMU_CAP_CACHE_COHERENCY:
3215 case IOMMU_CAP_DEFERRED_FLUSH:
3216 return true;
3217 case IOMMU_CAP_PRE_BOOT_PROTECTION:
3218 return dmar_platform_optin();
3219 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
3220 return ecap_sc_support(info->iommu->ecap);
3221 case IOMMU_CAP_DIRTY_TRACKING:
3222 return ssads_supported(info->iommu);
3223 default:
3224 return false;
3225 }
3226 }
3227
3228 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
3229 {
3230 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
3231 struct device_domain_info *info;
3232 struct intel_iommu *iommu;
3233 u8 bus, devfn;
3234 int ret;
3235
3236 iommu = device_lookup_iommu(dev, &bus, &devfn);
3237 if (!iommu || !iommu->iommu.ops)
3238 return ERR_PTR(-ENODEV);
3239
3240 info = kzalloc_obj(*info);
3241 if (!info)
3242 return ERR_PTR(-ENOMEM);
3243
3244 if (dev_is_real_dma_subdevice(dev)) {
3245 info->bus = pdev->bus->number;
3246 info->devfn = pdev->devfn;
3247 info->segment = pci_domain_nr(pdev->bus);
3248 } else {
3249 info->bus = bus;
3250 info->devfn = devfn;
3251 info->segment = iommu->segment;
3252 }
3253
3254 info->dev = dev;
3255 info->iommu = iommu;
3256 if (dev_is_pci(dev)) {
3257 if (ecap_dev_iotlb_support(iommu->ecap) &&
3258 pci_ats_supported(pdev) &&
3259 dmar_ats_supported(pdev, iommu)) {
3260 info->ats_supported = 1;
3261 info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
3262
3263 /*
3264 * For IOMMUs that support device IOTLB throttling
3265 * (DIT), we assign the PFSID in a VF's invalidation
3266 * descriptors so that the IOMMU HW can gauge queue
3267 * depth at the PF level. If DIT is not supported,
3268 * PFSID is treated as reserved and must be set to 0.
3269 */
3270 if (ecap_dit(iommu->ecap))
3271 info->pfsid = pci_dev_id(pci_physfn(pdev));
3272 info->ats_qdep = pci_ats_queue_depth(pdev);
3273 }
3274 if (sm_supported(iommu)) {
3275 if (pasid_supported(iommu)) {
3276 int features = pci_pasid_features(pdev);
3277
3278 if (features >= 0)
3279 info->pasid_supported = features | 1;
3280 }
3281
3282 if (info->ats_supported && ecap_prs(iommu->ecap) &&
3283 ecap_pds(iommu->ecap) && pci_pri_supported(pdev))
3284 info->pri_supported = 1;
3285 }
3286 }
3287
3288 dev_iommu_priv_set(dev, info);
3289 if (pdev && pci_ats_supported(pdev)) {
3290 pci_prepare_ats(pdev, VTD_PAGE_SHIFT);
3291 ret = device_rbtree_insert(iommu, info);
3292 if (ret)
3293 goto free;
3294 }
3295
3296 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3297 ret = intel_pasid_alloc_table(dev);
3298 if (ret) {
3299 dev_err(dev, "PASID table allocation failed\n");
3300 goto clear_rbtree;
3301 }
3302
3303 if (!context_copied(iommu, info->bus, info->devfn)) {
3304 ret = intel_pasid_setup_sm_context(dev);
3305 if (ret)
3306 goto free_table;
3307 }
3308 }
3309
3310 intel_iommu_debugfs_create_dev(info);
3311
3312 return &iommu->iommu;
3313 free_table:
3314 intel_pasid_free_table(dev);
3315 clear_rbtree:
3316 device_rbtree_remove(info);
3317 free:
3318 kfree(info);
3319
3320 return ERR_PTR(ret);
3321 }
3322
3323 static void intel_iommu_probe_finalize(struct device *dev)
3324 {
3325 struct device_domain_info *info = dev_iommu_priv_get(dev);
3326 struct intel_iommu *iommu = info->iommu;
3327
3328 /*
3329 * The PCIe spec, in its wisdom, declares that the behaviour of the
3330 * device is undefined if you enable PASID support after ATS support.
3331 * So always enable PASID support on devices which have it, even if
3332 * we can't yet know if we're ever going to use it.
3333 */
3334 if (info->pasid_supported &&
3335 !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
3336 info->pasid_enabled = 1;
3337
3338 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3339 iommu_enable_pci_ats(info);
3340 /* Assign a DEVTLB cache tag to the default domain. */
3341 if (info->ats_enabled && info->domain) {
3342 u16 did = domain_id_iommu(info->domain, iommu);
3343
3344 if (cache_tag_assign(info->domain, did, dev,
3345 IOMMU_NO_PASID, CACHE_TAG_DEVTLB))
3346 iommu_disable_pci_ats(info);
3347 }
3348 }
3349 iommu_enable_pci_pri(info);
3350 }
3351
3352 static void intel_iommu_release_device(struct device *dev)
3353 {
3354 struct device_domain_info *info = dev_iommu_priv_get(dev);
3355 struct intel_iommu *iommu = info->iommu;
3356
3357 iommu_disable_pci_pri(info);
3358 iommu_disable_pci_ats(info);
3359
3360 if (info->pasid_enabled) {
3361 pci_disable_pasid(to_pci_dev(dev));
3362 info->pasid_enabled = 0;
3363 }
3364
3365 mutex_lock(&iommu->iopf_lock);
3366 if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
3367 device_rbtree_remove(info);
3368 mutex_unlock(&iommu->iopf_lock);
3369
3370 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3371 !context_copied(iommu, info->bus, info->devfn))
3372 intel_pasid_teardown_sm_context(dev);
3373
3374 intel_pasid_free_table(dev);
3375 intel_iommu_debugfs_remove_dev(info);
3376 kfree(info);
3377 }
3378
3379 static void intel_iommu_get_resv_regions(struct device *device,
3380 struct list_head *head)
3381 {
3382 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
3383 struct iommu_resv_region *reg;
3384 struct dmar_rmrr_unit *rmrr;
3385 struct device *i_dev;
3386 int i;
3387
3388 rcu_read_lock();
3389 for_each_rmrr_units(rmrr) {
3390 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3391 i, i_dev) {
3392 struct iommu_resv_region *resv;
3393 enum iommu_resv_type type;
3394 size_t length;
3395
3396 if (i_dev != device &&
3397 !is_downstream_to_pci_bridge(device, i_dev))
3398 continue;
3399
3400 length = rmrr->end_address - rmrr->base_address + 1;
3401
3402 type = device_rmrr_is_relaxable(device) ?
3403 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
3404
3405 resv = iommu_alloc_resv_region(rmrr->base_address,
3406 length, prot, type,
3407 GFP_ATOMIC);
3408 if (!resv)
3409 break;
3410
3411 list_add_tail(&resv->list, head);
3412 }
3413 }
3414 rcu_read_unlock();
3415
3416 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
3417 if (dev_is_pci(device)) {
3418 struct pci_dev *pdev = to_pci_dev(device);
3419
3420 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
3421 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
3422 IOMMU_RESV_DIRECT_RELAXABLE,
3423 GFP_KERNEL);
3424 if (reg)
3425 list_add_tail(&reg->list, head);
3426 }
3427 }
3428 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
3429
3430 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
3431 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
3432 0, IOMMU_RESV_MSI, GFP_KERNEL);
3433 if (!reg)
3434 return;
3435 list_add_tail(&reg->list, head);
3436 }
3437
3438 static struct iommu_group *intel_iommu_device_group(struct device *dev)
3439 {
3440 if (dev_is_pci(dev))
3441 return pci_device_group(dev);
3442 return generic_device_group(dev);
3443 }
3444
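/*
 * Enable I/O page fault handling for @dev. PRI must already be
 * enabled; calls are reference-counted per device and serialized by
 * the group mutex, so only the first caller attaches the device to
 * the IOPF queue.
 */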
3445 int intel_iommu_enable_iopf(struct device *dev)
3446 {
3447 struct device_domain_info *info = dev_iommu_priv_get(dev);
3448 struct intel_iommu *iommu = info->iommu;
3449 int ret;
3450
3451 if (!info->pri_enabled)
3452 return -ENODEV;
3453
3454 /* pri_enabled is protected by the group mutex. */
3455 iommu_group_mutex_assert(dev);
3456 if (info->iopf_refcount) {
3457 info->iopf_refcount++;
3458 return 0;
3459 }
3460
3461 ret = iopf_queue_add_device(iommu->iopf_queue, dev);
3462 if (ret)
3463 return ret;
3464
3465 info->iopf_refcount = 1;
3466
3467 return 0;
3468 }
3469
3470 void intel_iommu_disable_iopf(struct device *dev)
3471 {
3472 struct device_domain_info *info = dev_iommu_priv_get(dev);
3473 struct intel_iommu *iommu = info->iommu;
3474
3475 if (WARN_ON(!info->pri_enabled || !info->iopf_refcount))
3476 return;
3477
3478 iommu_group_mutex_assert(dev);
3479 if (--info->iopf_refcount)
3480 return;
3481
3482 iopf_queue_remove_device(iommu->iopf_queue, dev);
3483 }
3484
3485 static bool intel_iommu_is_attach_deferred(struct device *dev)
3486 {
3487 struct device_domain_info *info = dev_iommu_priv_get(dev);
3488
3489 return translation_pre_enabled(info->iommu) && !info->domain;
3490 }
3491
3492 /*
3493 * Check that the device does not live on an external-facing PCI port that is
3494 * marked as untrusted. Such devices should not be able to apply quirks and
3495 * thus not be able to bypass the IOMMU restrictions.
3496 */
3497 static bool risky_device(struct pci_dev *pdev)
3498 {
3499 if (pdev->untrusted) {
3500 pci_info(pdev,
3501 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
3502 pdev->vendor, pdev->device);
3503 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
3504 return true;
3505 }
3506 return false;
3507 }
3508
3509 static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
3510 unsigned long iova, size_t size)
3511 {
3512 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3513
3514 if (dmar_domain->iotlb_sync_map)
3515 cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1);
3516
3517 return 0;
3518 }
3519
3520 void domain_remove_dev_pasid(struct iommu_domain *domain,
3521 struct device *dev, ioasid_t pasid)
3522 {
3523 struct device_domain_info *info = dev_iommu_priv_get(dev);
3524 struct dev_pasid_info *curr, *dev_pasid = NULL;
3525 struct intel_iommu *iommu = info->iommu;
3526 struct dmar_domain *dmar_domain;
3527 unsigned long flags;
3528
3529 if (!domain)
3530 return;
3531
3532 /* Identity domain has no metadata for the pasid. */
3533 if (domain->type == IOMMU_DOMAIN_IDENTITY)
3534 return;
3535
3536 dmar_domain = to_dmar_domain(domain);
3537 spin_lock_irqsave(&dmar_domain->lock, flags);
3538 list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
3539 if (curr->dev == dev && curr->pasid == pasid) {
3540 list_del(&curr->link_domain);
3541 dev_pasid = curr;
3542 break;
3543 }
3544 }
3545 spin_unlock_irqrestore(&dmar_domain->lock, flags);
3546
3547 cache_tag_unassign_domain(dmar_domain, dev, pasid);
3548 domain_detach_iommu(dmar_domain, iommu);
3549 if (!WARN_ON_ONCE(!dev_pasid)) {
3550 intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
3551 kfree(dev_pasid);
3552 }
3553 }
3554
3555 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
3556 struct device *dev, ioasid_t pasid,
3557 struct iommu_domain *old)
3558 {
3559 struct device_domain_info *info = dev_iommu_priv_get(dev);
3560
3561 intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
3562 iopf_for_domain_remove(old, dev);
3563 domain_remove_dev_pasid(old, dev, pasid);
3564
3565 return 0;
3566 }
3567
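/*
 * Record @domain's metadata for a (device, PASID) pair: take an iommu
 * reference, assign a cache tag, and link the new dev_pasid_info into
 * the domain. The caller is still responsible for programming the
 * PASID table entry itself.
 */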
3568 struct dev_pasid_info *
3569 domain_add_dev_pasid(struct iommu_domain *domain,
3570 struct device *dev, ioasid_t pasid)
3571 {
3572 struct device_domain_info *info = dev_iommu_priv_get(dev);
3573 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3574 struct intel_iommu *iommu = info->iommu;
3575 struct dev_pasid_info *dev_pasid;
3576 unsigned long flags;
3577 int ret;
3578
3579 dev_pasid = kzalloc_obj(*dev_pasid);
3580 if (!dev_pasid)
3581 return ERR_PTR(-ENOMEM);
3582
3583 ret = domain_attach_iommu(dmar_domain, iommu);
3584 if (ret)
3585 goto out_free;
3586
3587 ret = cache_tag_assign_domain(dmar_domain, dev, pasid);
3588 if (ret)
3589 goto out_detach_iommu;
3590
3591 dev_pasid->dev = dev;
3592 dev_pasid->pasid = pasid;
3593 spin_lock_irqsave(&dmar_domain->lock, flags);
3594 list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
3595 spin_unlock_irqrestore(&dmar_domain->lock, flags);
3596
3597 return dev_pasid;
3598 out_detach_iommu:
3599 domain_detach_iommu(dmar_domain, iommu);
3600 out_free:
3601 kfree(dev_pasid);
3602 return ERR_PTR(ret);
3603 }
3604
3605 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
3606 struct device *dev, ioasid_t pasid,
3607 struct iommu_domain *old)
3608 {
3609 struct device_domain_info *info = dev_iommu_priv_get(dev);
3610 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3611 struct intel_iommu *iommu = info->iommu;
3612 struct dev_pasid_info *dev_pasid;
3613 int ret;
3614
3615 if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
3616 return -EINVAL;
3617
3618 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
3619 return -EOPNOTSUPP;
3620
3621 if (domain->dirty_ops)
3622 return -EINVAL;
3623
3624 if (context_copied(iommu, info->bus, info->devfn))
3625 return -EBUSY;
3626
3627 ret = paging_domain_compatible(domain, dev);
3628 if (ret)
3629 return ret;
3630
3631 dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
3632 if (IS_ERR(dev_pasid))
3633 return PTR_ERR(dev_pasid);
3634
3635 ret = iopf_for_domain_replace(domain, old, dev);
3636 if (ret)
3637 goto out_remove_dev_pasid;
3638
3639 if (intel_domain_is_fs_paging(dmar_domain))
3640 ret = domain_setup_first_level(iommu, dmar_domain,
3641 dev, pasid, old);
3642 else if (intel_domain_is_ss_paging(dmar_domain))
3643 ret = domain_setup_second_level(iommu, dmar_domain,
3644 dev, pasid, old);
3645 else if (WARN_ON(true))
3646 ret = -EINVAL;
3647
3648 if (ret)
3649 goto out_unwind_iopf;
3650
3651 domain_remove_dev_pasid(old, dev, pasid);
3652
3653 intel_iommu_debugfs_create_dev_pasid(dev_pasid);
3654
3655 return 0;
3656
3657 out_unwind_iopf:
3658 iopf_for_domain_replace(old, domain, dev);
3659 out_remove_dev_pasid:
3660 domain_remove_dev_pasid(domain, dev, pasid);
3661 return ret;
3662 }
3663
3664 static void *intel_iommu_hw_info(struct device *dev, u32 *length,
3665 enum iommu_hw_info_type *type)
3666 {
3667 struct device_domain_info *info = dev_iommu_priv_get(dev);
3668 struct intel_iommu *iommu = info->iommu;
3669 struct iommu_hw_info_vtd *vtd;
3670
3671 if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
3672 *type != IOMMU_HW_INFO_TYPE_INTEL_VTD)
3673 return ERR_PTR(-EOPNOTSUPP);
3674
3675 vtd = kzalloc_obj(*vtd);
3676 if (!vtd)
3677 return ERR_PTR(-ENOMEM);
3678
3679 vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
3680 vtd->cap_reg = iommu->cap;
3681 vtd->ecap_reg = iommu->ecap;
3682 *length = sizeof(*vtd);
3683 *type = IOMMU_HW_INFO_TYPE_INTEL_VTD;
3684 return vtd;
3685 }
3686
3687 /*
3688 * Set dirty tracking for the device list of a domain. The caller must
3689 * hold the domain->lock when calling it.
3690 */
device_set_dirty_tracking(struct list_head * devices,bool enable)3691 static int device_set_dirty_tracking(struct list_head *devices, bool enable)
3692 {
3693 struct device_domain_info *info;
3694 int ret = 0;
3695
3696 list_for_each_entry(info, devices, link) {
3697 ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
3698 IOMMU_NO_PASID, enable);
3699 if (ret)
3700 break;
3701 }
3702
3703 return ret;
3704 }
3705
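/*
 * Propagate a dirty-tracking change on a nested parent domain to every
 * first-stage domain nested on it, rolling all of them back to the
 * previous state if any device fails to switch.
 */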
static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
					    bool enable)
{
	struct dmar_domain *s1_domain;
	unsigned long flags;
	int ret;

	spin_lock(&domain->s1_lock);
	list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
		spin_lock_irqsave(&s1_domain->lock, flags);
		ret = device_set_dirty_tracking(&s1_domain->devices, enable);
		spin_unlock_irqrestore(&s1_domain->lock, flags);
		if (ret)
			goto err_unwind;
	}
	spin_unlock(&domain->s1_lock);
	return 0;

err_unwind:
	list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
		spin_lock_irqsave(&s1_domain->lock, flags);
		device_set_dirty_tracking(&s1_domain->devices,
					  domain->dirty_tracking);
		spin_unlock_irqrestore(&s1_domain->lock, flags);
	}
	spin_unlock(&domain->s1_lock);
	return ret;
}

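/*
 * Enable or disable IOMMU dirty tracking for every device attached to
 * the domain, including devices attached through first-stage domains
 * nested on it when the domain is a nested parent.
 */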
static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
					  bool enable)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	int ret;

	spin_lock(&dmar_domain->lock);
	if (dmar_domain->dirty_tracking == enable)
		goto out_unlock;

	ret = device_set_dirty_tracking(&dmar_domain->devices, enable);
	if (ret)
		goto err_unwind;

	if (dmar_domain->nested_parent) {
		ret = parent_domain_set_dirty_tracking(dmar_domain, enable);
		if (ret)
			goto err_unwind;
	}

	dmar_domain->dirty_tracking = enable;
out_unlock:
	spin_unlock(&dmar_domain->lock);

	return 0;

err_unwind:
	device_set_dirty_tracking(&dmar_domain->devices,
				  dmar_domain->dirty_tracking);
	spin_unlock(&dmar_domain->lock);
	return ret;
}

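/*
 * Program a legacy-mode context entry for pass-through translation. An
 * entry copied from a previous kernel is torn down and rewritten; an
 * entry this kernel already made present is left untouched.
 */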
static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct context_entry *context;

	spin_lock(&iommu->lock);
	context = iommu_context_addr(iommu, bus, devfn, 1);
	if (!context) {
		spin_unlock(&iommu->lock);
		return -ENOMEM;
	}

	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
		spin_unlock(&iommu->lock);
		return 0;
	}

	copied_context_tear_down(iommu, context, bus, devfn);
	context_clear_entry(context);
	context_set_domain_id(context, FLPT_DEFAULT_DID);

	/*
	 * In pass through mode, AW must be programmed to indicate the largest
	 * AGAW value supported by hardware. And ASR is ignored by hardware.
	 */
	context_set_address_width(context, iommu->msagaw);
	context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH);
	context_set_fault_enable(context);
	context_set_present(context);
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(context, sizeof(*context));
	context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn);
	spin_unlock(&iommu->lock);

	return 0;
}

static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct device *dev = data;

	return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff);
}

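/*
 * Set up pass-through context entries for a device. For PCI devices this
 * covers every DMA alias, so requests arriving with any aliased RID hit
 * a pass-through context entry.
 */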
static int device_setup_pass_through(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	if (!dev_is_pci(dev))
		return context_setup_pass_through(dev, info->bus, info->devfn);

	return pci_for_each_dma_alias(to_pci_dev(dev),
				      context_setup_pass_through_cb, dev);
}

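/*
 * Attach the global identity domain: block any existing translation
 * first, then install pass-through via the PASID table in scalable mode
 * or via the context entry in legacy mode.
 */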
static int identity_domain_attach_dev(struct iommu_domain *domain,
				      struct device *dev,
				      struct iommu_domain *old)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	int ret;

	device_block_translation(dev);

	if (dev_is_real_dma_subdevice(dev))
		return 0;

	/*
	 * No PRI support with the global identity domain. No need to enable or
	 * disable PRI in this path as the iommu has been put in the blocking
	 * state.
	 */
	if (sm_supported(iommu))
		ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
	else
		ret = device_setup_pass_through(dev);

	if (!ret)
		info->domain_attached = true;

	return ret;
}

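/*
 * Attach the identity domain to @pasid of @dev, replacing whatever
 * domain was previously installed on that PASID.
 */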
static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
					 struct device *dev, ioasid_t pasid,
					 struct iommu_domain *old)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	int ret;

	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
		return -EOPNOTSUPP;

	ret = iopf_for_domain_replace(domain, old, dev);
	if (ret)
		return ret;

	ret = domain_setup_passthrough(iommu, dev, pasid, old);
	if (ret) {
		iopf_for_domain_replace(old, domain, dev);
		return ret;
	}

	domain_remove_dev_pasid(old, dev, pasid);
	return 0;
}

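/* The global static identity domain, shared by all devices. */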
static struct iommu_domain identity_domain = {
	.type = IOMMU_DOMAIN_IDENTITY,
	.ops = &(const struct iommu_domain_ops) {
		.attach_dev = identity_domain_attach_dev,
		.set_dev_pasid = identity_domain_set_dev_pasid,
	},
};

const struct iommu_domain_ops intel_fs_paging_domain_ops = {
	IOMMU_PT_DOMAIN_OPS(x86_64),
	.attach_dev = intel_iommu_attach_device,
	.set_dev_pasid = intel_iommu_set_dev_pasid,
	.iotlb_sync_map = intel_iommu_iotlb_sync_map,
	.flush_iotlb_all = intel_flush_iotlb_all,
	.iotlb_sync = intel_iommu_tlb_sync,
	.free = intel_iommu_domain_free,
	.enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs,
};

const struct iommu_domain_ops intel_ss_paging_domain_ops = {
	IOMMU_PT_DOMAIN_OPS(vtdss),
	.attach_dev = intel_iommu_attach_device,
	.set_dev_pasid = intel_iommu_set_dev_pasid,
	.iotlb_sync_map = intel_iommu_iotlb_sync_map,
	.flush_iotlb_all = intel_flush_iotlb_all,
	.iotlb_sync = intel_iommu_tlb_sync,
	.free = intel_iommu_domain_free,
	.enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss,
};

const struct iommu_ops intel_iommu_ops = {
	.blocked_domain = &blocking_domain,
	.release_domain = &blocking_domain,
	.identity_domain = &identity_domain,
	.capable = intel_iommu_capable,
	.hw_info = intel_iommu_hw_info,
	.domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags,
	.domain_alloc_sva = intel_svm_domain_alloc,
	.domain_alloc_nested = intel_iommu_domain_alloc_nested,
	.probe_device = intel_iommu_probe_device,
	.probe_finalize = intel_iommu_probe_finalize,
	.release_device = intel_iommu_release_device,
	.get_resv_regions = intel_iommu_get_resv_regions,
	.device_group = intel_iommu_device_group,
	.is_attach_deferred = intel_iommu_is_attach_deferred,
	.def_domain_type = device_def_domain_type,
	.page_response = intel_iommu_page_response,
};

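/*
 * PCI fixup shared by all the integrated graphics devices below whose
 * DMA remapping is known to be broken: disable the IOMMU for graphics.
 */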
static void quirk_iommu_igfx(struct pci_dev *dev)
{
	if (risky_device(dev))
		return;

	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
	disable_igfx_iommu = 1;
}

/* G4x/GM45 integrated gfx dmar support is totally busted. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);

/* QM57/QS57 integrated gfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx);

/* Broadwell igfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);

static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	if (risky_device(dev))
		return;

	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	pci_info(dev, "Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);

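/*
 * GGC is, to the best of our knowledge, the Graphics Control register in
 * the host bridge's config space; bits 8-11 encode the stolen memory the
 * BIOS reserved for the GTT, including the VT-d variants checked by the
 * quirk below.
 */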
#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (risky_device(dev))
		return;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		disable_igfx_iommu = 1;
	} else if (!disable_igfx_iommu) {
		/* we have to ensure the gfx device is idle before we flush */
		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
		iommu_set_dma_strict();
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);

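/*
 * Match integrated graphics devices by the high byte of the PCI device
 * ID and, for those generations, skip clearing the translation-enable
 * bit when the IOMMU would otherwise be disabled.
 */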
static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
{
	unsigned short ver;

	if (!IS_GFX_DEVICE(dev))
		return;

	ver = (dev->device >> 8) & 0xff;
	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
	    ver != 0x9a && ver != 0xa7 && ver != 0x7d)
		return;

	if (risky_device(dev))
		return;

	pci_info(dev, "Skip IOMMU disabling for graphics\n");
	iommu_skip_te_disable = 1;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);

/*
 * On Tylersburg chipsets, some BIOSes have been known to enable the
 * ISOCH DMAR unit for the Azalia sound device, but not give it any
 * TLB entries, which causes it to deadlock. Check for that. We do
 * this in a function called from init_dmars(), instead of in a PCI
 * quirk, because we don't want to print the obnoxious "BIOS broken"
 * message if VT-d is actually disabled.
 */
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/*
	 * System Management Registers. Might be hidden, in which case
	 * we can't do the sanity check. But that's OK, because the
	 * known-broken BIOSes _don't_ actually hide it, so far.
	 */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (risky_device(pdev)) {
		pci_dev_put(pdev);
		return;
	}

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
		vtisochctrl);
}

/*
 * Here we deal with a device TLB defect where the device may inadvertently
 * issue an ATS invalidation completion before posted writes that were
 * initiated with a translated address matching the invalidation address
 * range, violating the invalidation completion ordering.
 * Therefore, any use case that cannot guarantee DMA is stopped before unmap
 * is vulnerable to this defect. In other words, any dTLB invalidation
 * initiated not under the control of the trusted/privileged host device
 * driver must use this quirk.
 * Device TLBs are invalidated under the following six conditions:
 * 1. Device driver does DMA API unmap IOVA
 * 2. Device driver unbinds a PASID from a process, sva_unbind_device()
 * 3. PASID is torn down, after PASID cache is flushed. e.g. process
 *    exit_mmap() due to crash
 * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
 *    VM has to free pages that were unmapped
 * 5. Userspace driver unmaps a DMA buffer
 * 6. Cache invalidation in vSVA usage (upcoming)
 *
 * For #1 and #2, device drivers are responsible for stopping DMA traffic
 * before unmap/unbind. For #3, the iommu driver gets an mmu_notifier to
 * invalidate the TLB the same way as a normal user unmap, which will use
 * this quirk. The dTLB invalidation after a PASID cache flush does not
 * need this quirk.
 *
 * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
 */
void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
			       unsigned long address, unsigned long mask,
			       u32 pasid, u16 qdep)
{
	u16 sid;

	if (likely(!info->dtlb_extra_inval))
		return;

	sid = PCI_DEVID(info->bus, info->devfn);
	if (pasid == IOMMU_NO_PASID) {
		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
				   qdep, address, mask);
	} else {
		qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
					 pasid, qdep, address, mask);
	}
}

#define ecmd_get_status_code(res)	(((res) & 0xff) >> 1)

/*
 * Function to submit a command to the enhanced command interface. The
 * valid enhanced command descriptions are defined in Table 47 of the
 * VT-d spec. The VT-d hardware implementation may support some but not
 * all commands, which can be determined by checking the Enhanced
 * Command Capability Register.
 *
 * Return values:
 *  - 0: Command successful without any error;
 *  - Negative: software error value;
 *  - Nonzero positive: failure status code defined in Table 48.
 */
int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
{
	unsigned long flags;
	u64 res;
	int ret;

	if (!cap_ecmds(iommu->cap))
		return -ENODEV;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
	if (res & DMA_ECMD_ECRSP_IP) {
		ret = -EBUSY;
		goto err;
	}

	/*
	 * Unconditionally write the operand B, because
	 * - There is no side effect if an ecmd doesn't require an
	 *   operand B, but we set the register to some value.
	 * - It's not invoked in any critical path. The extra MMIO
	 *   write doesn't bring any performance concerns.
	 */
	dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
	dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));

	IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
		      !(res & DMA_ECMD_ECRSP_IP), res);

	if (res & DMA_ECMD_ECRSP_IP) {
		ret = -ETIMEDOUT;
		goto err;
	}

	ret = ecmd_get_status_code(res);
err:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	return ret;
}

MODULE_IMPORT_NS("GENERIC_PT_IOMMU");