1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright © 2006-2014 Intel Corporation.
4 *
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
10 * Joerg Roedel <jroedel@suse.de>
11 */
12
13 #define pr_fmt(fmt) "DMAR: " fmt
14 #define dev_fmt(fmt) pr_fmt(fmt)
15
16 #include <linux/crash_dump.h>
17 #include <linux/dma-direct.h>
18 #include <linux/dmi.h>
19 #include <linux/memory.h>
20 #include <linux/pci.h>
21 #include <linux/pci-ats.h>
22 #include <linux/spinlock.h>
23 #include <linux/syscore_ops.h>
24 #include <linux/tboot.h>
25 #include <uapi/linux/iommufd.h>
26
27 #include "iommu.h"
28 #include "../dma-iommu.h"
29 #include "../irq_remapping.h"
30 #include "../iommu-pages.h"
31 #include "pasid.h"
32 #include "perfmon.h"
33
34 #define ROOT_SIZE VTD_PAGE_SIZE
35 #define CONTEXT_SIZE VTD_PAGE_SIZE
36
37 #define IS_GFX_DEVICE(pdev) pci_is_display(pdev)
38 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
39 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
40 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
41
42 #define IOAPIC_RANGE_START (0xfee00000)
43 #define IOAPIC_RANGE_END (0xfeefffff)
44 #define IOVA_START_ADDR (0x1000)
45
46 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
47
48 static void __init check_tylersburg_isoch(void);
49 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
50 bool enable);
51 static int rwbf_quirk;
52
53 #define rwbf_required(iommu) (rwbf_quirk || cap_rwbf((iommu)->cap))
54
55 /*
56 * set to 1 to panic the kernel if VT-d cannot be enabled successfully
57 * (used when the kernel is launched w/ TXT)
58 */
59 static int force_on = 0;
60 static int intel_iommu_tboot_noforce;
61 static int no_platform_optin;
62
63 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
64
65 /*
66 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
67 * if marked present.
68 */
69 static phys_addr_t root_entry_lctp(struct root_entry *re)
70 {
71 if (!(re->lo & 1))
72 return 0;
73
74 return re->lo & VTD_PAGE_MASK;
75 }
76
77 /*
78 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
79 * if marked present.
80 */
81 static phys_addr_t root_entry_uctp(struct root_entry *re)
82 {
83 if (!(re->hi & 1))
84 return 0;
85
86 return re->hi & VTD_PAGE_MASK;
87 }
88
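/*
 * rb_find()/rb_find_add() comparators for iommu->device_rbtree. Nodes are
 * keyed by the PCI requester ID (bus number in the upper byte, devfn in
 * the lower byte).
 */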
89 static int device_rid_cmp_key(const void *key, const struct rb_node *node)
90 {
91 struct device_domain_info *info =
92 rb_entry(node, struct device_domain_info, node);
93 const u16 *rid_lhs = key;
94
95 if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
96 return -1;
97
98 if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
99 return 1;
100
101 return 0;
102 }
103
104 static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
105 {
106 struct device_domain_info *info =
107 rb_entry(lhs, struct device_domain_info, node);
108 u16 key = PCI_DEVID(info->bus, info->devfn);
109
110 return device_rid_cmp_key(&key, rhs);
111 }
112
113 /*
114 * Looks up an IOMMU-probed device using its source ID.
115 *
116 * Returns the pointer to the device if there is a match. Otherwise,
117 * returns NULL.
118 *
119 * Note that this helper doesn't guarantee that the device won't be
120 * released by the iommu subsystem after being returned. The caller
121 * should use its own synchronization mechanism to avoid the device
122 * being released during its use if that is possibly the case.
123 */
124 struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid)
125 {
126 struct device_domain_info *info = NULL;
127 struct rb_node *node;
128 unsigned long flags;
129
130 spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
131 node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key);
132 if (node)
133 info = rb_entry(node, struct device_domain_info, node);
134 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
135
136 return info ? info->dev : NULL;
137 }
138
139 static int device_rbtree_insert(struct intel_iommu *iommu,
140 struct device_domain_info *info)
141 {
142 struct rb_node *curr;
143 unsigned long flags;
144
145 spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
146 curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
147 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
148 if (WARN_ON(curr))
149 return -EEXIST;
150
151 return 0;
152 }
153
154 static void device_rbtree_remove(struct device_domain_info *info)
155 {
156 struct intel_iommu *iommu = info->iommu;
157 unsigned long flags;
158
159 spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
160 rb_erase(&info->node, &iommu->device_rbtree);
161 spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
162 }
163
164 struct dmar_rmrr_unit {
165 struct list_head list; /* list of rmrr units */
166 struct acpi_dmar_header *hdr; /* ACPI header */
167 u64 base_address; /* reserved base address*/
168 u64 end_address; /* reserved end address */
169 struct dmar_dev_scope *devices; /* target devices */
170 int devices_cnt; /* target device count */
171 };
172
173 struct dmar_atsr_unit {
174 struct list_head list; /* list of ATSR units */
175 struct acpi_dmar_header *hdr; /* ACPI header */
176 struct dmar_dev_scope *devices; /* target devices */
177 int devices_cnt; /* target device count */
178 u8 include_all:1; /* include all ports */
179 };
180
181 struct dmar_satc_unit {
182 struct list_head list; /* list of SATC units */
183 struct acpi_dmar_header *hdr; /* ACPI header */
184 struct dmar_dev_scope *devices; /* target devices */
185 struct intel_iommu *iommu; /* the corresponding iommu */
186 int devices_cnt; /* target device count */
187 u8 atc_required:1; /* ATS is required */
188 };
189
190 static LIST_HEAD(dmar_atsr_units);
191 static LIST_HEAD(dmar_rmrr_units);
192 static LIST_HEAD(dmar_satc_units);
193
194 #define for_each_rmrr_units(rmrr) \
195 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
196
197 static void intel_iommu_domain_free(struct iommu_domain *domain);
198
199 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
200 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
201
202 int intel_iommu_enabled = 0;
203 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
204
205 static int intel_iommu_superpage = 1;
206 static int iommu_identity_mapping;
207 static int iommu_skip_te_disable;
208 static int disable_igfx_iommu;
209
210 #define IDENTMAP_AZALIA 4
211
212 const struct iommu_ops intel_iommu_ops;
213
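/*
 * Helpers for the VTD_FLAG_TRANS_PRE_ENABLED flag, which records whether
 * DMA translation was already enabled (e.g. by the previous kernel in a
 * kdump scenario) when the OS took over this IOMMU.
 */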
214 static bool translation_pre_enabled(struct intel_iommu *iommu)
215 {
216 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
217 }
218
219 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
220 {
221 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
222 }
223
224 static void init_translation_status(struct intel_iommu *iommu)
225 {
226 u32 gsts;
227
228 gsts = readl(iommu->reg + DMAR_GSTS_REG);
229 if (gsts & DMA_GSTS_TES)
230 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
231 }
232
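/*
 * Parse the "intel_iommu=" kernel command line parameter. Options are
 * comma separated, e.g. "intel_iommu=on,sm_on,igfx_off".
 */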
233 static int __init intel_iommu_setup(char *str)
234 {
235 if (!str)
236 return -EINVAL;
237
238 while (*str) {
239 if (!strncmp(str, "on", 2)) {
240 dmar_disabled = 0;
241 pr_info("IOMMU enabled\n");
242 } else if (!strncmp(str, "off", 3)) {
243 dmar_disabled = 1;
244 no_platform_optin = 1;
245 pr_info("IOMMU disabled\n");
246 } else if (!strncmp(str, "igfx_off", 8)) {
247 disable_igfx_iommu = 1;
248 pr_info("Disable GFX device mapping\n");
249 } else if (!strncmp(str, "forcedac", 8)) {
250 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
251 iommu_dma_forcedac = true;
252 } else if (!strncmp(str, "strict", 6)) {
253 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
254 iommu_set_dma_strict();
255 } else if (!strncmp(str, "sp_off", 6)) {
256 pr_info("Disable supported super page\n");
257 intel_iommu_superpage = 0;
258 } else if (!strncmp(str, "sm_on", 5)) {
259 pr_info("Enable scalable mode if hardware supports\n");
260 intel_iommu_sm = 1;
261 } else if (!strncmp(str, "sm_off", 6)) {
262 pr_info("Scalable mode is disallowed\n");
263 intel_iommu_sm = 0;
264 } else if (!strncmp(str, "tboot_noforce", 13)) {
265 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
266 intel_iommu_tboot_noforce = 1;
267 } else {
268 pr_notice("Unknown option - '%s'\n", str);
269 }
270
271 str += strcspn(str, ",");
272 while (*str == ',')
273 str++;
274 }
275
276 return 1;
277 }
278 __setup("intel_iommu=", intel_iommu_setup);
279
280 /*
281 * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
282 * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
283 * the returned SAGAW.
284 */
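/* In the SAGAW encoding, BIT(2) is 4-level (48-bit AGAW) and BIT(3) is 5-level (57-bit AGAW). */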
285 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
286 {
287 unsigned long fl_sagaw, sl_sagaw;
288
289 fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0);
290 sl_sagaw = cap_sagaw(iommu->cap);
291
292 /* Second level only. */
293 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
294 return sl_sagaw;
295
296 /* First level only. */
297 if (!ecap_slts(iommu->ecap))
298 return fl_sagaw;
299
300 return fl_sagaw & sl_sagaw;
301 }
302
303 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
304 {
305 unsigned long sagaw;
306 int agaw;
307
308 sagaw = __iommu_calculate_sagaw(iommu);
309 for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
310 if (test_bit(agaw, &sagaw))
311 break;
312 }
313
314 return agaw;
315 }
316
317 /*
318 * Calculate max SAGAW for each iommu.
319 */
320 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
321 {
322 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
323 }
324
325 /*
326 * calculate agaw for each iommu.
327 * "SAGAW" may be different across iommus, use a default agaw, and
328 * get a smaller supported agaw for iommus that don't support the default agaw.
329 */
330 int iommu_calculate_agaw(struct intel_iommu *iommu)
331 {
332 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
333 }
334
335 static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
336 {
337 return sm_supported(iommu) ?
338 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
339 }
340
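/*
 * Return the context entry for @bus/@devfn, allocating the context table
 * page on demand when @alloc is set. In scalable mode a root entry covers
 * two context tables, so devfn 0x80-0xff is redirected to the upper half.
 */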
341 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
342 u8 devfn, int alloc)
343 {
344 struct root_entry *root = &iommu->root_entry[bus];
345 struct context_entry *context;
346 u64 *entry;
347
348 /*
349 * Unless the caller requested a new entry to be allocated,
350 * returning a copied context entry makes no sense.
351 */
352 if (!alloc && context_copied(iommu, bus, devfn))
353 return NULL;
354
355 entry = &root->lo;
356 if (sm_supported(iommu)) {
357 if (devfn >= 0x80) {
358 devfn -= 0x80;
359 entry = &root->hi;
360 }
361 devfn *= 2;
362 }
363 if (*entry & 1)
364 context = phys_to_virt(*entry & VTD_PAGE_MASK);
365 else {
366 unsigned long phy_addr;
367 if (!alloc)
368 return NULL;
369
370 context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
371 SZ_4K);
372 if (!context)
373 return NULL;
374
375 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
376 phy_addr = virt_to_phys((void *)context);
377 *entry = phy_addr | 1;
378 __iommu_flush_cache(iommu, entry, sizeof(*entry));
379 }
380 return &context[devfn];
381 }
382
383 /**
384 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
385 * sub-hierarchy of a candidate PCI-PCI bridge
386 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
387 * @bridge: the candidate PCI-PCI bridge
388 *
389 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
390 */
391 static bool
392 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
393 {
394 struct pci_dev *pdev, *pbridge;
395
396 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
397 return false;
398
399 pdev = to_pci_dev(dev);
400 pbridge = to_pci_dev(bridge);
401
402 if (pbridge->subordinate &&
403 pbridge->subordinate->number <= pdev->bus->number &&
404 pbridge->subordinate->busn_res.end >= pdev->bus->number)
405 return true;
406
407 return false;
408 }
409
410 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
411 {
412 struct dmar_drhd_unit *drhd;
413 u32 vtbar;
414 int rc;
415
416 /* We know that this device on this chipset has its own IOMMU.
417 * If we find it under a different IOMMU, then the BIOS is lying
418 * to us. Hope that the IOMMU for this device is actually
419 * disabled, and it needs no translation...
420 */
421 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
422 if (rc) {
423 /* "can't" happen */
424 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
425 return false;
426 }
427 vtbar &= 0xffff0000;
428
429 /* we know that this iommu should be at offset 0xa000 from vtbar */
430 drhd = dmar_find_matched_drhd_unit(pdev);
431 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
432 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
433 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
434 return true;
435 }
436
437 return false;
438 }
439
440 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
441 {
442 if (!iommu || iommu->drhd->ignored)
443 return true;
444
445 if (dev_is_pci(dev)) {
446 struct pci_dev *pdev = to_pci_dev(dev);
447
448 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
449 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
450 quirk_ioat_snb_local_iommu(pdev))
451 return true;
452 }
453
454 return false;
455 }
456
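/*
 * Walk the DRHD device scope tables to find the IOMMU that covers @dev and
 * report the bus/devfn it should be addressed with. VFs are matched through
 * their PF, and devices behind a scoped PCI bridge match that bridge.
 */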
457 static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn)
458 {
459 struct dmar_drhd_unit *drhd = NULL;
460 struct pci_dev *pdev = NULL;
461 struct intel_iommu *iommu;
462 struct device *tmp;
463 u16 segment = 0;
464 int i;
465
466 if (!dev)
467 return NULL;
468
469 if (dev_is_pci(dev)) {
470 struct pci_dev *pf_pdev;
471
472 pdev = pci_real_dma_dev(to_pci_dev(dev));
473
474 /* VFs aren't listed in scope tables; we need to look up
475 * the PF instead to find the IOMMU. */
476 pf_pdev = pci_physfn(pdev);
477 dev = &pf_pdev->dev;
478 segment = pci_domain_nr(pdev->bus);
479 } else if (has_acpi_companion(dev))
480 dev = &ACPI_COMPANION(dev)->dev;
481
482 rcu_read_lock();
483 for_each_iommu(iommu, drhd) {
484 if (pdev && segment != drhd->segment)
485 continue;
486
487 for_each_active_dev_scope(drhd->devices,
488 drhd->devices_cnt, i, tmp) {
489 if (tmp == dev) {
490 /* For a VF use its original BDF# not that of the PF
491 * which we used for the IOMMU lookup. Strictly speaking
492 * we could do this for all PCI devices; we only need to
493 * get the BDF# from the scope table for ACPI matches. */
494 if (pdev && pdev->is_virtfn)
495 goto got_pdev;
496
497 if (bus && devfn) {
498 *bus = drhd->devices[i].bus;
499 *devfn = drhd->devices[i].devfn;
500 }
501 goto out;
502 }
503
504 if (is_downstream_to_pci_bridge(dev, tmp))
505 goto got_pdev;
506 }
507
508 if (pdev && drhd->include_all) {
509 got_pdev:
510 if (bus && devfn) {
511 *bus = pdev->bus->number;
512 *devfn = pdev->devfn;
513 }
514 goto out;
515 }
516 }
517 iommu = NULL;
518 out:
519 if (iommu_is_dummy(iommu, dev))
520 iommu = NULL;
521
522 rcu_read_unlock();
523
524 return iommu;
525 }
526
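/*
 * Free every context table referenced by the root table, then the root
 * table itself. In scalable mode each root entry references two context
 * tables (devfn 0x00-0x7f and 0x80-0xff), so both halves are freed.
 */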
527 static void free_context_table(struct intel_iommu *iommu)
528 {
529 struct context_entry *context;
530 int i;
531
532 if (!iommu->root_entry)
533 return;
534
535 for (i = 0; i < ROOT_ENTRY_NR; i++) {
536 context = iommu_context_addr(iommu, i, 0, 0);
537 if (context)
538 iommu_free_pages(context);
539
540 if (!sm_supported(iommu))
541 continue;
542
543 context = iommu_context_addr(iommu, i, 0x80, 0);
544 if (context)
545 iommu_free_pages(context);
546 }
547
548 iommu_free_pages(iommu->root_entry);
549 iommu->root_entry = NULL;
550 }
551
552 #ifdef CONFIG_DMAR_DEBUG
553 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
554 u8 bus, u8 devfn, struct dma_pte *parent, int level)
555 {
556 struct dma_pte *pte;
557 int offset;
558
559 while (1) {
560 offset = pfn_level_offset(pfn, level);
561 pte = &parent[offset];
562
563 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
564
565 if (!dma_pte_present(pte)) {
566 pr_info("page table not present at level %d\n", level - 1);
567 break;
568 }
569
570 if (level == 1 || dma_pte_superpage(pte))
571 break;
572
573 parent = phys_to_virt(dma_pte_addr(pte));
574 level--;
575 }
576 }
577
578 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
579 unsigned long long addr, u32 pasid)
580 {
581 struct pasid_dir_entry *dir, *pde;
582 struct pasid_entry *entries, *pte;
583 struct context_entry *ctx_entry;
584 struct root_entry *rt_entry;
585 int i, dir_index, index, level;
586 u8 devfn = source_id & 0xff;
587 u8 bus = source_id >> 8;
588 struct dma_pte *pgtable;
589
590 pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
591
592 /* root entry dump */
593 if (!iommu->root_entry) {
594 pr_info("root table is not present\n");
595 return;
596 }
597 rt_entry = &iommu->root_entry[bus];
598
599 if (sm_supported(iommu))
600 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
601 rt_entry->hi, rt_entry->lo);
602 else
603 pr_info("root entry: 0x%016llx", rt_entry->lo);
604
605 /* context entry dump */
606 ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
607 if (!ctx_entry) {
608 pr_info("context table is not present\n");
609 return;
610 }
611
612 pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
613 ctx_entry->hi, ctx_entry->lo);
614
615 /* legacy mode does not require PASID entries */
616 if (!sm_supported(iommu)) {
617 if (!context_present(ctx_entry)) {
618 pr_info("legacy mode page table is not present\n");
619 return;
620 }
621 level = agaw_to_level(ctx_entry->hi & 7);
622 pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
623 goto pgtable_walk;
624 }
625
626 if (!context_present(ctx_entry)) {
627 pr_info("pasid directory table is not present\n");
628 return;
629 }
630
631 /* get the pointer to pasid directory entry */
632 dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
633
634 /* For request-without-pasid, get the pasid from context entry */
635 if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
636 pasid = IOMMU_NO_PASID;
637
638 dir_index = pasid >> PASID_PDE_SHIFT;
639 pde = &dir[dir_index];
640 pr_info("pasid dir entry: 0x%016llx\n", pde->val);
641
642 /* get the pointer to the pasid table entry */
643 entries = get_pasid_table_from_pde(pde);
644 if (!entries) {
645 pr_info("pasid table is not present\n");
646 return;
647 }
648 index = pasid & PASID_PTE_MASK;
649 pte = &entries[index];
650 for (i = 0; i < ARRAY_SIZE(pte->val); i++)
651 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
652
653 if (!pasid_pte_is_present(pte)) {
654 pr_info("scalable mode page table is not present\n");
655 return;
656 }
657
658 if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
659 level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
660 pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
661 } else {
662 level = agaw_to_level((pte->val[0] >> 2) & 0x7);
663 pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
664 }
665
666 pgtable_walk:
667 pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
668 }
669 #endif
670
671 /* iommu handling */
672 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
673 {
674 struct root_entry *root;
675
676 root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K);
677 if (!root) {
678 pr_err("Allocating root entry for %s failed\n",
679 iommu->name);
680 return -ENOMEM;
681 }
682
683 __iommu_flush_cache(iommu, root, ROOT_SIZE);
684 iommu->root_entry = root;
685
686 return 0;
687 }
688
689 static void iommu_set_root_entry(struct intel_iommu *iommu)
690 {
691 u64 addr;
692 u32 sts;
693 unsigned long flag;
694
695 addr = virt_to_phys(iommu->root_entry);
696 if (sm_supported(iommu))
697 addr |= DMA_RTADDR_SMT;
698
699 raw_spin_lock_irqsave(&iommu->register_lock, flag);
700 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
701
702 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
703
704 /* Make sure hardware complete it */
705 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
706 readl, (sts & DMA_GSTS_RTPS), sts);
707
708 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
709
710 /*
711 * Hardware invalidates all DMA remapping hardware translation
712 * caches as part of SRTP flow.
713 */
714 if (cap_esrtps(iommu->cap))
715 return;
716
717 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
718 if (sm_supported(iommu))
719 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
720 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
721 }
722
723 void iommu_flush_write_buffer(struct intel_iommu *iommu)
724 {
725 u32 val;
726 unsigned long flag;
727
728 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
729 return;
730
731 raw_spin_lock_irqsave(&iommu->register_lock, flag);
732 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
733
734 /* Make sure hardware complete it */
735 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
736 readl, (!(val & DMA_GSTS_WBFS)), val);
737
738 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
739 }
740
741 /* return value determines if we need a write buffer flush */
742 static void __iommu_flush_context(struct intel_iommu *iommu,
743 u16 did, u16 source_id, u8 function_mask,
744 u64 type)
745 {
746 u64 val = 0;
747 unsigned long flag;
748
749 switch (type) {
750 case DMA_CCMD_GLOBAL_INVL:
751 val = DMA_CCMD_GLOBAL_INVL;
752 break;
753 case DMA_CCMD_DOMAIN_INVL:
754 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
755 break;
756 case DMA_CCMD_DEVICE_INVL:
757 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
758 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
759 break;
760 default:
761 pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
762 iommu->name, type);
763 return;
764 }
765 val |= DMA_CCMD_ICC;
766
767 raw_spin_lock_irqsave(&iommu->register_lock, flag);
768 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
769
770 /* Make sure hardware complete it */
771 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
772 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
773
774 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
775 }
776
777 void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
778 unsigned int size_order, u64 type)
779 {
780 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
781 u64 val = 0, val_iva = 0;
782 unsigned long flag;
783
784 switch (type) {
785 case DMA_TLB_GLOBAL_FLUSH:
786 /* global flush doesn't need to set IVA_REG */
787 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
788 break;
789 case DMA_TLB_DSI_FLUSH:
790 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
791 break;
792 case DMA_TLB_PSI_FLUSH:
793 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
794 /* IH bit is passed in as part of address */
795 val_iva = size_order | addr;
796 break;
797 default:
798 pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
799 iommu->name, type);
800 return;
801 }
802
803 if (cap_write_drain(iommu->cap))
804 val |= DMA_TLB_WRITE_DRAIN;
805
806 raw_spin_lock_irqsave(&iommu->register_lock, flag);
807 /* Note: Only uses first TLB reg currently */
808 if (val_iva)
809 dmar_writeq(iommu->reg + tlb_offset, val_iva);
810 dmar_writeq(iommu->reg + tlb_offset + 8, val);
811
812 /* Make sure hardware complete it */
813 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
814 dmar_readq, (!(val & DMA_TLB_IVT)), val);
815
816 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
817
818 /* check IOTLB invalidation granularity */
819 if (DMA_TLB_IAIG(val) == 0)
820 pr_err("Flush IOTLB failed\n");
821 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
822 pr_debug("TLB flush request %Lx, actual %Lx\n",
823 (unsigned long long)DMA_TLB_IIRG(type),
824 (unsigned long long)DMA_TLB_IAIG(val));
825 }
826
827 static struct device_domain_info *
828 domain_lookup_dev_info(struct dmar_domain *domain,
829 struct intel_iommu *iommu, u8 bus, u8 devfn)
830 {
831 struct device_domain_info *info;
832 unsigned long flags;
833
834 spin_lock_irqsave(&domain->lock, flags);
835 list_for_each_entry(info, &domain->devices, link) {
836 if (info->iommu == iommu && info->bus == bus &&
837 info->devfn == devfn) {
838 spin_unlock_irqrestore(&domain->lock, flags);
839 return info;
840 }
841 }
842 spin_unlock_irqrestore(&domain->lock, flags);
843
844 return NULL;
845 }
846
847 /*
848 * The extra devTLB flush quirk impacts those QAT devices with PCI device
849 * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
850 * check because it applies only to the built-in QAT devices and it doesn't
851 * grant additional privileges.
852 */
853 #define BUGGY_QAT_DEVID_MASK 0x4940
854 static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
855 {
856 if (pdev->vendor != PCI_VENDOR_ID_INTEL)
857 return false;
858
859 if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
860 return false;
861
862 return true;
863 }
864
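/* Enable PCI ATS only when the device issues page-aligned translation requests. */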
865 static void iommu_enable_pci_ats(struct device_domain_info *info)
866 {
867 struct pci_dev *pdev;
868
869 if (!info->ats_supported)
870 return;
871
872 pdev = to_pci_dev(info->dev);
873 if (!pci_ats_page_aligned(pdev))
874 return;
875
876 if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT))
877 info->ats_enabled = 1;
878 }
879
880 static void iommu_disable_pci_ats(struct device_domain_info *info)
881 {
882 if (!info->ats_enabled)
883 return;
884
885 pci_disable_ats(to_pci_dev(info->dev));
886 info->ats_enabled = 0;
887 }
888
889 static void iommu_enable_pci_pri(struct device_domain_info *info)
890 {
891 struct pci_dev *pdev;
892
893 if (!info->ats_enabled || !info->pri_supported)
894 return;
895
896 pdev = to_pci_dev(info->dev);
897 /* PASID is required in PRG Response Message. */
898 if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
899 return;
900
901 if (pci_reset_pri(pdev))
902 return;
903
904 if (!pci_enable_pri(pdev, PRQ_DEPTH))
905 info->pri_enabled = 1;
906 }
907
908 static void iommu_disable_pci_pri(struct device_domain_info *info)
909 {
910 if (!info->pri_enabled)
911 return;
912
913 if (WARN_ON(info->iopf_refcount))
914 iopf_queue_remove_device(info->iommu->iopf_queue, info->dev);
915
916 pci_disable_pri(to_pci_dev(info->dev));
917 info->pri_enabled = 0;
918 }
919
920 static void intel_flush_iotlb_all(struct iommu_domain *domain)
921 {
922 cache_tag_flush_all(to_dmar_domain(domain));
923 }
924
925 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
926 {
927 u32 pmen;
928 unsigned long flags;
929
930 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
931 return;
932
933 raw_spin_lock_irqsave(&iommu->register_lock, flags);
934 pmen = readl(iommu->reg + DMAR_PMEN_REG);
935 pmen &= ~DMA_PMEN_EPM;
936 writel(pmen, iommu->reg + DMAR_PMEN_REG);
937
938 /* wait for the protected region status bit to clear */
939 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
940 readl, !(pmen & DMA_PMEN_PRS), pmen);
941
942 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
943 }
944
945 static void iommu_enable_translation(struct intel_iommu *iommu)
946 {
947 u32 sts;
948 unsigned long flags;
949
950 raw_spin_lock_irqsave(&iommu->register_lock, flags);
951 iommu->gcmd |= DMA_GCMD_TE;
952 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
953
954 /* Make sure hardware complete it */
955 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
956 readl, (sts & DMA_GSTS_TES), sts);
957
958 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
959 }
960
961 static void iommu_disable_translation(struct intel_iommu *iommu)
962 {
963 u32 sts;
964 unsigned long flag;
965
966 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
967 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
968 return;
969
970 raw_spin_lock_irqsave(&iommu->register_lock, flag);
971 iommu->gcmd &= ~DMA_GCMD_TE;
972 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
973
974 /* Make sure hardware complete it */
975 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
976 readl, (!(sts & DMA_GSTS_TES)), sts);
977
978 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
979 }
980
981 static void disable_dmar_iommu(struct intel_iommu *iommu)
982 {
983 /*
984 * All iommu domains must have been detached from the devices,
985 * hence there should be no domain IDs in use.
986 */
987 if (WARN_ON(!ida_is_empty(&iommu->domain_ida)))
988 return;
989
990 if (iommu->gcmd & DMA_GCMD_TE)
991 iommu_disable_translation(iommu);
992 }
993
994 static void free_dmar_iommu(struct intel_iommu *iommu)
995 {
996 if (iommu->copied_tables) {
997 bitmap_free(iommu->copied_tables);
998 iommu->copied_tables = NULL;
999 }
1000
1001 /* free context mapping */
1002 free_context_table(iommu);
1003
1004 if (ecap_prs(iommu->ecap))
1005 intel_iommu_finish_prq(iommu);
1006 }
1007
1008 /*
1009 * Check and return whether first level is used by default for
1010 * DMA translation.
1011 */
1012 static bool first_level_by_default(struct intel_iommu *iommu)
1013 {
1014 /* Only SL is available in legacy mode */
1015 if (!sm_supported(iommu))
1016 return false;
1017
1018 /* Only one level (either FL or SL) is available, just use it */
1019 if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap))
1020 return ecap_flts(iommu->ecap);
1021
1022 return true;
1023 }
1024
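/*
 * Attach @domain to @iommu: take a reference if the domain already has a
 * domain ID on this IOMMU, otherwise allocate one and record it in the
 * domain's per-IOMMU xarray.
 */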
1025 int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
1026 {
1027 struct iommu_domain_info *info, *curr;
1028 int num, ret = -ENOSPC;
1029
1030 if (domain->domain.type == IOMMU_DOMAIN_SVA)
1031 return 0;
1032
1033 info = kzalloc(sizeof(*info), GFP_KERNEL);
1034 if (!info)
1035 return -ENOMEM;
1036
1037 guard(mutex)(&iommu->did_lock);
1038 curr = xa_load(&domain->iommu_array, iommu->seq_id);
1039 if (curr) {
1040 curr->refcnt++;
1041 kfree(info);
1042 return 0;
1043 }
1044
1045 num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
1046 cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
1047 if (num < 0) {
1048 pr_err("%s: No free domain ids\n", iommu->name);
1049 goto err_unlock;
1050 }
1051
1052 info->refcnt = 1;
1053 info->did = num;
1054 info->iommu = iommu;
1055 curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
1056 NULL, info, GFP_KERNEL);
1057 if (curr) {
1058 ret = xa_err(curr) ? : -EBUSY;
1059 goto err_clear;
1060 }
1061
1062 return 0;
1063
1064 err_clear:
1065 ida_free(&iommu->domain_ida, info->did);
1066 err_unlock:
1067 kfree(info);
1068 return ret;
1069 }
1070
1071 void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
1072 {
1073 struct iommu_domain_info *info;
1074
1075 if (domain->domain.type == IOMMU_DOMAIN_SVA)
1076 return;
1077
1078 guard(mutex)(&iommu->did_lock);
1079 info = xa_load(&domain->iommu_array, iommu->seq_id);
1080 if (--info->refcnt == 0) {
1081 ida_free(&iommu->domain_ida, info->did);
1082 xa_erase(&domain->iommu_array, iommu->seq_id);
1083 kfree(info);
1084 }
1085 }
1086
1087 /*
1088 * For kdump cases, old valid entries may be cached due to the
1089 * in-flight DMA and copied pgtable, but there is no unmapping
1090 * behaviour for them, thus we need an explicit cache flush for
1091 * the newly-mapped device. For kdump, at this point, the device
1092 * is supposed to finish reset at its driver probe stage, so no
1093 * in-flight DMA will exist, and we don't need to worry about it
1094 * hereafter.
1095 */
1096 static void copied_context_tear_down(struct intel_iommu *iommu,
1097 struct context_entry *context,
1098 u8 bus, u8 devfn)
1099 {
1100 u16 did_old;
1101
1102 if (!context_copied(iommu, bus, devfn))
1103 return;
1104
1105 assert_spin_locked(&iommu->lock);
1106
1107 did_old = context_domain_id(context);
1108 context_clear_entry(context);
1109
1110 if (did_old < cap_ndoms(iommu->cap)) {
1111 iommu->flush.flush_context(iommu, did_old,
1112 PCI_DEVID(bus, devfn),
1113 DMA_CCMD_MASK_NOBIT,
1114 DMA_CCMD_DEVICE_INVL);
1115 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
1116 DMA_TLB_DSI_FLUSH);
1117 }
1118
1119 clear_context_copied(iommu, bus, devfn);
1120 }
1121
1122 /*
1123 * It's a non-present to present mapping. If hardware doesn't cache
1124 * non-present entries we only need to flush the write-buffer. If the
1125 * hardware _does_ cache non-present entries, then it does so in the special
1126 * domain #0, which we have to flush:
1127 */
1128 static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
1129 u8 bus, u8 devfn)
1130 {
1131 if (cap_caching_mode(iommu->cap)) {
1132 iommu->flush.flush_context(iommu, 0,
1133 PCI_DEVID(bus, devfn),
1134 DMA_CCMD_MASK_NOBIT,
1135 DMA_CCMD_DEVICE_INVL);
1136 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1137 } else {
1138 iommu_flush_write_buffer(iommu);
1139 }
1140 }
1141
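/*
 * Program a legacy-mode context entry for @bus/@devfn on @iommu, pointing
 * it at the domain's second-stage page table and selecting the device-IOTLB
 * translation type when the device supports ATS.
 */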
1142 static int domain_context_mapping_one(struct dmar_domain *domain,
1143 struct intel_iommu *iommu,
1144 u8 bus, u8 devfn)
1145 {
1146 struct device_domain_info *info =
1147 domain_lookup_dev_info(domain, iommu, bus, devfn);
1148 u16 did = domain_id_iommu(domain, iommu);
1149 int translation = CONTEXT_TT_MULTI_LEVEL;
1150 struct pt_iommu_vtdss_hw_info pt_info;
1151 struct context_entry *context;
1152 int ret;
1153
1154 if (WARN_ON(!intel_domain_is_ss_paging(domain)))
1155 return -EINVAL;
1156
1157 pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);
1158
1159 pr_debug("Set context mapping for %02x:%02x.%d\n",
1160 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1161
1162 spin_lock(&iommu->lock);
1163 ret = -ENOMEM;
1164 context = iommu_context_addr(iommu, bus, devfn, 1);
1165 if (!context)
1166 goto out_unlock;
1167
1168 ret = 0;
1169 if (context_present(context) && !context_copied(iommu, bus, devfn))
1170 goto out_unlock;
1171
1172 copied_context_tear_down(iommu, context, bus, devfn);
1173 context_clear_entry(context);
1174 context_set_domain_id(context, did);
1175
1176 if (info && info->ats_supported)
1177 translation = CONTEXT_TT_DEV_IOTLB;
1178 else
1179 translation = CONTEXT_TT_MULTI_LEVEL;
1180
1181 context_set_address_root(context, pt_info.ssptptr);
1182 context_set_address_width(context, pt_info.aw);
1183 context_set_translation_type(context, translation);
1184 context_set_fault_enable(context);
1185 context_set_present(context);
1186 if (!ecap_coherent(iommu->ecap))
1187 clflush_cache_range(context, sizeof(*context));
1188 context_present_cache_flush(iommu, did, bus, devfn);
1189 ret = 0;
1190
1191 out_unlock:
1192 spin_unlock(&iommu->lock);
1193
1194 return ret;
1195 }
1196
1197 static int domain_context_mapping_cb(struct pci_dev *pdev,
1198 u16 alias, void *opaque)
1199 {
1200 struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev);
1201 struct intel_iommu *iommu = info->iommu;
1202 struct dmar_domain *domain = opaque;
1203
1204 return domain_context_mapping_one(domain, iommu,
1205 PCI_BUS_NUM(alias), alias & 0xff);
1206 }
1207
1208 static int
1209 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
1210 {
1211 struct device_domain_info *info = dev_iommu_priv_get(dev);
1212 struct intel_iommu *iommu = info->iommu;
1213 u8 bus = info->bus, devfn = info->devfn;
1214 int ret;
1215
1216 if (!dev_is_pci(dev))
1217 return domain_context_mapping_one(domain, iommu, bus, devfn);
1218
1219 ret = pci_for_each_dma_alias(to_pci_dev(dev),
1220 domain_context_mapping_cb, domain);
1221 if (ret)
1222 return ret;
1223
1224 iommu_enable_pci_ats(info);
1225
1226 return 0;
1227 }
1228
1229 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
1230 {
1231 struct intel_iommu *iommu = info->iommu;
1232 struct context_entry *context;
1233 u16 did;
1234
1235 spin_lock(&iommu->lock);
1236 context = iommu_context_addr(iommu, bus, devfn, 0);
1237 if (!context) {
1238 spin_unlock(&iommu->lock);
1239 return;
1240 }
1241
1242 did = context_domain_id(context);
1243 context_clear_entry(context);
1244 __iommu_flush_cache(iommu, context, sizeof(*context));
1245 spin_unlock(&iommu->lock);
1246 intel_context_flush_no_pasid(info, context, did);
1247 }
1248
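/*
 * Install a first-stage translation for @pasid: set up a fresh PASID entry
 * when there is no previous domain, otherwise replace the entry that was
 * installed for @old.
 */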
1249 int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
1250 ioasid_t pasid, u16 did, phys_addr_t fsptptr,
1251 int flags, struct iommu_domain *old)
1252 {
1253 if (!old)
1254 return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid,
1255 did, flags);
1256 return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did,
1257 iommu_domain_did(old, iommu),
1258 flags);
1259 }
1260
1261 static int domain_setup_second_level(struct intel_iommu *iommu,
1262 struct dmar_domain *domain,
1263 struct device *dev, ioasid_t pasid,
1264 struct iommu_domain *old)
1265 {
1266 if (!old)
1267 return intel_pasid_setup_second_level(iommu, domain,
1268 dev, pasid);
1269 return intel_pasid_replace_second_level(iommu, domain, dev,
1270 iommu_domain_did(old, iommu),
1271 pasid);
1272 }
1273
1274 static int domain_setup_passthrough(struct intel_iommu *iommu,
1275 struct device *dev, ioasid_t pasid,
1276 struct iommu_domain *old)
1277 {
1278 if (!old)
1279 return intel_pasid_setup_pass_through(iommu, dev, pasid);
1280 return intel_pasid_replace_pass_through(iommu, dev,
1281 iommu_domain_did(old, iommu),
1282 pasid);
1283 }
1284
1285 static int domain_setup_first_level(struct intel_iommu *iommu,
1286 struct dmar_domain *domain,
1287 struct device *dev,
1288 u32 pasid, struct iommu_domain *old)
1289 {
1290 struct pt_iommu_x86_64_hw_info pt_info;
1291 unsigned int flags = 0;
1292
1293 pt_iommu_x86_64_hw_info(&domain->fspt, &pt_info);
1294 if (WARN_ON(pt_info.levels != 4 && pt_info.levels != 5))
1295 return -EINVAL;
1296
1297 if (pt_info.levels == 5)
1298 flags |= PASID_FLAG_FL5LP;
1299
1300 if (domain->force_snooping)
1301 flags |= PASID_FLAG_PAGE_SNOOP;
1302
1303 if (!(domain->fspt.x86_64_pt.common.features &
1304 BIT(PT_FEAT_DMA_INCOHERENT)))
1305 flags |= PASID_FLAG_PWSNP;
1306
1307 return __domain_setup_first_level(iommu, dev, pasid,
1308 domain_id_iommu(domain, iommu),
1309 pt_info.gcr3_pt, flags, old);
1310 }
1311
1312 static int dmar_domain_attach_device(struct dmar_domain *domain,
1313 struct device *dev)
1314 {
1315 struct device_domain_info *info = dev_iommu_priv_get(dev);
1316 struct intel_iommu *iommu = info->iommu;
1317 unsigned long flags;
1318 int ret;
1319
1320 ret = domain_attach_iommu(domain, iommu);
1321 if (ret)
1322 return ret;
1323
1324 info->domain = domain;
1325 info->domain_attached = true;
1326 spin_lock_irqsave(&domain->lock, flags);
1327 list_add(&info->link, &domain->devices);
1328 spin_unlock_irqrestore(&domain->lock, flags);
1329
1330 if (dev_is_real_dma_subdevice(dev))
1331 return 0;
1332
1333 if (!sm_supported(iommu))
1334 ret = domain_context_mapping(domain, dev);
1335 else if (intel_domain_is_fs_paging(domain))
1336 ret = domain_setup_first_level(iommu, domain, dev,
1337 IOMMU_NO_PASID, NULL);
1338 else if (intel_domain_is_ss_paging(domain))
1339 ret = domain_setup_second_level(iommu, domain, dev,
1340 IOMMU_NO_PASID, NULL);
1341 else if (WARN_ON(true))
1342 ret = -EINVAL;
1343
1344 if (ret)
1345 goto out_block_translation;
1346
1347 ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
1348 if (ret)
1349 goto out_block_translation;
1350
1351 return 0;
1352
1353 out_block_translation:
1354 device_block_translation(dev);
1355 return ret;
1356 }
1357
1358 /**
1359 * device_rmrr_is_relaxable - Test whether the RMRR of this device
1360 * is relaxable (ie. is allowed to be not enforced under some conditions)
1361 * @dev: device handle
1362 *
1363 * We assume that PCI USB devices with RMRRs have them largely
1364 * for historical reasons and that the RMRR space is not actively used post
1365 * boot. This exclusion may change if vendors begin to abuse it.
1366 *
1367 * The same exception is made for graphics devices, with the requirement that
1368 * any use of the RMRR regions will be torn down before assigning the device
1369 * to a guest.
1370 *
1371 * Return: true if the RMRR is relaxable, false otherwise
1372 */
1373 static bool device_rmrr_is_relaxable(struct device *dev)
1374 {
1375 struct pci_dev *pdev;
1376
1377 if (!dev_is_pci(dev))
1378 return false;
1379
1380 pdev = to_pci_dev(dev);
1381 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
1382 return true;
1383 else
1384 return false;
1385 }
1386
1387 static int device_def_domain_type(struct device *dev)
1388 {
1389 struct device_domain_info *info = dev_iommu_priv_get(dev);
1390 struct intel_iommu *iommu = info->iommu;
1391
1392 /*
1393 * Hardware does not support the passthrough translation mode.
1394 * Always use a dynamic mapping domain.
1395 */
1396 if (!ecap_pass_through(iommu->ecap))
1397 return IOMMU_DOMAIN_DMA;
1398
1399 if (dev_is_pci(dev)) {
1400 struct pci_dev *pdev = to_pci_dev(dev);
1401
1402 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
1403 return IOMMU_DOMAIN_IDENTITY;
1404 }
1405
1406 return 0;
1407 }
1408
1409 static void intel_iommu_init_qi(struct intel_iommu *iommu)
1410 {
1411 /*
1412 * Start from the sane iommu hardware state.
1413 * If the queued invalidation is already initialized by us
1414 * (for example, while enabling interrupt-remapping) then
1415 * things are already rolling from a sane state.
1416 */
1417 if (!iommu->qi) {
1418 /*
1419 * Clear any previous faults.
1420 */
1421 dmar_fault(-1, iommu);
1422 /*
1423 * Disable queued invalidation if supported and already enabled
1424 * before OS handover.
1425 */
1426 dmar_disable_qi(iommu);
1427 }
1428
1429 if (dmar_enable_qi(iommu)) {
1430 /*
1431 * Queued Invalidate not enabled, use Register Based Invalidate
1432 */
1433 iommu->flush.flush_context = __iommu_flush_context;
1434 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1435 pr_info("%s: Using Register based invalidation\n",
1436 iommu->name);
1437 } else {
1438 iommu->flush.flush_context = qi_flush_context;
1439 iommu->flush.flush_iotlb = qi_flush_iotlb;
1440 pr_info("%s: Using Queued invalidation\n", iommu->name);
1441 }
1442 }
1443
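/*
 * kdump: copy one bus worth of context entries from the old kernel's
 * tables into freshly allocated context tables, reserving any domain IDs
 * found there so they are not handed out again.
 */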
1444 static int copy_context_table(struct intel_iommu *iommu,
1445 struct root_entry *old_re,
1446 struct context_entry **tbl,
1447 int bus, bool ext)
1448 {
1449 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
1450 struct context_entry *new_ce = NULL, ce;
1451 struct context_entry *old_ce = NULL;
1452 struct root_entry re;
1453 phys_addr_t old_ce_phys;
1454
1455 tbl_idx = ext ? bus * 2 : bus;
1456 memcpy(&re, old_re, sizeof(re));
1457
1458 for (devfn = 0; devfn < 256; devfn++) {
1459 /* First calculate the correct index */
1460 idx = (ext ? devfn * 2 : devfn) % 256;
1461
1462 if (idx == 0) {
1463 /* First save what we may have and clean up */
1464 if (new_ce) {
1465 tbl[tbl_idx] = new_ce;
1466 __iommu_flush_cache(iommu, new_ce,
1467 VTD_PAGE_SIZE);
1468 pos = 1;
1469 }
1470
1471 if (old_ce)
1472 memunmap(old_ce);
1473
1474 ret = 0;
1475 if (devfn < 0x80)
1476 old_ce_phys = root_entry_lctp(&re);
1477 else
1478 old_ce_phys = root_entry_uctp(&re);
1479
1480 if (!old_ce_phys) {
1481 if (ext && devfn == 0) {
1482 /* No LCTP, try UCTP */
1483 devfn = 0x7f;
1484 continue;
1485 } else {
1486 goto out;
1487 }
1488 }
1489
1490 ret = -ENOMEM;
1491 old_ce = memremap(old_ce_phys, PAGE_SIZE,
1492 MEMREMAP_WB);
1493 if (!old_ce)
1494 goto out;
1495
1496 new_ce = iommu_alloc_pages_node_sz(iommu->node,
1497 GFP_KERNEL, SZ_4K);
1498 if (!new_ce)
1499 goto out_unmap;
1500
1501 ret = 0;
1502 }
1503
1504 /* Now copy the context entry */
1505 memcpy(&ce, old_ce + idx, sizeof(ce));
1506
1507 if (!context_present(&ce))
1508 continue;
1509
1510 did = context_domain_id(&ce);
1511 if (did >= 0 && did < cap_ndoms(iommu->cap))
1512 ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL);
1513
1514 set_context_copied(iommu, bus, devfn);
1515 new_ce[idx] = ce;
1516 }
1517
1518 tbl[tbl_idx + pos] = new_ce;
1519
1520 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
1521
1522 out_unmap:
1523 memunmap(old_ce);
1524
1525 out:
1526 return ret;
1527 }
1528
1529 static int copy_translation_tables(struct intel_iommu *iommu)
1530 {
1531 struct context_entry **ctxt_tbls;
1532 struct root_entry *old_rt;
1533 phys_addr_t old_rt_phys;
1534 int ctxt_table_entries;
1535 u64 rtaddr_reg;
1536 int bus, ret;
1537 bool new_ext, ext;
1538
1539 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
1540 ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
1541 new_ext = !!sm_supported(iommu);
1542
1543 /*
1544 * The RTT bit can only be changed when translation is disabled,
1545 * but disabling translation means opening a window for data
1546 * corruption. So bail out and don't copy anything if we would
1547 * have to change the bit.
1548 */
1549 if (new_ext != ext)
1550 return -EINVAL;
1551
1552 iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
1553 if (!iommu->copied_tables)
1554 return -ENOMEM;
1555
1556 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
1557 if (!old_rt_phys)
1558 return -EINVAL;
1559
1560 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
1561 if (!old_rt)
1562 return -ENOMEM;
1563
1564 /* This is too big for the stack - allocate it from slab */
1565 ctxt_table_entries = ext ? 512 : 256;
1566 ret = -ENOMEM;
1567 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
1568 if (!ctxt_tbls)
1569 goto out_unmap;
1570
1571 for (bus = 0; bus < 256; bus++) {
1572 ret = copy_context_table(iommu, &old_rt[bus],
1573 ctxt_tbls, bus, ext);
1574 if (ret) {
1575 pr_err("%s: Failed to copy context table for bus %d\n",
1576 iommu->name, bus);
1577 continue;
1578 }
1579 }
1580
1581 spin_lock(&iommu->lock);
1582
1583 /* Context tables are copied, now write them to the root_entry table */
1584 for (bus = 0; bus < 256; bus++) {
1585 int idx = ext ? bus * 2 : bus;
1586 u64 val;
1587
1588 if (ctxt_tbls[idx]) {
1589 val = virt_to_phys(ctxt_tbls[idx]) | 1;
1590 iommu->root_entry[bus].lo = val;
1591 }
1592
1593 if (!ext || !ctxt_tbls[idx + 1])
1594 continue;
1595
1596 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
1597 iommu->root_entry[bus].hi = val;
1598 }
1599
1600 spin_unlock(&iommu->lock);
1601
1602 kfree(ctxt_tbls);
1603
1604 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
1605
1606 ret = 0;
1607
1608 out_unmap:
1609 memunmap(old_rt);
1610
1611 return ret;
1612 }
1613
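/*
 * Bring up all DMA remapping units: initialize invalidation queues, deal
 * with translation structures inherited from a previous (kdump) kernel,
 * program the root entries and set up fault and page request interrupts.
 */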
1614 static int __init init_dmars(void)
1615 {
1616 struct dmar_drhd_unit *drhd;
1617 struct intel_iommu *iommu;
1618 int ret;
1619
1620 for_each_iommu(iommu, drhd) {
1621 if (drhd->ignored) {
1622 iommu_disable_translation(iommu);
1623 continue;
1624 }
1625
1626 /*
1627 * Find the max pasid size of all IOMMU's in the system.
1628 * We need to ensure the system pasid table is no bigger
1629 * than the smallest supported.
1630 */
1631 if (pasid_supported(iommu)) {
1632 u32 temp = 2 << ecap_pss(iommu->ecap);
1633
1634 intel_pasid_max_id = min_t(u32, temp,
1635 intel_pasid_max_id);
1636 }
1637
1638 intel_iommu_init_qi(iommu);
1639 init_translation_status(iommu);
1640
1641 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1642 iommu_disable_translation(iommu);
1643 clear_translation_pre_enabled(iommu);
1644 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
1645 iommu->name);
1646 }
1647
1648 /*
1649 * TBD:
1650 * we could share the same root & context tables
1651 * among all IOMMU's. Need to Split it later.
1652 */
1653 ret = iommu_alloc_root_entry(iommu);
1654 if (ret)
1655 goto free_iommu;
1656
1657 if (translation_pre_enabled(iommu)) {
1658 pr_info("Translation already enabled - trying to copy translation structures\n");
1659
1660 ret = copy_translation_tables(iommu);
1661 if (ret) {
1662 /*
1663 * We found the IOMMU with translation
1664 * enabled - but failed to copy over the
1665 * old root-entry table. Try to proceed
1666 * by disabling translation now and
1667 * allocating a clean root-entry table.
1668 * This might cause DMAR faults, but
1669 * probably the dump will still succeed.
1670 */
1671 pr_err("Failed to copy translation tables from previous kernel for %s\n",
1672 iommu->name);
1673 iommu_disable_translation(iommu);
1674 clear_translation_pre_enabled(iommu);
1675 } else {
1676 pr_info("Copied translation tables from previous kernel for %s\n",
1677 iommu->name);
1678 }
1679 }
1680
1681 intel_svm_check(iommu);
1682 }
1683
1684 /*
1685 * Now that qi is enabled on all iommus, set the root entry and flush
1686 * caches. This is required on some Intel X58 chipsets, otherwise the
1687 * flush_context function will loop forever and the boot hangs.
1688 */
1689 for_each_active_iommu(iommu, drhd) {
1690 iommu_flush_write_buffer(iommu);
1691 iommu_set_root_entry(iommu);
1692 }
1693
1694 check_tylersburg_isoch();
1695
1696 /*
1697 * for each drhd
1698 * enable fault log
1699 * global invalidate context cache
1700 * global invalidate iotlb
1701 * enable translation
1702 */
1703 for_each_iommu(iommu, drhd) {
1704 if (drhd->ignored) {
1705 /*
1706 * we always have to disable PMRs or DMA may fail on
1707 * this device
1708 */
1709 if (force_on)
1710 iommu_disable_protect_mem_regions(iommu);
1711 continue;
1712 }
1713
1714 iommu_flush_write_buffer(iommu);
1715
1716 if (ecap_prs(iommu->ecap)) {
1717 /*
1718 * Calling dmar_alloc_hwirq() with dmar_global_lock held
1719 * could cause a lock race condition.
1720 */
1721 up_write(&dmar_global_lock);
1722 ret = intel_iommu_enable_prq(iommu);
1723 down_write(&dmar_global_lock);
1724 if (ret)
1725 goto free_iommu;
1726 }
1727
1728 ret = dmar_set_interrupt(iommu);
1729 if (ret)
1730 goto free_iommu;
1731 }
1732
1733 return 0;
1734
1735 free_iommu:
1736 for_each_active_iommu(iommu, drhd) {
1737 disable_dmar_iommu(iommu);
1738 free_dmar_iommu(iommu);
1739 }
1740
1741 return ret;
1742 }
1743
1744 static void __init init_no_remapping_devices(void)
1745 {
1746 struct dmar_drhd_unit *drhd;
1747 struct device *dev;
1748 int i;
1749
1750 for_each_drhd_unit(drhd) {
1751 if (!drhd->include_all) {
1752 for_each_active_dev_scope(drhd->devices,
1753 drhd->devices_cnt, i, dev)
1754 break;
1755 /* ignore DMAR unit if no devices exist */
1756 if (i == drhd->devices_cnt)
1757 drhd->ignored = 1;
1758 }
1759 }
1760
1761 for_each_active_drhd_unit(drhd) {
1762 if (drhd->include_all)
1763 continue;
1764
1765 for_each_active_dev_scope(drhd->devices,
1766 drhd->devices_cnt, i, dev)
1767 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
1768 break;
1769 if (i < drhd->devices_cnt)
1770 continue;
1771
1772 /* This IOMMU has *only* gfx devices. Either bypass it or
1773 set the gfx_dedicated flag, as appropriate */
1774 drhd->gfx_dedicated = 1;
1775 if (disable_igfx_iommu)
1776 drhd->ignored = 1;
1777 }
1778 }
1779
1780 #ifdef CONFIG_SUSPEND
1781 static int init_iommu_hw(void)
1782 {
1783 struct dmar_drhd_unit *drhd;
1784 struct intel_iommu *iommu = NULL;
1785 int ret;
1786
1787 for_each_active_iommu(iommu, drhd) {
1788 if (iommu->qi) {
1789 ret = dmar_reenable_qi(iommu);
1790 if (ret)
1791 return ret;
1792 }
1793 }
1794
1795 for_each_iommu(iommu, drhd) {
1796 if (drhd->ignored) {
1797 /*
1798 * we always have to disable PMRs or DMA may fail on
1799 * this device
1800 */
1801 if (force_on)
1802 iommu_disable_protect_mem_regions(iommu);
1803 continue;
1804 }
1805
1806 iommu_flush_write_buffer(iommu);
1807 iommu_set_root_entry(iommu);
1808 iommu_enable_translation(iommu);
1809 iommu_disable_protect_mem_regions(iommu);
1810 }
1811
1812 return 0;
1813 }
1814
1815 static void iommu_flush_all(void)
1816 {
1817 struct dmar_drhd_unit *drhd;
1818 struct intel_iommu *iommu;
1819
1820 for_each_active_iommu(iommu, drhd) {
1821 iommu->flush.flush_context(iommu, 0, 0, 0,
1822 DMA_CCMD_GLOBAL_INVL);
1823 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1824 DMA_TLB_GLOBAL_FLUSH);
1825 }
1826 }
1827
1828 static int iommu_suspend(void *data)
1829 {
1830 struct dmar_drhd_unit *drhd;
1831 struct intel_iommu *iommu = NULL;
1832 unsigned long flag;
1833
1834 iommu_flush_all();
1835
1836 for_each_active_iommu(iommu, drhd) {
1837 iommu_disable_translation(iommu);
1838
1839 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1840
1841 iommu->iommu_state[SR_DMAR_FECTL_REG] =
1842 readl(iommu->reg + DMAR_FECTL_REG);
1843 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
1844 readl(iommu->reg + DMAR_FEDATA_REG);
1845 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
1846 readl(iommu->reg + DMAR_FEADDR_REG);
1847 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
1848 readl(iommu->reg + DMAR_FEUADDR_REG);
1849
1850 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1851 }
1852 return 0;
1853 }
1854
1855 static void iommu_resume(void *data)
1856 {
1857 struct dmar_drhd_unit *drhd;
1858 struct intel_iommu *iommu = NULL;
1859 unsigned long flag;
1860
1861 if (init_iommu_hw()) {
1862 if (force_on)
1863 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
1864 else
1865 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
1866 return;
1867 }
1868
1869 for_each_active_iommu(iommu, drhd) {
1870
1871 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1872
1873 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
1874 iommu->reg + DMAR_FECTL_REG);
1875 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
1876 iommu->reg + DMAR_FEDATA_REG);
1877 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
1878 iommu->reg + DMAR_FEADDR_REG);
1879 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
1880 iommu->reg + DMAR_FEUADDR_REG);
1881
1882 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1883 }
1884 }
1885
1886 static const struct syscore_ops iommu_syscore_ops = {
1887 .resume = iommu_resume,
1888 .suspend = iommu_suspend,
1889 };
1890
1891 static struct syscore iommu_syscore = {
1892 .ops = &iommu_syscore_ops,
1893 };
1894
1895 static void __init init_iommu_pm_ops(void)
1896 {
1897 register_syscore(&iommu_syscore);
1898 }
1899
1900 #else
1901 static inline void init_iommu_pm_ops(void) {}
1902 #endif /* CONFIG_SUSPEND */
1903
1904 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
1905 {
1906 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
1907 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
1908 rmrr->end_address <= rmrr->base_address ||
1909 arch_rmrr_sanity_check(rmrr))
1910 return -EINVAL;
1911
1912 return 0;
1913 }
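/*
 * Worked example of the checks above (hypothetical addresses; note that
 * arch_rmrr_sanity_check() may still reject a range that passes here):
 *
 *	base 0x7c000000, end 0x7c0fffff -> base and end + 1 are both 4KiB
 *	aligned and end > base, so -EINVAL is not returned.
 *	base 0x7c000800, end 0x7c0fffff -> base is not page aligned, so
 *	-EINVAL is returned and the caller warns about broken firmware.
 */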
1914
1915 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
1916 {
1917 struct acpi_dmar_reserved_memory *rmrr;
1918 struct dmar_rmrr_unit *rmrru;
1919
1920 rmrr = (struct acpi_dmar_reserved_memory *)header;
1921 if (rmrr_sanity_check(rmrr)) {
1922 pr_warn(FW_BUG
1923 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
1924 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1925 rmrr->base_address, rmrr->end_address,
1926 dmi_get_system_info(DMI_BIOS_VENDOR),
1927 dmi_get_system_info(DMI_BIOS_VERSION),
1928 dmi_get_system_info(DMI_PRODUCT_VERSION));
1929 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
1930 }
1931
1932 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
1933 if (!rmrru)
1934 goto out;
1935
1936 rmrru->hdr = header;
1937
1938 rmrru->base_address = rmrr->base_address;
1939 rmrru->end_address = rmrr->end_address;
1940
1941 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
1942 ((void *)rmrr) + rmrr->header.length,
1943 &rmrru->devices_cnt);
1944 if (rmrru->devices_cnt && rmrru->devices == NULL)
1945 goto free_rmrru;
1946
1947 list_add(&rmrru->list, &dmar_rmrr_units);
1948
1949 return 0;
1950 free_rmrru:
1951 kfree(rmrru);
1952 out:
1953 return -ENOMEM;
1954 }
1955
1956 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
1957 {
1958 struct dmar_atsr_unit *atsru;
1959 struct acpi_dmar_atsr *tmp;
1960
1961 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
1962 dmar_rcu_check()) {
1963 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
1964 if (atsr->segment != tmp->segment)
1965 continue;
1966 if (atsr->header.length != tmp->header.length)
1967 continue;
1968 if (memcmp(atsr, tmp, atsr->header.length) == 0)
1969 return atsru;
1970 }
1971
1972 return NULL;
1973 }
1974
1975 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
1976 {
1977 struct acpi_dmar_atsr *atsr;
1978 struct dmar_atsr_unit *atsru;
1979
1980 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
1981 return 0;
1982
1983 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
1984 atsru = dmar_find_atsr(atsr);
1985 if (atsru)
1986 return 0;
1987
1988 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
1989 if (!atsru)
1990 return -ENOMEM;
1991
1992 /*
1993 * If memory is allocated from slab by ACPI _DSM method, we need to
1994 * copy the memory content because the memory buffer will be freed
1995 * on return.
1996 */
1997 atsru->hdr = (void *)(atsru + 1);
1998 memcpy(atsru->hdr, hdr, hdr->length);
1999 atsru->include_all = atsr->flags & 0x1;
2000 if (!atsru->include_all) {
2001 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
2002 (void *)atsr + atsr->header.length,
2003 &atsru->devices_cnt);
2004 if (atsru->devices_cnt && atsru->devices == NULL) {
2005 kfree(atsru);
2006 return -ENOMEM;
2007 }
2008 }
2009
2010 list_add_rcu(&atsru->list, &dmar_atsr_units);
2011
2012 return 0;
2013 }
2014
2015 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
2016 {
2017 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
2018 kfree(atsru);
2019 }
2020
2021 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2022 {
2023 struct acpi_dmar_atsr *atsr;
2024 struct dmar_atsr_unit *atsru;
2025
2026 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2027 atsru = dmar_find_atsr(atsr);
2028 if (atsru) {
2029 list_del_rcu(&atsru->list);
2030 synchronize_rcu();
2031 intel_iommu_free_atsr(atsru);
2032 }
2033
2034 return 0;
2035 }
2036
2037 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2038 {
2039 int i;
2040 struct device *dev;
2041 struct acpi_dmar_atsr *atsr;
2042 struct dmar_atsr_unit *atsru;
2043
2044 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2045 atsru = dmar_find_atsr(atsr);
2046 if (!atsru)
2047 return 0;
2048
2049 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
2050 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
2051 i, dev)
2052 return -EBUSY;
2053 }
2054
2055 return 0;
2056 }
2057
2058 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
2059 {
2060 struct dmar_satc_unit *satcu;
2061 struct acpi_dmar_satc *tmp;
2062
2063 list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
2064 dmar_rcu_check()) {
2065 tmp = (struct acpi_dmar_satc *)satcu->hdr;
2066 if (satc->segment != tmp->segment)
2067 continue;
2068 if (satc->header.length != tmp->header.length)
2069 continue;
2070 if (memcmp(satc, tmp, satc->header.length) == 0)
2071 return satcu;
2072 }
2073
2074 return NULL;
2075 }
2076
2077 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
2078 {
2079 struct acpi_dmar_satc *satc;
2080 struct dmar_satc_unit *satcu;
2081
2082 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
2083 return 0;
2084
2085 satc = container_of(hdr, struct acpi_dmar_satc, header);
2086 satcu = dmar_find_satc(satc);
2087 if (satcu)
2088 return 0;
2089
2090 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
2091 if (!satcu)
2092 return -ENOMEM;
2093
2094 satcu->hdr = (void *)(satcu + 1);
2095 memcpy(satcu->hdr, hdr, hdr->length);
2096 satcu->atc_required = satc->flags & 0x1;
2097 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
2098 (void *)satc + satc->header.length,
2099 &satcu->devices_cnt);
2100 if (satcu->devices_cnt && !satcu->devices) {
2101 kfree(satcu);
2102 return -ENOMEM;
2103 }
2104 list_add_rcu(&satcu->list, &dmar_satc_units);
2105
2106 return 0;
2107 }
2108
2109 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
2110 {
2111 struct intel_iommu *iommu = dmaru->iommu;
2112 int ret;
2113
2114 /*
2115 * Disable translation if already enabled prior to OS handover.
2116 */
2117 if (iommu->gcmd & DMA_GCMD_TE)
2118 iommu_disable_translation(iommu);
2119
2120 ret = iommu_alloc_root_entry(iommu);
2121 if (ret)
2122 goto out;
2123
2124 intel_svm_check(iommu);
2125
2126 if (dmaru->ignored) {
2127 /*
2128 * we always have to disable PMRs or DMA may fail on this device
2129 */
2130 if (force_on)
2131 iommu_disable_protect_mem_regions(iommu);
2132 return 0;
2133 }
2134
2135 intel_iommu_init_qi(iommu);
2136 iommu_flush_write_buffer(iommu);
2137
2138 if (ecap_prs(iommu->ecap)) {
2139 ret = intel_iommu_enable_prq(iommu);
2140 if (ret)
2141 goto disable_iommu;
2142 }
2143
2144 ret = dmar_set_interrupt(iommu);
2145 if (ret)
2146 goto disable_iommu;
2147
2148 iommu_set_root_entry(iommu);
2149 iommu_enable_translation(iommu);
2150
2151 iommu_disable_protect_mem_regions(iommu);
2152 return 0;
2153
2154 disable_iommu:
2155 disable_dmar_iommu(iommu);
2156 out:
2157 free_dmar_iommu(iommu);
2158 return ret;
2159 }
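/*
 * Rough bring-up order for a hot-added DMAR unit, as implemented above
 * (sketch only, some steps omitted):
 *
 *	iommu_alloc_root_entry()
 *	intel_iommu_init_qi()               - invalidation queue first
 *	intel_iommu_enable_prq()            - only if PRS is advertised
 *	dmar_set_interrupt()                - fault reporting
 *	iommu_set_root_entry()              - program the root table pointer
 *	iommu_enable_translation()          - only now turn on remapping
 *	iommu_disable_protect_mem_regions()
 *
 * Any failure unwinds through disable_dmar_iommu()/free_dmar_iommu().
 */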
2160
2161 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
2162 {
2163 int ret = 0;
2164 struct intel_iommu *iommu = dmaru->iommu;
2165
2166 if (!intel_iommu_enabled)
2167 return 0;
2168 if (iommu == NULL)
2169 return -EINVAL;
2170
2171 if (insert) {
2172 ret = intel_iommu_add(dmaru);
2173 } else {
2174 disable_dmar_iommu(iommu);
2175 free_dmar_iommu(iommu);
2176 }
2177
2178 return ret;
2179 }
2180
2181 static void intel_iommu_free_dmars(void)
2182 {
2183 struct dmar_rmrr_unit *rmrru, *rmrr_n;
2184 struct dmar_atsr_unit *atsru, *atsr_n;
2185 struct dmar_satc_unit *satcu, *satc_n;
2186
2187 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
2188 list_del(&rmrru->list);
2189 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
2190 kfree(rmrru);
2191 }
2192
2193 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
2194 list_del(&atsru->list);
2195 intel_iommu_free_atsr(atsru);
2196 }
2197 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
2198 list_del(&satcu->list);
2199 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
2200 kfree(satcu);
2201 }
2202 }
2203
2204 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
2205 {
2206 struct dmar_satc_unit *satcu;
2207 struct acpi_dmar_satc *satc;
2208 struct device *tmp;
2209 int i;
2210
2211 rcu_read_lock();
2212
2213 list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
2214 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2215 if (satc->segment != pci_domain_nr(dev->bus))
2216 continue;
2217 for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
2218 if (to_pci_dev(tmp) == dev)
2219 goto out;
2220 }
2221 satcu = NULL;
2222 out:
2223 rcu_read_unlock();
2224 return satcu;
2225 }
2226
2227 static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
2228 {
2229 struct pci_dev *bridge = NULL;
2230 struct dmar_atsr_unit *atsru;
2231 struct dmar_satc_unit *satcu;
2232 struct acpi_dmar_atsr *atsr;
2233 bool supported = true;
2234 struct pci_bus *bus;
2235 struct device *tmp;
2236 int i;
2237
2238 dev = pci_physfn(dev);
2239 satcu = dmar_find_matched_satc_unit(dev);
2240 if (satcu)
2241 /*
2242 * This device supports ATS as it is listed in the SATC table.
2243 * When the IOMMU is in legacy mode, ATS is enabled
2244 * automatically by HW for devices that require it, hence
2245 * the OS should not enable ATS on this device to avoid
2246 * duplicated TLB invalidations.
2247 */
2248 return !(satcu->atc_required && !sm_supported(iommu));
2249
2250 for (bus = dev->bus; bus; bus = bus->parent) {
2251 bridge = bus->self;
2252 /* If it's an integrated device, allow ATS */
2253 if (!bridge)
2254 return true;
2255 /* Connected via non-PCIe: no ATS */
2256 if (!pci_is_pcie(bridge) ||
2257 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
2258 return false;
2259 /* If we found the root port, look it up in the ATSR */
2260 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
2261 break;
2262 }
2263
2264 rcu_read_lock();
2265 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
2266 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2267 if (atsr->segment != pci_domain_nr(dev->bus))
2268 continue;
2269
2270 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
2271 if (tmp == &bridge->dev)
2272 goto out;
2273
2274 if (atsru->include_all)
2275 goto out;
2276 }
2277 supported = false;
2278 out:
2279 rcu_read_unlock();
2280
2281 return supported;
2282 }
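/*
 * Decision sketch for the SATC early return above (derived from the code,
 * not normative):
 *
 *	atc_required	IOMMU mode	OS enables ATS?
 *	------------	----------	---------------
 *	1		legacy		no  (HW enables ATS by itself)
 *	1		scalable	yes
 *	0		either		yes
 *
 * Devices not listed in any SATC fall through to the ATSR lookup.
 */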
2283
2284 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
2285 {
2286 int ret;
2287 struct dmar_rmrr_unit *rmrru;
2288 struct dmar_atsr_unit *atsru;
2289 struct dmar_satc_unit *satcu;
2290 struct acpi_dmar_atsr *atsr;
2291 struct acpi_dmar_reserved_memory *rmrr;
2292 struct acpi_dmar_satc *satc;
2293
2294 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
2295 return 0;
2296
2297 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
2298 rmrr = container_of(rmrru->hdr,
2299 struct acpi_dmar_reserved_memory, header);
2300 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2301 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
2302 ((void *)rmrr) + rmrr->header.length,
2303 rmrr->segment, rmrru->devices,
2304 rmrru->devices_cnt);
2305 if (ret < 0)
2306 return ret;
2307 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2308 dmar_remove_dev_scope(info, rmrr->segment,
2309 rmrru->devices, rmrru->devices_cnt);
2310 }
2311 }
2312
2313 list_for_each_entry(atsru, &dmar_atsr_units, list) {
2314 if (atsru->include_all)
2315 continue;
2316
2317 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2318 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2319 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
2320 (void *)atsr + atsr->header.length,
2321 atsr->segment, atsru->devices,
2322 atsru->devices_cnt);
2323 if (ret > 0)
2324 break;
2325 else if (ret < 0)
2326 return ret;
2327 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2328 if (dmar_remove_dev_scope(info, atsr->segment,
2329 atsru->devices, atsru->devices_cnt))
2330 break;
2331 }
2332 }
2333 list_for_each_entry(satcu, &dmar_satc_units, list) {
2334 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2335 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2336 ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
2337 (void *)satc + satc->header.length,
2338 satc->segment, satcu->devices,
2339 satcu->devices_cnt);
2340 if (ret > 0)
2341 break;
2342 else if (ret < 0)
2343 return ret;
2344 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2345 if (dmar_remove_dev_scope(info, satc->segment,
2346 satcu->devices, satcu->devices_cnt))
2347 break;
2348 }
2349 }
2350
2351 return 0;
2352 }
2353
2354 static void intel_disable_iommus(void)
2355 {
2356 struct intel_iommu *iommu = NULL;
2357 struct dmar_drhd_unit *drhd;
2358
2359 for_each_iommu(iommu, drhd)
2360 iommu_disable_translation(iommu);
2361 }
2362
2363 void intel_iommu_shutdown(void)
2364 {
2365 struct dmar_drhd_unit *drhd;
2366 struct intel_iommu *iommu = NULL;
2367
2368 if (no_iommu || dmar_disabled)
2369 return;
2370
2371 /*
2372 * All other CPUs were brought down, hotplug interrupts were disabled,
2373 * no lock and RCU checking needed anymore
2374 */
2375 list_for_each_entry(drhd, &dmar_drhd_units, list) {
2376 iommu = drhd->iommu;
2377
2378 /* Disable PMRs explicitly here. */
2379 iommu_disable_protect_mem_regions(iommu);
2380
2381 /* Make sure the IOMMUs are switched off */
2382 iommu_disable_translation(iommu);
2383 }
2384 }
2385
2386 static struct intel_iommu *dev_to_intel_iommu(struct device *dev)
2387 {
2388 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
2389
2390 return container_of(iommu_dev, struct intel_iommu, iommu);
2391 }
2392
2393 static ssize_t version_show(struct device *dev,
2394 struct device_attribute *attr, char *buf)
2395 {
2396 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2397 u32 ver = readl(iommu->reg + DMAR_VER_REG);
2398 return sysfs_emit(buf, "%d:%d\n",
2399 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
2400 }
2401 static DEVICE_ATTR_RO(version);
2402
2403 static ssize_t address_show(struct device *dev,
2404 struct device_attribute *attr, char *buf)
2405 {
2406 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2407 return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
2408 }
2409 static DEVICE_ATTR_RO(address);
2410
2411 static ssize_t cap_show(struct device *dev,
2412 struct device_attribute *attr, char *buf)
2413 {
2414 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2415 return sysfs_emit(buf, "%llx\n", iommu->cap);
2416 }
2417 static DEVICE_ATTR_RO(cap);
2418
2419 static ssize_t ecap_show(struct device *dev,
2420 struct device_attribute *attr, char *buf)
2421 {
2422 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2423 return sysfs_emit(buf, "%llx\n", iommu->ecap);
2424 }
2425 static DEVICE_ATTR_RO(ecap);
2426
2427 static ssize_t domains_supported_show(struct device *dev,
2428 struct device_attribute *attr, char *buf)
2429 {
2430 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2431 return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap));
2432 }
2433 static DEVICE_ATTR_RO(domains_supported);
2434
2435 static ssize_t domains_used_show(struct device *dev,
2436 struct device_attribute *attr, char *buf)
2437 {
2438 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2439 unsigned int count = 0;
2440 int id;
2441
2442 for (id = 0; id < cap_ndoms(iommu->cap); id++)
2443 if (ida_exists(&iommu->domain_ida, id))
2444 count++;
2445
2446 return sysfs_emit(buf, "%d\n", count);
2447 }
2448 static DEVICE_ATTR_RO(domains_used);
2449
2450 static struct attribute *intel_iommu_attrs[] = {
2451 &dev_attr_version.attr,
2452 &dev_attr_address.attr,
2453 &dev_attr_cap.attr,
2454 &dev_attr_ecap.attr,
2455 &dev_attr_domains_supported.attr,
2456 &dev_attr_domains_used.attr,
2457 NULL,
2458 };
2459
2460 static struct attribute_group intel_iommu_group = {
2461 .name = "intel-iommu",
2462 .attrs = intel_iommu_attrs,
2463 };
2464
2465 const struct attribute_group *intel_iommu_groups[] = {
2466 &intel_iommu_group,
2467 NULL,
2468 };
2469
2470 static bool has_external_pci(void)
2471 {
2472 struct pci_dev *pdev = NULL;
2473
2474 for_each_pci_dev(pdev)
2475 if (pdev->external_facing) {
2476 pci_dev_put(pdev);
2477 return true;
2478 }
2479
2480 return false;
2481 }
2482
2483 static int __init platform_optin_force_iommu(void)
2484 {
2485 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
2486 return 0;
2487
2488 if (no_iommu || dmar_disabled)
2489 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
2490
2491 /*
2492 * If Intel-IOMMU is disabled by default, we will apply identity
2493 * map for all devices except those marked as being untrusted.
2494 */
2495 if (dmar_disabled)
2496 iommu_set_default_passthrough(false);
2497
2498 dmar_disabled = 0;
2499 no_iommu = 0;
2500
2501 return 1;
2502 }
2503
2504 static int __init probe_acpi_namespace_devices(void)
2505 {
2506 struct dmar_drhd_unit *drhd;
2507 /* To avoid a -Wunused-but-set-variable warning. */
2508 struct intel_iommu *iommu __maybe_unused;
2509 struct device *dev;
2510 int i, ret = 0;
2511
2512 for_each_active_iommu(iommu, drhd) {
2513 for_each_active_dev_scope(drhd->devices,
2514 drhd->devices_cnt, i, dev) {
2515 struct acpi_device_physical_node *pn;
2516 struct acpi_device *adev;
2517
2518 if (dev->bus != &acpi_bus_type)
2519 continue;
2520
2521 up_read(&dmar_global_lock);
2522 adev = to_acpi_device(dev);
2523 mutex_lock(&adev->physical_node_lock);
2524 list_for_each_entry(pn,
2525 &adev->physical_node_list, node) {
2526 ret = iommu_probe_device(pn->dev);
2527 if (ret)
2528 break;
2529 }
2530 mutex_unlock(&adev->physical_node_lock);
2531 down_read(&dmar_global_lock);
2532
2533 if (ret)
2534 return ret;
2535 }
2536 }
2537
2538 return 0;
2539 }
2540
2541 static __init int tboot_force_iommu(void)
2542 {
2543 if (!tboot_enabled())
2544 return 0;
2545
2546 if (no_iommu || dmar_disabled)
2547 pr_warn("Forcing Intel-IOMMU to enabled\n");
2548
2549 dmar_disabled = 0;
2550 no_iommu = 0;
2551
2552 return 1;
2553 }
2554
2555 int __init intel_iommu_init(void)
2556 {
2557 int ret = -ENODEV;
2558 struct dmar_drhd_unit *drhd;
2559 struct intel_iommu *iommu;
2560
2561 /*
2562 * Intel IOMMU is required for a TXT/tboot launch or platform
2563 * opt in, so enforce that.
2564 */
2565 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
2566 platform_optin_force_iommu();
2567
2568 down_write(&dmar_global_lock);
2569 if (dmar_table_init()) {
2570 if (force_on)
2571 panic("tboot: Failed to initialize DMAR table\n");
2572 goto out_free_dmar;
2573 }
2574
2575 if (dmar_dev_scope_init() < 0) {
2576 if (force_on)
2577 panic("tboot: Failed to initialize DMAR device scope\n");
2578 goto out_free_dmar;
2579 }
2580
2581 up_write(&dmar_global_lock);
2582
2583 /*
2584 * The bus notifier takes the dmar_global_lock, so lockdep will
2585 * complain later when we register it under the lock.
2586 */
2587 dmar_register_bus_notifier();
2588
2589 down_write(&dmar_global_lock);
2590
2591 if (!no_iommu)
2592 intel_iommu_debugfs_init();
2593
2594 if (no_iommu || dmar_disabled) {
2595 /*
2596 * We exit the function here to ensure IOMMU's remapping and
2597 * mempool aren't set up, which means that the IOMMU's PMRs
2598 * won't be disabled via the call to init_dmars(). So disable
2599 * it explicitly here. The PMRs were setup by tboot prior to
2600 * calling SENTER, but the kernel is expected to reset/tear
2601 * down the PMRs.
2602 */
2603 if (intel_iommu_tboot_noforce) {
2604 for_each_iommu(iommu, drhd)
2605 iommu_disable_protect_mem_regions(iommu);
2606 }
2607
2608 /*
2609 * Make sure the IOMMUs are switched off, even when we
2610 * boot into a kexec kernel and the previous kernel left
2611 * them enabled
2612 */
2613 intel_disable_iommus();
2614 goto out_free_dmar;
2615 }
2616
2617 if (list_empty(&dmar_rmrr_units))
2618 pr_info("No RMRR found\n");
2619
2620 if (list_empty(&dmar_atsr_units))
2621 pr_info("No ATSR found\n");
2622
2623 if (list_empty(&dmar_satc_units))
2624 pr_info("No SATC found\n");
2625
2626 init_no_remapping_devices();
2627
2628 ret = init_dmars();
2629 if (ret) {
2630 if (force_on)
2631 panic("tboot: Failed to initialize DMARs\n");
2632 pr_err("Initialization failed\n");
2633 goto out_free_dmar;
2634 }
2635 up_write(&dmar_global_lock);
2636
2637 init_iommu_pm_ops();
2638
2639 down_read(&dmar_global_lock);
2640 for_each_active_iommu(iommu, drhd) {
2641 /*
2642 * The flush queue implementation does not perform
2643 * page-selective invalidations that are required for efficient
2644 * TLB flushes in virtual environments. The benefit of batching
2645 * is likely to be much lower than the overhead of synchronizing
2646 * the virtual and physical IOMMU page-tables.
2647 */
2648 if (cap_caching_mode(iommu->cap) &&
2649 !first_level_by_default(iommu)) {
2650 pr_info_once("IOMMU batching disallowed due to virtualization\n");
2651 iommu_set_dma_strict();
2652 }
2653 iommu_device_sysfs_add(&iommu->iommu, NULL,
2654 intel_iommu_groups,
2655 "%s", iommu->name);
2656 /*
2657 * The iommu device probe is protected by the iommu_probe_device_lock.
2658 * Release the dmar_global_lock before entering the device probe path
2659 * to avoid unnecessary lock order splat.
2660 */
2661 up_read(&dmar_global_lock);
2662 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
2663 down_read(&dmar_global_lock);
2664
2665 iommu_pmu_register(iommu);
2666 }
2667
2668 if (probe_acpi_namespace_devices())
2669 pr_warn("ACPI name space devices didn't probe correctly\n");
2670
2671 /* Finally, we enable the DMA remapping hardware. */
2672 for_each_iommu(iommu, drhd) {
2673 if (!drhd->ignored && !translation_pre_enabled(iommu))
2674 iommu_enable_translation(iommu);
2675
2676 iommu_disable_protect_mem_regions(iommu);
2677 }
2678 up_read(&dmar_global_lock);
2679
2680 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
2681
2682 intel_iommu_enabled = 1;
2683
2684 return 0;
2685
2686 out_free_dmar:
2687 intel_iommu_free_dmars();
2688 up_write(&dmar_global_lock);
2689 return ret;
2690 }
2691
2692 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
2693 {
2694 struct device_domain_info *info = opaque;
2695
2696 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
2697 return 0;
2698 }
2699
2700 /*
2701 * NB - intel-iommu lacks any sort of reference counting for the users of
2702 * dependent devices. If multiple endpoints have intersecting dependent
2703 * devices, unbinding the driver from any one of them will possibly leave
2704 * the others unable to operate.
2705 */
2706 static void domain_context_clear(struct device_domain_info *info)
2707 {
2708 if (!dev_is_pci(info->dev)) {
2709 domain_context_clear_one(info, info->bus, info->devfn);
2710 return;
2711 }
2712
2713 pci_for_each_dma_alias(to_pci_dev(info->dev),
2714 &domain_context_clear_one_cb, info);
2715 iommu_disable_pci_ats(info);
2716 }
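/*
 * Example of the aliasing handled above (hypothetical topology): a
 * conventional PCI device behind a PCIe-to-PCI bridge may issue DMA with
 * the bridge's requester ID, so pci_for_each_dma_alias() invokes the
 * callback for the device's own RID as well as the bridge alias, and every
 * context entry that could translate its traffic gets cleared.
 */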
2717
2718 /*
2719 * Clear the page table pointer in context or pasid table entries so that
2720 * all DMA requests without PASID from the device are blocked. If the page
2721 * table has been set, clean up the data structures.
2722 */
2723 void device_block_translation(struct device *dev)
2724 {
2725 struct device_domain_info *info = dev_iommu_priv_get(dev);
2726 struct intel_iommu *iommu = info->iommu;
2727 unsigned long flags;
2728
2729 /* Device in DMA blocking state. Nothing to do. */
2730 if (!info->domain_attached)
2731 return;
2732
2733 if (info->domain)
2734 cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
2735
2736 if (!dev_is_real_dma_subdevice(dev)) {
2737 if (sm_supported(iommu))
2738 intel_pasid_tear_down_entry(iommu, dev,
2739 IOMMU_NO_PASID, false);
2740 else
2741 domain_context_clear(info);
2742 }
2743
2744 /* Device now in DMA blocking state. */
2745 info->domain_attached = false;
2746
2747 if (!info->domain)
2748 return;
2749
2750 spin_lock_irqsave(&info->domain->lock, flags);
2751 list_del(&info->link);
2752 spin_unlock_irqrestore(&info->domain->lock, flags);
2753
2754 domain_detach_iommu(info->domain, iommu);
2755 info->domain = NULL;
2756 }
2757
2758 static int blocking_domain_attach_dev(struct iommu_domain *domain,
2759 struct device *dev,
2760 struct iommu_domain *old)
2761 {
2762 struct device_domain_info *info = dev_iommu_priv_get(dev);
2763
2764 iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev);
2765 device_block_translation(dev);
2766 return 0;
2767 }
2768
2769 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
2770 struct device *dev, ioasid_t pasid,
2771 struct iommu_domain *old);
2772
2773 static struct iommu_domain blocking_domain = {
2774 .type = IOMMU_DOMAIN_BLOCKED,
2775 .ops = &(const struct iommu_domain_ops) {
2776 .attach_dev = blocking_domain_attach_dev,
2777 .set_dev_pasid = blocking_domain_set_dev_pasid,
2778 }
2779 };
2780
2781 static struct dmar_domain *paging_domain_alloc(void)
2782 {
2783 struct dmar_domain *domain;
2784
2785 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2786 if (!domain)
2787 return ERR_PTR(-ENOMEM);
2788
2789 INIT_LIST_HEAD(&domain->devices);
2790 INIT_LIST_HEAD(&domain->dev_pasids);
2791 INIT_LIST_HEAD(&domain->cache_tags);
2792 spin_lock_init(&domain->lock);
2793 spin_lock_init(&domain->cache_lock);
2794 xa_init(&domain->iommu_array);
2795 INIT_LIST_HEAD(&domain->s1_domains);
2796 spin_lock_init(&domain->s1_lock);
2797
2798 return domain;
2799 }
2800
2801 static unsigned int compute_vasz_lg2_fs(struct intel_iommu *iommu,
2802 unsigned int *top_level)
2803 {
2804 unsigned int mgaw = cap_mgaw(iommu->cap);
2805
2806 /*
2807 * Spec 3.6 First-Stage Translation:
2808 *
2809 * Software must limit addresses to less than the minimum of MGAW
2810 * and the lower canonical address width implied by FSPM (i.e.,
2811 * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level).
2812 */
2813 if (mgaw > 48 && cap_fl5lp_support(iommu->cap)) {
2814 *top_level = 4;
2815 return min(57, mgaw);
2816 }
2817
2818 /* Four level is always supported */
2819 *top_level = 3;
2820 return min(48, mgaw);
2821 }
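/*
 * Worked examples for the first-stage sizing above (hypothetical
 * capability values):
 *
 *	mgaw 57, FL5LP supported   -> top_level 4 (5-level), vasz 57
 *	mgaw 48, FL5LP supported   -> top_level 3 (4-level), vasz 48
 *	mgaw 52, FL5LP unsupported -> top_level 3 (4-level), vasz 48
 */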
2822
2823 static struct iommu_domain *
2824 intel_iommu_domain_alloc_first_stage(struct device *dev,
2825 struct intel_iommu *iommu, u32 flags)
2826 {
2827 struct pt_iommu_x86_64_cfg cfg = {};
2828 struct dmar_domain *dmar_domain;
2829 int ret;
2830
2831 if (flags & ~IOMMU_HWPT_ALLOC_PASID)
2832 return ERR_PTR(-EOPNOTSUPP);
2833
2834 /* Only SL is available in legacy mode */
2835 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
2836 return ERR_PTR(-EOPNOTSUPP);
2837
2838 dmar_domain = paging_domain_alloc();
2839 if (IS_ERR(dmar_domain))
2840 return ERR_CAST(dmar_domain);
2841
2842 cfg.common.hw_max_vasz_lg2 =
2843 compute_vasz_lg2_fs(iommu, &cfg.top_level);
2844 cfg.common.hw_max_oasz_lg2 = 52;
2845 cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
2846 BIT(PT_FEAT_FLUSH_RANGE);
2847 /* First stage always uses scalable mode */
2848 if (!ecap_smpwc(iommu->ecap))
2849 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
2850 dmar_domain->iommu.iommu_device = dev;
2851 dmar_domain->iommu.nid = dev_to_node(dev);
2852 dmar_domain->domain.ops = &intel_fs_paging_domain_ops;
2853 /*
2854 * iotlb sync for map is only needed for legacy implementations that
2855 * explicitly require flushing internal write buffers to ensure memory
2856 * coherence.
2857 */
2858 if (rwbf_required(iommu))
2859 dmar_domain->iotlb_sync_map = true;
2860
2861 ret = pt_iommu_x86_64_init(&dmar_domain->fspt, &cfg, GFP_KERNEL);
2862 if (ret) {
2863 kfree(dmar_domain);
2864 return ERR_PTR(ret);
2865 }
2866
2867 if (!cap_fl1gp_support(iommu->cap))
2868 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
2869 if (!intel_iommu_superpage)
2870 dmar_domain->domain.pgsize_bitmap = SZ_4K;
2871
2872 return &dmar_domain->domain;
2873 }
2874
2875 static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu,
2876 unsigned int *top_level)
2877 {
2878 unsigned int sagaw = cap_sagaw(iommu->cap);
2879 unsigned int mgaw = cap_mgaw(iommu->cap);
2880
2881 /*
2882 * Find the largest table size that both the mgaw and sagaw support.
2883 * This sets the valid range of IOVA and the top starting level.
2884 * Some HW may only support a 4 or 5 level walk but must limit IOVA to
2885 * 3 levels.
2886 */
2887 if (mgaw > 48 && sagaw >= BIT(3)) {
2888 *top_level = 4;
2889 return min(57, mgaw);
2890 } else if (mgaw > 39 && sagaw >= BIT(2)) {
2891 *top_level = 3 + ffs(sagaw >> 3);
2892 return min(48, mgaw);
2893 } else if (mgaw > 30 && sagaw >= BIT(1)) {
2894 *top_level = 2 + ffs(sagaw >> 2);
2895 return min(39, mgaw);
2896 }
2897 return 0;
2898 }
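/*
 * Worked examples for the second-stage sizing above (hypothetical
 * capability values; SAGAW bit 1/2/3 = 3/4/5-level walk support):
 *
 *	mgaw 57, sagaw BIT(2)|BIT(3) -> top_level 4 (5-level), vasz 57
 *	mgaw 48, sagaw BIT(2)        -> top_level 3 (4-level), vasz 48
 *	mgaw 39, sagaw BIT(2)        -> top_level 3 (4-level walk), vasz 39
 *	mgaw 30, any sagaw           -> 0 (unsupported combination)
 */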
2899
2900 static const struct iommu_dirty_ops intel_second_stage_dirty_ops = {
2901 IOMMU_PT_DIRTY_OPS(vtdss),
2902 .set_dirty_tracking = intel_iommu_set_dirty_tracking,
2903 };
2904
2905 static struct iommu_domain *
2906 intel_iommu_domain_alloc_second_stage(struct device *dev,
2907 struct intel_iommu *iommu, u32 flags)
2908 {
2909 struct pt_iommu_vtdss_cfg cfg = {};
2910 struct dmar_domain *dmar_domain;
2911 unsigned int sslps;
2912 int ret;
2913
2914 if (flags &
2915 (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
2916 IOMMU_HWPT_ALLOC_PASID)))
2917 return ERR_PTR(-EOPNOTSUPP);
2918
2919 if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) &&
2920 !nested_supported(iommu)) ||
2921 ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
2922 !ssads_supported(iommu)))
2923 return ERR_PTR(-EOPNOTSUPP);
2924
2925 /* Legacy mode always supports second stage */
2926 if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
2927 return ERR_PTR(-EOPNOTSUPP);
2928
2929 dmar_domain = paging_domain_alloc();
2930 if (IS_ERR(dmar_domain))
2931 return ERR_CAST(dmar_domain);
2932
2933 cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level);
2934 cfg.common.hw_max_oasz_lg2 = 52;
2935 cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE);
2936
2937 /*
2938 * Read-only mapping is disallowed on the domain which serves as the
2939 * parent in a nested configuration, due to HW errata
2940 * (ERRATA_772415_SPR17)
2941 */
2942 if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)
2943 cfg.common.features |= BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE);
2944
2945 if (!iommu_paging_structure_coherency(iommu))
2946 cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
2947 dmar_domain->iommu.iommu_device = dev;
2948 dmar_domain->iommu.nid = dev_to_node(dev);
2949 dmar_domain->domain.ops = &intel_ss_paging_domain_ops;
2950 dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
2951
2952 if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
2953 dmar_domain->domain.dirty_ops = &intel_second_stage_dirty_ops;
2954
2955 ret = pt_iommu_vtdss_init(&dmar_domain->sspt, &cfg, GFP_KERNEL);
2956 if (ret) {
2957 kfree(dmar_domain);
2958 return ERR_PTR(ret);
2959 }
2960
2961 /* Adjust the supported page sizes to HW capability */
2962 sslps = cap_super_page_val(iommu->cap);
2963 if (!(sslps & BIT(0)))
2964 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_2M;
2965 if (!(sslps & BIT(1)))
2966 dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
2967 if (!intel_iommu_superpage)
2968 dmar_domain->domain.pgsize_bitmap = SZ_4K;
2969
2970 /*
2971 * Besides the internal write buffer flush, the caching mode used for
2972 * legacy nested translation (which utilizes shadowing page tables)
2973 * also requires iotlb sync on map.
2974 */
2975 if (rwbf_required(iommu) || cap_caching_mode(iommu->cap))
2976 dmar_domain->iotlb_sync_map = true;
2977
2978 return &dmar_domain->domain;
2979 }
2980
2981 static struct iommu_domain *
2982 intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
2983 const struct iommu_user_data *user_data)
2984 {
2985 struct device_domain_info *info = dev_iommu_priv_get(dev);
2986 struct intel_iommu *iommu = info->iommu;
2987 struct iommu_domain *domain;
2988
2989 if (user_data)
2990 return ERR_PTR(-EOPNOTSUPP);
2991
2992 /* Prefer first stage if possible by default. */
2993 domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags);
2994 if (domain != ERR_PTR(-EOPNOTSUPP))
2995 return domain;
2996 return intel_iommu_domain_alloc_second_stage(dev, iommu, flags);
2997 }
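/*
 * Sketch of the fallback above: on legacy-mode hardware, or when FLTS is
 * not advertised, or when a flag the first stage cannot honor is passed,
 * the first-stage allocator returns -EOPNOTSUPP and the second-stage
 * allocator is tried with the same flags; any other error (e.g. -ENOMEM)
 * is returned to the caller as-is.
 */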
2998
2999 static void intel_iommu_domain_free(struct iommu_domain *domain)
3000 {
3001 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3002
3003 if (WARN_ON(dmar_domain->nested_parent &&
3004 !list_empty(&dmar_domain->s1_domains)))
3005 return;
3006
3007 if (WARN_ON(!list_empty(&dmar_domain->devices)))
3008 return;
3009
3010 pt_iommu_deinit(&dmar_domain->iommu);
3011
3012 kfree(dmar_domain->qi_batch);
3013 kfree(dmar_domain);
3014 }
3015
3016 static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain,
3017 struct intel_iommu *iommu)
3018 {
3019 if (WARN_ON(dmar_domain->domain.dirty_ops ||
3020 dmar_domain->nested_parent))
3021 return -EINVAL;
3022
3023 /* Only SL is available in legacy mode */
3024 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
3025 return -EINVAL;
3026
3027 if (!ecap_smpwc(iommu->ecap) &&
3028 !(dmar_domain->fspt.x86_64_pt.common.features &
3029 BIT(PT_FEAT_DMA_INCOHERENT)))
3030 return -EINVAL;
3031
3032 /* The IOMMU must support the domain's number of table levels */
3033 if (!cap_fl5lp_support(iommu->cap) &&
3034 dmar_domain->fspt.x86_64_pt.common.max_vasz_lg2 > 48)
3035 return -EINVAL;
3036
3037 /* Same page size support */
3038 if (!cap_fl1gp_support(iommu->cap) &&
3039 (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3040 return -EINVAL;
3041
3042 /* iotlb sync on map requirement */
3043 if ((rwbf_required(iommu)) && !dmar_domain->iotlb_sync_map)
3044 return -EINVAL;
3045
3046 return 0;
3047 }
3048
3049 static int
3050 paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain,
3051 struct intel_iommu *iommu)
3052 {
3053 unsigned int vasz_lg2 = dmar_domain->sspt.vtdss_pt.common.max_vasz_lg2;
3054 unsigned int sslps = cap_super_page_val(iommu->cap);
3055 struct pt_iommu_vtdss_hw_info pt_info;
3056
3057 pt_iommu_vtdss_hw_info(&dmar_domain->sspt, &pt_info);
3058
3059 if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu))
3060 return -EINVAL;
3061 if (dmar_domain->nested_parent && !nested_supported(iommu))
3062 return -EINVAL;
3063
3064 /* Legacy mode always supports second stage */
3065 if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
3066 return -EINVAL;
3067
3068 if (!iommu_paging_structure_coherency(iommu) &&
3069 !(dmar_domain->sspt.vtdss_pt.common.features &
3070 BIT(PT_FEAT_DMA_INCOHERENT)))
3071 return -EINVAL;
3072
3073 /* Address width falls within the capability */
3074 if (cap_mgaw(iommu->cap) < vasz_lg2)
3075 return -EINVAL;
3076
3077 /* Page table level is supported. */
3078 if (!(cap_sagaw(iommu->cap) & BIT(pt_info.aw)))
3079 return -EINVAL;
3080
3081 /* Same page size support */
3082 if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M))
3083 return -EINVAL;
3084 if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3085 return -EINVAL;
3086
3087 /* iotlb sync on map requirement */
3088 if ((rwbf_required(iommu) || cap_caching_mode(iommu->cap)) &&
3089 !dmar_domain->iotlb_sync_map)
3090 return -EINVAL;
3091
3092 /*
3093 * FIXME this is locked wrong, it needs to be under the
3094 * dmar_domain->lock
3095 */
3096 if ((dmar_domain->sspt.vtdss_pt.common.features &
3097 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE)) &&
3098 !ecap_sc_support(iommu->ecap))
3099 return -EINVAL;
3100 return 0;
3101 }
3102
3103 int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
3104 {
3105 struct device_domain_info *info = dev_iommu_priv_get(dev);
3106 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3107 struct intel_iommu *iommu = info->iommu;
3108 int ret = -EINVAL;
3109
3110 if (intel_domain_is_fs_paging(dmar_domain))
3111 ret = paging_domain_compatible_first_stage(dmar_domain, iommu);
3112 else if (intel_domain_is_ss_paging(dmar_domain))
3113 ret = paging_domain_compatible_second_stage(dmar_domain, iommu);
3114 else if (WARN_ON(true))
3115 ret = -EINVAL;
3116 if (ret)
3117 return ret;
3118
3119 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3120 context_copied(iommu, info->bus, info->devfn))
3121 return intel_pasid_setup_sm_context(dev);
3122
3123 return 0;
3124 }
3125
3126 static int intel_iommu_attach_device(struct iommu_domain *domain,
3127 struct device *dev,
3128 struct iommu_domain *old)
3129 {
3130 int ret;
3131
3132 device_block_translation(dev);
3133
3134 ret = paging_domain_compatible(domain, dev);
3135 if (ret)
3136 return ret;
3137
3138 ret = iopf_for_domain_set(domain, dev);
3139 if (ret)
3140 return ret;
3141
3142 ret = dmar_domain_attach_device(to_dmar_domain(domain), dev);
3143 if (ret)
3144 iopf_for_domain_remove(domain, dev);
3145
3146 return ret;
3147 }
3148
3149 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
3150 struct iommu_iotlb_gather *gather)
3151 {
3152 cache_tag_flush_range(to_dmar_domain(domain), gather->start,
3153 gather->end,
3154 iommu_pages_list_empty(&gather->freelist));
3155 iommu_put_pages_list(&gather->freelist);
3156 }
3157
3158 static bool domain_support_force_snooping(struct dmar_domain *domain)
3159 {
3160 struct device_domain_info *info;
3161 bool support = true;
3162
3163 assert_spin_locked(&domain->lock);
3164 list_for_each_entry(info, &domain->devices, link) {
3165 if (!ecap_sc_support(info->iommu->ecap)) {
3166 support = false;
3167 break;
3168 }
3169 }
3170
3171 return support;
3172 }
3173
3174 static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain)
3175 {
3176 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3177 struct device_domain_info *info;
3178
3179 guard(spinlock_irqsave)(&dmar_domain->lock);
3180
3181 if (dmar_domain->force_snooping)
3182 return true;
3183
3184 if (!domain_support_force_snooping(dmar_domain))
3185 return false;
3186
3187 dmar_domain->force_snooping = true;
3188 list_for_each_entry(info, &dmar_domain->devices, link)
3189 intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
3190 IOMMU_NO_PASID);
3191 return true;
3192 }
3193
3194 static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain)
3195 {
3196 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3197
3198 guard(spinlock_irqsave)(&dmar_domain->lock);
3199 if (!domain_support_force_snooping(dmar_domain))
3200 return false;
3201
3202 /*
3203 * Second level page table supports per-PTE snoop control. The
3204 * iommu_map() interface will handle this by setting the SNP bit.
3205 */
3206 dmar_domain->sspt.vtdss_pt.common.features |=
3207 BIT(PT_FEAT_VTDSS_FORCE_COHERENCE);
3208 dmar_domain->force_snooping = true;
3209 return true;
3210 }
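/*
 * Note on the two enforcement paths above (descriptive only): the
 * first-stage variant rewrites the PASID entry of every attached device so
 * that page snoop control takes effect immediately, while the second-stage
 * variant only flips PT_FEAT_VTDSS_FORCE_COHERENCE, which makes subsequent
 * iommu_map() calls set the SNP bit in newly created PTEs.
 */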
3211
3212 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
3213 {
3214 struct device_domain_info *info = dev_iommu_priv_get(dev);
3215
3216 switch (cap) {
3217 case IOMMU_CAP_CACHE_COHERENCY:
3218 case IOMMU_CAP_DEFERRED_FLUSH:
3219 return true;
3220 case IOMMU_CAP_PRE_BOOT_PROTECTION:
3221 return dmar_platform_optin();
3222 case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
3223 return ecap_sc_support(info->iommu->ecap);
3224 case IOMMU_CAP_DIRTY_TRACKING:
3225 return ssads_supported(info->iommu);
3226 default:
3227 return false;
3228 }
3229 }
3230
3231 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
3232 {
3233 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
3234 struct device_domain_info *info;
3235 struct intel_iommu *iommu;
3236 u8 bus, devfn;
3237 int ret;
3238
3239 iommu = device_lookup_iommu(dev, &bus, &devfn);
3240 if (!iommu || !iommu->iommu.ops)
3241 return ERR_PTR(-ENODEV);
3242
3243 info = kzalloc(sizeof(*info), GFP_KERNEL);
3244 if (!info)
3245 return ERR_PTR(-ENOMEM);
3246
3247 if (dev_is_real_dma_subdevice(dev)) {
3248 info->bus = pdev->bus->number;
3249 info->devfn = pdev->devfn;
3250 info->segment = pci_domain_nr(pdev->bus);
3251 } else {
3252 info->bus = bus;
3253 info->devfn = devfn;
3254 info->segment = iommu->segment;
3255 }
3256
3257 info->dev = dev;
3258 info->iommu = iommu;
3259 if (dev_is_pci(dev)) {
3260 if (ecap_dev_iotlb_support(iommu->ecap) &&
3261 pci_ats_supported(pdev) &&
3262 dmar_ats_supported(pdev, iommu)) {
3263 info->ats_supported = 1;
3264 info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
3265
3266 /*
3267 * For IOMMU that supports device IOTLB throttling
3268 * (DIT), we assign PFSID to the invalidation desc
3269 * of a VF such that IOMMU HW can gauge queue depth
3270 * at PF level. If DIT is not set, PFSID will be
3271 * treated as reserved, which should be set to 0.
3272 */
3273 if (ecap_dit(iommu->ecap))
3274 info->pfsid = pci_dev_id(pci_physfn(pdev));
3275 info->ats_qdep = pci_ats_queue_depth(pdev);
3276 }
3277 if (sm_supported(iommu)) {
3278 if (pasid_supported(iommu)) {
3279 int features = pci_pasid_features(pdev);
3280
3281 if (features >= 0)
3282 info->pasid_supported = features | 1;
3283 }
3284
3285 if (info->ats_supported && ecap_prs(iommu->ecap) &&
3286 ecap_pds(iommu->ecap) && pci_pri_supported(pdev))
3287 info->pri_supported = 1;
3288 }
3289 }
3290
3291 dev_iommu_priv_set(dev, info);
3292 if (pdev && pci_ats_supported(pdev)) {
3293 pci_prepare_ats(pdev, VTD_PAGE_SHIFT);
3294 ret = device_rbtree_insert(iommu, info);
3295 if (ret)
3296 goto free;
3297 }
3298
3299 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3300 ret = intel_pasid_alloc_table(dev);
3301 if (ret) {
3302 dev_err(dev, "PASID table allocation failed\n");
3303 goto clear_rbtree;
3304 }
3305
3306 if (!context_copied(iommu, info->bus, info->devfn)) {
3307 ret = intel_pasid_setup_sm_context(dev);
3308 if (ret)
3309 goto free_table;
3310 }
3311 }
3312
3313 intel_iommu_debugfs_create_dev(info);
3314
3315 return &iommu->iommu;
3316 free_table:
3317 intel_pasid_free_table(dev);
3318 clear_rbtree:
3319 device_rbtree_remove(info);
3320 free:
3321 kfree(info);
3322
3323 return ERR_PTR(ret);
3324 }
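/*
 * Example of the PFSID assignment above (hypothetical BDF values): for a
 * VF at 0000:3b:02.1 whose PF is 0000:3b:00.0, a DIT-capable IOMMU stores
 * pfsid = PCI_DEVID(0x3b, PCI_DEVFN(0, 0)) so that device-IOTLB
 * invalidations can be throttled against the PF's queue depth; without
 * DIT the field stays 0 and is treated as reserved.
 */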
3325
3326 static void intel_iommu_probe_finalize(struct device *dev)
3327 {
3328 struct device_domain_info *info = dev_iommu_priv_get(dev);
3329 struct intel_iommu *iommu = info->iommu;
3330
3331 /*
3332 * The PCIe spec, in its wisdom, declares that the behaviour of the
3333 * device is undefined if you enable PASID support after ATS support.
3334 * So always enable PASID support on devices which have it, even if
3335 * we can't yet know if we're ever going to use it.
3336 */
3337 if (info->pasid_supported &&
3338 !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
3339 info->pasid_enabled = 1;
3340
3341 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3342 iommu_enable_pci_ats(info);
3343 /* Assign a DEVTLB cache tag to the default domain. */
3344 if (info->ats_enabled && info->domain) {
3345 u16 did = domain_id_iommu(info->domain, iommu);
3346
3347 if (cache_tag_assign(info->domain, did, dev,
3348 IOMMU_NO_PASID, CACHE_TAG_DEVTLB))
3349 iommu_disable_pci_ats(info);
3350 }
3351 }
3352 iommu_enable_pci_pri(info);
3353 }
3354
3355 static void intel_iommu_release_device(struct device *dev)
3356 {
3357 struct device_domain_info *info = dev_iommu_priv_get(dev);
3358 struct intel_iommu *iommu = info->iommu;
3359
3360 iommu_disable_pci_pri(info);
3361 iommu_disable_pci_ats(info);
3362
3363 if (info->pasid_enabled) {
3364 pci_disable_pasid(to_pci_dev(dev));
3365 info->pasid_enabled = 0;
3366 }
3367
3368 mutex_lock(&iommu->iopf_lock);
3369 if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
3370 device_rbtree_remove(info);
3371 mutex_unlock(&iommu->iopf_lock);
3372
3373 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3374 !context_copied(iommu, info->bus, info->devfn))
3375 intel_pasid_teardown_sm_context(dev);
3376
3377 intel_pasid_free_table(dev);
3378 intel_iommu_debugfs_remove_dev(info);
3379 kfree(info);
3380 }
3381
3382 static void intel_iommu_get_resv_regions(struct device *device,
3383 struct list_head *head)
3384 {
3385 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
3386 struct iommu_resv_region *reg;
3387 struct dmar_rmrr_unit *rmrr;
3388 struct device *i_dev;
3389 int i;
3390
3391 rcu_read_lock();
3392 for_each_rmrr_units(rmrr) {
3393 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3394 i, i_dev) {
3395 struct iommu_resv_region *resv;
3396 enum iommu_resv_type type;
3397 size_t length;
3398
3399 if (i_dev != device &&
3400 !is_downstream_to_pci_bridge(device, i_dev))
3401 continue;
3402
3403 length = rmrr->end_address - rmrr->base_address + 1;
3404
3405 type = device_rmrr_is_relaxable(device) ?
3406 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
3407
3408 resv = iommu_alloc_resv_region(rmrr->base_address,
3409 length, prot, type,
3410 GFP_ATOMIC);
3411 if (!resv)
3412 break;
3413
3414 list_add_tail(&resv->list, head);
3415 }
3416 }
3417 rcu_read_unlock();
3418
3419 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
3420 if (dev_is_pci(device)) {
3421 struct pci_dev *pdev = to_pci_dev(device);
3422
3423 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
3424 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
3425 IOMMU_RESV_DIRECT_RELAXABLE,
3426 GFP_KERNEL);
3427 if (reg)
3428 list_add_tail(&reg->list, head);
3429 }
3430 }
3431 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
3432
3433 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
3434 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
3435 0, IOMMU_RESV_MSI, GFP_KERNEL);
3436 if (!reg)
3437 return;
3438 list_add_tail(&reg->list, head);
3439 }
3440
3441 static struct iommu_group *intel_iommu_device_group(struct device *dev)
3442 {
3443 if (dev_is_pci(dev))
3444 return pci_device_group(dev);
3445 return generic_device_group(dev);
3446 }
3447
3448 int intel_iommu_enable_iopf(struct device *dev)
3449 {
3450 struct device_domain_info *info = dev_iommu_priv_get(dev);
3451 struct intel_iommu *iommu = info->iommu;
3452 int ret;
3453
3454 if (!info->pri_enabled)
3455 return -ENODEV;
3456
3457 /* pri_enabled is protected by the group mutex. */
3458 iommu_group_mutex_assert(dev);
3459 if (info->iopf_refcount) {
3460 info->iopf_refcount++;
3461 return 0;
3462 }
3463
3464 ret = iopf_queue_add_device(iommu->iopf_queue, dev);
3465 if (ret)
3466 return ret;
3467
3468 info->iopf_refcount = 1;
3469
3470 return 0;
3471 }
3472
3473 void intel_iommu_disable_iopf(struct device *dev)
3474 {
3475 struct device_domain_info *info = dev_iommu_priv_get(dev);
3476 struct intel_iommu *iommu = info->iommu;
3477
3478 if (WARN_ON(!info->pri_enabled || !info->iopf_refcount))
3479 return;
3480
3481 iommu_group_mutex_assert(dev);
3482 if (--info->iopf_refcount)
3483 return;
3484
3485 iopf_queue_remove_device(iommu->iopf_queue, dev);
3486 }
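/*
 * Minimal usage sketch for the refcounted pair above (hypothetical caller,
 * which must hold the group mutex and have PRI enabled):
 *
 *	ret = intel_iommu_enable_iopf(dev);
 *	if (ret)
 *		return ret;
 *	...
 *	intel_iommu_disable_iopf(dev);
 *
 * Only the first enable adds the device to the IOPF queue and only the
 * last disable removes it.
 */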
3487
3488 static bool intel_iommu_is_attach_deferred(struct device *dev)
3489 {
3490 struct device_domain_info *info = dev_iommu_priv_get(dev);
3491
3492 return translation_pre_enabled(info->iommu) && !info->domain;
3493 }
3494
3495 /*
3496 * Check that the device does not live on an external facing PCI port that is
3497 * marked as untrusted. Such devices should not be able to apply quirks and
3498 * thus not be able to bypass the IOMMU restrictions.
3499 */
3500 static bool risky_device(struct pci_dev *pdev)
3501 {
3502 if (pdev->untrusted) {
3503 pci_info(pdev,
3504 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
3505 pdev->vendor, pdev->device);
3506 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
3507 return true;
3508 }
3509 return false;
3510 }
3511
3512 static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
3513 unsigned long iova, size_t size)
3514 {
3515 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3516
3517 if (dmar_domain->iotlb_sync_map)
3518 cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1);
3519
3520 return 0;
3521 }
3522
3523 void domain_remove_dev_pasid(struct iommu_domain *domain,
3524 struct device *dev, ioasid_t pasid)
3525 {
3526 struct device_domain_info *info = dev_iommu_priv_get(dev);
3527 struct dev_pasid_info *curr, *dev_pasid = NULL;
3528 struct intel_iommu *iommu = info->iommu;
3529 struct dmar_domain *dmar_domain;
3530 unsigned long flags;
3531
3532 if (!domain)
3533 return;
3534
3535 /* Identity domain has no metadata for pasid. */
3536 if (domain->type == IOMMU_DOMAIN_IDENTITY)
3537 return;
3538
3539 dmar_domain = to_dmar_domain(domain);
3540 spin_lock_irqsave(&dmar_domain->lock, flags);
3541 list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
3542 if (curr->dev == dev && curr->pasid == pasid) {
3543 list_del(&curr->link_domain);
3544 dev_pasid = curr;
3545 break;
3546 }
3547 }
3548 spin_unlock_irqrestore(&dmar_domain->lock, flags);
3549
3550 cache_tag_unassign_domain(dmar_domain, dev, pasid);
3551 domain_detach_iommu(dmar_domain, iommu);
3552 if (!WARN_ON_ONCE(!dev_pasid)) {
3553 intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
3554 kfree(dev_pasid);
3555 }
3556 }
3557
3558 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
3559 struct device *dev, ioasid_t pasid,
3560 struct iommu_domain *old)
3561 {
3562 struct device_domain_info *info = dev_iommu_priv_get(dev);
3563
3564 intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
3565 iopf_for_domain_remove(old, dev);
3566 domain_remove_dev_pasid(old, dev, pasid);
3567
3568 return 0;
3569 }
3570
3571 struct dev_pasid_info *
3572 domain_add_dev_pasid(struct iommu_domain *domain,
3573 struct device *dev, ioasid_t pasid)
3574 {
3575 struct device_domain_info *info = dev_iommu_priv_get(dev);
3576 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3577 struct intel_iommu *iommu = info->iommu;
3578 struct dev_pasid_info *dev_pasid;
3579 unsigned long flags;
3580 int ret;
3581
3582 dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
3583 if (!dev_pasid)
3584 return ERR_PTR(-ENOMEM);
3585
3586 ret = domain_attach_iommu(dmar_domain, iommu);
3587 if (ret)
3588 goto out_free;
3589
3590 ret = cache_tag_assign_domain(dmar_domain, dev, pasid);
3591 if (ret)
3592 goto out_detach_iommu;
3593
3594 dev_pasid->dev = dev;
3595 dev_pasid->pasid = pasid;
3596 spin_lock_irqsave(&dmar_domain->lock, flags);
3597 list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
3598 spin_unlock_irqrestore(&dmar_domain->lock, flags);
3599
3600 return dev_pasid;
3601 out_detach_iommu:
3602 domain_detach_iommu(dmar_domain, iommu);
3603 out_free:
3604 kfree(dev_pasid);
3605 return ERR_PTR(ret);
3606 }
3607
3608 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
3609 struct device *dev, ioasid_t pasid,
3610 struct iommu_domain *old)
3611 {
3612 struct device_domain_info *info = dev_iommu_priv_get(dev);
3613 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3614 struct intel_iommu *iommu = info->iommu;
3615 struct dev_pasid_info *dev_pasid;
3616 int ret;
3617
3618 if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
3619 return -EINVAL;
3620
3621 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
3622 return -EOPNOTSUPP;
3623
3624 if (domain->dirty_ops)
3625 return -EINVAL;
3626
3627 if (context_copied(iommu, info->bus, info->devfn))
3628 return -EBUSY;
3629
3630 ret = paging_domain_compatible(domain, dev);
3631 if (ret)
3632 return ret;
3633
3634 dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
3635 if (IS_ERR(dev_pasid))
3636 return PTR_ERR(dev_pasid);
3637
3638 ret = iopf_for_domain_replace(domain, old, dev);
3639 if (ret)
3640 goto out_remove_dev_pasid;
3641
3642 if (intel_domain_is_fs_paging(dmar_domain))
3643 ret = domain_setup_first_level(iommu, dmar_domain,
3644 dev, pasid, old);
3645 else if (intel_domain_is_ss_paging(dmar_domain))
3646 ret = domain_setup_second_level(iommu, dmar_domain,
3647 dev, pasid, old);
3648 else if (WARN_ON(true))
3649 ret = -EINVAL;
3650
3651 if (ret)
3652 goto out_unwind_iopf;
3653
3654 domain_remove_dev_pasid(old, dev, pasid);
3655
3656 intel_iommu_debugfs_create_dev_pasid(dev_pasid);
3657
3658 return 0;
3659
3660 out_unwind_iopf:
3661 iopf_for_domain_replace(old, domain, dev);
3662 out_remove_dev_pasid:
3663 domain_remove_dev_pasid(domain, dev, pasid);
3664 return ret;
3665 }
3666
3667 static void *intel_iommu_hw_info(struct device *dev, u32 *length,
3668 enum iommu_hw_info_type *type)
3669 {
3670 struct device_domain_info *info = dev_iommu_priv_get(dev);
3671 struct intel_iommu *iommu = info->iommu;
3672 struct iommu_hw_info_vtd *vtd;
3673
3674 if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
3675 *type != IOMMU_HW_INFO_TYPE_INTEL_VTD)
3676 return ERR_PTR(-EOPNOTSUPP);
3677
3678 vtd = kzalloc(sizeof(*vtd), GFP_KERNEL);
3679 if (!vtd)
3680 return ERR_PTR(-ENOMEM);
3681
3682 vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
3683 vtd->cap_reg = iommu->cap;
3684 vtd->ecap_reg = iommu->ecap;
3685 *length = sizeof(*vtd);
3686 *type = IOMMU_HW_INFO_TYPE_INTEL_VTD;
3687 return vtd;
3688 }
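/*
 * Illustrative sketch (not part of the driver): a consumer of the blob
 * returned by intel_iommu_hw_info() would read back the fields populated
 * above. The helper name below is hypothetical.
 *
 *	static bool vtd_hw_info_has_spr17_errata(const struct iommu_hw_info_vtd *vtd)
 *	{
 *		return vtd->flags & IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
 *	}
 */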
3689
3690 /*
3691 * Set dirty tracking for the device list of a domain. The caller must
3692 * hold the domain->lock when calling it.
3693 */
3694 static int device_set_dirty_tracking(struct list_head *devices, bool enable)
3695 {
3696 struct device_domain_info *info;
3697 int ret = 0;
3698
3699 list_for_each_entry(info, devices, link) {
3700 ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
3701 IOMMU_NO_PASID, enable);
3702 if (ret)
3703 break;
3704 }
3705
3706 return ret;
3707 }
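/*
 * Illustrative sketch (not part of the driver): the locking contract stated
 * above means callers wrap the call in the domain lock, mirroring
 * intel_iommu_set_dirty_tracking() below; "dmar_domain" is assumed to be a
 * valid struct dmar_domain pointer.
 *
 *	spin_lock(&dmar_domain->lock);
 *	ret = device_set_dirty_tracking(&dmar_domain->devices, true);
 *	spin_unlock(&dmar_domain->lock);
 */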
3708
3709 static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
3710 bool enable)
3711 {
3712 struct dmar_domain *s1_domain;
3713 unsigned long flags;
3714 int ret;
3715
3716 spin_lock(&domain->s1_lock);
3717 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
3718 spin_lock_irqsave(&s1_domain->lock, flags);
3719 ret = device_set_dirty_tracking(&s1_domain->devices, enable);
3720 spin_unlock_irqrestore(&s1_domain->lock, flags);
3721 if (ret)
3722 goto err_unwind;
3723 }
3724 spin_unlock(&domain->s1_lock);
3725 return 0;
3726
3727 err_unwind:
3728 list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
3729 spin_lock_irqsave(&s1_domain->lock, flags);
3730 device_set_dirty_tracking(&s1_domain->devices,
3731 domain->dirty_tracking);
3732 spin_unlock_irqrestore(&s1_domain->lock, flags);
3733 }
3734 spin_unlock(&domain->s1_lock);
3735 return ret;
3736 }
3737
3738 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
3739 bool enable)
3740 {
3741 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3742 int ret;
3743
3744 spin_lock(&dmar_domain->lock);
3745 if (dmar_domain->dirty_tracking == enable)
3746 goto out_unlock;
3747
3748 ret = device_set_dirty_tracking(&dmar_domain->devices, enable);
3749 if (ret)
3750 goto err_unwind;
3751
3752 if (dmar_domain->nested_parent) {
3753 ret = parent_domain_set_dirty_tracking(dmar_domain, enable);
3754 if (ret)
3755 goto err_unwind;
3756 }
3757
3758 dmar_domain->dirty_tracking = enable;
3759 out_unlock:
3760 spin_unlock(&dmar_domain->lock);
3761
3762 return 0;
3763
3764 err_unwind:
3765 device_set_dirty_tracking(&dmar_domain->devices,
3766 dmar_domain->dirty_tracking);
3767 spin_unlock(&dmar_domain->lock);
3768 return ret;
3769 }
3770
3771 static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
3772 {
3773 struct device_domain_info *info = dev_iommu_priv_get(dev);
3774 struct intel_iommu *iommu = info->iommu;
3775 struct context_entry *context;
3776
3777 spin_lock(&iommu->lock);
3778 context = iommu_context_addr(iommu, bus, devfn, 1);
3779 if (!context) {
3780 spin_unlock(&iommu->lock);
3781 return -ENOMEM;
3782 }
3783
3784 if (context_present(context) && !context_copied(iommu, bus, devfn)) {
3785 spin_unlock(&iommu->lock);
3786 return 0;
3787 }
3788
3789 copied_context_tear_down(iommu, context, bus, devfn);
3790 context_clear_entry(context);
3791 context_set_domain_id(context, FLPT_DEFAULT_DID);
3792
3793 /*
3794 * In pass-through mode, AW must be programmed to indicate the largest
3795 * AGAW value supported by hardware; ASR is ignored by hardware.
3796 */
3797 context_set_address_width(context, iommu->msagaw);
3798 context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH);
3799 context_set_fault_enable(context);
3800 context_set_present(context);
3801 if (!ecap_coherent(iommu->ecap))
3802 clflush_cache_range(context, sizeof(*context));
3803 context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn);
3804 spin_unlock(&iommu->lock);
3805
3806 return 0;
3807 }
3808
3809 static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data)
3810 {
3811 struct device *dev = data;
3812
3813 return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff);
3814 }
3815
3816 static int device_setup_pass_through(struct device *dev)
3817 {
3818 struct device_domain_info *info = dev_iommu_priv_get(dev);
3819
3820 if (!dev_is_pci(dev))
3821 return context_setup_pass_through(dev, info->bus, info->devfn);
3822
3823 return pci_for_each_dma_alias(to_pci_dev(dev),
3824 context_setup_pass_through_cb, dev);
3825 }
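/*
 * Illustrative sketch (not part of the driver): pci_for_each_dma_alias()
 * hands the callback a u16 alias with the bus number in the upper byte and
 * devfn in the lower byte, which is why the callback above decodes it with
 * PCI_BUS_NUM() and a 0xff mask. The alias value here is made up:
 *
 *	u16 alias = 0x1a08;
 *	u8 bus = PCI_BUS_NUM(alias);	// 0x1a
 *	u8 devfn = alias & 0xff;	// 0x08: device 1, function 0
 */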
3826
3827 static int identity_domain_attach_dev(struct iommu_domain *domain,
3828 struct device *dev,
3829 struct iommu_domain *old)
3830 {
3831 struct device_domain_info *info = dev_iommu_priv_get(dev);
3832 struct intel_iommu *iommu = info->iommu;
3833 int ret;
3834
3835 device_block_translation(dev);
3836
3837 if (dev_is_real_dma_subdevice(dev))
3838 return 0;
3839
3840 /*
3841 * No PRI support with the global identity domain. No need to enable or
3842 * disable PRI in this path as the device has already been put in the
3843 * blocking state.
3844 */
3845 if (sm_supported(iommu))
3846 ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
3847 else
3848 ret = device_setup_pass_through(dev);
3849
3850 if (!ret)
3851 info->domain_attached = true;
3852
3853 return ret;
3854 }
3855
3856 static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
3857 struct device *dev, ioasid_t pasid,
3858 struct iommu_domain *old)
3859 {
3860 struct device_domain_info *info = dev_iommu_priv_get(dev);
3861 struct intel_iommu *iommu = info->iommu;
3862 int ret;
3863
3864 if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
3865 return -EOPNOTSUPP;
3866
3867 ret = iopf_for_domain_replace(domain, old, dev);
3868 if (ret)
3869 return ret;
3870
3871 ret = domain_setup_passthrough(iommu, dev, pasid, old);
3872 if (ret) {
3873 iopf_for_domain_replace(old, domain, dev);
3874 return ret;
3875 }
3876
3877 domain_remove_dev_pasid(old, dev, pasid);
3878 return 0;
3879 }
3880
3881 static struct iommu_domain identity_domain = {
3882 .type = IOMMU_DOMAIN_IDENTITY,
3883 .ops = &(const struct iommu_domain_ops) {
3884 .attach_dev = identity_domain_attach_dev,
3885 .set_dev_pasid = identity_domain_set_dev_pasid,
3886 },
3887 };
3888
3889 const struct iommu_domain_ops intel_fs_paging_domain_ops = {
3890 IOMMU_PT_DOMAIN_OPS(x86_64),
3891 .attach_dev = intel_iommu_attach_device,
3892 .set_dev_pasid = intel_iommu_set_dev_pasid,
3893 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
3894 .flush_iotlb_all = intel_flush_iotlb_all,
3895 .iotlb_sync = intel_iommu_tlb_sync,
3896 .free = intel_iommu_domain_free,
3897 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs,
3898 };
3899
3900 const struct iommu_domain_ops intel_ss_paging_domain_ops = {
3901 IOMMU_PT_DOMAIN_OPS(vtdss),
3902 .attach_dev = intel_iommu_attach_device,
3903 .set_dev_pasid = intel_iommu_set_dev_pasid,
3904 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
3905 .flush_iotlb_all = intel_flush_iotlb_all,
3906 .iotlb_sync = intel_iommu_tlb_sync,
3907 .free = intel_iommu_domain_free,
3908 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss,
3909 };
3910
3911 const struct iommu_ops intel_iommu_ops = {
3912 .blocked_domain = &blocking_domain,
3913 .release_domain = &blocking_domain,
3914 .identity_domain = &identity_domain,
3915 .capable = intel_iommu_capable,
3916 .hw_info = intel_iommu_hw_info,
3917 .domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags,
3918 .domain_alloc_sva = intel_svm_domain_alloc,
3919 .domain_alloc_nested = intel_iommu_domain_alloc_nested,
3920 .probe_device = intel_iommu_probe_device,
3921 .probe_finalize = intel_iommu_probe_finalize,
3922 .release_device = intel_iommu_release_device,
3923 .get_resv_regions = intel_iommu_get_resv_regions,
3924 .device_group = intel_iommu_device_group,
3925 .is_attach_deferred = intel_iommu_is_attach_deferred,
3926 .def_domain_type = device_def_domain_type,
3927 .page_response = intel_iommu_page_response,
3928 };
3929
3930 static void quirk_iommu_igfx(struct pci_dev *dev)
3931 {
3932 if (risky_device(dev))
3933 return;
3934
3935 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
3936 disable_igfx_iommu = 1;
3937 }
3938
3939 /* G4x/GM45 integrated gfx dmar support is totally busted. */
3940 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
3941 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
3942 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
3943 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
3944 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
3945 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
3946 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
3947
3948 /* QM57/QS57 integrated gfx malfunctions with dmar */
3949 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx);
3950
3951 /* Broadwell igfx malfunctions with dmar */
3952 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
3953 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
3954 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
3955 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
3956 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
3957 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
3958 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
3959 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
3960 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
3961 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
3962 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
3963 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
3964 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
3965 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
3966 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
3967 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
3968 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
3969 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
3970 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
3971 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
3972 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
3973 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
3974 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
3975 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
3976
3977 static void quirk_iommu_rwbf(struct pci_dev *dev)
3978 {
3979 if (risky_device(dev))
3980 return;
3981
3982 /*
3983 * Mobile 4 Series Chipset neglects to set RWBF capability,
3984 * but needs it. Same seems to hold for the desktop versions.
3985 */
3986 pci_info(dev, "Forcing write-buffer flush capability\n");
3987 rwbf_quirk = 1;
3988 }
3989
3990 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3991 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
3992 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
3993 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
3994 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
3995 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
3996 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
3997
3998 #define GGC 0x52
3999 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
4000 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4001 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
4002 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
4003 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4004 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4005 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4006 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
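/*
 * Illustrative sketch (not part of the driver): judging from the defines
 * above and quirk_calpella_no_shadow_gtt() below, bits 11:8 of GGC encode
 * the graphics memory size and whether space for VT was allocated. The
 * register value used here is made up for the example.
 *
 *	unsigned short ggc = 0x0b50;	// hypothetical GGC value
 *	if ((ggc & GGC_MEMORY_SIZE_MASK) == GGC_MEMORY_SIZE_4M_VT)
 *		;	// 4M with VT enabled
 */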
4007
4008 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4009 {
4010 unsigned short ggc;
4011
4012 if (risky_device(dev))
4013 return;
4014
4015 if (pci_read_config_word(dev, GGC, &ggc))
4016 return;
4017
4018 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4019 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4020 disable_igfx_iommu = 1;
4021 } else if (!disable_igfx_iommu) {
4022 /* we have to ensure the gfx device is idle before we flush */
4023 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
4024 iommu_set_dma_strict();
4025 }
4026 }
4027 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4028 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4029 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4030
4031 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
4032 {
4033 unsigned short ver;
4034
4035 if (!IS_GFX_DEVICE(dev))
4036 return;
4037
4038 ver = (dev->device >> 8) & 0xff;
4039 if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
4040 ver != 0x4e && ver != 0x8a && ver != 0x98 &&
4041 ver != 0x9a && ver != 0xa7 && ver != 0x7d)
4042 return;
4043
4044 if (risky_device(dev))
4045 return;
4046
4047 pci_info(dev, "Skip IOMMU disabling for graphics\n");
4048 iommu_skip_te_disable = 1;
4049 }
4050 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
4051
4052 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4053 ISOCH DMAR unit for the Azalia sound device, but not give it any
4054 TLB entries, which causes it to deadlock. Check for that. We do
4055 this in a function called from init_dmars(), instead of in a PCI
4056 quirk, because we don't want to print the obnoxious "BIOS broken"
4057 message if VT-d is actually disabled.
4058 */
4059 static void __init check_tylersburg_isoch(void)
4060 {
4061 struct pci_dev *pdev;
4062 uint32_t vtisochctrl;
4063
4064 /* If there's no Azalia in the system anyway, forget it. */
4065 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4066 if (!pdev)
4067 return;
4068
4069 if (risky_device(pdev)) {
4070 pci_dev_put(pdev);
4071 return;
4072 }
4073
4074 pci_dev_put(pdev);
4075
4076 /* System Management Registers. Might be hidden, in which case
4077 we can't do the sanity check. But that's OK, because the
4078 known-broken BIOSes _don't_ actually hide it, so far. */
4079 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4080 if (!pdev)
4081 return;
4082
4083 if (risky_device(pdev)) {
4084 pci_dev_put(pdev);
4085 return;
4086 }
4087
4088 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4089 pci_dev_put(pdev);
4090 return;
4091 }
4092
4093 pci_dev_put(pdev);
4094
4095 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4096 if (vtisochctrl & 1)
4097 return;
4098
4099 /* Drop all bits other than the number of TLB entries */
4100 vtisochctrl &= 0x1c;
4101
4102 /* If we have the recommended number of TLB entries (16), fine. */
4103 if (vtisochctrl == 0x10)
4104 return;
4105
4106 /* Zero TLB entries? You get to ride the short bus to school. */
4107 if (!vtisochctrl) {
4108 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4109 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4110 dmi_get_system_info(DMI_BIOS_VENDOR),
4111 dmi_get_system_info(DMI_BIOS_VERSION),
4112 dmi_get_system_info(DMI_PRODUCT_VERSION));
4113 iommu_identity_mapping |= IDENTMAP_AZALIA;
4114 return;
4115 }
4116
4117 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4118 vtisochctrl);
4119 }
4120
4121 /*
4122 * Here we deal with a device TLB defect where the device may inadvertently
4123 * issue an ATS invalidation completion before posted writes that were
4124 * initiated with a translated address and used translations matching the
4125 * invalidation address range, violating the invalidation completion ordering.
4126 * Therefore, any use case that cannot guarantee DMA is stopped before unmap is
4127 * vulnerable to this defect. In other words, any dTLB invalidation initiated
4128 * outside the control of the trusted/privileged host device driver must use
4129 * this quirk.
4130 * Device TLBs are invalidated under the following six conditions:
4131 * 1. Device driver does a DMA API unmap of an IOVA
4132 * 2. Device driver unbinds a PASID from a process, sva_unbind_device()
4133 * 3. PASID is torn down, after the PASID cache is flushed, e.g. process
4134 *    exit_mmap() due to a crash
4135 * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
4136 *    the VM has to free pages that were unmapped
4137 * 5. Userspace driver unmaps a DMA buffer
4138 * 6. Cache invalidation in vSVA usage (upcoming)
4139 *
4140 * For #1 and #2, device drivers are responsible for stopping DMA traffic
4141 * before unmap/unbind. For #3, the iommu driver gets an mmu_notifier callback
4142 * to invalidate the TLB the same way as a normal user unmap, which will use
4143 * this quirk; the dTLB invalidation after the PASID cache flush does not need it.
4144 *
4145 * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
4146 */
4147 void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
4148 unsigned long address, unsigned long mask,
4149 u32 pasid, u16 qdep)
4150 {
4151 u16 sid;
4152
4153 if (likely(!info->dtlb_extra_inval))
4154 return;
4155
4156 sid = PCI_DEVID(info->bus, info->devfn);
4157 if (pasid == IOMMU_NO_PASID) {
4158 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
4159 qdep, address, mask);
4160 } else {
4161 qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
4162 pasid, qdep, address, mask);
4163 }
4164 }
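/*
 * Illustrative sketch (not part of the driver): a caller that has just issued
 * the normal device-IOTLB invalidation for a range would follow it with this
 * quirk call using the same address/mask pair. The values below are
 * assumptions for the example only.
 *
 *	// extra flush for an order-0 (4KiB) range at iova, RID only (no PASID)
 *	quirk_extra_dev_tlb_flush(info, iova, 0, IOMMU_NO_PASID, qdep);
 */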
4165
4166 #define ecmd_get_status_code(res) (((res) & 0xff) >> 1)
4167
4168 /*
4169 * Function to submit a command to the enhanced command interface. The
4170 * valid enhanced command descriptions are defined in Table 47 of the
4171 * VT-d spec. The VT-d hardware implementation may support some but not
4172 * all commands, which can be determined by checking the Enhanced
4173 * Command Capability Register.
4174 *
4175 * Return values:
4176 * - 0: Command successful without any error;
4177 * - Negative: software error value;
4178 * - Nonzero positive: failure status code defined in Table 48.
4179 */
4180 int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
4181 {
4182 unsigned long flags;
4183 u64 res;
4184 int ret;
4185
4186 if (!cap_ecmds(iommu->cap))
4187 return -ENODEV;
4188
4189 raw_spin_lock_irqsave(&iommu->register_lock, flags);
4190
4191 res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
4192 if (res & DMA_ECMD_ECRSP_IP) {
4193 ret = -EBUSY;
4194 goto err;
4195 }
4196
4197 /*
4198 * Unconditionally write operand B, because:
4199 * - There is no side effect if an ecmd doesn't require an
4200 * operand B, but we set the register to some value.
4201 * - It's not invoked in any critical path, so the extra MMIO
4202 * write is not a performance concern.
4203 */
4204 dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
4205 dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));
4206
4207 IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
4208 !(res & DMA_ECMD_ECRSP_IP), res);
4209
4210 if (res & DMA_ECMD_ECRSP_IP) {
4211 ret = -ETIMEDOUT;
4212 goto err;
4213 }
4214
4215 ret = ecmd_get_status_code(res);
4216 err:
4217 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
4218
4219 return ret;
4220 }
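/*
 * Illustrative sketch (not part of the driver): per the return convention
 * documented above, a caller distinguishes software errors (negative) from
 * hardware failure status codes (positive). The opcode below is a
 * placeholder, not a real command from Table 47.
 *
 *	u8 ecmd = 0x1;	// hypothetical opcode
 *	int ret = ecmd_submit_sync(iommu, ecmd, 0, 0);
 *	if (ret < 0)
 *		pr_err("ecmd not submitted: %d\n", ret);
 *	else if (ret > 0)
 *		pr_err("ecmd failed with status code %d\n", ret);
 */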
4221
4222 MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
4223