xref: /linux/drivers/iommu/intel/iommu.c (revision dd8a3c6cd531dca5917111a94fa3074077f6ba5a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2006-2014 Intel Corporation.
4  *
5  * Authors: David Woodhouse <dwmw2@infradead.org>,
6  *          Ashok Raj <ashok.raj@intel.com>,
7  *          Shaohua Li <shaohua.li@intel.com>,
8  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9  *          Fenghua Yu <fenghua.yu@intel.com>
10  *          Joerg Roedel <jroedel@suse.de>
11  */
12 
13 #define pr_fmt(fmt)     "DMAR: " fmt
14 #define dev_fmt(fmt)    pr_fmt(fmt)
15 
16 #include <linux/crash_dump.h>
17 #include <linux/dma-direct.h>
18 #include <linux/dmi.h>
19 #include <linux/memory.h>
20 #include <linux/pci.h>
21 #include <linux/pci-ats.h>
22 #include <linux/spinlock.h>
23 #include <linux/syscore_ops.h>
24 #include <linux/tboot.h>
25 #include <uapi/linux/iommufd.h>
26 
27 #include "iommu.h"
28 #include "../dma-iommu.h"
29 #include "../irq_remapping.h"
30 #include "../iommu-pages.h"
31 #include "pasid.h"
32 #include "perfmon.h"
33 
/* Sizes used when flushing the root table and a context table. */
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

/*
 * PCI device classification helpers. Every use of the macro argument is
 * parenthesized so that callers may pass any pointer expression
 * (e.g. "p + 1") without it being mis-parsed after expansion.
 */
#define IS_GFX_DEVICE(pdev) pci_is_display(pdev)
#define IS_USB_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

/* Width passed to __iommu_calculate_agaw() by iommu_calculate_agaw(). */
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
47 
48 static void __init check_tylersburg_isoch(void);
49 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
50 					  bool enable);
51 static int rwbf_quirk;
52 
53 #define rwbf_required(iommu)	(rwbf_quirk || cap_rwbf((iommu)->cap))
54 
55 /*
56  * set to 1 to panic kernel if can't successfully enable VT-d
57  * (used when kernel is launched w/ TXT)
58  */
59 static int force_on = 0;
60 static int intel_iommu_tboot_noforce;
61 static int no_platform_optin;
62 
63 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
64 
65 /*
66  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
67  * if marked present.
68  */
69 static phys_addr_t root_entry_lctp(struct root_entry *re)
70 {
71 	if (!(re->lo & 1))
72 		return 0;
73 
74 	return re->lo & VTD_PAGE_MASK;
75 }
76 
77 /*
78  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
79  * if marked present.
80  */
81 static phys_addr_t root_entry_uctp(struct root_entry *re)
82 {
83 	if (!(re->hi & 1))
84 		return 0;
85 
86 	return re->hi & VTD_PAGE_MASK;
87 }
88 
89 static int device_rid_cmp_key(const void *key, const struct rb_node *node)
90 {
91 	struct device_domain_info *info =
92 		rb_entry(node, struct device_domain_info, node);
93 	const u16 *rid_lhs = key;
94 
95 	if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
96 		return -1;
97 
98 	if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
99 		return 1;
100 
101 	return 0;
102 }
103 
104 static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
105 {
106 	struct device_domain_info *info =
107 		rb_entry(lhs, struct device_domain_info, node);
108 	u16 key = PCI_DEVID(info->bus, info->devfn);
109 
110 	return device_rid_cmp_key(&key, rhs);
111 }
112 
113 /*
114  * Looks up an IOMMU-probed device using its source ID.
115  *
116  * Returns the pointer to the device if there is a match. Otherwise,
117  * returns NULL.
118  *
119  * Note that this helper doesn't guarantee that the device won't be
120  * released by the iommu subsystem after being returned. The caller
121  * should use its own synchronization mechanism to avoid the device
122  * being released during its use if its possibly the case.
123  */
124 struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid)
125 {
126 	struct device_domain_info *info = NULL;
127 	struct rb_node *node;
128 	unsigned long flags;
129 
130 	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
131 	node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key);
132 	if (node)
133 		info = rb_entry(node, struct device_domain_info, node);
134 	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
135 
136 	return info ? info->dev : NULL;
137 }
138 
139 static int device_rbtree_insert(struct intel_iommu *iommu,
140 				struct device_domain_info *info)
141 {
142 	struct rb_node *curr;
143 	unsigned long flags;
144 
145 	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
146 	curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
147 	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
148 	if (WARN_ON(curr))
149 		return -EEXIST;
150 
151 	return 0;
152 }
153 
154 static void device_rbtree_remove(struct device_domain_info *info)
155 {
156 	struct intel_iommu *iommu = info->iommu;
157 	unsigned long flags;
158 
159 	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
160 	if (!RB_EMPTY_NODE(&info->node)) {
161 		rb_erase(&info->node, &iommu->device_rbtree);
162 		RB_CLEAR_NODE(&info->node);
163 	}
164 	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
165 }
166 
167 struct dmar_rmrr_unit {
168 	struct list_head list;		/* list of rmrr units	*/
169 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
170 	u64	base_address;		/* reserved base address*/
171 	u64	end_address;		/* reserved end address */
172 	struct dmar_dev_scope *devices;	/* target devices */
173 	int	devices_cnt;		/* target device count */
174 };
175 
176 struct dmar_atsr_unit {
177 	struct list_head list;		/* list of ATSR units */
178 	struct acpi_dmar_header *hdr;	/* ACPI header */
179 	struct dmar_dev_scope *devices;	/* target devices */
180 	int devices_cnt;		/* target device count */
181 	u8 include_all:1;		/* include all ports */
182 };
183 
184 struct dmar_satc_unit {
185 	struct list_head list;		/* list of SATC units */
186 	struct acpi_dmar_header *hdr;	/* ACPI header */
187 	struct dmar_dev_scope *devices;	/* target devices */
188 	struct intel_iommu *iommu;	/* the corresponding iommu */
189 	int devices_cnt;		/* target device count */
190 	u8 atc_required:1;		/* ATS is required */
191 };
192 
/* Global lists of the ATSR, RMRR and SATC unit descriptors. */
static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);
static LIST_HEAD(dmar_satc_units);

/* Walk every entry on dmar_rmrr_units, using @rmrr as the cursor. */
#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
199 
200 static void intel_iommu_domain_free(struct iommu_domain *domain);
201 
202 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
203 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
204 
205 int intel_iommu_enabled = 0;
206 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
207 
208 static int intel_iommu_superpage = 1;
209 static int iommu_identity_mapping;
210 static int iommu_skip_te_disable;
211 static int disable_igfx_iommu;
212 
213 #define IDENTMAP_AZALIA		4
214 
215 const struct iommu_ops intel_iommu_ops;
216 
217 static bool translation_pre_enabled(struct intel_iommu *iommu)
218 {
219 	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
220 }
221 
222 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
223 {
224 	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
225 }
226 
227 static void init_translation_status(struct intel_iommu *iommu)
228 {
229 	u32 gsts;
230 
231 	gsts = readl(iommu->reg + DMAR_GSTS_REG);
232 	if (gsts & DMA_GSTS_TES)
233 		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
234 }
235 
236 static int __init intel_iommu_setup(char *str)
237 {
238 	if (!str)
239 		return -EINVAL;
240 
241 	while (*str) {
242 		if (!strncmp(str, "on", 2)) {
243 			dmar_disabled = 0;
244 			pr_info("IOMMU enabled\n");
245 		} else if (!strncmp(str, "off", 3)) {
246 			dmar_disabled = 1;
247 			no_platform_optin = 1;
248 			pr_info("IOMMU disabled\n");
249 		} else if (!strncmp(str, "igfx_off", 8)) {
250 			disable_igfx_iommu = 1;
251 			pr_info("Disable GFX device mapping\n");
252 		} else if (!strncmp(str, "forcedac", 8)) {
253 			pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
254 			iommu_dma_forcedac = true;
255 		} else if (!strncmp(str, "strict", 6)) {
256 			pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
257 			iommu_set_dma_strict();
258 		} else if (!strncmp(str, "sp_off", 6)) {
259 			pr_info("Disable supported super page\n");
260 			intel_iommu_superpage = 0;
261 		} else if (!strncmp(str, "sm_on", 5)) {
262 			pr_info("Enable scalable mode if hardware supports\n");
263 			intel_iommu_sm = 1;
264 		} else if (!strncmp(str, "sm_off", 6)) {
265 			pr_info("Scalable mode is disallowed\n");
266 			intel_iommu_sm = 0;
267 		} else if (!strncmp(str, "tboot_noforce", 13)) {
268 			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
269 			intel_iommu_tboot_noforce = 1;
270 		} else {
271 			pr_notice("Unknown option - '%s'\n", str);
272 		}
273 
274 		str += strcspn(str, ",");
275 		while (*str == ',')
276 			str++;
277 	}
278 
279 	return 1;
280 }
281 __setup("intel_iommu=", intel_iommu_setup);
282 
283 /*
284  * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
285  * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
286  * the returned SAGAW.
287  */
288 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
289 {
290 	unsigned long fl_sagaw, sl_sagaw;
291 
292 	fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0);
293 	sl_sagaw = cap_sagaw(iommu->cap);
294 
295 	/* Second level only. */
296 	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
297 		return sl_sagaw;
298 
299 	/* First level only. */
300 	if (!ecap_slts(iommu->ecap))
301 		return fl_sagaw;
302 
303 	return fl_sagaw & sl_sagaw;
304 }
305 
/*
 * Pick the largest supported AGAW that does not exceed the one
 * corresponding to @max_gaw. Returns a negative value when no bit at or
 * below that AGAW is set in the supported mask.
 */
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long supported = __iommu_calculate_sagaw(iommu);
	int agaw = width_to_agaw(max_gaw);

	while (agaw >= 0 && !test_bit(agaw, &supported))
		agaw--;

	return agaw;
}
319 
320 /*
321  * Calculate max SAGAW for each iommu.
322  */
323 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
324 {
325 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
326 }
327 
328 /*
329  * calculate agaw for each iommu.
330  * "SAGAW" may be different across iommus, use a default agaw, and
331  * get a supported less agaw for iommus that don't support the default agaw.
332  */
333 int iommu_calculate_agaw(struct intel_iommu *iommu)
334 {
335 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
336 }
337 
338 static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
339 {
340 	return sm_supported(iommu) ?
341 			ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
342 }
343 
344 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
345 					 u8 devfn, int alloc)
346 {
347 	struct root_entry *root = &iommu->root_entry[bus];
348 	struct context_entry *context;
349 	u64 *entry;
350 
351 	/*
352 	 * Except that the caller requested to allocate a new entry,
353 	 * returning a copied context entry makes no sense.
354 	 */
355 	if (!alloc && context_copied(iommu, bus, devfn))
356 		return NULL;
357 
358 	entry = &root->lo;
359 	if (sm_supported(iommu)) {
360 		if (devfn >= 0x80) {
361 			devfn -= 0x80;
362 			entry = &root->hi;
363 		}
364 		devfn *= 2;
365 	}
366 	if (*entry & 1)
367 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
368 	else {
369 		unsigned long phy_addr;
370 		if (!alloc)
371 			return NULL;
372 
373 		context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
374 						    SZ_4K);
375 		if (!context)
376 			return NULL;
377 
378 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
379 		phy_addr = virt_to_phys((void *)context);
380 		*entry = phy_addr | 1;
381 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
382 	}
383 	return &context[devfn];
384 }
385 
386 /**
387  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
388  *				 sub-hierarchy of a candidate PCI-PCI bridge
389  * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
390  * @bridge: the candidate PCI-PCI bridge
391  *
392  * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
393  */
394 static bool
395 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
396 {
397 	struct pci_dev *pdev, *pbridge;
398 
399 	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
400 		return false;
401 
402 	pdev = to_pci_dev(dev);
403 	pbridge = to_pci_dev(bridge);
404 
405 	if (pbridge->subordinate &&
406 	    pbridge->subordinate->number <= pdev->bus->number &&
407 	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
408 		return true;
409 
410 	return false;
411 }
412 
413 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
414 {
415 	struct dmar_drhd_unit *drhd;
416 	u32 vtbar;
417 	int rc;
418 
419 	/* We know that this device on this chipset has its own IOMMU.
420 	 * If we find it under a different IOMMU, then the BIOS is lying
421 	 * to us. Hope that the IOMMU for this device is actually
422 	 * disabled, and it needs no translation...
423 	 */
424 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
425 	if (rc) {
426 		/* "can't" happen */
427 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
428 		return false;
429 	}
430 	vtbar &= 0xffff0000;
431 
432 	/* we know that the this iommu should be at offset 0xa000 from vtbar */
433 	drhd = dmar_find_matched_drhd_unit(pdev);
434 	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
435 		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
436 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
437 		return true;
438 	}
439 
440 	return false;
441 }
442 
443 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
444 {
445 	if (!iommu || iommu->drhd->ignored)
446 		return true;
447 
448 	if (dev_is_pci(dev)) {
449 		struct pci_dev *pdev = to_pci_dev(dev);
450 
451 		if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
452 		    pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
453 		    quirk_ioat_snb_local_iommu(pdev))
454 			return true;
455 	}
456 
457 	return false;
458 }
459 
460 static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn)
461 {
462 	struct dmar_drhd_unit *drhd = NULL;
463 	struct pci_dev *pdev = NULL;
464 	struct intel_iommu *iommu;
465 	struct device *tmp;
466 	u16 segment = 0;
467 	int i;
468 
469 	if (!dev)
470 		return NULL;
471 
472 	if (dev_is_pci(dev)) {
473 		struct pci_dev *pf_pdev;
474 
475 		pdev = pci_real_dma_dev(to_pci_dev(dev));
476 
477 		/* VFs aren't listed in scope tables; we need to look up
478 		 * the PF instead to find the IOMMU. */
479 		pf_pdev = pci_physfn(pdev);
480 		dev = &pf_pdev->dev;
481 		segment = pci_domain_nr(pdev->bus);
482 	} else if (has_acpi_companion(dev))
483 		dev = &ACPI_COMPANION(dev)->dev;
484 
485 	rcu_read_lock();
486 	for_each_iommu(iommu, drhd) {
487 		if (pdev && segment != drhd->segment)
488 			continue;
489 
490 		for_each_active_dev_scope(drhd->devices,
491 					  drhd->devices_cnt, i, tmp) {
492 			if (tmp == dev) {
493 				/* For a VF use its original BDF# not that of the PF
494 				 * which we used for the IOMMU lookup. Strictly speaking
495 				 * we could do this for all PCI devices; we only need to
496 				 * get the BDF# from the scope table for ACPI matches. */
497 				if (pdev && pdev->is_virtfn)
498 					goto got_pdev;
499 
500 				if (bus && devfn) {
501 					*bus = drhd->devices[i].bus;
502 					*devfn = drhd->devices[i].devfn;
503 				}
504 				goto out;
505 			}
506 
507 			if (is_downstream_to_pci_bridge(dev, tmp))
508 				goto got_pdev;
509 		}
510 
511 		if (pdev && drhd->include_all) {
512 got_pdev:
513 			if (bus && devfn) {
514 				*bus = pdev->bus->number;
515 				*devfn = pdev->devfn;
516 			}
517 			goto out;
518 		}
519 	}
520 	iommu = NULL;
521 out:
522 	if (iommu_is_dummy(iommu, dev))
523 		iommu = NULL;
524 
525 	rcu_read_unlock();
526 
527 	return iommu;
528 }
529 
530 static void free_context_table(struct intel_iommu *iommu)
531 {
532 	struct context_entry *context;
533 	int i;
534 
535 	if (!iommu->root_entry)
536 		return;
537 
538 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
539 		context = iommu_context_addr(iommu, i, 0, 0);
540 		if (context)
541 			iommu_free_pages(context);
542 
543 		if (!sm_supported(iommu))
544 			continue;
545 
546 		context = iommu_context_addr(iommu, i, 0x80, 0);
547 		if (context)
548 			iommu_free_pages(context);
549 	}
550 
551 	iommu_free_pages(iommu->root_entry);
552 	iommu->root_entry = NULL;
553 }
554 
555 #ifdef CONFIG_DMAR_DEBUG
556 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
557 			 u8 bus, u8 devfn, struct dma_pte *parent, int level)
558 {
559 	struct dma_pte *pte;
560 	int offset;
561 
562 	while (1) {
563 		offset = pfn_level_offset(pfn, level);
564 		pte = &parent[offset];
565 
566 		pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
567 
568 		if (!dma_pte_present(pte)) {
569 			pr_info("page table not present at level %d\n", level - 1);
570 			break;
571 		}
572 
573 		if (level == 1 || dma_pte_superpage(pte))
574 			break;
575 
576 		parent = phys_to_virt(dma_pte_addr(pte));
577 		level--;
578 	}
579 }
580 
581 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
582 			  unsigned long long addr, u32 pasid)
583 {
584 	struct pasid_dir_entry *dir, *pde;
585 	struct pasid_entry *entries, *pte;
586 	struct context_entry *ctx_entry;
587 	struct root_entry *rt_entry;
588 	int i, dir_index, index, level;
589 	u8 devfn = source_id & 0xff;
590 	u8 bus = source_id >> 8;
591 	struct dma_pte *pgtable;
592 
593 	pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
594 
595 	/* root entry dump */
596 	if (!iommu->root_entry) {
597 		pr_info("root table is not present\n");
598 		return;
599 	}
600 	rt_entry = &iommu->root_entry[bus];
601 
602 	if (sm_supported(iommu))
603 		pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
604 			rt_entry->hi, rt_entry->lo);
605 	else
606 		pr_info("root entry: 0x%016llx", rt_entry->lo);
607 
608 	/* context entry dump */
609 	ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
610 	if (!ctx_entry) {
611 		pr_info("context table is not present\n");
612 		return;
613 	}
614 
615 	pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
616 		ctx_entry->hi, ctx_entry->lo);
617 
618 	/* legacy mode does not require PASID entries */
619 	if (!sm_supported(iommu)) {
620 		if (!context_present(ctx_entry)) {
621 			pr_info("legacy mode page table is not present\n");
622 			return;
623 		}
624 		level = agaw_to_level(ctx_entry->hi & 7);
625 		pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
626 		goto pgtable_walk;
627 	}
628 
629 	if (!context_present(ctx_entry)) {
630 		pr_info("pasid directory table is not present\n");
631 		return;
632 	}
633 
634 	/* get the pointer to pasid directory entry */
635 	dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
636 
637 	/* For request-without-pasid, get the pasid from context entry */
638 	if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
639 		pasid = IOMMU_NO_PASID;
640 
641 	dir_index = pasid >> PASID_PDE_SHIFT;
642 	pde = &dir[dir_index];
643 	pr_info("pasid dir entry: 0x%016llx\n", pde->val);
644 
645 	/* get the pointer to the pasid table entry */
646 	entries = get_pasid_table_from_pde(pde);
647 	if (!entries) {
648 		pr_info("pasid table is not present\n");
649 		return;
650 	}
651 	index = pasid & PASID_PTE_MASK;
652 	pte = &entries[index];
653 	for (i = 0; i < ARRAY_SIZE(pte->val); i++)
654 		pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
655 
656 	if (!pasid_pte_is_present(pte)) {
657 		pr_info("scalable mode page table is not present\n");
658 		return;
659 	}
660 
661 	if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
662 		level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
663 		pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
664 	} else {
665 		level = agaw_to_level((pte->val[0] >> 2) & 0x7);
666 		pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
667 	}
668 
669 pgtable_walk:
670 	pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
671 }
672 #endif
673 
674 /* iommu handling */
675 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
676 {
677 	struct root_entry *root;
678 
679 	root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K);
680 	if (!root) {
681 		pr_err("Allocating root entry for %s failed\n",
682 			iommu->name);
683 		return -ENOMEM;
684 	}
685 
686 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
687 	iommu->root_entry = root;
688 
689 	return 0;
690 }
691 
692 static void iommu_set_root_entry(struct intel_iommu *iommu)
693 {
694 	u64 addr;
695 	u32 sts;
696 	unsigned long flag;
697 
698 	addr = virt_to_phys(iommu->root_entry);
699 	if (sm_supported(iommu))
700 		addr |= DMA_RTADDR_SMT;
701 
702 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
703 	writeq(addr, iommu->reg + DMAR_RTADDR_REG);
704 
705 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
706 
707 	/* Make sure hardware complete it */
708 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
709 		      readl, (sts & DMA_GSTS_RTPS), sts);
710 
711 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
712 
713 	/*
714 	 * Hardware invalidates all DMA remapping hardware translation
715 	 * caches as part of SRTP flow.
716 	 */
717 	if (cap_esrtps(iommu->cap))
718 		return;
719 
720 	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
721 	if (sm_supported(iommu))
722 		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
723 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
724 }
725 
726 void iommu_flush_write_buffer(struct intel_iommu *iommu)
727 {
728 	u32 val;
729 	unsigned long flag;
730 
731 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
732 		return;
733 
734 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
735 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
736 
737 	/* Make sure hardware complete it */
738 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
739 		      readl, (!(val & DMA_GSTS_WBFS)), val);
740 
741 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
742 }
743 
744 /* return value determine if we need a write buffer flush */
745 static void __iommu_flush_context(struct intel_iommu *iommu,
746 				  u16 did, u16 source_id, u8 function_mask,
747 				  u64 type)
748 {
749 	u64 val = 0;
750 	unsigned long flag;
751 
752 	switch (type) {
753 	case DMA_CCMD_GLOBAL_INVL:
754 		val = DMA_CCMD_GLOBAL_INVL;
755 		break;
756 	case DMA_CCMD_DOMAIN_INVL:
757 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
758 		break;
759 	case DMA_CCMD_DEVICE_INVL:
760 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
761 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
762 		break;
763 	default:
764 		pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
765 			iommu->name, type);
766 		return;
767 	}
768 	val |= DMA_CCMD_ICC;
769 
770 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
771 	writeq(val, iommu->reg + DMAR_CCMD_REG);
772 
773 	/* Make sure hardware complete it */
774 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
775 		readq, (!(val & DMA_CCMD_ICC)), val);
776 
777 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
778 }
779 
780 void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
781 			 unsigned int size_order, u64 type)
782 {
783 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
784 	u64 val = 0, val_iva = 0;
785 	unsigned long flag;
786 
787 	switch (type) {
788 	case DMA_TLB_GLOBAL_FLUSH:
789 		/* global flush doesn't need set IVA_REG */
790 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
791 		break;
792 	case DMA_TLB_DSI_FLUSH:
793 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
794 		break;
795 	case DMA_TLB_PSI_FLUSH:
796 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
797 		/* IH bit is passed in as part of address */
798 		val_iva = size_order | addr;
799 		break;
800 	default:
801 		pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
802 			iommu->name, type);
803 		return;
804 	}
805 
806 	if (cap_write_drain(iommu->cap))
807 		val |= DMA_TLB_WRITE_DRAIN;
808 
809 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
810 	/* Note: Only uses first TLB reg currently */
811 	if (val_iva)
812 		writeq(val_iva, iommu->reg + tlb_offset);
813 	writeq(val, iommu->reg + tlb_offset + 8);
814 
815 	/* Make sure hardware complete it */
816 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
817 		readq, (!(val & DMA_TLB_IVT)), val);
818 
819 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
820 
821 	/* check IOTLB invalidation granularity */
822 	if (DMA_TLB_IAIG(val) == 0)
823 		pr_err("Flush IOTLB failed\n");
824 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
825 		pr_debug("TLB flush request %Lx, actual %Lx\n",
826 			(unsigned long long)DMA_TLB_IIRG(type),
827 			(unsigned long long)DMA_TLB_IAIG(val));
828 }
829 
830 static struct device_domain_info *
831 domain_lookup_dev_info(struct dmar_domain *domain,
832 		       struct intel_iommu *iommu, u8 bus, u8 devfn)
833 {
834 	struct device_domain_info *info;
835 	unsigned long flags;
836 
837 	spin_lock_irqsave(&domain->lock, flags);
838 	list_for_each_entry(info, &domain->devices, link) {
839 		if (info->iommu == iommu && info->bus == bus &&
840 		    info->devfn == devfn) {
841 			spin_unlock_irqrestore(&domain->lock, flags);
842 			return info;
843 		}
844 	}
845 	spin_unlock_irqrestore(&domain->lock, flags);
846 
847 	return NULL;
848 }
849 
850 /*
851  * The extra devTLB flush quirk impacts those QAT devices with PCI device
852  * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
853  * check because it applies only to the built-in QAT devices and it doesn't
854  * grant additional privileges.
855  */
856 #define BUGGY_QAT_DEVID_MASK 0x4940
857 static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
858 {
859 	if (pdev->vendor != PCI_VENDOR_ID_INTEL)
860 		return false;
861 
862 	if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
863 		return false;
864 
865 	return true;
866 }
867 
868 static void iommu_enable_pci_ats(struct device_domain_info *info)
869 {
870 	struct pci_dev *pdev;
871 
872 	if (!info->ats_supported)
873 		return;
874 
875 	pdev = to_pci_dev(info->dev);
876 	if (!pci_ats_page_aligned(pdev))
877 		return;
878 
879 	if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT))
880 		info->ats_enabled = 1;
881 }
882 
883 static void iommu_disable_pci_ats(struct device_domain_info *info)
884 {
885 	if (!info->ats_enabled)
886 		return;
887 
888 	pci_disable_ats(to_pci_dev(info->dev));
889 	info->ats_enabled = 0;
890 }
891 
892 static void iommu_enable_pci_pri(struct device_domain_info *info)
893 {
894 	struct pci_dev *pdev;
895 
896 	if (!info->ats_enabled || !info->pri_supported)
897 		return;
898 
899 	pdev = to_pci_dev(info->dev);
900 	/* PASID is required in PRG Response Message. */
901 	if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
902 		return;
903 
904 	if (pci_reset_pri(pdev))
905 		return;
906 
907 	if (!pci_enable_pri(pdev, PRQ_DEPTH))
908 		info->pri_enabled = 1;
909 }
910 
911 static void iommu_disable_pci_pri(struct device_domain_info *info)
912 {
913 	if (!info->pri_enabled)
914 		return;
915 
916 	if (WARN_ON(info->iopf_refcount))
917 		iopf_queue_remove_device(info->iommu->iopf_queue, info->dev);
918 
919 	pci_disable_pri(to_pci_dev(info->dev));
920 	info->pri_enabled = 0;
921 }
922 
/* Flush all cache tags of the dmar domain that wraps @domain. */
static void intel_flush_iotlb_all(struct iommu_domain *domain)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	cache_tag_flush_all(dmar_domain);
}
927 
928 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
929 {
930 	u32 pmen;
931 	unsigned long flags;
932 
933 	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
934 		return;
935 
936 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
937 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
938 	pmen &= ~DMA_PMEN_EPM;
939 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
940 
941 	/* wait for the protected region status bit to clear */
942 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
943 		readl, !(pmen & DMA_PMEN_PRS), pmen);
944 
945 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
946 }
947 
948 static void iommu_enable_translation(struct intel_iommu *iommu)
949 {
950 	u32 sts;
951 	unsigned long flags;
952 
953 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
954 	iommu->gcmd |= DMA_GCMD_TE;
955 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
956 
957 	/* Make sure hardware complete it */
958 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
959 		      readl, (sts & DMA_GSTS_TES), sts);
960 
961 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
962 }
963 
964 static void iommu_disable_translation(struct intel_iommu *iommu)
965 {
966 	u32 sts;
967 	unsigned long flag;
968 
969 	if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
970 	    (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
971 		return;
972 
973 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
974 	iommu->gcmd &= ~DMA_GCMD_TE;
975 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
976 
977 	/* Make sure hardware complete it */
978 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
979 		      readl, (!(sts & DMA_GSTS_TES)), sts);
980 
981 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
982 }
983 
984 static void disable_dmar_iommu(struct intel_iommu *iommu)
985 {
986 	/*
987 	 * All iommu domains must have been detached from the devices,
988 	 * hence there should be no domain IDs in use.
989 	 */
990 	if (WARN_ON(!ida_is_empty(&iommu->domain_ida)))
991 		return;
992 
993 	if (iommu->gcmd & DMA_GCMD_TE)
994 		iommu_disable_translation(iommu);
995 }
996 
997 static void free_dmar_iommu(struct intel_iommu *iommu)
998 {
999 	if (iommu->copied_tables) {
1000 		bitmap_free(iommu->copied_tables);
1001 		iommu->copied_tables = NULL;
1002 	}
1003 
1004 	/* free context mapping */
1005 	free_context_table(iommu);
1006 
1007 	if (ecap_prs(iommu->ecap))
1008 		intel_iommu_finish_prq(iommu);
1009 }
1010 
1011 /*
1012  * Check and return whether first level is used by default for
1013  * DMA translation.
1014  */
1015 static bool first_level_by_default(struct intel_iommu *iommu)
1016 {
1017 	/* Only SL is available in legacy mode */
1018 	if (!sm_supported(iommu))
1019 		return false;
1020 
1021 	/* Only level (either FL or SL) is available, just use it */
1022 	if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap))
1023 		return ecap_flts(iommu->ecap);
1024 
1025 	return true;
1026 }
1027 
1028 int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
1029 {
1030 	struct iommu_domain_info *info, *curr;
1031 	int num, ret = -ENOSPC;
1032 
1033 	if (domain->domain.type == IOMMU_DOMAIN_SVA)
1034 		return 0;
1035 
1036 	info = kzalloc_obj(*info);
1037 	if (!info)
1038 		return -ENOMEM;
1039 
1040 	guard(mutex)(&iommu->did_lock);
1041 	curr = xa_load(&domain->iommu_array, iommu->seq_id);
1042 	if (curr) {
1043 		curr->refcnt++;
1044 		kfree(info);
1045 		return 0;
1046 	}
1047 
1048 	num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
1049 			      cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
1050 	if (num < 0) {
1051 		pr_err("%s: No free domain ids\n", iommu->name);
1052 		goto err_unlock;
1053 	}
1054 
1055 	info->refcnt	= 1;
1056 	info->did	= num;
1057 	info->iommu	= iommu;
1058 	curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
1059 			  NULL, info, GFP_KERNEL);
1060 	if (curr) {
1061 		ret = xa_err(curr) ? : -EBUSY;
1062 		goto err_clear;
1063 	}
1064 
1065 	return 0;
1066 
1067 err_clear:
1068 	ida_free(&iommu->domain_ida, info->did);
1069 err_unlock:
1070 	kfree(info);
1071 	return ret;
1072 }
1073 
1074 void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
1075 {
1076 	struct iommu_domain_info *info;
1077 
1078 	if (domain->domain.type == IOMMU_DOMAIN_SVA)
1079 		return;
1080 
1081 	guard(mutex)(&iommu->did_lock);
1082 	info = xa_load(&domain->iommu_array, iommu->seq_id);
1083 	if (--info->refcnt == 0) {
1084 		ida_free(&iommu->domain_ida, info->did);
1085 		xa_erase(&domain->iommu_array, iommu->seq_id);
1086 		kfree(info);
1087 	}
1088 }
1089 
1090 /*
1091  * For kdump cases, old valid entries may be cached due to the
1092  * in-flight DMA and copied pgtable, but there is no unmapping
1093  * behaviour for them, thus we need an explicit cache flush for
1094  * the newly-mapped device. For kdump, at this point, the device
1095  * is supposed to finish reset at its driver probe stage, so no
1096  * in-flight DMA will exist, and we don't need to worry anymore
1097  * hereafter.
1098  */
1099 static void copied_context_tear_down(struct intel_iommu *iommu,
1100 				     struct context_entry *context,
1101 				     u8 bus, u8 devfn)
1102 {
1103 	u16 did_old;
1104 
1105 	if (!context_copied(iommu, bus, devfn))
1106 		return;
1107 
1108 	assert_spin_locked(&iommu->lock);
1109 
1110 	did_old = context_domain_id(context);
1111 	context_clear_entry(context);
1112 
1113 	if (did_old < cap_ndoms(iommu->cap)) {
1114 		iommu->flush.flush_context(iommu, did_old,
1115 					   PCI_DEVID(bus, devfn),
1116 					   DMA_CCMD_MASK_NOBIT,
1117 					   DMA_CCMD_DEVICE_INVL);
1118 		iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
1119 					 DMA_TLB_DSI_FLUSH);
1120 	}
1121 
1122 	clear_context_copied(iommu, bus, devfn);
1123 }
1124 
1125 /*
1126  * It's a non-present to present mapping. If hardware doesn't cache
1127  * non-present entry we only need to flush the write-buffer. If the
1128  * _does_ cache non-present entries, then it does so in the special
1129  * domain #0, which we have to flush:
1130  */
1131 static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
1132 					u8 bus, u8 devfn)
1133 {
1134 	if (cap_caching_mode(iommu->cap)) {
1135 		iommu->flush.flush_context(iommu, 0,
1136 					   PCI_DEVID(bus, devfn),
1137 					   DMA_CCMD_MASK_NOBIT,
1138 					   DMA_CCMD_DEVICE_INVL);
1139 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1140 	} else {
1141 		iommu_flush_write_buffer(iommu);
1142 	}
1143 }
1144 
1145 static int domain_context_mapping_one(struct dmar_domain *domain,
1146 				      struct intel_iommu *iommu,
1147 				      u8 bus, u8 devfn)
1148 {
1149 	struct device_domain_info *info =
1150 			domain_lookup_dev_info(domain, iommu, bus, devfn);
1151 	u16 did = domain_id_iommu(domain, iommu);
1152 	int translation = CONTEXT_TT_MULTI_LEVEL;
1153 	struct pt_iommu_vtdss_hw_info pt_info;
1154 	struct context_entry *context;
1155 	int ret;
1156 
1157 	if (WARN_ON(!intel_domain_is_ss_paging(domain)))
1158 		return -EINVAL;
1159 
1160 	pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);
1161 
1162 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1163 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1164 
1165 	spin_lock(&iommu->lock);
1166 	ret = -ENOMEM;
1167 	context = iommu_context_addr(iommu, bus, devfn, 1);
1168 	if (!context)
1169 		goto out_unlock;
1170 
1171 	ret = 0;
1172 	if (context_present(context) && !context_copied(iommu, bus, devfn))
1173 		goto out_unlock;
1174 
1175 	copied_context_tear_down(iommu, context, bus, devfn);
1176 	context_clear_entry(context);
1177 	context_set_domain_id(context, did);
1178 
1179 	if (info && info->ats_supported)
1180 		translation = CONTEXT_TT_DEV_IOTLB;
1181 	else
1182 		translation = CONTEXT_TT_MULTI_LEVEL;
1183 
1184 	context_set_address_root(context, pt_info.ssptptr);
1185 	context_set_address_width(context, pt_info.aw);
1186 	context_set_translation_type(context, translation);
1187 	context_set_fault_enable(context);
1188 	context_set_present(context);
1189 	if (!ecap_coherent(iommu->ecap))
1190 		clflush_cache_range(context, sizeof(*context));
1191 	context_present_cache_flush(iommu, did, bus, devfn);
1192 	ret = 0;
1193 
1194 out_unlock:
1195 	spin_unlock(&iommu->lock);
1196 
1197 	return ret;
1198 }
1199 
1200 static int domain_context_mapping_cb(struct pci_dev *pdev,
1201 				     u16 alias, void *opaque)
1202 {
1203 	struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev);
1204 	struct intel_iommu *iommu = info->iommu;
1205 	struct dmar_domain *domain = opaque;
1206 
1207 	return domain_context_mapping_one(domain, iommu,
1208 					  PCI_BUS_NUM(alias), alias & 0xff);
1209 }
1210 
1211 static int
1212 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
1213 {
1214 	struct device_domain_info *info = dev_iommu_priv_get(dev);
1215 	struct intel_iommu *iommu = info->iommu;
1216 	u8 bus = info->bus, devfn = info->devfn;
1217 	int ret;
1218 
1219 	if (!dev_is_pci(dev))
1220 		return domain_context_mapping_one(domain, iommu, bus, devfn);
1221 
1222 	ret = pci_for_each_dma_alias(to_pci_dev(dev),
1223 				     domain_context_mapping_cb, domain);
1224 	if (ret)
1225 		return ret;
1226 
1227 	iommu_enable_pci_ats(info);
1228 
1229 	return 0;
1230 }
1231 
1232 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
1233 {
1234 	struct intel_iommu *iommu = info->iommu;
1235 	struct context_entry *context;
1236 	u16 did;
1237 
1238 	spin_lock(&iommu->lock);
1239 	context = iommu_context_addr(iommu, bus, devfn, 0);
1240 	if (!context) {
1241 		spin_unlock(&iommu->lock);
1242 		return;
1243 	}
1244 
1245 	did = context_domain_id(context);
1246 	context_clear_present(context);
1247 	__iommu_flush_cache(iommu, context, sizeof(*context));
1248 	spin_unlock(&iommu->lock);
1249 	intel_context_flush_no_pasid(info, context, did);
1250 	context_clear_entry(context);
1251 	__iommu_flush_cache(iommu, context, sizeof(*context));
1252 }
1253 
1254 int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
1255 			       ioasid_t pasid, u16 did, phys_addr_t fsptptr,
1256 			       int flags, struct iommu_domain *old)
1257 {
1258 	if (old)
1259 		intel_pasid_tear_down_entry(iommu, dev, pasid, false);
1260 
1261 	return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, did, flags);
1262 }
1263 
1264 static int domain_setup_second_level(struct intel_iommu *iommu,
1265 				     struct dmar_domain *domain,
1266 				     struct device *dev, ioasid_t pasid,
1267 				     struct iommu_domain *old)
1268 {
1269 	if (old)
1270 		intel_pasid_tear_down_entry(iommu, dev, pasid, false);
1271 
1272 	return intel_pasid_setup_second_level(iommu, domain, dev, pasid);
1273 }
1274 
1275 static int domain_setup_passthrough(struct intel_iommu *iommu,
1276 				    struct device *dev, ioasid_t pasid,
1277 				    struct iommu_domain *old)
1278 {
1279 	if (old)
1280 		intel_pasid_tear_down_entry(iommu, dev, pasid, false);
1281 
1282 	return intel_pasid_setup_pass_through(iommu, dev, pasid);
1283 }
1284 
1285 static int domain_setup_first_level(struct intel_iommu *iommu,
1286 				    struct dmar_domain *domain,
1287 				    struct device *dev,
1288 				    u32 pasid, struct iommu_domain *old)
1289 {
1290 	struct pt_iommu_x86_64_hw_info pt_info;
1291 	unsigned int flags = 0;
1292 
1293 	pt_iommu_x86_64_hw_info(&domain->fspt, &pt_info);
1294 	if (WARN_ON(pt_info.levels != 4 && pt_info.levels != 5))
1295 		return -EINVAL;
1296 
1297 	if (pt_info.levels == 5)
1298 		flags |= PASID_FLAG_FL5LP;
1299 
1300 	if (domain->force_snooping)
1301 		flags |= PASID_FLAG_PAGE_SNOOP;
1302 
1303 	if (!(domain->fspt.x86_64_pt.common.features &
1304 	      BIT(PT_FEAT_DMA_INCOHERENT)))
1305 		flags |= PASID_FLAG_PWSNP;
1306 
1307 	return __domain_setup_first_level(iommu, dev, pasid,
1308 					  domain_id_iommu(domain, iommu),
1309 					  pt_info.gcr3_pt, flags, old);
1310 }
1311 
1312 static int dmar_domain_attach_device(struct dmar_domain *domain,
1313 				     struct device *dev)
1314 {
1315 	struct device_domain_info *info = dev_iommu_priv_get(dev);
1316 	struct intel_iommu *iommu = info->iommu;
1317 	unsigned long flags;
1318 	int ret;
1319 
1320 	ret = domain_attach_iommu(domain, iommu);
1321 	if (ret)
1322 		return ret;
1323 
1324 	info->domain = domain;
1325 	info->domain_attached = true;
1326 	spin_lock_irqsave(&domain->lock, flags);
1327 	list_add(&info->link, &domain->devices);
1328 	spin_unlock_irqrestore(&domain->lock, flags);
1329 
1330 	if (dev_is_real_dma_subdevice(dev))
1331 		return 0;
1332 
1333 	if (!sm_supported(iommu))
1334 		ret = domain_context_mapping(domain, dev);
1335 	else if (intel_domain_is_fs_paging(domain))
1336 		ret = domain_setup_first_level(iommu, domain, dev,
1337 					       IOMMU_NO_PASID, NULL);
1338 	else if (intel_domain_is_ss_paging(domain))
1339 		ret = domain_setup_second_level(iommu, domain, dev,
1340 						IOMMU_NO_PASID, NULL);
1341 	else if (WARN_ON(true))
1342 		ret = -EINVAL;
1343 
1344 	if (ret)
1345 		goto out_block_translation;
1346 
1347 	ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
1348 	if (ret)
1349 		goto out_block_translation;
1350 
1351 	return 0;
1352 
1353 out_block_translation:
1354 	device_block_translation(dev);
1355 	return ret;
1356 }
1357 
1358 /**
1359  * device_rmrr_is_relaxable - Test whether the RMRR of this device
1360  * is relaxable (ie. is allowed to be not enforced under some conditions)
1361  * @dev: device handle
1362  *
1363  * We assume that PCI USB devices with RMRRs have them largely
1364  * for historical reasons and that the RMRR space is not actively used post
1365  * boot.  This exclusion may change if vendors begin to abuse it.
1366  *
1367  * The same exception is made for graphics devices, with the requirement that
1368  * any use of the RMRR regions will be torn down before assigning the device
1369  * to a guest.
1370  *
1371  * Return: true if the RMRR is relaxable, false otherwise
1372  */
1373 static bool device_rmrr_is_relaxable(struct device *dev)
1374 {
1375 	struct pci_dev *pdev;
1376 
1377 	if (!dev_is_pci(dev))
1378 		return false;
1379 
1380 	pdev = to_pci_dev(dev);
1381 	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
1382 		return true;
1383 	else
1384 		return false;
1385 }
1386 
1387 static int device_def_domain_type(struct device *dev)
1388 {
1389 	struct device_domain_info *info = dev_iommu_priv_get(dev);
1390 	struct intel_iommu *iommu = info->iommu;
1391 
1392 	/*
1393 	 * Hardware does not support the passthrough translation mode.
1394 	 * Always use a dynamaic mapping domain.
1395 	 */
1396 	if (!ecap_pass_through(iommu->ecap))
1397 		return IOMMU_DOMAIN_DMA;
1398 
1399 	if (dev_is_pci(dev)) {
1400 		struct pci_dev *pdev = to_pci_dev(dev);
1401 
1402 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
1403 			return IOMMU_DOMAIN_IDENTITY;
1404 	}
1405 
1406 	return 0;
1407 }
1408 
1409 static void intel_iommu_init_qi(struct intel_iommu *iommu)
1410 {
1411 	/*
1412 	 * Start from the sane iommu hardware state.
1413 	 * If the queued invalidation is already initialized by us
1414 	 * (for example, while enabling interrupt-remapping) then
1415 	 * we got the things already rolling from a sane state.
1416 	 */
1417 	if (!iommu->qi) {
1418 		/*
1419 		 * Clear any previous faults.
1420 		 */
1421 		dmar_fault(-1, iommu);
1422 		/*
1423 		 * Disable queued invalidation if supported and already enabled
1424 		 * before OS handover.
1425 		 */
1426 		dmar_disable_qi(iommu);
1427 	}
1428 
1429 	if (dmar_enable_qi(iommu)) {
1430 		/*
1431 		 * Queued Invalidate not enabled, use Register Based Invalidate
1432 		 */
1433 		iommu->flush.flush_context = __iommu_flush_context;
1434 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1435 		pr_info("%s: Using Register based invalidation\n",
1436 			iommu->name);
1437 	} else {
1438 		iommu->flush.flush_context = qi_flush_context;
1439 		iommu->flush.flush_iotlb = qi_flush_iotlb;
1440 		pr_info("%s: Using Queued invalidation\n", iommu->name);
1441 	}
1442 }
1443 
/*
 * Copy the context table(s) referenced by one root entry of the previous
 * kernel into newly allocated pages.
 *
 * @iommu:  IOMMU the tables belong to
 * @old_re: root entry of the old root table for @bus
 * @tbl:    array receiving the new context-table pointers
 * @bus:    bus number being copied
 * @ext:    true when each bus has two context tables (lower/upper half)
 *
 * Every present entry is copied verbatim, its domain ID is reserved in
 * iommu->domain_ida (when it is in range) and the entry is marked as
 * copied.
 *
 * Return: 0 on success (including when the root entry references no
 * table), -ENOMEM if remapping the old table or allocating a new one fails.
 */
static int copy_context_table(struct intel_iommu *iommu,
			      struct root_entry *old_re,
			      struct context_entry **tbl,
			      int bus, bool ext)
{
	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
	struct context_entry *new_ce = NULL, ce;
	struct context_entry *old_ce = NULL;
	struct root_entry re;
	phys_addr_t old_ce_phys;

	/* In extended layout each bus owns two consecutive slots in @tbl. */
	tbl_idx = ext ? bus * 2 : bus;
	/* Work on a local copy of the old root entry. */
	memcpy(&re, old_re, sizeof(re));

	for (devfn = 0; devfn < 256; devfn++) {
		/* First calculate the correct index */
		idx = (ext ? devfn * 2 : devfn) % 256;

		/*
		 * idx wraps to 0 at the start of a context table: devfn 0
		 * always, and additionally devfn 0x80 in extended layout.
		 */
		if (idx == 0) {
			/* First save what we may have and clean up */
			if (new_ce) {
				tbl[tbl_idx] = new_ce;
				__iommu_flush_cache(iommu, new_ce,
						    VTD_PAGE_SIZE);
				/* The next table goes into the second slot. */
				pos = 1;
			}

			if (old_ce)
				memunmap(old_ce);

			ret = 0;
			/* Lower half uses the LCTP, upper half the UCTP. */
			if (devfn < 0x80)
				old_ce_phys = root_entry_lctp(&re);
			else
				old_ce_phys = root_entry_uctp(&re);

			if (!old_ce_phys) {
				if (ext && devfn == 0) {
					/* No LCTP, try UCTP */
					/* The loop increment moves devfn to 0x80. */
					devfn = 0x7f;
					continue;
				} else {
					goto out;
				}
			}

			ret = -ENOMEM;
			old_ce = memremap(old_ce_phys, PAGE_SIZE,
					MEMREMAP_WB);
			if (!old_ce)
				goto out;

			new_ce = iommu_alloc_pages_node_sz(iommu->node,
							   GFP_KERNEL, SZ_4K);
			if (!new_ce)
				goto out_unmap;

			ret = 0;
		}

		/* Now copy the context entry */
		memcpy(&ce, old_ce + idx, sizeof(ce));

		if (!context_present(&ce))
			continue;

		/*
		 * Reserve the old domain ID so it is not handed out again;
		 * the result of the reservation is not checked.
		 */
		did = context_domain_id(&ce);
		if (did >= 0 && did < cap_ndoms(iommu->cap))
			ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL);

		set_context_copied(iommu, bus, devfn);
		new_ce[idx] = ce;
	}

	/* Store the last table that was filled in. */
	tbl[tbl_idx + pos] = new_ce;

	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);

out_unmap:
	memunmap(old_ce);

out:
	return ret;
}
1528 
/*
 * Copy the root/context tables left programmed by the previous kernel into
 * @iommu's own root table.
 *
 * The old root table address is read from DMAR_RTADDR_REG. Each bus's
 * context tables are copied with copy_context_table() and then linked into
 * iommu->root_entry under iommu->lock. A failure to copy an individual
 * bus is logged and otherwise ignored.
 *
 * Return: 0 on success, -EINVAL if the old table mode differs from the one
 * this kernel would use or the old root table address is zero, -ENOMEM on
 * allocation or remap failure.
 */
static int copy_translation_tables(struct intel_iommu *iommu)
{
	struct context_entry **ctxt_tbls;
	struct root_entry *old_rt;
	phys_addr_t old_rt_phys;
	int ctxt_table_entries;
	u64 rtaddr_reg;
	int bus, ret;
	bool new_ext, ext;

	rtaddr_reg = readq(iommu->reg + DMAR_RTADDR_REG);
	/* Mode currently programmed in hardware vs. the mode we would use. */
	ext        = !!(rtaddr_reg & DMA_RTADDR_SMT);
	new_ext    = !!sm_supported(iommu);

	/*
	 * The RTT bit can only be changed when translation is disabled,
	 * but disabling translation means to open a window for data
	 * corruption. So bail out and don't copy anything if we would
	 * have to change the bit.
	 */
	if (new_ext != ext)
		return -EINVAL;

	/* One bit per (bus, devfn) pair: 2^16 bits. */
	iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
	if (!iommu->copied_tables)
		return -ENOMEM;

	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
	if (!old_rt_phys)
		return -EINVAL;

	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
	if (!old_rt)
		return -ENOMEM;

	/* This is too big for the stack - allocate it from slab */
	ctxt_table_entries = ext ? 512 : 256;
	ret = -ENOMEM;
	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
	if (!ctxt_tbls)
		goto out_unmap;

	for (bus = 0; bus < 256; bus++) {
		ret = copy_context_table(iommu, &old_rt[bus],
					 ctxt_tbls, bus, ext);
		if (ret) {
			/* Best effort: report the failure and keep going. */
			pr_err("%s: Failed to copy context table for bus %d\n",
				iommu->name, bus);
			continue;
		}
	}

	spin_lock(&iommu->lock);

	/* Context tables are copied, now write them to the root_entry table */
	for (bus = 0; bus < 256; bus++) {
		int idx = ext ? bus * 2 : bus;
		u64 val;

		if (ctxt_tbls[idx]) {
			/* Bit 0 is set together with the table address. */
			val = virt_to_phys(ctxt_tbls[idx]) | 1;
			iommu->root_entry[bus].lo = val;
		}

		/* The second table per bus exists only in extended layout. */
		if (!ext || !ctxt_tbls[idx + 1])
			continue;

		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
		iommu->root_entry[bus].hi = val;
	}

	spin_unlock(&iommu->lock);

	kfree(ctxt_tbls);

	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);

	/* Per-bus copy failures above do not fail the whole operation. */
	ret = 0;

out_unmap:
	memunmap(old_rt);

	return ret;
}
1613 
/*
 * Boot-time initialization of all DMAR units.
 *
 * First pass: for every non-ignored IOMMU, clamp intel_pasid_max_id, set
 * up the invalidation interface, allocate the root entry and, when
 * translation was left enabled and this is a kdump kernel, try to copy the
 * old translation tables.
 * Second pass: program the root entry on every active IOMMU.
 * Third pass: enable the page request queue (when supported) and the fault
 * interrupt.
 *
 * Return: 0 on success; on failure every active IOMMU is disabled and
 * freed and the error is returned.
 */
static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret;

	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			iommu_disable_translation(iommu);
			continue;
		}

		/*
		 * Find the max pasid size of all IOMMU's in the system.
		 * We need to ensure the system pasid table is no bigger
		 * than the smallest supported.
		 */
		if (pasid_supported(iommu)) {
			u32 temp = 2 << ecap_pss(iommu->ecap);

			intel_pasid_max_id = min_t(u32, temp,
						   intel_pasid_max_id);
		}

		intel_iommu_init_qi(iommu);
		init_translation_status(iommu);

		/* Pre-enabled translation is only kept in a kdump kernel. */
		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
			iommu_disable_translation(iommu);
			clear_translation_pre_enabled(iommu);
			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
				iommu->name);
		}

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * among all IOMMU's. Need to Split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret)
			goto free_iommu;

		if (translation_pre_enabled(iommu)) {
			pr_info("Translation already enabled - trying to copy translation structures\n");

			ret = copy_translation_tables(iommu);
			if (ret) {
				/*
				 * We found the IOMMU with translation
				 * enabled - but failed to copy over the
				 * old root-entry table. Try to proceed
				 * by disabling translation now and
				 * allocating a clean root-entry table.
				 * This might cause DMAR faults, but
				 * probably the dump will still succeed.
				 */
				pr_err("Failed to copy translation tables from previous kernel for %s\n",
				       iommu->name);
				iommu_disable_translation(iommu);
				clear_translation_pre_enabled(iommu);
			} else {
				pr_info("Copied translation tables from previous kernel for %s\n",
					iommu->name);
			}
		}

		intel_svm_check(iommu);
	}

	/*
	 * Now that qi is enabled on all iommus, set the root entry and flush
	 * caches. This is required on some Intel X58 chipsets, otherwise the
	 * flush_context function will loop forever and the boot hangs.
	 */
	for_each_active_iommu(iommu, drhd) {
		iommu_flush_write_buffer(iommu);
		iommu_set_root_entry(iommu);
	}

	check_tylersburg_isoch();

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_iommu(iommu, drhd) {
		if (drhd->ignored) {
			/*
			 * we always have to disable PMRs or DMA may fail on
			 * this device
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(iommu);
			continue;
		}

		iommu_flush_write_buffer(iommu);

		if (ecap_prs(iommu->ecap)) {
			/*
			 * Call dmar_alloc_hwirq() with dmar_global_lock held,
			 * could cause possible lock race condition.
			 */
			/* Hence the lock is dropped around the PRQ setup. */
			up_write(&dmar_global_lock);
			ret = intel_iommu_enable_prq(iommu);
			down_write(&dmar_global_lock);
			if (ret)
				goto free_iommu;
		}

		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto free_iommu;
	}

	return 0;

free_iommu:
	/* Undo everything for all active units, not just the failing one. */
	for_each_active_iommu(iommu, drhd) {
		disable_dmar_iommu(iommu);
		free_dmar_iommu(iommu);
	}

	return ret;
}
1743 
/*
 * Mark DMAR units that need no remapping setup.
 *
 * A unit without include_all whose device scope holds no active device is
 * marked ignored. A remaining unit whose active devices are all PCI
 * graphics devices is flagged gfx_dedicated, and additionally ignored
 * when disable_igfx_iommu is set.
 */
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;
	struct device *dev;
	int i;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			/* Stop at the first active device, if there is one. */
			for_each_active_dev_scope(drhd->devices,
						  drhd->devices_cnt, i, dev)
				break;
			/* ignore DMAR unit if no devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	for_each_active_drhd_unit(drhd) {
		if (drhd->include_all)
			continue;

		/* Stop at the first active device that is not PCI graphics. */
		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, dev)
			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
				break;
		/* The loop ended early, so a non-graphics device was found. */
		if (i < drhd->devices_cnt)
			continue;

		/* This IOMMU has *only* gfx devices. Either bypass it or
		   set the gfx_mapped flag, as appropriate */
		drhd->gfx_dedicated = 1;
		if (disable_igfx_iommu)
			drhd->ignored = 1;
	}
}
1779 
1780 #ifdef CONFIG_SUSPEND
1781 static int init_iommu_hw(void)
1782 {
1783 	struct dmar_drhd_unit *drhd;
1784 	struct intel_iommu *iommu = NULL;
1785 	int ret;
1786 
1787 	for_each_active_iommu(iommu, drhd) {
1788 		if (iommu->qi) {
1789 			ret = dmar_reenable_qi(iommu);
1790 			if (ret)
1791 				return ret;
1792 		}
1793 	}
1794 
1795 	for_each_iommu(iommu, drhd) {
1796 		if (drhd->ignored) {
1797 			/*
1798 			 * we always have to disable PMRs or DMA may fail on
1799 			 * this device
1800 			 */
1801 			if (force_on)
1802 				iommu_disable_protect_mem_regions(iommu);
1803 			continue;
1804 		}
1805 
1806 		iommu_flush_write_buffer(iommu);
1807 		iommu_set_root_entry(iommu);
1808 		iommu_enable_translation(iommu);
1809 		iommu_disable_protect_mem_regions(iommu);
1810 	}
1811 
1812 	return 0;
1813 }
1814 
1815 static void iommu_flush_all(void)
1816 {
1817 	struct dmar_drhd_unit *drhd;
1818 	struct intel_iommu *iommu;
1819 
1820 	for_each_active_iommu(iommu, drhd) {
1821 		iommu->flush.flush_context(iommu, 0, 0, 0,
1822 					   DMA_CCMD_GLOBAL_INVL);
1823 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1824 					 DMA_TLB_GLOBAL_FLUSH);
1825 	}
1826 }
1827 
1828 static int iommu_suspend(void *data)
1829 {
1830 	struct dmar_drhd_unit *drhd;
1831 	struct intel_iommu *iommu = NULL;
1832 	unsigned long flag;
1833 
1834 	iommu_flush_all();
1835 
1836 	for_each_active_iommu(iommu, drhd) {
1837 		iommu_disable_translation(iommu);
1838 
1839 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
1840 
1841 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
1842 			readl(iommu->reg + DMAR_FECTL_REG);
1843 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
1844 			readl(iommu->reg + DMAR_FEDATA_REG);
1845 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
1846 			readl(iommu->reg + DMAR_FEADDR_REG);
1847 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
1848 			readl(iommu->reg + DMAR_FEUADDR_REG);
1849 
1850 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1851 	}
1852 	return 0;
1853 }
1854 
1855 static void iommu_resume(void *data)
1856 {
1857 	struct dmar_drhd_unit *drhd;
1858 	struct intel_iommu *iommu = NULL;
1859 	unsigned long flag;
1860 
1861 	if (init_iommu_hw()) {
1862 		if (force_on)
1863 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
1864 		else
1865 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
1866 		return;
1867 	}
1868 
1869 	for_each_active_iommu(iommu, drhd) {
1870 
1871 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
1872 
1873 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
1874 			iommu->reg + DMAR_FECTL_REG);
1875 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
1876 			iommu->reg + DMAR_FEDATA_REG);
1877 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
1878 			iommu->reg + DMAR_FEADDR_REG);
1879 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
1880 			iommu->reg + DMAR_FEUADDR_REG);
1881 
1882 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1883 	}
1884 }
1885 
1886 static const struct syscore_ops iommu_syscore_ops = {
1887 	.resume		= iommu_resume,
1888 	.suspend	= iommu_suspend,
1889 };
1890 
1891 static struct syscore iommu_syscore = {
1892 	.ops = &iommu_syscore_ops,
1893 };
1894 
1895 static void __init init_iommu_pm_ops(void)
1896 {
1897 	register_syscore(&iommu_syscore);
1898 }
1899 
1900 #else
/* Suspend support is not configured: there is nothing to register. */
static inline void init_iommu_pm_ops(void)
{
}
#endif	/* CONFIG_SUSPEND */
1903 
1904 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
1905 {
1906 	if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
1907 	    !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
1908 	    rmrr->end_address <= rmrr->base_address ||
1909 	    arch_rmrr_sanity_check(rmrr))
1910 		return -EINVAL;
1911 
1912 	return 0;
1913 }
1914 
1915 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
1916 {
1917 	struct acpi_dmar_reserved_memory *rmrr;
1918 	struct dmar_rmrr_unit *rmrru;
1919 
1920 	rmrr = (struct acpi_dmar_reserved_memory *)header;
1921 	if (rmrr_sanity_check(rmrr)) {
1922 		pr_warn(FW_BUG
1923 			   "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
1924 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1925 			   rmrr->base_address, rmrr->end_address,
1926 			   dmi_get_system_info(DMI_BIOS_VENDOR),
1927 			   dmi_get_system_info(DMI_BIOS_VERSION),
1928 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
1929 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
1930 	}
1931 
1932 	rmrru = kzalloc_obj(*rmrru);
1933 	if (!rmrru)
1934 		goto out;
1935 
1936 	rmrru->hdr = header;
1937 
1938 	rmrru->base_address = rmrr->base_address;
1939 	rmrru->end_address = rmrr->end_address;
1940 
1941 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
1942 				((void *)rmrr) + rmrr->header.length,
1943 				&rmrru->devices_cnt);
1944 	if (rmrru->devices_cnt && rmrru->devices == NULL)
1945 		goto free_rmrru;
1946 
1947 	list_add(&rmrru->list, &dmar_rmrr_units);
1948 
1949 	return 0;
1950 free_rmrru:
1951 	kfree(rmrru);
1952 out:
1953 	return -ENOMEM;
1954 }
1955 
1956 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
1957 {
1958 	struct dmar_atsr_unit *atsru;
1959 	struct acpi_dmar_atsr *tmp;
1960 
1961 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
1962 				dmar_rcu_check()) {
1963 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
1964 		if (atsr->segment != tmp->segment)
1965 			continue;
1966 		if (atsr->header.length != tmp->header.length)
1967 			continue;
1968 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
1969 			return atsru;
1970 	}
1971 
1972 	return NULL;
1973 }
1974 
1975 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
1976 {
1977 	struct acpi_dmar_atsr *atsr;
1978 	struct dmar_atsr_unit *atsru;
1979 
1980 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
1981 		return 0;
1982 
1983 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
1984 	atsru = dmar_find_atsr(atsr);
1985 	if (atsru)
1986 		return 0;
1987 
1988 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
1989 	if (!atsru)
1990 		return -ENOMEM;
1991 
1992 	/*
1993 	 * If memory is allocated from slab by ACPI _DSM method, we need to
1994 	 * copy the memory content because the memory buffer will be freed
1995 	 * on return.
1996 	 */
1997 	atsru->hdr = (void *)(atsru + 1);
1998 	memcpy(atsru->hdr, hdr, hdr->length);
1999 	atsru->include_all = atsr->flags & 0x1;
2000 	if (!atsru->include_all) {
2001 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
2002 				(void *)atsr + atsr->header.length,
2003 				&atsru->devices_cnt);
2004 		if (atsru->devices_cnt && atsru->devices == NULL) {
2005 			kfree(atsru);
2006 			return -ENOMEM;
2007 		}
2008 	}
2009 
2010 	list_add_rcu(&atsru->list, &dmar_atsr_units);
2011 
2012 	return 0;
2013 }
2014 
2015 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
2016 {
2017 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
2018 	kfree(atsru);
2019 }
2020 
2021 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2022 {
2023 	struct acpi_dmar_atsr *atsr;
2024 	struct dmar_atsr_unit *atsru;
2025 
2026 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2027 	atsru = dmar_find_atsr(atsr);
2028 	if (atsru) {
2029 		list_del_rcu(&atsru->list);
2030 		synchronize_rcu();
2031 		intel_iommu_free_atsr(atsru);
2032 	}
2033 
2034 	return 0;
2035 }
2036 
2037 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2038 {
2039 	int i;
2040 	struct device *dev;
2041 	struct acpi_dmar_atsr *atsr;
2042 	struct dmar_atsr_unit *atsru;
2043 
2044 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2045 	atsru = dmar_find_atsr(atsr);
2046 	if (!atsru)
2047 		return 0;
2048 
2049 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
2050 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
2051 					  i, dev)
2052 			return -EBUSY;
2053 	}
2054 
2055 	return 0;
2056 }
2057 
2058 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
2059 {
2060 	struct dmar_satc_unit *satcu;
2061 	struct acpi_dmar_satc *tmp;
2062 
2063 	list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
2064 				dmar_rcu_check()) {
2065 		tmp = (struct acpi_dmar_satc *)satcu->hdr;
2066 		if (satc->segment != tmp->segment)
2067 			continue;
2068 		if (satc->header.length != tmp->header.length)
2069 			continue;
2070 		if (memcmp(satc, tmp, satc->header.length) == 0)
2071 			return satcu;
2072 	}
2073 
2074 	return NULL;
2075 }
2076 
2077 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
2078 {
2079 	struct acpi_dmar_satc *satc;
2080 	struct dmar_satc_unit *satcu;
2081 
2082 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
2083 		return 0;
2084 
2085 	satc = container_of(hdr, struct acpi_dmar_satc, header);
2086 	satcu = dmar_find_satc(satc);
2087 	if (satcu)
2088 		return 0;
2089 
2090 	satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
2091 	if (!satcu)
2092 		return -ENOMEM;
2093 
2094 	satcu->hdr = (void *)(satcu + 1);
2095 	memcpy(satcu->hdr, hdr, hdr->length);
2096 	satcu->atc_required = satc->flags & 0x1;
2097 	satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
2098 					      (void *)satc + satc->header.length,
2099 					      &satcu->devices_cnt);
2100 	if (satcu->devices_cnt && !satcu->devices) {
2101 		kfree(satcu);
2102 		return -ENOMEM;
2103 	}
2104 	list_add_rcu(&satcu->list, &dmar_satc_units);
2105 
2106 	return 0;
2107 }
2108 
2109 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
2110 {
2111 	struct intel_iommu *iommu = dmaru->iommu;
2112 	int ret;
2113 
2114 	/*
2115 	 * Disable translation if already enabled prior to OS handover.
2116 	 */
2117 	if (iommu->gcmd & DMA_GCMD_TE)
2118 		iommu_disable_translation(iommu);
2119 
2120 	ret = iommu_alloc_root_entry(iommu);
2121 	if (ret)
2122 		goto out;
2123 
2124 	intel_svm_check(iommu);
2125 
2126 	if (dmaru->ignored) {
2127 		/*
2128 		 * we always have to disable PMRs or DMA may fail on this device
2129 		 */
2130 		if (force_on)
2131 			iommu_disable_protect_mem_regions(iommu);
2132 		return 0;
2133 	}
2134 
2135 	intel_iommu_init_qi(iommu);
2136 	iommu_flush_write_buffer(iommu);
2137 
2138 	if (ecap_prs(iommu->ecap)) {
2139 		ret = intel_iommu_enable_prq(iommu);
2140 		if (ret)
2141 			goto disable_iommu;
2142 	}
2143 
2144 	ret = dmar_set_interrupt(iommu);
2145 	if (ret)
2146 		goto disable_iommu;
2147 
2148 	iommu_set_root_entry(iommu);
2149 	iommu_enable_translation(iommu);
2150 
2151 	iommu_disable_protect_mem_regions(iommu);
2152 	return 0;
2153 
2154 disable_iommu:
2155 	disable_dmar_iommu(iommu);
2156 out:
2157 	free_dmar_iommu(iommu);
2158 	return ret;
2159 }
2160 
2161 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
2162 {
2163 	int ret = 0;
2164 	struct intel_iommu *iommu = dmaru->iommu;
2165 
2166 	if (!intel_iommu_enabled)
2167 		return 0;
2168 	if (iommu == NULL)
2169 		return -EINVAL;
2170 
2171 	if (insert) {
2172 		ret = intel_iommu_add(dmaru);
2173 	} else {
2174 		disable_dmar_iommu(iommu);
2175 		free_dmar_iommu(iommu);
2176 	}
2177 
2178 	return ret;
2179 }
2180 
2181 static void intel_iommu_free_dmars(void)
2182 {
2183 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
2184 	struct dmar_atsr_unit *atsru, *atsr_n;
2185 	struct dmar_satc_unit *satcu, *satc_n;
2186 
2187 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
2188 		list_del(&rmrru->list);
2189 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
2190 		kfree(rmrru);
2191 	}
2192 
2193 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
2194 		list_del(&atsru->list);
2195 		intel_iommu_free_atsr(atsru);
2196 	}
2197 	list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
2198 		list_del(&satcu->list);
2199 		dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
2200 		kfree(satcu);
2201 	}
2202 }
2203 
2204 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
2205 {
2206 	struct dmar_satc_unit *satcu;
2207 	struct acpi_dmar_satc *satc;
2208 	struct device *tmp;
2209 	int i;
2210 
2211 	rcu_read_lock();
2212 
2213 	list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
2214 		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2215 		if (satc->segment != pci_domain_nr(dev->bus))
2216 			continue;
2217 		for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
2218 			if (to_pci_dev(tmp) == dev)
2219 				goto out;
2220 	}
2221 	satcu = NULL;
2222 out:
2223 	rcu_read_unlock();
2224 	return satcu;
2225 }
2226 
2227 static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
2228 {
2229 	struct pci_dev *bridge = NULL;
2230 	struct dmar_atsr_unit *atsru;
2231 	struct dmar_satc_unit *satcu;
2232 	struct acpi_dmar_atsr *atsr;
2233 	bool supported = true;
2234 	struct pci_bus *bus;
2235 	struct device *tmp;
2236 	int i;
2237 
2238 	dev = pci_physfn(dev);
2239 	satcu = dmar_find_matched_satc_unit(dev);
2240 	if (satcu)
2241 		/*
2242 		 * This device supports ATS as it is in SATC table.
2243 		 * When IOMMU is in legacy mode, enabling ATS is done
2244 		 * automatically by HW for the device that requires
2245 		 * ATS, hence OS should not enable this device ATS
2246 		 * to avoid duplicated TLB invalidation.
2247 		 */
2248 		return !(satcu->atc_required && !sm_supported(iommu));
2249 
2250 	for (bus = dev->bus; bus; bus = bus->parent) {
2251 		bridge = bus->self;
2252 		/* If it's an integrated device, allow ATS */
2253 		if (!bridge)
2254 			return true;
2255 		/* Connected via non-PCIe: no ATS */
2256 		if (!pci_is_pcie(bridge) ||
2257 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
2258 			return false;
2259 		/* If we found the root port, look it up in the ATSR */
2260 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
2261 			break;
2262 	}
2263 
2264 	rcu_read_lock();
2265 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
2266 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2267 		if (atsr->segment != pci_domain_nr(dev->bus))
2268 			continue;
2269 
2270 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
2271 			if (tmp == &bridge->dev)
2272 				goto out;
2273 
2274 		if (atsru->include_all)
2275 			goto out;
2276 	}
2277 	supported = false;
2278 out:
2279 	rcu_read_unlock();
2280 
2281 	return supported;
2282 }
2283 
/*
 * Update the device scope arrays of all RMRR, ATSR and SATC units for a PCI
 * bus notification described by @info.
 *
 * BUS_NOTIFY_ADD_DEVICE inserts the device via dmar_insert_dev_scope();
 * BUS_NOTIFY_REMOVED_DEVICE removes it via dmar_remove_dev_scope(). Other
 * events are ignored.
 *
 * Returns 0, or the negative value reported by dmar_insert_dev_scope().
 * Nothing is done (and 0 is returned) when the IOMMU is not enabled and
 * system_state has reached SYSTEM_RUNNING.
 */
2284 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
2285 {
2286 	int ret;
2287 	struct dmar_rmrr_unit *rmrru;
2288 	struct dmar_atsr_unit *atsru;
2289 	struct dmar_satc_unit *satcu;
2290 	struct acpi_dmar_atsr *atsr;
2291 	struct acpi_dmar_reserved_memory *rmrr;
2292 	struct acpi_dmar_satc *satc;
2293 
2294 	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
2295 		return 0;
2296 
	/*
	 * RMRR: every unit is visited; only a negative insert result stops
	 * the walk, a positive one does not.
	 */
2297 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
2298 		rmrr = container_of(rmrru->hdr,
2299 				    struct acpi_dmar_reserved_memory, header);
2300 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2301 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
2302 				((void *)rmrr) + rmrr->header.length,
2303 				rmrr->segment, rmrru->devices,
2304 				rmrru->devices_cnt);
2305 			if (ret < 0)
2306 				return ret;
2307 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2308 			dmar_remove_dev_scope(info, rmrr->segment,
2309 				rmrru->devices, rmrru->devices_cnt);
2310 		}
2311 	}
2312 
	/*
	 * ATSR: include_all units are skipped. The walk stops at the first
	 * unit where the insert returns a positive value or the remove
	 * returns non-zero.
	 */
2313 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
2314 		if (atsru->include_all)
2315 			continue;
2316 
2317 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2318 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2319 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
2320 					(void *)atsr + atsr->header.length,
2321 					atsr->segment, atsru->devices,
2322 					atsru->devices_cnt);
2323 			if (ret > 0)
2324 				break;
2325 			else if (ret < 0)
2326 				return ret;
2327 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2328 			if (dmar_remove_dev_scope(info, atsr->segment,
2329 					atsru->devices, atsru->devices_cnt))
2330 				break;
2331 		}
2332 	}
	/* SATC: same stop conditions as the ATSR walk above. */
2333 	list_for_each_entry(satcu, &dmar_satc_units, list) {
2334 		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2335 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2336 			ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
2337 					(void *)satc + satc->header.length,
2338 					satc->segment, satcu->devices,
2339 					satcu->devices_cnt);
2340 			if (ret > 0)
2341 				break;
2342 			else if (ret < 0)
2343 				return ret;
2344 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2345 			if (dmar_remove_dev_scope(info, satc->segment,
2346 					satcu->devices, satcu->devices_cnt))
2347 				break;
2348 		}
2349 	}
2350 
2351 	return 0;
2352 }
2353 
2354 static void intel_disable_iommus(void)
2355 {
2356 	struct intel_iommu *iommu = NULL;
2357 	struct dmar_drhd_unit *drhd;
2358 
2359 	for_each_iommu(iommu, drhd)
2360 		iommu_disable_translation(iommu);
2361 }
2362 
2363 void intel_iommu_shutdown(void)
2364 {
2365 	struct dmar_drhd_unit *drhd;
2366 	struct intel_iommu *iommu = NULL;
2367 
2368 	if (no_iommu || dmar_disabled)
2369 		return;
2370 
2371 	/*
2372 	 * All other CPUs were brought down, hotplug interrupts were disabled,
2373 	 * no lock and RCU checking needed anymore
2374 	 */
2375 	list_for_each_entry(drhd, &dmar_drhd_units, list) {
2376 		iommu = drhd->iommu;
2377 
2378 		/* Disable PMRs explicitly here. */
2379 		iommu_disable_protect_mem_regions(iommu);
2380 
2381 		/* Make sure the IOMMUs are switched off */
2382 		iommu_disable_translation(iommu);
2383 	}
2384 }
2385 
2386 static struct intel_iommu *dev_to_intel_iommu(struct device *dev)
2387 {
2388 	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
2389 
2390 	return container_of(iommu_dev, struct intel_iommu, iommu);
2391 }
2392 
2393 static ssize_t version_show(struct device *dev,
2394 			    struct device_attribute *attr, char *buf)
2395 {
2396 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2397 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
2398 	return sysfs_emit(buf, "%d:%d\n",
2399 			  DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
2400 }
2401 static DEVICE_ATTR_RO(version);
2402 
2403 static ssize_t address_show(struct device *dev,
2404 			    struct device_attribute *attr, char *buf)
2405 {
2406 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2407 	return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
2408 }
2409 static DEVICE_ATTR_RO(address);
2410 
2411 static ssize_t cap_show(struct device *dev,
2412 			struct device_attribute *attr, char *buf)
2413 {
2414 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2415 	return sysfs_emit(buf, "%llx\n", iommu->cap);
2416 }
2417 static DEVICE_ATTR_RO(cap);
2418 
2419 static ssize_t ecap_show(struct device *dev,
2420 			 struct device_attribute *attr, char *buf)
2421 {
2422 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2423 	return sysfs_emit(buf, "%llx\n", iommu->ecap);
2424 }
2425 static DEVICE_ATTR_RO(ecap);
2426 
2427 static ssize_t domains_supported_show(struct device *dev,
2428 				      struct device_attribute *attr, char *buf)
2429 {
2430 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2431 	return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap));
2432 }
2433 static DEVICE_ATTR_RO(domains_supported);
2434 
2435 static ssize_t domains_used_show(struct device *dev,
2436 				 struct device_attribute *attr, char *buf)
2437 {
2438 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2439 	unsigned int count = 0;
2440 	int id;
2441 
2442 	for (id = 0; id < cap_ndoms(iommu->cap); id++)
2443 		if (ida_exists(&iommu->domain_ida, id))
2444 			count++;
2445 
2446 	return sysfs_emit(buf, "%d\n", count);
2447 }
2448 static DEVICE_ATTR_RO(domains_used);
2449 
/* NULL-terminated list of the read-only device attributes defined above. */
2450 static struct attribute *intel_iommu_attrs[] = {
2451 	&dev_attr_version.attr,
2452 	&dev_attr_address.attr,
2453 	&dev_attr_cap.attr,
2454 	&dev_attr_ecap.attr,
2455 	&dev_attr_domains_supported.attr,
2456 	&dev_attr_domains_used.attr,
2457 	NULL,
2458 };
2459 
/* Groups the attributes above under the name "intel-iommu". */
2460 static struct attribute_group intel_iommu_group = {
2461 	.name = "intel-iommu",
2462 	.attrs = intel_iommu_attrs,
2463 };
2464 
/*
 * NULL-terminated group list; passed to iommu_device_sysfs_add() in
 * intel_iommu_init().
 */
2465 const struct attribute_group *intel_iommu_groups[] = {
2466 	&intel_iommu_group,
2467 	NULL,
2468 };
2469 
2470 static bool has_external_pci(void)
2471 {
2472 	struct pci_dev *pdev = NULL;
2473 
2474 	for_each_pci_dev(pdev)
2475 		if (pdev->external_facing) {
2476 			pci_dev_put(pdev);
2477 			return true;
2478 		}
2479 
2480 	return false;
2481 }
2482 
2483 static int __init platform_optin_force_iommu(void)
2484 {
2485 	if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
2486 		return 0;
2487 
2488 	if (no_iommu || dmar_disabled)
2489 		pr_info("Intel-IOMMU force enabled due to platform opt in\n");
2490 
2491 	/*
2492 	 * If Intel-IOMMU is disabled by default, we will apply identity
2493 	 * map for all devices except those marked as being untrusted.
2494 	 */
2495 	if (dmar_disabled)
2496 		iommu_set_default_passthrough(false);
2497 
2498 	dmar_disabled = 0;
2499 	no_iommu = 0;
2500 
2501 	return 1;
2502 }
2503 
/*
 * Probe the physical nodes of every ACPI namespace device found in the
 * device scope of an active IOMMU.
 *
 * Expects dmar_global_lock to be held for reading on entry (as in
 * intel_iommu_init()); the lock is dropped around each device's probe and
 * re-taken afterwards.
 *
 * Returns 0, or the first error reported by iommu_probe_device().
 */
2504 static int __init probe_acpi_namespace_devices(void)
2505 {
2506 	struct dmar_drhd_unit *drhd;
2507 	/* To avoid a -Wunused-but-set-variable warning. */
2508 	struct intel_iommu *iommu __maybe_unused;
2509 	struct device *dev;
2510 	int i, ret = 0;
2511 
2512 	for_each_active_iommu(iommu, drhd) {
2513 		for_each_active_dev_scope(drhd->devices,
2514 					  drhd->devices_cnt, i, dev) {
2515 			struct acpi_device_physical_node *pn;
2516 			struct acpi_device *adev;
2517 
			/* Only devices on the ACPI bus are handled here. */
2518 			if (dev->bus != &acpi_bus_type)
2519 				continue;
2520 
			/* Probe without dmar_global_lock held. */
2521 			up_read(&dmar_global_lock);
2522 			adev = to_acpi_device(dev);
2523 			mutex_lock(&adev->physical_node_lock);
2524 			list_for_each_entry(pn,
2525 					    &adev->physical_node_list, node) {
2526 				ret = iommu_probe_device(pn->dev);
2527 				if (ret)
2528 					break;
2529 			}
2530 			mutex_unlock(&adev->physical_node_lock);
2531 			down_read(&dmar_global_lock);
2532 
			/* The read lock is held again before bailing out. */
2533 			if (ret)
2534 				return ret;
2535 		}
2536 	}
2537 
2538 	return 0;
2539 }
2540 
2541 static __init int tboot_force_iommu(void)
2542 {
2543 	if (!tboot_enabled())
2544 		return 0;
2545 
2546 	if (no_iommu || dmar_disabled)
2547 		pr_warn("Forcing Intel-IOMMU to enabled\n");
2548 
2549 	dmar_disabled = 0;
2550 	no_iommu = 0;
2551 
2552 	return 1;
2553 }
2554 
/*
 * Driver initialisation entry point.
 *
 * Parses the DMAR table and device scopes, registers the bus notifier,
 * initialises the DMAR units, registers every active IOMMU with sysfs, the
 * IOMMU core and the PMU code, probes ACPI namespace devices and finally
 * enables translation.
 *
 * Returns 0 on success. Returns -ENODEV when table/scope initialisation
 * fails or the IOMMU is disabled, and the error of init_dmars() when that
 * step fails. With force_on set, the initialisation failures panic instead.
 */
2555 int __init intel_iommu_init(void)
2556 {
2557 	int ret = -ENODEV;
2558 	struct dmar_drhd_unit *drhd;
2559 	struct intel_iommu *iommu;
2560 
2561 	/*
2562 	 * Intel IOMMU is required for a TXT/tboot launch or platform
2563 	 * opt in, so enforce that.
2564 	 */
2565 	force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
2566 		    platform_optin_force_iommu();
2567 
2568 	down_write(&dmar_global_lock);
2569 	if (dmar_table_init()) {
2570 		if (force_on)
2571 			panic("tboot: Failed to initialize DMAR table\n");
2572 		goto out_free_dmar;
2573 	}
2574 
2575 	if (dmar_dev_scope_init() < 0) {
2576 		if (force_on)
2577 			panic("tboot: Failed to initialize DMAR device scope\n");
2578 		goto out_free_dmar;
2579 	}
2580 
2581 	up_write(&dmar_global_lock);
2582 
2583 	/*
2584 	 * The bus notifier takes the dmar_global_lock, so lockdep will
2585 	 * complain later when we register it under the lock.
2586 	 */
2587 	dmar_register_bus_notifier();
2588 
2589 	down_write(&dmar_global_lock);
2590 
2591 	if (!no_iommu)
2592 		intel_iommu_debugfs_init();
2593 
2594 	if (no_iommu || dmar_disabled) {
2595 		/*
2596 		 * We exit the function here to ensure IOMMU's remapping and
2597 		 * mempool aren't setup, which means that the IOMMU's PMRs
2598 		 * won't be disabled via the call to init_dmars(). So disable
2599 		 * it explicitly here. The PMRs were setup by tboot prior to
2600 		 * calling SENTER, but the kernel is expected to reset/tear
2601 		 * down the PMRs.
2602 		 */
2603 		if (intel_iommu_tboot_noforce) {
2604 			for_each_iommu(iommu, drhd)
2605 				iommu_disable_protect_mem_regions(iommu);
2606 		}
2607 
2608 		/*
2609 		 * Make sure the IOMMUs are switched off, even when we
2610 		 * boot into a kexec kernel and the previous kernel left
2611 		 * them enabled
2612 		 */
2613 		intel_disable_iommus();
		/* ret still holds its initial -ENODEV here. */
2614 		goto out_free_dmar;
2615 	}
2616 
2617 	if (list_empty(&dmar_rmrr_units))
2618 		pr_info("No RMRR found\n");
2619 
2620 	if (list_empty(&dmar_atsr_units))
2621 		pr_info("No ATSR found\n");
2622 
2623 	if (list_empty(&dmar_satc_units))
2624 		pr_info("No SATC found\n");
2625 
2626 	init_no_remapping_devices();
2627 
2628 	ret = init_dmars();
2629 	if (ret) {
2630 		if (force_on)
2631 			panic("tboot: Failed to initialize DMARs\n");
2632 		pr_err("Initialization failed\n");
2633 		goto out_free_dmar;
2634 	}
2635 	up_write(&dmar_global_lock);
2636 
2637 	init_iommu_pm_ops();
2638 
	/* From here on the lock is only taken for reading. */
2639 	down_read(&dmar_global_lock);
2640 	for_each_active_iommu(iommu, drhd) {
2641 		/*
2642 		 * The flush queue implementation does not perform
2643 		 * page-selective invalidations that are required for efficient
2644 		 * TLB flushes in virtual environments.  The benefit of batching
2645 		 * is likely to be much lower than the overhead of synchronizing
2646 		 * the virtual and physical IOMMU page-tables.
2647 		 */
2648 		if (cap_caching_mode(iommu->cap) &&
2649 		    !first_level_by_default(iommu)) {
2650 			pr_info_once("IOMMU batching disallowed due to virtualization\n");
2651 			iommu_set_dma_strict();
2652 		}
2653 		iommu_device_sysfs_add(&iommu->iommu, NULL,
2654 				       intel_iommu_groups,
2655 				       "%s", iommu->name);
2656 		/*
2657 		 * The iommu device probe is protected by the iommu_probe_device_lock.
2658 		 * Release the dmar_global_lock before entering the device probe path
2659 		 * to avoid unnecessary lock order splat.
2660 		 */
2661 		up_read(&dmar_global_lock);
2662 		iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
2663 		down_read(&dmar_global_lock);
2664 
2665 		iommu_pmu_register(iommu);
2666 	}
2667 
	/* A probe failure is only reported; initialisation carries on. */
2668 	if (probe_acpi_namespace_devices())
2669 		pr_warn("ACPI name space devices didn't probe correctly\n");
2670 
2671 	/* Finally, we enable the DMA remapping hardware. */
2672 	for_each_iommu(iommu, drhd) {
2673 		if (!drhd->ignored && !translation_pre_enabled(iommu))
2674 			iommu_enable_translation(iommu);
2675 
2676 		iommu_disable_protect_mem_regions(iommu);
2677 	}
2678 	up_read(&dmar_global_lock);
2679 
2680 	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
2681 
2682 	intel_iommu_enabled = 1;
2683 
2684 	return 0;
2685 
	/* Every jump to this label happens with the write lock held. */
2686 out_free_dmar:
2687 	intel_iommu_free_dmars();
2688 	up_write(&dmar_global_lock);
2689 	return ret;
2690 }
2691 
2692 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
2693 {
2694 	struct device_domain_info *info = opaque;
2695 
2696 	domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
2697 	return 0;
2698 }
2699 
2700 /*
2701  * NB - intel-iommu lacks any sort of reference counting for the users of
2702  * dependent devices.  If multiple endpoints have intersecting dependent
2703  * devices, unbinding the driver from any one of them will possibly leave
2704  * the others unable to operate.
2705  */
2706 static void domain_context_clear(struct device_domain_info *info)
2707 {
2708 	if (!dev_is_pci(info->dev)) {
2709 		domain_context_clear_one(info, info->bus, info->devfn);
2710 		return;
2711 	}
2712 
2713 	pci_for_each_dma_alias(to_pci_dev(info->dev),
2714 			       &domain_context_clear_one_cb, info);
2715 	iommu_disable_pci_ats(info);
2716 }
2717 
2718 /*
2719  * Clear the page table pointer in context or pasid table entries so that
2720  * all DMA requests without PASID from the device are blocked. If the page
2721  * table has been set, clean up the data structures.
2722  */
2723 void device_block_translation(struct device *dev)
2724 {
2725 	struct device_domain_info *info = dev_iommu_priv_get(dev);
2726 	struct intel_iommu *iommu = info->iommu;
2727 	unsigned long flags;
2728 
2729 	/* Device in DMA blocking state. Nothing to do. */
2730 	if (!info->domain_attached)
2731 		return;
2732 
2733 	if (info->domain)
2734 		cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
2735 
	/*
	 * Scalable mode tears down the IOMMU_NO_PASID entry; legacy mode
	 * clears the context entries. Real DMA subdevices are skipped.
	 */
2736 	if (!dev_is_real_dma_subdevice(dev)) {
2737 		if (sm_supported(iommu))
2738 			intel_pasid_tear_down_entry(iommu, dev,
2739 						    IOMMU_NO_PASID, false);
2740 		else
2741 			domain_context_clear(info);
2742 	}
2743 
2744 	/* Device now in DMA blocking state. */
2745 	info->domain_attached = false;
2746 
2747 	if (!info->domain)
2748 		return;
2749 
	/* Unlink the device from its domain's device list under the lock. */
2750 	spin_lock_irqsave(&info->domain->lock, flags);
2751 	list_del(&info->link);
2752 	spin_unlock_irqrestore(&info->domain->lock, flags);
2753 
2754 	domain_detach_iommu(info->domain, iommu);
2755 	info->domain = NULL;
2756 }
2757 
2758 static int blocking_domain_attach_dev(struct iommu_domain *domain,
2759 				      struct device *dev,
2760 				      struct iommu_domain *old)
2761 {
2762 	struct device_domain_info *info = dev_iommu_priv_get(dev);
2763 
2764 	iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev);
2765 	device_block_translation(dev);
2766 	return 0;
2767 }
2768 
/* Forward declaration; referenced by blocking_domain below. */
2769 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
2770 					 struct device *dev, ioasid_t pasid,
2771 					 struct iommu_domain *old);
2772 
/*
 * Statically allocated domain of type IOMMU_DOMAIN_BLOCKED. Attaching it
 * goes through blocking_domain_attach_dev(), which calls
 * device_block_translation().
 */
2773 static struct iommu_domain blocking_domain = {
2774 	.type = IOMMU_DOMAIN_BLOCKED,
2775 	.ops = &(const struct iommu_domain_ops) {
2776 		.attach_dev	= blocking_domain_attach_dev,
2777 		.set_dev_pasid	= blocking_domain_set_dev_pasid,
2778 	}
2779 };
2780 
2781 static struct dmar_domain *paging_domain_alloc(void)
2782 {
2783 	struct dmar_domain *domain;
2784 
2785 	domain = kzalloc_obj(*domain);
2786 	if (!domain)
2787 		return ERR_PTR(-ENOMEM);
2788 
2789 	INIT_LIST_HEAD(&domain->devices);
2790 	INIT_LIST_HEAD(&domain->dev_pasids);
2791 	INIT_LIST_HEAD(&domain->cache_tags);
2792 	spin_lock_init(&domain->lock);
2793 	spin_lock_init(&domain->cache_lock);
2794 	xa_init(&domain->iommu_array);
2795 	INIT_LIST_HEAD(&domain->s1_domains);
2796 	spin_lock_init(&domain->s1_lock);
2797 
2798 	return domain;
2799 }
2800 
2801 static unsigned int compute_vasz_lg2_fs(struct intel_iommu *iommu,
2802 					unsigned int *top_level)
2803 {
2804 	unsigned int mgaw = cap_mgaw(iommu->cap);
2805 
2806 	/*
2807 	 * Spec 3.6 First-Stage Translation:
2808 	 *
2809 	 * Software must limit addresses to less than the minimum of MGAW
2810 	 * and the lower canonical address width implied by FSPM (i.e.,
2811 	 * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level).
2812 	 */
2813 	if (mgaw > 48 && cap_fl5lp_support(iommu->cap)) {
2814 		*top_level = 4;
2815 		return min(57, mgaw);
2816 	}
2817 
2818 	/* Four level is always supported */
2819 	*top_level = 3;
2820 	return min(48, mgaw);
2821 }
2822 
2823 static struct iommu_domain *
2824 intel_iommu_domain_alloc_first_stage(struct device *dev,
2825 				     struct intel_iommu *iommu, u32 flags)
2826 {
2827 	struct pt_iommu_x86_64_cfg cfg = {};
2828 	struct dmar_domain *dmar_domain;
2829 	int ret;
2830 
2831 	if (flags & ~IOMMU_HWPT_ALLOC_PASID)
2832 		return ERR_PTR(-EOPNOTSUPP);
2833 
2834 	/* Only SL is available in legacy mode */
2835 	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
2836 		return ERR_PTR(-EOPNOTSUPP);
2837 
2838 	dmar_domain = paging_domain_alloc();
2839 	if (IS_ERR(dmar_domain))
2840 		return ERR_CAST(dmar_domain);
2841 
2842 	cfg.common.hw_max_vasz_lg2 =
2843 		compute_vasz_lg2_fs(iommu, &cfg.top_level);
2844 	cfg.common.hw_max_oasz_lg2 = 52;
2845 	cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
2846 			      BIT(PT_FEAT_FLUSH_RANGE);
2847 	/* First stage always uses scalable mode */
2848 	if (!ecap_smpwc(iommu->ecap))
2849 		cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
2850 	dmar_domain->iommu.iommu_device = dev;
2851 	dmar_domain->iommu.nid = dev_to_node(dev);
2852 	dmar_domain->domain.ops = &intel_fs_paging_domain_ops;
2853 	/*
2854 	 * iotlb sync for map is only needed for legacy implementations that
2855 	 * explicitly require flushing internal write buffers to ensure memory
2856 	 * coherence.
2857 	 */
2858 	if (rwbf_required(iommu))
2859 		dmar_domain->iotlb_sync_map = true;
2860 
2861 	ret = pt_iommu_x86_64_init(&dmar_domain->fspt, &cfg, GFP_KERNEL);
2862 	if (ret) {
2863 		kfree(dmar_domain);
2864 		return ERR_PTR(ret);
2865 	}
2866 
2867 	if (!cap_fl1gp_support(iommu->cap))
2868 		dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
2869 	if (!intel_iommu_superpage)
2870 		dmar_domain->domain.pgsize_bitmap = SZ_4K;
2871 
2872 	return &dmar_domain->domain;
2873 }
2874 
2875 static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu,
2876 					unsigned int *top_level)
2877 {
2878 	unsigned int sagaw = cap_sagaw(iommu->cap);
2879 	unsigned int mgaw = cap_mgaw(iommu->cap);
2880 
2881 	/*
2882 	 * Find the largest table size that both the mgaw and sagaw support.
2883 	 * This sets the valid range of IOVA and the top starting level.
2884 	 * Some HW may only support a 4 or 5 level walk but must limit IOVA to
2885 	 * 3 levels.
2886 	 */
2887 	if (mgaw > 48 && sagaw >= BIT(3)) {
2888 		*top_level = 4;
2889 		return min(57, mgaw);
2890 	} else if (mgaw > 39 && sagaw >= BIT(2)) {
2891 		*top_level = 3 + ffs(sagaw >> 3);
2892 		return min(48, mgaw);
2893 	} else if (mgaw > 30 && sagaw >= BIT(1)) {
2894 		*top_level = 2 + ffs(sagaw >> 2);
2895 		return min(39, mgaw);
2896 	}
2897 	return 0;
2898 }
2899 
/*
 * Dirty tracking ops installed on second-stage domains allocated with
 * IOMMU_HWPT_ALLOC_DIRTY_TRACKING. IOMMU_PT_DIRTY_OPS(vtdss) presumably
 * fills in the generic page-table callbacks for the vtdss format (TODO
 * confirm); enabling/disabling tracking is driver specific.
 */
2900 static const struct iommu_dirty_ops intel_second_stage_dirty_ops = {
2901 	IOMMU_PT_DIRTY_OPS(vtdss),
2902 	.set_dirty_tracking = intel_iommu_set_dirty_tracking,
2903 };
2904 
2905 static struct iommu_domain *
2906 intel_iommu_domain_alloc_second_stage(struct device *dev,
2907 				      struct intel_iommu *iommu, u32 flags)
2908 {
2909 	struct pt_iommu_vtdss_cfg cfg = {};
2910 	struct dmar_domain *dmar_domain;
2911 	unsigned int sslps;
2912 	int ret;
2913 
2914 	if (flags &
2915 	    (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
2916 	       IOMMU_HWPT_ALLOC_PASID)))
2917 		return ERR_PTR(-EOPNOTSUPP);
2918 
2919 	if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) &&
2920 	     !nested_supported(iommu)) ||
2921 	    ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
2922 	     !ssads_supported(iommu)))
2923 		return ERR_PTR(-EOPNOTSUPP);
2924 
2925 	/* Legacy mode always supports second stage */
2926 	if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
2927 		return ERR_PTR(-EOPNOTSUPP);
2928 
2929 	dmar_domain = paging_domain_alloc();
2930 	if (IS_ERR(dmar_domain))
2931 		return ERR_CAST(dmar_domain);
2932 
2933 	cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level);
2934 	cfg.common.hw_max_oasz_lg2 = 52;
2935 	cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE);
2936 
2937 	/*
2938 	 * Read-only mapping is disallowed on the domain which serves as the
2939 	 * parent in a nested configuration, due to HW errata
2940 	 * (ERRATA_772415_SPR17)
2941 	 */
2942 	if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)
2943 		cfg.common.features |= BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE);
2944 
2945 	if (!iommu_paging_structure_coherency(iommu))
2946 		cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
2947 	dmar_domain->iommu.iommu_device = dev;
2948 	dmar_domain->iommu.nid = dev_to_node(dev);
2949 	dmar_domain->domain.ops = &intel_ss_paging_domain_ops;
2950 	dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
2951 
2952 	if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
2953 		dmar_domain->domain.dirty_ops = &intel_second_stage_dirty_ops;
2954 
2955 	ret = pt_iommu_vtdss_init(&dmar_domain->sspt, &cfg, GFP_KERNEL);
2956 	if (ret) {
2957 		kfree(dmar_domain);
2958 		return ERR_PTR(ret);
2959 	}
2960 
2961 	/* Adjust the supported page sizes to HW capability */
2962 	sslps = cap_super_page_val(iommu->cap);
2963 	if (!(sslps & BIT(0)))
2964 		dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_2M;
2965 	if (!(sslps & BIT(1)))
2966 		dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
2967 	if (!intel_iommu_superpage)
2968 		dmar_domain->domain.pgsize_bitmap = SZ_4K;
2969 
2970 	/*
2971 	 * Besides the internal write buffer flush, the caching mode used for
2972 	 * legacy nested translation (which utilizes shadowing page tables)
2973 	 * also requires iotlb sync on map.
2974 	 */
2975 	if (rwbf_required(iommu) || cap_caching_mode(iommu->cap))
2976 		dmar_domain->iotlb_sync_map = true;
2977 
2978 	return &dmar_domain->domain;
2979 }
2980 
2981 static struct iommu_domain *
2982 intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
2983 				      const struct iommu_user_data *user_data)
2984 {
2985 	struct device_domain_info *info = dev_iommu_priv_get(dev);
2986 	struct intel_iommu *iommu = info->iommu;
2987 	struct iommu_domain *domain;
2988 
2989 	if (user_data)
2990 		return ERR_PTR(-EOPNOTSUPP);
2991 
2992 	/* Prefer first stage if possible by default. */
2993 	domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags);
2994 	if (domain != ERR_PTR(-EOPNOTSUPP))
2995 		return domain;
2996 	return intel_iommu_domain_alloc_second_stage(dev, iommu, flags);
2997 }
2998 
2999 static void intel_iommu_domain_free(struct iommu_domain *domain)
3000 {
3001 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3002 
3003 	if (WARN_ON(dmar_domain->nested_parent &&
3004 		    !list_empty(&dmar_domain->s1_domains)))
3005 		return;
3006 
3007 	if (WARN_ON(!list_empty(&dmar_domain->devices)))
3008 		return;
3009 
3010 	pt_iommu_deinit(&dmar_domain->iommu);
3011 
3012 	kfree(dmar_domain->qi_batch);
3013 	kfree(dmar_domain);
3014 }
3015 
3016 static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain,
3017 						struct intel_iommu *iommu)
3018 {
3019 	if (WARN_ON(dmar_domain->domain.dirty_ops ||
3020 		    dmar_domain->nested_parent))
3021 		return -EINVAL;
3022 
3023 	/* Only SL is available in legacy mode */
3024 	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
3025 		return -EINVAL;
3026 
3027 	if (!ecap_smpwc(iommu->ecap) &&
3028 	    !(dmar_domain->fspt.x86_64_pt.common.features &
3029 	      BIT(PT_FEAT_DMA_INCOHERENT)))
3030 		return -EINVAL;
3031 
3032 	/* Supports the number of table levels */
3033 	if (!cap_fl5lp_support(iommu->cap) &&
3034 	    dmar_domain->fspt.x86_64_pt.common.max_vasz_lg2 > 48)
3035 		return -EINVAL;
3036 
3037 	/* Same page size support */
3038 	if (!cap_fl1gp_support(iommu->cap) &&
3039 	    (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3040 		return -EINVAL;
3041 
3042 	/* iotlb sync on map requirement */
3043 	if ((rwbf_required(iommu)) && !dmar_domain->iotlb_sync_map)
3044 		return -EINVAL;
3045 
3046 	return 0;
3047 }
3048 
3049 static int
3050 paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain,
3051 				      struct intel_iommu *iommu)
3052 {
3053 	unsigned int vasz_lg2 = dmar_domain->sspt.vtdss_pt.common.max_vasz_lg2;
3054 	unsigned int sslps = cap_super_page_val(iommu->cap);
3055 	struct pt_iommu_vtdss_hw_info pt_info;
3056 
3057 	pt_iommu_vtdss_hw_info(&dmar_domain->sspt, &pt_info);
3058 
3059 	if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu))
3060 		return -EINVAL;
3061 	if (dmar_domain->nested_parent && !nested_supported(iommu))
3062 		return -EINVAL;
3063 
3064 	/* Legacy mode always supports second stage */
3065 	if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
3066 		return -EINVAL;
3067 
3068 	if (!iommu_paging_structure_coherency(iommu) &&
3069 	    !(dmar_domain->sspt.vtdss_pt.common.features &
3070 	      BIT(PT_FEAT_DMA_INCOHERENT)))
3071 		return -EINVAL;
3072 
3073 	/* Address width falls within the capability */
3074 	if (cap_mgaw(iommu->cap) < vasz_lg2)
3075 		return -EINVAL;
3076 
3077 	/* Page table level is supported. */
3078 	if (!(cap_sagaw(iommu->cap) & BIT(pt_info.aw)))
3079 		return -EINVAL;
3080 
3081 	/* Same page size support */
3082 	if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M))
3083 		return -EINVAL;
3084 	if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3085 		return -EINVAL;
3086 
3087 	/* iotlb sync on map requirement */
3088 	if ((rwbf_required(iommu) || cap_caching_mode(iommu->cap)) &&
3089 	    !dmar_domain->iotlb_sync_map)
3090 		return -EINVAL;
3091 
3092 	/*
3093 	 * FIXME this is locked wrong, it needs to be under the
3094 	 * dmar_domain->lock
3095 	 */
3096 	if ((dmar_domain->sspt.vtdss_pt.common.features &
3097 	     BIT(PT_FEAT_VTDSS_FORCE_COHERENCE)) &&
3098 	    !ecap_sc_support(iommu->ecap))
3099 		return -EINVAL;
3100 	return 0;
3101 }
3102 
3103 int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
3104 {
3105 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3106 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3107 	struct intel_iommu *iommu = info->iommu;
3108 	int ret = -EINVAL;
3109 
3110 	if (intel_domain_is_fs_paging(dmar_domain))
3111 		ret = paging_domain_compatible_first_stage(dmar_domain, iommu);
3112 	else if (intel_domain_is_ss_paging(dmar_domain))
3113 		ret = paging_domain_compatible_second_stage(dmar_domain, iommu);
3114 	else if (WARN_ON(true))
3115 		ret = -EINVAL;
3116 	if (ret)
3117 		return ret;
3118 
3119 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3120 	    context_copied(iommu, info->bus, info->devfn))
3121 		return intel_pasid_setup_sm_context(dev);
3122 
3123 	return 0;
3124 }
3125 
/*
 * attach_dev for paging domains.
 *
 * Translation for @dev is blocked first. The domain is then checked for
 * compatibility, IOPF handling is set up and the device is attached; if the
 * attach fails the IOPF setup is undone.
 *
 * Returns 0 on success or the error of the failing step.
 */
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev,
				     struct iommu_domain *old)
{
	int err;

	device_block_translation(dev);

	err = paging_domain_compatible(domain, dev);
	if (!err)
		err = iopf_for_domain_set(domain, dev);
	if (err)
		return err;

	err = dmar_domain_attach_device(to_dmar_domain(domain), dev);
	if (err)
		iopf_for_domain_remove(domain, dev);

	return err;
}
3148 
3149 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
3150 				 struct iommu_iotlb_gather *gather)
3151 {
3152 	cache_tag_flush_range(to_dmar_domain(domain), gather->start,
3153 			      gather->end,
3154 			      iommu_pages_list_empty(&gather->freelist));
3155 	iommu_put_pages_list(&gather->freelist);
3156 }
3157 
3158 static bool domain_support_force_snooping(struct dmar_domain *domain)
3159 {
3160 	struct device_domain_info *info;
3161 	bool support = true;
3162 
3163 	assert_spin_locked(&domain->lock);
3164 	list_for_each_entry(info, &domain->devices, link) {
3165 		if (!ecap_sc_support(info->iommu->ecap)) {
3166 			support = false;
3167 			break;
3168 		}
3169 	}
3170 
3171 	return support;
3172 }
3173 
3174 static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain)
3175 {
3176 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3177 	struct device_domain_info *info;
3178 
3179 	guard(spinlock_irqsave)(&dmar_domain->lock);
3180 
3181 	if (dmar_domain->force_snooping)
3182 		return true;
3183 
3184 	if (!domain_support_force_snooping(dmar_domain))
3185 		return false;
3186 
3187 	dmar_domain->force_snooping = true;
3188 	list_for_each_entry(info, &dmar_domain->devices, link)
3189 		intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
3190 						     IOMMU_NO_PASID);
3191 	return true;
3192 }
3193 
3194 static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain)
3195 {
3196 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3197 
3198 	guard(spinlock_irqsave)(&dmar_domain->lock);
3199 	if (!domain_support_force_snooping(dmar_domain))
3200 		return false;
3201 
3202 	/*
3203 	 * Second level page table supports per-PTE snoop control. The
3204 	 * iommu_map() interface will handle this by setting SNP bit.
3205 	 */
3206 	dmar_domain->sspt.vtdss_pt.common.features |=
3207 		BIT(PT_FEAT_VTDSS_FORCE_COHERENCE);
3208 	dmar_domain->force_snooping = true;
3209 	return true;
3210 }
3211 
3212 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
3213 {
3214 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3215 
3216 	switch (cap) {
3217 	case IOMMU_CAP_CACHE_COHERENCY:
3218 		return true;
3219 	case IOMMU_CAP_PRE_BOOT_PROTECTION:
3220 		return dmar_platform_optin();
3221 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
3222 		return ecap_sc_support(info->iommu->ecap);
3223 	case IOMMU_CAP_DIRTY_TRACKING:
3224 		return ssads_supported(info->iommu);
3225 	case IOMMU_CAP_PCI_ATS_SUPPORTED:
3226 		return info->ats_supported;
3227 	default:
3228 		return false;
3229 	}
3230 }
3231 
3232 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
3233 {
3234 	struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
3235 	struct device_domain_info *info;
3236 	struct intel_iommu *iommu;
3237 	u8 bus, devfn;
3238 	int ret;
3239 
3240 	iommu = device_lookup_iommu(dev, &bus, &devfn);
3241 	if (!iommu || !iommu->iommu.ops)
3242 		return ERR_PTR(-ENODEV);
3243 
3244 	info = kzalloc_obj(*info);
3245 	if (!info)
3246 		return ERR_PTR(-ENOMEM);
3247 
3248 	if (dev_is_real_dma_subdevice(dev)) {
3249 		info->bus = pdev->bus->number;
3250 		info->devfn = pdev->devfn;
3251 		info->segment = pci_domain_nr(pdev->bus);
3252 	} else {
3253 		info->bus = bus;
3254 		info->devfn = devfn;
3255 		info->segment = iommu->segment;
3256 	}
3257 
3258 	info->dev = dev;
3259 	info->iommu = iommu;
3260 	RB_CLEAR_NODE(&info->node);
3261 	if (dev_is_pci(dev)) {
3262 		if (ecap_dev_iotlb_support(iommu->ecap) &&
3263 		    pci_ats_supported(pdev) &&
3264 		    dmar_ats_supported(pdev, iommu)) {
3265 			info->ats_supported = 1;
3266 			info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
3267 
3268 			/*
3269 			 * For IOMMU that supports device IOTLB throttling
3270 			 * (DIT), we assign PFSID to the invalidation desc
3271 			 * of a VF such that IOMMU HW can gauge queue depth
3272 			 * at PF level. If DIT is not set, PFSID will be
3273 			 * treated as reserved, which should be set to 0.
3274 			 */
3275 			if (ecap_dit(iommu->ecap))
3276 				info->pfsid = pci_dev_id(pci_physfn(pdev));
3277 			info->ats_qdep = pci_ats_queue_depth(pdev);
3278 		}
3279 		if (sm_supported(iommu)) {
3280 			if (pasid_supported(iommu)) {
3281 				int features = pci_pasid_features(pdev);
3282 
3283 				if (features >= 0)
3284 					info->pasid_supported = features | 1;
3285 			}
3286 
3287 			if (info->ats_supported && ecap_prs(iommu->ecap) &&
3288 			    ecap_pds(iommu->ecap) && pci_pri_supported(pdev))
3289 				info->pri_supported = 1;
3290 		}
3291 	}
3292 
3293 	dev_iommu_priv_set(dev, info);
3294 	if (pdev && pci_ats_supported(pdev)) {
3295 		pci_prepare_ats(pdev, VTD_PAGE_SHIFT);
3296 		ret = device_rbtree_insert(iommu, info);
3297 		if (ret)
3298 			goto free;
3299 	}
3300 
3301 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3302 		ret = intel_pasid_alloc_table(dev);
3303 		if (ret) {
3304 			dev_err(dev, "PASID table allocation failed\n");
3305 			goto clear_rbtree;
3306 		}
3307 
3308 		if (!context_copied(iommu, info->bus, info->devfn)) {
3309 			ret = intel_pasid_setup_sm_context(dev);
3310 			if (ret)
3311 				goto free_table;
3312 		}
3313 	}
3314 
3315 	intel_iommu_debugfs_create_dev(info);
3316 
3317 	return &iommu->iommu;
3318 free_table:
3319 	intel_pasid_free_table(dev);
3320 clear_rbtree:
3321 	device_rbtree_remove(info);
3322 free:
3323 	kfree(info);
3324 
3325 	return ERR_PTR(ret);
3326 }
3327 
3328 static void intel_iommu_probe_finalize(struct device *dev)
3329 {
3330 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3331 	struct intel_iommu *iommu = info->iommu;
3332 
3333 	/*
3334 	 * The PCIe spec, in its wisdom, declares that the behaviour of the
3335 	 * device is undefined if you enable PASID support after ATS support.
3336 	 * So always enable PASID support on devices which have it, even if
3337 	 * we can't yet know if we're ever going to use it.
3338 	 */
3339 	if (info->pasid_supported &&
3340 	    !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
3341 		info->pasid_enabled = 1;
3342 
3343 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3344 		iommu_enable_pci_ats(info);
3345 		/* Assign a DEVTLB cache tag to the default domain. */
3346 		if (info->ats_enabled && info->domain) {
3347 			u16 did = domain_id_iommu(info->domain, iommu);
3348 
3349 			if (cache_tag_assign(info->domain, did, dev,
3350 					     IOMMU_NO_PASID, CACHE_TAG_DEVTLB))
3351 				iommu_disable_pci_ats(info);
3352 		}
3353 	}
3354 	iommu_enable_pci_pri(info);
3355 }
3356 
3357 static void intel_iommu_release_device(struct device *dev)
3358 {
3359 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3360 	struct intel_iommu *iommu = info->iommu;
3361 
3362 	iommu_disable_pci_pri(info);
3363 	iommu_disable_pci_ats(info);
3364 
3365 	if (info->pasid_enabled) {
3366 		pci_disable_pasid(to_pci_dev(dev));
3367 		info->pasid_enabled = 0;
3368 	}
3369 
3370 	mutex_lock(&iommu->iopf_lock);
3371 	if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
3372 		device_rbtree_remove(info);
3373 	mutex_unlock(&iommu->iopf_lock);
3374 
3375 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3376 	    !context_copied(iommu, info->bus, info->devfn))
3377 		intel_pasid_teardown_sm_context(dev);
3378 
3379 	intel_pasid_free_table(dev);
3380 	intel_iommu_debugfs_remove_dev(info);
3381 	kfree(info);
3382 }
3383 
3384 static void intel_iommu_get_resv_regions(struct device *device,
3385 					 struct list_head *head)
3386 {
3387 	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
3388 	struct iommu_resv_region *reg;
3389 	struct dmar_rmrr_unit *rmrr;
3390 	struct device *i_dev;
3391 	int i;
3392 
3393 	rcu_read_lock();
3394 	for_each_rmrr_units(rmrr) {
3395 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3396 					  i, i_dev) {
3397 			struct iommu_resv_region *resv;
3398 			enum iommu_resv_type type;
3399 			size_t length;
3400 
3401 			if (i_dev != device &&
3402 			    !is_downstream_to_pci_bridge(device, i_dev))
3403 				continue;
3404 
3405 			length = rmrr->end_address - rmrr->base_address + 1;
3406 
3407 			type = device_rmrr_is_relaxable(device) ?
3408 				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
3409 
3410 			resv = iommu_alloc_resv_region(rmrr->base_address,
3411 						       length, prot, type,
3412 						       GFP_ATOMIC);
3413 			if (!resv)
3414 				break;
3415 
3416 			list_add_tail(&resv->list, head);
3417 		}
3418 	}
3419 	rcu_read_unlock();
3420 
3421 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
3422 	if (dev_is_pci(device)) {
3423 		struct pci_dev *pdev = to_pci_dev(device);
3424 
3425 		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
3426 			reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
3427 					IOMMU_RESV_DIRECT_RELAXABLE,
3428 					GFP_KERNEL);
3429 			if (reg)
3430 				list_add_tail(&reg->list, head);
3431 		}
3432 	}
3433 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
3434 
3435 	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
3436 				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
3437 				      0, IOMMU_RESV_MSI, GFP_KERNEL);
3438 	if (!reg)
3439 		return;
3440 	list_add_tail(&reg->list, head);
3441 }
3442 
/*
 * device_group callback: PCI devices use the PCI grouping helper, every
 * other device uses the generic one.
 */
static struct iommu_group *intel_iommu_device_group(struct device *dev)
{
	return dev_is_pci(dev) ? pci_device_group(dev) :
				 generic_device_group(dev);
}
3449 
3450 int intel_iommu_enable_iopf(struct device *dev)
3451 {
3452 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3453 	struct intel_iommu *iommu = info->iommu;
3454 	int ret;
3455 
3456 	if (!info->pri_enabled)
3457 		return -ENODEV;
3458 
3459 	/* pri_enabled is protected by the group mutex. */
3460 	iommu_group_mutex_assert(dev);
3461 	if (info->iopf_refcount) {
3462 		info->iopf_refcount++;
3463 		return 0;
3464 	}
3465 
3466 	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
3467 	if (ret)
3468 		return ret;
3469 
3470 	info->iopf_refcount = 1;
3471 
3472 	return 0;
3473 }
3474 
3475 void intel_iommu_disable_iopf(struct device *dev)
3476 {
3477 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3478 	struct intel_iommu *iommu = info->iommu;
3479 
3480 	if (WARN_ON(!info->pri_enabled || !info->iopf_refcount))
3481 		return;
3482 
3483 	iommu_group_mutex_assert(dev);
3484 	if (--info->iopf_refcount)
3485 		return;
3486 
3487 	iopf_queue_remove_device(iommu->iopf_queue, dev);
3488 }
3489 
3490 static bool intel_iommu_is_attach_deferred(struct device *dev)
3491 {
3492 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3493 
3494 	return translation_pre_enabled(info->iommu) && !info->domain;
3495 }
3496 
3497 /*
3498  * Check that the device does not live on an external facing PCI port that is
3499  * marked as untrusted. Such devices should not be able to apply quirks and
3500  * thus not be able to bypass the IOMMU restrictions.
3501  */
3502 static bool risky_device(struct pci_dev *pdev)
3503 {
3504 	if (pdev->untrusted) {
3505 		pci_info(pdev,
3506 			 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
3507 			 pdev->vendor, pdev->device);
3508 		pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
3509 		return true;
3510 	}
3511 	return false;
3512 }
3513 
3514 static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
3515 				      unsigned long iova, size_t size)
3516 {
3517 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3518 
3519 	if (dmar_domain->iotlb_sync_map)
3520 		cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1);
3521 
3522 	return 0;
3523 }
3524 
3525 void domain_remove_dev_pasid(struct iommu_domain *domain,
3526 			     struct device *dev, ioasid_t pasid)
3527 {
3528 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3529 	struct dev_pasid_info *curr, *dev_pasid = NULL;
3530 	struct intel_iommu *iommu = info->iommu;
3531 	struct dmar_domain *dmar_domain;
3532 	unsigned long flags;
3533 
3534 	if (!domain)
3535 		return;
3536 
3537 	/* Identity domain and blocked domain have no meta data for pasid. */
3538 	if (domain->type == IOMMU_DOMAIN_IDENTITY || domain->type == IOMMU_DOMAIN_BLOCKED)
3539 		return;
3540 
3541 	dmar_domain = to_dmar_domain(domain);
3542 	spin_lock_irqsave(&dmar_domain->lock, flags);
3543 	list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
3544 		if (curr->dev == dev && curr->pasid == pasid) {
3545 			list_del(&curr->link_domain);
3546 			dev_pasid = curr;
3547 			break;
3548 		}
3549 	}
3550 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
3551 
3552 	if (WARN_ON_ONCE(!dev_pasid))
3553 		return;
3554 
3555 	cache_tag_unassign_domain(dmar_domain, dev, pasid);
3556 	domain_detach_iommu(dmar_domain, iommu);
3557 	intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
3558 	kfree(dev_pasid);
3559 }
3560 
3561 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
3562 					 struct device *dev, ioasid_t pasid,
3563 					 struct iommu_domain *old)
3564 {
3565 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3566 
3567 	intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
3568 	iopf_for_domain_remove(old, dev);
3569 	domain_remove_dev_pasid(old, dev, pasid);
3570 
3571 	return 0;
3572 }
3573 
3574 struct dev_pasid_info *
3575 domain_add_dev_pasid(struct iommu_domain *domain,
3576 		     struct device *dev, ioasid_t pasid)
3577 {
3578 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3579 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3580 	struct intel_iommu *iommu = info->iommu;
3581 	struct dev_pasid_info *dev_pasid;
3582 	unsigned long flags;
3583 	int ret;
3584 
3585 	dev_pasid = kzalloc_obj(*dev_pasid);
3586 	if (!dev_pasid)
3587 		return ERR_PTR(-ENOMEM);
3588 
3589 	ret = domain_attach_iommu(dmar_domain, iommu);
3590 	if (ret)
3591 		goto out_free;
3592 
3593 	ret = cache_tag_assign_domain(dmar_domain, dev, pasid);
3594 	if (ret)
3595 		goto out_detach_iommu;
3596 
3597 	dev_pasid->dev = dev;
3598 	dev_pasid->pasid = pasid;
3599 	spin_lock_irqsave(&dmar_domain->lock, flags);
3600 	list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
3601 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
3602 
3603 	return dev_pasid;
3604 out_detach_iommu:
3605 	domain_detach_iommu(dmar_domain, iommu);
3606 out_free:
3607 	kfree(dev_pasid);
3608 	return ERR_PTR(ret);
3609 }
3610 
3611 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
3612 				     struct device *dev, ioasid_t pasid,
3613 				     struct iommu_domain *old)
3614 {
3615 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3616 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3617 	struct intel_iommu *iommu = info->iommu;
3618 	struct dev_pasid_info *dev_pasid;
3619 	int ret;
3620 
3621 	if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
3622 		return -EINVAL;
3623 
3624 	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
3625 		return -EOPNOTSUPP;
3626 
3627 	if (context_copied(iommu, info->bus, info->devfn))
3628 		return -EBUSY;
3629 
3630 	ret = paging_domain_compatible(domain, dev);
3631 	if (ret)
3632 		return ret;
3633 
3634 	dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
3635 	if (IS_ERR(dev_pasid))
3636 		return PTR_ERR(dev_pasid);
3637 
3638 	ret = iopf_for_domain_replace(domain, old, dev);
3639 	if (ret)
3640 		goto out_remove_dev_pasid;
3641 
3642 	if (intel_domain_is_fs_paging(dmar_domain))
3643 		ret = domain_setup_first_level(iommu, dmar_domain,
3644 					       dev, pasid, old);
3645 	else if (intel_domain_is_ss_paging(dmar_domain))
3646 		ret = domain_setup_second_level(iommu, dmar_domain,
3647 						dev, pasid, old);
3648 	else if (WARN_ON(true))
3649 		ret = -EINVAL;
3650 
3651 	if (ret)
3652 		goto out_unwind_iopf;
3653 
3654 	domain_remove_dev_pasid(old, dev, pasid);
3655 
3656 	intel_iommu_debugfs_create_dev_pasid(dev_pasid);
3657 
3658 	return 0;
3659 
3660 out_unwind_iopf:
3661 	iopf_for_domain_replace(old, domain, dev);
3662 out_remove_dev_pasid:
3663 	domain_remove_dev_pasid(domain, dev, pasid);
3664 	return ret;
3665 }
3666 
3667 static void *intel_iommu_hw_info(struct device *dev, u32 *length,
3668 				 enum iommu_hw_info_type *type)
3669 {
3670 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3671 	struct intel_iommu *iommu = info->iommu;
3672 	struct iommu_hw_info_vtd *vtd;
3673 
3674 	if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
3675 	    *type != IOMMU_HW_INFO_TYPE_INTEL_VTD)
3676 		return ERR_PTR(-EOPNOTSUPP);
3677 
3678 	vtd = kzalloc_obj(*vtd);
3679 	if (!vtd)
3680 		return ERR_PTR(-ENOMEM);
3681 
3682 	vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
3683 	vtd->cap_reg = iommu->cap;
3684 	vtd->ecap_reg = iommu->ecap;
3685 	*length = sizeof(*vtd);
3686 	*type = IOMMU_HW_INFO_TYPE_INTEL_VTD;
3687 	return vtd;
3688 }
3689 
3690 /* Set dirty tracking for the devices that the domain has been attached. */
3691 static int domain_set_dirty_tracking(struct dmar_domain *domain, bool enable)
3692 {
3693 	struct device_domain_info *info;
3694 	struct dev_pasid_info *dev_pasid;
3695 	int ret = 0;
3696 
3697 	lockdep_assert_held(&domain->lock);
3698 
3699 	list_for_each_entry(info, &domain->devices, link) {
3700 		ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
3701 						       IOMMU_NO_PASID, enable);
3702 		if (ret)
3703 			return ret;
3704 	}
3705 
3706 	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
3707 		info = dev_iommu_priv_get(dev_pasid->dev);
3708 		ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
3709 						       dev_pasid->pasid, enable);
3710 		if (ret)
3711 			break;
3712 	}
3713 
3714 	return ret;
3715 }
3716 
3717 static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
3718 					    bool enable)
3719 {
3720 	struct dmar_domain *s1_domain;
3721 	unsigned long flags;
3722 	int ret;
3723 
3724 	spin_lock(&domain->s1_lock);
3725 	list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
3726 		spin_lock_irqsave(&s1_domain->lock, flags);
3727 		ret = domain_set_dirty_tracking(s1_domain, enable);
3728 		spin_unlock_irqrestore(&s1_domain->lock, flags);
3729 		if (ret)
3730 			goto err_unwind;
3731 	}
3732 	spin_unlock(&domain->s1_lock);
3733 	return 0;
3734 
3735 err_unwind:
3736 	list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
3737 		spin_lock_irqsave(&s1_domain->lock, flags);
3738 		domain_set_dirty_tracking(s1_domain, domain->dirty_tracking);
3739 		spin_unlock_irqrestore(&s1_domain->lock, flags);
3740 	}
3741 	spin_unlock(&domain->s1_lock);
3742 	return ret;
3743 }
3744 
3745 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
3746 					  bool enable)
3747 {
3748 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3749 	int ret;
3750 
3751 	spin_lock(&dmar_domain->lock);
3752 	if (dmar_domain->dirty_tracking == enable)
3753 		goto out_unlock;
3754 
3755 	ret = domain_set_dirty_tracking(dmar_domain, enable);
3756 	if (ret)
3757 		goto err_unwind;
3758 
3759 	if (dmar_domain->nested_parent) {
3760 		ret = parent_domain_set_dirty_tracking(dmar_domain, enable);
3761 		if (ret)
3762 			goto err_unwind;
3763 	}
3764 
3765 	dmar_domain->dirty_tracking = enable;
3766 out_unlock:
3767 	spin_unlock(&dmar_domain->lock);
3768 
3769 	return 0;
3770 
3771 err_unwind:
3772 	domain_set_dirty_tracking(dmar_domain, dmar_domain->dirty_tracking);
3773 	spin_unlock(&dmar_domain->lock);
3774 	return ret;
3775 }
3776 
3777 static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
3778 {
3779 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3780 	struct intel_iommu *iommu = info->iommu;
3781 	struct context_entry *context;
3782 
3783 	spin_lock(&iommu->lock);
3784 	context = iommu_context_addr(iommu, bus, devfn, 1);
3785 	if (!context) {
3786 		spin_unlock(&iommu->lock);
3787 		return -ENOMEM;
3788 	}
3789 
3790 	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
3791 		spin_unlock(&iommu->lock);
3792 		return 0;
3793 	}
3794 
3795 	copied_context_tear_down(iommu, context, bus, devfn);
3796 	context_clear_entry(context);
3797 	context_set_domain_id(context, FLPT_DEFAULT_DID);
3798 
3799 	/*
3800 	 * In pass through mode, AW must be programmed to indicate the largest
3801 	 * AGAW value supported by hardware. And ASR is ignored by hardware.
3802 	 */
3803 	context_set_address_width(context, iommu->msagaw);
3804 	context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH);
3805 	context_set_fault_enable(context);
3806 	context_set_present(context);
3807 	if (!ecap_coherent(iommu->ecap))
3808 		clflush_cache_range(context, sizeof(*context));
3809 	context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn);
3810 	spin_unlock(&iommu->lock);
3811 
3812 	return 0;
3813 }
3814 
3815 static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data)
3816 {
3817 	struct device *dev = data;
3818 
3819 	return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff);
3820 }
3821 
3822 static int device_setup_pass_through(struct device *dev)
3823 {
3824 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3825 
3826 	if (!dev_is_pci(dev))
3827 		return context_setup_pass_through(dev, info->bus, info->devfn);
3828 
3829 	return pci_for_each_dma_alias(to_pci_dev(dev),
3830 				      context_setup_pass_through_cb, dev);
3831 }
3832 
3833 static int identity_domain_attach_dev(struct iommu_domain *domain,
3834 				      struct device *dev,
3835 				      struct iommu_domain *old)
3836 {
3837 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3838 	struct intel_iommu *iommu = info->iommu;
3839 	int ret;
3840 
3841 	device_block_translation(dev);
3842 
3843 	if (dev_is_real_dma_subdevice(dev))
3844 		return 0;
3845 
3846 	/*
3847 	 * No PRI support with the global identity domain. No need to enable or
3848 	 * disable PRI in this path as the iommu has been put in the blocking
3849 	 * state.
3850 	 */
3851 	if (sm_supported(iommu))
3852 		ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
3853 	else
3854 		ret = device_setup_pass_through(dev);
3855 
3856 	if (!ret)
3857 		info->domain_attached = true;
3858 
3859 	return ret;
3860 }
3861 
3862 static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
3863 					 struct device *dev, ioasid_t pasid,
3864 					 struct iommu_domain *old)
3865 {
3866 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3867 	struct intel_iommu *iommu = info->iommu;
3868 	int ret;
3869 
3870 	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
3871 		return -EOPNOTSUPP;
3872 
3873 	ret = iopf_for_domain_replace(domain, old, dev);
3874 	if (ret)
3875 		return ret;
3876 
3877 	ret = domain_setup_passthrough(iommu, dev, pasid, old);
3878 	if (ret) {
3879 		iopf_for_domain_replace(old, domain, dev);
3880 		return ret;
3881 	}
3882 
3883 	domain_remove_dev_pasid(old, dev, pasid);
3884 	return 0;
3885 }
3886 
/*
 * The single, statically allocated identity (pass-through) domain. It only
 * implements attaching a device and setting a device PASID; it is published
 * through intel_iommu_ops.identity_domain.
 */
static struct iommu_domain identity_domain = {
	.type = IOMMU_DOMAIN_IDENTITY,
	.ops = &(const struct iommu_domain_ops) {
		.attach_dev	= identity_domain_attach_dev,
		.set_dev_pasid	= identity_domain_set_dev_pasid,
	},
};
3894 
/*
 * Domain ops for first-stage paging domains.
 *
 * Identical to the second-stage table except for the page-table format
 * passed to IOMMU_PT_DOMAIN_OPS() and the enforce_cache_coherency handler.
 * NOTE(review): IOMMU_PT_DOMAIN_OPS() presumably supplies the generic
 * page-table callbacks (map/unmap etc.) for the named format - confirm
 * against its definition.
 */
const struct iommu_domain_ops intel_fs_paging_domain_ops = {
	IOMMU_PT_DOMAIN_OPS(x86_64),
	.attach_dev = intel_iommu_attach_device,
	.set_dev_pasid = intel_iommu_set_dev_pasid,
	.iotlb_sync_map = intel_iommu_iotlb_sync_map,
	.flush_iotlb_all = intel_flush_iotlb_all,
	.iotlb_sync = intel_iommu_tlb_sync,
	.free = intel_iommu_domain_free,
	.enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs,
};
3905 
/*
 * Domain ops for second-stage paging domains.
 *
 * Identical to the first-stage table except for the page-table format
 * passed to IOMMU_PT_DOMAIN_OPS() and the enforce_cache_coherency handler.
 * NOTE(review): IOMMU_PT_DOMAIN_OPS() presumably supplies the generic
 * page-table callbacks (map/unmap etc.) for the named format - confirm
 * against its definition.
 */
const struct iommu_domain_ops intel_ss_paging_domain_ops = {
	IOMMU_PT_DOMAIN_OPS(vtdss),
	.attach_dev = intel_iommu_attach_device,
	.set_dev_pasid = intel_iommu_set_dev_pasid,
	.iotlb_sync_map = intel_iommu_iotlb_sync_map,
	.flush_iotlb_all = intel_flush_iotlb_all,
	.iotlb_sync = intel_iommu_tlb_sync,
	.free = intel_iommu_domain_free,
	.enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss,
};
3916 
/*
 * Driver-level IOMMU ops of the Intel VT-d driver.
 *
 * The blocking domain serves both as the blocked domain and as the domain
 * installed when a device is released.
 */
const struct iommu_ops intel_iommu_ops = {
	.blocked_domain		= &blocking_domain,
	.release_domain		= &blocking_domain,
	.identity_domain	= &identity_domain,
	.capable		= intel_iommu_capable,
	.hw_info		= intel_iommu_hw_info,
	.domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags,
	.domain_alloc_sva	= intel_svm_domain_alloc,
	.domain_alloc_nested	= intel_iommu_domain_alloc_nested,
	.probe_device		= intel_iommu_probe_device,
	.probe_finalize		= intel_iommu_probe_finalize,
	.release_device		= intel_iommu_release_device,
	.get_resv_regions	= intel_iommu_get_resv_regions,
	.device_group		= intel_iommu_device_group,
	.is_attach_deferred	= intel_iommu_is_attach_deferred,
	.def_domain_type	= device_def_domain_type,
	.page_response		= intel_iommu_page_response,
};
3935 
3936 static void quirk_iommu_igfx(struct pci_dev *dev)
3937 {
3938 	if (risky_device(dev))
3939 		return;
3940 
3941 	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
3942 	disable_igfx_iommu = 1;
3943 }
3944 
3945 /* Q35 integrated gfx dmar support is totally busted. */
3946 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x29b2, quirk_iommu_igfx);
3947 
3948 /* G4x/GM45 integrated gfx dmar support is totally busted. */
3949 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
3950 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
3951 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
3952 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
3953 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
3954 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
3955 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
3956 
3957 /* QM57/QS57 integrated gfx malfunctions with dmar */
3958 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx);
3959 
3960 /* Broadwell igfx malfunctions with dmar */
3961 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
3962 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
3963 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
3964 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
3965 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
3966 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
3967 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
3968 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
3969 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
3970 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
3971 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
3972 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
3973 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
3974 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
3975 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
3976 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
3977 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
3978 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
3979 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
3980 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
3981 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
3982 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
3983 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
3984 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
3985 
3986 static void quirk_iommu_rwbf(struct pci_dev *dev)
3987 {
3988 	if (risky_device(dev))
3989 		return;
3990 
3991 	/*
3992 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
3993 	 * but needs it. Same seems to hold for the desktop versions.
3994 	 */
3995 	pci_info(dev, "Forcing write-buffer flush capability\n");
3996 	rwbf_quirk = 1;
3997 }
3998 
3999 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4000 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4001 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4002 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4003 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4004 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4005 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4006 
4007 #define GGC 0x52
4008 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
4009 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
4010 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
4011 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
4012 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
4013 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
4014 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
4015 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
4016 
4017 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4018 {
4019 	unsigned short ggc;
4020 
4021 	if (risky_device(dev))
4022 		return;
4023 
4024 	if (pci_read_config_word(dev, GGC, &ggc))
4025 		return;
4026 
4027 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4028 		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4029 		disable_igfx_iommu = 1;
4030 	} else if (!disable_igfx_iommu) {
4031 		/* we have to ensure the gfx device is idle before we flush */
4032 		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
4033 		iommu_set_dma_strict();
4034 	}
4035 }
4036 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4037 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4038 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4039 
4040 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
4041 {
4042 	unsigned short ver;
4043 
4044 	if (!IS_GFX_DEVICE(dev))
4045 		return;
4046 
4047 	ver = (dev->device >> 8) & 0xff;
4048 	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
4049 	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
4050 	    ver != 0x9a && ver != 0xa7 && ver != 0x7d)
4051 		return;
4052 
4053 	if (risky_device(dev))
4054 		return;
4055 
4056 	pci_info(dev, "Skip IOMMU disabling for graphics\n");
4057 	iommu_skip_te_disable = 1;
4058 }
4059 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
4060 
4061 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4062    ISOCH DMAR unit for the Azalia sound device, but not give it any
4063    TLB entries, which causes it to deadlock. Check for that.  We do
4064    this in a function called from init_dmars(), instead of in a PCI
4065    quirk, because we don't want to print the obnoxious "BIOS broken"
4066    message if VT-d is actually disabled.
4067 */
4068 static void __init check_tylersburg_isoch(void)
4069 {
4070 	struct pci_dev *pdev;
4071 	uint32_t vtisochctrl;
4072 
4073 	/* If there's no Azalia in the system anyway, forget it. */
4074 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4075 	if (!pdev)
4076 		return;
4077 
4078 	if (risky_device(pdev)) {
4079 		pci_dev_put(pdev);
4080 		return;
4081 	}
4082 
4083 	pci_dev_put(pdev);
4084 
4085 	/* System Management Registers. Might be hidden, in which case
4086 	   we can't do the sanity check. But that's OK, because the
4087 	   known-broken BIOSes _don't_ actually hide it, so far. */
4088 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4089 	if (!pdev)
4090 		return;
4091 
4092 	if (risky_device(pdev)) {
4093 		pci_dev_put(pdev);
4094 		return;
4095 	}
4096 
4097 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4098 		pci_dev_put(pdev);
4099 		return;
4100 	}
4101 
4102 	pci_dev_put(pdev);
4103 
4104 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4105 	if (vtisochctrl & 1)
4106 		return;
4107 
4108 	/* Drop all bits other than the number of TLB entries */
4109 	vtisochctrl &= 0x1c;
4110 
4111 	/* If we have the recommended number of TLB entries (16), fine. */
4112 	if (vtisochctrl == 0x10)
4113 		return;
4114 
4115 	/* Zero TLB entries? You get to ride the short bus to school. */
4116 	if (!vtisochctrl) {
4117 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4118 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4119 		     dmi_get_system_info(DMI_BIOS_VENDOR),
4120 		     dmi_get_system_info(DMI_BIOS_VERSION),
4121 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
4122 		iommu_identity_mapping |= IDENTMAP_AZALIA;
4123 		return;
4124 	}
4125 
4126 	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4127 	       vtisochctrl);
4128 }
4129 
4130 /*
4131  * Here we deal with a device TLB defect where device may inadvertently issue ATS
4132  * invalidation completion before posted writes initiated with translated address
4133  * that utilized translations matching the invalidation address range, violating
4134  * the invalidation completion ordering.
4135  * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is
4136  * vulnerable to this defect. In other words, any dTLB invalidation initiated not
4137  * under the control of the trusted/privileged host device driver must use this
4138  * quirk.
4139  * Device TLBs are invalidated under the following six conditions:
4140  * 1. Device driver does DMA API unmap IOVA
4141  * 2. Device driver unbind a PASID from a process, sva_unbind_device()
4142  * 3. PASID is torn down, after PASID cache is flushed. e.g. process
4143  *    exit_mmap() due to crash
4144  * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
4145  *    VM has to free pages that were unmapped
4146  * 5. Userspace driver unmaps a DMA buffer
4147  * 6. Cache invalidation in vSVA usage (upcoming)
4148  *
4149  * For #1 and #2, device drivers are responsible for stopping DMA traffic
4150  * before unmap/unbind. For #3, iommu driver gets mmu_notifier to
4151  * invalidate TLB the same way as normal user unmap which will use this quirk.
4152  * The dTLB invalidation after PASID cache flush does not need this quirk.
4153  *
4154  * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
4155  */
4156 void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
4157 			       unsigned long address, unsigned long mask,
4158 			       u32 pasid, u16 qdep)
4159 {
4160 	u16 sid;
4161 
4162 	if (likely(!info->dtlb_extra_inval))
4163 		return;
4164 
4165 	sid = PCI_DEVID(info->bus, info->devfn);
4166 	if (pasid == IOMMU_NO_PASID) {
4167 		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
4168 				   qdep, address, mask);
4169 	} else {
4170 		qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
4171 					 pasid, qdep, address, mask);
4172 	}
4173 }
4174 
/* Extract the status code field, bits 7:1, from an enhanced command response. */
#define ecmd_get_status_code(res)	(((res) >> 1) & 0x7f)
4176 
4177 /*
4178  * Function to submit a command to the enhanced command interface. The
4179  * valid enhanced command descriptions are defined in Table 47 of the
4180  * VT-d spec. The VT-d hardware implementation may support some but not
4181  * all commands, which can be determined by checking the Enhanced
4182  * Command Capability Register.
4183  *
4184  * Return values:
4185  *  - 0: Command successful without any error;
4186  *  - Negative: software error value;
4187  *  - Nonzero positive: failure status code defined in Table 48.
4188  */
4189 int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
4190 {
4191 	unsigned long flags;
4192 	u64 res;
4193 	int ret;
4194 
4195 	if (!cap_ecmds(iommu->cap))
4196 		return -ENODEV;
4197 
4198 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
4199 
4200 	res = readq(iommu->reg + DMAR_ECRSP_REG);
4201 	if (res & DMA_ECMD_ECRSP_IP) {
4202 		ret = -EBUSY;
4203 		goto err;
4204 	}
4205 
4206 	/*
4207 	 * Unconditionally write the operand B, because
4208 	 * - There is no side effect if an ecmd doesn't require an
4209 	 *   operand B, but we set the register to some value.
4210 	 * - It's not invoked in any critical path. The extra MMIO
4211 	 *   write doesn't bring any performance concerns.
4212 	 */
4213 	writeq(ob, iommu->reg + DMAR_ECEO_REG);
4214 	writeq(ecmd | (oa << DMA_ECMD_OA_SHIFT), iommu->reg + DMAR_ECMD_REG);
4215 
4216 	IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, readq,
4217 		      !(res & DMA_ECMD_ECRSP_IP), res);
4218 
4219 	if (res & DMA_ECMD_ECRSP_IP) {
4220 		ret = -ETIMEDOUT;
4221 		goto err;
4222 	}
4223 
4224 	ret = ecmd_get_status_code(res);
4225 err:
4226 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
4227 
4228 	return ret;
4229 }
4230 
4231 MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
4232