xref: /linux/drivers/iommu/intel/iommu.c (revision 4359a011e259a4608afc7fb3635370c9d4ba5943)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2006-2014 Intel Corporation.
4  *
5  * Authors: David Woodhouse <dwmw2@infradead.org>,
6  *          Ashok Raj <ashok.raj@intel.com>,
7  *          Shaohua Li <shaohua.li@intel.com>,
8  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9  *          Fenghua Yu <fenghua.yu@intel.com>
10  *          Joerg Roedel <jroedel@suse.de>
11  */
12 
13 #define pr_fmt(fmt)     "DMAR: " fmt
14 #define dev_fmt(fmt)    pr_fmt(fmt)
15 
16 #include <linux/crash_dump.h>
17 #include <linux/dma-direct.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/dmi.h>
20 #include <linux/intel-svm.h>
21 #include <linux/memory.h>
22 #include <linux/pci.h>
23 #include <linux/pci-ats.h>
24 #include <linux/spinlock.h>
25 #include <linux/syscore_ops.h>
26 #include <linux/tboot.h>
27 
28 #include "iommu.h"
29 #include "../irq_remapping.h"
30 #include "../iommu-sva-lib.h"
31 #include "pasid.h"
32 #include "cap_audit.h"
33 
34 #define ROOT_SIZE		VTD_PAGE_SIZE
35 #define CONTEXT_SIZE		VTD_PAGE_SIZE
36 
37 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
38 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
39 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
40 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
41 
42 #define IOAPIC_RANGE_START	(0xfee00000)
43 #define IOAPIC_RANGE_END	(0xfeefffff)
44 #define IOVA_START_ADDR		(0x1000)
45 
46 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
47 
48 #define MAX_AGAW_WIDTH 64
49 #define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
50 
51 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
52 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
53 
54 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
55    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
56 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
57 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
58 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
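/*
 * Editorial note (illustrative, not in the original source): with gaw == 48,
 * __DOMAIN_MAX_PFN(48) is 2^36 - 1.  DOMAIN_MAX_PFN() returns the same value
 * on a 64-bit kernel, but clamps to ULONG_MAX on 32-bit so that PFNs always
 * fit in an unsigned long, as the comment above explains.
 */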
59 
60 /* IO virtual address start page frame number */
61 #define IOVA_START_PFN		(1)
62 
63 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
64 
65 /* page table handling */
66 #define LEVEL_STRIDE		(9)
67 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
68 
69 static inline int agaw_to_level(int agaw)
70 {
71 	return agaw + 2;
72 }
73 
74 static inline int agaw_to_width(int agaw)
75 {
76 	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
77 }
78 
79 static inline int width_to_agaw(int width)
80 {
81 	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
82 }
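/*
 * Editorial note (illustrative, not in the original source): each extra level
 * translates LEVEL_STRIDE (9) address bits above the 12-bit page offset, so
 * agaw 1 maps to a 39-bit width and 3 levels, agaw 2 to 48 bits and 4 levels,
 * and agaw 3 to 57 bits and 5 levels; e.g. width_to_agaw(48) == 2.
 */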
83 
84 static inline unsigned int level_to_offset_bits(int level)
85 {
86 	return (level - 1) * LEVEL_STRIDE;
87 }
88 
89 static inline int pfn_level_offset(u64 pfn, int level)
90 {
91 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
92 }
93 
94 static inline u64 level_mask(int level)
95 {
96 	return -1ULL << level_to_offset_bits(level);
97 }
98 
99 static inline u64 level_size(int level)
100 {
101 	return 1ULL << level_to_offset_bits(level);
102 }
103 
104 static inline u64 align_to_level(u64 pfn, int level)
105 {
106 	return (pfn + level_size(level) - 1) & level_mask(level);
107 }
108 
109 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
110 {
111 	return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
112 }
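/*
 * Worked example (editorial, not in the original source): for level 2,
 * level_to_offset_bits() is 9, pfn_level_offset() selects pfn bits 17:9,
 * level_size() is 512 pfns, lvl_to_nr_pages() is 512 pages (2MiB with 4KiB
 * VT-d pages), and align_to_level() rounds a pfn up to a 512-pfn boundary.
 */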
113 
114 /* VT-d pages must never be _larger_ than MM pages. Otherwise things
115    are never going to work. */
116 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
117 {
118 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
119 }
120 static inline unsigned long page_to_dma_pfn(struct page *pg)
121 {
122 	return mm_to_dma_pfn(page_to_pfn(pg));
123 }
124 static inline unsigned long virt_to_dma_pfn(void *p)
125 {
126 	return page_to_dma_pfn(virt_to_page(p));
127 }
128 
129 static void __init check_tylersburg_isoch(void);
130 static int rwbf_quirk;
131 
132 /*
133  * Set to 1 to panic the kernel if VT-d can't be successfully enabled
134  * (used when the kernel is launched with TXT).
135  */
136 static int force_on = 0;
137 static int intel_iommu_tboot_noforce;
138 static int no_platform_optin;
139 
140 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
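/*
 * Editorial note: with 4KiB pages and 16-byte root entries this evaluates
 * to 256 -- one root entry per PCI bus number.
 */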
141 
142 /*
143  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
144  * if marked present.
145  */
146 static phys_addr_t root_entry_lctp(struct root_entry *re)
147 {
148 	if (!(re->lo & 1))
149 		return 0;
150 
151 	return re->lo & VTD_PAGE_MASK;
152 }
153 
154 /*
155  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
156  * if marked present.
157  */
158 static phys_addr_t root_entry_uctp(struct root_entry *re)
159 {
160 	if (!(re->hi & 1))
161 		return 0;
162 
163 	return re->hi & VTD_PAGE_MASK;
164 }
165 
166 static inline void context_set_present(struct context_entry *context)
167 {
168 	context->lo |= 1;
169 }
170 
171 static inline void context_set_fault_enable(struct context_entry *context)
172 {
173 	context->lo &= (((u64)-1) << 2) | 1;
174 }
175 
176 static inline void context_set_translation_type(struct context_entry *context,
177 						unsigned long value)
178 {
179 	context->lo &= (((u64)-1) << 4) | 3;
180 	context->lo |= (value & 3) << 2;
181 }
182 
183 static inline void context_set_address_root(struct context_entry *context,
184 					    unsigned long value)
185 {
186 	context->lo &= ~VTD_PAGE_MASK;
187 	context->lo |= value & VTD_PAGE_MASK;
188 }
189 
190 static inline void context_set_address_width(struct context_entry *context,
191 					     unsigned long value)
192 {
193 	context->hi |= value & 7;
194 }
195 
196 static inline void context_set_domain_id(struct context_entry *context,
197 					 unsigned long value)
198 {
199 	context->hi |= (value & ((1 << 16) - 1)) << 8;
200 }
201 
202 static inline int context_domain_id(struct context_entry *c)
203 {
204 	return((c->hi >> 8) & 0xffff);
205 }
206 
207 static inline void context_clear_entry(struct context_entry *context)
208 {
209 	context->lo = 0;
210 	context->hi = 0;
211 }
212 
213 static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
214 {
215 	if (!iommu->copied_tables)
216 		return false;
217 
218 	return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
219 }
220 
221 static inline void
222 set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
223 {
224 	set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
225 }
226 
227 static inline void
228 clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
229 {
230 	clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
231 }
232 
233 /*
234  * This domain is a static identity-mapping domain.
235  *	1. This domain creates a static 1:1 mapping of all usable memory.
236  *	2. It maps to each iommu if successful.
237  *	3. Each iommu maps to this domain if successful.
238  */
239 static struct dmar_domain *si_domain;
240 static int hw_pass_through = 1;
241 
242 struct dmar_rmrr_unit {
243 	struct list_head list;		/* list of rmrr units	*/
244 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
245 	u64	base_address;		/* reserved base address*/
246 	u64	end_address;		/* reserved end address */
247 	struct dmar_dev_scope *devices;	/* target devices */
248 	int	devices_cnt;		/* target device count */
249 };
250 
251 struct dmar_atsr_unit {
252 	struct list_head list;		/* list of ATSR units */
253 	struct acpi_dmar_header *hdr;	/* ACPI header */
254 	struct dmar_dev_scope *devices;	/* target devices */
255 	int devices_cnt;		/* target device count */
256 	u8 include_all:1;		/* include all ports */
257 };
258 
259 struct dmar_satc_unit {
260 	struct list_head list;		/* list of SATC units */
261 	struct acpi_dmar_header *hdr;	/* ACPI header */
262 	struct dmar_dev_scope *devices;	/* target devices */
263 	struct intel_iommu *iommu;	/* the corresponding iommu */
264 	int devices_cnt;		/* target device count */
265 	u8 atc_required:1;		/* ATS is required */
266 };
267 
268 static LIST_HEAD(dmar_atsr_units);
269 static LIST_HEAD(dmar_rmrr_units);
270 static LIST_HEAD(dmar_satc_units);
271 
272 #define for_each_rmrr_units(rmrr) \
273 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
274 
275 static void dmar_remove_one_dev_info(struct device *dev);
276 
277 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
278 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
279 
280 int intel_iommu_enabled = 0;
281 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
282 
283 static int dmar_map_gfx = 1;
284 static int intel_iommu_superpage = 1;
285 static int iommu_identity_mapping;
286 static int iommu_skip_te_disable;
287 
288 #define IDENTMAP_GFX		2
289 #define IDENTMAP_AZALIA		4
290 
291 const struct iommu_ops intel_iommu_ops;
292 
293 static bool translation_pre_enabled(struct intel_iommu *iommu)
294 {
295 	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
296 }
297 
298 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
299 {
300 	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
301 }
302 
303 static void init_translation_status(struct intel_iommu *iommu)
304 {
305 	u32 gsts;
306 
307 	gsts = readl(iommu->reg + DMAR_GSTS_REG);
308 	if (gsts & DMA_GSTS_TES)
309 		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
310 }
311 
312 static int __init intel_iommu_setup(char *str)
313 {
314 	if (!str)
315 		return -EINVAL;
316 
317 	while (*str) {
318 		if (!strncmp(str, "on", 2)) {
319 			dmar_disabled = 0;
320 			pr_info("IOMMU enabled\n");
321 		} else if (!strncmp(str, "off", 3)) {
322 			dmar_disabled = 1;
323 			no_platform_optin = 1;
324 			pr_info("IOMMU disabled\n");
325 		} else if (!strncmp(str, "igfx_off", 8)) {
326 			dmar_map_gfx = 0;
327 			pr_info("Disable GFX device mapping\n");
328 		} else if (!strncmp(str, "forcedac", 8)) {
329 			pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
330 			iommu_dma_forcedac = true;
331 		} else if (!strncmp(str, "strict", 6)) {
332 			pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
333 			iommu_set_dma_strict();
334 		} else if (!strncmp(str, "sp_off", 6)) {
335 			pr_info("Disable super page support\n");
336 			intel_iommu_superpage = 0;
337 		} else if (!strncmp(str, "sm_on", 5)) {
338 			pr_info("Enable scalable mode if hardware supports it\n");
339 			intel_iommu_sm = 1;
340 		} else if (!strncmp(str, "sm_off", 6)) {
341 			pr_info("Scalable mode is disallowed\n");
342 			intel_iommu_sm = 0;
343 		} else if (!strncmp(str, "tboot_noforce", 13)) {
344 			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose a security risk with tboot\n");
345 			intel_iommu_tboot_noforce = 1;
346 		} else {
347 			pr_notice("Unknown option - '%s'\n", str);
348 		}
349 
350 		str += strcspn(str, ",");
351 		while (*str == ',')
352 			str++;
353 	}
354 
355 	return 1;
356 }
357 __setup("intel_iommu=", intel_iommu_setup);
358 
359 void *alloc_pgtable_page(int node)
360 {
361 	struct page *page;
362 	void *vaddr = NULL;
363 
364 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
365 	if (page)
366 		vaddr = page_address(page);
367 	return vaddr;
368 }
369 
370 void free_pgtable_page(void *vaddr)
371 {
372 	free_page((unsigned long)vaddr);
373 }
374 
375 static inline int domain_type_is_si(struct dmar_domain *domain)
376 {
377 	return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
378 }
379 
380 static inline bool domain_use_first_level(struct dmar_domain *domain)
381 {
382 	return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
383 }
384 
385 static inline int domain_pfn_supported(struct dmar_domain *domain,
386 				       unsigned long pfn)
387 {
388 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
389 
390 	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
391 }
392 
393 /*
394  * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
395  * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
396  * the returned SAGAW.
397  */
398 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
399 {
400 	unsigned long fl_sagaw, sl_sagaw;
401 
402 	fl_sagaw = BIT(2) | (cap_5lp_support(iommu->cap) ? BIT(3) : 0);
403 	sl_sagaw = cap_sagaw(iommu->cap);
404 
405 	/* Second level only. */
406 	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
407 		return sl_sagaw;
408 
409 	/* First level only. */
410 	if (!ecap_slts(iommu->ecap))
411 		return fl_sagaw;
412 
413 	return fl_sagaw & sl_sagaw;
414 }
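/*
 * Editorial note (interpretation, not in the original source): in the SAGAW
 * encoding referenced above, BIT(2) means a 4-level (48-bit, agaw 2) table
 * and BIT(3) a 5-level (57-bit, agaw 3) table, so fl_sagaw & sl_sagaw is the
 * set of agaw values usable by both first- and second-level translation.
 */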
415 
416 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
417 {
418 	unsigned long sagaw;
419 	int agaw;
420 
421 	sagaw = __iommu_calculate_sagaw(iommu);
422 	for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
423 		if (test_bit(agaw, &sagaw))
424 			break;
425 	}
426 
427 	return agaw;
428 }
429 
430 /*
431  * Calculate max SAGAW for each iommu.
432  */
433 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
434 {
435 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
436 }
437 
438 /*
439  * Calculate the agaw for each iommu.
440  * "SAGAW" may be different across iommus; use a default agaw and fall
441  * back to a smaller supported agaw for iommus that don't support it.
442  */
443 int iommu_calculate_agaw(struct intel_iommu *iommu)
444 {
445 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
446 }
447 
448 static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
449 {
450 	return sm_supported(iommu) ?
451 			ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
452 }
453 
454 static void domain_update_iommu_coherency(struct dmar_domain *domain)
455 {
456 	struct iommu_domain_info *info;
457 	struct dmar_drhd_unit *drhd;
458 	struct intel_iommu *iommu;
459 	bool found = false;
460 	unsigned long i;
461 
462 	domain->iommu_coherency = true;
463 	xa_for_each(&domain->iommu_array, i, info) {
464 		found = true;
465 		if (!iommu_paging_structure_coherency(info->iommu)) {
466 			domain->iommu_coherency = false;
467 			break;
468 		}
469 	}
470 	if (found)
471 		return;
472 
473 	/* No hardware attached; use lowest common denominator */
474 	rcu_read_lock();
475 	for_each_active_iommu(iommu, drhd) {
476 		if (!iommu_paging_structure_coherency(iommu)) {
477 			domain->iommu_coherency = false;
478 			break;
479 		}
480 	}
481 	rcu_read_unlock();
482 }
483 
484 static int domain_update_iommu_superpage(struct dmar_domain *domain,
485 					 struct intel_iommu *skip)
486 {
487 	struct dmar_drhd_unit *drhd;
488 	struct intel_iommu *iommu;
489 	int mask = 0x3;
490 
491 	if (!intel_iommu_superpage)
492 		return 0;
493 
494 	/* set iommu_superpage to the smallest common denominator */
495 	rcu_read_lock();
496 	for_each_active_iommu(iommu, drhd) {
497 		if (iommu != skip) {
498 			if (domain && domain_use_first_level(domain)) {
499 				if (!cap_fl1gp_support(iommu->cap))
500 					mask = 0x1;
501 			} else {
502 				mask &= cap_super_page_val(iommu->cap);
503 			}
504 
505 			if (!mask)
506 				break;
507 		}
508 	}
509 	rcu_read_unlock();
510 
511 	return fls(mask);
512 }
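/*
 * Editorial note (illustrative, assuming the CAP.SLLPS encoding where bit 0
 * is 2MiB and bit 1 is 1GiB support): the returned value is 0 for no super
 * pages, 1 for 2MiB only, or 2 for 2MiB and 1GiB, which
 * domain_super_pgsize_bitmap() below maps to SZ_2M and SZ_1G.
 */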
513 
514 static int domain_update_device_node(struct dmar_domain *domain)
515 {
516 	struct device_domain_info *info;
517 	int nid = NUMA_NO_NODE;
518 	unsigned long flags;
519 
520 	spin_lock_irqsave(&domain->lock, flags);
521 	list_for_each_entry(info, &domain->devices, link) {
522 		/*
523 		 * There could possibly be multiple device NUMA nodes, as devices
524 		 * within the same domain may sit behind different IOMMUs. There
525 		 * is no perfect answer in such a situation, so we select a
526 		 * first-come, first-served policy.
527 		 */
528 		nid = dev_to_node(info->dev);
529 		if (nid != NUMA_NO_NODE)
530 			break;
531 	}
532 	spin_unlock_irqrestore(&domain->lock, flags);
533 
534 	return nid;
535 }
536 
537 static void domain_update_iotlb(struct dmar_domain *domain);
538 
539 /* Return the super pagesize bitmap if supported. */
540 static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
541 {
542 	unsigned long bitmap = 0;
543 
544 	/*
545 	 * A 1-level super page supports a page size of 2MiB; a 2-level super
546 	 * page supports page sizes of both 2MiB and 1GiB.
547 	 */
548 	if (domain->iommu_superpage == 1)
549 		bitmap |= SZ_2M;
550 	else if (domain->iommu_superpage == 2)
551 		bitmap |= SZ_2M | SZ_1G;
552 
553 	return bitmap;
554 }
555 
556 /* Some capabilities may be different across iommus */
557 static void domain_update_iommu_cap(struct dmar_domain *domain)
558 {
559 	domain_update_iommu_coherency(domain);
560 	domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
561 
562 	/*
563 	 * If RHSA is missing, we should default to the device NUMA node
564 	 * as a fallback.
565 	 */
566 	if (domain->nid == NUMA_NO_NODE)
567 		domain->nid = domain_update_device_node(domain);
568 
569 	/*
570 	 * First-level translation restricts the input-address to a
571 	 * canonical address (i.e., address bits 63:N have the same
572 	 * value as address bit [N-1], where N is 48-bits with 4-level
573 	 * paging and 57-bits with 5-level paging). Hence, skip bit
574 	 * [N-1].
575 	 */
576 	if (domain_use_first_level(domain))
577 		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
578 	else
579 		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
580 
581 	domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
582 	domain_update_iotlb(domain);
583 }
584 
585 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
586 					 u8 devfn, int alloc)
587 {
588 	struct root_entry *root = &iommu->root_entry[bus];
589 	struct context_entry *context;
590 	u64 *entry;
591 
592 	/*
593 	 * Unless the caller requested to allocate a new entry,
594 	 * returning a copied context entry makes no sense.
595 	 */
596 	if (!alloc && context_copied(iommu, bus, devfn))
597 		return NULL;
598 
599 	entry = &root->lo;
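	/*
	 * Editorial note: in scalable mode each root entry provides two
	 * context-table pointers; root->lo covers devfn 0x00-0x7f and
	 * root->hi covers devfn 0x80-0xff.  Scalable-mode context entries
	 * are 256 bits wide (twice the legacy 128-bit entries), hence the
	 * devfn is doubled below to index the 128-entry table.
	 */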
600 	if (sm_supported(iommu)) {
601 		if (devfn >= 0x80) {
602 			devfn -= 0x80;
603 			entry = &root->hi;
604 		}
605 		devfn *= 2;
606 	}
607 	if (*entry & 1)
608 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
609 	else {
610 		unsigned long phy_addr;
611 		if (!alloc)
612 			return NULL;
613 
614 		context = alloc_pgtable_page(iommu->node);
615 		if (!context)
616 			return NULL;
617 
618 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
619 		phy_addr = virt_to_phys((void *)context);
620 		*entry = phy_addr | 1;
621 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
622 	}
623 	return &context[devfn];
624 }
625 
626 /**
627  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
628  *				 sub-hierarchy of a candidate PCI-PCI bridge
629  * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
630  * @bridge: the candidate PCI-PCI bridge
631  *
632  * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
633  */
634 static bool
635 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
636 {
637 	struct pci_dev *pdev, *pbridge;
638 
639 	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
640 		return false;
641 
642 	pdev = to_pci_dev(dev);
643 	pbridge = to_pci_dev(bridge);
644 
645 	if (pbridge->subordinate &&
646 	    pbridge->subordinate->number <= pdev->bus->number &&
647 	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
648 		return true;
649 
650 	return false;
651 }
652 
653 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
654 {
655 	struct dmar_drhd_unit *drhd;
656 	u32 vtbar;
657 	int rc;
658 
659 	/* We know that this device on this chipset has its own IOMMU.
660 	 * If we find it under a different IOMMU, then the BIOS is lying
661 	 * to us. Hope that the IOMMU for this device is actually
662 	 * disabled, and it needs no translation...
663 	 */
664 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
665 	if (rc) {
666 		/* "can't" happen */
667 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
668 		return false;
669 	}
670 	vtbar &= 0xffff0000;
671 
672 	/* we know that this iommu should be at offset 0xa000 from the vtbar */
673 	drhd = dmar_find_matched_drhd_unit(pdev);
674 	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
675 		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
676 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
677 		return true;
678 	}
679 
680 	return false;
681 }
682 
683 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
684 {
685 	if (!iommu || iommu->drhd->ignored)
686 		return true;
687 
688 	if (dev_is_pci(dev)) {
689 		struct pci_dev *pdev = to_pci_dev(dev);
690 
691 		if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
692 		    pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
693 		    quirk_ioat_snb_local_iommu(pdev))
694 			return true;
695 	}
696 
697 	return false;
698 }
699 
700 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
701 {
702 	struct dmar_drhd_unit *drhd = NULL;
703 	struct pci_dev *pdev = NULL;
704 	struct intel_iommu *iommu;
705 	struct device *tmp;
706 	u16 segment = 0;
707 	int i;
708 
709 	if (!dev)
710 		return NULL;
711 
712 	if (dev_is_pci(dev)) {
713 		struct pci_dev *pf_pdev;
714 
715 		pdev = pci_real_dma_dev(to_pci_dev(dev));
716 
717 		/* VFs aren't listed in scope tables; we need to look up
718 		 * the PF instead to find the IOMMU. */
719 		pf_pdev = pci_physfn(pdev);
720 		dev = &pf_pdev->dev;
721 		segment = pci_domain_nr(pdev->bus);
722 	} else if (has_acpi_companion(dev))
723 		dev = &ACPI_COMPANION(dev)->dev;
724 
725 	rcu_read_lock();
726 	for_each_iommu(iommu, drhd) {
727 		if (pdev && segment != drhd->segment)
728 			continue;
729 
730 		for_each_active_dev_scope(drhd->devices,
731 					  drhd->devices_cnt, i, tmp) {
732 			if (tmp == dev) {
733 				/* For a VF use its original BDF# not that of the PF
734 				 * which we used for the IOMMU lookup. Strictly speaking
735 				 * we could do this for all PCI devices; we only need to
736 				 * get the BDF# from the scope table for ACPI matches. */
737 				if (pdev && pdev->is_virtfn)
738 					goto got_pdev;
739 
740 				if (bus && devfn) {
741 					*bus = drhd->devices[i].bus;
742 					*devfn = drhd->devices[i].devfn;
743 				}
744 				goto out;
745 			}
746 
747 			if (is_downstream_to_pci_bridge(dev, tmp))
748 				goto got_pdev;
749 		}
750 
751 		if (pdev && drhd->include_all) {
752 got_pdev:
753 			if (bus && devfn) {
754 				*bus = pdev->bus->number;
755 				*devfn = pdev->devfn;
756 			}
757 			goto out;
758 		}
759 	}
760 	iommu = NULL;
761 out:
762 	if (iommu_is_dummy(iommu, dev))
763 		iommu = NULL;
764 
765 	rcu_read_unlock();
766 
767 	return iommu;
768 }
769 
770 static void domain_flush_cache(struct dmar_domain *domain,
771 			       void *addr, int size)
772 {
773 	if (!domain->iommu_coherency)
774 		clflush_cache_range(addr, size);
775 }
776 
777 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
778 {
779 	struct context_entry *context;
780 	int ret = 0;
781 
782 	spin_lock(&iommu->lock);
783 	context = iommu_context_addr(iommu, bus, devfn, 0);
784 	if (context)
785 		ret = context_present(context);
786 	spin_unlock(&iommu->lock);
787 	return ret;
788 }
789 
790 static void free_context_table(struct intel_iommu *iommu)
791 {
792 	struct context_entry *context;
793 	int i;
794 
795 	if (!iommu->root_entry)
796 		return;
797 
798 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
799 		context = iommu_context_addr(iommu, i, 0, 0);
800 		if (context)
801 			free_pgtable_page(context);
802 
803 		if (!sm_supported(iommu))
804 			continue;
805 
806 		context = iommu_context_addr(iommu, i, 0x80, 0);
807 		if (context)
808 			free_pgtable_page(context);
809 	}
810 
811 	free_pgtable_page(iommu->root_entry);
812 	iommu->root_entry = NULL;
813 }
814 
815 #ifdef CONFIG_DMAR_DEBUG
816 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
817 			 u8 bus, u8 devfn, struct dma_pte *parent, int level)
818 {
819 	struct dma_pte *pte;
820 	int offset;
821 
822 	while (1) {
823 		offset = pfn_level_offset(pfn, level);
824 		pte = &parent[offset];
825 		if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
826 			pr_info("PTE not present at level %d\n", level);
827 			break;
828 		}
829 
830 		pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
831 
832 		if (level == 1)
833 			break;
834 
835 		parent = phys_to_virt(dma_pte_addr(pte));
836 		level--;
837 	}
838 }
839 
840 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
841 			  unsigned long long addr, u32 pasid)
842 {
843 	struct pasid_dir_entry *dir, *pde;
844 	struct pasid_entry *entries, *pte;
845 	struct context_entry *ctx_entry;
846 	struct root_entry *rt_entry;
847 	int i, dir_index, index, level;
848 	u8 devfn = source_id & 0xff;
849 	u8 bus = source_id >> 8;
850 	struct dma_pte *pgtable;
851 
852 	pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
853 
854 	/* root entry dump */
855 	rt_entry = &iommu->root_entry[bus];
856 	if (!rt_entry) {
857 		pr_info("root table entry is not present\n");
858 		return;
859 	}
860 
861 	if (sm_supported(iommu))
862 		pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
863 			rt_entry->hi, rt_entry->lo);
864 	else
865 		pr_info("root entry: 0x%016llx\n", rt_entry->lo);
866 
867 	/* context entry dump */
868 	ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
869 	if (!ctx_entry) {
870 		pr_info("context table entry is not present\n");
871 		return;
872 	}
873 
874 	pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
875 		ctx_entry->hi, ctx_entry->lo);
876 
877 	/* legacy mode does not require PASID entries */
878 	if (!sm_supported(iommu)) {
879 		level = agaw_to_level(ctx_entry->hi & 7);
880 		pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
881 		goto pgtable_walk;
882 	}
883 
884 	/* get the pointer to pasid directory entry */
885 	dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
886 	if (!dir) {
887 		pr_info("pasid directory entry is not present\n");
888 		return;
889 	}
890 	/* For request-without-pasid, get the pasid from context entry */
891 	if (intel_iommu_sm && pasid == INVALID_IOASID)
892 		pasid = PASID_RID2PASID;
893 
894 	dir_index = pasid >> PASID_PDE_SHIFT;
895 	pde = &dir[dir_index];
896 	pr_info("pasid dir entry: 0x%016llx\n", pde->val);
897 
898 	/* get the pointer to the pasid table entry */
899 	entries = get_pasid_table_from_pde(pde);
900 	if (!entries) {
901 		pr_info("pasid table entry is not present\n");
902 		return;
903 	}
904 	index = pasid & PASID_PTE_MASK;
905 	pte = &entries[index];
906 	for (i = 0; i < ARRAY_SIZE(pte->val); i++)
907 		pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
908 
909 	if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
910 		level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
911 		pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
912 	} else {
913 		level = agaw_to_level((pte->val[0] >> 2) & 0x7);
914 		pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
915 	}
916 
917 pgtable_walk:
918 	pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
919 }
920 #endif
921 
922 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
923 				      unsigned long pfn, int *target_level)
924 {
925 	struct dma_pte *parent, *pte;
926 	int level = agaw_to_level(domain->agaw);
927 	int offset;
928 
929 	BUG_ON(!domain->pgd);
930 
931 	if (!domain_pfn_supported(domain, pfn))
932 		/* Address beyond IOMMU's addressing capabilities. */
933 		return NULL;
934 
935 	parent = domain->pgd;
936 
937 	while (1) {
938 		void *tmp_page;
939 
940 		offset = pfn_level_offset(pfn, level);
941 		pte = &parent[offset];
942 		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
943 			break;
944 		if (level == *target_level)
945 			break;
946 
947 		if (!dma_pte_present(pte)) {
948 			uint64_t pteval;
949 
950 			tmp_page = alloc_pgtable_page(domain->nid);
951 
952 			if (!tmp_page)
953 				return NULL;
954 
955 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
956 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
957 			if (domain_use_first_level(domain)) {
958 				pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
959 				if (iommu_is_dma_domain(&domain->domain))
960 					pteval |= DMA_FL_PTE_ACCESS;
961 			}
962 			if (cmpxchg64(&pte->val, 0ULL, pteval))
963 				/* Someone else set it while we were thinking; use theirs. */
964 				free_pgtable_page(tmp_page);
965 			else
966 				domain_flush_cache(domain, pte, sizeof(*pte));
967 		}
968 		if (level == 1)
969 			break;
970 
971 		parent = phys_to_virt(dma_pte_addr(pte));
972 		level--;
973 	}
974 
975 	if (!*target_level)
976 		*target_level = level;
977 
978 	return pte;
979 }
980 
981 /* return address's pte at specific level */
982 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
983 					 unsigned long pfn,
984 					 int level, int *large_page)
985 {
986 	struct dma_pte *parent, *pte;
987 	int total = agaw_to_level(domain->agaw);
988 	int offset;
989 
990 	parent = domain->pgd;
991 	while (level <= total) {
992 		offset = pfn_level_offset(pfn, total);
993 		pte = &parent[offset];
994 		if (level == total)
995 			return pte;
996 
997 		if (!dma_pte_present(pte)) {
998 			*large_page = total;
999 			break;
1000 		}
1001 
1002 		if (dma_pte_superpage(pte)) {
1003 			*large_page = total;
1004 			return pte;
1005 		}
1006 
1007 		parent = phys_to_virt(dma_pte_addr(pte));
1008 		total--;
1009 	}
1010 	return NULL;
1011 }
1012 
1013 /* clear last level pte; a tlb flush should follow */
1014 static void dma_pte_clear_range(struct dmar_domain *domain,
1015 				unsigned long start_pfn,
1016 				unsigned long last_pfn)
1017 {
1018 	unsigned int large_page;
1019 	struct dma_pte *first_pte, *pte;
1020 
1021 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1022 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1023 	BUG_ON(start_pfn > last_pfn);
1024 
1025 	/* we don't need lock here; nobody else touches the iova range */
1026 	do {
1027 		large_page = 1;
1028 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1029 		if (!pte) {
1030 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1031 			continue;
1032 		}
1033 		do {
1034 			dma_clear_pte(pte);
1035 			start_pfn += lvl_to_nr_pages(large_page);
1036 			pte++;
1037 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1038 
1039 		domain_flush_cache(domain, first_pte,
1040 				   (void *)pte - (void *)first_pte);
1041 
1042 	} while (start_pfn && start_pfn <= last_pfn);
1043 }
1044 
1045 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1046 			       int retain_level, struct dma_pte *pte,
1047 			       unsigned long pfn, unsigned long start_pfn,
1048 			       unsigned long last_pfn)
1049 {
1050 	pfn = max(start_pfn, pfn);
1051 	pte = &pte[pfn_level_offset(pfn, level)];
1052 
1053 	do {
1054 		unsigned long level_pfn;
1055 		struct dma_pte *level_pte;
1056 
1057 		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1058 			goto next;
1059 
1060 		level_pfn = pfn & level_mask(level);
1061 		level_pte = phys_to_virt(dma_pte_addr(pte));
1062 
1063 		if (level > 2) {
1064 			dma_pte_free_level(domain, level - 1, retain_level,
1065 					   level_pte, level_pfn, start_pfn,
1066 					   last_pfn);
1067 		}
1068 
1069 		/*
1070 		 * Free the page table if we're below the level we want to
1071 		 * retain and the range covers the entire table.
1072 		 */
1073 		if (level < retain_level && !(start_pfn > level_pfn ||
1074 		      last_pfn < level_pfn + level_size(level) - 1)) {
1075 			dma_clear_pte(pte);
1076 			domain_flush_cache(domain, pte, sizeof(*pte));
1077 			free_pgtable_page(level_pte);
1078 		}
1079 next:
1080 		pfn += level_size(level);
1081 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1082 }
1083 
1084 /*
1085  * clear last level (leaf) ptes and free page table pages below the
1086  * level we wish to keep intact.
1087  */
1088 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1089 				   unsigned long start_pfn,
1090 				   unsigned long last_pfn,
1091 				   int retain_level)
1092 {
1093 	dma_pte_clear_range(domain, start_pfn, last_pfn);
1094 
1095 	/* We don't need lock here; nobody else touches the iova range */
1096 	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1097 			   domain->pgd, 0, start_pfn, last_pfn);
1098 
1099 	/* free pgd */
1100 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1101 		free_pgtable_page(domain->pgd);
1102 		domain->pgd = NULL;
1103 	}
1104 }
1105 
1106 /* When a page at a given level is being unlinked from its parent, we don't
1107    need to *modify* it at all. All we need to do is make a list of all the
1108    pages which can be freed just as soon as we've flushed the IOTLB and we
1109    know the hardware page-walk will no longer touch them.
1110    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1111    be freed. */
1112 static void dma_pte_list_pagetables(struct dmar_domain *domain,
1113 				    int level, struct dma_pte *pte,
1114 				    struct list_head *freelist)
1115 {
1116 	struct page *pg;
1117 
1118 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1119 	list_add_tail(&pg->lru, freelist);
1120 
1121 	if (level == 1)
1122 		return;
1123 
1124 	pte = page_address(pg);
1125 	do {
1126 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1127 			dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1128 		pte++;
1129 	} while (!first_pte_in_page(pte));
1130 }
1131 
1132 static void dma_pte_clear_level(struct dmar_domain *domain, int level,
1133 				struct dma_pte *pte, unsigned long pfn,
1134 				unsigned long start_pfn, unsigned long last_pfn,
1135 				struct list_head *freelist)
1136 {
1137 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
1138 
1139 	pfn = max(start_pfn, pfn);
1140 	pte = &pte[pfn_level_offset(pfn, level)];
1141 
1142 	do {
1143 		unsigned long level_pfn = pfn & level_mask(level);
1144 
1145 		if (!dma_pte_present(pte))
1146 			goto next;
1147 
1148 		/* If range covers entire pagetable, free it */
1149 		if (start_pfn <= level_pfn &&
1150 		    last_pfn >= level_pfn + level_size(level) - 1) {
1151 			/* These subordinate page tables are going away entirely. Don't
1152 			   bother to clear them; we're just going to *free* them. */
1153 			if (level > 1 && !dma_pte_superpage(pte))
1154 				dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1155 
1156 			dma_clear_pte(pte);
1157 			if (!first_pte)
1158 				first_pte = pte;
1159 			last_pte = pte;
1160 		} else if (level > 1) {
1161 			/* Recurse down into a level that isn't *entirely* obsolete */
1162 			dma_pte_clear_level(domain, level - 1,
1163 					    phys_to_virt(dma_pte_addr(pte)),
1164 					    level_pfn, start_pfn, last_pfn,
1165 					    freelist);
1166 		}
1167 next:
1168 		pfn = level_pfn + level_size(level);
1169 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1170 
1171 	if (first_pte)
1172 		domain_flush_cache(domain, first_pte,
1173 				   (void *)++last_pte - (void *)first_pte);
1174 }
1175 
1176 /* We can't just free the pages because the IOMMU may still be walking
1177    the page tables, and may have cached the intermediate levels. The
1178    pages can only be freed after the IOTLB flush has been done. */
1179 static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
1180 			 unsigned long last_pfn, struct list_head *freelist)
1181 {
1182 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1183 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1184 	BUG_ON(start_pfn > last_pfn);
1185 
1186 	/* we don't need lock here; nobody else touches the iova range */
1187 	dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1188 			    domain->pgd, 0, start_pfn, last_pfn, freelist);
1189 
1190 	/* free pgd */
1191 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1192 		struct page *pgd_page = virt_to_page(domain->pgd);
1193 		list_add_tail(&pgd_page->lru, freelist);
1194 		domain->pgd = NULL;
1195 	}
1196 }
1197 
1198 /* iommu handling */
1199 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1200 {
1201 	struct root_entry *root;
1202 
1203 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1204 	if (!root) {
1205 		pr_err("Allocating root entry for %s failed\n",
1206 			iommu->name);
1207 		return -ENOMEM;
1208 	}
1209 
1210 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
1211 	iommu->root_entry = root;
1212 
1213 	return 0;
1214 }
1215 
1216 static void iommu_set_root_entry(struct intel_iommu *iommu)
1217 {
1218 	u64 addr;
1219 	u32 sts;
1220 	unsigned long flag;
1221 
1222 	addr = virt_to_phys(iommu->root_entry);
1223 	if (sm_supported(iommu))
1224 		addr |= DMA_RTADDR_SMT;
1225 
1226 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1227 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1228 
1229 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1230 
1231 	/* Make sure hardware complete it */
1232 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1233 		      readl, (sts & DMA_GSTS_RTPS), sts);
1234 
1235 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1236 
1237 	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1238 	if (sm_supported(iommu))
1239 		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1240 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1241 }
1242 
1243 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1244 {
1245 	u32 val;
1246 	unsigned long flag;
1247 
1248 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1249 		return;
1250 
1251 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1252 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1253 
1254 	/* Make sure hardware complete it */
1255 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1256 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1257 
1258 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1259 }
1260 
1261 /* return value determines if we need a write buffer flush */
1262 static void __iommu_flush_context(struct intel_iommu *iommu,
1263 				  u16 did, u16 source_id, u8 function_mask,
1264 				  u64 type)
1265 {
1266 	u64 val = 0;
1267 	unsigned long flag;
1268 
1269 	switch (type) {
1270 	case DMA_CCMD_GLOBAL_INVL:
1271 		val = DMA_CCMD_GLOBAL_INVL;
1272 		break;
1273 	case DMA_CCMD_DOMAIN_INVL:
1274 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1275 		break;
1276 	case DMA_CCMD_DEVICE_INVL:
1277 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1278 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1279 		break;
1280 	default:
1281 		BUG();
1282 	}
1283 	val |= DMA_CCMD_ICC;
1284 
1285 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1286 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1287 
1288 	/* Make sure hardware complete it */
1289 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1290 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1291 
1292 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1293 }
1294 
1295 /* return value determines if we need a write buffer flush */
1296 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1297 				u64 addr, unsigned int size_order, u64 type)
1298 {
1299 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1300 	u64 val = 0, val_iva = 0;
1301 	unsigned long flag;
1302 
1303 	switch (type) {
1304 	case DMA_TLB_GLOBAL_FLUSH:
1305 		/* global flush doesn't need to set IVA_REG */
1306 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1307 		break;
1308 	case DMA_TLB_DSI_FLUSH:
1309 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1310 		break;
1311 	case DMA_TLB_PSI_FLUSH:
1312 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1313 		/* IH bit is passed in as part of address */
1314 		val_iva = size_order | addr;
1315 		break;
1316 	default:
1317 		BUG();
1318 	}
1319 	/* Note: set drain read/write */
1320 #if 0
1321 	/*
1322 	 * This is probably only for extra safety. It looks like we can
1323 	 * ignore it without any impact.
1324 	 */
1325 	if (cap_read_drain(iommu->cap))
1326 		val |= DMA_TLB_READ_DRAIN;
1327 #endif
1328 	if (cap_write_drain(iommu->cap))
1329 		val |= DMA_TLB_WRITE_DRAIN;
1330 
1331 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1332 	/* Note: Only uses first TLB reg currently */
1333 	if (val_iva)
1334 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1335 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1336 
1337 	/* Make sure hardware complete it */
1338 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1339 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1340 
1341 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1342 
1343 	/* check IOTLB invalidation granularity */
1344 	if (DMA_TLB_IAIG(val) == 0)
1345 		pr_err("Flush IOTLB failed\n");
1346 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1347 		pr_debug("TLB flush request %Lx, actual %Lx\n",
1348 			(unsigned long long)DMA_TLB_IIRG(type),
1349 			(unsigned long long)DMA_TLB_IAIG(val));
1350 }
1351 
1352 static struct device_domain_info *
1353 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1354 			u8 bus, u8 devfn)
1355 {
1356 	struct device_domain_info *info;
1357 	unsigned long flags;
1358 
1359 	if (!iommu->qi)
1360 		return NULL;
1361 
1362 	spin_lock_irqsave(&domain->lock, flags);
1363 	list_for_each_entry(info, &domain->devices, link) {
1364 		if (info->iommu == iommu && info->bus == bus &&
1365 		    info->devfn == devfn) {
1366 			spin_unlock_irqrestore(&domain->lock, flags);
1367 			return info->ats_supported ? info : NULL;
1368 		}
1369 	}
1370 	spin_unlock_irqrestore(&domain->lock, flags);
1371 
1372 	return NULL;
1373 }
1374 
1375 static void domain_update_iotlb(struct dmar_domain *domain)
1376 {
1377 	struct device_domain_info *info;
1378 	bool has_iotlb_device = false;
1379 	unsigned long flags;
1380 
1381 	spin_lock_irqsave(&domain->lock, flags);
1382 	list_for_each_entry(info, &domain->devices, link) {
1383 		if (info->ats_enabled) {
1384 			has_iotlb_device = true;
1385 			break;
1386 		}
1387 	}
1388 	domain->has_iotlb_device = has_iotlb_device;
1389 	spin_unlock_irqrestore(&domain->lock, flags);
1390 }
1391 
1392 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1393 {
1394 	struct pci_dev *pdev;
1395 
1396 	if (!info || !dev_is_pci(info->dev))
1397 		return;
1398 
1399 	pdev = to_pci_dev(info->dev);
1400 	/* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1401 	 * a PFSID to the invalidation descriptor of a VF so that the IOMMU HW
1402 	 * can gauge queue depth at the PF level. If DIT is not set, the PFSID
1403 	 * field is treated as reserved and should be set to 0.
1404 	 */
1405 	if (!ecap_dit(info->iommu->ecap))
1406 		info->pfsid = 0;
1407 	else {
1408 		struct pci_dev *pf_pdev;
1409 
1410 		/* pdev will be returned if device is not a vf */
1411 		pf_pdev = pci_physfn(pdev);
1412 		info->pfsid = pci_dev_id(pf_pdev);
1413 	}
1414 
1415 #ifdef CONFIG_INTEL_IOMMU_SVM
1416 	/* The PCIe spec, in its wisdom, declares that the behaviour of
1417 	   the device if you enable PASID support after ATS support is
1418 	   undefined. So always enable PASID support on devices which
1419 	   have it, even if we can't yet know if we're ever going to
1420 	   use it. */
1421 	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1422 		info->pasid_enabled = 1;
1423 
1424 	if (info->pri_supported &&
1425 	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
1426 	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
1427 		info->pri_enabled = 1;
1428 #endif
1429 	if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1430 	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1431 		info->ats_enabled = 1;
1432 		domain_update_iotlb(info->domain);
1433 		info->ats_qdep = pci_ats_queue_depth(pdev);
1434 	}
1435 }
1436 
1437 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1438 {
1439 	struct pci_dev *pdev;
1440 
1441 	if (!dev_is_pci(info->dev))
1442 		return;
1443 
1444 	pdev = to_pci_dev(info->dev);
1445 
1446 	if (info->ats_enabled) {
1447 		pci_disable_ats(pdev);
1448 		info->ats_enabled = 0;
1449 		domain_update_iotlb(info->domain);
1450 	}
1451 #ifdef CONFIG_INTEL_IOMMU_SVM
1452 	if (info->pri_enabled) {
1453 		pci_disable_pri(pdev);
1454 		info->pri_enabled = 0;
1455 	}
1456 	if (info->pasid_enabled) {
1457 		pci_disable_pasid(pdev);
1458 		info->pasid_enabled = 0;
1459 	}
1460 #endif
1461 }
1462 
1463 static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
1464 				    u64 addr, unsigned int mask)
1465 {
1466 	u16 sid, qdep;
1467 
1468 	if (!info || !info->ats_enabled)
1469 		return;
1470 
1471 	sid = info->bus << 8 | info->devfn;
1472 	qdep = info->ats_qdep;
1473 	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1474 			   qdep, addr, mask);
1475 }
1476 
1477 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1478 				  u64 addr, unsigned mask)
1479 {
1480 	struct device_domain_info *info;
1481 	unsigned long flags;
1482 
1483 	if (!domain->has_iotlb_device)
1484 		return;
1485 
1486 	spin_lock_irqsave(&domain->lock, flags);
1487 	list_for_each_entry(info, &domain->devices, link)
1488 		__iommu_flush_dev_iotlb(info, addr, mask);
1489 	spin_unlock_irqrestore(&domain->lock, flags);
1490 }
1491 
1492 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1493 				  struct dmar_domain *domain,
1494 				  unsigned long pfn, unsigned int pages,
1495 				  int ih, int map)
1496 {
1497 	unsigned int aligned_pages = __roundup_pow_of_two(pages);
1498 	unsigned int mask = ilog2(aligned_pages);
1499 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1500 	u16 did = domain_id_iommu(domain, iommu);
1501 
1502 	BUG_ON(pages == 0);
1503 
1504 	if (ih)
1505 		ih = 1 << 6;
1506 
1507 	if (domain_use_first_level(domain)) {
1508 		qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih);
1509 	} else {
1510 		unsigned long bitmask = aligned_pages - 1;
1511 
1512 		/*
1513 		 * PSI masks the low order bits of the base address. If the
1514 		 * address isn't aligned to the mask, then compute a mask value
1515 		 * needed to ensure the target range is flushed.
1516 		 */
1517 		if (unlikely(bitmask & pfn)) {
1518 			unsigned long end_pfn = pfn + pages - 1, shared_bits;
1519 
1520 			/*
1521 			 * Since end_pfn <= pfn + bitmask, the only way bits
1522 			 * higher than bitmask can differ in pfn and end_pfn is
1523 			 * by carrying. This means after masking out bitmask,
1524 			 * high bits starting with the first set bit in
1525 			 * shared_bits are all equal in both pfn and end_pfn.
1526 			 */
1527 			shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
1528 			mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
1529 		}
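		/*
		 * Worked example (editorial, not in the original source):
		 * pfn 0x11e with pages 4 gives aligned_pages 4, bitmask 0x3
		 * and end_pfn 0x121; pfn ^ end_pfn is 0x3f, so shared_bits
		 * (~0x3f & ~0x3) has bit 6 as its lowest set bit and mask
		 * becomes 6 -- one PSI covering the aligned 64-page range
		 * 0x100-0x13f, which contains 0x11e-0x121.
		 */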
1530 
1531 		/*
1532 		 * Fallback to domain selective flush if no PSI support or
1533 		 * the size is too big.
1534 		 */
1535 		if (!cap_pgsel_inv(iommu->cap) ||
1536 		    mask > cap_max_amask_val(iommu->cap))
1537 			iommu->flush.flush_iotlb(iommu, did, 0, 0,
1538 							DMA_TLB_DSI_FLUSH);
1539 		else
1540 			iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1541 							DMA_TLB_PSI_FLUSH);
1542 	}
1543 
1544 	/*
1545 	 * In caching mode, changing a page from non-present to present requires
1546 	 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1547 	 */
1548 	if (!cap_caching_mode(iommu->cap) || !map)
1549 		iommu_flush_dev_iotlb(domain, addr, mask);
1550 }
1551 
1552 /* Notification for newly created mappings */
1553 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1554 					struct dmar_domain *domain,
1555 					unsigned long pfn, unsigned int pages)
1556 {
1557 	/*
1558 	 * It's a non-present to present mapping. Only flush if caching mode
1559 	 * is enabled and the domain uses second-level translation.
1560 	 */
1561 	if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1562 		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1563 	else
1564 		iommu_flush_write_buffer(iommu);
1565 }
1566 
1567 static void intel_flush_iotlb_all(struct iommu_domain *domain)
1568 {
1569 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
1570 	struct iommu_domain_info *info;
1571 	unsigned long idx;
1572 
1573 	xa_for_each(&dmar_domain->iommu_array, idx, info) {
1574 		struct intel_iommu *iommu = info->iommu;
1575 		u16 did = domain_id_iommu(dmar_domain, iommu);
1576 
1577 		if (domain_use_first_level(dmar_domain))
1578 			qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0);
1579 		else
1580 			iommu->flush.flush_iotlb(iommu, did, 0, 0,
1581 						 DMA_TLB_DSI_FLUSH);
1582 
1583 		if (!cap_caching_mode(iommu->cap))
1584 			iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH);
1585 	}
1586 }
1587 
1588 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1589 {
1590 	u32 pmen;
1591 	unsigned long flags;
1592 
1593 	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1594 		return;
1595 
1596 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1597 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1598 	pmen &= ~DMA_PMEN_EPM;
1599 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1600 
1601 	/* wait for the protected region status bit to clear */
1602 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1603 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1604 
1605 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1606 }
1607 
1608 static void iommu_enable_translation(struct intel_iommu *iommu)
1609 {
1610 	u32 sts;
1611 	unsigned long flags;
1612 
1613 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1614 	iommu->gcmd |= DMA_GCMD_TE;
1615 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1616 
1617 	/* Make sure hardware complete it */
1618 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1619 		      readl, (sts & DMA_GSTS_TES), sts);
1620 
1621 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1622 }
1623 
1624 static void iommu_disable_translation(struct intel_iommu *iommu)
1625 {
1626 	u32 sts;
1627 	unsigned long flag;
1628 
1629 	if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1630 	    (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1631 		return;
1632 
1633 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1634 	iommu->gcmd &= ~DMA_GCMD_TE;
1635 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1636 
1637 	/* Make sure hardware complete it */
1638 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1639 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1640 
1641 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1642 }
1643 
1644 static int iommu_init_domains(struct intel_iommu *iommu)
1645 {
1646 	u32 ndomains;
1647 
1648 	ndomains = cap_ndoms(iommu->cap);
1649 	pr_debug("%s: Number of Domains supported <%d>\n",
1650 		 iommu->name, ndomains);
1651 
1652 	spin_lock_init(&iommu->lock);
1653 
1654 	iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL);
1655 	if (!iommu->domain_ids)
1656 		return -ENOMEM;
1657 
1658 	/*
1659 	 * If Caching mode is set, then invalid translations are tagged
1660 	 * with domain-id 0, hence we need to pre-allocate it. We also
1661 	 * use domain-id 0 as a marker for non-allocated domain-id, so
1662 	 * make sure it is not used for a real domain.
1663 	 */
1664 	set_bit(0, iommu->domain_ids);
1665 
1666 	/*
1667 	 * VT-d spec rev 3.0 (section 6.2.3.1) requires that each pasid
1668 	 * entry for first-level or pass-through translation modes be
1669 	 * programmed with a domain id different from those used for
1670 	 * second-level or nested translation. We reserve a domain id for
1671 	 * this purpose.
1672 	 */
1673 	if (sm_supported(iommu))
1674 		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1675 
1676 	return 0;
1677 }
1678 
1679 static void disable_dmar_iommu(struct intel_iommu *iommu)
1680 {
1681 	if (!iommu->domain_ids)
1682 		return;
1683 
1684 	/*
1685 	 * All iommu domains must have been detached from the devices,
1686 	 * hence there should be no domain IDs in use.
1687 	 */
1688 	if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
1689 		    > NUM_RESERVED_DID))
1690 		return;
1691 
1692 	if (iommu->gcmd & DMA_GCMD_TE)
1693 		iommu_disable_translation(iommu);
1694 }
1695 
1696 static void free_dmar_iommu(struct intel_iommu *iommu)
1697 {
1698 	if (iommu->domain_ids) {
1699 		bitmap_free(iommu->domain_ids);
1700 		iommu->domain_ids = NULL;
1701 	}
1702 
1703 	if (iommu->copied_tables) {
1704 		bitmap_free(iommu->copied_tables);
1705 		iommu->copied_tables = NULL;
1706 	}
1707 
1708 	/* free context mapping */
1709 	free_context_table(iommu);
1710 
1711 #ifdef CONFIG_INTEL_IOMMU_SVM
1712 	if (pasid_supported(iommu)) {
1713 		if (ecap_prs(iommu->ecap))
1714 			intel_svm_finish_prq(iommu);
1715 	}
1716 	if (vccap_pasid(iommu->vccap))
1717 		ioasid_unregister_allocator(&iommu->pasid_allocator);
1718 
1719 #endif
1720 }
1721 
1722 /*
1723  * Check and return whether first level is used by default for
1724  * DMA translation.
1725  */
1726 static bool first_level_by_default(unsigned int type)
1727 {
1728 	/* Only SL is available in legacy mode */
1729 	if (!scalable_mode_support())
1730 		return false;
1731 
1732 	/* Only one level (either FL or SL) is available, just use it */
1733 	if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
1734 		return intel_cap_flts_sanity();
1735 
1736 	/* Both levels are available, decide it based on domain type */
1737 	return type != IOMMU_DOMAIN_UNMANAGED;
1738 }
1739 
1740 static struct dmar_domain *alloc_domain(unsigned int type)
1741 {
1742 	struct dmar_domain *domain;
1743 
1744 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1745 	if (!domain)
1746 		return NULL;
1747 
1748 	domain->nid = NUMA_NO_NODE;
1749 	if (first_level_by_default(type))
1750 		domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
1751 	domain->has_iotlb_device = false;
1752 	INIT_LIST_HEAD(&domain->devices);
1753 	spin_lock_init(&domain->lock);
1754 	xa_init(&domain->iommu_array);
1755 
1756 	return domain;
1757 }
1758 
1759 static int domain_attach_iommu(struct dmar_domain *domain,
1760 			       struct intel_iommu *iommu)
1761 {
1762 	struct iommu_domain_info *info, *curr;
1763 	unsigned long ndomains;
1764 	int num, ret = -ENOSPC;
1765 
1766 	info = kzalloc(sizeof(*info), GFP_KERNEL);
1767 	if (!info)
1768 		return -ENOMEM;
1769 
1770 	spin_lock(&iommu->lock);
1771 	curr = xa_load(&domain->iommu_array, iommu->seq_id);
1772 	if (curr) {
1773 		curr->refcnt++;
1774 		spin_unlock(&iommu->lock);
1775 		kfree(info);
1776 		return 0;
1777 	}
1778 
1779 	ndomains = cap_ndoms(iommu->cap);
1780 	num = find_first_zero_bit(iommu->domain_ids, ndomains);
1781 	if (num >= ndomains) {
1782 		pr_err("%s: No free domain ids\n", iommu->name);
1783 		goto err_unlock;
1784 	}
1785 
1786 	set_bit(num, iommu->domain_ids);
1787 	info->refcnt	= 1;
1788 	info->did	= num;
1789 	info->iommu	= iommu;
1790 	curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
1791 			  NULL, info, GFP_ATOMIC);
1792 	if (curr) {
1793 		ret = xa_err(curr) ? : -EBUSY;
1794 		goto err_clear;
1795 	}
1796 	domain_update_iommu_cap(domain);
1797 
1798 	spin_unlock(&iommu->lock);
1799 	return 0;
1800 
1801 err_clear:
1802 	clear_bit(info->did, iommu->domain_ids);
1803 err_unlock:
1804 	spin_unlock(&iommu->lock);
1805 	kfree(info);
1806 	return ret;
1807 }
1808 
1809 static void domain_detach_iommu(struct dmar_domain *domain,
1810 				struct intel_iommu *iommu)
1811 {
1812 	struct iommu_domain_info *info;
1813 
1814 	spin_lock(&iommu->lock);
1815 	info = xa_load(&domain->iommu_array, iommu->seq_id);
1816 	if (--info->refcnt == 0) {
1817 		clear_bit(info->did, iommu->domain_ids);
1818 		xa_erase(&domain->iommu_array, iommu->seq_id);
1819 		domain->nid = NUMA_NO_NODE;
1820 		domain_update_iommu_cap(domain);
1821 		kfree(info);
1822 	}
1823 	spin_unlock(&iommu->lock);
1824 }
1825 
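/*
 * Editorial note (worked example, not in the original source): the adjusted
 * width rounds the guest width up so that (agaw - 12) is a multiple of the
 * 9-bit level stride, capped at 64; e.g. a gaw of 40 becomes 48, while 39,
 * 48 and 57 are returned unchanged.
 */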
1826 static inline int guestwidth_to_adjustwidth(int gaw)
1827 {
1828 	int agaw;
1829 	int r = (gaw - 12) % 9;
1830 
1831 	if (r == 0)
1832 		agaw = gaw;
1833 	else
1834 		agaw = gaw + 9 - r;
1835 	if (agaw > 64)
1836 		agaw = 64;
1837 	return agaw;
1838 }
1839 
1840 static void domain_exit(struct dmar_domain *domain)
1841 {
1842 	if (domain->pgd) {
1843 		LIST_HEAD(freelist);
1844 
1845 		domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
1846 		put_pages_list(&freelist);
1847 	}
1848 
1849 	if (WARN_ON(!list_empty(&domain->devices)))
1850 		return;
1851 
1852 	kfree(domain);
1853 }
1854 
1855 /*
1856  * Get the PASID directory size for a scalable mode context entry.
1857  * A value of X in the PDTS field of a scalable mode context entry
1858  * indicates a PASID directory with 2^(X + 7) entries.
1859  */
1860 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1861 {
1862 	unsigned long pds, max_pde;
1863 
1864 	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1865 	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
1866 	if (pds < 7)
1867 		return 0;
1868 
1869 	return pds - 7;
1870 }
1871 
1872 /*
1873  * Set the RID_PASID field of a scalable mode context entry. The
1874  * IOMMU hardware will use the PASID value set in this field for
1875  * DMA translations of DMA requests without PASID.
1876  */
1877 static inline void
1878 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1879 {
1880 	context->hi |= pasid & ((1 << 20) - 1);
1881 }
1882 
1883 /*
1884  * Set the DTE(Device-TLB Enable) field of a scalable mode context
1885  * entry.
1886  */
1887 static inline void context_set_sm_dte(struct context_entry *context)
1888 {
1889 	context->lo |= (1 << 2);
1890 }
1891 
1892 /*
1893  * Set the PRE(Page Request Enable) field of a scalable mode context
1894  * entry.
1895  */
1896 static inline void context_set_sm_pre(struct context_entry *context)
1897 {
1898 	context->lo |= (1 << 4);
1899 }
1900 
1901 /* Convert value to context PASID directory size field coding. */
1902 #define context_pdts(pds)	(((pds) & 0x7) << 9)
1903 
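/*
 * Program the context entry for (@bus, @devfn) on @iommu so that it points
 * to @domain's translation structures: the PASID directory of @table in
 * scalable mode, or the second-level page table (or pass-through) in
 * legacy mode.
 */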
1904 static int domain_context_mapping_one(struct dmar_domain *domain,
1905 				      struct intel_iommu *iommu,
1906 				      struct pasid_table *table,
1907 				      u8 bus, u8 devfn)
1908 {
1909 	struct device_domain_info *info =
1910 			iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1911 	u16 did = domain_id_iommu(domain, iommu);
1912 	int translation = CONTEXT_TT_MULTI_LEVEL;
1913 	struct context_entry *context;
1914 	int ret;
1915 
1916 	WARN_ON(did == 0);
1917 
1918 	if (hw_pass_through && domain_type_is_si(domain))
1919 		translation = CONTEXT_TT_PASS_THROUGH;
1920 
1921 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1922 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1923 
1924 	BUG_ON(!domain->pgd);
1925 
1926 	spin_lock(&iommu->lock);
1927 	ret = -ENOMEM;
1928 	context = iommu_context_addr(iommu, bus, devfn, 1);
1929 	if (!context)
1930 		goto out_unlock;
1931 
1932 	ret = 0;
1933 	if (context_present(context) && !context_copied(iommu, bus, devfn))
1934 		goto out_unlock;
1935 
1936 	/*
1937 	 * In kdump cases, old valid entries may be cached due to the
1938 	 * in-flight DMA and the copied pgtable, but there is no unmapping
1939 	 * behaviour for them, so we need an explicit cache flush for the
1940 	 * newly-mapped device. At this point, the device is expected to
1941 	 * have completed its reset during its driver probe stage, so no
1942 	 * in-flight DMA will exist and we don't need to worry about it
1943 	 * hereafter.
1944 	 */
1945 	if (context_copied(iommu, bus, devfn)) {
1946 		u16 did_old = context_domain_id(context);
1947 
1948 		if (did_old < cap_ndoms(iommu->cap)) {
1949 			iommu->flush.flush_context(iommu, did_old,
1950 						   (((u16)bus) << 8) | devfn,
1951 						   DMA_CCMD_MASK_NOBIT,
1952 						   DMA_CCMD_DEVICE_INVL);
1953 			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
1954 						 DMA_TLB_DSI_FLUSH);
1955 		}
1956 
1957 		clear_context_copied(iommu, bus, devfn);
1958 	}
1959 
1960 	context_clear_entry(context);
1961 
1962 	if (sm_supported(iommu)) {
1963 		unsigned long pds;
1964 
1965 		WARN_ON(!table);
1966 
1967 		/* Setup the PASID DIR pointer: */
1968 		pds = context_get_sm_pds(table);
1969 		context->lo = (u64)virt_to_phys(table->table) |
1970 				context_pdts(pds);
1971 
1972 		/* Setup the RID_PASID field: */
1973 		context_set_sm_rid2pasid(context, PASID_RID2PASID);
1974 
1975 		/*
1976 		 * Setup the Device-TLB enable bit and Page request
1977 		 * Enable bit:
1978 		 */
1979 		if (info && info->ats_supported)
1980 			context_set_sm_dte(context);
1981 		if (info && info->pri_supported)
1982 			context_set_sm_pre(context);
1983 	} else {
1984 		struct dma_pte *pgd = domain->pgd;
1985 		int agaw;
1986 
1987 		context_set_domain_id(context, did);
1988 
1989 		if (translation != CONTEXT_TT_PASS_THROUGH) {
1990 			/*
1991 			 * Skip top levels of page tables for iommu which has
1992 			 * less agaw than default. Unnecessary for PT mode.
1993 			 */
1994 			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
1995 				ret = -ENOMEM;
1996 				pgd = phys_to_virt(dma_pte_addr(pgd));
1997 				if (!dma_pte_present(pgd))
1998 					goto out_unlock;
1999 			}
2000 
2001 			if (info && info->ats_supported)
2002 				translation = CONTEXT_TT_DEV_IOTLB;
2003 			else
2004 				translation = CONTEXT_TT_MULTI_LEVEL;
2005 
2006 			context_set_address_root(context, virt_to_phys(pgd));
2007 			context_set_address_width(context, agaw);
2008 		} else {
2009 			/*
2010 			 * In pass through mode, AW must be programmed to
2011 			 * indicate the largest AGAW value supported by
2012 			 * hardware. And ASR is ignored by hardware.
2013 			 */
2014 			context_set_address_width(context, iommu->msagaw);
2015 		}
2016 
2017 		context_set_translation_type(context, translation);
2018 	}
2019 
2020 	context_set_fault_enable(context);
2021 	context_set_present(context);
2022 	if (!ecap_coherent(iommu->ecap))
2023 		clflush_cache_range(context, sizeof(*context));
2024 
2025 	/*
2026 	 * It's a non-present to present mapping. If hardware doesn't cache
2027 	 * non-present entry we only need to flush the write-buffer. If the
2028 	 * non-present entries we only need to flush the write-buffer. If it
2029 	 * domain #0, which we have to flush:
2030 	 */
2031 	if (cap_caching_mode(iommu->cap)) {
2032 		iommu->flush.flush_context(iommu, 0,
2033 					   (((u16)bus) << 8) | devfn,
2034 					   DMA_CCMD_MASK_NOBIT,
2035 					   DMA_CCMD_DEVICE_INVL);
2036 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2037 	} else {
2038 		iommu_flush_write_buffer(iommu);
2039 	}
2040 	iommu_enable_dev_iotlb(info);
2041 
2042 	ret = 0;
2043 
2044 out_unlock:
2045 	spin_unlock(&iommu->lock);
2046 
2047 	return ret;
2048 }
2049 
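/*
 * Context passed to pci_for_each_dma_alias() so that every DMA alias of a
 * PCI device gets the same context-table programming.
 */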
2050 struct domain_context_mapping_data {
2051 	struct dmar_domain *domain;
2052 	struct intel_iommu *iommu;
2053 	struct pasid_table *table;
2054 };
2055 
2056 static int domain_context_mapping_cb(struct pci_dev *pdev,
2057 				     u16 alias, void *opaque)
2058 {
2059 	struct domain_context_mapping_data *data = opaque;
2060 
2061 	return domain_context_mapping_one(data->domain, data->iommu,
2062 					  data->table, PCI_BUS_NUM(alias),
2063 					  alias & 0xff);
2064 }
2065 
2066 static int
2067 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2068 {
2069 	struct domain_context_mapping_data data;
2070 	struct pasid_table *table;
2071 	struct intel_iommu *iommu;
2072 	u8 bus, devfn;
2073 
2074 	iommu = device_to_iommu(dev, &bus, &devfn);
2075 	if (!iommu)
2076 		return -ENODEV;
2077 
2078 	table = intel_pasid_get_table(dev);
2079 
2080 	if (!dev_is_pci(dev))
2081 		return domain_context_mapping_one(domain, iommu, table,
2082 						  bus, devfn);
2083 
2084 	data.domain = domain;
2085 	data.iommu = iommu;
2086 	data.table = table;
2087 
2088 	return pci_for_each_dma_alias(to_pci_dev(dev),
2089 				      &domain_context_mapping_cb, &data);
2090 }
2091 
2092 static int domain_context_mapped_cb(struct pci_dev *pdev,
2093 				    u16 alias, void *opaque)
2094 {
2095 	struct intel_iommu *iommu = opaque;
2096 
2097 	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2098 }
2099 
2100 static int domain_context_mapped(struct device *dev)
2101 {
2102 	struct intel_iommu *iommu;
2103 	u8 bus, devfn;
2104 
2105 	iommu = device_to_iommu(dev, &bus, &devfn);
2106 	if (!iommu)
2107 		return -ENODEV;
2108 
2109 	if (!dev_is_pci(dev))
2110 		return device_context_mapped(iommu, bus, devfn);
2111 
2112 	return !pci_for_each_dma_alias(to_pci_dev(dev),
2113 				       domain_context_mapped_cb, iommu);
2114 }
2115 
2116 /* Returns a number of VTD pages, but aligned to MM page size */
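/*
 * E.g. host_addr 0x1234 with size 0x2000 touches three 4KiB pages
 * (0x0234..0x2233), so this returns 3 when MM and VT-d pages are both 4KiB.
 */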
2117 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2118 					    size_t size)
2119 {
2120 	host_addr &= ~PAGE_MASK;
2121 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2122 }
2123 
2124 /* Return largest possible superpage level for a given mapping */
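/*
 * Each level adds VTD_STRIDE_SHIFT bits of page size: level 1 maps 4KiB
 * pages, level 2 maps 2MiB and level 3 maps 1GiB.
 */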
2125 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2126 					  unsigned long iov_pfn,
2127 					  unsigned long phy_pfn,
2128 					  unsigned long pages)
2129 {
2130 	int support, level = 1;
2131 	unsigned long pfnmerge;
2132 
2133 	support = domain->iommu_superpage;
2134 
2135 	/* To use a large page, the virtual *and* physical addresses
2136 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2137 	   of them will mean we have to use smaller pages. So just
2138 	   merge them and check both at once. */
2139 	pfnmerge = iov_pfn | phy_pfn;
2140 
2141 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2142 		pages >>= VTD_STRIDE_SHIFT;
2143 		if (!pages)
2144 			break;
2145 		pfnmerge >>= VTD_STRIDE_SHIFT;
2146 		level++;
2147 		support--;
2148 	}
2149 	return level;
2150 }
2151 
2152 /*
2153  * Ensure that old small page tables are removed to make room for superpage(s).
2154  * We're going to add new large pages, so make sure we don't remove their parent
2155  * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
2156  */
2157 static void switch_to_super_page(struct dmar_domain *domain,
2158 				 unsigned long start_pfn,
2159 				 unsigned long end_pfn, int level)
2160 {
2161 	unsigned long lvl_pages = lvl_to_nr_pages(level);
2162 	struct iommu_domain_info *info;
2163 	struct dma_pte *pte = NULL;
2164 	unsigned long i;
2165 
2166 	while (start_pfn <= end_pfn) {
2167 		if (!pte)
2168 			pte = pfn_to_dma_pte(domain, start_pfn, &level);
2169 
2170 		if (dma_pte_present(pte)) {
2171 			dma_pte_free_pagetable(domain, start_pfn,
2172 					       start_pfn + lvl_pages - 1,
2173 					       level + 1);
2174 
2175 			xa_for_each(&domain->iommu_array, i, info)
2176 				iommu_flush_iotlb_psi(info->iommu, domain,
2177 						      start_pfn, lvl_pages,
2178 						      0, 0);
2179 		}
2180 
2181 		pte++;
2182 		start_pfn += lvl_pages;
2183 		if (first_pte_in_page(pte))
2184 			pte = NULL;
2185 	}
2186 }
2187 
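/*
 * Map @nr_pages contiguous physical pages starting at @phys_pfn into the
 * IOVA range starting at @iov_pfn, using the largest (super)page size that
 * the hardware and the alignment of both address ranges allow.
 */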
2188 static int
2189 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2190 		 unsigned long phys_pfn, unsigned long nr_pages, int prot)
2191 {
2192 	struct dma_pte *first_pte = NULL, *pte = NULL;
2193 	unsigned int largepage_lvl = 0;
2194 	unsigned long lvl_pages = 0;
2195 	phys_addr_t pteval;
2196 	u64 attr;
2197 
2198 	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2199 
2200 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2201 		return -EINVAL;
2202 
2203 	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2204 	attr |= DMA_FL_PTE_PRESENT;
2205 	if (domain_use_first_level(domain)) {
2206 		attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
2207 		if (prot & DMA_PTE_WRITE)
2208 			attr |= DMA_FL_PTE_DIRTY;
2209 	}
2210 
2211 	pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2212 
2213 	while (nr_pages > 0) {
2214 		uint64_t tmp;
2215 
2216 		if (!pte) {
2217 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
2218 					phys_pfn, nr_pages);
2219 
2220 			pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2221 			if (!pte)
2222 				return -ENOMEM;
2223 			first_pte = pte;
2224 
2225 			lvl_pages = lvl_to_nr_pages(largepage_lvl);
2226 
2227 			/* It is a large page */
2228 			if (largepage_lvl > 1) {
2229 				unsigned long end_pfn;
2230 				unsigned long pages_to_remove;
2231 
2232 				pteval |= DMA_PTE_LARGE_PAGE;
2233 				pages_to_remove = min_t(unsigned long, nr_pages,
2234 							nr_pte_to_next_page(pte) * lvl_pages);
2235 				end_pfn = iov_pfn + pages_to_remove - 1;
2236 				switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
2237 			} else {
2238 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2239 			}
2240 
2241 		}
2242 		/* We don't need a lock here; nobody else
2243 		 * touches this IOVA range.
2244 		 */
2245 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2246 		if (tmp) {
2247 			static int dumps = 5;
2248 			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2249 				iov_pfn, tmp, (unsigned long long)pteval);
2250 			if (dumps) {
2251 				dumps--;
2252 				debug_dma_dump_mappings(NULL);
2253 			}
2254 			WARN_ON(1);
2255 		}
2256 
2257 		nr_pages -= lvl_pages;
2258 		iov_pfn += lvl_pages;
2259 		phys_pfn += lvl_pages;
2260 		pteval += lvl_pages * VTD_PAGE_SIZE;
2261 
2262 		/* If the next PTE would be the first in a new page, then we
2263 		 * need to flush the cache on the entries we've just written.
2264 		 * And then we'll need to recalculate 'pte', so clear it and
2265 		 * let it get set again in the if (!pte) block above.
2266 		 *
2267 		 * If we're done (!nr_pages) we need to flush the cache too.
2268 		 *
2269 		 * Also if we've been setting superpages, we may need to
2270 		 * recalculate 'pte' and switch back to smaller pages for the
2271 		 * end of the mapping, if the trailing size is not enough to
2272 		 * use another superpage (i.e. nr_pages < lvl_pages).
2273 		 */
2274 		pte++;
2275 		if (!nr_pages || first_pte_in_page(pte) ||
2276 		    (largepage_lvl > 1 && nr_pages < lvl_pages)) {
2277 			domain_flush_cache(domain, first_pte,
2278 					   (void *)pte - (void *)first_pte);
2279 			pte = NULL;
2280 		}
2281 	}
2282 
2283 	return 0;
2284 }
2285 
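/*
 * Clear the context entry for (@bus, @devfn) and invalidate the context,
 * PASID (in scalable mode), IOTLB and device-TLB caches that may still
 * hold it.
 */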
2286 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
2287 {
2288 	struct intel_iommu *iommu = info->iommu;
2289 	struct context_entry *context;
2290 	u16 did_old;
2291 
2292 	if (!iommu)
2293 		return;
2294 
2295 	spin_lock(&iommu->lock);
2296 	context = iommu_context_addr(iommu, bus, devfn, 0);
2297 	if (!context) {
2298 		spin_unlock(&iommu->lock);
2299 		return;
2300 	}
2301 
2302 	if (sm_supported(iommu)) {
2303 		if (hw_pass_through && domain_type_is_si(info->domain))
2304 			did_old = FLPT_DEFAULT_DID;
2305 		else
2306 			did_old = domain_id_iommu(info->domain, iommu);
2307 	} else {
2308 		did_old = context_domain_id(context);
2309 	}
2310 
2311 	context_clear_entry(context);
2312 	__iommu_flush_cache(iommu, context, sizeof(*context));
2313 	spin_unlock(&iommu->lock);
2314 	iommu->flush.flush_context(iommu,
2315 				   did_old,
2316 				   (((u16)bus) << 8) | devfn,
2317 				   DMA_CCMD_MASK_NOBIT,
2318 				   DMA_CCMD_DEVICE_INVL);
2319 
2320 	if (sm_supported(iommu))
2321 		qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2322 
2323 	iommu->flush.flush_iotlb(iommu,
2324 				 did_old,
2325 				 0,
2326 				 0,
2327 				 DMA_TLB_DSI_FLUSH);
2328 
2329 	__iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
2330 }
2331 
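/*
 * Set up a first-level PASID table entry for @dev, using @domain's page
 * table as the first-level table. Only 4- and 5-level tables are valid
 * here.
 */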
2332 static int domain_setup_first_level(struct intel_iommu *iommu,
2333 				    struct dmar_domain *domain,
2334 				    struct device *dev,
2335 				    u32 pasid)
2336 {
2337 	struct dma_pte *pgd = domain->pgd;
2338 	int agaw, level;
2339 	int flags = 0;
2340 
2341 	/*
2342 	 * Skip top levels of page tables for iommu which has
2343 	 * less agaw than default. Unnecessary for PT mode.
2344 	 */
2345 	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2346 		pgd = phys_to_virt(dma_pte_addr(pgd));
2347 		if (!dma_pte_present(pgd))
2348 			return -ENOMEM;
2349 	}
2350 
2351 	level = agaw_to_level(agaw);
2352 	if (level != 4 && level != 5)
2353 		return -EINVAL;
2354 
2355 	if (pasid != PASID_RID2PASID)
2356 		flags |= PASID_FLAG_SUPERVISOR_MODE;
2357 	if (level == 5)
2358 		flags |= PASID_FLAG_FL5LP;
2359 
2360 	if (domain->force_snooping)
2361 		flags |= PASID_FLAG_PAGE_SNOOP;
2362 
2363 	return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2364 					     domain_id_iommu(domain, iommu),
2365 					     flags);
2366 }
2367 
2368 static bool dev_is_real_dma_subdevice(struct device *dev)
2369 {
2370 	return dev && dev_is_pci(dev) &&
2371 	       pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2372 }
2373 
2374 static int iommu_domain_identity_map(struct dmar_domain *domain,
2375 				     unsigned long first_vpfn,
2376 				     unsigned long last_vpfn)
2377 {
2378 	/*
2379 	 * The RMRR range might overlap a physical memory range,
2380 	 * so clear it first.
2381 	 */
2382 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2383 
2384 	return __domain_mapping(domain, first_vpfn,
2385 				first_vpfn, last_vpfn - first_vpfn + 1,
2386 				DMA_PTE_READ|DMA_PTE_WRITE);
2387 }
2388 
2389 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2390 
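/*
 * Build the static identity (si) domain: unless hardware pass-through is
 * used, identity map all online system memory and all RMRR regions so
 * that devices attached to si_domain can keep using physical addresses.
 */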
2391 static int __init si_domain_init(int hw)
2392 {
2393 	struct dmar_rmrr_unit *rmrr;
2394 	struct device *dev;
2395 	int i, nid, ret;
2396 
2397 	si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
2398 	if (!si_domain)
2399 		return -EFAULT;
2400 
2401 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2402 		domain_exit(si_domain);
2403 		return -EFAULT;
2404 	}
2405 
2406 	if (hw)
2407 		return 0;
2408 
2409 	for_each_online_node(nid) {
2410 		unsigned long start_pfn, end_pfn;
2411 		int i;
2412 
2413 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2414 			ret = iommu_domain_identity_map(si_domain,
2415 					mm_to_dma_pfn(start_pfn),
2416 					mm_to_dma_pfn(end_pfn));
2417 			if (ret)
2418 				return ret;
2419 		}
2420 	}
2421 
2422 	/*
2423 	 * Identity map the RMRRs so that devices with RMRRs can also use
2424 	 * the si_domain.
2425 	 */
2426 	for_each_rmrr_units(rmrr) {
2427 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2428 					  i, dev) {
2429 			unsigned long long start = rmrr->base_address;
2430 			unsigned long long end = rmrr->end_address;
2431 
2432 			if (WARN_ON(end < start ||
2433 				    end >> agaw_to_width(si_domain->agaw)))
2434 				continue;
2435 
2436 			ret = iommu_domain_identity_map(si_domain,
2437 					mm_to_dma_pfn(start >> PAGE_SHIFT),
2438 					mm_to_dma_pfn(end >> PAGE_SHIFT));
2439 			if (ret)
2440 				return ret;
2441 		}
2442 	}
2443 
2444 	return 0;
2445 }
2446 
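/*
 * Attach @dev to @domain: reserve a domain ID on the device's IOMMU, set
 * up the RID2PASID entry in scalable mode, and program the context entry.
 */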
2447 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2448 {
2449 	struct device_domain_info *info = dev_iommu_priv_get(dev);
2450 	struct intel_iommu *iommu;
2451 	unsigned long flags;
2452 	u8 bus, devfn;
2453 	int ret;
2454 
2455 	iommu = device_to_iommu(dev, &bus, &devfn);
2456 	if (!iommu)
2457 		return -ENODEV;
2458 
2459 	ret = domain_attach_iommu(domain, iommu);
2460 	if (ret)
2461 		return ret;
2462 	info->domain = domain;
2463 	spin_lock_irqsave(&domain->lock, flags);
2464 	list_add(&info->link, &domain->devices);
2465 	spin_unlock_irqrestore(&domain->lock, flags);
2466 
2467 	/* PASID table is mandatory for a PCI device in scalable mode. */
2468 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
2469 		ret = intel_pasid_alloc_table(dev);
2470 		if (ret) {
2471 			dev_err(dev, "PASID table allocation failed\n");
2472 			dmar_remove_one_dev_info(dev);
2473 			return ret;
2474 		}
2475 
2476 		/* Setup the PASID entry for requests without PASID: */
2477 		if (hw_pass_through && domain_type_is_si(domain))
2478 			ret = intel_pasid_setup_pass_through(iommu, domain,
2479 					dev, PASID_RID2PASID);
2480 		else if (domain_use_first_level(domain))
2481 			ret = domain_setup_first_level(iommu, domain, dev,
2482 					PASID_RID2PASID);
2483 		else
2484 			ret = intel_pasid_setup_second_level(iommu, domain,
2485 					dev, PASID_RID2PASID);
2486 		if (ret) {
2487 			dev_err(dev, "Setup RID2PASID failed\n");
2488 			dmar_remove_one_dev_info(dev);
2489 			return ret;
2490 		}
2491 	}
2492 
2493 	ret = domain_context_mapping(domain, dev);
2494 	if (ret) {
2495 		dev_err(dev, "Domain context map failed\n");
2496 		dmar_remove_one_dev_info(dev);
2497 		return ret;
2498 	}
2499 
2500 	return 0;
2501 }
2502 
2503 static bool device_has_rmrr(struct device *dev)
2504 {
2505 	struct dmar_rmrr_unit *rmrr;
2506 	struct device *tmp;
2507 	int i;
2508 
2509 	rcu_read_lock();
2510 	for_each_rmrr_units(rmrr) {
2511 		/*
2512 		 * Return TRUE if this RMRR contains the device that
2513 		 * is passed in.
2514 		 */
2515 		for_each_active_dev_scope(rmrr->devices,
2516 					  rmrr->devices_cnt, i, tmp)
2517 			if (tmp == dev ||
2518 			    is_downstream_to_pci_bridge(dev, tmp)) {
2519 				rcu_read_unlock();
2520 				return true;
2521 			}
2522 	}
2523 	rcu_read_unlock();
2524 	return false;
2525 }
2526 
2527 /**
2528  * device_rmrr_is_relaxable - Test whether the RMRR of this device
2529  * is relaxable (i.e. allowed to be left unenforced under some conditions)
2530  * @dev: device handle
2531  *
2532  * We assume that PCI USB devices with RMRRs have them largely
2533  * for historical reasons and that the RMRR space is not actively used post
2534  * boot.  This exclusion may change if vendors begin to abuse it.
2535  *
2536  * The same exception is made for graphics devices, with the requirement that
2537  * any use of the RMRR regions will be torn down before assigning the device
2538  * to a guest.
2539  *
2540  * Return: true if the RMRR is relaxable, false otherwise
2541  */
2542 static bool device_rmrr_is_relaxable(struct device *dev)
2543 {
2544 	struct pci_dev *pdev;
2545 
2546 	if (!dev_is_pci(dev))
2547 		return false;
2548 
2549 	pdev = to_pci_dev(dev);
2550 	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2551 		return true;
2552 	else
2553 		return false;
2554 }
2555 
2556 /*
2557  * There are a couple cases where we need to restrict the functionality of
2558  * devices associated with RMRRs.  The first is when evaluating a device for
2559  * identity mapping because problems exist when devices are moved in and out
2560  * of domains and their respective RMRR information is lost.  This means that
2561  * a device with associated RMRRs will never be in a "passthrough" domain.
2562  * The second is use of the device through the IOMMU API.  This interface
2563  * expects to have full control of the IOVA space for the device.  We cannot
2564  * satisfy both the requirement that RMRR access is maintained and have an
2565  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2566  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2567  * We therefore prevent devices associated with an RMRR from participating in
2568  * the IOMMU API, which eliminates them from device assignment.
2569  *
2570  * In both cases, devices which have relaxable RMRRs are not concerned by this
2571  * restriction. See device_rmrr_is_relaxable comment.
2572  */
2573 static bool device_is_rmrr_locked(struct device *dev)
2574 {
2575 	if (!device_has_rmrr(dev))
2576 		return false;
2577 
2578 	if (device_rmrr_is_relaxable(dev))
2579 		return false;
2580 
2581 	return true;
2582 }
2583 
2584 /*
2585  * Return the required default domain type for a specific device.
2586  *
2587  * @dev: the device in question
2589  *
2590  * Returns:
2591  *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2592  *  - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2593  *  - 0: both identity and dynamic domains work for this device
2594  */
2595 static int device_def_domain_type(struct device *dev)
2596 {
2597 	if (dev_is_pci(dev)) {
2598 		struct pci_dev *pdev = to_pci_dev(dev);
2599 
2600 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2601 			return IOMMU_DOMAIN_IDENTITY;
2602 
2603 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2604 			return IOMMU_DOMAIN_IDENTITY;
2605 	}
2606 
2607 	return 0;
2608 }
2609 
2610 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2611 {
2612 	/*
2613 	 * Start from a sane IOMMU hardware state.
2614 	 * If queued invalidation was already initialized by us
2615 	 * (for example, while enabling interrupt remapping) then
2616 	 * things are already rolling from a sane state.
2617 	 */
2618 	if (!iommu->qi) {
2619 		/*
2620 		 * Clear any previous faults.
2621 		 */
2622 		dmar_fault(-1, iommu);
2623 		/*
2624 		 * Disable queued invalidation if supported and already enabled
2625 		 * before OS handover.
2626 		 */
2627 		dmar_disable_qi(iommu);
2628 	}
2629 
2630 	if (dmar_enable_qi(iommu)) {
2631 		/*
2632 		 * Queued invalidation not enabled, so use register-based invalidation.
2633 		 */
2634 		iommu->flush.flush_context = __iommu_flush_context;
2635 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2636 		pr_info("%s: Using Register based invalidation\n",
2637 			iommu->name);
2638 	} else {
2639 		iommu->flush.flush_context = qi_flush_context;
2640 		iommu->flush.flush_iotlb = qi_flush_iotlb;
2641 		pr_info("%s: Using Queued invalidation\n", iommu->name);
2642 	}
2643 }
2644 
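/*
 * Copy one bus's worth of context entries from the old (pre-kexec) root
 * entry @old_re into a freshly allocated table. In extended/scalable mode
 * each bus has two context tables (devfn < 0x80 via the lower pointer,
 * devfn >= 0x80 via the upper one), hence the *2 indexing.
 */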
2645 static int copy_context_table(struct intel_iommu *iommu,
2646 			      struct root_entry *old_re,
2647 			      struct context_entry **tbl,
2648 			      int bus, bool ext)
2649 {
2650 	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2651 	struct context_entry *new_ce = NULL, ce;
2652 	struct context_entry *old_ce = NULL;
2653 	struct root_entry re;
2654 	phys_addr_t old_ce_phys;
2655 
2656 	tbl_idx = ext ? bus * 2 : bus;
2657 	memcpy(&re, old_re, sizeof(re));
2658 
2659 	for (devfn = 0; devfn < 256; devfn++) {
2660 		/* First calculate the correct index */
2661 		idx = (ext ? devfn * 2 : devfn) % 256;
2662 
2663 		if (idx == 0) {
2664 			/* First save what we may have and clean up */
2665 			if (new_ce) {
2666 				tbl[tbl_idx] = new_ce;
2667 				__iommu_flush_cache(iommu, new_ce,
2668 						    VTD_PAGE_SIZE);
2669 				pos = 1;
2670 			}
2671 
2672 			if (old_ce)
2673 				memunmap(old_ce);
2674 
2675 			ret = 0;
2676 			if (devfn < 0x80)
2677 				old_ce_phys = root_entry_lctp(&re);
2678 			else
2679 				old_ce_phys = root_entry_uctp(&re);
2680 
2681 			if (!old_ce_phys) {
2682 				if (ext && devfn == 0) {
2683 					/* No LCTP, try UCTP */
2684 					devfn = 0x7f;
2685 					continue;
2686 				} else {
2687 					goto out;
2688 				}
2689 			}
2690 
2691 			ret = -ENOMEM;
2692 			old_ce = memremap(old_ce_phys, PAGE_SIZE,
2693 					MEMREMAP_WB);
2694 			if (!old_ce)
2695 				goto out;
2696 
2697 			new_ce = alloc_pgtable_page(iommu->node);
2698 			if (!new_ce)
2699 				goto out_unmap;
2700 
2701 			ret = 0;
2702 		}
2703 
2704 		/* Now copy the context entry */
2705 		memcpy(&ce, old_ce + idx, sizeof(ce));
2706 
2707 		if (!context_present(&ce))
2708 			continue;
2709 
2710 		did = context_domain_id(&ce);
2711 		if (did >= 0 && did < cap_ndoms(iommu->cap))
2712 			set_bit(did, iommu->domain_ids);
2713 
2714 		set_context_copied(iommu, bus, devfn);
2715 		new_ce[idx] = ce;
2716 	}
2717 
2718 	tbl[tbl_idx + pos] = new_ce;
2719 
2720 	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2721 
2722 out_unmap:
2723 	memunmap(old_ce);
2724 
2725 out:
2726 	return ret;
2727 }
2728 
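/*
 * Preserve the translation set up by the previous (crashed) kernel: map
 * its root table, copy every context table it references, and hook the
 * copies into this kernel's root entries.
 */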
2729 static int copy_translation_tables(struct intel_iommu *iommu)
2730 {
2731 	struct context_entry **ctxt_tbls;
2732 	struct root_entry *old_rt;
2733 	phys_addr_t old_rt_phys;
2734 	int ctxt_table_entries;
2735 	u64 rtaddr_reg;
2736 	int bus, ret;
2737 	bool new_ext, ext;
2738 
2739 	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2740 	ext        = !!(rtaddr_reg & DMA_RTADDR_SMT);
2741 	new_ext    = !!sm_supported(iommu);
2742 
2743 	/*
2744 	 * The RTT bit can only be changed when translation is disabled,
2745 	 * but disabling translation means opening a window for data
2746 	 * corruption. So bail out and don't copy anything if we would
2747 	 * have to change the bit.
2748 	 */
2749 	if (new_ext != ext)
2750 		return -EINVAL;
2751 
2752 	iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
2753 	if (!iommu->copied_tables)
2754 		return -ENOMEM;
2755 
2756 	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2757 	if (!old_rt_phys)
2758 		return -EINVAL;
2759 
2760 	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
2761 	if (!old_rt)
2762 		return -ENOMEM;
2763 
2764 	/* This is too big for the stack - allocate it from slab */
2765 	ctxt_table_entries = ext ? 512 : 256;
2766 	ret = -ENOMEM;
2767 	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
2768 	if (!ctxt_tbls)
2769 		goto out_unmap;
2770 
2771 	for (bus = 0; bus < 256; bus++) {
2772 		ret = copy_context_table(iommu, &old_rt[bus],
2773 					 ctxt_tbls, bus, ext);
2774 		if (ret) {
2775 			pr_err("%s: Failed to copy context table for bus %d\n",
2776 				iommu->name, bus);
2777 			continue;
2778 		}
2779 	}
2780 
2781 	spin_lock(&iommu->lock);
2782 
2783 	/* Context tables are copied, now write them to the root_entry table */
2784 	for (bus = 0; bus < 256; bus++) {
2785 		int idx = ext ? bus * 2 : bus;
2786 		u64 val;
2787 
2788 		if (ctxt_tbls[idx]) {
2789 			val = virt_to_phys(ctxt_tbls[idx]) | 1;
2790 			iommu->root_entry[bus].lo = val;
2791 		}
2792 
2793 		if (!ext || !ctxt_tbls[idx + 1])
2794 			continue;
2795 
2796 		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
2797 		iommu->root_entry[bus].hi = val;
2798 	}
2799 
2800 	spin_unlock(&iommu->lock);
2801 
2802 	kfree(ctxt_tbls);
2803 
2804 	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
2805 
2806 	ret = 0;
2807 
2808 out_unmap:
2809 	memunmap(old_rt);
2810 
2811 	return ret;
2812 }
2813 
2814 #ifdef CONFIG_INTEL_IOMMU_SVM
2815 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
2816 {
2817 	struct intel_iommu *iommu = data;
2818 	ioasid_t ioasid;
2819 
2820 	if (!iommu)
2821 		return INVALID_IOASID;
2822 	/*
2823 	 * The VT-d virtual command interface always uses the full 20-bit
2824 	 * PASID range. The host can partition the guest PASID range based
2825 	 * on policies, but this is out of the guest's control.
2826 	 */
2827 	if (min < PASID_MIN || max > intel_pasid_max_id)
2828 		return INVALID_IOASID;
2829 
2830 	if (vcmd_alloc_pasid(iommu, &ioasid))
2831 		return INVALID_IOASID;
2832 
2833 	return ioasid;
2834 }
2835 
2836 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
2837 {
2838 	struct intel_iommu *iommu = data;
2839 
2840 	if (!iommu)
2841 		return;
2842 	/*
2843 	 * Sanity checking of the ioasid owner is done at the upper layer,
2844 	 * e.g. VFIO. We can only free the PASID when all devices are unbound.
2845 	 */
2846 	if (ioasid_find(NULL, ioasid, NULL)) {
2847 		pr_alert("Cannot free active IOASID %d\n", ioasid);
2848 		return;
2849 	}
2850 	vcmd_free_pasid(iommu, ioasid);
2851 }
2852 
2853 static void register_pasid_allocator(struct intel_iommu *iommu)
2854 {
2855 	/*
2856 	 * If we are running in the host, there is no need for a custom
2857 	 * allocator since PASIDs are allocated system-wide by the host.
2858 	 */
2859 	if (!cap_caching_mode(iommu->cap))
2860 		return;
2861 
2862 	if (!sm_supported(iommu)) {
2863 		pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
2864 		return;
2865 	}
2866 
2867 	/*
2868 	 * Register a custom PASID allocator if we are running in a guest;
2869 	 * guest PASIDs must be obtained via the virtual command interface.
2870 	 * There can be multiple vIOMMUs in each guest but only one allocator
2871 	 * is active. All vIOMMU allocators eventually call the same host
2872 	 * allocator.
2873 	 */
2874 	if (!vccap_pasid(iommu->vccap))
2875 		return;
2876 
2877 	pr_info("Register custom PASID allocator\n");
2878 	iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
2879 	iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
2880 	iommu->pasid_allocator.pdata = (void *)iommu;
2881 	if (ioasid_register_allocator(&iommu->pasid_allocator)) {
2882 		pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
2883 		/*
2884 		 * Disable scalable mode on this IOMMU if there
2885 		 * is no custom allocator. Mixing SM-capable and
2886 		 * non-SM vIOMMUs is not supported.
2887 		 */
2888 		intel_iommu_sm = 0;
2889 	}
2890 }
2891 #endif
2892 
2893 static int __init init_dmars(void)
2894 {
2895 	struct dmar_drhd_unit *drhd;
2896 	struct intel_iommu *iommu;
2897 	int ret;
2898 
2899 	ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
2900 	if (ret)
2901 		goto free_iommu;
2902 
2903 	for_each_iommu(iommu, drhd) {
2904 		if (drhd->ignored) {
2905 			iommu_disable_translation(iommu);
2906 			continue;
2907 		}
2908 
2909 		/*
2910 		 * Find the max PASID size of all IOMMUs in the system.
2911 		 * We need to ensure the system PASID table is no bigger
2912 		 * than the smallest supported size.
2913 		 */
2914 		if (pasid_supported(iommu)) {
2915 			u32 temp = 2 << ecap_pss(iommu->ecap);
2916 
2917 			intel_pasid_max_id = min_t(u32, temp,
2918 						   intel_pasid_max_id);
2919 		}
2920 
2921 		intel_iommu_init_qi(iommu);
2922 
2923 		ret = iommu_init_domains(iommu);
2924 		if (ret)
2925 			goto free_iommu;
2926 
2927 		init_translation_status(iommu);
2928 
2929 		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
2930 			iommu_disable_translation(iommu);
2931 			clear_translation_pre_enabled(iommu);
2932 			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
2933 				iommu->name);
2934 		}
2935 
2936 		/*
2937 		 * TBD:
2938 		 * we could share the same root & context tables
2939 		 * among all IOMMUs. This needs to be split out later.
2940 		 */
2941 		ret = iommu_alloc_root_entry(iommu);
2942 		if (ret)
2943 			goto free_iommu;
2944 
2945 		if (translation_pre_enabled(iommu)) {
2946 			pr_info("Translation already enabled - trying to copy translation structures\n");
2947 
2948 			ret = copy_translation_tables(iommu);
2949 			if (ret) {
2950 				/*
2951 				 * We found the IOMMU with translation
2952 				 * enabled - but failed to copy over the
2953 				 * old root-entry table. Try to proceed
2954 				 * by disabling translation now and
2955 				 * allocating a clean root-entry table.
2956 				 * This might cause DMAR faults, but
2957 				 * probably the dump will still succeed.
2958 				 */
2959 				pr_err("Failed to copy translation tables from previous kernel for %s\n",
2960 				       iommu->name);
2961 				iommu_disable_translation(iommu);
2962 				clear_translation_pre_enabled(iommu);
2963 			} else {
2964 				pr_info("Copied translation tables from previous kernel for %s\n",
2965 					iommu->name);
2966 			}
2967 		}
2968 
2969 		if (!ecap_pass_through(iommu->ecap))
2970 			hw_pass_through = 0;
2971 		intel_svm_check(iommu);
2972 	}
2973 
2974 	/*
2975 	 * Now that qi is enabled on all iommus, set the root entry and flush
2976 	 * caches. This is required on some Intel X58 chipsets, otherwise the
2977 	 * flush_context function will loop forever and the boot hangs.
2978 	 */
2979 	for_each_active_iommu(iommu, drhd) {
2980 		iommu_flush_write_buffer(iommu);
2981 #ifdef CONFIG_INTEL_IOMMU_SVM
2982 		register_pasid_allocator(iommu);
2983 #endif
2984 		iommu_set_root_entry(iommu);
2985 	}
2986 
2987 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2988 	dmar_map_gfx = 0;
2989 #endif
2990 
2991 	if (!dmar_map_gfx)
2992 		iommu_identity_mapping |= IDENTMAP_GFX;
2993 
2994 	check_tylersburg_isoch();
2995 
2996 	ret = si_domain_init(hw_pass_through);
2997 	if (ret)
2998 		goto free_iommu;
2999 
3000 	/*
3001 	 * for each drhd
3002 	 *   enable fault log
3003 	 *   global invalidate context cache
3004 	 *   global invalidate iotlb
3005 	 *   enable translation
3006 	 */
3007 	for_each_iommu(iommu, drhd) {
3008 		if (drhd->ignored) {
3009 			/*
3010 			 * we always have to disable PMRs or DMA may fail on
3011 			 * this device
3012 			 */
3013 			if (force_on)
3014 				iommu_disable_protect_mem_regions(iommu);
3015 			continue;
3016 		}
3017 
3018 		iommu_flush_write_buffer(iommu);
3019 
3020 #ifdef CONFIG_INTEL_IOMMU_SVM
3021 		if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3022 			/*
3023 			 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3024 			 * could cause a lock race condition.
3025 			 */
3026 			up_write(&dmar_global_lock);
3027 			ret = intel_svm_enable_prq(iommu);
3028 			down_write(&dmar_global_lock);
3029 			if (ret)
3030 				goto free_iommu;
3031 		}
3032 #endif
3033 		ret = dmar_set_interrupt(iommu);
3034 		if (ret)
3035 			goto free_iommu;
3036 	}
3037 
3038 	return 0;
3039 
3040 free_iommu:
3041 	for_each_active_iommu(iommu, drhd) {
3042 		disable_dmar_iommu(iommu);
3043 		free_dmar_iommu(iommu);
3044 	}
3045 
3046 	return ret;
3047 }
3048 
3049 static void __init init_no_remapping_devices(void)
3050 {
3051 	struct dmar_drhd_unit *drhd;
3052 	struct device *dev;
3053 	int i;
3054 
3055 	for_each_drhd_unit(drhd) {
3056 		if (!drhd->include_all) {
3057 			for_each_active_dev_scope(drhd->devices,
3058 						  drhd->devices_cnt, i, dev)
3059 				break;
3060 			/* ignore DMAR unit if no devices exist */
3061 			if (i == drhd->devices_cnt)
3062 				drhd->ignored = 1;
3063 		}
3064 	}
3065 
3066 	for_each_active_drhd_unit(drhd) {
3067 		if (drhd->include_all)
3068 			continue;
3069 
3070 		for_each_active_dev_scope(drhd->devices,
3071 					  drhd->devices_cnt, i, dev)
3072 			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3073 				break;
3074 		if (i < drhd->devices_cnt)
3075 			continue;
3076 
3077 		/* This IOMMU has *only* gfx devices. Either bypass it or
3078 		   set the gfx_dedicated flag, as appropriate */
3079 		drhd->gfx_dedicated = 1;
3080 		if (!dmar_map_gfx)
3081 			drhd->ignored = 1;
3082 	}
3083 }
3084 
3085 #ifdef CONFIG_SUSPEND
3086 static int init_iommu_hw(void)
3087 {
3088 	struct dmar_drhd_unit *drhd;
3089 	struct intel_iommu *iommu = NULL;
3090 
3091 	for_each_active_iommu(iommu, drhd)
3092 		if (iommu->qi)
3093 			dmar_reenable_qi(iommu);
3094 
3095 	for_each_iommu(iommu, drhd) {
3096 		if (drhd->ignored) {
3097 			/*
3098 			 * we always have to disable PMRs or DMA may fail on
3099 			 * this device
3100 			 */
3101 			if (force_on)
3102 				iommu_disable_protect_mem_regions(iommu);
3103 			continue;
3104 		}
3105 
3106 		iommu_flush_write_buffer(iommu);
3107 		iommu_set_root_entry(iommu);
3108 		iommu_enable_translation(iommu);
3109 		iommu_disable_protect_mem_regions(iommu);
3110 	}
3111 
3112 	return 0;
3113 }
3114 
3115 static void iommu_flush_all(void)
3116 {
3117 	struct dmar_drhd_unit *drhd;
3118 	struct intel_iommu *iommu;
3119 
3120 	for_each_active_iommu(iommu, drhd) {
3121 		iommu->flush.flush_context(iommu, 0, 0, 0,
3122 					   DMA_CCMD_GLOBAL_INVL);
3123 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3124 					 DMA_TLB_GLOBAL_FLUSH);
3125 	}
3126 }
3127 
3128 static int iommu_suspend(void)
3129 {
3130 	struct dmar_drhd_unit *drhd;
3131 	struct intel_iommu *iommu = NULL;
3132 	unsigned long flag;
3133 
3134 	for_each_active_iommu(iommu, drhd) {
3135 		iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
3136 					     GFP_KERNEL);
3137 		if (!iommu->iommu_state)
3138 			goto nomem;
3139 	}
3140 
3141 	iommu_flush_all();
3142 
3143 	for_each_active_iommu(iommu, drhd) {
3144 		iommu_disable_translation(iommu);
3145 
3146 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3147 
3148 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
3149 			readl(iommu->reg + DMAR_FECTL_REG);
3150 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3151 			readl(iommu->reg + DMAR_FEDATA_REG);
3152 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3153 			readl(iommu->reg + DMAR_FEADDR_REG);
3154 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3155 			readl(iommu->reg + DMAR_FEUADDR_REG);
3156 
3157 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3158 	}
3159 	return 0;
3160 
3161 nomem:
3162 	for_each_active_iommu(iommu, drhd)
3163 		kfree(iommu->iommu_state);
3164 
3165 	return -ENOMEM;
3166 }
3167 
3168 static void iommu_resume(void)
3169 {
3170 	struct dmar_drhd_unit *drhd;
3171 	struct intel_iommu *iommu = NULL;
3172 	unsigned long flag;
3173 
3174 	if (init_iommu_hw()) {
3175 		if (force_on)
3176 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3177 		else
3178 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3179 		return;
3180 	}
3181 
3182 	for_each_active_iommu(iommu, drhd) {
3183 
3184 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3185 
3186 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3187 			iommu->reg + DMAR_FECTL_REG);
3188 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3189 			iommu->reg + DMAR_FEDATA_REG);
3190 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3191 			iommu->reg + DMAR_FEADDR_REG);
3192 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3193 			iommu->reg + DMAR_FEUADDR_REG);
3194 
3195 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3196 	}
3197 
3198 	for_each_active_iommu(iommu, drhd)
3199 		kfree(iommu->iommu_state);
3200 }
3201 
3202 static struct syscore_ops iommu_syscore_ops = {
3203 	.resume		= iommu_resume,
3204 	.suspend	= iommu_suspend,
3205 };
3206 
3207 static void __init init_iommu_pm_ops(void)
3208 {
3209 	register_syscore_ops(&iommu_syscore_ops);
3210 }
3211 
3212 #else
3213 static inline void init_iommu_pm_ops(void) {}
3214 #endif	/* CONFIG_SUSPEND */
3215 
3216 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
3217 {
3218 	if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
3219 	    !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
3220 	    rmrr->end_address <= rmrr->base_address ||
3221 	    arch_rmrr_sanity_check(rmrr))
3222 		return -EINVAL;
3223 
3224 	return 0;
3225 }
3226 
3227 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3228 {
3229 	struct acpi_dmar_reserved_memory *rmrr;
3230 	struct dmar_rmrr_unit *rmrru;
3231 
3232 	rmrr = (struct acpi_dmar_reserved_memory *)header;
3233 	if (rmrr_sanity_check(rmrr)) {
3234 		pr_warn(FW_BUG
3235 			   "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
3236 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3237 			   rmrr->base_address, rmrr->end_address,
3238 			   dmi_get_system_info(DMI_BIOS_VENDOR),
3239 			   dmi_get_system_info(DMI_BIOS_VERSION),
3240 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
3241 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3242 	}
3243 
3244 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3245 	if (!rmrru)
3246 		goto out;
3247 
3248 	rmrru->hdr = header;
3249 
3250 	rmrru->base_address = rmrr->base_address;
3251 	rmrru->end_address = rmrr->end_address;
3252 
3253 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3254 				((void *)rmrr) + rmrr->header.length,
3255 				&rmrru->devices_cnt);
3256 	if (rmrru->devices_cnt && rmrru->devices == NULL)
3257 		goto free_rmrru;
3258 
3259 	list_add(&rmrru->list, &dmar_rmrr_units);
3260 
3261 	return 0;
3262 free_rmrru:
3263 	kfree(rmrru);
3264 out:
3265 	return -ENOMEM;
3266 }
3267 
3268 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3269 {
3270 	struct dmar_atsr_unit *atsru;
3271 	struct acpi_dmar_atsr *tmp;
3272 
3273 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
3274 				dmar_rcu_check()) {
3275 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3276 		if (atsr->segment != tmp->segment)
3277 			continue;
3278 		if (atsr->header.length != tmp->header.length)
3279 			continue;
3280 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
3281 			return atsru;
3282 	}
3283 
3284 	return NULL;
3285 }
3286 
3287 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3288 {
3289 	struct acpi_dmar_atsr *atsr;
3290 	struct dmar_atsr_unit *atsru;
3291 
3292 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3293 		return 0;
3294 
3295 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3296 	atsru = dmar_find_atsr(atsr);
3297 	if (atsru)
3298 		return 0;
3299 
3300 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3301 	if (!atsru)
3302 		return -ENOMEM;
3303 
3304 	/*
3305 	 * If memory is allocated from slab by ACPI _DSM method, we need to
3306 	 * copy the memory content because the memory buffer will be freed
3307 	 * on return.
3308 	 */
3309 	atsru->hdr = (void *)(atsru + 1);
3310 	memcpy(atsru->hdr, hdr, hdr->length);
3311 	atsru->include_all = atsr->flags & 0x1;
3312 	if (!atsru->include_all) {
3313 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3314 				(void *)atsr + atsr->header.length,
3315 				&atsru->devices_cnt);
3316 		if (atsru->devices_cnt && atsru->devices == NULL) {
3317 			kfree(atsru);
3318 			return -ENOMEM;
3319 		}
3320 	}
3321 
3322 	list_add_rcu(&atsru->list, &dmar_atsr_units);
3323 
3324 	return 0;
3325 }
3326 
3327 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3328 {
3329 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3330 	kfree(atsru);
3331 }
3332 
3333 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3334 {
3335 	struct acpi_dmar_atsr *atsr;
3336 	struct dmar_atsr_unit *atsru;
3337 
3338 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3339 	atsru = dmar_find_atsr(atsr);
3340 	if (atsru) {
3341 		list_del_rcu(&atsru->list);
3342 		synchronize_rcu();
3343 		intel_iommu_free_atsr(atsru);
3344 	}
3345 
3346 	return 0;
3347 }
3348 
3349 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3350 {
3351 	int i;
3352 	struct device *dev;
3353 	struct acpi_dmar_atsr *atsr;
3354 	struct dmar_atsr_unit *atsru;
3355 
3356 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3357 	atsru = dmar_find_atsr(atsr);
3358 	if (!atsru)
3359 		return 0;
3360 
3361 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
3362 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3363 					  i, dev)
3364 			return -EBUSY;
3365 	}
3366 
3367 	return 0;
3368 }
3369 
3370 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
3371 {
3372 	struct dmar_satc_unit *satcu;
3373 	struct acpi_dmar_satc *tmp;
3374 
3375 	list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
3376 				dmar_rcu_check()) {
3377 		tmp = (struct acpi_dmar_satc *)satcu->hdr;
3378 		if (satc->segment != tmp->segment)
3379 			continue;
3380 		if (satc->header.length != tmp->header.length)
3381 			continue;
3382 		if (memcmp(satc, tmp, satc->header.length) == 0)
3383 			return satcu;
3384 	}
3385 
3386 	return NULL;
3387 }
3388 
3389 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
3390 {
3391 	struct acpi_dmar_satc *satc;
3392 	struct dmar_satc_unit *satcu;
3393 
3394 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3395 		return 0;
3396 
3397 	satc = container_of(hdr, struct acpi_dmar_satc, header);
3398 	satcu = dmar_find_satc(satc);
3399 	if (satcu)
3400 		return 0;
3401 
3402 	satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
3403 	if (!satcu)
3404 		return -ENOMEM;
3405 
3406 	satcu->hdr = (void *)(satcu + 1);
3407 	memcpy(satcu->hdr, hdr, hdr->length);
3408 	satcu->atc_required = satc->flags & 0x1;
3409 	satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
3410 					      (void *)satc + satc->header.length,
3411 					      &satcu->devices_cnt);
3412 	if (satcu->devices_cnt && !satcu->devices) {
3413 		kfree(satcu);
3414 		return -ENOMEM;
3415 	}
3416 	list_add_rcu(&satcu->list, &dmar_satc_units);
3417 
3418 	return 0;
3419 }
3420 
3421 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3422 {
3423 	int sp, ret;
3424 	struct intel_iommu *iommu = dmaru->iommu;
3425 
3426 	ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
3427 	if (ret)
3428 		goto out;
3429 
3430 	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3431 		pr_warn("%s: Doesn't support hardware pass through.\n",
3432 			iommu->name);
3433 		return -ENXIO;
3434 	}
3435 
3436 	sp = domain_update_iommu_superpage(NULL, iommu) - 1;
3437 	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3438 		pr_warn("%s: Doesn't support large page.\n",
3439 			iommu->name);
3440 		return -ENXIO;
3441 	}
3442 
3443 	/*
3444 	 * Disable translation if already enabled prior to OS handover.
3445 	 */
3446 	if (iommu->gcmd & DMA_GCMD_TE)
3447 		iommu_disable_translation(iommu);
3448 
3449 	ret = iommu_init_domains(iommu);
3450 	if (ret == 0)
3451 		ret = iommu_alloc_root_entry(iommu);
3452 	if (ret)
3453 		goto out;
3454 
3455 	intel_svm_check(iommu);
3456 
3457 	if (dmaru->ignored) {
3458 		/*
3459 		 * we always have to disable PMRs or DMA may fail on this device
3460 		 */
3461 		if (force_on)
3462 			iommu_disable_protect_mem_regions(iommu);
3463 		return 0;
3464 	}
3465 
3466 	intel_iommu_init_qi(iommu);
3467 	iommu_flush_write_buffer(iommu);
3468 
3469 #ifdef CONFIG_INTEL_IOMMU_SVM
3470 	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3471 		ret = intel_svm_enable_prq(iommu);
3472 		if (ret)
3473 			goto disable_iommu;
3474 	}
3475 #endif
3476 	ret = dmar_set_interrupt(iommu);
3477 	if (ret)
3478 		goto disable_iommu;
3479 
3480 	iommu_set_root_entry(iommu);
3481 	iommu_enable_translation(iommu);
3482 
3483 	iommu_disable_protect_mem_regions(iommu);
3484 	return 0;
3485 
3486 disable_iommu:
3487 	disable_dmar_iommu(iommu);
3488 out:
3489 	free_dmar_iommu(iommu);
3490 	return ret;
3491 }
3492 
3493 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3494 {
3495 	int ret = 0;
3496 	struct intel_iommu *iommu = dmaru->iommu;
3497 
3498 	if (!intel_iommu_enabled)
3499 		return 0;
3500 	if (iommu == NULL)
3501 		return -EINVAL;
3502 
3503 	if (insert) {
3504 		ret = intel_iommu_add(dmaru);
3505 	} else {
3506 		disable_dmar_iommu(iommu);
3507 		free_dmar_iommu(iommu);
3508 	}
3509 
3510 	return ret;
3511 }
3512 
3513 static void intel_iommu_free_dmars(void)
3514 {
3515 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
3516 	struct dmar_atsr_unit *atsru, *atsr_n;
3517 	struct dmar_satc_unit *satcu, *satc_n;
3518 
3519 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3520 		list_del(&rmrru->list);
3521 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3522 		kfree(rmrru);
3523 	}
3524 
3525 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3526 		list_del(&atsru->list);
3527 		intel_iommu_free_atsr(atsru);
3528 	}
3529 	list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
3530 		list_del(&satcu->list);
3531 		dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
3532 		kfree(satcu);
3533 	}
3534 }
3535 
3536 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
3537 {
3538 	struct dmar_satc_unit *satcu;
3539 	struct acpi_dmar_satc *satc;
3540 	struct device *tmp;
3541 	int i;
3542 
3543 	dev = pci_physfn(dev);
3544 	rcu_read_lock();
3545 
3546 	list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
3547 		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
3548 		if (satc->segment != pci_domain_nr(dev->bus))
3549 			continue;
3550 		for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
3551 			if (to_pci_dev(tmp) == dev)
3552 				goto out;
3553 	}
3554 	satcu = NULL;
3555 out:
3556 	rcu_read_unlock();
3557 	return satcu;
3558 }
3559 
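/*
 * Decide whether ATS may be used for @dev: trust the SATC table if the
 * device is listed there, otherwise walk up to the root port and look it
 * up in the ATSR device scopes.
 */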
3560 static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
3561 {
3562 	int i, ret = 1;
3563 	struct pci_bus *bus;
3564 	struct pci_dev *bridge = NULL;
3565 	struct device *tmp;
3566 	struct acpi_dmar_atsr *atsr;
3567 	struct dmar_atsr_unit *atsru;
3568 	struct dmar_satc_unit *satcu;
3569 
3570 	dev = pci_physfn(dev);
3571 	satcu = dmar_find_matched_satc_unit(dev);
3572 	if (satcu)
3573 		/*
3574 		 * This device supports ATS as it is in the SATC table.
3575 		 * When the IOMMU is in legacy mode, ATS is enabled
3576 		 * automatically by HW for devices that require it,
3577 		 * hence the OS should not enable ATS for this device,
3578 		 * to avoid duplicated TLB invalidations.
3579 		 */
3580 		return !(satcu->atc_required && !sm_supported(iommu));
3581 
3582 	for (bus = dev->bus; bus; bus = bus->parent) {
3583 		bridge = bus->self;
3584 		/* If it's an integrated device, allow ATS */
3585 		if (!bridge)
3586 			return 1;
3587 		/* Connected via non-PCIe: no ATS */
3588 		if (!pci_is_pcie(bridge) ||
3589 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3590 			return 0;
3591 		/* If we found the root port, look it up in the ATSR */
3592 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3593 			break;
3594 	}
3595 
3596 	rcu_read_lock();
3597 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3598 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3599 		if (atsr->segment != pci_domain_nr(dev->bus))
3600 			continue;
3601 
3602 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3603 			if (tmp == &bridge->dev)
3604 				goto out;
3605 
3606 		if (atsru->include_all)
3607 			goto out;
3608 	}
3609 	ret = 0;
3610 out:
3611 	rcu_read_unlock();
3612 
3613 	return ret;
3614 }
3615 
3616 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3617 {
3618 	int ret;
3619 	struct dmar_rmrr_unit *rmrru;
3620 	struct dmar_atsr_unit *atsru;
3621 	struct dmar_satc_unit *satcu;
3622 	struct acpi_dmar_atsr *atsr;
3623 	struct acpi_dmar_reserved_memory *rmrr;
3624 	struct acpi_dmar_satc *satc;
3625 
3626 	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
3627 		return 0;
3628 
3629 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3630 		rmrr = container_of(rmrru->hdr,
3631 				    struct acpi_dmar_reserved_memory, header);
3632 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3633 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3634 				((void *)rmrr) + rmrr->header.length,
3635 				rmrr->segment, rmrru->devices,
3636 				rmrru->devices_cnt);
3637 			if (ret < 0)
3638 				return ret;
3639 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3640 			dmar_remove_dev_scope(info, rmrr->segment,
3641 				rmrru->devices, rmrru->devices_cnt);
3642 		}
3643 	}
3644 
3645 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
3646 		if (atsru->include_all)
3647 			continue;
3648 
3649 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3650 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3651 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3652 					(void *)atsr + atsr->header.length,
3653 					atsr->segment, atsru->devices,
3654 					atsru->devices_cnt);
3655 			if (ret > 0)
3656 				break;
3657 			else if (ret < 0)
3658 				return ret;
3659 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3660 			if (dmar_remove_dev_scope(info, atsr->segment,
3661 					atsru->devices, atsru->devices_cnt))
3662 				break;
3663 		}
3664 	}
3665 	list_for_each_entry(satcu, &dmar_satc_units, list) {
3666 		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
3667 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3668 			ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
3669 					(void *)satc + satc->header.length,
3670 					satc->segment, satcu->devices,
3671 					satcu->devices_cnt);
3672 			if (ret > 0)
3673 				break;
3674 			else if (ret < 0)
3675 				return ret;
3676 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3677 			if (dmar_remove_dev_scope(info, satc->segment,
3678 					satcu->devices, satcu->devices_cnt))
3679 				break;
3680 		}
3681 	}
3682 
3683 	return 0;
3684 }
3685 
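/*
 * Keep si_domain's identity map in sync with memory hotplug: map ranges
 * that are about to come online, and unmap (and flush) ranges that go
 * offline.
 */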
3686 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3687 				       unsigned long val, void *v)
3688 {
3689 	struct memory_notify *mhp = v;
3690 	unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3691 	unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
3692 			mhp->nr_pages - 1);
3693 
3694 	switch (val) {
3695 	case MEM_GOING_ONLINE:
3696 		if (iommu_domain_identity_map(si_domain,
3697 					      start_vpfn, last_vpfn)) {
3698 			pr_warn("Failed to build identity map for [%lx-%lx]\n",
3699 				start_vpfn, last_vpfn);
3700 			return NOTIFY_BAD;
3701 		}
3702 		break;
3703 
3704 	case MEM_OFFLINE:
3705 	case MEM_CANCEL_ONLINE:
3706 		{
3707 			struct dmar_drhd_unit *drhd;
3708 			struct intel_iommu *iommu;
3709 			LIST_HEAD(freelist);
3710 
3711 			domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
3712 
3713 			rcu_read_lock();
3714 			for_each_active_iommu(iommu, drhd)
3715 				iommu_flush_iotlb_psi(iommu, si_domain,
3716 					start_vpfn, mhp->nr_pages,
3717 					list_empty(&freelist), 0);
3718 			rcu_read_unlock();
3719 			put_pages_list(&freelist);
3720 		}
3721 		break;
3722 	}
3723 
3724 	return NOTIFY_OK;
3725 }
3726 
3727 static struct notifier_block intel_iommu_memory_nb = {
3728 	.notifier_call = intel_iommu_memory_notifier,
3729 	.priority = 0
3730 };
3731 
3732 static void intel_disable_iommus(void)
3733 {
3734 	struct intel_iommu *iommu = NULL;
3735 	struct dmar_drhd_unit *drhd;
3736 
3737 	for_each_iommu(iommu, drhd)
3738 		iommu_disable_translation(iommu);
3739 }
3740 
3741 void intel_iommu_shutdown(void)
3742 {
3743 	struct dmar_drhd_unit *drhd;
3744 	struct intel_iommu *iommu = NULL;
3745 
3746 	if (no_iommu || dmar_disabled)
3747 		return;
3748 
3749 	down_write(&dmar_global_lock);
3750 
3751 	/* Disable PMRs explicitly here. */
3752 	for_each_iommu(iommu, drhd)
3753 		iommu_disable_protect_mem_regions(iommu);
3754 
3755 	/* Make sure the IOMMUs are switched off */
3756 	intel_disable_iommus();
3757 
3758 	up_write(&dmar_global_lock);
3759 }
3760 
3761 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
3762 {
3763 	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
3764 
3765 	return container_of(iommu_dev, struct intel_iommu, iommu);
3766 }
3767 
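/*
 * Read-only sysfs attributes exported for each DMAR unit in its
 * "intel-iommu" attribute group: hardware version, register base
 * address, capability/extended capability registers and domain ID usage.
 */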
3768 static ssize_t version_show(struct device *dev,
3769 			    struct device_attribute *attr, char *buf)
3770 {
3771 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3772 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
3773 	return sprintf(buf, "%d:%d\n",
3774 		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3775 }
3776 static DEVICE_ATTR_RO(version);
3777 
3778 static ssize_t address_show(struct device *dev,
3779 			    struct device_attribute *attr, char *buf)
3780 {
3781 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3782 	return sprintf(buf, "%llx\n", iommu->reg_phys);
3783 }
3784 static DEVICE_ATTR_RO(address);
3785 
3786 static ssize_t cap_show(struct device *dev,
3787 			struct device_attribute *attr, char *buf)
3788 {
3789 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3790 	return sprintf(buf, "%llx\n", iommu->cap);
3791 }
3792 static DEVICE_ATTR_RO(cap);
3793 
3794 static ssize_t ecap_show(struct device *dev,
3795 			 struct device_attribute *attr, char *buf)
3796 {
3797 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3798 	return sprintf(buf, "%llx\n", iommu->ecap);
3799 }
3800 static DEVICE_ATTR_RO(ecap);
3801 
3802 static ssize_t domains_supported_show(struct device *dev,
3803 				      struct device_attribute *attr, char *buf)
3804 {
3805 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3806 	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
3807 }
3808 static DEVICE_ATTR_RO(domains_supported);
3809 
3810 static ssize_t domains_used_show(struct device *dev,
3811 				 struct device_attribute *attr, char *buf)
3812 {
3813 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3814 	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
3815 						  cap_ndoms(iommu->cap)));
3816 }
3817 static DEVICE_ATTR_RO(domains_used);
3818 
3819 static struct attribute *intel_iommu_attrs[] = {
3820 	&dev_attr_version.attr,
3821 	&dev_attr_address.attr,
3822 	&dev_attr_cap.attr,
3823 	&dev_attr_ecap.attr,
3824 	&dev_attr_domains_supported.attr,
3825 	&dev_attr_domains_used.attr,
3826 	NULL,
3827 };
3828 
3829 static struct attribute_group intel_iommu_group = {
3830 	.name = "intel-iommu",
3831 	.attrs = intel_iommu_attrs,
3832 };
3833 
3834 const struct attribute_group *intel_iommu_groups[] = {
3835 	&intel_iommu_group,
3836 	NULL,
3837 };
3838 
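/* Return true if any PCI device in the system is marked external-facing. */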
3839 static inline bool has_external_pci(void)
3840 {
3841 	struct pci_dev *pdev = NULL;
3842 
3843 	for_each_pci_dev(pdev)
3844 		if (pdev->external_facing)
3845 			return true;
3846 
3847 	return false;
3848 }
3849 
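/*
 * Honor the DMAR platform opt-in: if the firmware opted in and an
 * external-facing PCI port exists, force the IOMMU on even when it was
 * disabled on the command line, defaulting to passthrough domains when
 * it had been disabled.
 */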
3850 static int __init platform_optin_force_iommu(void)
3851 {
3852 	if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
3853 		return 0;
3854 
3855 	if (no_iommu || dmar_disabled)
3856 		pr_info("Intel-IOMMU force enabled due to platform opt in\n");
3857 
3858 	/*
3859 	 * If Intel-IOMMU is disabled by default, we will apply the identity
3860 	 * map to all devices except those marked as untrusted.
3861 	 */
3862 	if (dmar_disabled)
3863 		iommu_set_default_passthrough(false);
3864 
3865 	dmar_disabled = 0;
3866 	no_iommu = 0;
3867 
3868 	return 1;
3869 }
3870 
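/*
 * Probe the physical devices behind ACPI namespace devices listed in the
 * DRHD device scopes, so they are placed in IOMMU groups and bound to
 * the Intel IOMMU ops.
 */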
3871 static int __init probe_acpi_namespace_devices(void)
3872 {
3873 	struct dmar_drhd_unit *drhd;
3874 	/* To avoid a -Wunused-but-set-variable warning. */
3875 	struct intel_iommu *iommu __maybe_unused;
3876 	struct device *dev;
3877 	int i, ret = 0;
3878 
3879 	for_each_active_iommu(iommu, drhd) {
3880 		for_each_active_dev_scope(drhd->devices,
3881 					  drhd->devices_cnt, i, dev) {
3882 			struct acpi_device_physical_node *pn;
3883 			struct iommu_group *group;
3884 			struct acpi_device *adev;
3885 
3886 			if (dev->bus != &acpi_bus_type)
3887 				continue;
3888 
3889 			adev = to_acpi_device(dev);
3890 			mutex_lock(&adev->physical_node_lock);
3891 			list_for_each_entry(pn,
3892 					    &adev->physical_node_list, node) {
3893 				group = iommu_group_get(pn->dev);
3894 				if (group) {
3895 					iommu_group_put(group);
3896 					continue;
3897 				}
3898 
3899 				pn->dev->bus->iommu_ops = &intel_iommu_ops;
3900 				ret = iommu_probe_device(pn->dev);
3901 				if (ret)
3902 					break;
3903 			}
3904 			mutex_unlock(&adev->physical_node_lock);
3905 
3906 			if (ret)
3907 				return ret;
3908 		}
3909 	}
3910 
3911 	return 0;
3912 }
3913 
3914 static __init int tboot_force_iommu(void)
3915 {
3916 	if (!tboot_enabled())
3917 		return 0;
3918 
3919 	if (no_iommu || dmar_disabled)
3920 		pr_warn("Forcing Intel-IOMMU to be enabled\n");
3921 
3922 	dmar_disabled = 0;
3923 	no_iommu = 0;
3924 
3925 	return 1;
3926 }
3927 
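/*
 * Main VT-d initialization: parse the DMAR table and device scopes, set
 * up the remapping structures via init_dmars(), register every IOMMU
 * with sysfs and the IOMMU core, then enable translation on each unit
 * that is not ignored and was not already enabled by firmware, and
 * disable its protected memory regions.
 */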
3928 int __init intel_iommu_init(void)
3929 {
3930 	int ret = -ENODEV;
3931 	struct dmar_drhd_unit *drhd;
3932 	struct intel_iommu *iommu;
3933 
3934 	/*
3935 	 * Intel IOMMU is required for a TXT/tboot launch or platform
3936 	 * opt in, so enforce that.
3937 	 */
3938 	force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
3939 		    platform_optin_force_iommu();
3940 
3941 	down_write(&dmar_global_lock);
3942 	if (dmar_table_init()) {
3943 		if (force_on)
3944 			panic("tboot: Failed to initialize DMAR table\n");
3945 		goto out_free_dmar;
3946 	}
3947 
3948 	if (dmar_dev_scope_init() < 0) {
3949 		if (force_on)
3950 			panic("tboot: Failed to initialize DMAR device scope\n");
3951 		goto out_free_dmar;
3952 	}
3953 
3954 	up_write(&dmar_global_lock);
3955 
3956 	/*
3957 	 * The bus notifier takes the dmar_global_lock, so lockdep would
3958 	 * complain if we registered it while still holding the lock.
3959 	 */
3960 	dmar_register_bus_notifier();
3961 
3962 	down_write(&dmar_global_lock);
3963 
3964 	if (!no_iommu)
3965 		intel_iommu_debugfs_init();
3966 
3967 	if (no_iommu || dmar_disabled) {
3968 		/*
3969 		 * We exit the function here to ensure the IOMMU's remapping and
3970 		 * mempool aren't set up, which means that the IOMMU's PMRs
3971 		 * won't be disabled via the call to init_dmars(). So disable
3972 		 * them explicitly here. The PMRs were set up by tboot prior to
3973 		 * calling SENTER, but the kernel is expected to reset/tear
3974 		 * them down.
3975 		 */
3976 		if (intel_iommu_tboot_noforce) {
3977 			for_each_iommu(iommu, drhd)
3978 				iommu_disable_protect_mem_regions(iommu);
3979 		}
3980 
3981 		/*
3982 		 * Make sure the IOMMUs are switched off, even when we
3983 		 * boot into a kexec kernel and the previous kernel left
3984 		 * them enabled.
3985 		 */
3986 		intel_disable_iommus();
3987 		goto out_free_dmar;
3988 	}
3989 
3990 	if (list_empty(&dmar_rmrr_units))
3991 		pr_info("No RMRR found\n");
3992 
3993 	if (list_empty(&dmar_atsr_units))
3994 		pr_info("No ATSR found\n");
3995 
3996 	if (list_empty(&dmar_satc_units))
3997 		pr_info("No SATC found\n");
3998 
3999 	init_no_remapping_devices();
4000 
4001 	ret = init_dmars();
4002 	if (ret) {
4003 		if (force_on)
4004 			panic("tboot: Failed to initialize DMARs\n");
4005 		pr_err("Initialization failed\n");
4006 		goto out_free_dmar;
4007 	}
4008 	up_write(&dmar_global_lock);
4009 
4010 	init_iommu_pm_ops();
4011 
4012 	down_read(&dmar_global_lock);
4013 	for_each_active_iommu(iommu, drhd) {
4014 		/*
4015 		 * The flush queue implementation does not perform
4016 		 * page-selective invalidations that are required for efficient
4017 		 * TLB flushes in virtual environments.  The benefit of batching
4018 		 * is likely to be much lower than the overhead of synchronizing
4019 		 * the virtual and physical IOMMU page-tables.
4020 		 */
4021 		if (cap_caching_mode(iommu->cap)) {
4022 			pr_info_once("IOMMU batching disallowed due to virtualization\n");
4023 			iommu_set_dma_strict();
4024 		}
4025 		iommu_device_sysfs_add(&iommu->iommu, NULL,
4026 				       intel_iommu_groups,
4027 				       "%s", iommu->name);
4028 		iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
4029 	}
4030 	up_read(&dmar_global_lock);
4031 
4032 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4033 	if (si_domain && !hw_pass_through)
4034 		register_memory_notifier(&intel_iommu_memory_nb);
4035 
4036 	down_read(&dmar_global_lock);
4037 	if (probe_acpi_namespace_devices())
4038 		pr_warn("ACPI namespace devices didn't probe correctly\n");
4039 
4040 	/* Finally, we enable the DMA remapping hardware. */
4041 	for_each_iommu(iommu, drhd) {
4042 		if (!drhd->ignored && !translation_pre_enabled(iommu))
4043 			iommu_enable_translation(iommu);
4044 
4045 		iommu_disable_protect_mem_regions(iommu);
4046 	}
4047 	up_read(&dmar_global_lock);
4048 
4049 	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4050 
4051 	intel_iommu_enabled = 1;
4052 
4053 	return 0;
4054 
4055 out_free_dmar:
4056 	intel_iommu_free_dmars();
4057 	up_write(&dmar_global_lock);
4058 	return ret;
4059 }
4060 
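/* pci_for_each_dma_alias() callback: clear the context entry for one alias. */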
4061 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4062 {
4063 	struct device_domain_info *info = opaque;
4064 
4065 	domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
4066 	return 0;
4067 }
4068 
4069 /*
4070  * NB - intel-iommu lacks any sort of reference counting for the users of
4071  * dependent devices.  If multiple endpoints have intersecting dependent
4072  * devices, unbinding the driver from any one of them will possibly leave
4073  * the others unable to operate.
4074  */
4075 static void domain_context_clear(struct device_domain_info *info)
4076 {
4077 	if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
4078 		return;
4079 
4080 	pci_for_each_dma_alias(to_pci_dev(info->dev),
4081 			       &domain_context_clear_one_cb, info);
4082 }
4083 
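/*
 * Detach @dev from its domain: tear down the RID2PASID entry, context
 * mapping, device IOTLB and PASID table (for ordinary devices), then
 * unlink the device from the domain and drop the domain's reference on
 * the IOMMU.
 */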
4084 static void dmar_remove_one_dev_info(struct device *dev)
4085 {
4086 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4087 	struct dmar_domain *domain = info->domain;
4088 	struct intel_iommu *iommu = info->iommu;
4089 	unsigned long flags;
4090 
4091 	if (!dev_is_real_dma_subdevice(info->dev)) {
4092 		if (dev_is_pci(info->dev) && sm_supported(iommu))
4093 			intel_pasid_tear_down_entry(iommu, info->dev,
4094 					PASID_RID2PASID, false);
4095 
4096 		iommu_disable_dev_iotlb(info);
4097 		domain_context_clear(info);
4098 		intel_pasid_free_table(info->dev);
4099 	}
4100 
4101 	spin_lock_irqsave(&domain->lock, flags);
4102 	list_del(&info->link);
4103 	spin_unlock_irqrestore(&domain->lock, flags);
4104 
4105 	domain_detach_iommu(domain, iommu);
4106 	info->domain = NULL;
4107 }
4108 
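/*
 * Set up a domain allocated through the IOMMU API for the given guest
 * address width: compute the AGAW and allocate the top-level page table.
 */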
4109 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4110 {
4111 	int adjust_width;
4112 
4113 	/* calculate AGAW */
4114 	domain->gaw = guest_width;
4115 	adjust_width = guestwidth_to_adjustwidth(guest_width);
4116 	domain->agaw = width_to_agaw(adjust_width);
4117 
4118 	domain->iommu_coherency = false;
4119 	domain->iommu_superpage = 0;
4120 	domain->max_addr = 0;
4121 
4122 	/* always allocate the top pgd */
4123 	domain->pgd = alloc_pgtable_page(domain->nid);
4124 	if (!domain->pgd)
4125 		return -ENOMEM;
4126 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4127 	return 0;
4128 }
4129 
4130 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4131 {
4132 	struct dmar_domain *dmar_domain;
4133 	struct iommu_domain *domain;
4134 
4135 	switch (type) {
4136 	case IOMMU_DOMAIN_DMA:
4137 	case IOMMU_DOMAIN_DMA_FQ:
4138 	case IOMMU_DOMAIN_UNMANAGED:
4139 		dmar_domain = alloc_domain(type);
4140 		if (!dmar_domain) {
4141 			pr_err("Can't allocate dmar_domain\n");
4142 			return NULL;
4143 		}
4144 		if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4145 			pr_err("Domain initialization failed\n");
4146 			domain_exit(dmar_domain);
4147 			return NULL;
4148 		}
4149 
4150 		domain = &dmar_domain->domain;
4151 		domain->geometry.aperture_start = 0;
4152 		domain->geometry.aperture_end   =
4153 				__DOMAIN_MAX_ADDR(dmar_domain->gaw);
4154 		domain->geometry.force_aperture = true;
4155 
4156 		return domain;
4157 	case IOMMU_DOMAIN_IDENTITY:
4158 		return &si_domain->domain;
4159 	default:
4160 		return NULL;
4161 	}
4162 
4163 	return NULL;
4164 }
4165 
4166 static void intel_iommu_domain_free(struct iommu_domain *domain)
4167 {
4168 	if (domain != &si_domain->domain)
4169 		domain_exit(to_dmar_domain(domain));
4170 }
4171 
4172 static int prepare_domain_attach_device(struct iommu_domain *domain,
4173 					struct device *dev)
4174 {
4175 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4176 	struct intel_iommu *iommu;
4177 	int addr_width;
4178 
4179 	iommu = device_to_iommu(dev, NULL, NULL);
4180 	if (!iommu)
4181 		return -ENODEV;
4182 
4183 	if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
4184 		return -EOPNOTSUPP;
4185 
4186 	/* check if this iommu agaw is sufficient for max mapped address */
4187 	addr_width = agaw_to_width(iommu->agaw);
4188 	if (addr_width > cap_mgaw(iommu->cap))
4189 		addr_width = cap_mgaw(iommu->cap);
4190 
4191 	if (dmar_domain->max_addr > (1LL << addr_width)) {
4192 		dev_err(dev, "%s: iommu width (%d) is not "
4193 		        "sufficient for the mapped address (%llx)\n",
4194 		        __func__, addr_width, dmar_domain->max_addr);
4195 		return -EFAULT;
4196 	}
4197 	dmar_domain->gaw = addr_width;
4198 
4199 	/*
4200 	 * Knock out extra levels of page tables if necessary
4201 	 */
4202 	while (iommu->agaw < dmar_domain->agaw) {
4203 		struct dma_pte *pte;
4204 
4205 		pte = dmar_domain->pgd;
4206 		if (dma_pte_present(pte)) {
4207 			dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
4208 			free_pgtable_page(pte);
4209 		}
4210 		dmar_domain->agaw--;
4211 	}
4212 
4213 	return 0;
4214 }
4215 
4216 static int intel_iommu_attach_device(struct iommu_domain *domain,
4217 				     struct device *dev)
4218 {
4219 	int ret;
4220 
4221 	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
4222 	    device_is_rmrr_locked(dev)) {
4223 		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4224 		return -EPERM;
4225 	}
4226 
4227 	/* Normally dev is not mapped yet; if it is, remove the stale attachment first. */
4228 	if (unlikely(domain_context_mapped(dev))) {
4229 		struct device_domain_info *info = dev_iommu_priv_get(dev);
4230 
4231 		if (info->domain)
4232 			dmar_remove_one_dev_info(dev);
4233 	}
4234 
4235 	ret = prepare_domain_attach_device(domain, dev);
4236 	if (ret)
4237 		return ret;
4238 
4239 	return domain_add_dev_info(to_dmar_domain(domain), dev);
4240 }
4241 
4242 static void intel_iommu_detach_device(struct iommu_domain *domain,
4243 				      struct device *dev)
4244 {
4245 	dmar_remove_one_dev_info(dev);
4246 }
4247 
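/*
 * Map [iova, iova + size) to @hpa with the requested protection, after
 * checking that the range fits within the domain's address width and
 * updating the domain's max_addr bookkeeping.
 */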
4248 static int intel_iommu_map(struct iommu_domain *domain,
4249 			   unsigned long iova, phys_addr_t hpa,
4250 			   size_t size, int iommu_prot, gfp_t gfp)
4251 {
4252 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4253 	u64 max_addr;
4254 	int prot = 0;
4255 
4256 	if (iommu_prot & IOMMU_READ)
4257 		prot |= DMA_PTE_READ;
4258 	if (iommu_prot & IOMMU_WRITE)
4259 		prot |= DMA_PTE_WRITE;
4260 	if (dmar_domain->set_pte_snp)
4261 		prot |= DMA_PTE_SNP;
4262 
4263 	max_addr = iova + size;
4264 	if (dmar_domain->max_addr < max_addr) {
4265 		u64 end;
4266 
4267 		/* check if minimum agaw is sufficient for mapped address */
4268 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4269 		if (end < max_addr) {
4270 			pr_err("%s: iommu width (%d) is not "
4271 			       "sufficient for the mapped address (%llx)\n",
4272 			       __func__, dmar_domain->gaw, max_addr);
4273 			return -EFAULT;
4274 		}
4275 		dmar_domain->max_addr = max_addr;
4276 	}
4277 	/* Round up size to next multiple of PAGE_SIZE, if it and
4278 	   the low bits of hpa would take us onto the next page */
4279 	size = aligned_nrpages(hpa, size);
4280 	return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4281 				hpa >> VTD_PAGE_SHIFT, size, prot);
4282 }
4283 
4284 static int intel_iommu_map_pages(struct iommu_domain *domain,
4285 				 unsigned long iova, phys_addr_t paddr,
4286 				 size_t pgsize, size_t pgcount,
4287 				 int prot, gfp_t gfp, size_t *mapped)
4288 {
4289 	unsigned long pgshift = __ffs(pgsize);
4290 	size_t size = pgcount << pgshift;
4291 	int ret;
4292 
4293 	if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
4294 		return -EINVAL;
4295 
4296 	if (!IS_ALIGNED(iova | paddr, pgsize))
4297 		return -EINVAL;
4298 
4299 	ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
4300 	if (!ret && mapped)
4301 		*mapped = size;
4302 
4303 	return ret;
4304 }
4305 
4306 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4307 				unsigned long iova, size_t size,
4308 				struct iommu_iotlb_gather *gather)
4309 {
4310 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4311 	unsigned long start_pfn, last_pfn;
4312 	int level = 0;
4313 
4314 	/* Cope with horrid API which requires us to unmap more than the
4315 	   size argument if it happens to be a large-page mapping. */
4316 	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4317 
4318 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4319 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4320 
4321 	start_pfn = iova >> VTD_PAGE_SHIFT;
4322 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4323 
4324 	domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
4325 
4326 	if (dmar_domain->max_addr == iova + size)
4327 		dmar_domain->max_addr = iova;
4328 
4329 	iommu_iotlb_gather_add_page(domain, gather, iova, size);
4330 
4331 	return size;
4332 }
4333 
4334 static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
4335 				      unsigned long iova,
4336 				      size_t pgsize, size_t pgcount,
4337 				      struct iommu_iotlb_gather *gather)
4338 {
4339 	unsigned long pgshift = __ffs(pgsize);
4340 	size_t size = pgcount << pgshift;
4341 
4342 	return intel_iommu_unmap(domain, iova, size, gather);
4343 }
4344 
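/*
 * Flush the range gathered during unmap on every IOMMU attached to the
 * domain, then free the page-table pages queued on the gather freelist.
 */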
4345 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
4346 				 struct iommu_iotlb_gather *gather)
4347 {
4348 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4349 	unsigned long iova_pfn = IOVA_PFN(gather->start);
4350 	size_t size = gather->end - gather->start;
4351 	struct iommu_domain_info *info;
4352 	unsigned long start_pfn;
4353 	unsigned long nrpages;
4354 	unsigned long i;
4355 
4356 	nrpages = aligned_nrpages(gather->start, size);
4357 	start_pfn = mm_to_dma_pfn(iova_pfn);
4358 
4359 	xa_for_each(&dmar_domain->iommu_array, i, info)
4360 		iommu_flush_iotlb_psi(info->iommu, dmar_domain,
4361 				      start_pfn, nrpages,
4362 				      list_empty(&gather->freelist), 0);
4363 
4364 	put_pages_list(&gather->freelist);
4365 }
4366 
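/*
 * Translate @iova through the domain's page table; returns 0 if no
 * mapping is present.
 */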
4367 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4368 					    dma_addr_t iova)
4369 {
4370 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4371 	struct dma_pte *pte;
4372 	int level = 0;
4373 	u64 phys = 0;
4374 
4375 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4376 	if (pte && dma_pte_present(pte))
4377 		phys = dma_pte_addr(pte) +
4378 			(iova & (BIT_MASK(level_to_offset_bits(level) +
4379 						VTD_PAGE_SHIFT) - 1));
4380 
4381 	return phys;
4382 }
4383 
4384 static bool domain_support_force_snooping(struct dmar_domain *domain)
4385 {
4386 	struct device_domain_info *info;
4387 	bool support = true;
4388 
4389 	assert_spin_locked(&domain->lock);
4390 	list_for_each_entry(info, &domain->devices, link) {
4391 		if (!ecap_sc_support(info->iommu->ecap)) {
4392 			support = false;
4393 			break;
4394 		}
4395 	}
4396 
4397 	return support;
4398 }
4399 
4400 static void domain_set_force_snooping(struct dmar_domain *domain)
4401 {
4402 	struct device_domain_info *info;
4403 
4404 	assert_spin_locked(&domain->lock);
4405 	/*
4406 	 * The second-level page table supports per-PTE snoop control. The
4407 	 * iommu_map() interface will handle this by setting the SNP bit.
4408 	 */
4409 	if (!domain_use_first_level(domain)) {
4410 		domain->set_pte_snp = true;
4411 		return;
4412 	}
4413 
4414 	list_for_each_entry(info, &domain->devices, link)
4415 		intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
4416 						     PASID_RID2PASID);
4417 }
4418 
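/*
 * Enforce cache coherency (snooping) for DMA through this domain.  This
 * only succeeds if every currently attached IOMMU supports snoop
 * control; later attaches are rejected in prepare_domain_attach_device()
 * when the IOMMU lacks it.
 */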
4419 static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
4420 {
4421 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4422 	unsigned long flags;
4423 
4424 	if (dmar_domain->force_snooping)
4425 		return true;
4426 
4427 	spin_lock_irqsave(&dmar_domain->lock, flags);
4428 	if (!domain_support_force_snooping(dmar_domain)) {
4429 		spin_unlock_irqrestore(&dmar_domain->lock, flags);
4430 		return false;
4431 	}
4432 
4433 	domain_set_force_snooping(dmar_domain);
4434 	dmar_domain->force_snooping = true;
4435 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
4436 
4437 	return true;
4438 }
4439 
4440 static bool intel_iommu_capable(enum iommu_cap cap)
4441 {
4442 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
4443 		return true;
4444 	if (cap == IOMMU_CAP_INTR_REMAP)
4445 		return irq_remapping_enabled == 1;
4446 	if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION)
4447 		return dmar_platform_optin();
4448 
4449 	return false;
4450 }
4451 
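/*
 * Called when the IOMMU core first sees a device: allocate its
 * device_domain_info and probe ATS, PASID and PRI support for PCI
 * devices.
 */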
4452 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
4453 {
4454 	struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
4455 	struct device_domain_info *info;
4456 	struct intel_iommu *iommu;
4457 	u8 bus, devfn;
4458 
4459 	iommu = device_to_iommu(dev, &bus, &devfn);
4460 	if (!iommu)
4461 		return ERR_PTR(-ENODEV);
4462 
4463 	info = kzalloc(sizeof(*info), GFP_KERNEL);
4464 	if (!info)
4465 		return ERR_PTR(-ENOMEM);
4466 
4467 	if (dev_is_real_dma_subdevice(dev)) {
4468 		info->bus = pdev->bus->number;
4469 		info->devfn = pdev->devfn;
4470 		info->segment = pci_domain_nr(pdev->bus);
4471 	} else {
4472 		info->bus = bus;
4473 		info->devfn = devfn;
4474 		info->segment = iommu->segment;
4475 	}
4476 
4477 	info->dev = dev;
4478 	info->iommu = iommu;
4479 	if (dev_is_pci(dev)) {
4480 		if (ecap_dev_iotlb_support(iommu->ecap) &&
4481 		    pci_ats_supported(pdev) &&
4482 		    dmar_ats_supported(pdev, iommu))
4483 			info->ats_supported = 1;
4484 
4485 		if (sm_supported(iommu)) {
4486 			if (pasid_supported(iommu)) {
4487 				int features = pci_pasid_features(pdev);
4488 
4489 				if (features >= 0)
4490 					info->pasid_supported = features | 1;
4491 			}
4492 
4493 			if (info->ats_supported && ecap_prs(iommu->ecap) &&
4494 			    pci_pri_supported(pdev))
4495 				info->pri_supported = 1;
4496 		}
4497 	}
4498 
4499 	dev_iommu_priv_set(dev, info);
4500 
4501 	return &iommu->iommu;
4502 }
4503 
4504 static void intel_iommu_release_device(struct device *dev)
4505 {
4506 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4507 
4508 	dmar_remove_one_dev_info(dev);
4509 	dev_iommu_priv_set(dev, NULL);
4510 	kfree(info);
4511 	set_dma_ops(dev, NULL);
4512 }
4513 
4514 static void intel_iommu_probe_finalize(struct device *dev)
4515 {
4516 	set_dma_ops(dev, NULL);
4517 	iommu_setup_dma_ops(dev, 0, U64_MAX);
4518 }
4519 
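/*
 * Report reserved regions for @device: matching RMRRs (direct or
 * relaxable), the ISA range when the floppy workaround is enabled, and
 * the IOAPIC/MSI range.
 */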
4520 static void intel_iommu_get_resv_regions(struct device *device,
4521 					 struct list_head *head)
4522 {
4523 	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
4524 	struct iommu_resv_region *reg;
4525 	struct dmar_rmrr_unit *rmrr;
4526 	struct device *i_dev;
4527 	int i;
4528 
4529 	down_read(&dmar_global_lock);
4530 	for_each_rmrr_units(rmrr) {
4531 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
4532 					  i, i_dev) {
4533 			struct iommu_resv_region *resv;
4534 			enum iommu_resv_type type;
4535 			size_t length;
4536 
4537 			if (i_dev != device &&
4538 			    !is_downstream_to_pci_bridge(device, i_dev))
4539 				continue;
4540 
4541 			length = rmrr->end_address - rmrr->base_address + 1;
4542 
4543 			type = device_rmrr_is_relaxable(device) ?
4544 				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
4545 
4546 			resv = iommu_alloc_resv_region(rmrr->base_address,
4547 						       length, prot, type);
4548 			if (!resv)
4549 				break;
4550 
4551 			list_add_tail(&resv->list, head);
4552 		}
4553 	}
4554 	up_read(&dmar_global_lock);
4555 
4556 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
4557 	if (dev_is_pci(device)) {
4558 		struct pci_dev *pdev = to_pci_dev(device);
4559 
4560 		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
4561 			reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
4562 						   IOMMU_RESV_DIRECT_RELAXABLE);
4563 			if (reg)
4564 				list_add_tail(&reg->list, head);
4565 		}
4566 	}
4567 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
4568 
4569 	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
4570 				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
4571 				      0, IOMMU_RESV_MSI);
4572 	if (!reg)
4573 		return;
4574 	list_add_tail(&reg->list, head);
4575 }
4576 
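/*
 * Enable PASID support for @dev: set the PASID-enable bit in its legacy
 * context entry (flushing the context cache if needed) and turn on the
 * device-side capabilities via iommu_enable_dev_iotlb() if PASID was not
 * enabled already.
 */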
4577 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
4578 {
4579 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4580 	struct context_entry *context;
4581 	struct dmar_domain *domain;
4582 	u64 ctx_lo;
4583 	int ret;
4584 
4585 	domain = info->domain;
4586 	if (!domain)
4587 		return -EINVAL;
4588 
4589 	spin_lock(&iommu->lock);
4590 	ret = -EINVAL;
4591 	if (!info->pasid_supported)
4592 		goto out;
4593 
4594 	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
4595 	if (WARN_ON(!context))
4596 		goto out;
4597 
4598 	ctx_lo = context[0].lo;
4599 
4600 	if (!(ctx_lo & CONTEXT_PASIDE)) {
4601 		ctx_lo |= CONTEXT_PASIDE;
4602 		context[0].lo = ctx_lo;
4603 		wmb();
4604 		iommu->flush.flush_context(iommu,
4605 					   domain_id_iommu(domain, iommu),
4606 					   PCI_DEVID(info->bus, info->devfn),
4607 					   DMA_CCMD_MASK_NOBIT,
4608 					   DMA_CCMD_DEVICE_INVL);
4609 	}
4610 
4611 	/* Enable PASID support in the device, if it wasn't already */
4612 	if (!info->pasid_enabled)
4613 		iommu_enable_dev_iotlb(info);
4614 
4615 	ret = 0;
4616 
4617  out:
4618 	spin_unlock(&iommu->lock);
4619 
4620 	return ret;
4621 }
4622 
4623 static struct iommu_group *intel_iommu_device_group(struct device *dev)
4624 {
4625 	if (dev_is_pci(dev))
4626 		return pci_device_group(dev);
4627 	return generic_device_group(dev);
4628 }
4629 
4630 static int intel_iommu_enable_sva(struct device *dev)
4631 {
4632 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4633 	struct intel_iommu *iommu;
4634 	int ret;
4635 
4636 	if (!info || dmar_disabled)
4637 		return -EINVAL;
4638 
4639 	iommu = info->iommu;
4640 	if (!iommu)
4641 		return -EINVAL;
4642 
4643 	if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
4644 		return -ENODEV;
4645 
4646 	if (intel_iommu_enable_pasid(iommu, dev))
4647 		return -ENODEV;
4648 
4649 	if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
4650 		return -EINVAL;
4651 
4652 	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
4653 	if (!ret)
4654 		ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
4655 
4656 	return ret;
4657 }
4658 
4659 static int intel_iommu_disable_sva(struct device *dev)
4660 {
4661 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4662 	struct intel_iommu *iommu = info->iommu;
4663 	int ret;
4664 
4665 	ret = iommu_unregister_device_fault_handler(dev);
4666 	if (!ret)
4667 		ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
4668 
4669 	return ret;
4670 }
4671 
4672 static int intel_iommu_enable_iopf(struct device *dev)
4673 {
4674 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4675 
4676 	if (info && info->pri_supported)
4677 		return 0;
4678 
4679 	return -ENODEV;
4680 }
4681 
4682 static int
4683 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
4684 {
4685 	switch (feat) {
4686 	case IOMMU_DEV_FEAT_IOPF:
4687 		return intel_iommu_enable_iopf(dev);
4688 
4689 	case IOMMU_DEV_FEAT_SVA:
4690 		return intel_iommu_enable_sva(dev);
4691 
4692 	default:
4693 		return -ENODEV;
4694 	}
4695 }
4696 
4697 static int
4698 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
4699 {
4700 	switch (feat) {
4701 	case IOMMU_DEV_FEAT_IOPF:
4702 		return 0;
4703 
4704 	case IOMMU_DEV_FEAT_SVA:
4705 		return intel_iommu_disable_sva(dev);
4706 
4707 	default:
4708 		return -ENODEV;
4709 	}
4710 }
4711 
4712 static bool intel_iommu_is_attach_deferred(struct device *dev)
4713 {
4714 	struct device_domain_info *info = dev_iommu_priv_get(dev);
4715 
4716 	return translation_pre_enabled(info->iommu) && !info->domain;
4717 }
4718 
4719 /*
4720  * Check that the device does not live on an external-facing PCI port that is
4721  * marked as untrusted. Such devices must not be allowed to apply quirks and
4722  * thereby bypass the IOMMU restrictions.
4723  */
4724 static bool risky_device(struct pci_dev *pdev)
4725 {
4726 	if (pdev->untrusted) {
4727 		pci_info(pdev,
4728 			 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
4729 			 pdev->vendor, pdev->device);
4730 		pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
4731 		return true;
4732 	}
4733 	return false;
4734 }
4735 
4736 static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
4737 				       unsigned long iova, size_t size)
4738 {
4739 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4740 	unsigned long pages = aligned_nrpages(iova, size);
4741 	unsigned long pfn = iova >> VTD_PAGE_SHIFT;
4742 	struct iommu_domain_info *info;
4743 	unsigned long i;
4744 
4745 	xa_for_each(&dmar_domain->iommu_array, i, info)
4746 		__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
4747 }
4748 
4749 const struct iommu_ops intel_iommu_ops = {
4750 	.capable		= intel_iommu_capable,
4751 	.domain_alloc		= intel_iommu_domain_alloc,
4752 	.probe_device		= intel_iommu_probe_device,
4753 	.probe_finalize		= intel_iommu_probe_finalize,
4754 	.release_device		= intel_iommu_release_device,
4755 	.get_resv_regions	= intel_iommu_get_resv_regions,
4756 	.device_group		= intel_iommu_device_group,
4757 	.dev_enable_feat	= intel_iommu_dev_enable_feat,
4758 	.dev_disable_feat	= intel_iommu_dev_disable_feat,
4759 	.is_attach_deferred	= intel_iommu_is_attach_deferred,
4760 	.def_domain_type	= device_def_domain_type,
4761 	.pgsize_bitmap		= SZ_4K,
4762 #ifdef CONFIG_INTEL_IOMMU_SVM
4763 	.sva_bind		= intel_svm_bind,
4764 	.sva_unbind		= intel_svm_unbind,
4765 	.sva_get_pasid		= intel_svm_get_pasid,
4766 	.page_response		= intel_svm_page_response,
4767 #endif
4768 	.default_domain_ops = &(const struct iommu_domain_ops) {
4769 		.attach_dev		= intel_iommu_attach_device,
4770 		.detach_dev		= intel_iommu_detach_device,
4771 		.map_pages		= intel_iommu_map_pages,
4772 		.unmap_pages		= intel_iommu_unmap_pages,
4773 		.iotlb_sync_map		= intel_iommu_iotlb_sync_map,
4774 		.flush_iotlb_all        = intel_flush_iotlb_all,
4775 		.iotlb_sync		= intel_iommu_tlb_sync,
4776 		.iova_to_phys		= intel_iommu_iova_to_phys,
4777 		.free			= intel_iommu_domain_free,
4778 		.enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
4779 	}
4780 };
4781 
4782 static void quirk_iommu_igfx(struct pci_dev *dev)
4783 {
4784 	if (risky_device(dev))
4785 		return;
4786 
4787 	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
4788 	dmar_map_gfx = 0;
4789 }
4790 
4791 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4792 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
4793 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
4794 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
4795 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
4796 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
4797 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
4798 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
4799 
4800 /* Broadwell igfx malfunctions with dmar */
4801 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
4802 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
4803 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
4804 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
4805 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
4806 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
4807 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
4808 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
4809 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
4810 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
4811 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
4812 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
4813 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
4814 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
4815 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
4816 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
4817 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
4818 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
4819 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
4820 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
4821 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
4822 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
4823 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
4824 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
4825 
4826 static void quirk_iommu_rwbf(struct pci_dev *dev)
4827 {
4828 	if (risky_device(dev))
4829 		return;
4830 
4831 	/*
4832 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
4833 	 * but needs it. The same seems to hold for the desktop versions.
4834 	 */
4835 	pci_info(dev, "Forcing write-buffer flush capability\n");
4836 	rwbf_quirk = 1;
4837 }
4838 
4839 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4840 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4841 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4842 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4843 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4844 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4845 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4846 
4847 #define GGC 0x52
4848 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
4849 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
4850 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
4851 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
4852 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
4853 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
4854 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
4855 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
4856 
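/*
 * Ironlake/Calpella: if the BIOS did not allocate a shadow GTT for VT-d
 * (GGC register), graphics translation cannot work, so disable the IOMMU
 * for graphics; otherwise force strict IOTLB flushing since the GPU must
 * be idle before a flush.
 */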
4857 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4858 {
4859 	unsigned short ggc;
4860 
4861 	if (risky_device(dev))
4862 		return;
4863 
4864 	if (pci_read_config_word(dev, GGC, &ggc))
4865 		return;
4866 
4867 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4868 		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4869 		dmar_map_gfx = 0;
4870 	} else if (dmar_map_gfx) {
4871 		/* we have to ensure the gfx device is idle before we flush */
4872 		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
4873 		iommu_set_dma_strict();
4874 	}
4875 }
4876 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4877 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4878 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4879 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4880 
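/*
 * Some integrated graphics (matched by device ID) must keep translation
 * enabled on their dedicated DMAR unit; set iommu_skip_te_disable so the
 * TE bit is not cleared for them.
 */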
4881 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
4882 {
4883 	unsigned short ver;
4884 
4885 	if (!IS_GFX_DEVICE(dev))
4886 		return;
4887 
4888 	ver = (dev->device >> 8) & 0xff;
4889 	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
4890 	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
4891 	    ver != 0x9a && ver != 0xa7)
4892 		return;
4893 
4894 	if (risky_device(dev))
4895 		return;
4896 
4897 	pci_info(dev, "Skip IOMMU disabling for graphics\n");
4898 	iommu_skip_te_disable = 1;
4899 }
4900 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
4901 
4902 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4903    ISOCH DMAR unit for the Azalia sound device, but not give it any
4904    TLB entries, which causes it to deadlock. Check for that.  We do
4905    this in a function called from init_dmars(), instead of in a PCI
4906    quirk, because we don't want to print the obnoxious "BIOS broken"
4907    message if VT-d is actually disabled.
4908 */
4909 static void __init check_tylersburg_isoch(void)
4910 {
4911 	struct pci_dev *pdev;
4912 	uint32_t vtisochctrl;
4913 
4914 	/* If there's no Azalia in the system anyway, forget it. */
4915 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4916 	if (!pdev)
4917 		return;
4918 
4919 	if (risky_device(pdev)) {
4920 		pci_dev_put(pdev);
4921 		return;
4922 	}
4923 
4924 	pci_dev_put(pdev);
4925 
4926 	/* System Management Registers. Might be hidden, in which case
4927 	   we can't do the sanity check. But that's OK, because the
4928 	   known-broken BIOSes _don't_ actually hide it, so far. */
4929 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4930 	if (!pdev)
4931 		return;
4932 
4933 	if (risky_device(pdev)) {
4934 		pci_dev_put(pdev);
4935 		return;
4936 	}
4937 
4938 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4939 		pci_dev_put(pdev);
4940 		return;
4941 	}
4942 
4943 	pci_dev_put(pdev);
4944 
4945 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4946 	if (vtisochctrl & 1)
4947 		return;
4948 
4949 	/* Drop all bits other than the number of TLB entries */
4950 	vtisochctrl &= 0x1c;
4951 
4952 	/* If we have the recommended number of TLB entries (16), fine. */
4953 	if (vtisochctrl == 0x10)
4954 		return;
4955 
4956 	/* Zero TLB entries? The unit would deadlock; identity-map Azalia instead. */
4957 	if (!vtisochctrl) {
4958 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4959 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4960 		     dmi_get_system_info(DMI_BIOS_VENDOR),
4961 		     dmi_get_system_info(DMI_BIOS_VERSION),
4962 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
4963 		iommu_identity_mapping |= IDENTMAP_AZALIA;
4964 		return;
4965 	}
4966 
4967 	pr_warn("Recommended number of TLB entries for the ISOCH unit is 16; your BIOS set %d\n",
4968 	       vtisochctrl);
4969 }
4970