xref: /linux/drivers/iommu/intel/iommu.c (revision 208eed95fc710827b100266c9450ae84d46727bd)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2006-2014 Intel Corporation.
4  *
5  * Authors: David Woodhouse <dwmw2@infradead.org>,
6  *          Ashok Raj <ashok.raj@intel.com>,
7  *          Shaohua Li <shaohua.li@intel.com>,
8  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9  *          Fenghua Yu <fenghua.yu@intel.com>
10  *          Joerg Roedel <jroedel@suse.de>
11  */
12 
13 #define pr_fmt(fmt)     "DMAR: " fmt
14 #define dev_fmt(fmt)    pr_fmt(fmt)
15 
16 #include <linux/crash_dump.h>
17 #include <linux/dma-direct.h>
18 #include <linux/dmi.h>
19 #include <linux/memory.h>
20 #include <linux/pci.h>
21 #include <linux/pci-ats.h>
22 #include <linux/spinlock.h>
23 #include <linux/syscore_ops.h>
24 #include <linux/tboot.h>
25 #include <uapi/linux/iommufd.h>
26 
27 #include "iommu.h"
28 #include "../dma-iommu.h"
29 #include "../irq_remapping.h"
30 #include "../iommu-pages.h"
31 #include "pasid.h"
32 #include "perfmon.h"
33 
34 #define ROOT_SIZE		VTD_PAGE_SIZE
35 #define CONTEXT_SIZE		VTD_PAGE_SIZE
36 
37 #define IS_GFX_DEVICE(pdev) pci_is_display(pdev)
38 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
39 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
40 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
41 
42 #define IOAPIC_RANGE_START	(0xfee00000)
43 #define IOAPIC_RANGE_END	(0xfeefffff)
44 #define IOVA_START_ADDR		(0x1000)
45 
46 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
47 
48 static void __init check_tylersburg_isoch(void);
49 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
50 					  bool enable);
51 static int rwbf_quirk;
52 
53 #define rwbf_required(iommu)	(rwbf_quirk || cap_rwbf((iommu)->cap))
54 
55 /*
56  * Set to 1 to panic the kernel if VT-d can't be enabled successfully
57  * (used when the kernel is launched with TXT).
58  */
59 static int force_on = 0;
60 static int intel_iommu_tboot_noforce;
61 static int no_platform_optin;
62 
63 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
64 
65 /*
66  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
67  * if marked present.
68  */
69 static phys_addr_t root_entry_lctp(struct root_entry *re)
70 {
71 	if (!(re->lo & 1))
72 		return 0;
73 
74 	return re->lo & VTD_PAGE_MASK;
75 }
76 
77 /*
78  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
79  * if marked present.
80  */
81 static phys_addr_t root_entry_uctp(struct root_entry *re)
82 {
83 	if (!(re->hi & 1))
84 		return 0;
85 
86 	return re->hi & VTD_PAGE_MASK;
87 }
88 
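/*
 * rb-tree comparison helpers for the per-IOMMU device tree. Devices are
 * keyed by their PCI requester ID, i.e. PCI_DEVID(bus, devfn).
 */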
89 static int device_rid_cmp_key(const void *key, const struct rb_node *node)
90 {
91 	struct device_domain_info *info =
92 		rb_entry(node, struct device_domain_info, node);
93 	const u16 *rid_lhs = key;
94 
95 	if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
96 		return -1;
97 
98 	if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
99 		return 1;
100 
101 	return 0;
102 }
103 
104 static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
105 {
106 	struct device_domain_info *info =
107 		rb_entry(lhs, struct device_domain_info, node);
108 	u16 key = PCI_DEVID(info->bus, info->devfn);
109 
110 	return device_rid_cmp_key(&key, rhs);
111 }
112 
113 /*
114  * Looks up an IOMMU-probed device using its source ID.
115  *
116  * Returns the pointer to the device if there is a match. Otherwise,
117  * returns NULL.
118  *
119  * Note that this helper doesn't guarantee that the device won't be
120  * released by the iommu subsystem after being returned. The caller
121  * should use its own synchronization mechanism to avoid the device
122  * being released during its use if that is a possibility.
123  */
124 struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid)
125 {
126 	struct device_domain_info *info = NULL;
127 	struct rb_node *node;
128 	unsigned long flags;
129 
130 	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
131 	node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key);
132 	if (node)
133 		info = rb_entry(node, struct device_domain_info, node);
134 	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
135 
136 	return info ? info->dev : NULL;
137 }
138 
139 static int device_rbtree_insert(struct intel_iommu *iommu,
140 				struct device_domain_info *info)
141 {
142 	struct rb_node *curr;
143 	unsigned long flags;
144 
145 	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
146 	curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
147 	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
148 	if (WARN_ON(curr))
149 		return -EEXIST;
150 
151 	return 0;
152 }
153 
154 static void device_rbtree_remove(struct device_domain_info *info)
155 {
156 	struct intel_iommu *iommu = info->iommu;
157 	unsigned long flags;
158 
159 	spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
160 	rb_erase(&info->node, &iommu->device_rbtree);
161 	spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
162 }
163 
164 struct dmar_rmrr_unit {
165 	struct list_head list;		/* list of rmrr units	*/
166 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
167 	u64	base_address;		/* reserved base address*/
168 	u64	end_address;		/* reserved end address */
169 	struct dmar_dev_scope *devices;	/* target devices */
170 	int	devices_cnt;		/* target device count */
171 };
172 
173 struct dmar_atsr_unit {
174 	struct list_head list;		/* list of ATSR units */
175 	struct acpi_dmar_header *hdr;	/* ACPI header */
176 	struct dmar_dev_scope *devices;	/* target devices */
177 	int devices_cnt;		/* target device count */
178 	u8 include_all:1;		/* include all ports */
179 };
180 
181 struct dmar_satc_unit {
182 	struct list_head list;		/* list of SATC units */
183 	struct acpi_dmar_header *hdr;	/* ACPI header */
184 	struct dmar_dev_scope *devices;	/* target devices */
185 	struct intel_iommu *iommu;	/* the corresponding iommu */
186 	int devices_cnt;		/* target device count */
187 	u8 atc_required:1;		/* ATS is required */
188 };
189 
190 static LIST_HEAD(dmar_atsr_units);
191 static LIST_HEAD(dmar_rmrr_units);
192 static LIST_HEAD(dmar_satc_units);
193 
194 #define for_each_rmrr_units(rmrr) \
195 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
196 
197 static void intel_iommu_domain_free(struct iommu_domain *domain);
198 
199 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
200 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
201 
202 int intel_iommu_enabled = 0;
203 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
204 
205 static int intel_iommu_superpage = 1;
206 static int iommu_identity_mapping;
207 static int iommu_skip_te_disable;
208 static int disable_igfx_iommu;
209 
210 #define IDENTMAP_AZALIA		4
211 
212 const struct iommu_ops intel_iommu_ops;
213 
214 static bool translation_pre_enabled(struct intel_iommu *iommu)
215 {
216 	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
217 }
218 
219 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
220 {
221 	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
222 }
223 
224 static void init_translation_status(struct intel_iommu *iommu)
225 {
226 	u32 gsts;
227 
228 	gsts = readl(iommu->reg + DMAR_GSTS_REG);
229 	if (gsts & DMA_GSTS_TES)
230 		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
231 }
232 
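/*
 * Parse the comma-separated "intel_iommu=" kernel command line options
 * (on, off, igfx_off, forcedac, strict, sp_off, sm_on, sm_off,
 * tboot_noforce). Unknown options are reported and skipped.
 */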
233 static int __init intel_iommu_setup(char *str)
234 {
235 	if (!str)
236 		return -EINVAL;
237 
238 	while (*str) {
239 		if (!strncmp(str, "on", 2)) {
240 			dmar_disabled = 0;
241 			pr_info("IOMMU enabled\n");
242 		} else if (!strncmp(str, "off", 3)) {
243 			dmar_disabled = 1;
244 			no_platform_optin = 1;
245 			pr_info("IOMMU disabled\n");
246 		} else if (!strncmp(str, "igfx_off", 8)) {
247 			disable_igfx_iommu = 1;
248 			pr_info("Disable GFX device mapping\n");
249 		} else if (!strncmp(str, "forcedac", 8)) {
250 			pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
251 			iommu_dma_forcedac = true;
252 		} else if (!strncmp(str, "strict", 6)) {
253 			pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
254 			iommu_set_dma_strict();
255 		} else if (!strncmp(str, "sp_off", 6)) {
256 			pr_info("Disable supported super page\n");
257 			intel_iommu_superpage = 0;
258 		} else if (!strncmp(str, "sm_on", 5)) {
259 			pr_info("Enable scalable mode if hardware supports\n");
260 			intel_iommu_sm = 1;
261 		} else if (!strncmp(str, "sm_off", 6)) {
262 			pr_info("Scalable mode is disallowed\n");
263 			intel_iommu_sm = 0;
264 		} else if (!strncmp(str, "tboot_noforce", 13)) {
265 			pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
266 			intel_iommu_tboot_noforce = 1;
267 		} else {
268 			pr_notice("Unknown option - '%s'\n", str);
269 		}
270 
271 		str += strcspn(str, ",");
272 		while (*str == ',')
273 			str++;
274 	}
275 
276 	return 1;
277 }
278 __setup("intel_iommu=", intel_iommu_setup);
279 
280 /*
281  * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
282  * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of
283  * the returned SAGAW.
284  */
285 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
286 {
287 	unsigned long fl_sagaw, sl_sagaw;
288 
289 	fl_sagaw = BIT(2) | (cap_fl5lp_support(iommu->cap) ? BIT(3) : 0);
290 	sl_sagaw = cap_sagaw(iommu->cap);
291 
292 	/* Second level only. */
293 	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
294 		return sl_sagaw;
295 
296 	/* First level only. */
297 	if (!ecap_slts(iommu->ecap))
298 		return fl_sagaw;
299 
300 	return fl_sagaw & sl_sagaw;
301 }
302 
303 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
304 {
305 	unsigned long sagaw;
306 	int agaw;
307 
308 	sagaw = __iommu_calculate_sagaw(iommu);
309 	for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
310 		if (test_bit(agaw, &sagaw))
311 			break;
312 	}
313 
314 	return agaw;
315 }
316 
317 /*
318  * Calculate max SAGAW for each iommu.
319  */
320 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
321 {
322 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
323 }
324 
325 /*
326  * Calculate the AGAW for each IOMMU.
327  * "SAGAW" may differ across IOMMUs; start from a default AGAW and fall
328  * back to a smaller supported AGAW for IOMMUs that don't support the default.
329  */
330 int iommu_calculate_agaw(struct intel_iommu *iommu)
331 {
332 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
333 }
334 
335 static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
336 {
337 	return sm_supported(iommu) ?
338 			ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
339 }
340 
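/*
 * Return the context entry for @bus/@devfn. In scalable mode the root
 * entry holds two context-table pointers (lower half for devfn < 0x80,
 * upper half otherwise) and scalable-mode context entries are twice as
 * wide, hence the devfn * 2 indexing. If @alloc is set, a missing context
 * table is allocated (GFP_ATOMIC) and flushed; otherwise NULL is
 * returned, as it also is for entries still marked as copied.
 */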
341 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
342 					 u8 devfn, int alloc)
343 {
344 	struct root_entry *root = &iommu->root_entry[bus];
345 	struct context_entry *context;
346 	u64 *entry;
347 
348 	/*
349 	 * Unless the caller requested allocation of a new entry,
350 	 * returning a copied context entry makes no sense.
351 	 */
352 	if (!alloc && context_copied(iommu, bus, devfn))
353 		return NULL;
354 
355 	entry = &root->lo;
356 	if (sm_supported(iommu)) {
357 		if (devfn >= 0x80) {
358 			devfn -= 0x80;
359 			entry = &root->hi;
360 		}
361 		devfn *= 2;
362 	}
363 	if (*entry & 1)
364 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
365 	else {
366 		unsigned long phy_addr;
367 		if (!alloc)
368 			return NULL;
369 
370 		context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
371 						    SZ_4K);
372 		if (!context)
373 			return NULL;
374 
375 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
376 		phy_addr = virt_to_phys((void *)context);
377 		*entry = phy_addr | 1;
378 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
379 	}
380 	return &context[devfn];
381 }
382 
383 /**
384  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
385  *				 sub-hierarchy of a candidate PCI-PCI bridge
386  * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
387  * @bridge: the candidate PCI-PCI bridge
388  *
389  * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
390  */
391 static bool
392 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
393 {
394 	struct pci_dev *pdev, *pbridge;
395 
396 	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
397 		return false;
398 
399 	pdev = to_pci_dev(dev);
400 	pbridge = to_pci_dev(bridge);
401 
402 	if (pbridge->subordinate &&
403 	    pbridge->subordinate->number <= pdev->bus->number &&
404 	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
405 		return true;
406 
407 	return false;
408 }
409 
410 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
411 {
412 	struct dmar_drhd_unit *drhd;
413 	u32 vtbar;
414 	int rc;
415 
416 	/* We know that this device on this chipset has its own IOMMU.
417 	 * If we find it under a different IOMMU, then the BIOS is lying
418 	 * to us. Hope that the IOMMU for this device is actually
419 	 * disabled, and it needs no translation...
420 	 */
421 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
422 	if (rc) {
423 		/* "can't" happen */
424 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
425 		return false;
426 	}
427 	vtbar &= 0xffff0000;
428 
429 	/* we know that this iommu should be at offset 0xa000 from vtbar */
430 	drhd = dmar_find_matched_drhd_unit(pdev);
431 	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
432 		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
433 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
434 		return true;
435 	}
436 
437 	return false;
438 }
439 
440 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
441 {
442 	if (!iommu || iommu->drhd->ignored)
443 		return true;
444 
445 	if (dev_is_pci(dev)) {
446 		struct pci_dev *pdev = to_pci_dev(dev);
447 
448 		if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
449 		    pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
450 		    quirk_ioat_snb_local_iommu(pdev))
451 			return true;
452 	}
453 
454 	return false;
455 }
456 
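/*
 * Look up the IOMMU (DRHD unit) that covers @dev by walking the device
 * scope tables, and optionally return the bus/devfn to use for it. PCI
 * VFs are matched via their PF but keep their own BDF; devices under an
 * INCLUDE_ALL unit fall back to that unit. Returns NULL if no unit
 * matches or the matched unit is ignored/dummy.
 */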
457 static struct intel_iommu *device_lookup_iommu(struct device *dev, u8 *bus, u8 *devfn)
458 {
459 	struct dmar_drhd_unit *drhd = NULL;
460 	struct pci_dev *pdev = NULL;
461 	struct intel_iommu *iommu;
462 	struct device *tmp;
463 	u16 segment = 0;
464 	int i;
465 
466 	if (!dev)
467 		return NULL;
468 
469 	if (dev_is_pci(dev)) {
470 		struct pci_dev *pf_pdev;
471 
472 		pdev = pci_real_dma_dev(to_pci_dev(dev));
473 
474 		/* VFs aren't listed in scope tables; we need to look up
475 		 * the PF instead to find the IOMMU. */
476 		pf_pdev = pci_physfn(pdev);
477 		dev = &pf_pdev->dev;
478 		segment = pci_domain_nr(pdev->bus);
479 	} else if (has_acpi_companion(dev))
480 		dev = &ACPI_COMPANION(dev)->dev;
481 
482 	rcu_read_lock();
483 	for_each_iommu(iommu, drhd) {
484 		if (pdev && segment != drhd->segment)
485 			continue;
486 
487 		for_each_active_dev_scope(drhd->devices,
488 					  drhd->devices_cnt, i, tmp) {
489 			if (tmp == dev) {
490 				/* For a VF use its original BDF# not that of the PF
491 				 * which we used for the IOMMU lookup. Strictly speaking
492 				 * we could do this for all PCI devices; we only need to
493 				 * get the BDF# from the scope table for ACPI matches. */
494 				if (pdev && pdev->is_virtfn)
495 					goto got_pdev;
496 
497 				if (bus && devfn) {
498 					*bus = drhd->devices[i].bus;
499 					*devfn = drhd->devices[i].devfn;
500 				}
501 				goto out;
502 			}
503 
504 			if (is_downstream_to_pci_bridge(dev, tmp))
505 				goto got_pdev;
506 		}
507 
508 		if (pdev && drhd->include_all) {
509 got_pdev:
510 			if (bus && devfn) {
511 				*bus = pdev->bus->number;
512 				*devfn = pdev->devfn;
513 			}
514 			goto out;
515 		}
516 	}
517 	iommu = NULL;
518 out:
519 	if (iommu_is_dummy(iommu, dev))
520 		iommu = NULL;
521 
522 	rcu_read_unlock();
523 
524 	return iommu;
525 }
526 
527 static void free_context_table(struct intel_iommu *iommu)
528 {
529 	struct context_entry *context;
530 	int i;
531 
532 	if (!iommu->root_entry)
533 		return;
534 
535 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
536 		context = iommu_context_addr(iommu, i, 0, 0);
537 		if (context)
538 			iommu_free_pages(context);
539 
540 		if (!sm_supported(iommu))
541 			continue;
542 
543 		context = iommu_context_addr(iommu, i, 0x80, 0);
544 		if (context)
545 			iommu_free_pages(context);
546 	}
547 
548 	iommu_free_pages(iommu->root_entry);
549 	iommu->root_entry = NULL;
550 }
551 
552 #ifdef CONFIG_DMAR_DEBUG
553 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
554 			 u8 bus, u8 devfn, struct dma_pte *parent, int level)
555 {
556 	struct dma_pte *pte;
557 	int offset;
558 
559 	while (1) {
560 		offset = pfn_level_offset(pfn, level);
561 		pte = &parent[offset];
562 
563 		pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
564 
565 		if (!dma_pte_present(pte)) {
566 			pr_info("page table not present at level %d\n", level - 1);
567 			break;
568 		}
569 
570 		if (level == 1 || dma_pte_superpage(pte))
571 			break;
572 
573 		parent = phys_to_virt(dma_pte_addr(pte));
574 		level--;
575 	}
576 }
577 
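/*
 * Dump the translation structures relevant to a DMAR fault: the root and
 * context entries for @source_id, the PASID directory/table entries in
 * scalable mode, and a walk of the page table that maps @addr.
 */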
578 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
579 			  unsigned long long addr, u32 pasid)
580 {
581 	struct pasid_dir_entry *dir, *pde;
582 	struct pasid_entry *entries, *pte;
583 	struct context_entry *ctx_entry;
584 	struct root_entry *rt_entry;
585 	int i, dir_index, index, level;
586 	u8 devfn = source_id & 0xff;
587 	u8 bus = source_id >> 8;
588 	struct dma_pte *pgtable;
589 
590 	pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
591 
592 	/* root entry dump */
593 	if (!iommu->root_entry) {
594 		pr_info("root table is not present\n");
595 		return;
596 	}
597 	rt_entry = &iommu->root_entry[bus];
598 
599 	if (sm_supported(iommu))
600 		pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
601 			rt_entry->hi, rt_entry->lo);
602 	else
603 		pr_info("root entry: 0x%016llx", rt_entry->lo);
604 
605 	/* context entry dump */
606 	ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
607 	if (!ctx_entry) {
608 		pr_info("context table is not present\n");
609 		return;
610 	}
611 
612 	pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
613 		ctx_entry->hi, ctx_entry->lo);
614 
615 	/* legacy mode does not require PASID entries */
616 	if (!sm_supported(iommu)) {
617 		if (!context_present(ctx_entry)) {
618 			pr_info("legacy mode page table is not present\n");
619 			return;
620 		}
621 		level = agaw_to_level(ctx_entry->hi & 7);
622 		pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
623 		goto pgtable_walk;
624 	}
625 
626 	if (!context_present(ctx_entry)) {
627 		pr_info("pasid directory table is not present\n");
628 		return;
629 	}
630 
631 	/* get the pointer to pasid directory entry */
632 	dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
633 
634 	/* For request-without-pasid, get the pasid from context entry */
635 	if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
636 		pasid = IOMMU_NO_PASID;
637 
638 	dir_index = pasid >> PASID_PDE_SHIFT;
639 	pde = &dir[dir_index];
640 	pr_info("pasid dir entry: 0x%016llx\n", pde->val);
641 
642 	/* get the pointer to the pasid table entry */
643 	entries = get_pasid_table_from_pde(pde);
644 	if (!entries) {
645 		pr_info("pasid table is not present\n");
646 		return;
647 	}
648 	index = pasid & PASID_PTE_MASK;
649 	pte = &entries[index];
650 	for (i = 0; i < ARRAY_SIZE(pte->val); i++)
651 		pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
652 
653 	if (!pasid_pte_is_present(pte)) {
654 		pr_info("scalable mode page table is not present\n");
655 		return;
656 	}
657 
658 	if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
659 		level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
660 		pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
661 	} else {
662 		level = agaw_to_level((pte->val[0] >> 2) & 0x7);
663 		pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
664 	}
665 
666 pgtable_walk:
667 	pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
668 }
669 #endif
670 
671 /* iommu handling */
672 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
673 {
674 	struct root_entry *root;
675 
676 	root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K);
677 	if (!root) {
678 		pr_err("Allocating root entry for %s failed\n",
679 			iommu->name);
680 		return -ENOMEM;
681 	}
682 
683 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
684 	iommu->root_entry = root;
685 
686 	return 0;
687 }
688 
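/*
 * Program the root table address (tagged with the scalable-mode flag when
 * applicable), issue SRTP and wait for the RTPS status bit. Unless the
 * hardware flushes its caches as part of SRTP (ESRTPS), follow up with
 * global context, PASID-cache and IOTLB invalidations.
 */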
689 static void iommu_set_root_entry(struct intel_iommu *iommu)
690 {
691 	u64 addr;
692 	u32 sts;
693 	unsigned long flag;
694 
695 	addr = virt_to_phys(iommu->root_entry);
696 	if (sm_supported(iommu))
697 		addr |= DMA_RTADDR_SMT;
698 
699 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
700 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
701 
702 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
703 
704 	/* Make sure hardware completes it */
705 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
706 		      readl, (sts & DMA_GSTS_RTPS), sts);
707 
708 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
709 
710 	/*
711 	 * Hardware invalidates all DMA remapping hardware translation
712 	 * caches as part of SRTP flow.
713 	 */
714 	if (cap_esrtps(iommu->cap))
715 		return;
716 
717 	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
718 	if (sm_supported(iommu))
719 		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
720 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
721 }
722 
723 void iommu_flush_write_buffer(struct intel_iommu *iommu)
724 {
725 	u32 val;
726 	unsigned long flag;
727 
728 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
729 		return;
730 
731 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
732 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
733 
734 	/* Make sure hardware completes it */
735 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
736 		      readl, (!(val & DMA_GSTS_WBFS)), val);
737 
738 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
739 }
740 
741 /* return value determines if we need a write buffer flush */
742 static void __iommu_flush_context(struct intel_iommu *iommu,
743 				  u16 did, u16 source_id, u8 function_mask,
744 				  u64 type)
745 {
746 	u64 val = 0;
747 	unsigned long flag;
748 
749 	switch (type) {
750 	case DMA_CCMD_GLOBAL_INVL:
751 		val = DMA_CCMD_GLOBAL_INVL;
752 		break;
753 	case DMA_CCMD_DOMAIN_INVL:
754 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
755 		break;
756 	case DMA_CCMD_DEVICE_INVL:
757 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
758 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
759 		break;
760 	default:
761 		pr_warn("%s: Unexpected context-cache invalidation type 0x%llx\n",
762 			iommu->name, type);
763 		return;
764 	}
765 	val |= DMA_CCMD_ICC;
766 
767 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
768 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
769 
770 	/* Make sure hardware completes it */
771 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
772 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
773 
774 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
775 }
776 
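/*
 * Register-based IOTLB invalidation: global, domain-selective or
 * page-selective. For PSI the address/size (and IH hint) go into the IVA
 * register before the command is written; the granularity actually used
 * by hardware is checked against the requested one afterwards.
 */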
777 void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
778 			 unsigned int size_order, u64 type)
779 {
780 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
781 	u64 val = 0, val_iva = 0;
782 	unsigned long flag;
783 
784 	switch (type) {
785 	case DMA_TLB_GLOBAL_FLUSH:
786 		/* global flush doesn't need to set IVA_REG */
787 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
788 		break;
789 	case DMA_TLB_DSI_FLUSH:
790 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
791 		break;
792 	case DMA_TLB_PSI_FLUSH:
793 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
794 		/* IH bit is passed in as part of address */
795 		val_iva = size_order | addr;
796 		break;
797 	default:
798 		pr_warn("%s: Unexpected iotlb invalidation type 0x%llx\n",
799 			iommu->name, type);
800 		return;
801 	}
802 
803 	if (cap_write_drain(iommu->cap))
804 		val |= DMA_TLB_WRITE_DRAIN;
805 
806 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
807 	/* Note: Only uses first TLB reg currently */
808 	if (val_iva)
809 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
810 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
811 
812 	/* Make sure hardware completes it */
813 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
814 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
815 
816 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
817 
818 	/* check IOTLB invalidation granularity */
819 	if (DMA_TLB_IAIG(val) == 0)
820 		pr_err("Flush IOTLB failed\n");
821 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
822 		pr_debug("TLB flush request %Lx, actual %Lx\n",
823 			(unsigned long long)DMA_TLB_IIRG(type),
824 			(unsigned long long)DMA_TLB_IAIG(val));
825 }
826 
827 static struct device_domain_info *
828 domain_lookup_dev_info(struct dmar_domain *domain,
829 		       struct intel_iommu *iommu, u8 bus, u8 devfn)
830 {
831 	struct device_domain_info *info;
832 	unsigned long flags;
833 
834 	spin_lock_irqsave(&domain->lock, flags);
835 	list_for_each_entry(info, &domain->devices, link) {
836 		if (info->iommu == iommu && info->bus == bus &&
837 		    info->devfn == devfn) {
838 			spin_unlock_irqrestore(&domain->lock, flags);
839 			return info;
840 		}
841 	}
842 	spin_unlock_irqrestore(&domain->lock, flags);
843 
844 	return NULL;
845 }
846 
847 /*
848  * The extra devTLB flush quirk impacts those QAT devices with PCI device
849  * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
850  * check because it applies only to the built-in QAT devices and it doesn't
851  * grant additional privileges.
852  */
853 #define BUGGY_QAT_DEVID_MASK 0x4940
854 static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
855 {
856 	if (pdev->vendor != PCI_VENDOR_ID_INTEL)
857 		return false;
858 
859 	if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
860 		return false;
861 
862 	return true;
863 }
864 
865 static void iommu_enable_pci_ats(struct device_domain_info *info)
866 {
867 	struct pci_dev *pdev;
868 
869 	if (!info->ats_supported)
870 		return;
871 
872 	pdev = to_pci_dev(info->dev);
873 	if (!pci_ats_page_aligned(pdev))
874 		return;
875 
876 	if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT))
877 		info->ats_enabled = 1;
878 }
879 
880 static void iommu_disable_pci_ats(struct device_domain_info *info)
881 {
882 	if (!info->ats_enabled)
883 		return;
884 
885 	pci_disable_ats(to_pci_dev(info->dev));
886 	info->ats_enabled = 0;
887 }
888 
889 static void iommu_enable_pci_pri(struct device_domain_info *info)
890 {
891 	struct pci_dev *pdev;
892 
893 	if (!info->ats_enabled || !info->pri_supported)
894 		return;
895 
896 	pdev = to_pci_dev(info->dev);
897 	/* PASID is required in PRG Response Message. */
898 	if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
899 		return;
900 
901 	if (pci_reset_pri(pdev))
902 		return;
903 
904 	if (!pci_enable_pri(pdev, PRQ_DEPTH))
905 		info->pri_enabled = 1;
906 }
907 
908 static void iommu_disable_pci_pri(struct device_domain_info *info)
909 {
910 	if (!info->pri_enabled)
911 		return;
912 
913 	if (WARN_ON(info->iopf_refcount))
914 		iopf_queue_remove_device(info->iommu->iopf_queue, info->dev);
915 
916 	pci_disable_pri(to_pci_dev(info->dev));
917 	info->pri_enabled = 0;
918 }
919 
920 static void intel_flush_iotlb_all(struct iommu_domain *domain)
921 {
922 	cache_tag_flush_all(to_dmar_domain(domain));
923 }
924 
925 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
926 {
927 	u32 pmen;
928 	unsigned long flags;
929 
930 	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
931 		return;
932 
933 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
934 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
935 	pmen &= ~DMA_PMEN_EPM;
936 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
937 
938 	/* wait for the protected region status bit to clear */
939 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
940 		readl, !(pmen & DMA_PMEN_PRS), pmen);
941 
942 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
943 }
944 
945 static void iommu_enable_translation(struct intel_iommu *iommu)
946 {
947 	u32 sts;
948 	unsigned long flags;
949 
950 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
951 	iommu->gcmd |= DMA_GCMD_TE;
952 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
953 
954 	/* Make sure hardware completes it */
955 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
956 		      readl, (sts & DMA_GSTS_TES), sts);
957 
958 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
959 }
960 
961 static void iommu_disable_translation(struct intel_iommu *iommu)
962 {
963 	u32 sts;
964 	unsigned long flag;
965 
966 	if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
967 	    (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
968 		return;
969 
970 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
971 	iommu->gcmd &= ~DMA_GCMD_TE;
972 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
973 
974 	/* Make sure hardware completes it */
975 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
976 		      readl, (!(sts & DMA_GSTS_TES)), sts);
977 
978 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
979 }
980 
981 static void disable_dmar_iommu(struct intel_iommu *iommu)
982 {
983 	/*
984 	 * All iommu domains must have been detached from the devices,
985 	 * hence there should be no domain IDs in use.
986 	 */
987 	if (WARN_ON(!ida_is_empty(&iommu->domain_ida)))
988 		return;
989 
990 	if (iommu->gcmd & DMA_GCMD_TE)
991 		iommu_disable_translation(iommu);
992 }
993 
994 static void free_dmar_iommu(struct intel_iommu *iommu)
995 {
996 	if (iommu->copied_tables) {
997 		bitmap_free(iommu->copied_tables);
998 		iommu->copied_tables = NULL;
999 	}
1000 
1001 	/* free context mapping */
1002 	free_context_table(iommu);
1003 
1004 	if (ecap_prs(iommu->ecap))
1005 		intel_iommu_finish_prq(iommu);
1006 }
1007 
1008 /*
1009  * Check and return whether first level is used by default for
1010  * DMA translation.
1011  */
1012 static bool first_level_by_default(struct intel_iommu *iommu)
1013 {
1014 	/* Only SL is available in legacy mode */
1015 	if (!sm_supported(iommu))
1016 		return false;
1017 
1018 	/* Only one level (either FL or SL) is available, just use it */
1019 	if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap))
1020 		return ecap_flts(iommu->ecap);
1021 
1022 	return true;
1023 }
1024 
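/*
 * Take a reference on the (domain, iommu) pairing, allocating a domain ID
 * from the IOMMU's ida on first attach and recording it in
 * domain->iommu_array. SVA domains don't consume a domain ID here.
 */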
1025 int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
1026 {
1027 	struct iommu_domain_info *info, *curr;
1028 	int num, ret = -ENOSPC;
1029 
1030 	if (domain->domain.type == IOMMU_DOMAIN_SVA)
1031 		return 0;
1032 
1033 	info = kzalloc(sizeof(*info), GFP_KERNEL);
1034 	if (!info)
1035 		return -ENOMEM;
1036 
1037 	guard(mutex)(&iommu->did_lock);
1038 	curr = xa_load(&domain->iommu_array, iommu->seq_id);
1039 	if (curr) {
1040 		curr->refcnt++;
1041 		kfree(info);
1042 		return 0;
1043 	}
1044 
1045 	num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
1046 			      cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
1047 	if (num < 0) {
1048 		pr_err("%s: No free domain ids\n", iommu->name);
1049 		goto err_unlock;
1050 	}
1051 
1052 	info->refcnt	= 1;
1053 	info->did	= num;
1054 	info->iommu	= iommu;
1055 	curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
1056 			  NULL, info, GFP_KERNEL);
1057 	if (curr) {
1058 		ret = xa_err(curr) ? : -EBUSY;
1059 		goto err_clear;
1060 	}
1061 
1062 	return 0;
1063 
1064 err_clear:
1065 	ida_free(&iommu->domain_ida, info->did);
1066 err_unlock:
1067 	kfree(info);
1068 	return ret;
1069 }
1070 
1071 void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
1072 {
1073 	struct iommu_domain_info *info;
1074 
1075 	if (domain->domain.type == IOMMU_DOMAIN_SVA)
1076 		return;
1077 
1078 	guard(mutex)(&iommu->did_lock);
1079 	info = xa_load(&domain->iommu_array, iommu->seq_id);
1080 	if (--info->refcnt == 0) {
1081 		ida_free(&iommu->domain_ida, info->did);
1082 		xa_erase(&domain->iommu_array, iommu->seq_id);
1083 		kfree(info);
1084 	}
1085 }
1086 
1087 /*
1088  * For kdump cases, old valid entries may be cached due to the
1089  * in-flight DMA and copied pgtable, but there is no unmapping
1090  * behaviour for them, thus we need an explicit cache flush for
1091  * the newly-mapped device. For kdump, at this point, the device
1092  * is supposed to have finished reset at its driver probe stage, so no
1093  * in-flight DMA will exist, and we don't need to worry about it
1094  * hereafter.
1095  */
1096 static void copied_context_tear_down(struct intel_iommu *iommu,
1097 				     struct context_entry *context,
1098 				     u8 bus, u8 devfn)
1099 {
1100 	u16 did_old;
1101 
1102 	if (!context_copied(iommu, bus, devfn))
1103 		return;
1104 
1105 	assert_spin_locked(&iommu->lock);
1106 
1107 	did_old = context_domain_id(context);
1108 	context_clear_entry(context);
1109 
1110 	if (did_old < cap_ndoms(iommu->cap)) {
1111 		iommu->flush.flush_context(iommu, did_old,
1112 					   PCI_DEVID(bus, devfn),
1113 					   DMA_CCMD_MASK_NOBIT,
1114 					   DMA_CCMD_DEVICE_INVL);
1115 		iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
1116 					 DMA_TLB_DSI_FLUSH);
1117 	}
1118 
1119 	clear_context_copied(iommu, bus, devfn);
1120 }
1121 
1122 /*
1123  * It's a non-present to present mapping. If hardware doesn't cache
1124  * non-present entries we only need to flush the write-buffer. If it
1125  * _does_ cache non-present entries, then it does so in the special
1126  * domain #0, which we have to flush:
1127  */
1128 static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
1129 					u8 bus, u8 devfn)
1130 {
1131 	if (cap_caching_mode(iommu->cap)) {
1132 		iommu->flush.flush_context(iommu, 0,
1133 					   PCI_DEVID(bus, devfn),
1134 					   DMA_CCMD_MASK_NOBIT,
1135 					   DMA_CCMD_DEVICE_INVL);
1136 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1137 	} else {
1138 		iommu_flush_write_buffer(iommu);
1139 	}
1140 }
1141 
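/*
 * Install a legacy-mode context entry for @bus/@devfn pointing at the
 * second-stage page table of @domain. An already-present entry that was
 * not inherited from the previous kernel is left untouched; inherited
 * (copied) entries are torn down and replaced.
 */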
1142 static int domain_context_mapping_one(struct dmar_domain *domain,
1143 				      struct intel_iommu *iommu,
1144 				      u8 bus, u8 devfn)
1145 {
1146 	struct device_domain_info *info =
1147 			domain_lookup_dev_info(domain, iommu, bus, devfn);
1148 	u16 did = domain_id_iommu(domain, iommu);
1149 	int translation = CONTEXT_TT_MULTI_LEVEL;
1150 	struct pt_iommu_vtdss_hw_info pt_info;
1151 	struct context_entry *context;
1152 	int ret;
1153 
1154 	if (WARN_ON(!intel_domain_is_ss_paging(domain)))
1155 		return -EINVAL;
1156 
1157 	pt_iommu_vtdss_hw_info(&domain->sspt, &pt_info);
1158 
1159 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1160 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1161 
1162 	spin_lock(&iommu->lock);
1163 	ret = -ENOMEM;
1164 	context = iommu_context_addr(iommu, bus, devfn, 1);
1165 	if (!context)
1166 		goto out_unlock;
1167 
1168 	ret = 0;
1169 	if (context_present(context) && !context_copied(iommu, bus, devfn))
1170 		goto out_unlock;
1171 
1172 	copied_context_tear_down(iommu, context, bus, devfn);
1173 	context_clear_entry(context);
1174 	context_set_domain_id(context, did);
1175 
1176 	if (info && info->ats_supported)
1177 		translation = CONTEXT_TT_DEV_IOTLB;
1178 	else
1179 		translation = CONTEXT_TT_MULTI_LEVEL;
1180 
1181 	context_set_address_root(context, pt_info.ssptptr);
1182 	context_set_address_width(context, pt_info.aw);
1183 	context_set_translation_type(context, translation);
1184 	context_set_fault_enable(context);
1185 	context_set_present(context);
1186 	if (!ecap_coherent(iommu->ecap))
1187 		clflush_cache_range(context, sizeof(*context));
1188 	context_present_cache_flush(iommu, did, bus, devfn);
1189 	ret = 0;
1190 
1191 out_unlock:
1192 	spin_unlock(&iommu->lock);
1193 
1194 	return ret;
1195 }
1196 
1197 static int domain_context_mapping_cb(struct pci_dev *pdev,
1198 				     u16 alias, void *opaque)
1199 {
1200 	struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev);
1201 	struct intel_iommu *iommu = info->iommu;
1202 	struct dmar_domain *domain = opaque;
1203 
1204 	return domain_context_mapping_one(domain, iommu,
1205 					  PCI_BUS_NUM(alias), alias & 0xff);
1206 }
1207 
1208 static int
1209 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
1210 {
1211 	struct device_domain_info *info = dev_iommu_priv_get(dev);
1212 	struct intel_iommu *iommu = info->iommu;
1213 	u8 bus = info->bus, devfn = info->devfn;
1214 	int ret;
1215 
1216 	if (!dev_is_pci(dev))
1217 		return domain_context_mapping_one(domain, iommu, bus, devfn);
1218 
1219 	ret = pci_for_each_dma_alias(to_pci_dev(dev),
1220 				     domain_context_mapping_cb, domain);
1221 	if (ret)
1222 		return ret;
1223 
1224 	iommu_enable_pci_ats(info);
1225 
1226 	return 0;
1227 }
1228 
1229 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
1230 {
1231 	struct intel_iommu *iommu = info->iommu;
1232 	struct context_entry *context;
1233 	u16 did;
1234 
1235 	spin_lock(&iommu->lock);
1236 	context = iommu_context_addr(iommu, bus, devfn, 0);
1237 	if (!context) {
1238 		spin_unlock(&iommu->lock);
1239 		return;
1240 	}
1241 
1242 	did = context_domain_id(context);
1243 	context_clear_entry(context);
1244 	__iommu_flush_cache(iommu, context, sizeof(*context));
1245 	spin_unlock(&iommu->lock);
1246 	intel_context_flush_no_pasid(info, context, did);
1247 }
1248 
1249 int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
1250 			       ioasid_t pasid, u16 did, phys_addr_t fsptptr,
1251 			       int flags, struct iommu_domain *old)
1252 {
1253 	if (!old)
1254 		return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid,
1255 						     did, flags);
1256 	return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did,
1257 					       iommu_domain_did(old, iommu),
1258 					       flags);
1259 }
1260 
1261 static int domain_setup_second_level(struct intel_iommu *iommu,
1262 				     struct dmar_domain *domain,
1263 				     struct device *dev, ioasid_t pasid,
1264 				     struct iommu_domain *old)
1265 {
1266 	if (!old)
1267 		return intel_pasid_setup_second_level(iommu, domain,
1268 						      dev, pasid);
1269 	return intel_pasid_replace_second_level(iommu, domain, dev,
1270 						iommu_domain_did(old, iommu),
1271 						pasid);
1272 }
1273 
1274 static int domain_setup_passthrough(struct intel_iommu *iommu,
1275 				    struct device *dev, ioasid_t pasid,
1276 				    struct iommu_domain *old)
1277 {
1278 	if (!old)
1279 		return intel_pasid_setup_pass_through(iommu, dev, pasid);
1280 	return intel_pasid_replace_pass_through(iommu, dev,
1281 						iommu_domain_did(old, iommu),
1282 						pasid);
1283 }
1284 
1285 static int domain_setup_first_level(struct intel_iommu *iommu,
1286 				    struct dmar_domain *domain,
1287 				    struct device *dev,
1288 				    u32 pasid, struct iommu_domain *old)
1289 {
1290 	struct pt_iommu_x86_64_hw_info pt_info;
1291 	unsigned int flags = 0;
1292 
1293 	pt_iommu_x86_64_hw_info(&domain->fspt, &pt_info);
1294 	if (WARN_ON(pt_info.levels != 4 && pt_info.levels != 5))
1295 		return -EINVAL;
1296 
1297 	if (pt_info.levels == 5)
1298 		flags |= PASID_FLAG_FL5LP;
1299 
1300 	if (domain->force_snooping)
1301 		flags |= PASID_FLAG_PAGE_SNOOP;
1302 
1303 	if (!(domain->fspt.x86_64_pt.common.features &
1304 	      BIT(PT_FEAT_DMA_INCOHERENT)))
1305 		flags |= PASID_FLAG_PWSNP;
1306 
1307 	return __domain_setup_first_level(iommu, dev, pasid,
1308 					  domain_id_iommu(domain, iommu),
1309 					  pt_info.gcr3_pt, flags, old);
1310 }
1311 
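/*
 * Attach @domain to @dev for requests without PASID: reserve a domain ID
 * on the device's IOMMU, link the device into the domain's device list,
 * then program either the context entry (legacy mode) or the PASID entry
 * for IOMMU_NO_PASID (scalable mode, first- or second-stage paging), and
 * assign the cache tag. On failure the device is put into blocking
 * translation.
 */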
1312 static int dmar_domain_attach_device(struct dmar_domain *domain,
1313 				     struct device *dev)
1314 {
1315 	struct device_domain_info *info = dev_iommu_priv_get(dev);
1316 	struct intel_iommu *iommu = info->iommu;
1317 	unsigned long flags;
1318 	int ret;
1319 
1320 	ret = domain_attach_iommu(domain, iommu);
1321 	if (ret)
1322 		return ret;
1323 
1324 	info->domain = domain;
1325 	info->domain_attached = true;
1326 	spin_lock_irqsave(&domain->lock, flags);
1327 	list_add(&info->link, &domain->devices);
1328 	spin_unlock_irqrestore(&domain->lock, flags);
1329 
1330 	if (dev_is_real_dma_subdevice(dev))
1331 		return 0;
1332 
1333 	if (!sm_supported(iommu))
1334 		ret = domain_context_mapping(domain, dev);
1335 	else if (intel_domain_is_fs_paging(domain))
1336 		ret = domain_setup_first_level(iommu, domain, dev,
1337 					       IOMMU_NO_PASID, NULL);
1338 	else if (intel_domain_is_ss_paging(domain))
1339 		ret = domain_setup_second_level(iommu, domain, dev,
1340 						IOMMU_NO_PASID, NULL);
1341 	else if (WARN_ON(true))
1342 		ret = -EINVAL;
1343 
1344 	if (ret)
1345 		goto out_block_translation;
1346 
1347 	ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
1348 	if (ret)
1349 		goto out_block_translation;
1350 
1351 	return 0;
1352 
1353 out_block_translation:
1354 	device_block_translation(dev);
1355 	return ret;
1356 }
1357 
1358 /**
1359  * device_rmrr_is_relaxable - Test whether the RMRR of this device
1360  * is relaxable (ie. is allowed to be not enforced under some conditions)
1361  * @dev: device handle
1362  *
1363  * We assume that PCI USB devices with RMRRs have them largely
1364  * for historical reasons and that the RMRR space is not actively used post
1365  * boot.  This exclusion may change if vendors begin to abuse it.
1366  *
1367  * The same exception is made for graphics devices, with the requirement that
1368  * any use of the RMRR regions will be torn down before assigning the device
1369  * to a guest.
1370  *
1371  * Return: true if the RMRR is relaxable, false otherwise
1372  */
1373 static bool device_rmrr_is_relaxable(struct device *dev)
1374 {
1375 	struct pci_dev *pdev;
1376 
1377 	if (!dev_is_pci(dev))
1378 		return false;
1379 
1380 	pdev = to_pci_dev(dev);
1381 	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
1382 		return true;
1383 	else
1384 		return false;
1385 }
1386 
1387 static int device_def_domain_type(struct device *dev)
1388 {
1389 	struct device_domain_info *info = dev_iommu_priv_get(dev);
1390 	struct intel_iommu *iommu = info->iommu;
1391 
1392 	/*
1393 	 * Hardware does not support the passthrough translation mode.
1394 	 * Always use a dynamic mapping domain.
1395 	 */
1396 	if (!ecap_pass_through(iommu->ecap))
1397 		return IOMMU_DOMAIN_DMA;
1398 
1399 	if (dev_is_pci(dev)) {
1400 		struct pci_dev *pdev = to_pci_dev(dev);
1401 
1402 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
1403 			return IOMMU_DOMAIN_IDENTITY;
1404 	}
1405 
1406 	return 0;
1407 }
1408 
1409 static void intel_iommu_init_qi(struct intel_iommu *iommu)
1410 {
1411 	/*
1412 	 * Start from a sane iommu hardware state.
1413 	 * If queued invalidation was already initialized by us (for
1414 	 * example, while enabling interrupt-remapping), then things are
1415 	 * already rolling from a sane state.
1416 	 */
1417 	if (!iommu->qi) {
1418 		/*
1419 		 * Clear any previous faults.
1420 		 */
1421 		dmar_fault(-1, iommu);
1422 		/*
1423 		 * Disable queued invalidation if supported and already enabled
1424 		 * before OS handover.
1425 		 */
1426 		dmar_disable_qi(iommu);
1427 	}
1428 
1429 	if (dmar_enable_qi(iommu)) {
1430 		/*
1431 		 * Queued Invalidate not enabled, use Register Based Invalidate
1432 		 */
1433 		iommu->flush.flush_context = __iommu_flush_context;
1434 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1435 		pr_info("%s: Using Register based invalidation\n",
1436 			iommu->name);
1437 	} else {
1438 		iommu->flush.flush_context = qi_flush_context;
1439 		iommu->flush.flush_iotlb = qi_flush_iotlb;
1440 		pr_info("%s: Using Queued invalidation\n", iommu->name);
1441 	}
1442 }
1443 
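/*
 * kdump support: copy the context table(s) of one bus from the previous
 * kernel. In extended/scalable mode each bus has a lower and an upper
 * context table and entries are indexed by devfn * 2. Domain IDs found in
 * copied entries are reserved in the IOMMU's ida, and the entries are
 * marked as copied so they can be torn down later when re-mapped.
 */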
1444 static int copy_context_table(struct intel_iommu *iommu,
1445 			      struct root_entry *old_re,
1446 			      struct context_entry **tbl,
1447 			      int bus, bool ext)
1448 {
1449 	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
1450 	struct context_entry *new_ce = NULL, ce;
1451 	struct context_entry *old_ce = NULL;
1452 	struct root_entry re;
1453 	phys_addr_t old_ce_phys;
1454 
1455 	tbl_idx = ext ? bus * 2 : bus;
1456 	memcpy(&re, old_re, sizeof(re));
1457 
1458 	for (devfn = 0; devfn < 256; devfn++) {
1459 		/* First calculate the correct index */
1460 		idx = (ext ? devfn * 2 : devfn) % 256;
1461 
1462 		if (idx == 0) {
1463 			/* First save what we may have and clean up */
1464 			if (new_ce) {
1465 				tbl[tbl_idx] = new_ce;
1466 				__iommu_flush_cache(iommu, new_ce,
1467 						    VTD_PAGE_SIZE);
1468 				pos = 1;
1469 			}
1470 
1471 			if (old_ce)
1472 				memunmap(old_ce);
1473 
1474 			ret = 0;
1475 			if (devfn < 0x80)
1476 				old_ce_phys = root_entry_lctp(&re);
1477 			else
1478 				old_ce_phys = root_entry_uctp(&re);
1479 
1480 			if (!old_ce_phys) {
1481 				if (ext && devfn == 0) {
1482 					/* No LCTP, try UCTP */
1483 					devfn = 0x7f;
1484 					continue;
1485 				} else {
1486 					goto out;
1487 				}
1488 			}
1489 
1490 			ret = -ENOMEM;
1491 			old_ce = memremap(old_ce_phys, PAGE_SIZE,
1492 					MEMREMAP_WB);
1493 			if (!old_ce)
1494 				goto out;
1495 
1496 			new_ce = iommu_alloc_pages_node_sz(iommu->node,
1497 							   GFP_KERNEL, SZ_4K);
1498 			if (!new_ce)
1499 				goto out_unmap;
1500 
1501 			ret = 0;
1502 		}
1503 
1504 		/* Now copy the context entry */
1505 		memcpy(&ce, old_ce + idx, sizeof(ce));
1506 
1507 		if (!context_present(&ce))
1508 			continue;
1509 
1510 		did = context_domain_id(&ce);
1511 		if (did >= 0 && did < cap_ndoms(iommu->cap))
1512 			ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL);
1513 
1514 		set_context_copied(iommu, bus, devfn);
1515 		new_ce[idx] = ce;
1516 	}
1517 
1518 	tbl[tbl_idx + pos] = new_ce;
1519 
1520 	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
1521 
1522 out_unmap:
1523 	memunmap(old_ce);
1524 
1525 out:
1526 	return ret;
1527 }
1528 
1529 static int copy_translation_tables(struct intel_iommu *iommu)
1530 {
1531 	struct context_entry **ctxt_tbls;
1532 	struct root_entry *old_rt;
1533 	phys_addr_t old_rt_phys;
1534 	int ctxt_table_entries;
1535 	u64 rtaddr_reg;
1536 	int bus, ret;
1537 	bool new_ext, ext;
1538 
1539 	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
1540 	ext        = !!(rtaddr_reg & DMA_RTADDR_SMT);
1541 	new_ext    = !!sm_supported(iommu);
1542 
1543 	/*
1544 	 * The RTT bit can only be changed when translation is disabled,
1545 	 * but disabling translation would open a window for data
1546 	 * corruption. So bail out and don't copy anything if we would
1547 	 * have to change the bit.
1548 	 */
1549 	if (new_ext != ext)
1550 		return -EINVAL;
1551 
1552 	iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
1553 	if (!iommu->copied_tables)
1554 		return -ENOMEM;
1555 
1556 	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
1557 	if (!old_rt_phys)
1558 		return -EINVAL;
1559 
1560 	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
1561 	if (!old_rt)
1562 		return -ENOMEM;
1563 
1564 	/* This is too big for the stack - allocate it from slab */
1565 	ctxt_table_entries = ext ? 512 : 256;
1566 	ret = -ENOMEM;
1567 	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
1568 	if (!ctxt_tbls)
1569 		goto out_unmap;
1570 
1571 	for (bus = 0; bus < 256; bus++) {
1572 		ret = copy_context_table(iommu, &old_rt[bus],
1573 					 ctxt_tbls, bus, ext);
1574 		if (ret) {
1575 			pr_err("%s: Failed to copy context table for bus %d\n",
1576 				iommu->name, bus);
1577 			continue;
1578 		}
1579 	}
1580 
1581 	spin_lock(&iommu->lock);
1582 
1583 	/* Context tables are copied, now write them to the root_entry table */
1584 	for (bus = 0; bus < 256; bus++) {
1585 		int idx = ext ? bus * 2 : bus;
1586 		u64 val;
1587 
1588 		if (ctxt_tbls[idx]) {
1589 			val = virt_to_phys(ctxt_tbls[idx]) | 1;
1590 			iommu->root_entry[bus].lo = val;
1591 		}
1592 
1593 		if (!ext || !ctxt_tbls[idx + 1])
1594 			continue;
1595 
1596 		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
1597 		iommu->root_entry[bus].hi = val;
1598 	}
1599 
1600 	spin_unlock(&iommu->lock);
1601 
1602 	kfree(ctxt_tbls);
1603 
1604 	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
1605 
1606 	ret = 0;
1607 
1608 out_unmap:
1609 	memunmap(old_rt);
1610 
1611 	return ret;
1612 }
1613 
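/*
 * Boot-time initialization of all DMAR units: set up queued invalidation,
 * copy translation tables from the previous kernel when translation is
 * found pre-enabled (kdump), allocate and program the root entries, and
 * enable the page request queue and fault interrupts.
 */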
1614 static int __init init_dmars(void)
1615 {
1616 	struct dmar_drhd_unit *drhd;
1617 	struct intel_iommu *iommu;
1618 	int ret;
1619 
1620 	for_each_iommu(iommu, drhd) {
1621 		if (drhd->ignored) {
1622 			iommu_disable_translation(iommu);
1623 			continue;
1624 		}
1625 
1626 		/*
1627 		 * Find the max pasid size of all IOMMU's in the system.
1628 		 * We need to ensure the system pasid table is no bigger
1629 		 * than the smallest supported.
1630 		 */
1631 		if (pasid_supported(iommu)) {
1632 			u32 temp = 2 << ecap_pss(iommu->ecap);
1633 
1634 			intel_pasid_max_id = min_t(u32, temp,
1635 						   intel_pasid_max_id);
1636 		}
1637 
1638 		intel_iommu_init_qi(iommu);
1639 		init_translation_status(iommu);
1640 
1641 		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1642 			iommu_disable_translation(iommu);
1643 			clear_translation_pre_enabled(iommu);
1644 			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
1645 				iommu->name);
1646 		}
1647 
1648 		/*
1649 		 * TBD:
1650 		 * we could share the same root & context tables
1651 		 * among all IOMMU's. Need to Split it later.
1652 		 */
1653 		ret = iommu_alloc_root_entry(iommu);
1654 		if (ret)
1655 			goto free_iommu;
1656 
1657 		if (translation_pre_enabled(iommu)) {
1658 			pr_info("Translation already enabled - trying to copy translation structures\n");
1659 
1660 			ret = copy_translation_tables(iommu);
1661 			if (ret) {
1662 				/*
1663 				 * We found the IOMMU with translation
1664 				 * enabled - but failed to copy over the
1665 				 * old root-entry table. Try to proceed
1666 				 * by disabling translation now and
1667 				 * allocating a clean root-entry table.
1668 				 * This might cause DMAR faults, but
1669 				 * probably the dump will still succeed.
1670 				 */
1671 				pr_err("Failed to copy translation tables from previous kernel for %s\n",
1672 				       iommu->name);
1673 				iommu_disable_translation(iommu);
1674 				clear_translation_pre_enabled(iommu);
1675 			} else {
1676 				pr_info("Copied translation tables from previous kernel for %s\n",
1677 					iommu->name);
1678 			}
1679 		}
1680 
1681 		intel_svm_check(iommu);
1682 	}
1683 
1684 	/*
1685 	 * Now that qi is enabled on all iommus, set the root entry and flush
1686 	 * caches. This is required on some Intel X58 chipsets, otherwise the
1687 	 * flush_context function will loop forever and the boot hangs.
1688 	 */
1689 	for_each_active_iommu(iommu, drhd) {
1690 		iommu_flush_write_buffer(iommu);
1691 		iommu_set_root_entry(iommu);
1692 	}
1693 
1694 	check_tylersburg_isoch();
1695 
1696 	/*
1697 	 * for each drhd
1698 	 *   enable fault log
1699 	 *   global invalidate context cache
1700 	 *   global invalidate iotlb
1701 	 *   enable translation
1702 	 */
1703 	for_each_iommu(iommu, drhd) {
1704 		if (drhd->ignored) {
1705 			/*
1706 			 * we always have to disable PMRs or DMA may fail on
1707 			 * this device
1708 			 */
1709 			if (force_on)
1710 				iommu_disable_protect_mem_regions(iommu);
1711 			continue;
1712 		}
1713 
1714 		iommu_flush_write_buffer(iommu);
1715 
1716 		if (ecap_prs(iommu->ecap)) {
1717 			/*
1718 			 * Calling dmar_alloc_hwirq() with dmar_global_lock held
1719 			 * could cause a lock race condition.
1720 			 */
1721 			up_write(&dmar_global_lock);
1722 			ret = intel_iommu_enable_prq(iommu);
1723 			down_write(&dmar_global_lock);
1724 			if (ret)
1725 				goto free_iommu;
1726 		}
1727 
1728 		ret = dmar_set_interrupt(iommu);
1729 		if (ret)
1730 			goto free_iommu;
1731 	}
1732 
1733 	return 0;
1734 
1735 free_iommu:
1736 	for_each_active_iommu(iommu, drhd) {
1737 		disable_dmar_iommu(iommu);
1738 		free_dmar_iommu(iommu);
1739 	}
1740 
1741 	return ret;
1742 }
1743 
1744 static void __init init_no_remapping_devices(void)
1745 {
1746 	struct dmar_drhd_unit *drhd;
1747 	struct device *dev;
1748 	int i;
1749 
1750 	for_each_drhd_unit(drhd) {
1751 		if (!drhd->include_all) {
1752 			for_each_active_dev_scope(drhd->devices,
1753 						  drhd->devices_cnt, i, dev)
1754 				break;
1755 			/* ignore DMAR unit if no devices exist */
1756 			if (i == drhd->devices_cnt)
1757 				drhd->ignored = 1;
1758 		}
1759 	}
1760 
1761 	for_each_active_drhd_unit(drhd) {
1762 		if (drhd->include_all)
1763 			continue;
1764 
1765 		for_each_active_dev_scope(drhd->devices,
1766 					  drhd->devices_cnt, i, dev)
1767 			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
1768 				break;
1769 		if (i < drhd->devices_cnt)
1770 			continue;
1771 
1772 		/* This IOMMU has *only* gfx devices. Mark it gfx_dedicated and,
1773 		   if requested, bypass it entirely */
1774 		drhd->gfx_dedicated = 1;
1775 		if (disable_igfx_iommu)
1776 			drhd->ignored = 1;
1777 	}
1778 }
1779 
1780 #ifdef CONFIG_SUSPEND
init_iommu_hw(void)1781 static int init_iommu_hw(void)
1782 {
1783 	struct dmar_drhd_unit *drhd;
1784 	struct intel_iommu *iommu = NULL;
1785 	int ret;
1786 
1787 	for_each_active_iommu(iommu, drhd) {
1788 		if (iommu->qi) {
1789 			ret = dmar_reenable_qi(iommu);
1790 			if (ret)
1791 				return ret;
1792 		}
1793 	}
1794 
1795 	for_each_iommu(iommu, drhd) {
1796 		if (drhd->ignored) {
1797 			/*
1798 			 * we always have to disable PMRs or DMA may fail on
1799 			 * this device
1800 			 */
1801 			if (force_on)
1802 				iommu_disable_protect_mem_regions(iommu);
1803 			continue;
1804 		}
1805 
1806 		iommu_flush_write_buffer(iommu);
1807 		iommu_set_root_entry(iommu);
1808 		iommu_enable_translation(iommu);
1809 		iommu_disable_protect_mem_regions(iommu);
1810 	}
1811 
1812 	return 0;
1813 }
1814 
iommu_flush_all(void)1815 static void iommu_flush_all(void)
1816 {
1817 	struct dmar_drhd_unit *drhd;
1818 	struct intel_iommu *iommu;
1819 
1820 	for_each_active_iommu(iommu, drhd) {
1821 		iommu->flush.flush_context(iommu, 0, 0, 0,
1822 					   DMA_CCMD_GLOBAL_INVL);
1823 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1824 					 DMA_TLB_GLOBAL_FLUSH);
1825 	}
1826 }
1827 
iommu_suspend(void * data)1828 static int iommu_suspend(void *data)
1829 {
1830 	struct dmar_drhd_unit *drhd;
1831 	struct intel_iommu *iommu = NULL;
1832 	unsigned long flag;
1833 
1834 	iommu_flush_all();
1835 
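	/*
	 * Only the fault event registers are saved here; the root table
	 * pointer and translation enable state are re-established from
	 * scratch by init_iommu_hw() when iommu_resume() runs.
	 */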
1836 	for_each_active_iommu(iommu, drhd) {
1837 		iommu_disable_translation(iommu);
1838 
1839 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
1840 
1841 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
1842 			readl(iommu->reg + DMAR_FECTL_REG);
1843 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
1844 			readl(iommu->reg + DMAR_FEDATA_REG);
1845 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
1846 			readl(iommu->reg + DMAR_FEADDR_REG);
1847 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
1848 			readl(iommu->reg + DMAR_FEUADDR_REG);
1849 
1850 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1851 	}
1852 	return 0;
1853 }
1854 
iommu_resume(void * data)1855 static void iommu_resume(void *data)
1856 {
1857 	struct dmar_drhd_unit *drhd;
1858 	struct intel_iommu *iommu = NULL;
1859 	unsigned long flag;
1860 
1861 	if (init_iommu_hw()) {
1862 		if (force_on)
1863 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
1864 		else
1865 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
1866 		return;
1867 	}
1868 
1869 	for_each_active_iommu(iommu, drhd) {
1870 
1871 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
1872 
1873 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
1874 			iommu->reg + DMAR_FECTL_REG);
1875 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
1876 			iommu->reg + DMAR_FEDATA_REG);
1877 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
1878 			iommu->reg + DMAR_FEADDR_REG);
1879 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
1880 			iommu->reg + DMAR_FEUADDR_REG);
1881 
1882 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1883 	}
1884 }
1885 
1886 static const struct syscore_ops iommu_syscore_ops = {
1887 	.resume		= iommu_resume,
1888 	.suspend	= iommu_suspend,
1889 };
1890 
1891 static struct syscore iommu_syscore = {
1892 	.ops = &iommu_syscore_ops,
1893 };
1894 
init_iommu_pm_ops(void)1895 static void __init init_iommu_pm_ops(void)
1896 {
1897 	register_syscore(&iommu_syscore);
1898 }
1899 
1900 #else
init_iommu_pm_ops(void)1901 static inline void init_iommu_pm_ops(void) {}
1902 #endif	/* CONFIG_SUSPEND */
1903 
rmrr_sanity_check(struct acpi_dmar_reserved_memory * rmrr)1904 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
1905 {
1906 	if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
1907 	    !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
1908 	    rmrr->end_address <= rmrr->base_address ||
1909 	    arch_rmrr_sanity_check(rmrr))
1910 		return -EINVAL;
1911 
1912 	return 0;
1913 }
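
/*
 * Illustrative values (not from any real platform): an RMRR covering
 * [0x7c000000, 0x7c7fffff] passes the check above, assuming the arch
 * check also passes, because the base and end_address + 1 are both page
 * aligned and the range is non-empty. An entry with an unaligned base
 * such as 0x7c000800, or whose end_address does not exceed its
 * base_address, is rejected and the caller taints the kernel with
 * TAINT_FIRMWARE_WORKAROUND.
 */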
1914 
dmar_parse_one_rmrr(struct acpi_dmar_header * header,void * arg)1915 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
1916 {
1917 	struct acpi_dmar_reserved_memory *rmrr;
1918 	struct dmar_rmrr_unit *rmrru;
1919 
1920 	rmrr = (struct acpi_dmar_reserved_memory *)header;
1921 	if (rmrr_sanity_check(rmrr)) {
1922 		pr_warn(FW_BUG
1923 			   "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
1924 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1925 			   rmrr->base_address, rmrr->end_address,
1926 			   dmi_get_system_info(DMI_BIOS_VENDOR),
1927 			   dmi_get_system_info(DMI_BIOS_VERSION),
1928 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
1929 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
1930 	}
1931 
1932 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
1933 	if (!rmrru)
1934 		goto out;
1935 
1936 	rmrru->hdr = header;
1937 
1938 	rmrru->base_address = rmrr->base_address;
1939 	rmrru->end_address = rmrr->end_address;
1940 
1941 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
1942 				((void *)rmrr) + rmrr->header.length,
1943 				&rmrru->devices_cnt);
1944 	if (rmrru->devices_cnt && rmrru->devices == NULL)
1945 		goto free_rmrru;
1946 
1947 	list_add(&rmrru->list, &dmar_rmrr_units);
1948 
1949 	return 0;
1950 free_rmrru:
1951 	kfree(rmrru);
1952 out:
1953 	return -ENOMEM;
1954 }
1955 
dmar_find_atsr(struct acpi_dmar_atsr * atsr)1956 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
1957 {
1958 	struct dmar_atsr_unit *atsru;
1959 	struct acpi_dmar_atsr *tmp;
1960 
1961 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
1962 				dmar_rcu_check()) {
1963 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
1964 		if (atsr->segment != tmp->segment)
1965 			continue;
1966 		if (atsr->header.length != tmp->header.length)
1967 			continue;
1968 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
1969 			return atsru;
1970 	}
1971 
1972 	return NULL;
1973 }
1974 
dmar_parse_one_atsr(struct acpi_dmar_header * hdr,void * arg)1975 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
1976 {
1977 	struct acpi_dmar_atsr *atsr;
1978 	struct dmar_atsr_unit *atsru;
1979 
1980 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
1981 		return 0;
1982 
1983 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
1984 	atsru = dmar_find_atsr(atsr);
1985 	if (atsru)
1986 		return 0;
1987 
1988 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
1989 	if (!atsru)
1990 		return -ENOMEM;
1991 
1992 	/*
1993 	 * If memory is allocated from slab by ACPI _DSM method, we need to
1994 	 * copy the memory content because the memory buffer will be freed
1995 	 * on return.
1996 	 */
1997 	atsru->hdr = (void *)(atsru + 1);
1998 	memcpy(atsru->hdr, hdr, hdr->length);
1999 	atsru->include_all = atsr->flags & 0x1;
2000 	if (!atsru->include_all) {
2001 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
2002 				(void *)atsr + atsr->header.length,
2003 				&atsru->devices_cnt);
2004 		if (atsru->devices_cnt && atsru->devices == NULL) {
2005 			kfree(atsru);
2006 			return -ENOMEM;
2007 		}
2008 	}
2009 
2010 	list_add_rcu(&atsru->list, &dmar_atsr_units);
2011 
2012 	return 0;
2013 }
2014 
intel_iommu_free_atsr(struct dmar_atsr_unit * atsru)2015 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
2016 {
2017 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
2018 	kfree(atsru);
2019 }
2020 
dmar_release_one_atsr(struct acpi_dmar_header * hdr,void * arg)2021 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2022 {
2023 	struct acpi_dmar_atsr *atsr;
2024 	struct dmar_atsr_unit *atsru;
2025 
2026 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2027 	atsru = dmar_find_atsr(atsr);
2028 	if (atsru) {
2029 		list_del_rcu(&atsru->list);
2030 		synchronize_rcu();
2031 		intel_iommu_free_atsr(atsru);
2032 	}
2033 
2034 	return 0;
2035 }
2036 
dmar_check_one_atsr(struct acpi_dmar_header * hdr,void * arg)2037 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
2038 {
2039 	int i;
2040 	struct device *dev;
2041 	struct acpi_dmar_atsr *atsr;
2042 	struct dmar_atsr_unit *atsru;
2043 
2044 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
2045 	atsru = dmar_find_atsr(atsr);
2046 	if (!atsru)
2047 		return 0;
2048 
2049 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
2050 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
2051 					  i, dev)
2052 			return -EBUSY;
2053 	}
2054 
2055 	return 0;
2056 }
2057 
dmar_find_satc(struct acpi_dmar_satc * satc)2058 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
2059 {
2060 	struct dmar_satc_unit *satcu;
2061 	struct acpi_dmar_satc *tmp;
2062 
2063 	list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
2064 				dmar_rcu_check()) {
2065 		tmp = (struct acpi_dmar_satc *)satcu->hdr;
2066 		if (satc->segment != tmp->segment)
2067 			continue;
2068 		if (satc->header.length != tmp->header.length)
2069 			continue;
2070 		if (memcmp(satc, tmp, satc->header.length) == 0)
2071 			return satcu;
2072 	}
2073 
2074 	return NULL;
2075 }
2076 
dmar_parse_one_satc(struct acpi_dmar_header * hdr,void * arg)2077 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
2078 {
2079 	struct acpi_dmar_satc *satc;
2080 	struct dmar_satc_unit *satcu;
2081 
2082 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
2083 		return 0;
2084 
2085 	satc = container_of(hdr, struct acpi_dmar_satc, header);
2086 	satcu = dmar_find_satc(satc);
2087 	if (satcu)
2088 		return 0;
2089 
2090 	satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
2091 	if (!satcu)
2092 		return -ENOMEM;
2093 
2094 	satcu->hdr = (void *)(satcu + 1);
2095 	memcpy(satcu->hdr, hdr, hdr->length);
2096 	satcu->atc_required = satc->flags & 0x1;
2097 	satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
2098 					      (void *)satc + satc->header.length,
2099 					      &satcu->devices_cnt);
2100 	if (satcu->devices_cnt && !satcu->devices) {
2101 		kfree(satcu);
2102 		return -ENOMEM;
2103 	}
2104 	list_add_rcu(&satcu->list, &dmar_satc_units);
2105 
2106 	return 0;
2107 }
2108 
intel_iommu_add(struct dmar_drhd_unit * dmaru)2109 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
2110 {
2111 	struct intel_iommu *iommu = dmaru->iommu;
2112 	int ret;
2113 
2114 	/*
2115 	 * Disable translation if already enabled prior to OS handover.
2116 	 */
2117 	if (iommu->gcmd & DMA_GCMD_TE)
2118 		iommu_disable_translation(iommu);
2119 
2120 	ret = iommu_alloc_root_entry(iommu);
2121 	if (ret)
2122 		goto out;
2123 
2124 	intel_svm_check(iommu);
2125 
2126 	if (dmaru->ignored) {
2127 		/*
2128 		 * we always have to disable PMRs or DMA may fail on this device
2129 		 */
2130 		if (force_on)
2131 			iommu_disable_protect_mem_regions(iommu);
2132 		return 0;
2133 	}
2134 
2135 	intel_iommu_init_qi(iommu);
2136 	iommu_flush_write_buffer(iommu);
2137 
2138 	if (ecap_prs(iommu->ecap)) {
2139 		ret = intel_iommu_enable_prq(iommu);
2140 		if (ret)
2141 			goto disable_iommu;
2142 	}
2143 
2144 	ret = dmar_set_interrupt(iommu);
2145 	if (ret)
2146 		goto disable_iommu;
2147 
2148 	iommu_set_root_entry(iommu);
2149 	iommu_enable_translation(iommu);
2150 
2151 	iommu_disable_protect_mem_regions(iommu);
2152 	return 0;
2153 
2154 disable_iommu:
2155 	disable_dmar_iommu(iommu);
2156 out:
2157 	free_dmar_iommu(iommu);
2158 	return ret;
2159 }
2160 
dmar_iommu_hotplug(struct dmar_drhd_unit * dmaru,bool insert)2161 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
2162 {
2163 	int ret = 0;
2164 	struct intel_iommu *iommu = dmaru->iommu;
2165 
2166 	if (!intel_iommu_enabled)
2167 		return 0;
2168 	if (iommu == NULL)
2169 		return -EINVAL;
2170 
2171 	if (insert) {
2172 		ret = intel_iommu_add(dmaru);
2173 	} else {
2174 		disable_dmar_iommu(iommu);
2175 		free_dmar_iommu(iommu);
2176 	}
2177 
2178 	return ret;
2179 }
2180 
intel_iommu_free_dmars(void)2181 static void intel_iommu_free_dmars(void)
2182 {
2183 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
2184 	struct dmar_atsr_unit *atsru, *atsr_n;
2185 	struct dmar_satc_unit *satcu, *satc_n;
2186 
2187 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
2188 		list_del(&rmrru->list);
2189 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
2190 		kfree(rmrru);
2191 	}
2192 
2193 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
2194 		list_del(&atsru->list);
2195 		intel_iommu_free_atsr(atsru);
2196 	}
2197 	list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
2198 		list_del(&satcu->list);
2199 		dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
2200 		kfree(satcu);
2201 	}
2202 }
2203 
dmar_find_matched_satc_unit(struct pci_dev * dev)2204 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
2205 {
2206 	struct dmar_satc_unit *satcu;
2207 	struct acpi_dmar_satc *satc;
2208 	struct device *tmp;
2209 	int i;
2210 
2211 	rcu_read_lock();
2212 
2213 	list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
2214 		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2215 		if (satc->segment != pci_domain_nr(dev->bus))
2216 			continue;
2217 		for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
2218 			if (to_pci_dev(tmp) == dev)
2219 				goto out;
2220 	}
2221 	satcu = NULL;
2222 out:
2223 	rcu_read_unlock();
2224 	return satcu;
2225 }
2226 
dmar_ats_supported(struct pci_dev * dev,struct intel_iommu * iommu)2227 static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
2228 {
2229 	struct pci_dev *bridge = NULL;
2230 	struct dmar_atsr_unit *atsru;
2231 	struct dmar_satc_unit *satcu;
2232 	struct acpi_dmar_atsr *atsr;
2233 	bool supported = true;
2234 	struct pci_bus *bus;
2235 	struct device *tmp;
2236 	int i;
2237 
2238 	dev = pci_physfn(dev);
2239 	satcu = dmar_find_matched_satc_unit(dev);
2240 	if (satcu)
2241 		/*
2242 		 * This device supports ATS because it is listed in the SATC
2243 		 * table. When the IOMMU is in legacy mode, the hardware enables
2244 		 * ATS automatically for devices that require it, so the OS must
2245 		 * not enable ATS on such a device itself, to avoid duplicated
2246 		 * TLB invalidations.
2247 		 */
2248 		return !(satcu->atc_required && !sm_supported(iommu));
2249 
2250 	for (bus = dev->bus; bus; bus = bus->parent) {
2251 		bridge = bus->self;
2252 		/* If it's an integrated device, allow ATS */
2253 		if (!bridge)
2254 			return true;
2255 		/* Connected via non-PCIe: no ATS */
2256 		if (!pci_is_pcie(bridge) ||
2257 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
2258 			return false;
2259 		/* If we found the root port, look it up in the ATSR */
2260 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
2261 			break;
2262 	}
2263 
2264 	rcu_read_lock();
2265 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
2266 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2267 		if (atsr->segment != pci_domain_nr(dev->bus))
2268 			continue;
2269 
2270 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
2271 			if (tmp == &bridge->dev)
2272 				goto out;
2273 
2274 		if (atsru->include_all)
2275 			goto out;
2276 	}
2277 	supported = false;
2278 out:
2279 	rcu_read_unlock();
2280 
2281 	return supported;
2282 }
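
/*
 * Summary of the SATC shortcut in dmar_ats_supported() above; the only
 * refused combination is a device with atc_required set while the IOMMU
 * runs in legacy (non-scalable) mode, because the hardware then enables
 * ATS on its own:
 *
 *	atc_required	sm_supported(iommu)	return value
 *	     0			0		    true
 *	     0			1		    true
 *	     1			0		    false
 *	     1			1		    true
 */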
2283 
dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info * info)2284 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
2285 {
2286 	int ret;
2287 	struct dmar_rmrr_unit *rmrru;
2288 	struct dmar_atsr_unit *atsru;
2289 	struct dmar_satc_unit *satcu;
2290 	struct acpi_dmar_atsr *atsr;
2291 	struct acpi_dmar_reserved_memory *rmrr;
2292 	struct acpi_dmar_satc *satc;
2293 
2294 	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
2295 		return 0;
2296 
2297 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
2298 		rmrr = container_of(rmrru->hdr,
2299 				    struct acpi_dmar_reserved_memory, header);
2300 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2301 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
2302 				((void *)rmrr) + rmrr->header.length,
2303 				rmrr->segment, rmrru->devices,
2304 				rmrru->devices_cnt);
2305 			if (ret < 0)
2306 				return ret;
2307 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2308 			dmar_remove_dev_scope(info, rmrr->segment,
2309 				rmrru->devices, rmrru->devices_cnt);
2310 		}
2311 	}
2312 
2313 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
2314 		if (atsru->include_all)
2315 			continue;
2316 
2317 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
2318 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2319 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
2320 					(void *)atsr + atsr->header.length,
2321 					atsr->segment, atsru->devices,
2322 					atsru->devices_cnt);
2323 			if (ret > 0)
2324 				break;
2325 			else if (ret < 0)
2326 				return ret;
2327 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2328 			if (dmar_remove_dev_scope(info, atsr->segment,
2329 					atsru->devices, atsru->devices_cnt))
2330 				break;
2331 		}
2332 	}
2333 	list_for_each_entry(satcu, &dmar_satc_units, list) {
2334 		satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
2335 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
2336 			ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
2337 					(void *)satc + satc->header.length,
2338 					satc->segment, satcu->devices,
2339 					satcu->devices_cnt);
2340 			if (ret > 0)
2341 				break;
2342 			else if (ret < 0)
2343 				return ret;
2344 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
2345 			if (dmar_remove_dev_scope(info, satc->segment,
2346 					satcu->devices, satcu->devices_cnt))
2347 				break;
2348 		}
2349 	}
2350 
2351 	return 0;
2352 }
2353 
intel_disable_iommus(void)2354 static void intel_disable_iommus(void)
2355 {
2356 	struct intel_iommu *iommu = NULL;
2357 	struct dmar_drhd_unit *drhd;
2358 
2359 	for_each_iommu(iommu, drhd)
2360 		iommu_disable_translation(iommu);
2361 }
2362 
intel_iommu_shutdown(void)2363 void intel_iommu_shutdown(void)
2364 {
2365 	struct dmar_drhd_unit *drhd;
2366 	struct intel_iommu *iommu = NULL;
2367 
2368 	if (no_iommu || dmar_disabled)
2369 		return;
2370 
2371 	/*
2372 	 * All other CPUs were brought down, hotplug interrupts were disabled,
2373 	 * no lock and RCU checking needed anymore
2374 	 */
2375 	list_for_each_entry(drhd, &dmar_drhd_units, list) {
2376 		iommu = drhd->iommu;
2377 
2378 		/* Disable PMRs explicitly here. */
2379 		iommu_disable_protect_mem_regions(iommu);
2380 
2381 		/* Make sure the IOMMUs are switched off */
2382 		iommu_disable_translation(iommu);
2383 	}
2384 }
2385 
dev_to_intel_iommu(struct device * dev)2386 static struct intel_iommu *dev_to_intel_iommu(struct device *dev)
2387 {
2388 	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
2389 
2390 	return container_of(iommu_dev, struct intel_iommu, iommu);
2391 }
2392 
version_show(struct device * dev,struct device_attribute * attr,char * buf)2393 static ssize_t version_show(struct device *dev,
2394 			    struct device_attribute *attr, char *buf)
2395 {
2396 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2397 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
2398 	return sysfs_emit(buf, "%d:%d\n",
2399 			  DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
2400 }
2401 static DEVICE_ATTR_RO(version);
2402 
address_show(struct device * dev,struct device_attribute * attr,char * buf)2403 static ssize_t address_show(struct device *dev,
2404 			    struct device_attribute *attr, char *buf)
2405 {
2406 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2407 	return sysfs_emit(buf, "%llx\n", iommu->reg_phys);
2408 }
2409 static DEVICE_ATTR_RO(address);
2410 
cap_show(struct device * dev,struct device_attribute * attr,char * buf)2411 static ssize_t cap_show(struct device *dev,
2412 			struct device_attribute *attr, char *buf)
2413 {
2414 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2415 	return sysfs_emit(buf, "%llx\n", iommu->cap);
2416 }
2417 static DEVICE_ATTR_RO(cap);
2418 
ecap_show(struct device * dev,struct device_attribute * attr,char * buf)2419 static ssize_t ecap_show(struct device *dev,
2420 			 struct device_attribute *attr, char *buf)
2421 {
2422 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2423 	return sysfs_emit(buf, "%llx\n", iommu->ecap);
2424 }
2425 static DEVICE_ATTR_RO(ecap);
2426 
domains_supported_show(struct device * dev,struct device_attribute * attr,char * buf)2427 static ssize_t domains_supported_show(struct device *dev,
2428 				      struct device_attribute *attr, char *buf)
2429 {
2430 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2431 	return sysfs_emit(buf, "%ld\n", cap_ndoms(iommu->cap));
2432 }
2433 static DEVICE_ATTR_RO(domains_supported);
2434 
domains_used_show(struct device * dev,struct device_attribute * attr,char * buf)2435 static ssize_t domains_used_show(struct device *dev,
2436 				 struct device_attribute *attr, char *buf)
2437 {
2438 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
2439 	unsigned int count = 0;
2440 	int id;
2441 
2442 	for (id = 0; id < cap_ndoms(iommu->cap); id++)
2443 		if (ida_exists(&iommu->domain_ida, id))
2444 			count++;
2445 
2446 	return sysfs_emit(buf, "%d\n", count);
2447 }
2448 static DEVICE_ATTR_RO(domains_used);
2449 
2450 static struct attribute *intel_iommu_attrs[] = {
2451 	&dev_attr_version.attr,
2452 	&dev_attr_address.attr,
2453 	&dev_attr_cap.attr,
2454 	&dev_attr_ecap.attr,
2455 	&dev_attr_domains_supported.attr,
2456 	&dev_attr_domains_used.attr,
2457 	NULL,
2458 };
2459 
2460 static struct attribute_group intel_iommu_group = {
2461 	.name = "intel-iommu",
2462 	.attrs = intel_iommu_attrs,
2463 };
2464 
2465 const struct attribute_group *intel_iommu_groups[] = {
2466 	&intel_iommu_group,
2467 	NULL,
2468 };
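
/*
 * The attribute group above shows up under sysfs as
 * /sys/class/iommu/<name>/intel-iommu/ once iommu_device_sysfs_add() has
 * run in intel_iommu_init(). A minimal userspace reader (sketch only; the
 * unit name "dmar0" is an assumption about the first remapping unit):
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char buf[32];
 *		FILE *f = fopen("/sys/class/iommu/dmar0/intel-iommu/cap", "r");
 *
 *		if (!f)
 *			return 1;
 *		if (fgets(buf, sizeof(buf), f))
 *			printf("DMAR capability register: %s", buf);
 *		fclose(f);
 *		return 0;
 *	}
 */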
2469 
has_external_pci(void)2470 static bool has_external_pci(void)
2471 {
2472 	struct pci_dev *pdev = NULL;
2473 
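	/*
	 * for_each_pci_dev() holds a reference on the device being visited,
	 * so returning from inside the loop needs the explicit pci_dev_put()
	 * below to drop that reference.
	 */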
2474 	for_each_pci_dev(pdev)
2475 		if (pdev->external_facing) {
2476 			pci_dev_put(pdev);
2477 			return true;
2478 		}
2479 
2480 	return false;
2481 }
2482 
platform_optin_force_iommu(void)2483 static int __init platform_optin_force_iommu(void)
2484 {
2485 	if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
2486 		return 0;
2487 
2488 	if (no_iommu || dmar_disabled)
2489 		pr_info("Intel-IOMMU force enabled due to platform opt in\n");
2490 
2491 	/*
2492 	 * If Intel-IOMMU is disabled by default, we will apply identity
2493 	 * map for all devices except those marked as being untrusted.
2494 	 */
2495 	if (dmar_disabled)
2496 		iommu_set_default_passthrough(false);
2497 
2498 	dmar_disabled = 0;
2499 	no_iommu = 0;
2500 
2501 	return 1;
2502 }
2503 
probe_acpi_namespace_devices(void)2504 static int __init probe_acpi_namespace_devices(void)
2505 {
2506 	struct dmar_drhd_unit *drhd;
2507 	/* To avoid a -Wunused-but-set-variable warning. */
2508 	struct intel_iommu *iommu __maybe_unused;
2509 	struct device *dev;
2510 	int i, ret = 0;
2511 
2512 	for_each_active_iommu(iommu, drhd) {
2513 		for_each_active_dev_scope(drhd->devices,
2514 					  drhd->devices_cnt, i, dev) {
2515 			struct acpi_device_physical_node *pn;
2516 			struct acpi_device *adev;
2517 
2518 			if (dev->bus != &acpi_bus_type)
2519 				continue;
2520 
2521 			up_read(&dmar_global_lock);
2522 			adev = to_acpi_device(dev);
2523 			mutex_lock(&adev->physical_node_lock);
2524 			list_for_each_entry(pn,
2525 					    &adev->physical_node_list, node) {
2526 				ret = iommu_probe_device(pn->dev);
2527 				if (ret)
2528 					break;
2529 			}
2530 			mutex_unlock(&adev->physical_node_lock);
2531 			down_read(&dmar_global_lock);
2532 
2533 			if (ret)
2534 				return ret;
2535 		}
2536 	}
2537 
2538 	return 0;
2539 }
2540 
tboot_force_iommu(void)2541 static __init int tboot_force_iommu(void)
2542 {
2543 	if (!tboot_enabled())
2544 		return 0;
2545 
2546 	if (no_iommu || dmar_disabled)
2547 		pr_warn("Forcing Intel-IOMMU to enabled\n");
2548 
2549 	dmar_disabled = 0;
2550 	no_iommu = 0;
2551 
2552 	return 1;
2553 }
2554 
intel_iommu_init(void)2555 int __init intel_iommu_init(void)
2556 {
2557 	int ret = -ENODEV;
2558 	struct dmar_drhd_unit *drhd;
2559 	struct intel_iommu *iommu;
2560 
2561 	/*
2562 	 * Intel IOMMU is required for a TXT/tboot launch or platform
2563 	 * opt in, so enforce that.
2564 	 */
2565 	force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
2566 		    platform_optin_force_iommu();
2567 
2568 	down_write(&dmar_global_lock);
2569 	if (dmar_table_init()) {
2570 		if (force_on)
2571 			panic("tboot: Failed to initialize DMAR table\n");
2572 		goto out_free_dmar;
2573 	}
2574 
2575 	if (dmar_dev_scope_init() < 0) {
2576 		if (force_on)
2577 			panic("tboot: Failed to initialize DMAR device scope\n");
2578 		goto out_free_dmar;
2579 	}
2580 
2581 	up_write(&dmar_global_lock);
2582 
2583 	/*
2584 	 * The bus notifier takes the dmar_global_lock, so lockdep will
2585 	 * complain later when we register it under the lock.
2586 	 */
2587 	dmar_register_bus_notifier();
2588 
2589 	down_write(&dmar_global_lock);
2590 
2591 	if (!no_iommu)
2592 		intel_iommu_debugfs_init();
2593 
2594 	if (no_iommu || dmar_disabled) {
2595 		/*
2596 		 * We exit the function here to ensure IOMMU's remapping and
2597 		 * mempool aren't setup, which means that the IOMMU's PMRs
2598 		 * won't be disabled via the call to init_dmars(). So disable
2599 		 * it explicitly here. The PMRs were setup by tboot prior to
2600 		 * calling SENTER, but the kernel is expected to reset/tear
2601 		 * down the PMRs.
2602 		 */
2603 		if (intel_iommu_tboot_noforce) {
2604 			for_each_iommu(iommu, drhd)
2605 				iommu_disable_protect_mem_regions(iommu);
2606 		}
2607 
2608 		/*
2609 		 * Make sure the IOMMUs are switched off, even when we
2610 		 * boot into a kexec kernel and the previous kernel left
2611 		 * them enabled
2612 		 */
2613 		intel_disable_iommus();
2614 		goto out_free_dmar;
2615 	}
2616 
2617 	if (list_empty(&dmar_rmrr_units))
2618 		pr_info("No RMRR found\n");
2619 
2620 	if (list_empty(&dmar_atsr_units))
2621 		pr_info("No ATSR found\n");
2622 
2623 	if (list_empty(&dmar_satc_units))
2624 		pr_info("No SATC found\n");
2625 
2626 	init_no_remapping_devices();
2627 
2628 	ret = init_dmars();
2629 	if (ret) {
2630 		if (force_on)
2631 			panic("tboot: Failed to initialize DMARs\n");
2632 		pr_err("Initialization failed\n");
2633 		goto out_free_dmar;
2634 	}
2635 	up_write(&dmar_global_lock);
2636 
2637 	init_iommu_pm_ops();
2638 
2639 	down_read(&dmar_global_lock);
2640 	for_each_active_iommu(iommu, drhd) {
2641 		/*
2642 		 * The flush queue implementation does not perform
2643 		 * page-selective invalidations that are required for efficient
2644 		 * TLB flushes in virtual environments.  The benefit of batching
2645 		 * is likely to be much lower than the overhead of synchronizing
2646 		 * the virtual and physical IOMMU page-tables.
2647 		 */
2648 		if (cap_caching_mode(iommu->cap) &&
2649 		    !first_level_by_default(iommu)) {
2650 			pr_info_once("IOMMU batching disallowed due to virtualization\n");
2651 			iommu_set_dma_strict();
2652 		}
2653 		iommu_device_sysfs_add(&iommu->iommu, NULL,
2654 				       intel_iommu_groups,
2655 				       "%s", iommu->name);
2656 		/*
2657 		 * The iommu device probe is protected by the iommu_probe_device_lock.
2658 		 * Release the dmar_global_lock before entering the device probe path
2659 		 * to avoid unnecessary lock order splat.
2660 		 */
2661 		up_read(&dmar_global_lock);
2662 		iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
2663 		down_read(&dmar_global_lock);
2664 
2665 		iommu_pmu_register(iommu);
2666 	}
2667 
2668 	if (probe_acpi_namespace_devices())
2669 		pr_warn("ACPI name space devices didn't probe correctly\n");
2670 
2671 	/* Finally, we enable the DMA remapping hardware. */
2672 	for_each_iommu(iommu, drhd) {
2673 		if (!drhd->ignored && !translation_pre_enabled(iommu))
2674 			iommu_enable_translation(iommu);
2675 
2676 		iommu_disable_protect_mem_regions(iommu);
2677 	}
2678 	up_read(&dmar_global_lock);
2679 
2680 	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
2681 
2682 	intel_iommu_enabled = 1;
2683 
2684 	return 0;
2685 
2686 out_free_dmar:
2687 	intel_iommu_free_dmars();
2688 	up_write(&dmar_global_lock);
2689 	return ret;
2690 }
2691 
domain_context_clear_one_cb(struct pci_dev * pdev,u16 alias,void * opaque)2692 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
2693 {
2694 	struct device_domain_info *info = opaque;
2695 
2696 	domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
2697 	return 0;
2698 }
2699 
2700 /*
2701  * NB - intel-iommu lacks any sort of reference counting for the users of
2702  * dependent devices.  If multiple endpoints have intersecting dependent
2703  * devices, unbinding the driver from any one of them will possibly leave
2704  * the others unable to operate.
2705  */
domain_context_clear(struct device_domain_info * info)2706 static void domain_context_clear(struct device_domain_info *info)
2707 {
2708 	if (!dev_is_pci(info->dev)) {
2709 		domain_context_clear_one(info, info->bus, info->devfn);
2710 		return;
2711 	}
2712 
2713 	pci_for_each_dma_alias(to_pci_dev(info->dev),
2714 			       &domain_context_clear_one_cb, info);
2715 	iommu_disable_pci_ats(info);
2716 }
2717 
2718 /*
2719  * Clear the page table pointer in context or pasid table entries so that
2720  * all DMA requests without PASID from the device are blocked. If the page
2721  * table has been set, clean up the data structures.
2722  */
device_block_translation(struct device * dev)2723 void device_block_translation(struct device *dev)
2724 {
2725 	struct device_domain_info *info = dev_iommu_priv_get(dev);
2726 	struct intel_iommu *iommu = info->iommu;
2727 	unsigned long flags;
2728 
2729 	/* Device is already in DMA blocking state. Nothing to do. */
2730 	if (!info->domain_attached)
2731 		return;
2732 
2733 	if (info->domain)
2734 		cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
2735 
2736 	if (!dev_is_real_dma_subdevice(dev)) {
2737 		if (sm_supported(iommu))
2738 			intel_pasid_tear_down_entry(iommu, dev,
2739 						    IOMMU_NO_PASID, false);
2740 		else
2741 			domain_context_clear(info);
2742 	}
2743 
2744 	/* Device now in DMA blocking state. */
2745 	info->domain_attached = false;
2746 
2747 	if (!info->domain)
2748 		return;
2749 
2750 	spin_lock_irqsave(&info->domain->lock, flags);
2751 	list_del(&info->link);
2752 	spin_unlock_irqrestore(&info->domain->lock, flags);
2753 
2754 	domain_detach_iommu(info->domain, iommu);
2755 	info->domain = NULL;
2756 }
2757 
blocking_domain_attach_dev(struct iommu_domain * domain,struct device * dev,struct iommu_domain * old)2758 static int blocking_domain_attach_dev(struct iommu_domain *domain,
2759 				      struct device *dev,
2760 				      struct iommu_domain *old)
2761 {
2762 	struct device_domain_info *info = dev_iommu_priv_get(dev);
2763 
2764 	iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev);
2765 	device_block_translation(dev);
2766 	return 0;
2767 }
2768 
2769 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
2770 					 struct device *dev, ioasid_t pasid,
2771 					 struct iommu_domain *old);
2772 
2773 static struct iommu_domain blocking_domain = {
2774 	.type = IOMMU_DOMAIN_BLOCKED,
2775 	.ops = &(const struct iommu_domain_ops) {
2776 		.attach_dev	= blocking_domain_attach_dev,
2777 		.set_dev_pasid	= blocking_domain_set_dev_pasid,
2778 	}
2779 };
2780 
paging_domain_alloc(void)2781 static struct dmar_domain *paging_domain_alloc(void)
2782 {
2783 	struct dmar_domain *domain;
2784 
2785 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2786 	if (!domain)
2787 		return ERR_PTR(-ENOMEM);
2788 
2789 	INIT_LIST_HEAD(&domain->devices);
2790 	INIT_LIST_HEAD(&domain->dev_pasids);
2791 	INIT_LIST_HEAD(&domain->cache_tags);
2792 	spin_lock_init(&domain->lock);
2793 	spin_lock_init(&domain->cache_lock);
2794 	xa_init(&domain->iommu_array);
2795 	INIT_LIST_HEAD(&domain->s1_domains);
2796 	spin_lock_init(&domain->s1_lock);
2797 
2798 	return domain;
2799 }
2800 
compute_vasz_lg2_fs(struct intel_iommu * iommu,unsigned int * top_level)2801 static unsigned int compute_vasz_lg2_fs(struct intel_iommu *iommu,
2802 					unsigned int *top_level)
2803 {
2804 	unsigned int mgaw = cap_mgaw(iommu->cap);
2805 
2806 	/*
2807 	 * Spec 3.6 First-Stage Translation:
2808 	 *
2809 	 * Software must limit addresses to less than the minimum of MGAW
2810 	 * and the lower canonical address width implied by FSPM (i.e.,
2811 	 * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level).
2812 	 */
2813 	if (mgaw > 48 && cap_fl5lp_support(iommu->cap)) {
2814 		*top_level = 4;
2815 		return min(57, mgaw);
2816 	}
2817 
2818 	/* Four level is always supported */
2819 	*top_level = 3;
2820 	return min(48, mgaw);
2821 }
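
/*
 * Worked example for compute_vasz_lg2_fs(): hardware reporting
 * cap_mgaw() == 57 with 5-level paging support gets a 57-bit input
 * address width and *top_level == 4 (a five-level walk); without 5-level
 * support, or with cap_mgaw() <= 48, it falls back to a four-level walk
 * (*top_level == 3) capped at a 48-bit width.
 */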
2822 
2823 static struct iommu_domain *
intel_iommu_domain_alloc_first_stage(struct device * dev,struct intel_iommu * iommu,u32 flags)2824 intel_iommu_domain_alloc_first_stage(struct device *dev,
2825 				     struct intel_iommu *iommu, u32 flags)
2826 {
2827 	struct pt_iommu_x86_64_cfg cfg = {};
2828 	struct dmar_domain *dmar_domain;
2829 	int ret;
2830 
2831 	if (flags & ~IOMMU_HWPT_ALLOC_PASID)
2832 		return ERR_PTR(-EOPNOTSUPP);
2833 
2834 	/* Only SL is available in legacy mode */
2835 	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
2836 		return ERR_PTR(-EOPNOTSUPP);
2837 
2838 	dmar_domain = paging_domain_alloc();
2839 	if (IS_ERR(dmar_domain))
2840 		return ERR_CAST(dmar_domain);
2841 
2842 	cfg.common.hw_max_vasz_lg2 =
2843 		compute_vasz_lg2_fs(iommu, &cfg.top_level);
2844 	cfg.common.hw_max_oasz_lg2 = 52;
2845 	cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
2846 			      BIT(PT_FEAT_FLUSH_RANGE);
2847 	/* First stage always uses scalable mode */
2848 	if (!ecap_smpwc(iommu->ecap))
2849 		cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
2850 	dmar_domain->iommu.iommu_device = dev;
2851 	dmar_domain->iommu.nid = dev_to_node(dev);
2852 	dmar_domain->domain.ops = &intel_fs_paging_domain_ops;
2853 	/*
2854 	 * iotlb sync for map is only needed for legacy implementations that
2855 	 * explicitly require flushing internal write buffers to ensure memory
2856 	 * coherence.
2857 	 */
2858 	if (rwbf_required(iommu))
2859 		dmar_domain->iotlb_sync_map = true;
2860 
2861 	ret = pt_iommu_x86_64_init(&dmar_domain->fspt, &cfg, GFP_KERNEL);
2862 	if (ret) {
2863 		kfree(dmar_domain);
2864 		return ERR_PTR(ret);
2865 	}
2866 
2867 	if (!cap_fl1gp_support(iommu->cap))
2868 		dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
2869 	if (!intel_iommu_superpage)
2870 		dmar_domain->domain.pgsize_bitmap = SZ_4K;
2871 
2872 	return &dmar_domain->domain;
2873 }
2874 
compute_vasz_lg2_ss(struct intel_iommu * iommu,unsigned int * top_level)2875 static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu,
2876 					unsigned int *top_level)
2877 {
2878 	unsigned int sagaw = cap_sagaw(iommu->cap);
2879 	unsigned int mgaw = cap_mgaw(iommu->cap);
2880 
2881 	/*
2882 	 * Find the largest table size that both the mgaw and sagaw support.
2883 	 * This sets the valid range of IOVA and the top starting level.
2884 	 * Some HW may only support a 4 or 5 level walk but must limit IOVA to
2885 	 * 3 levels.
2886 	 */
2887 	if (mgaw > 48 && sagaw >= BIT(3)) {
2888 		*top_level = 4;
2889 		return min(57, mgaw);
2890 	} else if (mgaw > 39 && sagaw >= BIT(2)) {
2891 		*top_level = 3 + ffs(sagaw >> 3);
2892 		return min(48, mgaw);
2893 	} else if (mgaw > 30 && sagaw >= BIT(1)) {
2894 		*top_level = 2 + ffs(sagaw >> 2);
2895 		return min(39, mgaw);
2896 	}
2897 	return 0;
2898 }
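
/*
 * Worked example for compute_vasz_lg2_ss(): with cap_mgaw() == 48 and
 * cap_sagaw() == 0x4 (only the 4-level AGAW bit set), the 5-level branch
 * is skipped because mgaw is not above 48, the second branch matches,
 * *top_level becomes 3 (a four-level walk) and the usable IOVA width is
 * min(48, 48) == 48 bits. If no supported combination is found, the
 * function returns 0.
 */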
2899 
2900 static const struct iommu_dirty_ops intel_second_stage_dirty_ops = {
2901 	IOMMU_PT_DIRTY_OPS(vtdss),
2902 	.set_dirty_tracking = intel_iommu_set_dirty_tracking,
2903 };
2904 
2905 static struct iommu_domain *
intel_iommu_domain_alloc_second_stage(struct device * dev,struct intel_iommu * iommu,u32 flags)2906 intel_iommu_domain_alloc_second_stage(struct device *dev,
2907 				      struct intel_iommu *iommu, u32 flags)
2908 {
2909 	struct pt_iommu_vtdss_cfg cfg = {};
2910 	struct dmar_domain *dmar_domain;
2911 	unsigned int sslps;
2912 	int ret;
2913 
2914 	if (flags &
2915 	    (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
2916 	       IOMMU_HWPT_ALLOC_PASID)))
2917 		return ERR_PTR(-EOPNOTSUPP);
2918 
2919 	if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) &&
2920 	     !nested_supported(iommu)) ||
2921 	    ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
2922 	     !ssads_supported(iommu)))
2923 		return ERR_PTR(-EOPNOTSUPP);
2924 
2925 	/* Legacy mode always supports second stage */
2926 	if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
2927 		return ERR_PTR(-EOPNOTSUPP);
2928 
2929 	dmar_domain = paging_domain_alloc();
2930 	if (IS_ERR(dmar_domain))
2931 		return ERR_CAST(dmar_domain);
2932 
2933 	cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level);
2934 	cfg.common.hw_max_oasz_lg2 = 52;
2935 	cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE);
2936 
2937 	/*
2938 	 * Read-only mapping is disallowed on the domain which serves as the
2939 	 * parent in a nested configuration, due to HW errata
2940 	 * (ERRATA_772415_SPR17)
2941 	 */
2942 	if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)
2943 		cfg.common.features |= BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE);
2944 
2945 	if (!iommu_paging_structure_coherency(iommu))
2946 		cfg.common.features |= BIT(PT_FEAT_DMA_INCOHERENT);
2947 	dmar_domain->iommu.iommu_device = dev;
2948 	dmar_domain->iommu.nid = dev_to_node(dev);
2949 	dmar_domain->domain.ops = &intel_ss_paging_domain_ops;
2950 	dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
2951 
2952 	if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
2953 		dmar_domain->domain.dirty_ops = &intel_second_stage_dirty_ops;
2954 
2955 	ret = pt_iommu_vtdss_init(&dmar_domain->sspt, &cfg, GFP_KERNEL);
2956 	if (ret) {
2957 		kfree(dmar_domain);
2958 		return ERR_PTR(ret);
2959 	}
2960 
2961 	/* Adjust the supported page sizes to HW capability */
2962 	sslps = cap_super_page_val(iommu->cap);
2963 	if (!(sslps & BIT(0)))
2964 		dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_2M;
2965 	if (!(sslps & BIT(1)))
2966 		dmar_domain->domain.pgsize_bitmap &= ~(u64)SZ_1G;
2967 	if (!intel_iommu_superpage)
2968 		dmar_domain->domain.pgsize_bitmap = SZ_4K;
2969 
2970 	/*
2971 	 * Besides the internal write buffer flush, caching mode (used by
2972 	 * hypervisors to shadow the page tables for legacy nested
2973 	 * translation) also requires an iotlb sync on map.
2974 	 */
2975 	if (rwbf_required(iommu) || cap_caching_mode(iommu->cap))
2976 		dmar_domain->iotlb_sync_map = true;
2977 
2978 	return &dmar_domain->domain;
2979 }
2980 
2981 static struct iommu_domain *
intel_iommu_domain_alloc_paging_flags(struct device * dev,u32 flags,const struct iommu_user_data * user_data)2982 intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
2983 				      const struct iommu_user_data *user_data)
2984 {
2985 	struct device_domain_info *info = dev_iommu_priv_get(dev);
2986 	struct intel_iommu *iommu = info->iommu;
2987 	struct iommu_domain *domain;
2988 
2989 	if (user_data)
2990 		return ERR_PTR(-EOPNOTSUPP);
2991 
2992 	/* Prefer first stage if possible by default. */
2993 	domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags);
2994 	if (domain != ERR_PTR(-EOPNOTSUPP))
2995 		return domain;
2996 	return intel_iommu_domain_alloc_second_stage(dev, iommu, flags);
2997 }
2998 
intel_iommu_domain_free(struct iommu_domain * domain)2999 static void intel_iommu_domain_free(struct iommu_domain *domain)
3000 {
3001 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3002 
3003 	if (WARN_ON(dmar_domain->nested_parent &&
3004 		    !list_empty(&dmar_domain->s1_domains)))
3005 		return;
3006 
3007 	if (WARN_ON(!list_empty(&dmar_domain->devices)))
3008 		return;
3009 
3010 	pt_iommu_deinit(&dmar_domain->iommu);
3011 
3012 	kfree(dmar_domain->qi_batch);
3013 	kfree(dmar_domain);
3014 }
3015 
paging_domain_compatible_first_stage(struct dmar_domain * dmar_domain,struct intel_iommu * iommu)3016 static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain,
3017 						struct intel_iommu *iommu)
3018 {
3019 	if (WARN_ON(dmar_domain->domain.dirty_ops ||
3020 		    dmar_domain->nested_parent))
3021 		return -EINVAL;
3022 
3023 	/* Only SL is available in legacy mode */
3024 	if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
3025 		return -EINVAL;
3026 
3027 	if (!ecap_smpwc(iommu->ecap) &&
3028 	    !(dmar_domain->fspt.x86_64_pt.common.features &
3029 	      BIT(PT_FEAT_DMA_INCOHERENT)))
3030 		return -EINVAL;
3031 
3032 	/* Supports the number of table levels */
3033 	if (!cap_fl5lp_support(iommu->cap) &&
3034 	    dmar_domain->fspt.x86_64_pt.common.max_vasz_lg2 > 48)
3035 		return -EINVAL;
3036 
3037 	/* Same page size support */
3038 	if (!cap_fl1gp_support(iommu->cap) &&
3039 	    (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3040 		return -EINVAL;
3041 
3042 	/* iotlb sync on map requirement */
3043 	if ((rwbf_required(iommu)) && !dmar_domain->iotlb_sync_map)
3044 		return -EINVAL;
3045 
3046 	return 0;
3047 }
3048 
3049 static int
paging_domain_compatible_second_stage(struct dmar_domain * dmar_domain,struct intel_iommu * iommu)3050 paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain,
3051 				      struct intel_iommu *iommu)
3052 {
3053 	unsigned int vasz_lg2 = dmar_domain->sspt.vtdss_pt.common.max_vasz_lg2;
3054 	unsigned int sslps = cap_super_page_val(iommu->cap);
3055 	struct pt_iommu_vtdss_hw_info pt_info;
3056 
3057 	pt_iommu_vtdss_hw_info(&dmar_domain->sspt, &pt_info);
3058 
3059 	if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu))
3060 		return -EINVAL;
3061 	if (dmar_domain->nested_parent && !nested_supported(iommu))
3062 		return -EINVAL;
3063 
3064 	/* Legacy mode always supports second stage */
3065 	if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
3066 		return -EINVAL;
3067 
3068 	if (!iommu_paging_structure_coherency(iommu) &&
3069 	    !(dmar_domain->sspt.vtdss_pt.common.features &
3070 	      BIT(PT_FEAT_DMA_INCOHERENT)))
3071 		return -EINVAL;
3072 
3073 	/* Address width falls within the capability */
3074 	if (cap_mgaw(iommu->cap) < vasz_lg2)
3075 		return -EINVAL;
3076 
3077 	/* Page table level is supported. */
3078 	if (!(cap_sagaw(iommu->cap) & BIT(pt_info.aw)))
3079 		return -EINVAL;
3080 
3081 	/* Same page size support */
3082 	if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M))
3083 		return -EINVAL;
3084 	if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G))
3085 		return -EINVAL;
3086 
3087 	/* iotlb sync on map requirement */
3088 	if ((rwbf_required(iommu) || cap_caching_mode(iommu->cap)) &&
3089 	    !dmar_domain->iotlb_sync_map)
3090 		return -EINVAL;
3091 
3092 	/*
3093 	 * FIXME this is locked wrong, it needs to be under the
3094 	 * dmar_domain->lock
3095 	 */
3096 	if ((dmar_domain->sspt.vtdss_pt.common.features &
3097 	     BIT(PT_FEAT_VTDSS_FORCE_COHERENCE)) &&
3098 	    !ecap_sc_support(iommu->ecap))
3099 		return -EINVAL;
3100 	return 0;
3101 }
3102 
paging_domain_compatible(struct iommu_domain * domain,struct device * dev)3103 int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
3104 {
3105 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3106 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3107 	struct intel_iommu *iommu = info->iommu;
3108 	int ret = -EINVAL;
3109 
3110 	if (intel_domain_is_fs_paging(dmar_domain))
3111 		ret = paging_domain_compatible_first_stage(dmar_domain, iommu);
3112 	else if (intel_domain_is_ss_paging(dmar_domain))
3113 		ret = paging_domain_compatible_second_stage(dmar_domain, iommu);
3114 	else if (WARN_ON(true))
3115 		ret = -EINVAL;
3116 	if (ret)
3117 		return ret;
3118 
3119 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3120 	    context_copied(iommu, info->bus, info->devfn))
3121 		return intel_pasid_setup_sm_context(dev);
3122 
3123 	return 0;
3124 }
3125 
intel_iommu_attach_device(struct iommu_domain * domain,struct device * dev,struct iommu_domain * old)3126 static int intel_iommu_attach_device(struct iommu_domain *domain,
3127 				     struct device *dev,
3128 				     struct iommu_domain *old)
3129 {
3130 	int ret;
3131 
3132 	device_block_translation(dev);
3133 
3134 	ret = paging_domain_compatible(domain, dev);
3135 	if (ret)
3136 		return ret;
3137 
3138 	ret = iopf_for_domain_set(domain, dev);
3139 	if (ret)
3140 		return ret;
3141 
3142 	ret = dmar_domain_attach_device(to_dmar_domain(domain), dev);
3143 	if (ret)
3144 		iopf_for_domain_remove(domain, dev);
3145 
3146 	return ret;
3147 }
3148 
intel_iommu_tlb_sync(struct iommu_domain * domain,struct iommu_iotlb_gather * gather)3149 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
3150 				 struct iommu_iotlb_gather *gather)
3151 {
3152 	cache_tag_flush_range(to_dmar_domain(domain), gather->start,
3153 			      gather->end,
3154 			      iommu_pages_list_empty(&gather->freelist));
3155 	iommu_put_pages_list(&gather->freelist);
3156 }
3157 
domain_support_force_snooping(struct dmar_domain * domain)3158 static bool domain_support_force_snooping(struct dmar_domain *domain)
3159 {
3160 	struct device_domain_info *info;
3161 	bool support = true;
3162 
3163 	assert_spin_locked(&domain->lock);
3164 	list_for_each_entry(info, &domain->devices, link) {
3165 		if (!ecap_sc_support(info->iommu->ecap)) {
3166 			support = false;
3167 			break;
3168 		}
3169 	}
3170 
3171 	return support;
3172 }
3173 
intel_iommu_enforce_cache_coherency_fs(struct iommu_domain * domain)3174 static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain)
3175 {
3176 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3177 	struct device_domain_info *info;
3178 
3179 	guard(spinlock_irqsave)(&dmar_domain->lock);
3180 
3181 	if (dmar_domain->force_snooping)
3182 		return true;
3183 
3184 	if (!domain_support_force_snooping(dmar_domain))
3185 		return false;
3186 
3187 	dmar_domain->force_snooping = true;
3188 	list_for_each_entry(info, &dmar_domain->devices, link)
3189 		intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
3190 						     IOMMU_NO_PASID);
3191 	return true;
3192 }
3193 
intel_iommu_enforce_cache_coherency_ss(struct iommu_domain * domain)3194 static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain)
3195 {
3196 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3197 
3198 	guard(spinlock_irqsave)(&dmar_domain->lock);
3199 	if (!domain_support_force_snooping(dmar_domain))
3200 		return false;
3201 
3202 	/*
3203 	 * Second level page table supports per-PTE snoop control. The
3204 	 * iommu_map() interface will handle this by setting SNP bit.
3205 	 */
3206 	dmar_domain->sspt.vtdss_pt.common.features |=
3207 		BIT(PT_FEAT_VTDSS_FORCE_COHERENCE);
3208 	dmar_domain->force_snooping = true;
3209 	return true;
3210 }
3211 
intel_iommu_capable(struct device * dev,enum iommu_cap cap)3212 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
3213 {
3214 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3215 
3216 	switch (cap) {
3217 	case IOMMU_CAP_CACHE_COHERENCY:
3218 	case IOMMU_CAP_DEFERRED_FLUSH:
3219 		return true;
3220 	case IOMMU_CAP_PRE_BOOT_PROTECTION:
3221 		return dmar_platform_optin();
3222 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
3223 		return ecap_sc_support(info->iommu->ecap);
3224 	case IOMMU_CAP_DIRTY_TRACKING:
3225 		return ssads_supported(info->iommu);
3226 	default:
3227 		return false;
3228 	}
3229 }
3230 
intel_iommu_probe_device(struct device * dev)3231 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
3232 {
3233 	struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
3234 	struct device_domain_info *info;
3235 	struct intel_iommu *iommu;
3236 	u8 bus, devfn;
3237 	int ret;
3238 
3239 	iommu = device_lookup_iommu(dev, &bus, &devfn);
3240 	if (!iommu || !iommu->iommu.ops)
3241 		return ERR_PTR(-ENODEV);
3242 
3243 	info = kzalloc(sizeof(*info), GFP_KERNEL);
3244 	if (!info)
3245 		return ERR_PTR(-ENOMEM);
3246 
3247 	if (dev_is_real_dma_subdevice(dev)) {
3248 		info->bus = pdev->bus->number;
3249 		info->devfn = pdev->devfn;
3250 		info->segment = pci_domain_nr(pdev->bus);
3251 	} else {
3252 		info->bus = bus;
3253 		info->devfn = devfn;
3254 		info->segment = iommu->segment;
3255 	}
3256 
3257 	info->dev = dev;
3258 	info->iommu = iommu;
3259 	if (dev_is_pci(dev)) {
3260 		if (ecap_dev_iotlb_support(iommu->ecap) &&
3261 		    pci_ats_supported(pdev) &&
3262 		    dmar_ats_supported(pdev, iommu)) {
3263 			info->ats_supported = 1;
3264 			info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
3265 
3266 			/*
3267 			 * For IOMMUs that support device IOTLB throttling
3268 			 * (DIT), assign the PF's source-id (PFSID) to a VF's
3269 			 * invalidation descriptors so the hardware can gauge
3270 			 * queue depth at the PF level. If DIT is not set, the
3271 			 * PFSID field is treated as reserved and must be 0.
3272 			 */
3273 			if (ecap_dit(iommu->ecap))
3274 				info->pfsid = pci_dev_id(pci_physfn(pdev));
3275 			info->ats_qdep = pci_ats_queue_depth(pdev);
3276 		}
3277 		if (sm_supported(iommu)) {
3278 			if (pasid_supported(iommu)) {
3279 				int features = pci_pasid_features(pdev);
3280 
3281 				if (features >= 0)
3282 					info->pasid_supported = features | 1;
3283 			}
3284 
3285 			if (info->ats_supported && ecap_prs(iommu->ecap) &&
3286 			    ecap_pds(iommu->ecap) && pci_pri_supported(pdev))
3287 				info->pri_supported = 1;
3288 		}
3289 	}
3290 
3291 	dev_iommu_priv_set(dev, info);
3292 	if (pdev && pci_ats_supported(pdev)) {
3293 		pci_prepare_ats(pdev, VTD_PAGE_SHIFT);
3294 		ret = device_rbtree_insert(iommu, info);
3295 		if (ret)
3296 			goto free;
3297 	}
3298 
3299 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3300 		ret = intel_pasid_alloc_table(dev);
3301 		if (ret) {
3302 			dev_err(dev, "PASID table allocation failed\n");
3303 			goto clear_rbtree;
3304 		}
3305 
3306 		if (!context_copied(iommu, info->bus, info->devfn)) {
3307 			ret = intel_pasid_setup_sm_context(dev);
3308 			if (ret)
3309 				goto free_table;
3310 		}
3311 	}
3312 
3313 	intel_iommu_debugfs_create_dev(info);
3314 
3315 	return &iommu->iommu;
3316 free_table:
3317 	intel_pasid_free_table(dev);
3318 clear_rbtree:
3319 	device_rbtree_remove(info);
3320 free:
3321 	kfree(info);
3322 
3323 	return ERR_PTR(ret);
3324 }
3325 
intel_iommu_probe_finalize(struct device * dev)3326 static void intel_iommu_probe_finalize(struct device *dev)
3327 {
3328 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3329 	struct intel_iommu *iommu = info->iommu;
3330 
3331 	/*
3332 	 * The PCIe spec, in its wisdom, declares that the behaviour of the
3333 	 * device is undefined if you enable PASID support after ATS support.
3334 	 * So always enable PASID support on devices which have it, even if
3335 	 * we can't yet know if we're ever going to use it.
3336 	 */
3337 	if (info->pasid_supported &&
3338 	    !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
3339 		info->pasid_enabled = 1;
3340 
3341 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
3342 		iommu_enable_pci_ats(info);
3343 		/* Assign a DEVTLB cache tag to the default domain. */
3344 		if (info->ats_enabled && info->domain) {
3345 			u16 did = domain_id_iommu(info->domain, iommu);
3346 
3347 			if (cache_tag_assign(info->domain, did, dev,
3348 					     IOMMU_NO_PASID, CACHE_TAG_DEVTLB))
3349 				iommu_disable_pci_ats(info);
3350 		}
3351 	}
3352 	iommu_enable_pci_pri(info);
3353 }
3354 
intel_iommu_release_device(struct device * dev)3355 static void intel_iommu_release_device(struct device *dev)
3356 {
3357 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3358 	struct intel_iommu *iommu = info->iommu;
3359 
3360 	iommu_disable_pci_pri(info);
3361 	iommu_disable_pci_ats(info);
3362 
3363 	if (info->pasid_enabled) {
3364 		pci_disable_pasid(to_pci_dev(dev));
3365 		info->pasid_enabled = 0;
3366 	}
3367 
3368 	mutex_lock(&iommu->iopf_lock);
3369 	if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
3370 		device_rbtree_remove(info);
3371 	mutex_unlock(&iommu->iopf_lock);
3372 
3373 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
3374 	    !context_copied(iommu, info->bus, info->devfn))
3375 		intel_pasid_teardown_sm_context(dev);
3376 
3377 	intel_pasid_free_table(dev);
3378 	intel_iommu_debugfs_remove_dev(info);
3379 	kfree(info);
3380 }
3381 
intel_iommu_get_resv_regions(struct device * device,struct list_head * head)3382 static void intel_iommu_get_resv_regions(struct device *device,
3383 					 struct list_head *head)
3384 {
3385 	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
3386 	struct iommu_resv_region *reg;
3387 	struct dmar_rmrr_unit *rmrr;
3388 	struct device *i_dev;
3389 	int i;
3390 
3391 	rcu_read_lock();
3392 	for_each_rmrr_units(rmrr) {
3393 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3394 					  i, i_dev) {
3395 			struct iommu_resv_region *resv;
3396 			enum iommu_resv_type type;
3397 			size_t length;
3398 
3399 			if (i_dev != device &&
3400 			    !is_downstream_to_pci_bridge(device, i_dev))
3401 				continue;
3402 
3403 			length = rmrr->end_address - rmrr->base_address + 1;
3404 
3405 			type = device_rmrr_is_relaxable(device) ?
3406 				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
3407 
3408 			resv = iommu_alloc_resv_region(rmrr->base_address,
3409 						       length, prot, type,
3410 						       GFP_ATOMIC);
3411 			if (!resv)
3412 				break;
3413 
3414 			list_add_tail(&resv->list, head);
3415 		}
3416 	}
3417 	rcu_read_unlock();
3418 
3419 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
3420 	if (dev_is_pci(device)) {
3421 		struct pci_dev *pdev = to_pci_dev(device);
3422 
3423 		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
3424 			reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
3425 					IOMMU_RESV_DIRECT_RELAXABLE,
3426 					GFP_KERNEL);
3427 			if (reg)
3428 				list_add_tail(&reg->list, head);
3429 		}
3430 	}
3431 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
3432 
3433 	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
3434 				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
3435 				      0, IOMMU_RESV_MSI, GFP_KERNEL);
3436 	if (!reg)
3437 		return;
3438 	list_add_tail(&reg->list, head);
3439 }
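
/*
 * The regions built above are what the IOMMU core exports through
 * /sys/kernel/iommu_groups/<n>/reserved_regions. An illustrative listing
 * (addresses are examples only) for a device covered by one RMRR:
 *
 *	0x000000007c000000 0x000000007c7fffff direct
 *	0x00000000fee00000 0x00000000feefffff msi
 */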
3440 
intel_iommu_device_group(struct device * dev)3441 static struct iommu_group *intel_iommu_device_group(struct device *dev)
3442 {
3443 	if (dev_is_pci(dev))
3444 		return pci_device_group(dev);
3445 	return generic_device_group(dev);
3446 }
3447 
intel_iommu_enable_iopf(struct device * dev)3448 int intel_iommu_enable_iopf(struct device *dev)
3449 {
3450 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3451 	struct intel_iommu *iommu = info->iommu;
3452 	int ret;
3453 
3454 	if (!info->pri_enabled)
3455 		return -ENODEV;
3456 
3457 	/* pri_enabled is protected by the group mutex. */
3458 	iommu_group_mutex_assert(dev);
3459 	if (info->iopf_refcount) {
3460 		info->iopf_refcount++;
3461 		return 0;
3462 	}
3463 
3464 	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
3465 	if (ret)
3466 		return ret;
3467 
3468 	info->iopf_refcount = 1;
3469 
3470 	return 0;
3471 }
3472 
intel_iommu_disable_iopf(struct device * dev)3473 void intel_iommu_disable_iopf(struct device *dev)
3474 {
3475 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3476 	struct intel_iommu *iommu = info->iommu;
3477 
3478 	if (WARN_ON(!info->pri_enabled || !info->iopf_refcount))
3479 		return;
3480 
3481 	iommu_group_mutex_assert(dev);
3482 	if (--info->iopf_refcount)
3483 		return;
3484 
3485 	iopf_queue_remove_device(iommu->iopf_queue, dev);
3486 }
3487 
intel_iommu_is_attach_deferred(struct device * dev)3488 static bool intel_iommu_is_attach_deferred(struct device *dev)
3489 {
3490 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3491 
3492 	return translation_pre_enabled(info->iommu) && !info->domain;
3493 }
3494 
3495 /*
3496  * Check that the device does not live on an external facing PCI port that is
3497  * marked as untrusted. Such devices should not be able to apply quirks and
3498  * thus not be able to bypass the IOMMU restrictions.
3499  */
3500 static bool risky_device(struct pci_dev *pdev)
3501 {
3502 	if (pdev->untrusted) {
3503 		pci_info(pdev,
3504 			 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
3505 			 pdev->vendor, pdev->device);
3506 		pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
3507 		return true;
3508 	}
3509 	return false;
3510 }
3511 
3512 static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
3513 				      unsigned long iova, size_t size)
3514 {
3515 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3516 
3517 	if (dmar_domain->iotlb_sync_map)
3518 		cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1);
3519 
3520 	return 0;
3521 }
3522 
3523 void domain_remove_dev_pasid(struct iommu_domain *domain,
3524 			     struct device *dev, ioasid_t pasid)
3525 {
3526 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3527 	struct dev_pasid_info *curr, *dev_pasid = NULL;
3528 	struct intel_iommu *iommu = info->iommu;
3529 	struct dmar_domain *dmar_domain;
3530 	unsigned long flags;
3531 
3532 	if (!domain)
3533 		return;
3534 
3535 	/* The identity domain has no PASID metadata. */
3536 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
3537 		return;
3538 
3539 	dmar_domain = to_dmar_domain(domain);
3540 	spin_lock_irqsave(&dmar_domain->lock, flags);
3541 	list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
3542 		if (curr->dev == dev && curr->pasid == pasid) {
3543 			list_del(&curr->link_domain);
3544 			dev_pasid = curr;
3545 			break;
3546 		}
3547 	}
3548 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
3549 
3550 	cache_tag_unassign_domain(dmar_domain, dev, pasid);
3551 	domain_detach_iommu(dmar_domain, iommu);
3552 	if (!WARN_ON_ONCE(!dev_pasid)) {
3553 		intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
3554 		kfree(dev_pasid);
3555 	}
3556 }
3557 
3558 static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
3559 					 struct device *dev, ioasid_t pasid,
3560 					 struct iommu_domain *old)
3561 {
3562 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3563 
3564 	intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
3565 	iopf_for_domain_remove(old, dev);
3566 	domain_remove_dev_pasid(old, dev, pasid);
3567 
3568 	return 0;
3569 }
3570 
3571 struct dev_pasid_info *
3572 domain_add_dev_pasid(struct iommu_domain *domain,
3573 		     struct device *dev, ioasid_t pasid)
3574 {
3575 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3576 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3577 	struct intel_iommu *iommu = info->iommu;
3578 	struct dev_pasid_info *dev_pasid;
3579 	unsigned long flags;
3580 	int ret;
3581 
3582 	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
3583 	if (!dev_pasid)
3584 		return ERR_PTR(-ENOMEM);
3585 
3586 	ret = domain_attach_iommu(dmar_domain, iommu);
3587 	if (ret)
3588 		goto out_free;
3589 
3590 	ret = cache_tag_assign_domain(dmar_domain, dev, pasid);
3591 	if (ret)
3592 		goto out_detach_iommu;
3593 
3594 	dev_pasid->dev = dev;
3595 	dev_pasid->pasid = pasid;
3596 	spin_lock_irqsave(&dmar_domain->lock, flags);
3597 	list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
3598 	spin_unlock_irqrestore(&dmar_domain->lock, flags);
3599 
3600 	return dev_pasid;
3601 out_detach_iommu:
3602 	domain_detach_iommu(dmar_domain, iommu);
3603 out_free:
3604 	kfree(dev_pasid);
3605 	return ERR_PTR(ret);
3606 }
3607 
3608 static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
3609 				     struct device *dev, ioasid_t pasid,
3610 				     struct iommu_domain *old)
3611 {
3612 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3613 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3614 	struct intel_iommu *iommu = info->iommu;
3615 	struct dev_pasid_info *dev_pasid;
3616 	int ret;
3617 
3618 	if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
3619 		return -EINVAL;
3620 
3621 	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
3622 		return -EOPNOTSUPP;
3623 
3624 	if (domain->dirty_ops)
3625 		return -EINVAL;
3626 
3627 	if (context_copied(iommu, info->bus, info->devfn))
3628 		return -EBUSY;
3629 
3630 	ret = paging_domain_compatible(domain, dev);
3631 	if (ret)
3632 		return ret;
3633 
3634 	dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
3635 	if (IS_ERR(dev_pasid))
3636 		return PTR_ERR(dev_pasid);
3637 
3638 	ret = iopf_for_domain_replace(domain, old, dev);
3639 	if (ret)
3640 		goto out_remove_dev_pasid;
3641 
3642 	if (intel_domain_is_fs_paging(dmar_domain))
3643 		ret = domain_setup_first_level(iommu, dmar_domain,
3644 					       dev, pasid, old);
3645 	else if (intel_domain_is_ss_paging(dmar_domain))
3646 		ret = domain_setup_second_level(iommu, dmar_domain,
3647 						dev, pasid, old);
3648 	else if (WARN_ON(true))
3649 		ret = -EINVAL;
3650 
3651 	if (ret)
3652 		goto out_unwind_iopf;
3653 
3654 	domain_remove_dev_pasid(old, dev, pasid);
3655 
3656 	intel_iommu_debugfs_create_dev_pasid(dev_pasid);
3657 
3658 	return 0;
3659 
3660 out_unwind_iopf:
3661 	iopf_for_domain_replace(old, domain, dev);
3662 out_remove_dev_pasid:
3663 	domain_remove_dev_pasid(domain, dev, pasid);
3664 	return ret;
3665 }
3666 
3667 static void *intel_iommu_hw_info(struct device *dev, u32 *length,
3668 				 enum iommu_hw_info_type *type)
3669 {
3670 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3671 	struct intel_iommu *iommu = info->iommu;
3672 	struct iommu_hw_info_vtd *vtd;
3673 
3674 	if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
3675 	    *type != IOMMU_HW_INFO_TYPE_INTEL_VTD)
3676 		return ERR_PTR(-EOPNOTSUPP);
3677 
3678 	vtd = kzalloc(sizeof(*vtd), GFP_KERNEL);
3679 	if (!vtd)
3680 		return ERR_PTR(-ENOMEM);
3681 
3682 	vtd->flags = IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17;
3683 	vtd->cap_reg = iommu->cap;
3684 	vtd->ecap_reg = iommu->ecap;
3685 	*length = sizeof(*vtd);
3686 	*type = IOMMU_HW_INFO_TYPE_INTEL_VTD;
3687 	return vtd;
3688 }
3689 
3690 /*
3691  * Set dirty tracking for the device list of a domain. The caller must
3692  * hold the domain->lock when calling it.
3693  */
3694 static int device_set_dirty_tracking(struct list_head *devices, bool enable)
3695 {
3696 	struct device_domain_info *info;
3697 	int ret = 0;
3698 
3699 	list_for_each_entry(info, devices, link) {
3700 		ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
3701 						       IOMMU_NO_PASID, enable);
3702 		if (ret)
3703 			break;
3704 	}
3705 
3706 	return ret;
3707 }
3708 
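/*
 * Illustrative sketch only (not part of the upstream driver): any caller of
 * device_set_dirty_tracking() must honor the locking contract noted above and
 * hold domain->lock across the device-list walk, as
 * intel_iommu_set_dirty_tracking() below does.
 */
static int __maybe_unused example_set_dirty_locked(struct dmar_domain *domain,
						   bool enable)
{
	int ret;

	spin_lock(&domain->lock);
	ret = device_set_dirty_tracking(&domain->devices, enable);
	spin_unlock(&domain->lock);

	return ret;
}
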
3709 static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
3710 					    bool enable)
3711 {
3712 	struct dmar_domain *s1_domain;
3713 	unsigned long flags;
3714 	int ret;
3715 
3716 	spin_lock(&domain->s1_lock);
3717 	list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
3718 		spin_lock_irqsave(&s1_domain->lock, flags);
3719 		ret = device_set_dirty_tracking(&s1_domain->devices, enable);
3720 		spin_unlock_irqrestore(&s1_domain->lock, flags);
3721 		if (ret)
3722 			goto err_unwind;
3723 	}
3724 	spin_unlock(&domain->s1_lock);
3725 	return 0;
3726 
3727 err_unwind:
3728 	list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
3729 		spin_lock_irqsave(&s1_domain->lock, flags);
3730 		device_set_dirty_tracking(&s1_domain->devices,
3731 					  domain->dirty_tracking);
3732 		spin_unlock_irqrestore(&s1_domain->lock, flags);
3733 	}
3734 	spin_unlock(&domain->s1_lock);
3735 	return ret;
3736 }
3737 
3738 static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
3739 					  bool enable)
3740 {
3741 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
3742 	int ret;
3743 
3744 	spin_lock(&dmar_domain->lock);
3745 	if (dmar_domain->dirty_tracking == enable)
3746 		goto out_unlock;
3747 
3748 	ret = device_set_dirty_tracking(&dmar_domain->devices, enable);
3749 	if (ret)
3750 		goto err_unwind;
3751 
3752 	if (dmar_domain->nested_parent) {
3753 		ret = parent_domain_set_dirty_tracking(dmar_domain, enable);
3754 		if (ret)
3755 			goto err_unwind;
3756 	}
3757 
3758 	dmar_domain->dirty_tracking = enable;
3759 out_unlock:
3760 	spin_unlock(&dmar_domain->lock);
3761 
3762 	return 0;
3763 
3764 err_unwind:
3765 	device_set_dirty_tracking(&dmar_domain->devices,
3766 				  dmar_domain->dirty_tracking);
3767 	spin_unlock(&dmar_domain->lock);
3768 	return ret;
3769 }
3770 
3771 static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
3772 {
3773 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3774 	struct intel_iommu *iommu = info->iommu;
3775 	struct context_entry *context;
3776 
3777 	spin_lock(&iommu->lock);
3778 	context = iommu_context_addr(iommu, bus, devfn, 1);
3779 	if (!context) {
3780 		spin_unlock(&iommu->lock);
3781 		return -ENOMEM;
3782 	}
3783 
3784 	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
3785 		spin_unlock(&iommu->lock);
3786 		return 0;
3787 	}
3788 
3789 	copied_context_tear_down(iommu, context, bus, devfn);
3790 	context_clear_entry(context);
3791 	context_set_domain_id(context, FLPT_DEFAULT_DID);
3792 
3793 	/*
3794 	 * In pass-through mode, AW must be programmed to indicate the largest
3795 	 * AGAW value supported by hardware; the ASR is ignored by hardware.
3796 	 */
3797 	context_set_address_width(context, iommu->msagaw);
3798 	context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH);
3799 	context_set_fault_enable(context);
3800 	context_set_present(context);
3801 	if (!ecap_coherent(iommu->ecap))
3802 		clflush_cache_range(context, sizeof(*context));
3803 	context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn);
3804 	spin_unlock(&iommu->lock);
3805 
3806 	return 0;
3807 }
3808 
3809 static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data)
3810 {
3811 	struct device *dev = data;
3812 
3813 	return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff);
3814 }
3815 
3816 static int device_setup_pass_through(struct device *dev)
3817 {
3818 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3819 
3820 	if (!dev_is_pci(dev))
3821 		return context_setup_pass_through(dev, info->bus, info->devfn);
3822 
3823 	return pci_for_each_dma_alias(to_pci_dev(dev),
3824 				      context_setup_pass_through_cb, dev);
3825 }
3826 
3827 static int identity_domain_attach_dev(struct iommu_domain *domain,
3828 				      struct device *dev,
3829 				      struct iommu_domain *old)
3830 {
3831 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3832 	struct intel_iommu *iommu = info->iommu;
3833 	int ret;
3834 
3835 	device_block_translation(dev);
3836 
3837 	if (dev_is_real_dma_subdevice(dev))
3838 		return 0;
3839 
3840 	/*
3841 	 * No PRI support with the global identity domain. No need to enable or
3842 	 * disable PRI in this path as the iommu has been put in the blocking
3843 	 * state.
3844 	 */
3845 	if (sm_supported(iommu))
3846 		ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
3847 	else
3848 		ret = device_setup_pass_through(dev);
3849 
3850 	if (!ret)
3851 		info->domain_attached = true;
3852 
3853 	return ret;
3854 }
3855 
3856 static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
3857 					 struct device *dev, ioasid_t pasid,
3858 					 struct iommu_domain *old)
3859 {
3860 	struct device_domain_info *info = dev_iommu_priv_get(dev);
3861 	struct intel_iommu *iommu = info->iommu;
3862 	int ret;
3863 
3864 	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
3865 		return -EOPNOTSUPP;
3866 
3867 	ret = iopf_for_domain_replace(domain, old, dev);
3868 	if (ret)
3869 		return ret;
3870 
3871 	ret = domain_setup_passthrough(iommu, dev, pasid, old);
3872 	if (ret) {
3873 		iopf_for_domain_replace(old, domain, dev);
3874 		return ret;
3875 	}
3876 
3877 	domain_remove_dev_pasid(old, dev, pasid);
3878 	return 0;
3879 }
3880 
3881 static struct iommu_domain identity_domain = {
3882 	.type = IOMMU_DOMAIN_IDENTITY,
3883 	.ops = &(const struct iommu_domain_ops) {
3884 		.attach_dev	= identity_domain_attach_dev,
3885 		.set_dev_pasid	= identity_domain_set_dev_pasid,
3886 	},
3887 };
3888 
3889 const struct iommu_domain_ops intel_fs_paging_domain_ops = {
3890 	IOMMU_PT_DOMAIN_OPS(x86_64),
3891 	.attach_dev = intel_iommu_attach_device,
3892 	.set_dev_pasid = intel_iommu_set_dev_pasid,
3893 	.iotlb_sync_map = intel_iommu_iotlb_sync_map,
3894 	.flush_iotlb_all = intel_flush_iotlb_all,
3895 	.iotlb_sync = intel_iommu_tlb_sync,
3896 	.free = intel_iommu_domain_free,
3897 	.enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs,
3898 };
3899 
3900 const struct iommu_domain_ops intel_ss_paging_domain_ops = {
3901 	IOMMU_PT_DOMAIN_OPS(vtdss),
3902 	.attach_dev = intel_iommu_attach_device,
3903 	.set_dev_pasid = intel_iommu_set_dev_pasid,
3904 	.iotlb_sync_map = intel_iommu_iotlb_sync_map,
3905 	.flush_iotlb_all = intel_flush_iotlb_all,
3906 	.iotlb_sync = intel_iommu_tlb_sync,
3907 	.free = intel_iommu_domain_free,
3908 	.enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss,
3909 };
3910 
3911 const struct iommu_ops intel_iommu_ops = {
3912 	.blocked_domain		= &blocking_domain,
3913 	.release_domain		= &blocking_domain,
3914 	.identity_domain	= &identity_domain,
3915 	.capable		= intel_iommu_capable,
3916 	.hw_info		= intel_iommu_hw_info,
3917 	.domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags,
3918 	.domain_alloc_sva	= intel_svm_domain_alloc,
3919 	.domain_alloc_nested	= intel_iommu_domain_alloc_nested,
3920 	.probe_device		= intel_iommu_probe_device,
3921 	.probe_finalize		= intel_iommu_probe_finalize,
3922 	.release_device		= intel_iommu_release_device,
3923 	.get_resv_regions	= intel_iommu_get_resv_regions,
3924 	.device_group		= intel_iommu_device_group,
3925 	.is_attach_deferred	= intel_iommu_is_attach_deferred,
3926 	.def_domain_type	= device_def_domain_type,
3927 	.page_response		= intel_iommu_page_response,
3928 };
3929 
3930 static void quirk_iommu_igfx(struct pci_dev *dev)
3931 {
3932 	if (risky_device(dev))
3933 		return;
3934 
3935 	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
3936 	disable_igfx_iommu = 1;
3937 }
3938 
3939 /* G4x/GM45 integrated gfx dmar support is totally busted. */
3940 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
3941 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
3942 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
3943 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
3944 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
3945 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
3946 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
3947 
3948 /* QM57/QS57 integrated gfx malfunctions with dmar */
3949 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx);
3950 
3951 /* Broadwell igfx malfunctions with dmar */
3952 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
3953 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
3954 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
3955 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
3956 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
3957 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
3958 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
3959 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
3960 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
3961 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
3962 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
3963 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
3964 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
3965 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
3966 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
3967 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
3968 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
3969 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
3970 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
3971 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
3972 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
3973 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
3974 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
3975 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
3976 
3977 static void quirk_iommu_rwbf(struct pci_dev *dev)
3978 {
3979 	if (risky_device(dev))
3980 		return;
3981 
3982 	/*
3983 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
3984 	 * but needs it. Same seems to hold for the desktop versions.
3985 	 */
3986 	pci_info(dev, "Forcing write-buffer flush capability\n");
3987 	rwbf_quirk = 1;
3988 }
3989 
3990 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3991 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
3992 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
3993 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
3994 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
3995 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
3996 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
3997 
3998 #define GGC 0x52
3999 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
4000 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
4001 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
4002 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
4003 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
4004 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
4005 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
4006 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
4007 
4008 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4009 {
4010 	unsigned short ggc;
4011 
4012 	if (risky_device(dev))
4013 		return;
4014 
4015 	if (pci_read_config_word(dev, GGC, &ggc))
4016 		return;
4017 
4018 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4019 		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4020 		disable_igfx_iommu = 1;
4021 	} else if (!disable_igfx_iommu) {
4022 		/* we have to ensure the gfx device is idle before we flush */
4023 		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
4024 		iommu_set_dma_strict();
4025 	}
4026 }
4027 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4028 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4029 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4030 
4031 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
4032 {
4033 	unsigned short ver;
4034 
4035 	if (!IS_GFX_DEVICE(dev))
4036 		return;
4037 
4038 	ver = (dev->device >> 8) & 0xff;
4039 	if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
4040 	    ver != 0x4e && ver != 0x8a && ver != 0x98 &&
4041 	    ver != 0x9a && ver != 0xa7 && ver != 0x7d)
4042 		return;
4043 
4044 	if (risky_device(dev))
4045 		return;
4046 
4047 	pci_info(dev, "Skip IOMMU disabling for graphics\n");
4048 	iommu_skip_te_disable = 1;
4049 }
4050 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
4051 
4052 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4053  * ISOCH DMAR unit for the Azalia sound device, but not give it any
4054  * TLB entries, which causes it to deadlock. Check for that. We do
4055  * this in a function called from init_dmars(), instead of in a PCI
4056  * quirk, because we don't want to print the obnoxious "BIOS broken"
4057  * message if VT-d is actually disabled.
4058  */
4059 static void __init check_tylersburg_isoch(void)
4060 {
4061 	struct pci_dev *pdev;
4062 	uint32_t vtisochctrl;
4063 
4064 	/* If there's no Azalia in the system anyway, forget it. */
4065 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4066 	if (!pdev)
4067 		return;
4068 
4069 	if (risky_device(pdev)) {
4070 		pci_dev_put(pdev);
4071 		return;
4072 	}
4073 
4074 	pci_dev_put(pdev);
4075 
4076 	/* System Management Registers. Might be hidden, in which case
4077 	 * we can't do the sanity check. But that's OK, because the
4078 	 * known-broken BIOSes _don't_ actually hide it, so far. */
4079 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4080 	if (!pdev)
4081 		return;
4082 
4083 	if (risky_device(pdev)) {
4084 		pci_dev_put(pdev);
4085 		return;
4086 	}
4087 
4088 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4089 		pci_dev_put(pdev);
4090 		return;
4091 	}
4092 
4093 	pci_dev_put(pdev);
4094 
4095 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4096 	if (vtisochctrl & 1)
4097 		return;
4098 
4099 	/* Drop all bits other than the number of TLB entries */
4100 	vtisochctrl &= 0x1c;
4101 
4102 	/* If we have the recommended number of TLB entries (16), fine. */
4103 	if (vtisochctrl == 0x10)
4104 		return;
4105 
4106 	/* Zero TLB entries? You get to ride the short bus to school. */
4107 	if (!vtisochctrl) {
4108 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4109 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4110 		     dmi_get_system_info(DMI_BIOS_VENDOR),
4111 		     dmi_get_system_info(DMI_BIOS_VERSION),
4112 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
4113 		iommu_identity_mapping |= IDENTMAP_AZALIA;
4114 		return;
4115 	}
4116 
4117 	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4118 	       vtisochctrl);
4119 }
4120 
4121 /*
4122  * Here we deal with a device TLB defect where a device may inadvertently issue an
4123  * ATS invalidation completion before posted writes initiated with translated
4124  * addresses that used translations matching the invalidation address range,
4125  * violating the invalidation completion ordering.
4126  * Therefore, any use case that cannot guarantee DMA is stopped before unmap is
4127  * vulnerable to this defect. In other words, any dTLB invalidation initiated not
4128  * under the control of the trusted/privileged host device driver must use this
4129  * quirk.
4130  * Device TLBs are invalidated under the following six conditions:
4131  * 1. Device driver does DMA API unmap IOVA
4132  * 2. Device driver unbind a PASID from a process, sva_unbind_device()
4133  * 2. Device driver unbinds a PASID from a process, sva_unbind_device()
4134  * 3. PASID is torn down after the PASID cache is flushed, e.g. process
4135  *    exit_mmap() due to a crash
4136  * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where the
4137  *    VM has to free pages that were unmapped
4138  * 6. Cache invalidation in vSVA usage (upcoming)
4139  *
4140  * For #1 and #2, device drivers are responsible for stopping DMA traffic
4141  * before unmap/unbind. For #3, iommu driver gets mmu_notifier to
4142  * before unmap/unbind. For #3, the iommu driver gets an mmu_notifier callback to
4143  * invalidate the TLB the same way as a normal user unmap, which will use this quirk.
4144  *
4145  * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
4146  */
4147 void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
4148 			       unsigned long address, unsigned long mask,
4149 			       u32 pasid, u16 qdep)
4150 {
4151 	u16 sid;
4152 
4153 	if (likely(!info->dtlb_extra_inval))
4154 		return;
4155 
4156 	sid = PCI_DEVID(info->bus, info->devfn);
4157 	if (pasid == IOMMU_NO_PASID) {
4158 		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
4159 				   qdep, address, mask);
4160 	} else {
4161 		qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
4162 					 pasid, qdep, address, mask);
4163 	}
4164 }
4165 
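/*
 * Illustrative sketch only (not part of the upstream driver): a hypothetical
 * dev-TLB flush path issues the normal device IOTLB invalidation and then
 * calls the quirk above with the same address/mask so affected devices
 * receive the extra flush.
 */
static void __maybe_unused example_flush_dev_iotlb(struct device_domain_info *info,
						   unsigned long address,
						   unsigned long mask, u16 qdep)
{
	u16 sid = PCI_DEVID(info->bus, info->devfn);

	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, address, mask);
	quirk_extra_dev_tlb_flush(info, address, mask, IOMMU_NO_PASID, qdep);
}
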
4166 #define ecmd_get_status_code(res)	(((res) & 0xff) >> 1)
4167 
4168 /*
4169  * Function to submit a command to the enhanced command interface. The
4170  * valid enhanced command descriptions are defined in Table 47 of the
4171  * VT-d spec. The VT-d hardware implementation may support some but not
4172  * all commands, which can be determined by checking the Enhanced
4173  * Command Capability Register.
4174  *
4175  * Return values:
4176  *  - 0: Command successful without any error;
4177  *  - Negative: software error value;
4178  *  - Nonzero positive: failure status code defined in Table 48.
4179  */
4180 int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
4181 {
4182 	unsigned long flags;
4183 	u64 res;
4184 	int ret;
4185 
4186 	if (!cap_ecmds(iommu->cap))
4187 		return -ENODEV;
4188 
4189 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
4190 
4191 	res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
4192 	if (res & DMA_ECMD_ECRSP_IP) {
4193 		ret = -EBUSY;
4194 		goto err;
4195 	}
4196 
4197 	/*
4198 	 * Unconditionally write the operand B, because
4199 	 * - There is no side effect if an ecmd doesn't require an
4200 	 *   operand B, but we set the register to some value.
4201 	 * - It's not invoked in any critical path. The extra MMIO
4202 	 *   write doesn't bring any performance concerns.
4203 	 */
4204 	dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
4205 	dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));
4206 
4207 	IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
4208 		      !(res & DMA_ECMD_ECRSP_IP), res);
4209 
4210 	if (res & DMA_ECMD_ECRSP_IP) {
4211 		ret = -ETIMEDOUT;
4212 		goto err;
4213 	}
4214 
4215 	ret = ecmd_get_status_code(res);
4216 err:
4217 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
4218 
4219 	return ret;
4220 }
4221 
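/*
 * Illustrative sketch only (not part of the upstream driver): how a
 * hypothetical caller could separate the three return classes documented
 * above for ecmd_submit_sync().
 */
static int __maybe_unused example_do_ecmd(struct intel_iommu *iommu,
					  u8 ecmd, u64 oa, u64 ob)
{
	int ret = ecmd_submit_sync(iommu, ecmd, oa, ob);

	if (ret < 0)	/* software error, e.g. -ENODEV or -EBUSY */
		return ret;
	if (ret > 0)	/* hardware failure status code from Table 48 */
		return -EIO;

	return 0;	/* command completed without error */
}
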
4222 MODULE_IMPORT_NS("GENERIC_PT_IOMMU");
4223