xref: /linux/drivers/iommu/amd/init.c (revision 1b0975ee3bdd3eb19a47371c26fd7ef8f7f6b599)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
4  * Author: Joerg Roedel <jroedel@suse.de>
5  *         Leo Duran <leo.duran@amd.com>
6  */
7 
8 #define pr_fmt(fmt)     "AMD-Vi: " fmt
9 #define dev_fmt(fmt)    pr_fmt(fmt)
10 
11 #include <linux/pci.h>
12 #include <linux/acpi.h>
13 #include <linux/list.h>
14 #include <linux/bitmap.h>
15 #include <linux/slab.h>
16 #include <linux/syscore_ops.h>
17 #include <linux/interrupt.h>
18 #include <linux/msi.h>
19 #include <linux/irq.h>
20 #include <linux/amd-iommu.h>
21 #include <linux/export.h>
22 #include <linux/kmemleak.h>
23 #include <linux/cc_platform.h>
24 #include <linux/iopoll.h>
25 #include <asm/pci-direct.h>
26 #include <asm/iommu.h>
27 #include <asm/apic.h>
28 #include <asm/gart.h>
29 #include <asm/x86_init.h>
30 #include <asm/io_apic.h>
31 #include <asm/irq_remapping.h>
32 #include <asm/set_memory.h>
33 
34 #include <linux/crash_dump.h>
35 
36 #include "amd_iommu.h"
37 #include "../irq_remapping.h"
38 
/*
 * definitions for the ACPI scanning code
 */

/*
 * Number of bytes at the start of the IVRS table that find_last_devid_acpi()
 * skips before it starts walking the blocks that follow.
 */
#define IVRS_HEADER_LENGTH 48

/*
 * Block type values. NOTE(review): judging by the names these apply to the
 * 'type' byte of IVHD/IVMD blocks; they are not referenced in this part of
 * the file - confirm against the users further down.
 */
#define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
#define ACPI_IVMD_TYPE_ALL              0x20
#define ACPI_IVMD_TYPE                  0x21
#define ACPI_IVMD_TYPE_RANGE            0x22

/*
 * Device entry types, compared against ivhd_entry.type (see
 * find_last_devid_from_ivhd()). IVHD_DEV_ACPI_HID entries have a variable
 * length, see ivhd_entry_length().
 */
#define IVHD_DEV_ALL                    0x01
#define IVHD_DEV_SELECT                 0x02
#define IVHD_DEV_SELECT_RANGE_START     0x03
#define IVHD_DEV_RANGE_END              0x04
#define IVHD_DEV_ALIAS                  0x42
#define IVHD_DEV_ALIAS_RANGE            0x43
#define IVHD_DEV_EXT_SELECT             0x46
#define IVHD_DEV_EXT_SELECT_RANGE       0x47
#define IVHD_DEV_SPECIAL		0x48
#define IVHD_DEV_ACPI_HID		0xf0

/* NOTE(review): presumably the possible values of ivhd_entry.uidf - confirm */
#define UID_NOT_PRESENT                 0
#define UID_IS_INTEGER                  1
#define UID_IS_CHARACTER                2

/* NOTE(review): presumably sub-types of IVHD_DEV_SPECIAL entries - confirm */
#define IVHD_SPECIAL_IOAPIC		1
#define IVHD_SPECIAL_HPET		2

/* NOTE(review): by their prefix, bit masks for ivhd_header.flags - confirm */
#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
#define IVHD_FLAG_PASSPW_EN_MASK        0x02
#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
#define IVHD_FLAG_ISOC_EN_MASK          0x08

/* NOTE(review): by their prefix, bit masks for ivmd_header.flags - confirm */
#define IVMD_FLAG_EXCL_RANGE            0x08
#define IVMD_FLAG_IW                    0x04
#define IVMD_FLAG_IR                    0x02
#define IVMD_FLAG_UNITY_MAP             0x01

/*
 * Per-device flag bits. Not referenced in this part of the file; their
 * consumers are further down.
 */
#define ACPI_DEVFLAG_INITPASS           0x01
#define ACPI_DEVFLAG_EXTINT             0x02
#define ACPI_DEVFLAG_NMI                0x04
#define ACPI_DEVFLAG_SYSMGT1            0x10
#define ACPI_DEVFLAG_SYSMGT2            0x20
#define ACPI_DEVFLAG_LINT0              0x40
#define ACPI_DEVFLAG_LINT1              0x80
#define ACPI_DEVFLAG_ATSDIS             0x10000000

/*
 * Upper bound on polling iterations; iommu_ga_log_enable() reads the status
 * register up to this many times with a udelay(10) between reads.
 */
#define LOOP_TIMEOUT	2000000
87 
/*
 * Pack a PCI segment/bus/device/function tuple into a single 32-bit id:
 * segment in bits 31:16, bus in bits 15:8, device in bits 7:3 and function
 * in bits 2:0. Every argument is parenthesized so that callers may pass
 * arbitrary expressions (e.g. "a | b") without them being re-associated with
 * the masks applied here.
 */
#define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	((((seg) & 0xffff) << 16) | (((bus) & 0xff) << 8) \
						 | (((dev) & 0x1f) << 3) | ((fn) & 0x7))
90 
91 /*
92  * ACPI table definitions
93  *
94  * These data structures are laid over the table to parse the important values
95  * out of it.
96  */
97 
/*
 * structure describing one IOMMU in the ACPI table. Typically followed by one
 * or more ivhd_entrys.
 *
 * The structure is packed because it is laid directly over the bytes of the
 * table (see the "ACPI table definitions" comment above).
 */
struct ivhd_header {
	/* Block type; get_ivhd_header_size() knows 0x10, 0x11 and 0x40 */
	u8 type;
	u8 flags;
	/* Size in bytes of the whole block: this header plus its device entries */
	u16 length;
	u16 devid;
	u16 cap_ptr;
	u64 mmio_phys;
	/* PCI segment; find_last_devid_acpi() filters blocks on this value */
	u16 pci_seg;
	u16 info;
	u32 efr_attr;

	/* Following only valid on IVHD type 11h and 40h */
	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
	u64 efr_reg2;
} __attribute__((packed));
117 
/*
 * A device entry describing which devices a specific IOMMU translates and
 * which requestor ids they use.
 *
 * Packed overlay of the table bytes. Entries are variable in size, see
 * ivhd_entry_length(); not every member is present in every entry type.
 */
struct ivhd_entry {
	/*
	 * Entry type (IVHD_DEV_*). For types below 0x80 the two top bits
	 * select the entry length (4 << bits).
	 */
	u8 type;
	/* Device id; used by find_last_devid_from_ivhd() to find the maximum */
	u16 devid;
	u8 flags;
	struct_group(ext_hid,
		u32 ext;
		u32 hidh;
	);
	u64 cid;
	u8 uidf;
	/*
	 * Byte offset 21: for IVHD_DEV_ACPI_HID entries ivhd_entry_length()
	 * returns this value plus 22.
	 */
	u8 uidl;
	u8 uid;
} __attribute__((packed));
135 
/*
 * An AMD IOMMU memory definition structure. It defines things like exclusion
 * ranges for devices and regions that should be unity mapped.
 *
 * Packed so that the layout matches the table bytes it is laid over. The
 * consumers of this structure are not in this part of the file.
 */
struct ivmd_header {
	u8 type;
	u8 flags;	/* NOTE(review): presumably IVMD_FLAG_* bits - confirm */
	u16 length;
	u16 devid;
	u16 aux;
	u16 pci_seg;
	u8  resv[6];
	u64 range_start;
	u64 range_length;
} __attribute__((packed));
151 
bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;

enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;

/* Guest interrupt remapping mode; tested in iommu_init_ga_log()/iommu_enable_xt() */
int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
/* iommu_enable_xt() sets CONTROL_XT_EN only for IRQ_REMAP_X2APIC_MODE */
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;

static bool amd_iommu_detected;
static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
static bool amd_iommu_irtcachedis;
/* Only IVHD blocks of this type are considered by find_last_devid_acpi() */
static int amd_iommu_target_ivhd_type;

/*
 * Global EFR and EFR2 registers: get_global_efr() reduces the per-IOMMU
 * features/features2 values to the bits common to all IOMMUs.
 */
u64 amd_iommu_efr;
u64 amd_iommu_efr2;

/* SNP is enabled on the system? */
bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);

LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
					   system */

/* Array to assign indices to IOMMUs */
struct amd_iommu *amd_iommus[MAX_IOMMUS];

/* Number of IOMMUs present in the system, see amd_iommu_get_num_iommus() */
static int amd_iommus_present;

/* IOMMUs have a non-present cache? */
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;

u32 amd_iommu_max_pasid __read_mostly = ~0;

bool amd_iommu_v2_present __read_mostly;
static bool amd_iommu_pc_present __read_mostly;
/* Set by early_iommu_features_init() when IOMMU_IVINFO_DMA_REMAP is set */
bool amdr_ivrs_remap_support __read_mostly;

bool amd_iommu_force_isolation __read_mostly;

/*
 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
 * to know which ones are already in use.
 */
unsigned long *amd_iommu_pd_alloc_bitmap;
203 
/*
 * States of the driver initialization. 'init_state' starts out as
 * IOMMU_START_STATE and iommu_go_to_state() takes one of these values.
 * NOTE(review): presumably the first entries are passed through in order and
 * the last three are terminal/error states - confirm against the state
 * machine further down in this file.
 */
enum iommu_init_state {
	IOMMU_START_STATE,
	IOMMU_IVRS_DETECTED,
	IOMMU_ACPI_FINISHED,
	IOMMU_ENABLED,
	IOMMU_PCI_INIT,
	IOMMU_INTERRUPTS_EN,
	IOMMU_INITIALIZED,
	IOMMU_NOT_FOUND,
	IOMMU_INIT_ERROR,
	IOMMU_CMDLINE_DISABLED,
};
216 
/*
 * Early ioapic and hpet maps from kernel command line. Each map has room for
 * EARLY_MAP_SIZE entries; the *_map_size counters below track each map.
 * Everything here is __initdata.
 */
#define EARLY_MAP_SIZE		4
static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];

static int __initdata early_ioapic_map_size;
static int __initdata early_hpet_map_size;
static int __initdata early_acpihid_map_size;

static bool __initdata cmdline_maps;

/* Current position in the initialization state machine */
static enum iommu_init_state init_state = IOMMU_START_STATE;

/* Forward declarations of functions defined further down in this file */
static int amd_iommu_enable_interrupts(void);
static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);

static bool amd_iommu_pre_enabled = true;

static u32 amd_iommu_ivinfo __initdata;
238 
239 bool translation_pre_enabled(struct amd_iommu *iommu)
240 {
241 	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
242 }
243 
244 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
245 {
246 	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
247 }
248 
249 static void init_translation_status(struct amd_iommu *iommu)
250 {
251 	u64 ctrl;
252 
253 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
254 	if (ctrl & (1<<CONTROL_IOMMU_EN))
255 		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
256 }
257 
258 static inline unsigned long tbl_size(int entry_size, int last_bdf)
259 {
260 	unsigned shift = PAGE_SHIFT +
261 			 get_order((last_bdf + 1) * entry_size);
262 
263 	return 1UL << shift;
264 }
265 
/* Return the current value of the amd_iommus_present counter. */
int amd_iommu_get_num_iommus(void)
{
	return amd_iommus_present;
}
270 
271 /*
272  * Iterate through all the IOMMUs to get common EFR
273  * masks among all IOMMUs and warn if found inconsistency.
274  */
275 static void get_global_efr(void)
276 {
277 	struct amd_iommu *iommu;
278 
279 	for_each_iommu(iommu) {
280 		u64 tmp = iommu->features;
281 		u64 tmp2 = iommu->features2;
282 
283 		if (list_is_first(&iommu->list, &amd_iommu_list)) {
284 			amd_iommu_efr = tmp;
285 			amd_iommu_efr2 = tmp2;
286 			continue;
287 		}
288 
289 		if (amd_iommu_efr == tmp &&
290 		    amd_iommu_efr2 == tmp2)
291 			continue;
292 
293 		pr_err(FW_BUG
294 		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
295 		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
296 		       iommu->index, iommu->pci_seg->id,
297 		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
298 		       PCI_FUNC(iommu->devid));
299 
300 		amd_iommu_efr &= tmp;
301 		amd_iommu_efr2 &= tmp2;
302 	}
303 
304 	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
305 }
306 
307 static bool check_feature_on_all_iommus(u64 mask)
308 {
309 	return !!(amd_iommu_efr & mask);
310 }
311 
312 static inline int check_feature_gpt_level(void)
313 {
314 	return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
315 }
316 
317 /*
318  * For IVHD type 0x11/0x40, EFR is also available via IVHD.
319  * Default to IVHD EFR since it is available sooner
320  * (i.e. before PCI init).
321  */
322 static void __init early_iommu_features_init(struct amd_iommu *iommu,
323 					     struct ivhd_header *h)
324 {
325 	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
326 		iommu->features = h->efr_reg;
327 		iommu->features2 = h->efr_reg2;
328 	}
329 	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
330 		amdr_ivrs_remap_support = true;
331 }
332 
333 /* Access to l1 and l2 indexed register spaces */
334 
335 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
336 {
337 	u32 val;
338 
339 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
340 	pci_read_config_dword(iommu->dev, 0xfc, &val);
341 	return val;
342 }
343 
344 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
345 {
346 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
347 	pci_write_config_dword(iommu->dev, 0xfc, val);
348 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
349 }
350 
351 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
352 {
353 	u32 val;
354 
355 	pci_write_config_dword(iommu->dev, 0xf0, address);
356 	pci_read_config_dword(iommu->dev, 0xf4, &val);
357 	return val;
358 }
359 
360 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
361 {
362 	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
363 	pci_write_config_dword(iommu->dev, 0xf4, val);
364 }
365 
366 /****************************************************************************
367  *
368  * AMD IOMMU MMIO register space handling functions
369  *
370  * These functions are used to program the IOMMU device registers in
371  * MMIO space required for that driver.
372  *
373  ****************************************************************************/
374 
375 /*
376  * This function set the exclusion range in the IOMMU. DMA accesses to the
377  * exclusion range are passed through untranslated
378  */
379 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
380 {
381 	u64 start = iommu->exclusion_start & PAGE_MASK;
382 	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
383 	u64 entry;
384 
385 	if (!iommu->exclusion_start)
386 		return;
387 
388 	entry = start | MMIO_EXCL_ENABLE_MASK;
389 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
390 			&entry, sizeof(entry));
391 
392 	entry = limit;
393 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
394 			&entry, sizeof(entry));
395 }
396 
397 static void iommu_set_cwwb_range(struct amd_iommu *iommu)
398 {
399 	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
400 	u64 entry = start & PM_ADDR_MASK;
401 
402 	if (!check_feature_on_all_iommus(FEATURE_SNP))
403 		return;
404 
405 	/* Note:
406 	 * Re-purpose Exclusion base/limit registers for Completion wait
407 	 * write-back base/limit.
408 	 */
409 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
410 		    &entry, sizeof(entry));
411 
412 	/* Note:
413 	 * Default to 4 Kbytes, which can be specified by setting base
414 	 * address equal to the limit address.
415 	 */
416 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
417 		    &entry, sizeof(entry));
418 }
419 
420 /* Programs the physical address of the device table into the IOMMU hardware */
421 static void iommu_set_device_table(struct amd_iommu *iommu)
422 {
423 	u64 entry;
424 	u32 dev_table_size = iommu->pci_seg->dev_table_size;
425 	void *dev_table = (void *)get_dev_table(iommu);
426 
427 	BUG_ON(iommu->mmio_base == NULL);
428 
429 	entry = iommu_virt_to_phys(dev_table);
430 	entry |= (dev_table_size >> 12) - 1;
431 	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
432 			&entry, sizeof(entry));
433 }
434 
435 /* Generic functions to enable/disable certain features of the IOMMU. */
436 static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
437 {
438 	u64 ctrl;
439 
440 	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
441 	ctrl |= (1ULL << bit);
442 	writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
443 }
444 
445 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
446 {
447 	u64 ctrl;
448 
449 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
450 	ctrl &= ~(1ULL << bit);
451 	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
452 }
453 
454 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
455 {
456 	u64 ctrl;
457 
458 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
459 	ctrl &= ~CTRL_INV_TO_MASK;
460 	ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
461 	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
462 }
463 
/* Function to enable the hardware: sets CONTROL_IOMMU_EN in the control register */
static void iommu_enable(struct amd_iommu *iommu)
{
	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}
469 
470 static void iommu_disable(struct amd_iommu *iommu)
471 {
472 	if (!iommu->mmio_base)
473 		return;
474 
475 	/* Disable command buffer */
476 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
477 
478 	/* Disable event logging and event interrupts */
479 	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
480 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
481 
482 	/* Disable IOMMU GA_LOG */
483 	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
484 	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
485 
486 	/* Disable IOMMU hardware itself */
487 	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
488 
489 	/* Clear IRTE cache disabling bit */
490 	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
491 }
492 
493 /*
494  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
495  * the system has one.
496  */
497 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
498 {
499 	if (!request_mem_region(address, end, "amd_iommu")) {
500 		pr_err("Can not reserve memory region %llx-%llx for mmio\n",
501 			address, end);
502 		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
503 		return NULL;
504 	}
505 
506 	return (u8 __iomem *)ioremap(address, end);
507 }
508 
/*
 * Undo iommu_map_mmio_space(): unmap the MMIO space if it was mapped. The
 * memory region is released unconditionally.
 */
static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
{
	if (iommu->mmio_base)
		iounmap(iommu->mmio_base);
	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
}
515 
516 static inline u32 get_ivhd_header_size(struct ivhd_header *h)
517 {
518 	u32 size = 0;
519 
520 	switch (h->type) {
521 	case 0x10:
522 		size = 24;
523 		break;
524 	case 0x11:
525 	case 0x40:
526 		size = 40;
527 		break;
528 	}
529 	return size;
530 }
531 
532 /****************************************************************************
533  *
534  * The functions below belong to the first pass of AMD IOMMU ACPI table
535  * parsing. In this pass we try to find out the highest device id this
536  * code has to handle. Upon this information the size of the shared data
537  * structures is determined later.
538  *
539  ****************************************************************************/
540 
541 /*
542  * This function calculates the length of a given IVHD entry
543  */
544 static inline int ivhd_entry_length(u8 *ivhd)
545 {
546 	u32 type = ((struct ivhd_entry *)ivhd)->type;
547 
548 	if (type < 0x80) {
549 		return 0x04 << (*ivhd >> 6);
550 	} else if (type == IVHD_DEV_ACPI_HID) {
551 		/* For ACPI_HID, offset 21 is uid len */
552 		return *((u8 *)ivhd + 21) + 22;
553 	}
554 	return 0;
555 }
556 
557 /*
558  * After reading the highest device id from the IOMMU PCI capability header
559  * this function looks if there is a higher device id defined in the ACPI table
560  */
561 static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
562 {
563 	u8 *p = (void *)h, *end = (void *)h;
564 	struct ivhd_entry *dev;
565 	int last_devid = -EINVAL;
566 
567 	u32 ivhd_size = get_ivhd_header_size(h);
568 
569 	if (!ivhd_size) {
570 		pr_err("Unsupported IVHD type %#x\n", h->type);
571 		return -EINVAL;
572 	}
573 
574 	p += ivhd_size;
575 	end += h->length;
576 
577 	while (p < end) {
578 		dev = (struct ivhd_entry *)p;
579 		switch (dev->type) {
580 		case IVHD_DEV_ALL:
581 			/* Use maximum BDF value for DEV_ALL */
582 			return 0xffff;
583 		case IVHD_DEV_SELECT:
584 		case IVHD_DEV_RANGE_END:
585 		case IVHD_DEV_ALIAS:
586 		case IVHD_DEV_EXT_SELECT:
587 			/* all the above subfield types refer to device ids */
588 			if (dev->devid > last_devid)
589 				last_devid = dev->devid;
590 			break;
591 		default:
592 			break;
593 		}
594 		p += ivhd_entry_length(p);
595 	}
596 
597 	WARN_ON(p != end);
598 
599 	return last_devid;
600 }
601 
602 static int __init check_ivrs_checksum(struct acpi_table_header *table)
603 {
604 	int i;
605 	u8 checksum = 0, *p = (u8 *)table;
606 
607 	for (i = 0; i < table->length; ++i)
608 		checksum += p[i];
609 	if (checksum != 0) {
610 		/* ACPI table corrupt */
611 		pr_err(FW_BUG "IVRS invalid checksum\n");
612 		return -ENODEV;
613 	}
614 
615 	return 0;
616 }
617 
618 /*
619  * Iterate over all IVHD entries in the ACPI table and find the highest device
620  * id which we need to handle. This is the first of three functions which parse
621  * the ACPI table. So we check the checksum here.
622  */
623 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
624 {
625 	u8 *p = (u8 *)table, *end = (u8 *)table;
626 	struct ivhd_header *h;
627 	int last_devid, last_bdf = 0;
628 
629 	p += IVRS_HEADER_LENGTH;
630 
631 	end += table->length;
632 	while (p < end) {
633 		h = (struct ivhd_header *)p;
634 		if (h->pci_seg == pci_seg &&
635 		    h->type == amd_iommu_target_ivhd_type) {
636 			last_devid = find_last_devid_from_ivhd(h);
637 
638 			if (last_devid < 0)
639 				return -EINVAL;
640 			if (last_devid > last_bdf)
641 				last_bdf = last_devid;
642 		}
643 		p += h->length;
644 	}
645 	WARN_ON(p != end);
646 
647 	return last_bdf;
648 }
649 
650 /****************************************************************************
651  *
652  * The following functions belong to the code path which parses the ACPI table
653  * the second time. In this ACPI parsing iteration we allocate IOMMU specific
654  * data structures, initialize the per PCI segment device/alias/rlookup table
655  * and also basically initialize the hardware.
656  *
657  ****************************************************************************/
658 
659 /* Allocate per PCI segment device table */
660 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
661 {
662 	pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
663 						      get_order(pci_seg->dev_table_size));
664 	if (!pci_seg->dev_table)
665 		return -ENOMEM;
666 
667 	return 0;
668 }
669 
670 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
671 {
672 	free_pages((unsigned long)pci_seg->dev_table,
673 		    get_order(pci_seg->dev_table_size));
674 	pci_seg->dev_table = NULL;
675 }
676 
677 /* Allocate per PCI segment IOMMU rlookup table. */
678 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
679 {
680 	pci_seg->rlookup_table = (void *)__get_free_pages(
681 						GFP_KERNEL | __GFP_ZERO,
682 						get_order(pci_seg->rlookup_table_size));
683 	if (pci_seg->rlookup_table == NULL)
684 		return -ENOMEM;
685 
686 	return 0;
687 }
688 
689 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
690 {
691 	free_pages((unsigned long)pci_seg->rlookup_table,
692 		   get_order(pci_seg->rlookup_table_size));
693 	pci_seg->rlookup_table = NULL;
694 }
695 
696 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
697 {
698 	pci_seg->irq_lookup_table = (void *)__get_free_pages(
699 					     GFP_KERNEL | __GFP_ZERO,
700 					     get_order(pci_seg->rlookup_table_size));
701 	kmemleak_alloc(pci_seg->irq_lookup_table,
702 		       pci_seg->rlookup_table_size, 1, GFP_KERNEL);
703 	if (pci_seg->irq_lookup_table == NULL)
704 		return -ENOMEM;
705 
706 	return 0;
707 }
708 
709 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
710 {
711 	kmemleak_free(pci_seg->irq_lookup_table);
712 	free_pages((unsigned long)pci_seg->irq_lookup_table,
713 		   get_order(pci_seg->rlookup_table_size));
714 	pci_seg->irq_lookup_table = NULL;
715 }
716 
717 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
718 {
719 	int i;
720 
721 	pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL,
722 					get_order(pci_seg->alias_table_size));
723 	if (!pci_seg->alias_table)
724 		return -ENOMEM;
725 
726 	/*
727 	 * let all alias entries point to itself
728 	 */
729 	for (i = 0; i <= pci_seg->last_bdf; ++i)
730 		pci_seg->alias_table[i] = i;
731 
732 	return 0;
733 }
734 
735 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
736 {
737 	free_pages((unsigned long)pci_seg->alias_table,
738 		   get_order(pci_seg->alias_table_size));
739 	pci_seg->alias_table = NULL;
740 }
741 
742 /*
743  * Allocates the command buffer. This buffer is per AMD IOMMU. We can
744  * write commands to that buffer later and the IOMMU will execute them
745  * asynchronously
746  */
747 static int __init alloc_command_buffer(struct amd_iommu *iommu)
748 {
749 	iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
750 						  get_order(CMD_BUFFER_SIZE));
751 
752 	return iommu->cmd_buf ? 0 : -ENOMEM;
753 }
754 
/*
 * This function restarts event logging in case the IOMMU experienced
 * an event log buffer overflow. The restart is done by clearing and then
 * setting CONTROL_EVT_LOG_EN again.
 */
void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
{
	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
}
764 
/*
 * This function restarts GA logging in case the IOMMU experienced
 * a GA log overflow. Nothing is done when the status register reports that
 * the GA log is still running.
 */
void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
{
	u32 status;

	status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
	if (status & MMIO_STATUS_GALOG_RUN_MASK)
		return;

	pr_info_ratelimited("IOMMU GA Log restarting\n");

	/* Switch the GA log and its interrupt off before touching the status */
	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
	iommu_feature_disable(iommu, CONTROL_GAINT_EN);

	/* Write the overflow bit to the status register */
	writel(MMIO_STATUS_GALOG_OVERFLOW_MASK,
	       iommu->mmio_base + MMIO_STATUS_OFFSET);

	/* Re-enable in the reverse order: interrupt first, then the log */
	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
}
788 
/*
 * This function resets the command buffer if the IOMMU stopped fetching
 * commands from it. The buffer is disabled while the hardware head/tail
 * registers and the cached head/tail values are reset to zero, and enabled
 * again afterwards.
 */
static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
{
	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);

	/* Reset the hardware pointers ... */
	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
	/* ... and the driver's copies of them */
	iommu->cmd_buf_head = 0;
	iommu->cmd_buf_tail = 0;

	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
}
804 
805 /*
806  * This function writes the command buffer address to the hardware and
807  * enables it.
808  */
809 static void iommu_enable_command_buffer(struct amd_iommu *iommu)
810 {
811 	u64 entry;
812 
813 	BUG_ON(iommu->cmd_buf == NULL);
814 
815 	entry = iommu_virt_to_phys(iommu->cmd_buf);
816 	entry |= MMIO_CMD_SIZE_512;
817 
818 	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
819 		    &entry, sizeof(entry));
820 
821 	amd_iommu_reset_cmd_buffer(iommu);
822 }
823 
/*
 * This function disables the command buffer by clearing CONTROL_CMDBUF_EN.
 * The buffer memory itself is left untouched.
 */
static void iommu_disable_command_buffer(struct amd_iommu *iommu)
{
	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
}
831 
/* Release the pages allocated by alloc_command_buffer(). */
static void __init free_command_buffer(struct amd_iommu *iommu)
{
	free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
}
836 
837 static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
838 					 gfp_t gfp, size_t size)
839 {
840 	int order = get_order(size);
841 	void *buf = (void *)__get_free_pages(gfp, order);
842 
843 	if (buf &&
844 	    check_feature_on_all_iommus(FEATURE_SNP) &&
845 	    set_memory_4k((unsigned long)buf, (1 << order))) {
846 		free_pages((unsigned long)buf, order);
847 		buf = NULL;
848 	}
849 
850 	return buf;
851 }
852 
853 /* allocates the memory where the IOMMU will log its events to */
854 static int __init alloc_event_buffer(struct amd_iommu *iommu)
855 {
856 	iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
857 					      EVT_BUFFER_SIZE);
858 
859 	return iommu->evt_buf ? 0 : -ENOMEM;
860 }
861 
862 static void iommu_enable_event_buffer(struct amd_iommu *iommu)
863 {
864 	u64 entry;
865 
866 	BUG_ON(iommu->evt_buf == NULL);
867 
868 	entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
869 
870 	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
871 		    &entry, sizeof(entry));
872 
873 	/* set head and tail to zero manually */
874 	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
875 	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
876 
877 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
878 }
879 
/*
 * This function disables the event log buffer by clearing
 * CONTROL_EVT_LOG_EN. The buffer memory itself is left untouched.
 */
static void iommu_disable_event_buffer(struct amd_iommu *iommu)
{
	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
}
887 
/* Release the pages allocated by alloc_event_buffer(). */
static void __init free_event_buffer(struct amd_iommu *iommu)
{
	free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
}
892 
893 /* allocates the memory where the IOMMU will log its events to */
894 static int __init alloc_ppr_log(struct amd_iommu *iommu)
895 {
896 	iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
897 					      PPR_LOG_SIZE);
898 
899 	return iommu->ppr_log ? 0 : -ENOMEM;
900 }
901 
902 static void iommu_enable_ppr_log(struct amd_iommu *iommu)
903 {
904 	u64 entry;
905 
906 	if (iommu->ppr_log == NULL)
907 		return;
908 
909 	entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
910 
911 	memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
912 		    &entry, sizeof(entry));
913 
914 	/* set head and tail to zero manually */
915 	writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
916 	writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
917 
918 	iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
919 	iommu_feature_enable(iommu, CONTROL_PPR_EN);
920 }
921 
/* Release the pages allocated by alloc_ppr_log(). */
static void __init free_ppr_log(struct amd_iommu *iommu)
{
	free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
}
926 
/*
 * Release the GA log and the GA log tail pages. Compiles to an empty
 * function without CONFIG_IRQ_REMAP. Note that the pointers in @iommu are
 * not cleared here.
 */
static void free_ga_log(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE));
	free_pages((unsigned long)iommu->ga_log_tail, get_order(8));
#endif
}
934 
935 #ifdef CONFIG_IRQ_REMAP
936 static int iommu_ga_log_enable(struct amd_iommu *iommu)
937 {
938 	u32 status, i;
939 	u64 entry;
940 
941 	if (!iommu->ga_log)
942 		return -EINVAL;
943 
944 	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
945 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
946 		    &entry, sizeof(entry));
947 	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
948 		 (BIT_ULL(52)-1)) & ~7ULL;
949 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
950 		    &entry, sizeof(entry));
951 	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
952 	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
953 
954 
955 	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
956 	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
957 
958 	for (i = 0; i < LOOP_TIMEOUT; ++i) {
959 		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
960 		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
961 			break;
962 		udelay(10);
963 	}
964 
965 	if (WARN_ON(i >= LOOP_TIMEOUT))
966 		return -EINVAL;
967 
968 	return 0;
969 }
970 
971 static int iommu_init_ga_log(struct amd_iommu *iommu)
972 {
973 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
974 		return 0;
975 
976 	iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
977 					get_order(GA_LOG_SIZE));
978 	if (!iommu->ga_log)
979 		goto err_out;
980 
981 	iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
982 					get_order(8));
983 	if (!iommu->ga_log_tail)
984 		goto err_out;
985 
986 	return 0;
987 err_out:
988 	free_ga_log(iommu);
989 	return -EINVAL;
990 }
991 #endif /* CONFIG_IRQ_REMAP */
992 
993 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
994 {
995 	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
996 
997 	return iommu->cmd_sem ? 0 : -ENOMEM;
998 }
999 
/* Release the page backing iommu->cmd_sem, if one was allocated. */
static void __init free_cwwb_sem(struct amd_iommu *iommu)
{
	if (iommu->cmd_sem)
		free_page((unsigned long)iommu->cmd_sem);
}
1005 
/*
 * Set CONTROL_XT_EN when both the guest IR mode and the XT mode ask for it.
 * Compiles to an empty function without CONFIG_IRQ_REMAP.
 */
static void iommu_enable_xt(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	/*
	 * XT mode (32-bit APIC destination ID) requires
	 * GA mode (128-bit IRTE support) as a prerequisite.
	 */
	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
		return;

	if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
		return;

	iommu_feature_enable(iommu, CONTROL_XT_EN);
#endif /* CONFIG_IRQ_REMAP */
}
1018 
1019 static void iommu_enable_gt(struct amd_iommu *iommu)
1020 {
1021 	if (!iommu_feature(iommu, FEATURE_GT))
1022 		return;
1023 
1024 	iommu_feature_enable(iommu, CONTROL_GT_EN);
1025 }
1026 
1027 /* sets a specific bit in the device table entry. */
1028 static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
1029 				u16 devid, u8 bit)
1030 {
1031 	int i = (bit >> 6) & 0x03;
1032 	int _bit = bit & 0x3f;
1033 
1034 	dev_table[devid].data[i] |= (1UL << _bit);
1035 }
1036 
1037 static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1038 {
1039 	struct dev_table_entry *dev_table = get_dev_table(iommu);
1040 
1041 	return __set_dev_entry_bit(dev_table, devid, bit);
1042 }
1043 
1044 static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
1045 			       u16 devid, u8 bit)
1046 {
1047 	int i = (bit >> 6) & 0x03;
1048 	int _bit = bit & 0x3f;
1049 
1050 	return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
1051 }
1052 
1053 static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1054 {
1055 	struct dev_table_entry *dev_table = get_dev_table(iommu);
1056 
1057 	return __get_dev_entry_bit(dev_table, devid, bit);
1058 }
1059 
1060 static bool __copy_device_table(struct amd_iommu *iommu)
1061 {
1062 	u64 int_ctl, int_tab_len, entry = 0;
1063 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1064 	struct dev_table_entry *old_devtb = NULL;
1065 	u32 lo, hi, devid, old_devtb_size;
1066 	phys_addr_t old_devtb_phys;
1067 	u16 dom_id, dte_v, irq_v;
1068 	gfp_t gfp_flag;
1069 	u64 tmp;
1070 
1071 	/* Each IOMMU use separate device table with the same size */
1072 	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1073 	hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1074 	entry = (((u64) hi) << 32) + lo;
1075 
1076 	old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1077 	if (old_devtb_size != pci_seg->dev_table_size) {
1078 		pr_err("The device table size of IOMMU:%d is not expected!\n",
1079 			iommu->index);
1080 		return false;
1081 	}
1082 
1083 	/*
1084 	 * When SME is enabled in the first kernel, the entry includes the
1085 	 * memory encryption mask(sme_me_mask), we must remove the memory
1086 	 * encryption mask to obtain the true physical address in kdump kernel.
1087 	 */
1088 	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1089 
1090 	if (old_devtb_phys >= 0x100000000ULL) {
1091 		pr_err("The address of old device table is above 4G, not trustworthy!\n");
1092 		return false;
1093 	}
1094 	old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
1095 		    ? (__force void *)ioremap_encrypted(old_devtb_phys,
1096 							pci_seg->dev_table_size)
1097 		    : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
1098 
1099 	if (!old_devtb)
1100 		return false;
1101 
1102 	gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
1103 	pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
1104 						    get_order(pci_seg->dev_table_size));
1105 	if (pci_seg->old_dev_tbl_cpy == NULL) {
1106 		pr_err("Failed to allocate memory for copying old device table!\n");
1107 		memunmap(old_devtb);
1108 		return false;
1109 	}
1110 
1111 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
1112 		pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
1113 		dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
1114 		dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
1115 
1116 		if (dte_v && dom_id) {
1117 			pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
1118 			pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
1119 			__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
1120 			/* If gcr3 table existed, mask it out */
1121 			if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1122 				tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1123 				tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1124 				pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1125 				tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
1126 				tmp |= DTE_FLAG_GV;
1127 				pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1128 			}
1129 		}
1130 
1131 		irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1132 		int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1133 		int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1134 		if (irq_v && (int_ctl || int_tab_len)) {
1135 			if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1136 			    (int_tab_len != DTE_INTTABLEN)) {
1137 				pr_err("Wrong old irq remapping flag: %#x\n", devid);
1138 				memunmap(old_devtb);
1139 				return false;
1140 			}
1141 
1142 			pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1143 		}
1144 	}
1145 	memunmap(old_devtb);
1146 
1147 	return true;
1148 }
1149 
1150 static bool copy_device_table(void)
1151 {
1152 	struct amd_iommu *iommu;
1153 	struct amd_iommu_pci_seg *pci_seg;
1154 
1155 	if (!amd_iommu_pre_enabled)
1156 		return false;
1157 
1158 	pr_warn("Translation is already enabled - trying to copy translation structures\n");
1159 
1160 	/*
1161 	 * All IOMMUs within PCI segment shares common device table.
1162 	 * Hence copy device table only once per PCI segment.
1163 	 */
1164 	for_each_pci_segment(pci_seg) {
1165 		for_each_iommu(iommu) {
1166 			if (pci_seg->id != iommu->pci_seg->id)
1167 				continue;
1168 			if (!__copy_device_table(iommu))
1169 				return false;
1170 			break;
1171 		}
1172 	}
1173 
1174 	return true;
1175 }
1176 
1177 void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
1178 {
1179 	int sysmgt;
1180 
1181 	sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
1182 		 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
1183 
1184 	if (sysmgt == 0x01)
1185 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
1186 }
1187 
1188 /*
1189  * This function takes the device specific flags read from the ACPI
1190  * table and sets up the device table entry with that information
1191  */
1192 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1193 					   u16 devid, u32 flags, u32 ext_flags)
1194 {
1195 	if (flags & ACPI_DEVFLAG_INITPASS)
1196 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
1197 	if (flags & ACPI_DEVFLAG_EXTINT)
1198 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
1199 	if (flags & ACPI_DEVFLAG_NMI)
1200 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
1201 	if (flags & ACPI_DEVFLAG_SYSMGT1)
1202 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
1203 	if (flags & ACPI_DEVFLAG_SYSMGT2)
1204 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
1205 	if (flags & ACPI_DEVFLAG_LINT0)
1206 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
1207 	if (flags & ACPI_DEVFLAG_LINT1)
1208 		set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
1209 
1210 	amd_iommu_apply_erratum_63(iommu, devid);
1211 
1212 	amd_iommu_set_rlookup_table(iommu, devid);
1213 }
1214 
1215 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1216 {
1217 	struct devid_map *entry;
1218 	struct list_head *list;
1219 
1220 	if (type == IVHD_SPECIAL_IOAPIC)
1221 		list = &ioapic_map;
1222 	else if (type == IVHD_SPECIAL_HPET)
1223 		list = &hpet_map;
1224 	else
1225 		return -EINVAL;
1226 
1227 	list_for_each_entry(entry, list, list) {
1228 		if (!(entry->id == id && entry->cmd_line))
1229 			continue;
1230 
1231 		pr_info("Command-line override present for %s id %d - ignoring\n",
1232 			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1233 
1234 		*devid = entry->devid;
1235 
1236 		return 0;
1237 	}
1238 
1239 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1240 	if (!entry)
1241 		return -ENOMEM;
1242 
1243 	entry->id	= id;
1244 	entry->devid	= *devid;
1245 	entry->cmd_line	= cmd_line;
1246 
1247 	list_add_tail(&entry->list, list);
1248 
1249 	return 0;
1250 }
1251 
1252 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1253 				      bool cmd_line)
1254 {
1255 	struct acpihid_map_entry *entry;
1256 	struct list_head *list = &acpihid_map;
1257 
1258 	list_for_each_entry(entry, list, list) {
1259 		if (strcmp(entry->hid, hid) ||
1260 		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1261 		    !entry->cmd_line)
1262 			continue;
1263 
1264 		pr_info("Command-line override for hid:%s uid:%s\n",
1265 			hid, uid);
1266 		*devid = entry->devid;
1267 		return 0;
1268 	}
1269 
1270 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1271 	if (!entry)
1272 		return -ENOMEM;
1273 
1274 	memcpy(entry->uid, uid, strlen(uid));
1275 	memcpy(entry->hid, hid, strlen(hid));
1276 	entry->devid = *devid;
1277 	entry->cmd_line	= cmd_line;
1278 	entry->root_devid = (entry->devid & (~0x7));
1279 
1280 	pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1281 		entry->cmd_line ? "cmd" : "ivrs",
1282 		entry->hid, entry->uid, entry->root_devid);
1283 
1284 	list_add_tail(&entry->list, list);
1285 	return 0;
1286 }
1287 
1288 static int __init add_early_maps(void)
1289 {
1290 	int i, ret;
1291 
1292 	for (i = 0; i < early_ioapic_map_size; ++i) {
1293 		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1294 					 early_ioapic_map[i].id,
1295 					 &early_ioapic_map[i].devid,
1296 					 early_ioapic_map[i].cmd_line);
1297 		if (ret)
1298 			return ret;
1299 	}
1300 
1301 	for (i = 0; i < early_hpet_map_size; ++i) {
1302 		ret = add_special_device(IVHD_SPECIAL_HPET,
1303 					 early_hpet_map[i].id,
1304 					 &early_hpet_map[i].devid,
1305 					 early_hpet_map[i].cmd_line);
1306 		if (ret)
1307 			return ret;
1308 	}
1309 
1310 	for (i = 0; i < early_acpihid_map_size; ++i) {
1311 		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1312 					  early_acpihid_map[i].uid,
1313 					  &early_acpihid_map[i].devid,
1314 					  early_acpihid_map[i].cmd_line);
1315 		if (ret)
1316 			return ret;
1317 	}
1318 
1319 	return 0;
1320 }
1321 
1322 /*
1323  * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1324  * initializes the hardware and our data structures with it.
1325  */
1326 static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1327 					struct ivhd_header *h)
1328 {
1329 	u8 *p = (u8 *)h;
1330 	u8 *end = p, flags = 0;
1331 	u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1332 	u32 dev_i, ext_flags = 0;
1333 	bool alias = false;
1334 	struct ivhd_entry *e;
1335 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1336 	u32 ivhd_size;
1337 	int ret;
1338 
1339 
1340 	ret = add_early_maps();
1341 	if (ret)
1342 		return ret;
1343 
1344 	amd_iommu_apply_ivrs_quirks();
1345 
1346 	/*
1347 	 * First save the recommended feature enable bits from ACPI
1348 	 */
1349 	iommu->acpi_flags = h->flags;
1350 
1351 	/*
1352 	 * Done. Now parse the device entries
1353 	 */
1354 	ivhd_size = get_ivhd_header_size(h);
1355 	if (!ivhd_size) {
1356 		pr_err("Unsupported IVHD type %#x\n", h->type);
1357 		return -EINVAL;
1358 	}
1359 
1360 	p += ivhd_size;
1361 
1362 	end += h->length;
1363 
1364 
1365 	while (p < end) {
1366 		e = (struct ivhd_entry *)p;
1367 		seg_id = pci_seg->id;
1368 
1369 		switch (e->type) {
1370 		case IVHD_DEV_ALL:
1371 
1372 			DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1373 
1374 			for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
1375 				set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1376 			break;
1377 		case IVHD_DEV_SELECT:
1378 
1379 			DUMP_printk("  DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
1380 				    "flags: %02x\n",
1381 				    seg_id, PCI_BUS_NUM(e->devid),
1382 				    PCI_SLOT(e->devid),
1383 				    PCI_FUNC(e->devid),
1384 				    e->flags);
1385 
1386 			devid = e->devid;
1387 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1388 			break;
1389 		case IVHD_DEV_SELECT_RANGE_START:
1390 
1391 			DUMP_printk("  DEV_SELECT_RANGE_START\t "
1392 				    "devid: %04x:%02x:%02x.%x flags: %02x\n",
1393 				    seg_id, PCI_BUS_NUM(e->devid),
1394 				    PCI_SLOT(e->devid),
1395 				    PCI_FUNC(e->devid),
1396 				    e->flags);
1397 
1398 			devid_start = e->devid;
1399 			flags = e->flags;
1400 			ext_flags = 0;
1401 			alias = false;
1402 			break;
1403 		case IVHD_DEV_ALIAS:
1404 
1405 			DUMP_printk("  DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
1406 				    "flags: %02x devid_to: %02x:%02x.%x\n",
1407 				    seg_id, PCI_BUS_NUM(e->devid),
1408 				    PCI_SLOT(e->devid),
1409 				    PCI_FUNC(e->devid),
1410 				    e->flags,
1411 				    PCI_BUS_NUM(e->ext >> 8),
1412 				    PCI_SLOT(e->ext >> 8),
1413 				    PCI_FUNC(e->ext >> 8));
1414 
1415 			devid = e->devid;
1416 			devid_to = e->ext >> 8;
1417 			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1418 			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1419 			pci_seg->alias_table[devid] = devid_to;
1420 			break;
1421 		case IVHD_DEV_ALIAS_RANGE:
1422 
1423 			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1424 				    "devid: %04x:%02x:%02x.%x flags: %02x "
1425 				    "devid_to: %04x:%02x:%02x.%x\n",
1426 				    seg_id, PCI_BUS_NUM(e->devid),
1427 				    PCI_SLOT(e->devid),
1428 				    PCI_FUNC(e->devid),
1429 				    e->flags,
1430 				    seg_id, PCI_BUS_NUM(e->ext >> 8),
1431 				    PCI_SLOT(e->ext >> 8),
1432 				    PCI_FUNC(e->ext >> 8));
1433 
1434 			devid_start = e->devid;
1435 			flags = e->flags;
1436 			devid_to = e->ext >> 8;
1437 			ext_flags = 0;
1438 			alias = true;
1439 			break;
1440 		case IVHD_DEV_EXT_SELECT:
1441 
1442 			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
1443 				    "flags: %02x ext: %08x\n",
1444 				    seg_id, PCI_BUS_NUM(e->devid),
1445 				    PCI_SLOT(e->devid),
1446 				    PCI_FUNC(e->devid),
1447 				    e->flags, e->ext);
1448 
1449 			devid = e->devid;
1450 			set_dev_entry_from_acpi(iommu, devid, e->flags,
1451 						e->ext);
1452 			break;
1453 		case IVHD_DEV_EXT_SELECT_RANGE:
1454 
1455 			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1456 				    "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
1457 				    seg_id, PCI_BUS_NUM(e->devid),
1458 				    PCI_SLOT(e->devid),
1459 				    PCI_FUNC(e->devid),
1460 				    e->flags, e->ext);
1461 
1462 			devid_start = e->devid;
1463 			flags = e->flags;
1464 			ext_flags = e->ext;
1465 			alias = false;
1466 			break;
1467 		case IVHD_DEV_RANGE_END:
1468 
1469 			DUMP_printk("  DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
1470 				    seg_id, PCI_BUS_NUM(e->devid),
1471 				    PCI_SLOT(e->devid),
1472 				    PCI_FUNC(e->devid));
1473 
1474 			devid = e->devid;
1475 			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1476 				if (alias) {
1477 					pci_seg->alias_table[dev_i] = devid_to;
1478 					set_dev_entry_from_acpi(iommu,
1479 						devid_to, flags, ext_flags);
1480 				}
1481 				set_dev_entry_from_acpi(iommu, dev_i,
1482 							flags, ext_flags);
1483 			}
1484 			break;
1485 		case IVHD_DEV_SPECIAL: {
1486 			u8 handle, type;
1487 			const char *var;
1488 			u32 devid;
1489 			int ret;
1490 
1491 			handle = e->ext & 0xff;
1492 			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1493 			type   = (e->ext >> 24) & 0xff;
1494 
1495 			if (type == IVHD_SPECIAL_IOAPIC)
1496 				var = "IOAPIC";
1497 			else if (type == IVHD_SPECIAL_HPET)
1498 				var = "HPET";
1499 			else
1500 				var = "UNKNOWN";
1501 
1502 			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
1503 				    var, (int)handle,
1504 				    seg_id, PCI_BUS_NUM(devid),
1505 				    PCI_SLOT(devid),
1506 				    PCI_FUNC(devid));
1507 
1508 			ret = add_special_device(type, handle, &devid, false);
1509 			if (ret)
1510 				return ret;
1511 
1512 			/*
1513 			 * add_special_device might update the devid in case a
1514 			 * command-line override is present. So call
1515 			 * set_dev_entry_from_acpi after add_special_device.
1516 			 */
1517 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1518 
1519 			break;
1520 		}
1521 		case IVHD_DEV_ACPI_HID: {
1522 			u32 devid;
1523 			u8 hid[ACPIHID_HID_LEN];
1524 			u8 uid[ACPIHID_UID_LEN];
1525 			int ret;
1526 
1527 			if (h->type != 0x40) {
1528 				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1529 				       e->type);
1530 				break;
1531 			}
1532 
1533 			BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1534 			memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1535 			hid[ACPIHID_HID_LEN - 1] = '\0';
1536 
1537 			if (!(*hid)) {
1538 				pr_err(FW_BUG "Invalid HID.\n");
1539 				break;
1540 			}
1541 
1542 			uid[0] = '\0';
1543 			switch (e->uidf) {
1544 			case UID_NOT_PRESENT:
1545 
1546 				if (e->uidl != 0)
1547 					pr_warn(FW_BUG "Invalid UID length.\n");
1548 
1549 				break;
1550 			case UID_IS_INTEGER:
1551 
1552 				sprintf(uid, "%d", e->uid);
1553 
1554 				break;
1555 			case UID_IS_CHARACTER:
1556 
1557 				memcpy(uid, &e->uid, e->uidl);
1558 				uid[e->uidl] = '\0';
1559 
1560 				break;
1561 			default:
1562 				break;
1563 			}
1564 
1565 			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1566 			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
1567 				    hid, uid, seg_id,
1568 				    PCI_BUS_NUM(devid),
1569 				    PCI_SLOT(devid),
1570 				    PCI_FUNC(devid));
1571 
1572 			flags = e->flags;
1573 
1574 			ret = add_acpi_hid_device(hid, uid, &devid, false);
1575 			if (ret)
1576 				return ret;
1577 
1578 			/*
1579 			 * add_special_device might update the devid in case a
1580 			 * command-line override is present. So call
1581 			 * set_dev_entry_from_acpi after add_special_device.
1582 			 */
1583 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1584 
1585 			break;
1586 		}
1587 		default:
1588 			break;
1589 		}
1590 
1591 		p += ivhd_entry_length(p);
1592 	}
1593 
1594 	return 0;
1595 }
1596 
1597 /* Allocate PCI segment data structure */
1598 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1599 					  struct acpi_table_header *ivrs_base)
1600 {
1601 	struct amd_iommu_pci_seg *pci_seg;
1602 	int last_bdf;
1603 
1604 	/*
1605 	 * First parse ACPI tables to find the largest Bus/Dev/Func we need to
1606 	 * handle in this PCI segment. Upon this information the shared data
1607 	 * structures for the PCI segments in the system will be allocated.
1608 	 */
1609 	last_bdf = find_last_devid_acpi(ivrs_base, id);
1610 	if (last_bdf < 0)
1611 		return NULL;
1612 
1613 	pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
1614 	if (pci_seg == NULL)
1615 		return NULL;
1616 
1617 	pci_seg->last_bdf = last_bdf;
1618 	DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1619 	pci_seg->dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
1620 	pci_seg->alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
1621 	pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
1622 
1623 	pci_seg->id = id;
1624 	init_llist_head(&pci_seg->dev_data_list);
1625 	INIT_LIST_HEAD(&pci_seg->unity_map);
1626 	list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1627 
1628 	if (alloc_dev_table(pci_seg))
1629 		return NULL;
1630 	if (alloc_alias_table(pci_seg))
1631 		return NULL;
1632 	if (alloc_rlookup_table(pci_seg))
1633 		return NULL;
1634 
1635 	return pci_seg;
1636 }
1637 
1638 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1639 					struct acpi_table_header *ivrs_base)
1640 {
1641 	struct amd_iommu_pci_seg *pci_seg;
1642 
1643 	for_each_pci_segment(pci_seg) {
1644 		if (pci_seg->id == id)
1645 			return pci_seg;
1646 	}
1647 
1648 	return alloc_pci_segment(id, ivrs_base);
1649 }
1650 
1651 static void __init free_pci_segments(void)
1652 {
1653 	struct amd_iommu_pci_seg *pci_seg, *next;
1654 
1655 	for_each_pci_segment_safe(pci_seg, next) {
1656 		list_del(&pci_seg->list);
1657 		free_irq_lookup_table(pci_seg);
1658 		free_rlookup_table(pci_seg);
1659 		free_alias_table(pci_seg);
1660 		free_dev_table(pci_seg);
1661 		kfree(pci_seg);
1662 	}
1663 }
1664 
/*
 * Release the per-IOMMU resources of @iommu: the cmd_sem page, the command
 * and event buffers, the PPR and GA logs, and finally the MMIO mapping.
 * The struct amd_iommu itself is not freed here.
 */
static void __init free_iommu_one(struct amd_iommu *iommu)
{
	free_cwwb_sem(iommu);
	free_command_buffer(iommu);
	free_event_buffer(iommu);
	free_ppr_log(iommu);
	free_ga_log(iommu);
	/* Drop the MMIO mapping last, after all buffers are gone. */
	iommu_unmap_mmio_space(iommu);
}
1674 
1675 static void __init free_iommu_all(void)
1676 {
1677 	struct amd_iommu *iommu, *next;
1678 
1679 	for_each_iommu_safe(iommu, next) {
1680 		list_del(&iommu->list);
1681 		free_iommu_one(iommu);
1682 		kfree(iommu);
1683 	}
1684 }
1685 
1686 /*
1687  * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1688  * Workaround:
1689  *     BIOS should disable L2B micellaneous clock gating by setting
1690  *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1691  */
1692 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1693 {
1694 	u32 value;
1695 
1696 	if ((boot_cpu_data.x86 != 0x15) ||
1697 	    (boot_cpu_data.x86_model < 0x10) ||
1698 	    (boot_cpu_data.x86_model > 0x1f))
1699 		return;
1700 
1701 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1702 	pci_read_config_dword(iommu->dev, 0xf4, &value);
1703 
1704 	if (value & BIT(2))
1705 		return;
1706 
1707 	/* Select NB indirect register 0x90 and enable writing */
1708 	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1709 
1710 	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1711 	pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1712 
1713 	/* Clear the enable writing bit */
1714 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1715 }
1716 
1717 /*
1718  * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1719  * Workaround:
1720  *     BIOS should enable ATS write permission check by setting
1721  *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1722  */
1723 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1724 {
1725 	u32 value;
1726 
1727 	if ((boot_cpu_data.x86 != 0x15) ||
1728 	    (boot_cpu_data.x86_model < 0x30) ||
1729 	    (boot_cpu_data.x86_model > 0x3f))
1730 		return;
1731 
1732 	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1733 	value = iommu_read_l2(iommu, 0x47);
1734 
1735 	if (value & BIT(0))
1736 		return;
1737 
1738 	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1739 	iommu_write_l2(iommu, 0x47, value | BIT(0));
1740 
1741 	pci_info(iommu->dev, "Applying ATS write check workaround\n");
1742 }
1743 
1744 /*
1745  * This function glues the initialization function for one IOMMU
1746  * together and also allocates the command buffer and programs the
1747  * hardware. It does NOT enable the IOMMU. This is done afterwards.
1748  */
1749 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1750 				 struct acpi_table_header *ivrs_base)
1751 {
1752 	struct amd_iommu_pci_seg *pci_seg;
1753 
1754 	pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1755 	if (pci_seg == NULL)
1756 		return -ENOMEM;
1757 	iommu->pci_seg = pci_seg;
1758 
1759 	raw_spin_lock_init(&iommu->lock);
1760 	atomic64_set(&iommu->cmd_sem_val, 0);
1761 
1762 	/* Add IOMMU to internal data structures */
1763 	list_add_tail(&iommu->list, &amd_iommu_list);
1764 	iommu->index = amd_iommus_present++;
1765 
1766 	if (unlikely(iommu->index >= MAX_IOMMUS)) {
1767 		WARN(1, "System has more IOMMUs than supported by this driver\n");
1768 		return -ENOSYS;
1769 	}
1770 
1771 	/* Index is fine - add IOMMU to the array */
1772 	amd_iommus[iommu->index] = iommu;
1773 
1774 	/*
1775 	 * Copy data from ACPI table entry to the iommu struct
1776 	 */
1777 	iommu->devid   = h->devid;
1778 	iommu->cap_ptr = h->cap_ptr;
1779 	iommu->mmio_phys = h->mmio_phys;
1780 
1781 	switch (h->type) {
1782 	case 0x10:
1783 		/* Check if IVHD EFR contains proper max banks/counters */
1784 		if ((h->efr_attr != 0) &&
1785 		    ((h->efr_attr & (0xF << 13)) != 0) &&
1786 		    ((h->efr_attr & (0x3F << 17)) != 0))
1787 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1788 		else
1789 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1790 
1791 		/*
1792 		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1793 		 * GAM also requires GA mode. Therefore, we need to
1794 		 * check cmpxchg16b support before enabling it.
1795 		 */
1796 		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1797 		    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1798 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1799 		break;
1800 	case 0x11:
1801 	case 0x40:
1802 		if (h->efr_reg & (1 << 9))
1803 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1804 		else
1805 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1806 
1807 		/*
1808 		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1809 		 * XT, GAM also requires GA mode. Therefore, we need to
1810 		 * check cmpxchg16b support before enabling them.
1811 		 */
1812 		if (!boot_cpu_has(X86_FEATURE_CX16) ||
1813 		    ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
1814 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1815 			break;
1816 		}
1817 
1818 		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1819 			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1820 
1821 		early_iommu_features_init(iommu, h);
1822 
1823 		break;
1824 	default:
1825 		return -EINVAL;
1826 	}
1827 
1828 	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1829 						iommu->mmio_phys_end);
1830 	if (!iommu->mmio_base)
1831 		return -ENOMEM;
1832 
1833 	return init_iommu_from_acpi(iommu, h);
1834 }
1835 
1836 static int __init init_iommu_one_late(struct amd_iommu *iommu)
1837 {
1838 	int ret;
1839 
1840 	if (alloc_cwwb_sem(iommu))
1841 		return -ENOMEM;
1842 
1843 	if (alloc_command_buffer(iommu))
1844 		return -ENOMEM;
1845 
1846 	if (alloc_event_buffer(iommu))
1847 		return -ENOMEM;
1848 
1849 	iommu->int_enabled = false;
1850 
1851 	init_translation_status(iommu);
1852 	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1853 		iommu_disable(iommu);
1854 		clear_translation_pre_enabled(iommu);
1855 		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1856 			iommu->index);
1857 	}
1858 	if (amd_iommu_pre_enabled)
1859 		amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1860 
1861 	if (amd_iommu_irq_remap) {
1862 		ret = amd_iommu_create_irq_domain(iommu);
1863 		if (ret)
1864 			return ret;
1865 	}
1866 
1867 	/*
1868 	 * Make sure IOMMU is not considered to translate itself. The IVRS
1869 	 * table tells us so, but this is a lie!
1870 	 */
1871 	iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1872 
1873 	return 0;
1874 }
1875 
1876 /**
1877  * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1878  * @ivrs: Pointer to the IVRS header
1879  *
1880  * This function search through all IVDB of the maximum supported IVHD
1881  */
1882 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1883 {
1884 	u8 *base = (u8 *)ivrs;
1885 	struct ivhd_header *ivhd = (struct ivhd_header *)
1886 					(base + IVRS_HEADER_LENGTH);
1887 	u8 last_type = ivhd->type;
1888 	u16 devid = ivhd->devid;
1889 
1890 	while (((u8 *)ivhd - base < ivrs->length) &&
1891 	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1892 		u8 *p = (u8 *) ivhd;
1893 
1894 		if (ivhd->devid == devid)
1895 			last_type = ivhd->type;
1896 		ivhd = (struct ivhd_header *)(p + ivhd->length);
1897 	}
1898 
1899 	return last_type;
1900 }
1901 
1902 /*
1903  * Iterates over all IOMMU entries in the ACPI table, allocates the
1904  * IOMMU structure and initializes it with init_iommu_one()
1905  */
1906 static int __init init_iommu_all(struct acpi_table_header *table)
1907 {
1908 	u8 *p = (u8 *)table, *end = (u8 *)table;
1909 	struct ivhd_header *h;
1910 	struct amd_iommu *iommu;
1911 	int ret;
1912 
1913 	end += table->length;
1914 	p += IVRS_HEADER_LENGTH;
1915 
1916 	/* Phase 1: Process all IVHD blocks */
1917 	while (p < end) {
1918 		h = (struct ivhd_header *)p;
1919 		if (*p == amd_iommu_target_ivhd_type) {
1920 
1921 			DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
1922 				    "flags: %01x info %04x\n",
1923 				    h->pci_seg, PCI_BUS_NUM(h->devid),
1924 				    PCI_SLOT(h->devid), PCI_FUNC(h->devid),
1925 				    h->cap_ptr, h->flags, h->info);
1926 			DUMP_printk("       mmio-addr: %016llx\n",
1927 				    h->mmio_phys);
1928 
1929 			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1930 			if (iommu == NULL)
1931 				return -ENOMEM;
1932 
1933 			ret = init_iommu_one(iommu, h, table);
1934 			if (ret)
1935 				return ret;
1936 		}
1937 		p += h->length;
1938 
1939 	}
1940 	WARN_ON(p != end);
1941 
1942 	/* Phase 2 : Early feature support check */
1943 	get_global_efr();
1944 
1945 	/* Phase 3 : Enabling IOMMU features */
1946 	for_each_iommu(iommu) {
1947 		ret = init_iommu_one_late(iommu);
1948 		if (ret)
1949 			return ret;
1950 	}
1951 
1952 	return 0;
1953 }
1954 
1955 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1956 {
1957 	u64 val;
1958 	struct pci_dev *pdev = iommu->dev;
1959 
1960 	if (!iommu_feature(iommu, FEATURE_PC))
1961 		return;
1962 
1963 	amd_iommu_pc_present = true;
1964 
1965 	pci_info(pdev, "IOMMU performance counters supported\n");
1966 
1967 	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1968 	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1969 	iommu->max_counters = (u8) ((val >> 7) & 0xf);
1970 
1971 	return;
1972 }
1973 
1974 static ssize_t amd_iommu_show_cap(struct device *dev,
1975 				  struct device_attribute *attr,
1976 				  char *buf)
1977 {
1978 	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1979 	return sysfs_emit(buf, "%x\n", iommu->cap);
1980 }
1981 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1982 
1983 static ssize_t amd_iommu_show_features(struct device *dev,
1984 				       struct device_attribute *attr,
1985 				       char *buf)
1986 {
1987 	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1988 	return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features);
1989 }
1990 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1991 
/* NULL-terminated list of the "cap" and "features" device attributes. */
static struct attribute *amd_iommu_attrs[] = {
	&dev_attr_cap.attr,
	&dev_attr_features.attr,
	NULL,
};

/* Attribute group named "amd-iommu" holding the attributes above. */
static struct attribute_group amd_iommu_group = {
	.name = "amd-iommu",
	.attrs = amd_iommu_attrs,
};

/*
 * NULL-terminated group list; this is what iommu_init_pci() hands to
 * iommu_device_sysfs_add().
 */
static const struct attribute_group *amd_iommu_groups[] = {
	&amd_iommu_group,
	NULL,
};
2007 
2008 /*
2009  * Note: IVHD 0x11 and 0x40 also contains exact copy
2010  * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
2011  * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
2012  */
2013 static void __init late_iommu_features_init(struct amd_iommu *iommu)
2014 {
2015 	u64 features, features2;
2016 
2017 	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
2018 		return;
2019 
2020 	/* read extended feature bits */
2021 	features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
2022 	features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
2023 
2024 	if (!iommu->features) {
2025 		iommu->features = features;
2026 		iommu->features2 = features2;
2027 		return;
2028 	}
2029 
2030 	/*
2031 	 * Sanity check and warn if EFR values from
2032 	 * IVHD and MMIO conflict.
2033 	 */
2034 	if (features != iommu->features ||
2035 	    features2 != iommu->features2) {
2036 		pr_warn(FW_WARN
2037 			"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2038 			features, iommu->features,
2039 			features2, iommu->features2);
2040 	}
2041 }
2042 
2043 static int __init iommu_init_pci(struct amd_iommu *iommu)
2044 {
2045 	int cap_ptr = iommu->cap_ptr;
2046 	int ret;
2047 
2048 	iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2049 						 PCI_BUS_NUM(iommu->devid),
2050 						 iommu->devid & 0xff);
2051 	if (!iommu->dev)
2052 		return -ENODEV;
2053 
2054 	/* Prevent binding other PCI device drivers to IOMMU devices */
2055 	iommu->dev->match_driver = false;
2056 
2057 	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2058 			      &iommu->cap);
2059 
2060 	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2061 		amd_iommu_iotlb_sup = false;
2062 
2063 	late_iommu_features_init(iommu);
2064 
2065 	if (iommu_feature(iommu, FEATURE_GT)) {
2066 		int glxval;
2067 		u32 max_pasid;
2068 		u64 pasmax;
2069 
2070 		pasmax = iommu->features & FEATURE_PASID_MASK;
2071 		pasmax >>= FEATURE_PASID_SHIFT;
2072 		max_pasid  = (1 << (pasmax + 1)) - 1;
2073 
2074 		amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
2075 
2076 		BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
2077 
2078 		glxval   = iommu->features & FEATURE_GLXVAL_MASK;
2079 		glxval >>= FEATURE_GLXVAL_SHIFT;
2080 
2081 		if (amd_iommu_max_glx_val == -1)
2082 			amd_iommu_max_glx_val = glxval;
2083 		else
2084 			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2085 	}
2086 
2087 	if (iommu_feature(iommu, FEATURE_GT) &&
2088 	    iommu_feature(iommu, FEATURE_PPR)) {
2089 		iommu->is_iommu_v2   = true;
2090 		amd_iommu_v2_present = true;
2091 	}
2092 
2093 	if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
2094 		return -ENOMEM;
2095 
2096 	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2097 		pr_info("Using strict mode due to virtualization\n");
2098 		iommu_set_dma_strict();
2099 		amd_iommu_np_cache = true;
2100 	}
2101 
2102 	init_iommu_perf_ctr(iommu);
2103 
2104 	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
2105 		if (!iommu_feature(iommu, FEATURE_GIOSUP) ||
2106 		    !iommu_feature(iommu, FEATURE_GT)) {
2107 			pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
2108 			amd_iommu_pgtable = AMD_IOMMU_V1;
2109 		} else if (iommu_default_passthrough()) {
2110 			pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n");
2111 			amd_iommu_pgtable = AMD_IOMMU_V1;
2112 		}
2113 	}
2114 
2115 	if (is_rd890_iommu(iommu->dev)) {
2116 		int i, j;
2117 
2118 		iommu->root_pdev =
2119 			pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2120 						    iommu->dev->bus->number,
2121 						    PCI_DEVFN(0, 0));
2122 
2123 		/*
2124 		 * Some rd890 systems may not be fully reconfigured by the
2125 		 * BIOS, so it's necessary for us to store this information so
2126 		 * it can be reprogrammed on resume
2127 		 */
2128 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2129 				&iommu->stored_addr_lo);
2130 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2131 				&iommu->stored_addr_hi);
2132 
2133 		/* Low bit locks writes to configuration space */
2134 		iommu->stored_addr_lo &= ~1;
2135 
2136 		for (i = 0; i < 6; i++)
2137 			for (j = 0; j < 0x12; j++)
2138 				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2139 
2140 		for (i = 0; i < 0x83; i++)
2141 			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2142 	}
2143 
2144 	amd_iommu_erratum_746_workaround(iommu);
2145 	amd_iommu_ats_write_check_workaround(iommu);
2146 
2147 	ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2148 			       amd_iommu_groups, "ivhd%d", iommu->index);
2149 	if (ret)
2150 		return ret;
2151 
2152 	iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2153 
2154 	return pci_enable_device(iommu->dev);
2155 }
2156 
2157 static void print_iommu_info(void)
2158 {
2159 	static const char * const feat_str[] = {
2160 		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2161 		"IA", "GA", "HE", "PC"
2162 	};
2163 	struct amd_iommu *iommu;
2164 
2165 	for_each_iommu(iommu) {
2166 		struct pci_dev *pdev = iommu->dev;
2167 		int i;
2168 
2169 		pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
2170 
2171 		if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
2172 			pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
2173 
2174 			for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2175 				if (iommu_feature(iommu, (1ULL << i)))
2176 					pr_cont(" %s", feat_str[i]);
2177 			}
2178 
2179 			if (iommu->features & FEATURE_GAM_VAPIC)
2180 				pr_cont(" GA_vAPIC");
2181 
2182 			if (iommu->features & FEATURE_SNP)
2183 				pr_cont(" SNP");
2184 
2185 			pr_cont("\n");
2186 		}
2187 	}
2188 	if (irq_remapping_enabled) {
2189 		pr_info("Interrupt remapping enabled\n");
2190 		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2191 			pr_info("X2APIC enabled\n");
2192 	}
2193 	if (amd_iommu_pgtable == AMD_IOMMU_V2) {
2194 		pr_info("V2 page table enabled (Paging mode : %d level)\n",
2195 			amd_iommu_gpt_level);
2196 	}
2197 }
2198 
2199 static int __init amd_iommu_init_pci(void)
2200 {
2201 	struct amd_iommu *iommu;
2202 	struct amd_iommu_pci_seg *pci_seg;
2203 	int ret;
2204 
2205 	for_each_iommu(iommu) {
2206 		ret = iommu_init_pci(iommu);
2207 		if (ret) {
2208 			pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2209 			       iommu->index, ret);
2210 			goto out;
2211 		}
2212 		/* Need to setup range after PCI init */
2213 		iommu_set_cwwb_range(iommu);
2214 	}
2215 
2216 	/*
2217 	 * Order is important here to make sure any unity map requirements are
2218 	 * fulfilled. The unity mappings are created and written to the device
2219 	 * table during the iommu_init_pci() call.
2220 	 *
2221 	 * After that we call init_device_table_dma() to make sure any
2222 	 * uninitialized DTE will block DMA, and in the end we flush the caches
2223 	 * of all IOMMUs to make sure the changes to the device table are
2224 	 * active.
2225 	 */
2226 	for_each_pci_segment(pci_seg)
2227 		init_device_table_dma(pci_seg);
2228 
2229 	for_each_iommu(iommu)
2230 		iommu_flush_all_caches(iommu);
2231 
2232 	print_iommu_info();
2233 
2234 out:
2235 	return ret;
2236 }
2237 
2238 /****************************************************************************
2239  *
2240  * The following functions initialize the MSI interrupts for all IOMMUs
2241  * in the system. It's a bit challenging because there could be multiple
2242  * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2243  * pci_dev.
2244  *
2245  ****************************************************************************/
2246 
2247 static int iommu_setup_msi(struct amd_iommu *iommu)
2248 {
2249 	int r;
2250 
2251 	r = pci_enable_msi(iommu->dev);
2252 	if (r)
2253 		return r;
2254 
2255 	r = request_threaded_irq(iommu->dev->irq,
2256 				 amd_iommu_int_handler,
2257 				 amd_iommu_int_thread,
2258 				 0, "AMD-Vi",
2259 				 iommu);
2260 
2261 	if (r) {
2262 		pci_disable_msi(iommu->dev);
2263 		return r;
2264 	}
2265 
2266 	return 0;
2267 }
2268 
2269 union intcapxt {
2270 	u64	capxt;
2271 	struct {
2272 		u64	reserved_0		:  2,
2273 			dest_mode_logical	:  1,
2274 			reserved_1		:  5,
2275 			destid_0_23		: 24,
2276 			vector			:  8,
2277 			reserved_2		: 16,
2278 			destid_24_31		:  8;
2279 	};
2280 } __attribute__ ((packed));
2281 
2282 
/*
 * Declared ahead of its initializer because intcapxt_irqdomain_alloc()
 * below takes its address.
 */
static struct irq_chip intcapxt_controller;
2284 
/*
 * .activate callback of intcapxt_domain_ops. There is nothing to do at
 * activation time, so it unconditionally reports success (0).
 */
static int intcapxt_irqdomain_activate(struct irq_domain *domain,
				       struct irq_data *irqd, bool reserve)
{
	return 0;
}
2290 
/* .deactivate callback of intcapxt_domain_ops; intentionally empty. */
static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
					  struct irq_data *irqd)
{
}
2295 
2296 
2297 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2298 				    unsigned int nr_irqs, void *arg)
2299 {
2300 	struct irq_alloc_info *info = arg;
2301 	int i, ret;
2302 
2303 	if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2304 		return -EINVAL;
2305 
2306 	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2307 	if (ret < 0)
2308 		return ret;
2309 
2310 	for (i = virq; i < virq + nr_irqs; i++) {
2311 		struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2312 
2313 		irqd->chip = &intcapxt_controller;
2314 		irqd->chip_data = info->data;
2315 		__irq_set_handler(i, handle_edge_irq, 0, "edge");
2316 	}
2317 
2318 	return ret;
2319 }
2320 
/*
 * .free callback of intcapxt_domain_ops; hands the whole range straight to
 * irq_domain_free_irqs_top().
 */
static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
				    unsigned int nr_irqs)
{
	irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
2326 
2327 
2328 static void intcapxt_unmask_irq(struct irq_data *irqd)
2329 {
2330 	struct amd_iommu *iommu = irqd->chip_data;
2331 	struct irq_cfg *cfg = irqd_cfg(irqd);
2332 	union intcapxt xt;
2333 
2334 	xt.capxt = 0ULL;
2335 	xt.dest_mode_logical = apic->dest_mode_logical;
2336 	xt.vector = cfg->vector;
2337 	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2338 	xt.destid_24_31 = cfg->dest_apicid >> 24;
2339 
2340 	/**
2341 	 * Current IOMMU implementation uses the same IRQ for all
2342 	 * 3 IOMMU interrupts.
2343 	 */
2344 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2345 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2346 	writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2347 }
2348 
2349 static void intcapxt_mask_irq(struct irq_data *irqd)
2350 {
2351 	struct amd_iommu *iommu = irqd->chip_data;
2352 
2353 	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2354 	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2355 	writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2356 }
2357 
2358 
2359 static int intcapxt_set_affinity(struct irq_data *irqd,
2360 				 const struct cpumask *mask, bool force)
2361 {
2362 	struct irq_data *parent = irqd->parent_data;
2363 	int ret;
2364 
2365 	ret = parent->chip->irq_set_affinity(parent, mask, force);
2366 	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2367 		return ret;
2368 	return 0;
2369 }
2370 
/*
 * .irq_set_wake callback: enabling wake-up is rejected with -EOPNOTSUPP,
 * disabling it (@on == 0) succeeds trivially.
 */
static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
{
	if (on)
		return -EOPNOTSUPP;

	return 0;
}
2375 
2376 static struct irq_chip intcapxt_controller = {
2377 	.name			= "IOMMU-MSI",
2378 	.irq_unmask		= intcapxt_unmask_irq,
2379 	.irq_mask		= intcapxt_mask_irq,
2380 	.irq_ack		= irq_chip_ack_parent,
2381 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
2382 	.irq_set_affinity       = intcapxt_set_affinity,
2383 	.irq_set_wake		= intcapxt_set_wake,
2384 	.flags			= IRQCHIP_MASK_ON_SUSPEND,
2385 };
2386 
2387 static const struct irq_domain_ops intcapxt_domain_ops = {
2388 	.alloc			= intcapxt_irqdomain_alloc,
2389 	.free			= intcapxt_irqdomain_free,
2390 	.activate		= intcapxt_irqdomain_activate,
2391 	.deactivate		= intcapxt_irqdomain_deactivate,
2392 };
2393 
2394 
2395 static struct irq_domain *iommu_irqdomain;
2396 
2397 static struct irq_domain *iommu_get_irqdomain(void)
2398 {
2399 	struct fwnode_handle *fn;
2400 
2401 	/* No need for locking here (yet) as the init is single-threaded */
2402 	if (iommu_irqdomain)
2403 		return iommu_irqdomain;
2404 
2405 	fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2406 	if (!fn)
2407 		return NULL;
2408 
2409 	iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2410 						      fn, &intcapxt_domain_ops,
2411 						      NULL);
2412 	if (!iommu_irqdomain)
2413 		irq_domain_free_fwnode(fn);
2414 
2415 	return iommu_irqdomain;
2416 }
2417 
2418 static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2419 {
2420 	struct irq_domain *domain;
2421 	struct irq_alloc_info info;
2422 	int irq, ret;
2423 	int node = dev_to_node(&iommu->dev->dev);
2424 
2425 	domain = iommu_get_irqdomain();
2426 	if (!domain)
2427 		return -ENXIO;
2428 
2429 	init_irq_alloc_info(&info, NULL);
2430 	info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2431 	info.data = iommu;
2432 
2433 	irq = irq_domain_alloc_irqs(domain, 1, node, &info);
2434 	if (irq < 0) {
2435 		irq_domain_remove(domain);
2436 		return irq;
2437 	}
2438 
2439 	ret = request_threaded_irq(irq, amd_iommu_int_handler,
2440 				   amd_iommu_int_thread, 0, "AMD-Vi", iommu);
2441 	if (ret) {
2442 		irq_domain_free_irqs(irq, 1);
2443 		irq_domain_remove(domain);
2444 		return ret;
2445 	}
2446 
2447 	return 0;
2448 }
2449 
2450 static int iommu_init_irq(struct amd_iommu *iommu)
2451 {
2452 	int ret;
2453 
2454 	if (iommu->int_enabled)
2455 		goto enable_faults;
2456 
2457 	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2458 		ret = iommu_setup_intcapxt(iommu);
2459 	else if (iommu->dev->msi_cap)
2460 		ret = iommu_setup_msi(iommu);
2461 	else
2462 		ret = -ENODEV;
2463 
2464 	if (ret)
2465 		return ret;
2466 
2467 	iommu->int_enabled = true;
2468 enable_faults:
2469 
2470 	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2471 		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2472 
2473 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2474 
2475 	if (iommu->ppr_log != NULL)
2476 		iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
2477 	return 0;
2478 }
2479 
2480 /****************************************************************************
2481  *
2482  * The next functions belong to the third pass of parsing the ACPI
2483  * table. In this last pass the memory mapping requirements are
2484  * gathered (like exclusion and unity mapping ranges).
2485  *
2486  ****************************************************************************/
2487 
2488 static void __init free_unity_maps(void)
2489 {
2490 	struct unity_map_entry *entry, *next;
2491 	struct amd_iommu_pci_seg *p, *pci_seg;
2492 
2493 	for_each_pci_segment_safe(pci_seg, p) {
2494 		list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2495 			list_del(&entry->list);
2496 			kfree(entry);
2497 		}
2498 	}
2499 }
2500 
2501 /* called for unity map ACPI definition */
2502 static int __init init_unity_map_range(struct ivmd_header *m,
2503 				       struct acpi_table_header *ivrs_base)
2504 {
2505 	struct unity_map_entry *e = NULL;
2506 	struct amd_iommu_pci_seg *pci_seg;
2507 	char *s;
2508 
2509 	pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2510 	if (pci_seg == NULL)
2511 		return -ENOMEM;
2512 
2513 	e = kzalloc(sizeof(*e), GFP_KERNEL);
2514 	if (e == NULL)
2515 		return -ENOMEM;
2516 
2517 	switch (m->type) {
2518 	default:
2519 		kfree(e);
2520 		return 0;
2521 	case ACPI_IVMD_TYPE:
2522 		s = "IVMD_TYPEi\t\t\t";
2523 		e->devid_start = e->devid_end = m->devid;
2524 		break;
2525 	case ACPI_IVMD_TYPE_ALL:
2526 		s = "IVMD_TYPE_ALL\t\t";
2527 		e->devid_start = 0;
2528 		e->devid_end = pci_seg->last_bdf;
2529 		break;
2530 	case ACPI_IVMD_TYPE_RANGE:
2531 		s = "IVMD_TYPE_RANGE\t\t";
2532 		e->devid_start = m->devid;
2533 		e->devid_end = m->aux;
2534 		break;
2535 	}
2536 	e->address_start = PAGE_ALIGN(m->range_start);
2537 	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2538 	e->prot = m->flags >> 1;
2539 
2540 	/*
2541 	 * Treat per-device exclusion ranges as r/w unity-mapped regions
2542 	 * since some buggy BIOSes might lead to the overwritten exclusion
2543 	 * range (exclusion_start and exclusion_length members). This
2544 	 * happens when there are multiple exclusion ranges (IVMD entries)
2545 	 * defined in ACPI table.
2546 	 */
2547 	if (m->flags & IVMD_FLAG_EXCL_RANGE)
2548 		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2549 
2550 	DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2551 		    "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2552 		    " flags: %x\n", s, m->pci_seg,
2553 		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2554 		    PCI_FUNC(e->devid_start), m->pci_seg,
2555 		    PCI_BUS_NUM(e->devid_end),
2556 		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2557 		    e->address_start, e->address_end, m->flags);
2558 
2559 	list_add_tail(&e->list, &pci_seg->unity_map);
2560 
2561 	return 0;
2562 }
2563 
2564 /* iterates over all memory definitions we find in the ACPI table */
2565 static int __init init_memory_definitions(struct acpi_table_header *table)
2566 {
2567 	u8 *p = (u8 *)table, *end = (u8 *)table;
2568 	struct ivmd_header *m;
2569 
2570 	end += table->length;
2571 	p += IVRS_HEADER_LENGTH;
2572 
2573 	while (p < end) {
2574 		m = (struct ivmd_header *)p;
2575 		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2576 			init_unity_map_range(m, table);
2577 
2578 		p += m->length;
2579 	}
2580 
2581 	return 0;
2582 }
2583 
2584 /*
2585  * Init the device table to not allow DMA access for devices
2586  */
2587 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2588 {
2589 	u32 devid;
2590 	struct dev_table_entry *dev_table = pci_seg->dev_table;
2591 
2592 	if (dev_table == NULL)
2593 		return;
2594 
2595 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2596 		__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
2597 		if (!amd_iommu_snp_en)
2598 			__set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
2599 	}
2600 }
2601 
2602 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2603 {
2604 	u32 devid;
2605 	struct dev_table_entry *dev_table = pci_seg->dev_table;
2606 
2607 	if (dev_table == NULL)
2608 		return;
2609 
2610 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2611 		dev_table[devid].data[0] = 0ULL;
2612 		dev_table[devid].data[1] = 0ULL;
2613 	}
2614 }
2615 
2616 static void init_device_table(void)
2617 {
2618 	struct amd_iommu_pci_seg *pci_seg;
2619 	u32 devid;
2620 
2621 	if (!amd_iommu_irq_remap)
2622 		return;
2623 
2624 	for_each_pci_segment(pci_seg) {
2625 		for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2626 			__set_dev_entry_bit(pci_seg->dev_table,
2627 					    devid, DEV_ENTRY_IRQ_TBL_EN);
2628 	}
2629 }
2630 
2631 static void iommu_init_flags(struct amd_iommu *iommu)
2632 {
2633 	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2634 		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2635 		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2636 
2637 	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2638 		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2639 		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2640 
2641 	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2642 		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2643 		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2644 
2645 	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2646 		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2647 		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2648 
2649 	/*
2650 	 * make IOMMU memory accesses cache coherent
2651 	 */
2652 	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2653 
2654 	/* Set IOTLB invalidation timeout to 1s */
2655 	iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2656 }
2657 
2658 static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2659 {
2660 	int i, j;
2661 	u32 ioc_feature_control;
2662 	struct pci_dev *pdev = iommu->root_pdev;
2663 
2664 	/* RD890 BIOSes may not have completely reconfigured the iommu */
2665 	if (!is_rd890_iommu(iommu->dev) || !pdev)
2666 		return;
2667 
2668 	/*
2669 	 * First, we need to ensure that the iommu is enabled. This is
2670 	 * controlled by a register in the northbridge
2671 	 */
2672 
2673 	/* Select Northbridge indirect register 0x75 and enable writing */
2674 	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2675 	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2676 
2677 	/* Enable the iommu */
2678 	if (!(ioc_feature_control & 0x1))
2679 		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2680 
2681 	/* Restore the iommu BAR */
2682 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2683 			       iommu->stored_addr_lo);
2684 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2685 			       iommu->stored_addr_hi);
2686 
2687 	/* Restore the l1 indirect regs for each of the 6 l1s */
2688 	for (i = 0; i < 6; i++)
2689 		for (j = 0; j < 0x12; j++)
2690 			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2691 
2692 	/* Restore the l2 indirect regs */
2693 	for (i = 0; i < 0x83; i++)
2694 		iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2695 
2696 	/* Lock PCI setup registers */
2697 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2698 			       iommu->stored_addr_lo | 1);
2699 }
2700 
/*
 * Select the IRTE operations for @iommu according to amd_iommu_guest_ir:
 * the vAPIC and legacy GA modes enable CONTROL_GA_EN and use
 * irte_128_ops, every other mode uses irte_32_ops.
 *
 * Compiles to an empty function without CONFIG_IRQ_REMAP.
 */
static void iommu_enable_ga(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	if (amd_iommu_guest_ir == AMD_IOMMU_GUEST_IR_VAPIC ||
	    amd_iommu_guest_ir == AMD_IOMMU_GUEST_IR_LEGACY_GA) {
		iommu_feature_enable(iommu, CONTROL_GA_EN);
		iommu->irte_ops = &irte_128_ops;
	} else {
		iommu->irte_ops = &irte_32_ops;
	}
#endif
}
2716 
/* Clear CONTROL_IRTCACHEDIS in the control register of @iommu. */
static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
{
	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
}
2721 
2722 static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
2723 {
2724 	u64 ctrl;
2725 
2726 	if (!amd_iommu_irtcachedis)
2727 		return;
2728 
2729 	/*
2730 	 * Note:
2731 	 * The support for IRTCacheDis feature is dertermined by
2732 	 * checking if the bit is writable.
2733 	 */
2734 	iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
2735 	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
2736 	ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
2737 	if (ctrl)
2738 		iommu->irtcachedis_enabled = true;
2739 	pr_info("iommu%d (%#06x) : IRT cache is %s\n",
2740 		iommu->index, iommu->devid,
2741 		iommu->irtcachedis_enabled ? "disabled" : "enabled");
2742 }
2743 
/*
 * Bring up one IOMMU from scratch: disable it, program its flags, device
 * table, command and event buffers, exclusion range and the GA / XT / IRT
 * cache settings, then enable it and flush all of its caches.
 */
static void early_enable_iommu(struct amd_iommu *iommu)
{
	/* Reprogram everything only while the IOMMU is switched off */
	iommu_disable(iommu);
	iommu_init_flags(iommu);
	iommu_set_device_table(iommu);
	iommu_enable_command_buffer(iommu);
	iommu_enable_event_buffer(iommu);
	iommu_set_exclusion_range(iommu);
	iommu_enable_ga(iommu);
	iommu_enable_xt(iommu);
	iommu_enable_irtcachedis(iommu);
	iommu_enable(iommu);
	iommu_flush_all_caches(iommu);
}
2758 
2759 /*
2760  * This function finally enables all IOMMUs found in the system after
2761  * they have been initialized.
2762  *
2763  * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy
2764  * the old content of device table entries. Not this case or copy failed,
2765  * just continue as normal kernel does.
2766  */
2767 static void early_enable_iommus(void)
2768 {
2769 	struct amd_iommu *iommu;
2770 	struct amd_iommu_pci_seg *pci_seg;
2771 
2772 	if (!copy_device_table()) {
2773 		/*
2774 		 * If come here because of failure in copying device table from old
2775 		 * kernel with all IOMMUs enabled, print error message and try to
2776 		 * free allocated old_dev_tbl_cpy.
2777 		 */
2778 		if (amd_iommu_pre_enabled)
2779 			pr_err("Failed to copy DEV table from previous kernel.\n");
2780 
2781 		for_each_pci_segment(pci_seg) {
2782 			if (pci_seg->old_dev_tbl_cpy != NULL) {
2783 				free_pages((unsigned long)pci_seg->old_dev_tbl_cpy,
2784 						get_order(pci_seg->dev_table_size));
2785 				pci_seg->old_dev_tbl_cpy = NULL;
2786 			}
2787 		}
2788 
2789 		for_each_iommu(iommu) {
2790 			clear_translation_pre_enabled(iommu);
2791 			early_enable_iommu(iommu);
2792 		}
2793 	} else {
2794 		pr_info("Copied DEV table from previous kernel.\n");
2795 
2796 		for_each_pci_segment(pci_seg) {
2797 			free_pages((unsigned long)pci_seg->dev_table,
2798 				   get_order(pci_seg->dev_table_size));
2799 			pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
2800 		}
2801 
2802 		for_each_iommu(iommu) {
2803 			iommu_disable_command_buffer(iommu);
2804 			iommu_disable_event_buffer(iommu);
2805 			iommu_disable_irtcachedis(iommu);
2806 			iommu_enable_command_buffer(iommu);
2807 			iommu_enable_event_buffer(iommu);
2808 			iommu_enable_ga(iommu);
2809 			iommu_enable_xt(iommu);
2810 			iommu_enable_irtcachedis(iommu);
2811 			iommu_set_device_table(iommu);
2812 			iommu_flush_all_caches(iommu);
2813 		}
2814 	}
2815 }
2816 
/* Call iommu_enable_ppr_log() and iommu_enable_gt() on every IOMMU. */
static void enable_iommus_v2(void)
{
	struct amd_iommu *iommu;

	for_each_iommu(iommu) {
		iommu_enable_ppr_log(iommu);
		iommu_enable_gt(iommu);
	}
}
2826 
/*
 * Enable guest virtual APIC support on all IOMMUs (CONFIG_IRQ_REMAP only).
 *
 * A GA log that is still running is stopped first. The function then falls
 * back to AMD_IOMMU_GUEST_IR_LEGACY_GA if vAPIC mode was requested but not
 * every IOMMU has FEATURE_GAM_VAPIC, or if SNP is enabled without SNP AVIC
 * support. Otherwise the GA log is set up and GAM (plus SNP AVIC under SNP)
 * is enabled on each IOMMU, and IRQ_POSTING_CAP is advertised.
 *
 * Returns early, without advertising the capability, on a GA log stop
 * timeout or a GA log setup failure.
 */
static void enable_iommus_vapic(void)
{
#ifdef CONFIG_IRQ_REMAP
	struct amd_iommu *iommu;
	u32 sts, tries;

	for_each_iommu(iommu) {
		/*
		 * Disable GALog if already running. It could have been enabled
		 * in the previous boot before kdump.
		 */
		sts = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
		if (!(sts & MMIO_STATUS_GALOG_RUN_MASK))
			continue;

		iommu_feature_disable(iommu, CONTROL_GALOG_EN);
		iommu_feature_disable(iommu, CONTROL_GAINT_EN);

		/*
		 * Need to set and poll check the GALOGRun bit to zero before
		 * we can set/ modify GA Log registers safely.
		 */
		tries = 0;
		while (tries < LOOP_TIMEOUT) {
			sts = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
			if (!(sts & MMIO_STATUS_GALOG_RUN_MASK))
				break;
			udelay(10);
			++tries;
		}

		if (WARN_ON(tries >= LOOP_TIMEOUT))
			return;
	}

	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
	    !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
		return;
	}

	if (amd_iommu_snp_en &&
	    !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
		pr_warn("Force to disable Virtual APIC due to SNP\n");
		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
		return;
	}

	/* Enabling GAM and SNPAVIC support */
	for_each_iommu(iommu) {
		if (iommu_init_ga_log(iommu))
			return;
		if (iommu_ga_log_enable(iommu))
			return;

		iommu_feature_enable(iommu, CONTROL_GAM_EN);
		if (amd_iommu_snp_en)
			iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
	}

	amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
	pr_info("Virtual APIC enabled\n");
#endif
}
2888 
/*
 * Full enable sequence used by amd_iommu_resume(): basic enable first,
 * then the vAPIC and v2 (PPR log / GT) features.
 */
static void enable_iommus(void)
{
	early_enable_iommus();
	enable_iommus_vapic();
	enable_iommus_v2();
}
2895 
/*
 * Call iommu_disable() on every IOMMU. With CONFIG_IRQ_REMAP and guest IR
 * in vAPIC mode, IRQ_POSTING_CAP is also removed from
 * amd_iommu_irq_ops.capability.
 */
static void disable_iommus(void)
{
	struct amd_iommu *iommu;

	for_each_iommu(iommu)
		iommu_disable(iommu);

#ifdef CONFIG_IRQ_REMAP
	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
		amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
#endif
}
2908 
2909 /*
2910  * Suspend/Resume support
2911  * disable suspend until real resume implemented
2912  */
2913 
/*
 * .resume callback of amd_iommu_syscore_ops: apply the per-IOMMU resume
 * quirks, reprogram and enable all IOMMUs, then re-enable their
 * interrupts. The result of amd_iommu_enable_interrupts() is not checked
 * here.
 */
static void amd_iommu_resume(void)
{
	struct amd_iommu *iommu;

	for_each_iommu(iommu)
		iommu_apply_resume_quirks(iommu);

	/* re-load the hardware */
	enable_iommus();

	amd_iommu_enable_interrupts();
}
2926 
/*
 * .suspend callback of amd_iommu_syscore_ops: disable all IOMMUs.
 * Always returns 0.
 */
static int amd_iommu_suspend(void)
{
	/* disable IOMMUs to go out of the way for BIOS */
	disable_iommus();

	return 0;
}
2934 
/* Suspend/resume hooks; registered via register_syscore_ops() in state_next() */
static struct syscore_ops amd_iommu_syscore_ops = {
	.suspend = amd_iommu_suspend,
	.resume = amd_iommu_resume,
};
2939 
/*
 * Destroy the interrupt remapping kmem cache (resetting the pointer to
 * NULL) and call free_iommu_all() and free_pci_segments().
 */
static void __init free_iommu_resources(void)
{
	kmem_cache_destroy(amd_iommu_irq_cache);
	amd_iommu_irq_cache = NULL;

	free_iommu_all();
	free_pci_segments();
}
2948 
2949 /* SB IOAPIC is always on this device in AMD systems */
2950 #define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))
2951 
2952 static bool __init check_ioapic_information(void)
2953 {
2954 	const char *fw_bug = FW_BUG;
2955 	bool ret, has_sb_ioapic;
2956 	int idx;
2957 
2958 	has_sb_ioapic = false;
2959 	ret           = false;
2960 
2961 	/*
2962 	 * If we have map overrides on the kernel command line the
2963 	 * messages in this function might not describe firmware bugs
2964 	 * anymore - so be careful
2965 	 */
2966 	if (cmdline_maps)
2967 		fw_bug = "";
2968 
2969 	for (idx = 0; idx < nr_ioapics; idx++) {
2970 		int devid, id = mpc_ioapic_id(idx);
2971 
2972 		devid = get_ioapic_devid(id);
2973 		if (devid < 0) {
2974 			pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2975 				fw_bug, id);
2976 			ret = false;
2977 		} else if (devid == IOAPIC_SB_DEVID) {
2978 			has_sb_ioapic = true;
2979 			ret           = true;
2980 		}
2981 	}
2982 
2983 	if (!has_sb_ioapic) {
2984 		/*
2985 		 * We expect the SB IOAPIC to be listed in the IVRS
2986 		 * table. The system timer is connected to the SB IOAPIC
2987 		 * and if we don't have it in the list the system will
2988 		 * panic at boot time.  This situation usually happens
2989 		 * when the BIOS is buggy and provides us the wrong
2990 		 * device id for the IOAPIC in the system.
2991 		 */
2992 		pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2993 	}
2994 
2995 	if (!ret)
2996 		pr_err("Disabling interrupt remapping\n");
2997 
2998 	return ret;
2999 }
3000 
/*
 * Free the protection domain allocation bitmap (resetting the pointer to
 * NULL) and all unity map entries.
 */
static void __init free_dma_resources(void)
{
	/* Same order as used for the allocation in early_amd_iommu_init() */
	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
		   get_order(MAX_DOMAIN_ID/8));
	amd_iommu_pd_alloc_bitmap = NULL;

	free_unity_maps();
}
3009 
3010 static void __init ivinfo_init(void *ivrs)
3011 {
3012 	amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
3013 }
3014 
3015 /*
3016  * This is the hardware init function for AMD IOMMU in the system.
3017  * This function is called either from amd_iommu_init or from the interrupt
3018  * remapping setup code.
3019  *
3020  * This function basically parses the ACPI table for AMD IOMMU (IVRS)
3021  * four times:
3022  *
3023  *	1 pass) Discover the most comprehensive IVHD type to use.
3024  *
3025  *	2 pass) Find the highest PCI device id the driver has to handle.
3026  *		Upon this information the size of the data structures is
3027  *		determined that needs to be allocated.
3028  *
3029  *	3 pass) Initialize the data structures just allocated with the
3030  *		information in the ACPI table about available AMD IOMMUs
3031  *		in the system. It also maps the PCI devices in the
3032  *		system to specific IOMMUs
3033  *
3034  *	4 pass) After the basic data structures are allocated and
3035  *		initialized we update them with information about memory
3036  *		remapping requirements parsed out of the ACPI table in
3037  *		this last pass.
3038  *
3039  * After everything is set up the IOMMUs are enabled and the necessary
3040  * hotplug and suspend notifiers are registered.
3041  */
3042 static int __init early_amd_iommu_init(void)
3043 {
3044 	struct acpi_table_header *ivrs_base;
3045 	int remap_cache_sz, ret;
3046 	acpi_status status;
3047 
3048 	if (!amd_iommu_detected)
3049 		return -ENODEV;
3050 
3051 	status = acpi_get_table("IVRS", 0, &ivrs_base);
3052 	if (status == AE_NOT_FOUND)
3053 		return -ENODEV;
3054 	else if (ACPI_FAILURE(status)) {
3055 		const char *err = acpi_format_exception(status);
3056 		pr_err("IVRS table error: %s\n", err);
3057 		return -EINVAL;
3058 	}
3059 
3060 	/*
3061 	 * Validate checksum here so we don't need to do it when
3062 	 * we actually parse the table
3063 	 */
3064 	ret = check_ivrs_checksum(ivrs_base);
3065 	if (ret)
3066 		goto out;
3067 
3068 	ivinfo_init(ivrs_base);
3069 
3070 	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
3071 	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
3072 
3073 	/* Device table - directly used by all IOMMUs */
3074 	ret = -ENOMEM;
3075 
3076 	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
3077 					    GFP_KERNEL | __GFP_ZERO,
3078 					    get_order(MAX_DOMAIN_ID/8));
3079 	if (amd_iommu_pd_alloc_bitmap == NULL)
3080 		goto out;
3081 
3082 	/*
3083 	 * never allocate domain 0 because its used as the non-allocated and
3084 	 * error value placeholder
3085 	 */
3086 	__set_bit(0, amd_iommu_pd_alloc_bitmap);
3087 
3088 	/*
3089 	 * now the data structures are allocated and basically initialized
3090 	 * start the real acpi table scan
3091 	 */
3092 	ret = init_iommu_all(ivrs_base);
3093 	if (ret)
3094 		goto out;
3095 
3096 	/* 5 level guest page table */
3097 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3098 	    check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
3099 		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
3100 
3101 	/* Disable any previously enabled IOMMUs */
3102 	if (!is_kdump_kernel() || amd_iommu_disabled)
3103 		disable_iommus();
3104 
3105 	if (amd_iommu_irq_remap)
3106 		amd_iommu_irq_remap = check_ioapic_information();
3107 
3108 	if (amd_iommu_irq_remap) {
3109 		struct amd_iommu_pci_seg *pci_seg;
3110 		/*
3111 		 * Interrupt remapping enabled, create kmem_cache for the
3112 		 * remapping tables.
3113 		 */
3114 		ret = -ENOMEM;
3115 		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3116 			remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
3117 		else
3118 			remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
3119 		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
3120 							remap_cache_sz,
3121 							DTE_INTTAB_ALIGNMENT,
3122 							0, NULL);
3123 		if (!amd_iommu_irq_cache)
3124 			goto out;
3125 
3126 		for_each_pci_segment(pci_seg) {
3127 			if (alloc_irq_lookup_table(pci_seg))
3128 				goto out;
3129 		}
3130 	}
3131 
3132 	ret = init_memory_definitions(ivrs_base);
3133 	if (ret)
3134 		goto out;
3135 
3136 	/* init the device table */
3137 	init_device_table();
3138 
3139 out:
3140 	/* Don't leak any ACPI memory */
3141 	acpi_put_table(ivrs_base);
3142 
3143 	return ret;
3144 }
3145 
3146 static int amd_iommu_enable_interrupts(void)
3147 {
3148 	struct amd_iommu *iommu;
3149 	int ret = 0;
3150 
3151 	for_each_iommu(iommu) {
3152 		ret = iommu_init_irq(iommu);
3153 		if (ret)
3154 			goto out;
3155 	}
3156 
3157 out:
3158 	return ret;
3159 }
3160 
3161 static bool __init detect_ivrs(void)
3162 {
3163 	struct acpi_table_header *ivrs_base;
3164 	acpi_status status;
3165 	int i;
3166 
3167 	status = acpi_get_table("IVRS", 0, &ivrs_base);
3168 	if (status == AE_NOT_FOUND)
3169 		return false;
3170 	else if (ACPI_FAILURE(status)) {
3171 		const char *err = acpi_format_exception(status);
3172 		pr_err("IVRS table error: %s\n", err);
3173 		return false;
3174 	}
3175 
3176 	acpi_put_table(ivrs_base);
3177 
3178 	if (amd_iommu_force_enable)
3179 		goto out;
3180 
3181 	/* Don't use IOMMU if there is Stoney Ridge graphics */
3182 	for (i = 0; i < 32; i++) {
3183 		u32 pci_id;
3184 
3185 		pci_id = read_pci_config(0, i, 0, 0);
3186 		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3187 			pr_info("Disable IOMMU on Stoney Ridge\n");
3188 			return false;
3189 		}
3190 	}
3191 
3192 out:
3193 	/* Make sure ACS will be enabled during PCI probe */
3194 	pci_request_acs();
3195 
3196 	return true;
3197 }
3198 
3199 /****************************************************************************
3200  *
3201  * AMD IOMMU Initialization State Machine
3202  *
3203  ****************************************************************************/
3204 
3205 static int __init state_next(void)
3206 {
3207 	int ret = 0;
3208 
3209 	switch (init_state) {
3210 	case IOMMU_START_STATE:
3211 		if (!detect_ivrs()) {
3212 			init_state	= IOMMU_NOT_FOUND;
3213 			ret		= -ENODEV;
3214 		} else {
3215 			init_state	= IOMMU_IVRS_DETECTED;
3216 		}
3217 		break;
3218 	case IOMMU_IVRS_DETECTED:
3219 		if (amd_iommu_disabled) {
3220 			init_state = IOMMU_CMDLINE_DISABLED;
3221 			ret = -EINVAL;
3222 		} else {
3223 			ret = early_amd_iommu_init();
3224 			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
3225 		}
3226 		break;
3227 	case IOMMU_ACPI_FINISHED:
3228 		early_enable_iommus();
3229 		x86_platform.iommu_shutdown = disable_iommus;
3230 		init_state = IOMMU_ENABLED;
3231 		break;
3232 	case IOMMU_ENABLED:
3233 		register_syscore_ops(&amd_iommu_syscore_ops);
3234 		ret = amd_iommu_init_pci();
3235 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
3236 		enable_iommus_vapic();
3237 		enable_iommus_v2();
3238 		break;
3239 	case IOMMU_PCI_INIT:
3240 		ret = amd_iommu_enable_interrupts();
3241 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
3242 		break;
3243 	case IOMMU_INTERRUPTS_EN:
3244 		init_state = IOMMU_INITIALIZED;
3245 		break;
3246 	case IOMMU_INITIALIZED:
3247 		/* Nothing to do */
3248 		break;
3249 	case IOMMU_NOT_FOUND:
3250 	case IOMMU_INIT_ERROR:
3251 	case IOMMU_CMDLINE_DISABLED:
3252 		/* Error states => do nothing */
3253 		ret = -EINVAL;
3254 		break;
3255 	default:
3256 		/* Unknown state */
3257 		BUG();
3258 	}
3259 
3260 	if (ret) {
3261 		free_dma_resources();
3262 		if (!irq_remapping_enabled) {
3263 			disable_iommus();
3264 			free_iommu_resources();
3265 		} else {
3266 			struct amd_iommu *iommu;
3267 			struct amd_iommu_pci_seg *pci_seg;
3268 
3269 			for_each_pci_segment(pci_seg)
3270 				uninit_device_table_dma(pci_seg);
3271 
3272 			for_each_iommu(iommu)
3273 				iommu_flush_all_caches(iommu);
3274 		}
3275 	}
3276 	return ret;
3277 }
3278 
3279 static int __init iommu_go_to_state(enum iommu_init_state state)
3280 {
3281 	int ret = -EINVAL;
3282 
3283 	while (init_state != state) {
3284 		if (init_state == IOMMU_NOT_FOUND         ||
3285 		    init_state == IOMMU_INIT_ERROR        ||
3286 		    init_state == IOMMU_CMDLINE_DISABLED)
3287 			break;
3288 		ret = state_next();
3289 	}
3290 
3291 	return ret;
3292 }
3293 
3294 #ifdef CONFIG_IRQ_REMAP
3295 int __init amd_iommu_prepare(void)
3296 {
3297 	int ret;
3298 
3299 	amd_iommu_irq_remap = true;
3300 
3301 	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3302 	if (ret) {
3303 		amd_iommu_irq_remap = false;
3304 		return ret;
3305 	}
3306 
3307 	return amd_iommu_irq_remap ? 0 : -ENODEV;
3308 }
3309 
3310 int __init amd_iommu_enable(void)
3311 {
3312 	int ret;
3313 
3314 	ret = iommu_go_to_state(IOMMU_ENABLED);
3315 	if (ret)
3316 		return ret;
3317 
3318 	irq_remapping_enabled = 1;
3319 	return amd_iommu_xt_mode;
3320 }
3321 
/* Disable hook: forwards to amd_iommu_suspend(). */
void amd_iommu_disable(void)
{
	amd_iommu_suspend();
}
3326 
/*
 * Re-enable hook: forwards to amd_iommu_resume().
 *
 * @mode is accepted but not used. Always returns 0.
 */
int amd_iommu_reenable(int mode)
{
	amd_iommu_resume();
	return 0;
}
3333 
3334 int __init amd_iommu_enable_faulting(void)
3335 {
3336 	/* We enable MSI later when PCI is initialized */
3337 	return 0;
3338 }
3339 #endif
3340 
3341 /*
3342  * This is the core init function for AMD IOMMU hardware in the system.
3343  * This function is called from the generic x86 DMA layer initialization
3344  * code.
3345  */
3346 static int __init amd_iommu_init(void)
3347 {
3348 	struct amd_iommu *iommu;
3349 	int ret;
3350 
3351 	ret = iommu_go_to_state(IOMMU_INITIALIZED);
3352 #ifdef CONFIG_GART_IOMMU
3353 	if (ret && list_empty(&amd_iommu_list)) {
3354 		/*
3355 		 * We failed to initialize the AMD IOMMU - try fallback
3356 		 * to GART if possible.
3357 		 */
3358 		gart_iommu_init();
3359 	}
3360 #endif
3361 
3362 	for_each_iommu(iommu)
3363 		amd_iommu_debugfs_setup(iommu);
3364 
3365 	return ret;
3366 }
3367 
3368 static bool amd_iommu_sme_check(void)
3369 {
3370 	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3371 	    (boot_cpu_data.x86 != 0x17))
3372 		return true;
3373 
3374 	/* For Fam17h, a specific level of support is required */
3375 	if (boot_cpu_data.microcode >= 0x08001205)
3376 		return true;
3377 
3378 	if ((boot_cpu_data.microcode >= 0x08001126) &&
3379 	    (boot_cpu_data.microcode <= 0x080011ff))
3380 		return true;
3381 
3382 	pr_notice("IOMMU not currently supported when SME is active\n");
3383 
3384 	return false;
3385 }
3386 
3387 /****************************************************************************
3388  *
3389  * Early detect code. This code runs at IOMMU detection time in the DMA
3390  * layer. It just looks if there is an IVRS ACPI table to detect AMD
3391  * IOMMUs
3392  *
3393  ****************************************************************************/
3394 int __init amd_iommu_detect(void)
3395 {
3396 	int ret;
3397 
3398 	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3399 		return -ENODEV;
3400 
3401 	if (!amd_iommu_sme_check())
3402 		return -ENODEV;
3403 
3404 	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3405 	if (ret)
3406 		return ret;
3407 
3408 	amd_iommu_detected = true;
3409 	iommu_detected = 1;
3410 	x86_init.iommu.iommu_init = amd_iommu_init;
3411 
3412 	return 1;
3413 }
3414 
3415 /****************************************************************************
3416  *
3417  * Parsing functions for the AMD IOMMU specific kernel command line
3418  * options.
3419  *
3420  ****************************************************************************/
3421 
3422 static int __init parse_amd_iommu_dump(char *str)
3423 {
3424 	amd_iommu_dump = true;
3425 
3426 	return 1;
3427 }
3428 
3429 static int __init parse_amd_iommu_intr(char *str)
3430 {
3431 	for (; *str; ++str) {
3432 		if (strncmp(str, "legacy", 6) == 0) {
3433 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3434 			break;
3435 		}
3436 		if (strncmp(str, "vapic", 5) == 0) {
3437 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3438 			break;
3439 		}
3440 	}
3441 	return 1;
3442 }
3443 
3444 static int __init parse_amd_iommu_options(char *str)
3445 {
3446 	if (!str)
3447 		return -EINVAL;
3448 
3449 	while (*str) {
3450 		if (strncmp(str, "fullflush", 9) == 0) {
3451 			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3452 			iommu_set_dma_strict();
3453 		} else if (strncmp(str, "force_enable", 12) == 0) {
3454 			amd_iommu_force_enable = true;
3455 		} else if (strncmp(str, "off", 3) == 0) {
3456 			amd_iommu_disabled = true;
3457 		} else if (strncmp(str, "force_isolation", 15) == 0) {
3458 			amd_iommu_force_isolation = true;
3459 		} else if (strncmp(str, "pgtbl_v1", 8) == 0) {
3460 			amd_iommu_pgtable = AMD_IOMMU_V1;
3461 		} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
3462 			amd_iommu_pgtable = AMD_IOMMU_V2;
3463 		} else if (strncmp(str, "irtcachedis", 11) == 0) {
3464 			amd_iommu_irtcachedis = true;
3465 		} else {
3466 			pr_notice("Unknown option - '%s'\n", str);
3467 		}
3468 
3469 		str += strcspn(str, ",");
3470 		while (*str == ',')
3471 			str++;
3472 	}
3473 
3474 	return 1;
3475 }
3476 
3477 static int __init parse_ivrs_ioapic(char *str)
3478 {
3479 	u32 seg = 0, bus, dev, fn;
3480 	int id, i;
3481 	u32 devid;
3482 
3483 	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3484 	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3485 		goto found;
3486 
3487 	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3488 	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3489 		pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
3490 			str, id, seg, bus, dev, fn);
3491 		goto found;
3492 	}
3493 
3494 	pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3495 	return 1;
3496 
3497 found:
3498 	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3499 		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3500 			str);
3501 		return 1;
3502 	}
3503 
3504 	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3505 
3506 	cmdline_maps			= true;
3507 	i				= early_ioapic_map_size++;
3508 	early_ioapic_map[i].id		= id;
3509 	early_ioapic_map[i].devid	= devid;
3510 	early_ioapic_map[i].cmd_line	= true;
3511 
3512 	return 1;
3513 }
3514 
3515 static int __init parse_ivrs_hpet(char *str)
3516 {
3517 	u32 seg = 0, bus, dev, fn;
3518 	int id, i;
3519 	u32 devid;
3520 
3521 	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3522 	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3523 		goto found;
3524 
3525 	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3526 	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3527 		pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
3528 			str, id, seg, bus, dev, fn);
3529 		goto found;
3530 	}
3531 
3532 	pr_err("Invalid command line: ivrs_hpet%s\n", str);
3533 	return 1;
3534 
3535 found:
3536 	if (early_hpet_map_size == EARLY_MAP_SIZE) {
3537 		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3538 			str);
3539 		return 1;
3540 	}
3541 
3542 	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3543 
3544 	cmdline_maps			= true;
3545 	i				= early_hpet_map_size++;
3546 	early_hpet_map[i].id		= id;
3547 	early_hpet_map[i].devid		= devid;
3548 	early_hpet_map[i].cmd_line	= true;
3549 
3550 	return 1;
3551 }
3552 
3553 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
3554 
3555 static int __init parse_ivrs_acpihid(char *str)
3556 {
3557 	u32 seg = 0, bus, dev, fn;
3558 	char *hid, *uid, *p, *addr;
3559 	char acpiid[ACPIID_LEN] = {0};
3560 	int i;
3561 
3562 	addr = strchr(str, '@');
3563 	if (!addr) {
3564 		addr = strchr(str, '=');
3565 		if (!addr)
3566 			goto not_found;
3567 
3568 		++addr;
3569 
3570 		if (strlen(addr) > ACPIID_LEN)
3571 			goto not_found;
3572 
3573 		if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
3574 		    sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
3575 			pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
3576 				str, acpiid, seg, bus, dev, fn);
3577 			goto found;
3578 		}
3579 		goto not_found;
3580 	}
3581 
3582 	/* We have the '@', make it the terminator to get just the acpiid */
3583 	*addr++ = 0;
3584 
3585 	if (strlen(str) > ACPIID_LEN + 1)
3586 		goto not_found;
3587 
3588 	if (sscanf(str, "=%s", acpiid) != 1)
3589 		goto not_found;
3590 
3591 	if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
3592 	    sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
3593 		goto found;
3594 
3595 not_found:
3596 	pr_err("Invalid command line: ivrs_acpihid%s\n", str);
3597 	return 1;
3598 
3599 found:
3600 	p = acpiid;
3601 	hid = strsep(&p, ":");
3602 	uid = p;
3603 
3604 	if (!hid || !(*hid) || !uid) {
3605 		pr_err("Invalid command line: hid or uid\n");
3606 		return 1;
3607 	}
3608 
3609 	/*
3610 	 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
3611 	 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
3612 	 */
3613 	while (*uid == '0' && *(uid + 1))
3614 		uid++;
3615 
3616 	i = early_acpihid_map_size++;
3617 	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3618 	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3619 	early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3620 	early_acpihid_map[i].cmd_line	= true;
3621 
3622 	return 1;
3623 }
3624 
/*
 * Register the command line handlers defined above.
 *
 * The ivrs_* options are registered without a trailing '=', so their
 * handlers are handed the rest of the option starting at the '=' (or at the
 * '[' of the deprecated syntax); the sscanf() formats in those handlers
 * match against exactly that.
 */
__setup("amd_iommu_dump",	parse_amd_iommu_dump);
__setup("amd_iommu=",		parse_amd_iommu_options);
__setup("amd_iommu_intr=",	parse_amd_iommu_intr);
__setup("ivrs_ioapic",		parse_ivrs_ioapic);
__setup("ivrs_hpet",		parse_ivrs_hpet);
__setup("ivrs_acpihid",		parse_ivrs_acpihid);
3631 
3632 bool amd_iommu_v2_supported(void)
3633 {
3634 	/* CPU page table size should match IOMMU guest page table size */
3635 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3636 	    amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
3637 		return false;
3638 
3639 	/*
3640 	 * Since DTE[Mode]=0 is prohibited on SNP-enabled system
3641 	 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
3642 	 * setting up IOMMUv1 page table.
3643 	 */
3644 	return amd_iommu_v2_present && !amd_iommu_snp_en;
3645 }
3646 EXPORT_SYMBOL(amd_iommu_v2_supported);
3647 
3648 struct amd_iommu *get_amd_iommu(unsigned int idx)
3649 {
3650 	unsigned int i = 0;
3651 	struct amd_iommu *iommu;
3652 
3653 	for_each_iommu(iommu)
3654 		if (i++ == idx)
3655 			return iommu;
3656 	return NULL;
3657 }
3658 
3659 /****************************************************************************
3660  *
3661  * IOMMU EFR Performance Counter support functionality. This code allows
3662  * access to the IOMMU PC functionality.
3663  *
3664  ****************************************************************************/
3665 
3666 u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3667 {
3668 	struct amd_iommu *iommu = get_amd_iommu(idx);
3669 
3670 	if (iommu)
3671 		return iommu->max_banks;
3672 
3673 	return 0;
3674 }
3675 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
3676 
3677 bool amd_iommu_pc_supported(void)
3678 {
3679 	return amd_iommu_pc_present;
3680 }
3681 EXPORT_SYMBOL(amd_iommu_pc_supported);
3682 
3683 u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3684 {
3685 	struct amd_iommu *iommu = get_amd_iommu(idx);
3686 
3687 	if (iommu)
3688 		return iommu->max_counters;
3689 
3690 	return 0;
3691 }
3692 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3693 
3694 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3695 				u8 fxn, u64 *value, bool is_write)
3696 {
3697 	u32 offset;
3698 	u32 max_offset_lim;
3699 
3700 	/* Make sure the IOMMU PC resource is available */
3701 	if (!amd_iommu_pc_present)
3702 		return -ENODEV;
3703 
3704 	/* Check for valid iommu and pc register indexing */
3705 	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3706 		return -ENODEV;
3707 
3708 	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3709 
3710 	/* Limit the offset to the hw defined mmio region aperture */
3711 	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3712 				(iommu->max_counters << 8) | 0x28);
3713 	if ((offset < MMIO_CNTR_REG_OFFSET) ||
3714 	    (offset > max_offset_lim))
3715 		return -EINVAL;
3716 
3717 	if (is_write) {
3718 		u64 val = *value & GENMASK_ULL(47, 0);
3719 
3720 		writel((u32)val, iommu->mmio_base + offset);
3721 		writel((val >> 32), iommu->mmio_base + offset + 4);
3722 	} else {
3723 		*value = readl(iommu->mmio_base + offset + 4);
3724 		*value <<= 32;
3725 		*value |= readl(iommu->mmio_base + offset);
3726 		*value &= GENMASK_ULL(47, 0);
3727 	}
3728 
3729 	return 0;
3730 }
3731 
3732 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3733 {
3734 	if (!iommu)
3735 		return -EINVAL;
3736 
3737 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3738 }
3739 
3740 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3741 {
3742 	if (!iommu)
3743 		return -EINVAL;
3744 
3745 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3746 }
3747 
3748 #ifdef CONFIG_AMD_MEM_ENCRYPT
3749 int amd_iommu_snp_enable(void)
3750 {
3751 	/*
3752 	 * The SNP support requires that IOMMU must be enabled, and is
3753 	 * not configured in the passthrough mode.
3754 	 */
3755 	if (no_iommu || iommu_default_passthrough()) {
3756 		pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported");
3757 		return -EINVAL;
3758 	}
3759 
3760 	/*
3761 	 * Prevent enabling SNP after IOMMU_ENABLED state because this process
3762 	 * affect how IOMMU driver sets up data structures and configures
3763 	 * IOMMU hardware.
3764 	 */
3765 	if (init_state > IOMMU_ENABLED) {
3766 		pr_err("SNP: Too late to enable SNP for IOMMU.\n");
3767 		return -EINVAL;
3768 	}
3769 
3770 	amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
3771 	if (!amd_iommu_snp_en)
3772 		return -EINVAL;
3773 
3774 	pr_info("SNP enabled\n");
3775 
3776 	/* Enforce IOMMU v1 pagetable when SNP is enabled. */
3777 	if (amd_iommu_pgtable != AMD_IOMMU_V1) {
3778 		pr_warn("Force to using AMD IOMMU v1 page table due to SNP\n");
3779 		amd_iommu_pgtable = AMD_IOMMU_V1;
3780 	}
3781 
3782 	return 0;
3783 }
3784 #endif
3785