xref: /linux/drivers/iommu/amd/init.c (revision fa7431eb99245e0a283d470101e44be1d2c2aeb3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
4  * Author: Joerg Roedel <jroedel@suse.de>
5  *         Leo Duran <leo.duran@amd.com>
6  */
7 
8 #define pr_fmt(fmt)     "AMD-Vi: " fmt
9 #define dev_fmt(fmt)    pr_fmt(fmt)
10 
11 #include <linux/pci.h>
12 #include <linux/acpi.h>
13 #include <linux/list.h>
14 #include <linux/bitmap.h>
15 #include <linux/syscore_ops.h>
16 #include <linux/interrupt.h>
17 #include <linux/msi.h>
18 #include <linux/irq.h>
19 #include <linux/amd-iommu.h>
20 #include <linux/export.h>
21 #include <linux/kmemleak.h>
22 #include <linux/cc_platform.h>
23 #include <linux/iopoll.h>
24 #include <asm/pci-direct.h>
25 #include <asm/iommu.h>
26 #include <asm/apic.h>
27 #include <asm/gart.h>
28 #include <asm/x86_init.h>
29 #include <asm/io_apic.h>
30 #include <asm/irq_remapping.h>
31 #include <asm/set_memory.h>
32 #include <asm/sev.h>
33 
34 #include <linux/crash_dump.h>
35 
36 #include "amd_iommu.h"
37 #include "../irq_remapping.h"
38 #include "../iommu-pages.h"
39 
40 /*
41  * definitions for the ACPI scanning code
42  */
43 #define IVRS_HEADER_LENGTH 48
44 
45 #define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
46 #define ACPI_IVMD_TYPE_ALL              0x20
47 #define ACPI_IVMD_TYPE                  0x21
48 #define ACPI_IVMD_TYPE_RANGE            0x22
49 
50 #define IVHD_DEV_ALL                    0x01
51 #define IVHD_DEV_SELECT                 0x02
52 #define IVHD_DEV_SELECT_RANGE_START     0x03
53 #define IVHD_DEV_RANGE_END              0x04
54 #define IVHD_DEV_ALIAS                  0x42
55 #define IVHD_DEV_ALIAS_RANGE            0x43
56 #define IVHD_DEV_EXT_SELECT             0x46
57 #define IVHD_DEV_EXT_SELECT_RANGE       0x47
58 #define IVHD_DEV_SPECIAL		0x48
59 #define IVHD_DEV_ACPI_HID		0xf0
60 
61 #define UID_NOT_PRESENT                 0
62 #define UID_IS_INTEGER                  1
63 #define UID_IS_CHARACTER                2
64 
65 #define IVHD_SPECIAL_IOAPIC		1
66 #define IVHD_SPECIAL_HPET		2
67 
68 #define IVHD_FLAG_HT_TUN_EN_MASK        0x01
69 #define IVHD_FLAG_PASSPW_EN_MASK        0x02
70 #define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
71 #define IVHD_FLAG_ISOC_EN_MASK          0x08
72 
73 #define IVMD_FLAG_EXCL_RANGE            0x08
74 #define IVMD_FLAG_IW                    0x04
75 #define IVMD_FLAG_IR                    0x02
76 #define IVMD_FLAG_UNITY_MAP             0x01
77 
78 #define ACPI_DEVFLAG_INITPASS           0x01
79 #define ACPI_DEVFLAG_EXTINT             0x02
80 #define ACPI_DEVFLAG_NMI                0x04
81 #define ACPI_DEVFLAG_SYSMGT1            0x10
82 #define ACPI_DEVFLAG_SYSMGT2            0x20
83 #define ACPI_DEVFLAG_LINT0              0x40
84 #define ACPI_DEVFLAG_LINT1              0x80
85 #define ACPI_DEVFLAG_ATSDIS             0x10000000
86 
87 #define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	(((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
88 						 | ((dev & 0x1f) << 3) | (fn & 0x7))
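/*
 * Editor's note (illustrative, not part of the driver): the packed SBDF
 * layout is seg[31:16] | bus[15:8] | dev[7:3] | fn[2:0].  For example,
 * 0000:40:1f.2 packs as:
 *
 *	IVRS_GET_SBDF_ID(0x0000, 0x40, 0x1f, 0x2)
 *		== (0x0000 << 16) | (0x40 << 8) | (0x1f << 3) | 0x2
 *		== 0x40fa
 */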
89 
90 /*
91  * ACPI table definitions
92  *
93  * These data structures are laid over the table to parse the important values
94  * out of it.
95  */
96 
97 /*
98  * Structure describing one IOMMU in the ACPI table. Typically followed by one
99  * or more ivhd_entry structures.
100  */
101 struct ivhd_header {
102 	u8 type;
103 	u8 flags;
104 	u16 length;
105 	u16 devid;
106 	u16 cap_ptr;
107 	u64 mmio_phys;
108 	u16 pci_seg;
109 	u16 info;
110 	u32 efr_attr;
111 
112 	/* Following only valid on IVHD type 11h and 40h */
113 	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
114 	u64 efr_reg2;
115 } __attribute__((packed));
116 
117 /*
118  * A device entry describing which devices a specific IOMMU translates and
119  * which requestor ids they use.
120  */
121 struct ivhd_entry {
122 	u8 type;
123 	u16 devid;
124 	u8 flags;
125 	struct_group(ext_hid,
126 		u32 ext;
127 		u32 hidh;
128 	);
129 	u64 cid;
130 	u8 uidf;
131 	u8 uidl;
132 	u8 uid;
133 } __attribute__((packed));
134 
135 int amd_iommu_evtlog_size = EVTLOG_SIZE_DEF;
136 int amd_iommu_pprlog_size = PPRLOG_SIZE_DEF;
137 
138 /*
139  * An AMD IOMMU memory definition structure. It defines things like exclusion
140  * ranges for devices and regions that should be unity mapped.
141  */
142 struct ivmd_header {
143 	u8 type;
144 	u8 flags;
145 	u16 length;
146 	u16 devid;
147 	u16 aux;
148 	u16 pci_seg;
149 	u8  resv[6];
150 	u64 range_start;
151 	u64 range_length;
152 } __attribute__((packed));
153 
154 bool amd_iommu_dump;
155 bool amd_iommu_irq_remap __read_mostly;
156 
157 enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
158 /* Host page table level */
159 u8 amd_iommu_hpt_level;
160 /* Guest page table level */
161 int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
162 
163 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
164 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
165 
166 static bool amd_iommu_detected;
167 static bool amd_iommu_disabled __initdata;
168 static bool amd_iommu_force_enable __initdata;
169 static bool amd_iommu_irtcachedis;
170 static int amd_iommu_target_ivhd_type;
171 
172 /* Global EFR and EFR2 registers */
173 u64 amd_iommu_efr;
174 u64 amd_iommu_efr2;
175 
176 /* Host (v1) page table is not supported */
177 bool amd_iommu_hatdis;
178 
179 /* SNP is enabled on the system? */
180 bool amd_iommu_snp_en;
181 EXPORT_SYMBOL(amd_iommu_snp_en);
182 
183 LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
184 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the system */
185 LIST_HEAD(amd_ivhd_dev_flags_list);	/* list of all IVHD device entry settings */
186 
187 /* Number of IOMMUs present in the system */
188 static int amd_iommus_present;
189 
190 /* IOMMUs have a non-present cache? */
191 bool amd_iommu_np_cache __read_mostly;
192 bool amd_iommu_iotlb_sup __read_mostly = true;
193 
194 static bool amd_iommu_pc_present __read_mostly;
195 bool amdr_ivrs_remap_support __read_mostly;
196 
197 bool amd_iommu_force_isolation __read_mostly;
198 
199 unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;
200 
201 enum iommu_init_state {
202 	IOMMU_START_STATE,
203 	IOMMU_IVRS_DETECTED,
204 	IOMMU_ACPI_FINISHED,
205 	IOMMU_ENABLED,
206 	IOMMU_PCI_INIT,
207 	IOMMU_INTERRUPTS_EN,
208 	IOMMU_INITIALIZED,
209 	IOMMU_NOT_FOUND,
210 	IOMMU_INIT_ERROR,
211 	IOMMU_CMDLINE_DISABLED,
212 };
213 
214 /* Early ioapic and hpet maps from kernel command line */
215 #define EARLY_MAP_SIZE		4
216 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
217 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
218 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
219 
220 static int __initdata early_ioapic_map_size;
221 static int __initdata early_hpet_map_size;
222 static int __initdata early_acpihid_map_size;
223 
224 static bool __initdata cmdline_maps;
225 
226 static enum iommu_init_state init_state = IOMMU_START_STATE;
227 
228 static int amd_iommu_enable_interrupts(void);
229 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
230 
231 static bool amd_iommu_pre_enabled = true;
232 
233 static u32 amd_iommu_ivinfo __initdata;
234 
235 bool translation_pre_enabled(struct amd_iommu *iommu)
236 {
237 	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
238 }
239 
240 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
241 {
242 	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
243 }
244 
245 static void init_translation_status(struct amd_iommu *iommu)
246 {
247 	u64 ctrl;
248 
249 	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
250 	if (ctrl & (1<<CONTROL_IOMMU_EN))
251 		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
252 }
253 
254 int amd_iommu_get_num_iommus(void)
255 {
256 	return amd_iommus_present;
257 }
258 
259 bool amd_iommu_ht_range_ignore(void)
260 {
261 	return check_feature2(FEATURE_HT_RANGE_IGNORE);
262 }
263 
264 /*
265  * Iterate through all the IOMMUs to get the common EFR
266  * masks among all IOMMUs and warn if an inconsistency is found.
267  */
268 static __init void get_global_efr(void)
269 {
270 	struct amd_iommu *iommu;
271 
272 	for_each_iommu(iommu) {
273 		u64 tmp = iommu->features;
274 		u64 tmp2 = iommu->features2;
275 
276 		if (list_is_first(&iommu->list, &amd_iommu_list)) {
277 			amd_iommu_efr = tmp;
278 			amd_iommu_efr2 = tmp2;
279 			continue;
280 		}
281 
282 		if (amd_iommu_efr == tmp &&
283 		    amd_iommu_efr2 == tmp2)
284 			continue;
285 
286 		pr_err(FW_BUG
287 		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
288 		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
289 		       iommu->index, iommu->pci_seg->id,
290 		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
291 		       PCI_FUNC(iommu->devid));
292 
293 		amd_iommu_efr &= tmp;
294 		amd_iommu_efr2 &= tmp2;
295 	}
296 
297 	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
298 }
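/*
 * Editor's note (worked example, hypothetical values): the global EFR is the
 * bitwise AND across all IOMMUs, so only features supported by every IOMMU
 * survive.  E.g. if one IOMMU reports EFR 0x0f and another 0x0b, the global
 * EFR becomes 0x0f & 0x0b == 0x0b.
 */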
299 
300 /*
301  * For IVHD type 0x11/0x40, EFR is also available via IVHD.
302  * Default to IVHD EFR since it is available sooner
303  * (i.e. before PCI init).
304  */
305 static void __init early_iommu_features_init(struct amd_iommu *iommu,
306 					     struct ivhd_header *h)
307 {
308 	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
309 		iommu->features = h->efr_reg;
310 		iommu->features2 = h->efr_reg2;
311 	}
312 	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
313 		amdr_ivrs_remap_support = true;
314 }
315 
316 /* Access to l1 and l2 indexed register spaces */
317 
318 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
319 {
320 	u32 val;
321 
322 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
323 	pci_read_config_dword(iommu->dev, 0xfc, &val);
324 	return val;
325 }
326 
327 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
328 {
329 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
330 	pci_write_config_dword(iommu->dev, 0xfc, val);
331 	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
332 }
333 
334 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
335 {
336 	u32 val;
337 
338 	pci_write_config_dword(iommu->dev, 0xf0, address);
339 	pci_read_config_dword(iommu->dev, 0xf4, &val);
340 	return val;
341 }
342 
343 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
344 {
345 	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
346 	pci_write_config_dword(iommu->dev, 0xf4, val);
347 }
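/*
 * Editor's note: the four helpers above implement the usual indirect
 * (index/data) register pattern.  PCI config offset 0xf0 (L2) or 0xf8 (L1)
 * selects the register -- with a write-enable bit set for stores -- and
 * offset 0xf4/0xfc carries the data.  A minimal, hypothetical use, setting
 * bit 0 of L2 register 0x47, looks like:
 *
 *	u32 v = iommu_read_l2(iommu, 0x47);
 *	iommu_write_l2(iommu, 0x47, v | BIT(0));
 *
 * (This is exactly the sequence used by the ATS write check workaround
 * further down in this file.)
 */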
348 
349 /****************************************************************************
350  *
351  * AMD IOMMU MMIO register space handling functions
352  *
353  * These functions are used to program the IOMMU device registers in
354  * MMIO space required for that driver.
355  *
356  ****************************************************************************/
357 
358 /*
359  * This function sets the exclusion range in the IOMMU. DMA accesses to the
360  * exclusion range are passed through untranslated.
361  */
362 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
363 {
364 	u64 start = iommu->exclusion_start & PAGE_MASK;
365 	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
366 	u64 entry;
367 
368 	if (!iommu->exclusion_start)
369 		return;
370 
371 	entry = start | MMIO_EXCL_ENABLE_MASK;
372 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
373 			&entry, sizeof(entry));
374 
375 	entry = limit;
376 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
377 			&entry, sizeof(entry));
378 }
379 
380 static void iommu_set_cwwb_range(struct amd_iommu *iommu)
381 {
382 	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
383 	u64 entry = start & PM_ADDR_MASK;
384 
385 	if (!check_feature(FEATURE_SNP))
386 		return;
387 
388 	/* Note:
389 	 * Re-purpose Exclusion base/limit registers for Completion wait
390 	 * write-back base/limit.
391 	 */
392 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
393 		    &entry, sizeof(entry));
394 
395 	/* Note:
396 	 * Default to 4 Kbytes, which can be specified by setting base
397 	 * address equal to the limit address.
398 	 */
399 	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
400 		    &entry, sizeof(entry));
401 }
402 
403 /* Programs the physical address of the device table into the IOMMU hardware */
404 static void iommu_set_device_table(struct amd_iommu *iommu)
405 {
406 	u64 entry;
407 	u32 dev_table_size = iommu->pci_seg->dev_table_size;
408 	void *dev_table = (void *)get_dev_table(iommu);
409 
410 	BUG_ON(iommu->mmio_base == NULL);
411 
412 	if (is_kdump_kernel())
413 		return;
414 
415 	entry = iommu_virt_to_phys(dev_table);
416 	entry |= (dev_table_size >> 12) - 1;
417 	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
418 			&entry, sizeof(entry));
419 }
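/*
 * Editor's note (worked example): the low bits of the device table base
 * register encode the table size in 4K pages minus one.  A 2 MiB device
 * table gives (0x200000 >> 12) - 1 = 511, so the register holds
 * phys_base | 511.
 */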
420 
421 static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift)
422 {
423 	u64 ctrl;
424 
425 	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
426 	mask <<= shift;
427 	ctrl &= ~mask;
428 	ctrl |= (val << shift) & mask;
429 	writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
430 }
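/*
 * Editor's note: iommu_feature_set() is a read-modify-write of a bit field
 * in the control register.  For a hypothetical 3-bit field at shift 28 being
 * set to 5, the effect on ctrl is:
 *
 *	ctrl &= ~(0x7ULL << 28);
 *	ctrl |=  (5ULL << 28);
 *
 * iommu_feature_enable()/iommu_feature_disable() below are the single-bit
 * special cases (mask 1, value 1 or 0).
 */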
431 
432 /* Generic functions to enable/disable certain features of the IOMMU. */
433 void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
434 {
435 	iommu_feature_set(iommu, 1ULL, 1ULL, bit);
436 }
437 
438 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
439 {
440 	iommu_feature_set(iommu, 0ULL, 1ULL, bit);
441 }
442 
443 /* Function to enable the hardware */
444 static void iommu_enable(struct amd_iommu *iommu)
445 {
446 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
447 }
448 
449 static void iommu_disable(struct amd_iommu *iommu)
450 {
451 	if (!iommu->mmio_base)
452 		return;
453 
454 	/* Disable command buffer */
455 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
456 
457 	/* Disable event logging and event interrupts */
458 	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
459 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
460 
461 	/* Disable IOMMU GA_LOG */
462 	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
463 	iommu_feature_disable(iommu, CONTROL_GAINT_EN);
464 
465 	/* Disable IOMMU PPR logging */
466 	iommu_feature_disable(iommu, CONTROL_PPRLOG_EN);
467 	iommu_feature_disable(iommu, CONTROL_PPRINT_EN);
468 
469 	/* Disable IOMMU hardware itself */
470 	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
471 
472 	/* Clear IRTE cache disabling bit */
473 	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
474 }
475 
476 /*
477  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
478  * the system has one.
479  */
480 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
481 {
482 	if (!request_mem_region(address, end, "amd_iommu")) {
483 		pr_err("Can not reserve memory region %llx-%llx for mmio\n",
484 			address, end);
485 		pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
486 		return NULL;
487 	}
488 
489 	return (u8 __iomem *)ioremap(address, end);
490 }
491 
492 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
493 {
494 	if (iommu->mmio_base)
495 		iounmap(iommu->mmio_base);
496 	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
497 }
498 
499 static inline u32 get_ivhd_header_size(struct ivhd_header *h)
500 {
501 	u32 size = 0;
502 
503 	switch (h->type) {
504 	case 0x10:
505 		size = 24;
506 		break;
507 	case 0x11:
508 	case 0x40:
509 		size = 40;
510 		break;
511 	}
512 	return size;
513 }
514 
515 /****************************************************************************
516  *
517  * The functions below belong to the first pass of AMD IOMMU ACPI table
518  * parsing. In this pass we try to find out the highest device id this
519  * code has to handle. Based on this information the size of the shared data
520  * structures is determined later.
521  *
522  ****************************************************************************/
523 
524 /*
525  * This function calculates the length of a given IVHD entry
526  */
527 static inline int ivhd_entry_length(u8 *ivhd)
528 {
529 	u32 type = ((struct ivhd_entry *)ivhd)->type;
530 
531 	if (type < 0x80) {
532 		return 0x04 << (*ivhd >> 6);
533 	} else if (type == IVHD_DEV_ACPI_HID) {
534 		/* For ACPI_HID, offset 21 is uid len */
535 		return *((u8 *)ivhd + 21) + 22;
536 	}
537 	return 0;
538 }
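/*
 * Editor's note (worked examples): entries with type < 0x80 have length
 * 4 << (type >> 6), i.e. types 0x00-0x3f are 4 bytes and types 0x40-0x7f are
 * 8 bytes.  So IVHD_DEV_SELECT (0x02) is 4 bytes and IVHD_DEV_ALIAS (0x42)
 * is 8 bytes.  IVHD_DEV_ACPI_HID (0xf0) entries are variable length:
 * 22 fixed bytes plus the UID length byte stored at offset 21.
 */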
539 
540 /*
541  * After reading the highest device id from the IOMMU PCI capability header
542  * this function checks if there is a higher device id defined in the ACPI table.
543  */
544 static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
545 {
546 	u8 *p = (void *)h, *end = (void *)h;
547 	struct ivhd_entry *dev;
548 	int last_devid = -EINVAL;
549 
550 	u32 ivhd_size = get_ivhd_header_size(h);
551 
552 	if (!ivhd_size) {
553 		pr_err("Unsupported IVHD type %#x\n", h->type);
554 		return -EINVAL;
555 	}
556 
557 	p += ivhd_size;
558 	end += h->length;
559 
560 	while (p < end) {
561 		dev = (struct ivhd_entry *)p;
562 		switch (dev->type) {
563 		case IVHD_DEV_ALL:
564 			/* Use maximum BDF value for DEV_ALL */
565 			return 0xffff;
566 		case IVHD_DEV_SELECT:
567 		case IVHD_DEV_RANGE_END:
568 		case IVHD_DEV_ALIAS:
569 		case IVHD_DEV_EXT_SELECT:
570 			/* all the above subfield types refer to device ids */
571 			if (dev->devid > last_devid)
572 				last_devid = dev->devid;
573 			break;
574 		default:
575 			break;
576 		}
577 		p += ivhd_entry_length(p);
578 	}
579 
580 	WARN_ON(p != end);
581 
582 	return last_devid;
583 }
584 
585 static int __init check_ivrs_checksum(struct acpi_table_header *table)
586 {
587 	int i;
588 	u8 checksum = 0, *p = (u8 *)table;
589 
590 	for (i = 0; i < table->length; ++i)
591 		checksum += p[i];
592 	if (checksum != 0) {
593 		/* ACPI table corrupt */
594 		pr_err(FW_BUG "IVRS invalid checksum\n");
595 		return -ENODEV;
596 	}
597 
598 	return 0;
599 }
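/*
 * Editor's note: per ACPI, the byte-wise sum of the whole table (including
 * the checksum byte itself) must be 0 mod 256.  E.g. if the remaining bytes
 * sum to 0x1fe7, a valid checksum byte is 0x100 - 0xe7 = 0x19.
 */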
600 
601 /*
602  * Iterate over all IVHD entries in the ACPI table and find the highest device
603  * id which we need to handle. This is the first of three functions which parse
604  * the ACPI table. So we check the checksum here.
605  */
606 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
607 {
608 	u8 *p = (u8 *)table, *end = (u8 *)table;
609 	struct ivhd_header *h;
610 	int last_devid, last_bdf = 0;
611 
612 	p += IVRS_HEADER_LENGTH;
613 
614 	end += table->length;
615 	while (p < end) {
616 		h = (struct ivhd_header *)p;
617 		if (h->pci_seg == pci_seg &&
618 		    h->type == amd_iommu_target_ivhd_type) {
619 			last_devid = find_last_devid_from_ivhd(h);
620 
621 			if (last_devid < 0)
622 				return -EINVAL;
623 			if (last_devid > last_bdf)
624 				last_bdf = last_devid;
625 		}
626 		p += h->length;
627 	}
628 	WARN_ON(p != end);
629 
630 	return last_bdf;
631 }
632 
633 /****************************************************************************
634  *
635  * The following functions belong to the code path which parses the ACPI table
636  * the second time. In this ACPI parsing iteration we allocate IOMMU specific
637  * data structures, initialize the per PCI segment device/alias/rlookup table
638  * and also basically initialize the hardware.
639  *
640  ****************************************************************************/
641 
642 /* Allocate per PCI segment device table */
643 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
644 {
645 	pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32,
646 						  pci_seg->dev_table_size);
647 	if (!pci_seg->dev_table)
648 		return -ENOMEM;
649 
650 	return 0;
651 }
652 
653 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
654 {
655 	if (is_kdump_kernel())
656 		memunmap((void *)pci_seg->dev_table);
657 	else
658 		iommu_free_pages(pci_seg->dev_table);
659 	pci_seg->dev_table = NULL;
660 }
661 
662 /* Allocate per PCI segment IOMMU rlookup table. */
663 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
664 {
665 	pci_seg->rlookup_table = kvzalloc_objs(*pci_seg->rlookup_table,
666 					       pci_seg->last_bdf + 1);
667 	if (pci_seg->rlookup_table == NULL)
668 		return -ENOMEM;
669 
670 	return 0;
671 }
672 
673 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
674 {
675 	kvfree(pci_seg->rlookup_table);
676 	pci_seg->rlookup_table = NULL;
677 }
678 
679 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
680 {
681 	pci_seg->irq_lookup_table = kvzalloc_objs(*pci_seg->irq_lookup_table,
682 						  pci_seg->last_bdf + 1);
683 	if (pci_seg->irq_lookup_table == NULL)
684 		return -ENOMEM;
685 
686 	return 0;
687 }
688 
689 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
690 {
691 	kvfree(pci_seg->irq_lookup_table);
692 	pci_seg->irq_lookup_table = NULL;
693 }
694 
695 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
696 {
697 	int i;
698 
699 	pci_seg->alias_table = kvmalloc_objs(*pci_seg->alias_table,
700 					     pci_seg->last_bdf + 1);
701 	if (!pci_seg->alias_table)
702 		return -ENOMEM;
703 
704 	/*
705 	 * let each alias entry point to itself
706 	 */
707 	for (i = 0; i <= pci_seg->last_bdf; ++i)
708 		pci_seg->alias_table[i] = i;
709 
710 	return 0;
711 }
712 
713 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
714 {
715 	kvfree(pci_seg->alias_table);
716 	pci_seg->alias_table = NULL;
717 }
718 
719 static inline void *iommu_memremap(unsigned long paddr, size_t size)
720 {
721 	phys_addr_t phys;
722 
723 	if (!paddr)
724 		return NULL;
725 
726 	/*
727 	 * Obtain true physical address in kdump kernel when SME is enabled.
728 	 * Currently, a previous kernel with SME enabled combined with a kdump
729 	 * kernel without SME support is not supported.
730 	 */
731 	phys = __sme_clr(paddr);
732 
733 	if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
734 		return (__force void *)ioremap_encrypted(phys, size);
735 	else
736 		return memremap(phys, size, MEMREMAP_WB);
737 }
738 
739 /*
740  * Allocates the command buffer. This buffer is per AMD IOMMU. We can
741  * write commands to that buffer later and the IOMMU will execute them
742  * asynchronously.
743  */
744 static int __init alloc_command_buffer(struct amd_iommu *iommu)
745 {
746 	iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE);
747 
748 	return iommu->cmd_buf ? 0 : -ENOMEM;
749 }
750 
751 /*
752  * Interrupt handler has processed all pending events and adjusted head
753  * and tail pointers. Reset the overflow mask and restart logging.
754  */
755 void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
756 			   u8 cntrl_intr, u8 cntrl_log,
757 			   u32 status_run_mask, u32 status_overflow_mask)
758 {
759 	u32 status;
760 
761 	status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
762 	if (status & status_run_mask)
763 		return;
764 
765 	pr_info_ratelimited("IOMMU %s log restarting\n", evt_type);
766 
767 	iommu_feature_disable(iommu, cntrl_log);
768 	iommu_feature_disable(iommu, cntrl_intr);
769 
770 	writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET);
771 
772 	iommu_feature_enable(iommu, cntrl_intr);
773 	iommu_feature_enable(iommu, cntrl_log);
774 }
775 
776 /*
777  * This function restarts event logging in case the IOMMU experienced
778  * an event log buffer overflow.
779  */
780 void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
781 {
782 	amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN,
783 			      CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK,
784 			      MMIO_STATUS_EVT_OVERFLOW_MASK);
785 }
786 
787 /*
788  * This function restarts GA logging in case the IOMMU experienced
789  * a GA log overflow.
790  */
791 void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
792 {
793 	amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN,
794 			      CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK,
795 			      MMIO_STATUS_GALOG_OVERFLOW_MASK);
796 }
797 
798 /*
799  * This function resets the command buffer if the IOMMU stopped fetching
800  * commands from it.
801  */
802 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
803 {
804 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
805 
806 	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
807 	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
808 	iommu->cmd_buf_head = 0;
809 	iommu->cmd_buf_tail = 0;
810 
811 	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
812 }
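/*
 * Editor's note: the command buffer is a ring; head == tail means empty.
 * Resetting both MMIO pointers and the cached cmd_buf_head/cmd_buf_tail to
 * zero discards anything the IOMMU had not yet fetched.  A producer later
 * copies a command at cmd_buf + tail and advances the MMIO tail register --
 * a sketch of the idea (the real queuing code lives in amd_iommu.c):
 *
 *	memcpy(iommu->cmd_buf + tail, cmd, sizeof(*cmd));
 *	tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
 *	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 */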
813 
814 /*
815  * This function writes the command buffer address to the hardware and
816  * enables it.
817  */
818 static void iommu_enable_command_buffer(struct amd_iommu *iommu)
819 {
820 	u64 entry;
821 
822 	BUG_ON(iommu->cmd_buf == NULL);
823 
824 	if (!is_kdump_kernel()) {
825 		/*
826 		 * Command buffer is re-used for kdump kernel and setting
827 		 * of MMIO register is not required.
828 		 */
829 		entry = iommu_virt_to_phys(iommu->cmd_buf);
830 		entry |= MMIO_CMD_SIZE_512;
831 		memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
832 			    &entry, sizeof(entry));
833 	}
834 
835 	amd_iommu_reset_cmd_buffer(iommu);
836 }
837 
838 /*
839  * This function disables the command buffer
840  */
841 static void iommu_disable_command_buffer(struct amd_iommu *iommu)
842 {
843 	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
844 }
845 
846 static void __init free_command_buffer(struct amd_iommu *iommu)
847 {
848 	iommu_free_pages(iommu->cmd_buf);
849 }
850 
851 void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
852 				  size_t size)
853 {
854 	int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
855 	void *buf;
856 
857 	size = PAGE_ALIGN(size);
858 	buf = iommu_alloc_pages_node_sz(nid, gfp, size);
859 	if (!buf)
860 		return NULL;
861 	if (check_feature(FEATURE_SNP) &&
862 	    set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) {
863 		iommu_free_pages(buf);
864 		return NULL;
865 	}
866 
867 	return buf;
868 }
869 
870 /* allocates the memory where the IOMMU will log its events to */
871 static int __init alloc_event_buffer(void)
872 {
873 	struct amd_iommu *iommu;
874 
875 	for_each_iommu(iommu) {
876 		iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL,
877 						      amd_iommu_evtlog_size);
878 		if (!iommu->evt_buf)
879 			return -ENOMEM;
880 	}
881 
882 	return 0;
883 }
884 
885 static void iommu_enable_event_buffer(void)
886 {
887 	struct amd_iommu *iommu;
888 	u64 entry;
889 
890 	for_each_iommu(iommu) {
891 		BUG_ON(iommu->evt_buf == NULL);
892 
893 		if (!is_kdump_kernel()) {
894 			/*
895 			 * Event buffer is re-used for kdump kernel and setting
896 			 * of MMIO register is not required.
897 			 */
898 			entry = iommu_virt_to_phys(iommu->evt_buf);
899 			entry |= (amd_iommu_evtlog_size == EVTLOG_SIZE_DEF) ?
900 				EVTLOG_LEN_MASK_DEF : EVTLOG_LEN_MASK_MAX;
901 
902 			memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
903 				    &entry, sizeof(entry));
904 		}
905 
906 		/* set head and tail to zero manually */
907 		writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
908 		writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
909 
910 		iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
911 	}
912 }
913 
914 /*
915  * This function disables the event log buffer
916  */
917 static void iommu_disable_event_buffer(struct amd_iommu *iommu)
918 {
919 	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
920 }
921 
922 static void __init free_event_buffer(struct amd_iommu *iommu)
923 {
924 	iommu_free_pages(iommu->evt_buf);
925 }
926 
927 static void free_ga_log(struct amd_iommu *iommu)
928 {
929 #ifdef CONFIG_IRQ_REMAP
930 	iommu_free_pages(iommu->ga_log);
931 	iommu_free_pages(iommu->ga_log_tail);
932 #endif
933 }
934 
935 #ifdef CONFIG_IRQ_REMAP
936 static int iommu_ga_log_enable(struct amd_iommu *iommu)
937 {
938 	u32 status, i;
939 	u64 entry;
940 
941 	if (!iommu->ga_log)
942 		return -EINVAL;
943 
944 	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
945 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
946 		    &entry, sizeof(entry));
947 	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
948 		 (BIT_ULL(52)-1)) & ~7ULL;
949 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
950 		    &entry, sizeof(entry));
951 	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
952 	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
953 
954 
955 	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
956 	iommu_feature_enable(iommu, CONTROL_GALOG_EN);
957 
958 	for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
959 		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
960 		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
961 			break;
962 		udelay(10);
963 	}
964 
965 	if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
966 		return -EINVAL;
967 
968 	return 0;
969 }
970 
971 static int iommu_init_ga_log(struct amd_iommu *iommu)
972 {
973 	int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
974 
975 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
976 		return 0;
977 
978 	iommu->ga_log = iommu_alloc_pages_node_sz(nid, GFP_KERNEL, GA_LOG_SIZE);
979 	if (!iommu->ga_log)
980 		goto err_out;
981 
982 	iommu->ga_log_tail = iommu_alloc_pages_node_sz(nid, GFP_KERNEL, 8);
983 	if (!iommu->ga_log_tail)
984 		goto err_out;
985 
986 	return 0;
987 err_out:
988 	free_ga_log(iommu);
989 	return -EINVAL;
990 }
991 #endif /* CONFIG_IRQ_REMAP */
992 
993 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
994 {
995 	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1);
996 	if (!iommu->cmd_sem)
997 		return -ENOMEM;
998 	iommu->cmd_sem_paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
999 	return 0;
1000 }
1001 
1002 static int __init remap_event_buffer(void)
1003 {
1004 	struct amd_iommu *iommu;
1005 	u64 paddr;
1006 
1007 	pr_info_once("Re-using event buffer from the previous kernel\n");
1008 	for_each_iommu(iommu) {
1009 		paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK;
1010 		iommu->evt_buf = iommu_memremap(paddr, amd_iommu_evtlog_size);
1011 		if (!iommu->evt_buf)
1012 			return -ENOMEM;
1013 	}
1014 
1015 	return 0;
1016 }
1017 
1018 static int __init remap_command_buffer(struct amd_iommu *iommu)
1019 {
1020 	u64 paddr;
1021 
1022 	pr_info_once("Re-using command buffer from the previous kernel\n");
1023 	paddr = readq(iommu->mmio_base + MMIO_CMD_BUF_OFFSET) & PM_ADDR_MASK;
1024 	iommu->cmd_buf = iommu_memremap(paddr, CMD_BUFFER_SIZE);
1025 
1026 	return iommu->cmd_buf ? 0 : -ENOMEM;
1027 }
1028 
1029 static int __init remap_or_alloc_cwwb_sem(struct amd_iommu *iommu)
1030 {
1031 	u64 paddr;
1032 
1033 	if (check_feature(FEATURE_SNP)) {
1034 		/*
1035 		 * When SNP is enabled, the exclusion base register is used for the
1036 		 * completion wait buffer (CWB) address. Read and re-use it.
1037 		 */
1038 		pr_info_once("Re-using CWB buffers from the previous kernel\n");
1039 		paddr = readq(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET) & PM_ADDR_MASK;
1040 		iommu->cmd_sem = iommu_memremap(paddr, PAGE_SIZE);
1041 		if (!iommu->cmd_sem)
1042 			return -ENOMEM;
1043 		iommu->cmd_sem_paddr = paddr;
1044 	} else {
1045 		return alloc_cwwb_sem(iommu);
1046 	}
1047 
1048 	return 0;
1049 }
1050 
1051 static int __init alloc_iommu_buffers(struct amd_iommu *iommu)
1052 {
1053 	int ret;
1054 
1055 	/*
1056 	 * Reuse/Remap the previous kernel's allocated completion wait
1057 	 * command and event buffers for kdump boot.
1058 	 */
1059 	if (is_kdump_kernel()) {
1060 		ret = remap_or_alloc_cwwb_sem(iommu);
1061 		if (ret)
1062 			return ret;
1063 
1064 		ret = remap_command_buffer(iommu);
1065 		if (ret)
1066 			return ret;
1067 	} else {
1068 		ret = alloc_cwwb_sem(iommu);
1069 		if (ret)
1070 			return ret;
1071 
1072 		ret = alloc_command_buffer(iommu);
1073 		if (ret)
1074 			return ret;
1075 	}
1076 
1077 	return 0;
1078 }
1079 
1080 static void __init free_cwwb_sem(struct amd_iommu *iommu)
1081 {
1082 	if (iommu->cmd_sem)
1083 		iommu_free_pages((void *)iommu->cmd_sem);
1084 }
1085 static void __init unmap_cwwb_sem(struct amd_iommu *iommu)
1086 {
1087 	if (iommu->cmd_sem) {
1088 		if (check_feature(FEATURE_SNP))
1089 			memunmap((void *)iommu->cmd_sem);
1090 		else
1091 			iommu_free_pages((void *)iommu->cmd_sem);
1092 	}
1093 }
1094 
1095 static void __init unmap_command_buffer(struct amd_iommu *iommu)
1096 {
1097 	memunmap((void *)iommu->cmd_buf);
1098 }
1099 
1100 static void __init unmap_event_buffer(struct amd_iommu *iommu)
1101 {
1102 	memunmap(iommu->evt_buf);
1103 }
1104 
1105 static void __init free_iommu_buffers(struct amd_iommu *iommu)
1106 {
1107 	if (is_kdump_kernel()) {
1108 		unmap_cwwb_sem(iommu);
1109 		unmap_command_buffer(iommu);
1110 		unmap_event_buffer(iommu);
1111 	} else {
1112 		free_cwwb_sem(iommu);
1113 		free_command_buffer(iommu);
1114 		free_event_buffer(iommu);
1115 	}
1116 }
1117 
1118 static void iommu_enable_xt(struct amd_iommu *iommu)
1119 {
1120 #ifdef CONFIG_IRQ_REMAP
1121 	/*
1122 	 * XT mode (32-bit APIC destination ID) requires
1123 	 * GA mode (128-bit IRTE support) as a prerequisite.
1124 	 */
1125 	if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
1126 	    amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
1127 		iommu_feature_enable(iommu, CONTROL_XT_EN);
1128 #endif /* CONFIG_IRQ_REMAP */
1129 }
1130 
1131 static void iommu_enable_gt(struct amd_iommu *iommu)
1132 {
1133 	if (!check_feature(FEATURE_GT))
1134 		return;
1135 
1136 	iommu_feature_enable(iommu, CONTROL_GT_EN);
1137 
1138 	/*
1139 	 * This feature needs to be enabled prior to a call
1140 	 * to iommu_snp_enable(). Since this function is called
1141 	 * in early_enable_iommu(), it is safe to enable here.
1142 	 */
1143 	if (check_feature2(FEATURE_GCR3TRPMODE))
1144 		iommu_feature_enable(iommu, CONTROL_GCR3TRPMODE);
1145 }
1146 
1147 /* sets a specific bit in the device table entry. */
1148 static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
1149 {
1150 	int i = (bit >> 6) & 0x03;
1151 	int _bit = bit & 0x3f;
1152 
1153 	dte->data[i] |= (1UL << _bit);
1154 }
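/*
 * Editor's note (worked example): a DTE is four u64 words, so bit numbers
 * 0-255 map to data[bit >> 6], bit position (bit & 0x3f).  E.g. bit 96
 * lands in data[1] as 1UL << 32, since (96 >> 6) & 0x03 == 1 and
 * 96 & 0x3f == 32.
 */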
1155 
1156 static bool __reuse_device_table(struct amd_iommu *iommu)
1157 {
1158 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1159 	struct dev_table_entry *old_dev_tbl_entry;
1160 	u32 lo, hi, old_devtb_size, devid;
1161 	phys_addr_t old_devtb_phys;
1162 	u16 dom_id;
1163 	bool dte_v;
1164 	u64 entry;
1165 
1166 	/* Each IOMMU uses a separate device table with the same size */
1167 	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1168 	hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1169 	entry = (((u64) hi) << 32) + lo;
1170 
1171 	old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1172 	if (old_devtb_size != pci_seg->dev_table_size) {
1173 		pr_err("The device table size of IOMMU:%d is not expected!\n",
1174 			iommu->index);
1175 		return false;
1176 	}
1177 
1178 	/*
1179 	 * When SME is enabled in the first kernel, the entry includes the
1180 	 * memory encryption mask (sme_me_mask), so we must remove the memory
1181 	 * encryption mask to obtain the true physical address in the kdump kernel.
1182 	 */
1183 	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1184 
1185 	if (old_devtb_phys >= 0x100000000ULL) {
1186 		pr_err("The address of old device table is above 4G, not trustworthy!\n");
1187 		return false;
1188 	}
1189 
1190 	/*
1191 	 * Re-use the previous kernel's device table for kdump.
1192 	 */
1193 	pci_seg->old_dev_tbl_cpy = iommu_memremap(old_devtb_phys, pci_seg->dev_table_size);
1194 	if (pci_seg->old_dev_tbl_cpy == NULL) {
1195 		pr_err("Failed to remap memory for reusing old device table!\n");
1196 		return false;
1197 	}
1198 
1199 	for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
1200 		old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
1201 		dte_v = FIELD_GET(DTE_FLAG_V, old_dev_tbl_entry->data[0]);
1202 		dom_id = FIELD_GET(DTE_DOMID_MASK, old_dev_tbl_entry->data[1]);
1203 
1204 		if (!dte_v || !dom_id)
1205 			continue;
1206 		/*
1207 		 * ID reservation can fail with -ENOSPC when there
1208 		 * are multiple devices present in the same domain,
1209 		 * hence check only for -ENOMEM.
1210 		 */
1211 		if (amd_iommu_pdom_id_reserve(dom_id, GFP_KERNEL) == -ENOMEM)
1212 			return false;
1213 	}
1214 
1215 	return true;
1216 }
1217 
1218 static bool reuse_device_table(void)
1219 {
1220 	struct amd_iommu *iommu;
1221 	struct amd_iommu_pci_seg *pci_seg;
1222 
1223 	if (!amd_iommu_pre_enabled)
1224 		return false;
1225 
1226 	pr_warn("Translation is already enabled - trying to reuse translation structures\n");
1227 
1228 	/*
1229 	 * All IOMMUs within a PCI segment share a common device table.
1230 	 * Hence reuse the device table only once per PCI segment.
1231 	 */
1232 	for_each_pci_segment(pci_seg) {
1233 		for_each_iommu(iommu) {
1234 			if (pci_seg->id != iommu->pci_seg->id)
1235 				continue;
1236 			if (!__reuse_device_table(iommu))
1237 				return false;
1238 			break;
1239 		}
1240 	}
1241 
1242 	return true;
1243 }
1244 
1245 struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid)
1246 {
1247 	struct ivhd_dte_flags *e;
1248 	unsigned int best_len = UINT_MAX;
1249 	struct dev_table_entry *dte = NULL;
1250 
1251 	for_each_ivhd_dte_flags(e) {
1252 		/*
1253 		 * Need to go through the whole list to find the smallest range,
1254 		 * which contains the devid.
1255 		 */
1256 		if ((e->segid == segid) &&
1257 		    (e->devid_first <= devid) && (devid <= e->devid_last)) {
1258 			unsigned int len = e->devid_last - e->devid_first;
1259 
1260 			if (len < best_len) {
1261 				dte = &(e->dte);
1262 				best_len = len;
1263 			}
1264 		}
1265 	}
1266 	return dte;
1267 }
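/*
 * Editor's note (worked example, hypothetical ranges): with flag entries for
 * devids 0x00-0xff and 0x40-0x4f, a lookup of devid 0x42 matches both but
 * returns the DTE of the 0x40-0x4f entry, since the narrower range is the
 * more specific IVHD setting.
 */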
1268 
1269 static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last)
1270 {
1271 	struct ivhd_dte_flags *e;
1272 
1273 	for_each_ivhd_dte_flags(e) {
1274 		if ((e->segid == segid) &&
1275 		    (e->devid_first == first) &&
1276 		    (e->devid_last == last))
1277 			return true;
1278 	}
1279 	return false;
1280 }
1281 
1282 /*
1283  * This function takes the device specific flags read from the ACPI
1284  * table and sets up the device table entry with that information.
1285  */
1286 static void __init
1287 set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last,
1288 			      u32 flags, u32 ext_flags)
1289 {
1290 	int i;
1291 	struct dev_table_entry dte = {};
1292 
1293 	/* Parse IVHD DTE setting flags and store information */
1294 	if (flags) {
1295 		struct ivhd_dte_flags *d;
1296 
1297 		if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last))
1298 			return;
1299 
1300 		d = kzalloc_obj(struct ivhd_dte_flags);
1301 		if (!d)
1302 			return;
1303 
1304 		pr_debug("%s: devid range %#x:%#x\n", __func__, first, last);
1305 
1306 		if (flags & ACPI_DEVFLAG_INITPASS)
1307 			set_dte_bit(&dte, DEV_ENTRY_INIT_PASS);
1308 		if (flags & ACPI_DEVFLAG_EXTINT)
1309 			set_dte_bit(&dte, DEV_ENTRY_EINT_PASS);
1310 		if (flags & ACPI_DEVFLAG_NMI)
1311 			set_dte_bit(&dte, DEV_ENTRY_NMI_PASS);
1312 		if (flags & ACPI_DEVFLAG_SYSMGT1)
1313 			set_dte_bit(&dte, DEV_ENTRY_SYSMGT1);
1314 		if (flags & ACPI_DEVFLAG_SYSMGT2)
1315 			set_dte_bit(&dte, DEV_ENTRY_SYSMGT2);
1316 		if (flags & ACPI_DEVFLAG_LINT0)
1317 			set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS);
1318 		if (flags & ACPI_DEVFLAG_LINT1)
1319 			set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS);
1320 
1321 		/* Apply erratum 63, which needs info in initial_dte */
1322 		if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1)
1323 			dte.data[0] |= DTE_FLAG_IW;
1324 
1325 		memcpy(&d->dte, &dte, sizeof(dte));
1326 		d->segid = iommu->pci_seg->id;
1327 		d->devid_first = first;
1328 		d->devid_last = last;
1329 		list_add_tail(&d->list, &amd_ivhd_dev_flags_list);
1330 	}
1331 
1332 	for (i = first; i <= last; i++)  {
1333 		if (flags) {
1334 			struct dev_table_entry *dev_table = get_dev_table(iommu);
1335 
1336 			memcpy(&dev_table[i], &dte, sizeof(dte));
1337 		}
1338 		amd_iommu_set_rlookup_table(iommu, i);
1339 	}
1340 }
1341 
1342 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1343 					   u16 devid, u32 flags, u32 ext_flags)
1344 {
1345 	set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags);
1346 }
1347 
1348 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1349 {
1350 	struct devid_map *entry;
1351 	struct list_head *list;
1352 
1353 	if (type == IVHD_SPECIAL_IOAPIC)
1354 		list = &ioapic_map;
1355 	else if (type == IVHD_SPECIAL_HPET)
1356 		list = &hpet_map;
1357 	else
1358 		return -EINVAL;
1359 
1360 	list_for_each_entry(entry, list, list) {
1361 		if (!(entry->id == id && entry->cmd_line))
1362 			continue;
1363 
1364 		pr_info("Command-line override present for %s id %d - ignoring\n",
1365 			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1366 
1367 		*devid = entry->devid;
1368 
1369 		return 0;
1370 	}
1371 
1372 	entry = kzalloc_obj(*entry);
1373 	if (!entry)
1374 		return -ENOMEM;
1375 
1376 	entry->id	= id;
1377 	entry->devid	= *devid;
1378 	entry->cmd_line	= cmd_line;
1379 
1380 	list_add_tail(&entry->list, list);
1381 
1382 	return 0;
1383 }
1384 
1385 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1386 				      bool cmd_line)
1387 {
1388 	struct acpihid_map_entry *entry;
1389 	struct list_head *list = &acpihid_map;
1390 
1391 	list_for_each_entry(entry, list, list) {
1392 		if (strcmp(entry->hid, hid) ||
1393 		    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1394 		    !entry->cmd_line)
1395 			continue;
1396 
1397 		pr_info("Command-line override for hid:%s uid:%s\n",
1398 			hid, uid);
1399 		*devid = entry->devid;
1400 		return 0;
1401 	}
1402 
1403 	entry = kzalloc_obj(*entry);
1404 	if (!entry)
1405 		return -ENOMEM;
1406 
1407 	memcpy(entry->uid, uid, strlen(uid));
1408 	memcpy(entry->hid, hid, strlen(hid));
1409 	entry->devid = *devid;
1410 	entry->cmd_line	= cmd_line;
1411 	entry->root_devid = (entry->devid & (~0x7));
1412 
1413 	pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n",
1414 		entry->cmd_line ? "cmd" : "ivrs",
1415 		entry->hid, entry->uid, entry->root_devid);
1416 
1417 	list_add_tail(&entry->list, list);
1418 	return 0;
1419 }
1420 
1421 static int __init add_early_maps(void)
1422 {
1423 	int i, ret;
1424 
1425 	for (i = 0; i < early_ioapic_map_size; ++i) {
1426 		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1427 					 early_ioapic_map[i].id,
1428 					 &early_ioapic_map[i].devid,
1429 					 early_ioapic_map[i].cmd_line);
1430 		if (ret)
1431 			return ret;
1432 	}
1433 
1434 	for (i = 0; i < early_hpet_map_size; ++i) {
1435 		ret = add_special_device(IVHD_SPECIAL_HPET,
1436 					 early_hpet_map[i].id,
1437 					 &early_hpet_map[i].devid,
1438 					 early_hpet_map[i].cmd_line);
1439 		if (ret)
1440 			return ret;
1441 	}
1442 
1443 	for (i = 0; i < early_acpihid_map_size; ++i) {
1444 		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1445 					  early_acpihid_map[i].uid,
1446 					  &early_acpihid_map[i].devid,
1447 					  early_acpihid_map[i].cmd_line);
1448 		if (ret)
1449 			return ret;
1450 	}
1451 
1452 	return 0;
1453 }
1454 
1455 /*
1456  * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1457  * initializes the hardware and our data structures with it.
1458  */
1459 static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1460 					struct ivhd_header *h)
1461 {
1462 	u8 *p = (u8 *)h;
1463 	u8 *end = p, flags = 0;
1464 	u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1465 	u32 dev_i, ext_flags = 0;
1466 	bool alias = false;
1467 	struct ivhd_entry *e;
1468 	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1469 	u32 ivhd_size;
1470 	int ret;
1471 
1472 
1473 	ret = add_early_maps();
1474 	if (ret)
1475 		return ret;
1476 
1477 	amd_iommu_apply_ivrs_quirks();
1478 
1479 	/*
1480 	 * First save the recommended feature enable bits from ACPI
1481 	 */
1482 	iommu->acpi_flags = h->flags;
1483 
1484 	/*
1485 	 * Done. Now parse the device entries
1486 	 */
1487 	ivhd_size = get_ivhd_header_size(h);
1488 	if (!ivhd_size) {
1489 		pr_err("Unsupported IVHD type %#x\n", h->type);
1490 		return -EINVAL;
1491 	}
1492 
1493 	p += ivhd_size;
1494 
1495 	end += h->length;
1496 
1497 
1498 	while (p < end) {
1499 		e = (struct ivhd_entry *)p;
1500 		seg_id = pci_seg->id;
1501 
1502 		switch (e->type) {
1503 		case IVHD_DEV_ALL:
1504 
1505 			DUMP_printk("  DEV_ALL\t\t\tsetting: %#02x\n", e->flags);
1506 			set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0);
1507 			break;
1508 		case IVHD_DEV_SELECT:
1509 
1510 			DUMP_printk("  DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
1511 				    seg_id, PCI_BUS_NUM(e->devid),
1512 				    PCI_SLOT(e->devid),
1513 				    PCI_FUNC(e->devid),
1514 				    e->flags);
1515 
1516 			devid = e->devid;
1517 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1518 			break;
1519 		case IVHD_DEV_SELECT_RANGE_START:
1520 
1521 			DUMP_printk("  DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
1522 				    seg_id, PCI_BUS_NUM(e->devid),
1523 				    PCI_SLOT(e->devid),
1524 				    PCI_FUNC(e->devid),
1525 				    e->flags);
1526 
1527 			devid_start = e->devid;
1528 			flags = e->flags;
1529 			ext_flags = 0;
1530 			alias = false;
1531 			break;
1532 		case IVHD_DEV_ALIAS:
1533 
1534 			DUMP_printk("  DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n",
1535 				    seg_id, PCI_BUS_NUM(e->devid),
1536 				    PCI_SLOT(e->devid),
1537 				    PCI_FUNC(e->devid),
1538 				    e->flags,
1539 				    PCI_BUS_NUM(e->ext >> 8),
1540 				    PCI_SLOT(e->ext >> 8),
1541 				    PCI_FUNC(e->ext >> 8));
1542 
1543 			devid = e->devid;
1544 			devid_to = e->ext >> 8;
1545 			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1546 			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1547 			pci_seg->alias_table[devid] = devid_to;
1548 			break;
1549 		case IVHD_DEV_ALIAS_RANGE:
1550 
1551 			DUMP_printk("  DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n",
1552 				    seg_id, PCI_BUS_NUM(e->devid),
1553 				    PCI_SLOT(e->devid),
1554 				    PCI_FUNC(e->devid),
1555 				    e->flags,
1556 				    seg_id, PCI_BUS_NUM(e->ext >> 8),
1557 				    PCI_SLOT(e->ext >> 8),
1558 				    PCI_FUNC(e->ext >> 8));
1559 
1560 			devid_start = e->devid;
1561 			flags = e->flags;
1562 			devid_to = e->ext >> 8;
1563 			ext_flags = 0;
1564 			alias = true;
1565 			break;
1566 		case IVHD_DEV_EXT_SELECT:
1567 
1568 			DUMP_printk("  DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
1569 				    seg_id, PCI_BUS_NUM(e->devid),
1570 				    PCI_SLOT(e->devid),
1571 				    PCI_FUNC(e->devid),
1572 				    e->flags, e->ext);
1573 
1574 			devid = e->devid;
1575 			set_dev_entry_from_acpi(iommu, devid, e->flags,
1576 						e->ext);
1577 			break;
1578 		case IVHD_DEV_EXT_SELECT_RANGE:
1579 
1580 			DUMP_printk("  DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
1581 				    seg_id, PCI_BUS_NUM(e->devid),
1582 				    PCI_SLOT(e->devid),
1583 				    PCI_FUNC(e->devid),
1584 				    e->flags, e->ext);
1585 
1586 			devid_start = e->devid;
1587 			flags = e->flags;
1588 			ext_flags = e->ext;
1589 			alias = false;
1590 			break;
1591 		case IVHD_DEV_RANGE_END:
1592 
1593 			DUMP_printk("  DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n",
1594 				    seg_id, PCI_BUS_NUM(e->devid),
1595 				    PCI_SLOT(e->devid),
1596 				    PCI_FUNC(e->devid));
1597 
1598 			devid = e->devid;
1599 			if (alias) {
1600 				for (dev_i = devid_start; dev_i <= devid; ++dev_i)
1601 					pci_seg->alias_table[dev_i] = devid_to;
1602 				set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags);
1603 			}
1604 			set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags);
1605 			break;
1606 		case IVHD_DEV_SPECIAL: {
1607 			u8 handle, type;
1608 			const char *var;
1609 			u32 devid;
1610 			int ret;
1611 
1612 			handle = e->ext & 0xff;
1613 			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1614 			type   = (e->ext >> 24) & 0xff;
1615 
1616 			if (type == IVHD_SPECIAL_IOAPIC)
1617 				var = "IOAPIC";
1618 			else if (type == IVHD_SPECIAL_HPET)
1619 				var = "HPET";
1620 			else
1621 				var = "UNKNOWN";
1622 
1623 			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
1624 				    var, (int)handle,
1625 				    seg_id, PCI_BUS_NUM(devid),
1626 				    PCI_SLOT(devid),
1627 				    PCI_FUNC(devid),
1628 				    e->flags);
1629 
1630 			ret = add_special_device(type, handle, &devid, false);
1631 			if (ret)
1632 				return ret;
1633 
1634 			/*
1635 			 * add_special_device might update the devid in case a
1636 			 * command-line override is present. So call
1637 			 * set_dev_entry_from_acpi after add_special_device.
1638 			 */
1639 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1640 
1641 			break;
1642 		}
1643 		case IVHD_DEV_ACPI_HID: {
1644 			u32 devid;
1645 			u8 hid[ACPIHID_HID_LEN];
1646 			u8 uid[ACPIHID_UID_LEN];
1647 			int ret;
1648 
1649 			if (h->type != 0x40) {
1650 				pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1651 				       e->type);
1652 				break;
1653 			}
1654 
1655 			BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1656 			memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1657 			hid[ACPIHID_HID_LEN - 1] = '\0';
1658 
1659 			if (!(*hid)) {
1660 				pr_err(FW_BUG "Invalid HID.\n");
1661 				break;
1662 			}
1663 
1664 			uid[0] = '\0';
1665 			switch (e->uidf) {
1666 			case UID_NOT_PRESENT:
1667 
1668 				if (e->uidl != 0)
1669 					pr_warn(FW_BUG "Invalid UID length.\n");
1670 
1671 				break;
1672 			case UID_IS_INTEGER:
1673 
1674 				sprintf(uid, "%d", e->uid);
1675 
1676 				break;
1677 			case UID_IS_CHARACTER:
1678 
1679 				memcpy(uid, &e->uid, e->uidl);
1680 				uid[e->uidl] = '\0';
1681 
1682 				break;
1683 			default:
1684 				break;
1685 			}
1686 
1687 			devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1688 			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
1689 				    hid, uid, seg_id,
1690 				    PCI_BUS_NUM(devid),
1691 				    PCI_SLOT(devid),
1692 				    PCI_FUNC(devid),
1693 				    e->flags);
1694 
1695 			flags = e->flags;
1696 
1697 			ret = add_acpi_hid_device(hid, uid, &devid, false);
1698 			if (ret)
1699 				return ret;
1700 
1701 			/*
1702 			 * add_acpi_hid_device might update the devid in case a
1703 			 * command-line override is present. So call
1704 			 * set_dev_entry_from_acpi after add_acpi_hid_device.
1705 			 */
1706 			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1707 
1708 			break;
1709 		}
1710 		default:
1711 			break;
1712 		}
1713 
1714 		p += ivhd_entry_length(p);
1715 	}
1716 
1717 	return 0;
1718 }
1719 
1720 /* Allocate PCI segment data structure */
1721 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1722 					  struct acpi_table_header *ivrs_base)
1723 {
1724 	struct amd_iommu_pci_seg *pci_seg;
1725 	int last_bdf;
1726 
1727 	/*
1728 	 * First parse ACPI tables to find the largest Bus/Dev/Func we need to
1729 	 * handle in this PCI segment. Based on this information the shared data
1730 	 * structures for the PCI segments in the system will be allocated.
1731 	 */
1732 	last_bdf = find_last_devid_acpi(ivrs_base, id);
1733 	if (last_bdf < 0)
1734 		return NULL;
1735 
1736 	pci_seg = kzalloc_obj(struct amd_iommu_pci_seg);
1737 	if (pci_seg == NULL)
1738 		return NULL;
1739 
1740 	pci_seg->last_bdf = last_bdf;
1741 	DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1742 	pci_seg->dev_table_size =
1743 		max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE),
1744 		    SZ_4K);
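	/*
	 * Editor's note (worked example, assuming the 32-byte DTE size from
	 * amd_iommu_types.h): with last_bdf = 0xffff the table is
	 * roundup_pow_of_two(0x10000 * 32) = 2 MiB; a tiny segment with
	 * last_bdf = 7 would still get the SZ_4K minimum.
	 */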
1745 
1746 	pci_seg->id = id;
1747 	init_llist_head(&pci_seg->dev_data_list);
1748 	INIT_LIST_HEAD(&pci_seg->unity_map);
1749 	list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1750 
1751 	if (alloc_dev_table(pci_seg))
1752 		goto err_free_pci_seg;
1753 	if (alloc_alias_table(pci_seg))
1754 		goto err_free_dev_table;
1755 	if (alloc_rlookup_table(pci_seg))
1756 		goto err_free_alias_table;
1757 
1758 	return pci_seg;
1759 
1760 err_free_alias_table:
1761 	free_alias_table(pci_seg);
1762 err_free_dev_table:
1763 	free_dev_table(pci_seg);
1764 err_free_pci_seg:
1765 	list_del(&pci_seg->list);
1766 	kfree(pci_seg);
1767 	return NULL;
1768 }
1769 
1770 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1771 					struct acpi_table_header *ivrs_base)
1772 {
1773 	struct amd_iommu_pci_seg *pci_seg;
1774 
1775 	for_each_pci_segment(pci_seg) {
1776 		if (pci_seg->id == id)
1777 			return pci_seg;
1778 	}
1779 
1780 	return alloc_pci_segment(id, ivrs_base);
1781 }
1782 
1783 static void __init free_pci_segments(void)
1784 {
1785 	struct amd_iommu_pci_seg *pci_seg, *next;
1786 
1787 	for_each_pci_segment_safe(pci_seg, next) {
1788 		list_del(&pci_seg->list);
1789 		free_irq_lookup_table(pci_seg);
1790 		free_rlookup_table(pci_seg);
1791 		free_alias_table(pci_seg);
1792 		free_dev_table(pci_seg);
1793 		kfree(pci_seg);
1794 	}
1795 }
1796 
1797 static void __init free_sysfs(struct amd_iommu *iommu)
1798 {
1799 	if (iommu->iommu.dev) {
1800 		iommu_device_unregister(&iommu->iommu);
1801 		iommu_device_sysfs_remove(&iommu->iommu);
1802 	}
1803 }
1804 
1805 static void __init free_iommu_one(struct amd_iommu *iommu)
1806 {
1807 	free_sysfs(iommu);
1808 	free_iommu_buffers(iommu);
1809 	amd_iommu_free_ppr_log(iommu);
1810 	free_ga_log(iommu);
1811 	iommu_unmap_mmio_space(iommu);
1812 	amd_iommu_iopf_uninit(iommu);
1813 }
1814 
1815 static void __init free_iommu_all(void)
1816 {
1817 	struct amd_iommu *iommu, *next;
1818 
1819 	for_each_iommu_safe(iommu, next) {
1820 		list_del(&iommu->list);
1821 		free_iommu_one(iommu);
1822 		kfree(iommu);
1823 	}
1824 }
1825 
1826 /*
1827  * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1828  * Workaround:
1829  *     BIOS should disable L2B miscellaneous clock gating by setting
1830  *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1831  */
1832 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1833 {
1834 	u32 value;
1835 
1836 	if ((boot_cpu_data.x86 != 0x15) ||
1837 	    (boot_cpu_data.x86_model < 0x10) ||
1838 	    (boot_cpu_data.x86_model > 0x1f))
1839 		return;
1840 
1841 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1842 	pci_read_config_dword(iommu->dev, 0xf4, &value);
1843 
1844 	if (value & BIT(2))
1845 		return;
1846 
1847 	/* Select NB indirect register 0x90 and enable writing */
1848 	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1849 
1850 	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1851 	pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1852 
1853 	/* Clear the enable writing bit */
1854 	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1855 }
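
/*
 * Summary of the D0F2 indirect register access pattern used above
 * (an illustrative sketch based only on the accesses visible in this
 * function):
 *
 *	0xf0 <- index              select indirect register (e.g. 0x90)
 *	0xf4 -> value              read the currently selected register
 *	0xf0 <- index | BIT(8)     re-select with writes enabled
 *	0xf4 <- value | bits       update the register
 *	0xf0 <- index              clear the write-enable bit again
 */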
1856 
1857 /*
1858  * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1859  * Workaround:
1860  *     BIOS should enable ATS write permission check by setting
1861  *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1862  */
1863 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1864 {
1865 	u32 value;
1866 
1867 	if ((boot_cpu_data.x86 != 0x15) ||
1868 	    (boot_cpu_data.x86_model < 0x30) ||
1869 	    (boot_cpu_data.x86_model > 0x3f))
1870 		return;
1871 
1872 	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1873 	value = iommu_read_l2(iommu, 0x47);
1874 
1875 	if (value & BIT(0))
1876 		return;
1877 
1878 	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1879 	iommu_write_l2(iommu, 0x47, value | BIT(0));
1880 
1881 	pci_info(iommu->dev, "Applying ATS write check workaround\n");
1882 }
1883 
1884 /*
1885  * This function glues together the initialization of one IOMMU: it
1886  * copies the relevant data from the ACPI table entry and maps the
1887  * MMIO space. It does NOT enable the IOMMU; that is done afterwards.
1888  */
1889 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1890 				 struct acpi_table_header *ivrs_base)
1891 {
1892 	struct amd_iommu_pci_seg *pci_seg;
1893 
1894 	pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1895 	if (pci_seg == NULL)
1896 		return -ENOMEM;
1897 	iommu->pci_seg = pci_seg;
1898 
1899 	raw_spin_lock_init(&iommu->lock);
1900 	iommu->cmd_sem_val = 0;
1901 
1902 	/* Add IOMMU to internal data structures */
1903 	list_add_tail(&iommu->list, &amd_iommu_list);
1904 	iommu->index = amd_iommus_present++;
1905 
1906 	if (unlikely(iommu->index >= MAX_IOMMUS)) {
1907 		WARN(1, "System has more IOMMUs than supported by this driver\n");
1908 		return -ENOSYS;
1909 	}
1910 
1911 	/*
1912 	 * Copy data from ACPI table entry to the iommu struct
1913 	 */
1914 	iommu->devid   = h->devid;
1915 	iommu->cap_ptr = h->cap_ptr;
1916 	iommu->mmio_phys = h->mmio_phys;
1917 
1918 	switch (h->type) {
1919 	case 0x10:
1920 		/* Check if IVHD EFR contains proper max banks/counters */
1921 		if ((h->efr_attr != 0) &&
1922 		    ((h->efr_attr & (0xF << 13)) != 0) &&
1923 		    ((h->efr_attr & (0x3F << 17)) != 0))
1924 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1925 		else
1926 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1927 
1928 		/* GAM requires GA mode. */
1929 		if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)
1930 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1931 		break;
1932 	case 0x11:
1933 	case 0x40:
1934 		if (h->efr_reg & (1 << 9))
1935 			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1936 		else
1937 			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1938 
1939 		/* XT and GAM require GA mode. */
1940 		if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) {
1941 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1942 			break;
1943 		}
1944 
1945 		if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1946 			amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1947 
1948 		if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) {
1949 			pr_warn_once("Host Address Translation is not supported.\n");
1950 			amd_iommu_hatdis = true;
1951 		}
1952 
1953 		early_iommu_features_init(iommu, h);
1954 
1955 		break;
1956 	default:
1957 		return -EINVAL;
1958 	}
1959 
1960 	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1961 						iommu->mmio_phys_end);
1962 	if (!iommu->mmio_base)
1963 		return -ENOMEM;
1964 
1965 	return init_iommu_from_acpi(iommu, h);
1966 }
1967 
1968 static int __init init_iommu_one_late(struct amd_iommu *iommu)
1969 {
1970 	int ret;
1971 
1972 	ret = alloc_iommu_buffers(iommu);
1973 	if (ret)
1974 		return ret;
1975 
1976 	iommu->int_enabled = false;
1977 
1978 	init_translation_status(iommu);
1979 	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1980 		iommu_disable(iommu);
1981 		clear_translation_pre_enabled(iommu);
1982 		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1983 			iommu->index);
1984 	}
1985 	if (amd_iommu_pre_enabled)
1986 		amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1987 
1988 	if (amd_iommu_irq_remap) {
1989 		ret = amd_iommu_create_irq_domain(iommu);
1990 		if (ret)
1991 			return ret;
1992 	}
1993 
1994 	/*
1995 	 * Make sure IOMMU is not considered to translate itself. The IVRS
1996 	 * table tells us so, but this is a lie!
1997 	 */
1998 	iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1999 
2000 	return 0;
2001 }
2002 
2003 /**
2004  * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
2005  * @ivrs: Pointer to the IVRS header
2006  *
2007  * This function searches through all IVHD blocks for the highest supported type.
2008  */
2009 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
2010 {
2011 	u8 *base = (u8 *)ivrs;
2012 	struct ivhd_header *ivhd = (struct ivhd_header *)
2013 					(base + IVRS_HEADER_LENGTH);
2014 	u8 last_type = ivhd->type;
2015 	u16 devid = ivhd->devid;
2016 
2017 	while (((u8 *)ivhd - base < ivrs->length) &&
2018 	       (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
2019 		u8 *p = (u8 *) ivhd;
2020 
2021 		if (ivhd->devid == devid)
2022 			last_type = ivhd->type;
2023 		ivhd = (struct ivhd_header *)(p + ivhd->length);
2024 	}
2025 
2026 	return last_type;
2027 }
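
/*
 * Illustrative example: an IVRS exposing IVHD blocks of types 0x10 and
 * 0x40 for the same IOMMU devid makes the function above return 0x40;
 * later blocks for the same devid overwrite last_type as long as their
 * type stays <= ACPI_IVHD_TYPE_MAX_SUPPORTED.
 */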
2028 
2029 /*
2030  * Iterates over all IOMMU entries in the ACPI table, allocates the
2031  * IOMMU structure and initializes it with init_iommu_one()
2032  */
2033 static int __init init_iommu_all(struct acpi_table_header *table)
2034 {
2035 	u8 *p = (u8 *)table, *end = (u8 *)table;
2036 	struct ivhd_header *h;
2037 	struct amd_iommu *iommu;
2038 	int ret;
2039 
2040 	end += table->length;
2041 	p += IVRS_HEADER_LENGTH;
2042 
2043 	/* Phase 1: Process all IVHD blocks */
2044 	while (p < end) {
2045 		h = (struct ivhd_header *)p;
2046 		if (*p == amd_iommu_target_ivhd_type) {
2047 
2048 			DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
2049 				    "flags: %01x info %04x\n",
2050 				    h->pci_seg, PCI_BUS_NUM(h->devid),
2051 				    PCI_SLOT(h->devid), PCI_FUNC(h->devid),
2052 				    h->cap_ptr, h->flags, h->info);
2053 			DUMP_printk("       mmio-addr: %016llx\n",
2054 				    h->mmio_phys);
2055 
2056 			iommu = kzalloc_obj(struct amd_iommu);
2057 			if (iommu == NULL)
2058 				return -ENOMEM;
2059 
2060 			ret = init_iommu_one(iommu, h, table);
2061 			if (ret)
2062 				return ret;
2063 		}
2064 		p += h->length;
2065 
2066 	}
2067 	WARN_ON(p != end);
2068 
2069 	/* Phase 2: Early feature support check */
2070 	get_global_efr();
2071 
2072 	/* Phase 3: Enable IOMMU features */
2073 	for_each_iommu(iommu) {
2074 		ret = init_iommu_one_late(iommu);
2075 		if (ret)
2076 			return ret;
2077 	}
2078 
2079 	return 0;
2080 }
2081 
2082 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
2083 {
2084 	u64 val;
2085 	struct pci_dev *pdev = iommu->dev;
2086 
2087 	if (!check_feature(FEATURE_PC))
2088 		return;
2089 
2090 	amd_iommu_pc_present = true;
2091 
2092 	pci_info(pdev, "IOMMU performance counters supported\n");
2093 
2094 	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
2095 	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
2096 	iommu->max_counters = (u8) ((val >> 7) & 0xf);
2097 
2098 	return;
2099 }
2100 
2101 static ssize_t amd_iommu_show_cap(struct device *dev,
2102 				  struct device_attribute *attr,
2103 				  char *buf)
2104 {
2105 	struct amd_iommu *iommu = dev_to_amd_iommu(dev);
2106 	return sysfs_emit(buf, "%x\n", iommu->cap);
2107 }
2108 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
2109 
2110 static ssize_t amd_iommu_show_features(struct device *dev,
2111 				       struct device_attribute *attr,
2112 				       char *buf)
2113 {
2114 	return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2);
2115 }
2116 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
2117 
2118 static struct attribute *amd_iommu_attrs[] = {
2119 	&dev_attr_cap.attr,
2120 	&dev_attr_features.attr,
2121 	NULL,
2122 };
2123 
2124 static struct attribute_group amd_iommu_group = {
2125 	.name = "amd-iommu",
2126 	.attrs = amd_iommu_attrs,
2127 };
2128 
2129 static const struct attribute_group *amd_iommu_groups[] = {
2130 	&amd_iommu_group,
2131 	NULL,
2132 };
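
/*
 * These attributes show up under sysfs once iommu_device_sysfs_add()
 * is called with the "ivhd%d" name below; for example (paths are
 * illustrative):
 *
 *	$ cat /sys/class/iommu/ivhd0/amd-iommu/cap
 *	$ cat /sys/class/iommu/ivhd0/amd-iommu/features
 */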
2133 
2134 /*
2135  * Note: IVHD types 0x11 and 0x40 also contain an exact copy
2136  * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
2137  * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
2138  */
2139 static void __init late_iommu_features_init(struct amd_iommu *iommu)
2140 {
2141 	u64 features, features2;
2142 
2143 	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
2144 		return;
2145 
2146 	/* read extended feature bits */
2147 	features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
2148 	features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
2149 
2150 	if (!amd_iommu_efr) {
2151 		amd_iommu_efr = features;
2152 		amd_iommu_efr2 = features2;
2153 		return;
2154 	}
2155 
2156 	/*
2157 	 * Sanity check and warn if EFR values from
2158 	 * IVHD and MMIO conflict.
2159 	 */
2160 	if (features != amd_iommu_efr ||
2161 	    features2 != amd_iommu_efr2) {
2162 		pr_warn(FW_WARN
2163 			"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2164 			features, amd_iommu_efr,
2165 			features2, amd_iommu_efr2);
2166 	}
2167 }
2168 
2169 static int __init iommu_init_pci(struct amd_iommu *iommu)
2170 {
2171 	int cap_ptr = iommu->cap_ptr;
2172 	int ret;
2173 
2174 	iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2175 						 PCI_BUS_NUM(iommu->devid),
2176 						 iommu->devid & 0xff);
2177 	if (!iommu->dev)
2178 		return -ENODEV;
2179 
2180 	/* ACPI _PRT won't have an IRQ for IOMMU */
2181 	iommu->dev->irq_managed = 1;
2182 
2183 	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2184 			      &iommu->cap);
2185 
2186 	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2187 		amd_iommu_iotlb_sup = false;
2188 
2189 	late_iommu_features_init(iommu);
2190 
2191 	if (check_feature(FEATURE_GT)) {
2192 		int glxval;
2193 		u64 pasmax;
2194 
2195 		pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr);
2196 		iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
2197 
2198 		BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
2199 
2200 		glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr);
2201 
2202 		if (amd_iommu_max_glx_val == -1)
2203 			amd_iommu_max_glx_val = glxval;
2204 		else
2205 			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2206 
2207 		iommu_enable_gt(iommu);
2208 	}
2209 
2210 	if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu))
2211 		return -ENOMEM;
2212 
2213 	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2214 		pr_info("Using strict mode due to virtualization\n");
2215 		iommu_set_dma_strict();
2216 		amd_iommu_np_cache = true;
2217 	}
2218 
2219 	init_iommu_perf_ctr(iommu);
2220 
2221 	if (is_rd890_iommu(iommu->dev)) {
2222 		int i, j;
2223 
2224 		iommu->root_pdev =
2225 			pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2226 						    iommu->dev->bus->number,
2227 						    PCI_DEVFN(0, 0));
2228 
2229 		/*
2230 		 * Some rd890 systems may not be fully reconfigured by the
2231 		 * BIOS, so we store this information here so that it can
2232 		 * be reprogrammed on resume.
2233 		 */
2234 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2235 				&iommu->stored_addr_lo);
2236 		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2237 				&iommu->stored_addr_hi);
2238 
2239 		/* Low bit locks writes to configuration space */
2240 		iommu->stored_addr_lo &= ~1;
2241 
2242 		for (i = 0; i < 6; i++)
2243 			for (j = 0; j < 0x12; j++)
2244 				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2245 
2246 		for (i = 0; i < 0x83; i++)
2247 			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2248 	}
2249 
2250 	amd_iommu_erratum_746_workaround(iommu);
2251 	amd_iommu_ats_write_check_workaround(iommu);
2252 
2253 	ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2254 			       amd_iommu_groups, "ivhd%d", iommu->index);
2255 	if (ret)
2256 		return ret;
2257 
2258 	/*
2259 	 * Allocate the per-IOMMU IOPF queue here so that PRI-capable
2260 	 * devices can be added to it in the device attach path.
2261 	 */
2262 	if (amd_iommu_gt_ppr_supported()) {
2263 		ret = amd_iommu_iopf_init(iommu);
2264 		if (ret)
2265 			return ret;
2266 	}
2267 
2268 	ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2269 	if (ret || amd_iommu_pgtable == PD_MODE_NONE) {
2270 		/*
2271 		 * Remove sysfs if DMA translation is not supported by the
2272 		 * IOMMU. Do not return an error so IRQ remapping can still be
2273 		 * enabled in state_next(); DTE[V, TV] must eventually be set to 0.
2274 		 */
2275 		iommu_device_sysfs_remove(&iommu->iommu);
2276 	}
2277 
2278 	return pci_enable_device(iommu->dev);
2279 }
2280 
2281 static void print_iommu_info(void)
2282 {
2283 	int i;
2284 	static const char * const feat_str[] = {
2285 		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2286 		"IA", "GA", "HE", "PC"
2287 	};
2288 
2289 	if (amd_iommu_efr) {
2290 		pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2);
2291 
2292 		for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2293 			if (check_feature(1ULL << i))
2294 				pr_cont(" %s", feat_str[i]);
2295 		}
2296 
2297 		if (check_feature(FEATURE_GAM_VAPIC))
2298 			pr_cont(" GA_vAPIC");
2299 
2300 		if (check_feature(FEATURE_SNP))
2301 			pr_cont(" SNP");
2302 
2303 		if (check_feature2(FEATURE_SEVSNPIO_SUP))
2304 			pr_cont(" SEV-TIO");
2305 
2306 		pr_cont("\n");
2307 	}
2308 
2309 	if (irq_remapping_enabled) {
2310 		pr_info("Interrupt remapping enabled\n");
2311 		if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2312 			pr_info("X2APIC enabled\n");
2313 	}
2314 	if (amd_iommu_pgtable == PD_MODE_V2) {
2315 		pr_info("V2 page table enabled (Paging mode : %d level)\n",
2316 			amd_iommu_gpt_level);
2317 	}
2318 }
2319 
2320 static int __init amd_iommu_init_pci(void)
2321 {
2322 	struct amd_iommu *iommu;
2323 	struct amd_iommu_pci_seg *pci_seg;
2324 	int ret;
2325 
2326 	/* Init global identity domain before registering IOMMU */
2327 	amd_iommu_init_identity_domain();
2328 
2329 	for_each_iommu(iommu) {
2330 		ret = iommu_init_pci(iommu);
2331 		if (ret) {
2332 			pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2333 			       iommu->index, ret);
2334 			goto out;
2335 		}
2336 		/* Need to setup range after PCI init */
2337 		iommu_set_cwwb_range(iommu);
2338 	}
2339 
2340 	/*
2341 	 * Order is important here to make sure any unity map requirements are
2342 	 * fulfilled. The unity mappings are created and written to the device
2343 	 * table during the iommu_init_pci() call.
2344 	 *
2345 	 * After that we call init_device_table_dma() to make sure any
2346 	 * uninitialized DTE will block DMA, and in the end we flush the caches
2347 	 * of all IOMMUs to make sure the changes to the device table are
2348 	 * active.
2349 	 */
2350 	for_each_pci_segment(pci_seg)
2351 		init_device_table_dma(pci_seg);
2352 
2353 	for_each_iommu(iommu)
2354 		amd_iommu_flush_all_caches(iommu);
2355 
2356 	print_iommu_info();
2357 
2358 out:
2359 	return ret;
2360 }
2361 
2362 /****************************************************************************
2363  *
2364  * The following functions initialize the MSI interrupts for all IOMMUs
2365  * in the system. It's a bit challenging because there could be multiple
2366  * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2367  * pci_dev.
2368  *
2369  ****************************************************************************/
2370 
2371 static int iommu_setup_msi(struct amd_iommu *iommu)
2372 {
2373 	int r;
2374 
2375 	r = pci_enable_msi(iommu->dev);
2376 	if (r)
2377 		return r;
2378 
2379 	r = request_threaded_irq(iommu->dev->irq, NULL, amd_iommu_int_thread,
2380 				 IRQF_ONESHOT, "AMD-Vi", iommu);
2381 	if (r) {
2382 		pci_disable_msi(iommu->dev);
2383 		return r;
2384 	}
2385 
2386 	return 0;
2387 }
2388 
2389 union intcapxt {
2390 	u64	capxt;
2391 	struct {
2392 		u64	reserved_0		:  2,
2393 			dest_mode_logical	:  1,
2394 			reserved_1		:  5,
2395 			destid_0_23		: 24,
2396 			vector			:  8,
2397 			reserved_2		: 16,
2398 			destid_24_31		:  8;
2399 	};
2400 } __attribute__ ((packed));
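
/*
 * Illustrative encoding example (values are made up): routing vector
 * 0x40 to APIC ID 0x1234567 in physical destination mode would be
 * built as
 *
 *	union intcapxt xt = { .capxt = 0 };
 *
 *	xt.dest_mode_logical = 0;
 *	xt.vector            = 0x40;
 *	xt.destid_0_23       = 0x1234567 & GENMASK(23, 0);
 *	xt.destid_24_31      = 0x1234567 >> 24;
 *
 * and written to one of the MMIO_INTCAPXT_*_OFFSET registers, exactly
 * as intcapxt_unmask_irq() does below.
 */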
2401 
2402 
2403 static struct irq_chip intcapxt_controller;
2404 
2405 static int intcapxt_irqdomain_activate(struct irq_domain *domain,
2406 				       struct irq_data *irqd, bool reserve)
2407 {
2408 	return 0;
2409 }
2410 
2411 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
2412 					  struct irq_data *irqd)
2413 {
2414 }
2415 
2416 
2417 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2418 				    unsigned int nr_irqs, void *arg)
2419 {
2420 	struct irq_alloc_info *info = arg;
2421 	int i, ret;
2422 
2423 	if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2424 		return -EINVAL;
2425 
2426 	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2427 	if (ret < 0)
2428 		return ret;
2429 
2430 	for (i = virq; i < virq + nr_irqs; i++) {
2431 		struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2432 
2433 		irqd->chip = &intcapxt_controller;
2434 		irqd->hwirq = info->hwirq;
2435 		irqd->chip_data = info->data;
2436 		__irq_set_handler(i, handle_edge_irq, 0, "edge");
2437 	}
2438 
2439 	return ret;
2440 }
2441 
2442 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2443 				    unsigned int nr_irqs)
2444 {
2445 	irq_domain_free_irqs_top(domain, virq, nr_irqs);
2446 }
2447 
2448 
2449 static void intcapxt_unmask_irq(struct irq_data *irqd)
2450 {
2451 	struct amd_iommu *iommu = irqd->chip_data;
2452 	struct irq_cfg *cfg = irqd_cfg(irqd);
2453 	union intcapxt xt;
2454 
2455 	xt.capxt = 0ULL;
2456 	xt.dest_mode_logical = apic->dest_mode_logical;
2457 	xt.vector = cfg->vector;
2458 	xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2459 	xt.destid_24_31 = cfg->dest_apicid >> 24;
2460 
2461 	writeq(xt.capxt, iommu->mmio_base + irqd->hwirq);
2462 }
2463 
2464 static void intcapxt_mask_irq(struct irq_data *irqd)
2465 {
2466 	struct amd_iommu *iommu = irqd->chip_data;
2467 
2468 	writeq(0, iommu->mmio_base + irqd->hwirq);
2469 }
2470 
2471 
2472 static int intcapxt_set_affinity(struct irq_data *irqd,
2473 				 const struct cpumask *mask, bool force)
2474 {
2475 	struct irq_data *parent = irqd->parent_data;
2476 	int ret;
2477 
2478 	ret = parent->chip->irq_set_affinity(parent, mask, force);
2479 	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2480 		return ret;
2481 	return 0;
2482 }
2483 
2484 static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
2485 {
2486 	return on ? -EOPNOTSUPP : 0;
2487 }
2488 
2489 static struct irq_chip intcapxt_controller = {
2490 	.name			= "IOMMU-MSI",
2491 	.irq_unmask		= intcapxt_unmask_irq,
2492 	.irq_mask		= intcapxt_mask_irq,
2493 	.irq_ack		= irq_chip_ack_parent,
2494 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
2495 	.irq_set_affinity       = intcapxt_set_affinity,
2496 	.irq_set_wake		= intcapxt_set_wake,
2497 	.flags			= IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED,
2498 };
2499 
2500 static const struct irq_domain_ops intcapxt_domain_ops = {
2501 	.alloc			= intcapxt_irqdomain_alloc,
2502 	.free			= intcapxt_irqdomain_free,
2503 	.activate		= intcapxt_irqdomain_activate,
2504 	.deactivate		= intcapxt_irqdomain_deactivate,
2505 };
2506 
2507 
2508 static struct irq_domain *iommu_irqdomain;
2509 
2510 static struct irq_domain *iommu_get_irqdomain(void)
2511 {
2512 	struct fwnode_handle *fn;
2513 
2514 	/* No need for locking here (yet) as the init is single-threaded */
2515 	if (iommu_irqdomain)
2516 		return iommu_irqdomain;
2517 
2518 	fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2519 	if (!fn)
2520 		return NULL;
2521 
2522 	iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2523 						      fn, &intcapxt_domain_ops,
2524 						      NULL);
2525 	if (!iommu_irqdomain)
2526 		irq_domain_free_fwnode(fn);
2527 
2528 	return iommu_irqdomain;
2529 }
2530 
2531 static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname,
2532 				  int hwirq, irq_handler_t thread_fn)
2533 {
2534 	struct irq_domain *domain;
2535 	struct irq_alloc_info info;
2536 	int irq, ret;
2537 	int node = dev_to_node(&iommu->dev->dev);
2538 
2539 	domain = iommu_get_irqdomain();
2540 	if (!domain)
2541 		return -ENXIO;
2542 
2543 	init_irq_alloc_info(&info, NULL);
2544 	info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2545 	info.data = iommu;
2546 	info.hwirq = hwirq;
2547 
2548 	irq = irq_domain_alloc_irqs(domain, 1, node, &info);
2549 	if (irq < 0) {
2550 		irq_domain_remove(domain);
2551 		return irq;
2552 	}
2553 
2554 	ret = request_threaded_irq(irq, NULL, thread_fn, IRQF_ONESHOT, devname,
2555 				   iommu);
2556 	if (ret) {
2557 		irq_domain_free_irqs(irq, 1);
2558 		irq_domain_remove(domain);
2559 		return ret;
2560 	}
2561 
2562 	return 0;
2563 }
2564 
2565 static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2566 {
2567 	int ret;
2568 
2569 	snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name),
2570 		 "AMD-Vi%d-Evt", iommu->index);
2571 	ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name,
2572 				     MMIO_INTCAPXT_EVT_OFFSET,
2573 				     amd_iommu_int_thread_evtlog);
2574 	if (ret)
2575 		return ret;
2576 
2577 	snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name),
2578 		 "AMD-Vi%d-PPR", iommu->index);
2579 	ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name,
2580 				     MMIO_INTCAPXT_PPR_OFFSET,
2581 				     amd_iommu_int_thread_pprlog);
2582 	if (ret)
2583 		return ret;
2584 
2585 #ifdef CONFIG_IRQ_REMAP
2586 	snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name),
2587 		 "AMD-Vi%d-GA", iommu->index);
2588 	ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name,
2589 				     MMIO_INTCAPXT_GALOG_OFFSET,
2590 				     amd_iommu_int_thread_galog);
2591 #endif
2592 
2593 	return ret;
2594 }
2595 
2596 static int iommu_init_irq(struct amd_iommu *iommu)
2597 {
2598 	int ret;
2599 
2600 	if (iommu->int_enabled)
2601 		goto enable_faults;
2602 
2603 	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2604 		ret = iommu_setup_intcapxt(iommu);
2605 	else if (iommu->dev->msi_cap)
2606 		ret = iommu_setup_msi(iommu);
2607 	else
2608 		ret = -ENODEV;
2609 
2610 	if (ret)
2611 		return ret;
2612 
2613 	iommu->int_enabled = true;
2614 enable_faults:
2615 
2616 	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2617 		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2618 
2619 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2620 
2621 	return 0;
2622 }
2623 
2624 /****************************************************************************
2625  *
2626  * The next functions belong to the third pass of parsing the ACPI
2627  * table. In this last pass the memory mapping requirements are
2628  * gathered (like exclusion and unity mapping ranges).
2629  *
2630  ****************************************************************************/
2631 
2632 static void __init free_unity_maps(void)
2633 {
2634 	struct unity_map_entry *entry, *next;
2635 	struct amd_iommu_pci_seg *p, *pci_seg;
2636 
2637 	for_each_pci_segment_safe(pci_seg, p) {
2638 		list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2639 			list_del(&entry->list);
2640 			kfree(entry);
2641 		}
2642 	}
2643 }
2644 
2645 /* called for unity map ACPI definition */
2646 static int __init init_unity_map_range(struct ivmd_header *m,
2647 				       struct acpi_table_header *ivrs_base)
2648 {
2649 	struct unity_map_entry *e = NULL;
2650 	struct amd_iommu_pci_seg *pci_seg;
2651 	char *s;
2652 
2653 	pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2654 	if (pci_seg == NULL)
2655 		return -ENOMEM;
2656 
2657 	e = kzalloc_obj(*e);
2658 	if (e == NULL)
2659 		return -ENOMEM;
2660 
2661 	switch (m->type) {
2662 	default:
2663 		kfree(e);
2664 		return 0;
2665 	case ACPI_IVMD_TYPE:
2666 		s = "IVMD_TYPE\t\t\t";
2667 		e->devid_start = e->devid_end = m->devid;
2668 		break;
2669 	case ACPI_IVMD_TYPE_ALL:
2670 		s = "IVMD_TYPE_ALL\t\t";
2671 		e->devid_start = 0;
2672 		e->devid_end = pci_seg->last_bdf;
2673 		break;
2674 	case ACPI_IVMD_TYPE_RANGE:
2675 		s = "IVMD_TYPE_RANGE\t\t";
2676 		e->devid_start = m->devid;
2677 		e->devid_end = m->aux;
2678 		break;
2679 	}
2680 	e->address_start = PAGE_ALIGN(m->range_start);
2681 	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2682 	e->prot = m->flags >> 1;
2683 
2684 	/*
2685 	 * Treat per-device exclusion ranges as r/w unity-mapped regions
2686 	 * since some buggy BIOSes might overwrite the exclusion range
2687 	 * (the exclusion_start and exclusion_length members). This
2688 	 * happens when multiple exclusion ranges (IVMD entries) are
2689 	 * defined in the ACPI table.
2690 	 */
2691 	if (m->flags & IVMD_FLAG_EXCL_RANGE)
2692 		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2693 
2694 	DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2695 		    "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2696 		    " flags: %x\n", s, m->pci_seg,
2697 		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2698 		    PCI_FUNC(e->devid_start), m->pci_seg,
2699 		    PCI_BUS_NUM(e->devid_end),
2700 		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2701 		    e->address_start, e->address_end, m->flags);
2702 
2703 	list_add_tail(&e->list, &pci_seg->unity_map);
2704 
2705 	return 0;
2706 }
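
/*
 * Worked flag example (illustrative): a unity-mapped, readable and
 * writable IVMD entry has m->flags = IVMD_FLAG_UNITY_MAP |
 * IVMD_FLAG_IR | IVMD_FLAG_IW = 0x07, so e->prot = 0x07 >> 1 = 0x3,
 * i.e. both the IR and IW protection bits are set for the range.
 */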
2707 
2708 /* iterates over all memory definitions we find in the ACPI table */
2709 static int __init init_memory_definitions(struct acpi_table_header *table)
2710 {
2711 	u8 *p = (u8 *)table, *end = (u8 *)table;
2712 	struct ivmd_header *m;
2713 
2714 	end += table->length;
2715 	p += IVRS_HEADER_LENGTH;
2716 
2717 	while (p < end) {
2718 		m = (struct ivmd_header *)p;
2719 		if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2720 			init_unity_map_range(m, table);
2721 
2722 		p += m->length;
2723 	}
2724 
2725 	return 0;
2726 }
2727 
2728 /*
2729  * Init the device table to not allow DMA access for devices
2730  */
2731 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2732 {
2733 	u32 devid;
2734 	struct dev_table_entry *dev_table = pci_seg->dev_table;
2735 
2736 	if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE)
2737 		return;
2738 
2739 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2740 		set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID);
2741 		if (!amd_iommu_snp_en)
2742 			set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION);
2743 	}
2744 }
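
/*
 * Resulting DTE state (a sketch, assuming the usual DTE bit layout):
 * V=1 together with TV=1 and Mode=0 makes the IOMMU block all DMA for
 * the device, which is the intended default until a domain is
 * attached. With SNP enabled, the loop above intentionally leaves TV
 * clear.
 */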
2745 
2746 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2747 {
2748 	u32 devid;
2749 	struct dev_table_entry *dev_table = pci_seg->dev_table;
2750 
2751 	if (dev_table == NULL)
2752 		return;
2753 
2754 	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2755 		dev_table[devid].data[0] = 0ULL;
2756 		dev_table[devid].data[1] = 0ULL;
2757 	}
2758 }
2759 
2760 static void init_device_table(void)
2761 {
2762 	struct amd_iommu_pci_seg *pci_seg;
2763 	u32 devid;
2764 
2765 	if (!amd_iommu_irq_remap)
2766 		return;
2767 
2768 	for_each_pci_segment(pci_seg) {
2769 		for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2770 			set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN);
2771 	}
2772 }
2773 
2774 static void iommu_init_flags(struct amd_iommu *iommu)
2775 {
2776 	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2777 		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2778 		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2779 
2780 	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2781 		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2782 		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2783 
2784 	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2785 		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2786 		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2787 
2788 	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2789 		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2790 		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2791 
2792 	/*
2793 	 * make IOMMU memory accesses cache coherent
2794 	 */
2795 	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2796 
2797 	/* Set IOTLB invalidation timeout to 1s */
2798 	iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT);
2799 
2800 	/* Enable Enhanced Peripheral Page Request Handling */
2801 	if (check_feature(FEATURE_EPHSUP))
2802 		iommu_feature_enable(iommu, CONTROL_EPH_EN);
2803 }
2804 
2805 static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2806 {
2807 	int i, j;
2808 	u32 ioc_feature_control;
2809 	struct pci_dev *pdev = iommu->root_pdev;
2810 
2811 	/* RD890 BIOSes may not have completely reconfigured the iommu */
2812 	if (!is_rd890_iommu(iommu->dev) || !pdev)
2813 		return;
2814 
2815 	/*
2816 	 * First, we need to ensure that the iommu is enabled. This is
2817 	 * controlled by a register in the northbridge
2818 	 */
2819 
2820 	/* Select Northbridge indirect register 0x75 and enable writing */
2821 	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2822 	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2823 
2824 	/* Enable the iommu */
2825 	if (!(ioc_feature_control & 0x1))
2826 		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2827 
2828 	/* Restore the iommu BAR */
2829 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2830 			       iommu->stored_addr_lo);
2831 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2832 			       iommu->stored_addr_hi);
2833 
2834 	/* Restore the l1 indirect regs for each of the 6 l1s */
2835 	for (i = 0; i < 6; i++)
2836 		for (j = 0; j < 0x12; j++)
2837 			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2838 
2839 	/* Restore the l2 indirect regs */
2840 	for (i = 0; i < 0x83; i++)
2841 		iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2842 
2843 	/* Lock PCI setup registers */
2844 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2845 			       iommu->stored_addr_lo | 1);
2846 }
2847 
2848 static void iommu_enable_ga(struct amd_iommu *iommu)
2849 {
2850 #ifdef CONFIG_IRQ_REMAP
2851 	switch (amd_iommu_guest_ir) {
2852 	case AMD_IOMMU_GUEST_IR_VAPIC:
2853 	case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2854 		iommu_feature_enable(iommu, CONTROL_GA_EN);
2855 		iommu->irte_ops = &irte_128_ops;
2856 		break;
2857 	default:
2858 		iommu->irte_ops = &irte_32_ops;
2859 		break;
2860 	}
2861 #endif
2862 }
2863 
2864 static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
2865 {
2866 	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
2867 }
2868 
2869 static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
2870 {
2871 	u64 ctrl;
2872 
2873 	if (!amd_iommu_irtcachedis)
2874 		return;
2875 
2876 	/*
2877 	 * Note:
2878 	 * Support for the IRTCacheDis feature is determined by
2879 	 * checking whether the bit is writable.
2880 	 */
2881 	iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
2882 	ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
2883 	ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
2884 	if (ctrl)
2885 		iommu->irtcachedis_enabled = true;
2886 	pr_info("iommu%d (%#06x) : IRT cache is %s\n",
2887 		iommu->index, iommu->devid,
2888 		iommu->irtcachedis_enabled ? "disabled" : "enabled");
2889 }
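
/*
 * Note the probe-by-writability pattern above (a general technique,
 * not an architectural statement): set the control bit, read CONTROL
 * back, and treat the feature as present only if the bit sticks.
 */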
2890 
2891 static void iommu_enable_2k_int(struct amd_iommu *iommu)
2892 {
2893 	if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
2894 		return;
2895 
2896 	iommu_feature_set(iommu,
2897 			  CONTROL_NUM_INT_REMAP_MODE_2K,
2898 			  CONTROL_NUM_INT_REMAP_MODE_MASK,
2899 			  CONTROL_NUM_INT_REMAP_MODE);
2900 }
2901 
2902 static void early_enable_iommu(struct amd_iommu *iommu)
2903 {
2904 	iommu_disable(iommu);
2905 	iommu_init_flags(iommu);
2906 	iommu_set_device_table(iommu);
2907 	iommu_enable_command_buffer(iommu);
2908 	iommu_set_exclusion_range(iommu);
2909 	iommu_enable_gt(iommu);
2910 	iommu_enable_ga(iommu);
2911 	iommu_enable_xt(iommu);
2912 	iommu_enable_irtcachedis(iommu);
2913 	iommu_enable_2k_int(iommu);
2914 	iommu_enable(iommu);
2915 	amd_iommu_flush_all_caches(iommu);
2916 }
2917 
2918 /*
2919  * This function finally enables all IOMMUs found in the system after
2920  * they have been initialized.
2921  *
2922  * Or, when running in a kdump kernel with all IOMMUs pre-enabled, try to
2923  * reuse the old content of the device table entries. If that is not the
2924  * case, or if reuse fails, just continue as a normal kernel would.
2925  */
2926 static void early_enable_iommus(void)
2927 {
2928 	struct amd_iommu *iommu;
2929 	struct amd_iommu_pci_seg *pci_seg;
2930 
2931 	if (!reuse_device_table()) {
2932 		/*
2933 		 * If we get here because reusing the device table from the old
2934 		 * kernel failed with all IOMMUs enabled, print an error message
2935 		 * and free the allocated old_dev_tbl_cpy.
2936 		 */
2937 		if (amd_iommu_pre_enabled) {
2938 			pr_err("Failed to reuse DEV table from previous kernel.\n");
2939 			/*
2940 			 * Bail out early if unable to remap/reuse DEV table from
2941 			 * previous kernel if SNP enabled as IOMMU commands will
2942 			 * time out without DEV table and cause kdump boot panic.
2943 			 */
2944 			BUG_ON(check_feature(FEATURE_SNP));
2945 		}
2946 
2947 		for_each_pci_segment(pci_seg) {
2948 			if (pci_seg->old_dev_tbl_cpy != NULL) {
2949 				memunmap((void *)pci_seg->old_dev_tbl_cpy);
2950 				pci_seg->old_dev_tbl_cpy = NULL;
2951 			}
2952 		}
2953 
2954 		for_each_iommu(iommu) {
2955 			clear_translation_pre_enabled(iommu);
2956 			early_enable_iommu(iommu);
2957 		}
2958 	} else {
2959 		pr_info("Reused DEV table from previous kernel.\n");
2960 
2961 		for_each_pci_segment(pci_seg) {
2962 			iommu_free_pages(pci_seg->dev_table);
2963 			pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
2964 		}
2965 
2966 		for_each_iommu(iommu) {
2967 			iommu_disable_command_buffer(iommu);
2968 			iommu_disable_event_buffer(iommu);
2969 			iommu_disable_irtcachedis(iommu);
2970 			iommu_enable_command_buffer(iommu);
2971 			iommu_enable_ga(iommu);
2972 			iommu_enable_xt(iommu);
2973 			iommu_enable_irtcachedis(iommu);
2974 			iommu_enable_2k_int(iommu);
2975 			iommu_set_device_table(iommu);
2976 			amd_iommu_flush_all_caches(iommu);
2977 		}
2978 	}
2979 }
2980 
2981 static void enable_iommus_ppr(void)
2982 {
2983 	struct amd_iommu *iommu;
2984 
2985 	if (!amd_iommu_gt_ppr_supported())
2986 		return;
2987 
2988 	for_each_iommu(iommu)
2989 		amd_iommu_enable_ppr_log(iommu);
2990 }
2991 
2992 static void enable_iommus_vapic(void)
2993 {
2994 #ifdef CONFIG_IRQ_REMAP
2995 	u32 status, i;
2996 	struct amd_iommu *iommu;
2997 
2998 	for_each_iommu(iommu) {
2999 		/*
3000 		 * Disable GALog if already running. It could have been enabled
3001 		 * in the previous boot before kdump.
3002 		 */
3003 		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
3004 		if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
3005 			continue;
3006 
3007 		iommu_feature_disable(iommu, CONTROL_GALOG_EN);
3008 		iommu_feature_disable(iommu, CONTROL_GAINT_EN);
3009 
3010 		/*
3011 		 * Poll until the GALogRun bit reads zero before setting or
3012 		 * modifying the GA Log registers; only then is it safe.
3013 		 */
3014 		for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
3015 			status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
3016 			if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
3017 				break;
3018 			udelay(10);
3019 		}
3020 
3021 		if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
3022 			return;
3023 	}
3024 
3025 	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
3026 	    !check_feature(FEATURE_GAM_VAPIC)) {
3027 		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3028 		return;
3029 	}
3030 
3031 	if (amd_iommu_snp_en &&
3032 	    !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
3033 		pr_warn("Forcibly disabling Virtual APIC due to SNP\n");
3034 		amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3035 		return;
3036 	}
3037 
3038 	/* Enabling GAM and SNPAVIC support */
3039 	for_each_iommu(iommu) {
3040 		if (iommu_init_ga_log(iommu) ||
3041 		    iommu_ga_log_enable(iommu))
3042 			return;
3043 
3044 		iommu_feature_enable(iommu, CONTROL_GAM_EN);
3045 		if (amd_iommu_snp_en)
3046 			iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
3047 	}
3048 
3049 	amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
3050 	pr_info("Virtual APIC enabled\n");
3051 #endif
3052 }
3053 
3054 static void disable_iommus(void)
3055 {
3056 	struct amd_iommu *iommu;
3057 
3058 	for_each_iommu(iommu)
3059 		iommu_disable(iommu);
3060 
3061 #ifdef CONFIG_IRQ_REMAP
3062 	if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
3063 		amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
3064 #endif
3065 }
3066 
3067 /*
3068  * Suspend/Resume support
3069  * IOMMUs are disabled on suspend and fully re-enabled on resume
3070  */
3071 
3072 static void amd_iommu_resume(void *data)
3073 {
3074 	struct amd_iommu *iommu;
3075 
3076 	for_each_iommu(iommu)
3077 		iommu_apply_resume_quirks(iommu);
3078 
3079 	/* re-load the hardware */
3080 	for_each_iommu(iommu)
3081 		early_enable_iommu(iommu);
3082 
3083 	iommu_enable_event_buffer();
3084 	amd_iommu_enable_interrupts();
3085 }
3086 
3087 static int amd_iommu_suspend(void *data)
3088 {
3089 	/* disable IOMMUs to go out of the way for BIOS */
3090 	disable_iommus();
3091 
3092 	return 0;
3093 }
3094 
3095 static const struct syscore_ops amd_iommu_syscore_ops = {
3096 	.suspend = amd_iommu_suspend,
3097 	.resume = amd_iommu_resume,
3098 };
3099 
3100 static struct syscore amd_iommu_syscore = {
3101 	.ops = &amd_iommu_syscore_ops,
3102 };
3103 
3104 static void __init free_iommu_resources(void)
3105 {
3106 	free_iommu_all();
3107 	free_pci_segments();
3108 }
3109 
3110 /* SB IOAPIC is always on this device in AMD systems */
3111 #define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))
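
/*
 * Worked example for the encoding above: PCI_DEVFN(0x14, 0) is
 * (0x14 << 3) | 0 = 0xa0, so with bus 0x00 the southbridge IOAPIC
 * devid is 0x00a0.
 */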
3112 
3113 static bool __init check_ioapic_information(void)
3114 {
3115 	const char *fw_bug = FW_BUG;
3116 	bool ret, has_sb_ioapic;
3117 	int idx;
3118 
3119 	has_sb_ioapic = false;
3120 	ret           = false;
3121 
3122 	/*
3123 	 * If we have map overrides on the kernel command line the
3124 	 * messages in this function might not describe firmware bugs
3125 	 * anymore - so be careful
3126 	 */
3127 	if (cmdline_maps)
3128 		fw_bug = "";
3129 
3130 	for (idx = 0; idx < nr_ioapics; idx++) {
3131 		int devid, id = mpc_ioapic_id(idx);
3132 
3133 		devid = get_ioapic_devid(id);
3134 		if (devid < 0) {
3135 			pr_err("%s: IOAPIC[%d] not in IVRS table\n",
3136 				fw_bug, id);
3137 			ret = false;
3138 		} else if (devid == IOAPIC_SB_DEVID) {
3139 			has_sb_ioapic = true;
3140 			ret           = true;
3141 		}
3142 	}
3143 
3144 	if (!has_sb_ioapic) {
3145 		/*
3146 		 * We expect the SB IOAPIC to be listed in the IVRS
3147 		 * table. The system timer is connected to the SB IOAPIC
3148 		 * and if we don't have it in the list the system will
3149 		 * panic at boot time.  This situation usually happens
3150 		 * when the BIOS is buggy and provides us the wrong
3151 		 * device id for the IOAPIC in the system.
3152 		 */
3153 		pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
3154 	}
3155 
3156 	if (!ret)
3157 		pr_err("Disabling interrupt remapping\n");
3158 
3159 	return ret;
3160 }
3161 
3162 static void __init free_dma_resources(void)
3163 {
3164 	amd_iommu_pdom_id_destroy();
3165 	free_unity_maps();
3166 }
3167 
3168 static void __init ivinfo_init(void *ivrs)
3169 {
3170 	amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
3171 }
3172 
3173 /*
3174  * This is the hardware init function for AMD IOMMU in the system.
3175  * This function is called either from amd_iommu_init or from the interrupt
3176  * remapping setup code.
3177  *
3178  * This function basically parses the ACPI table for AMD IOMMU (IVRS)
3179  * four times:
3180  *
3181  *	Pass 1) Discover the most comprehensive IVHD type to use.
3182  *
3183  *	Pass 2) Find the highest PCI device id the driver has to handle.
3184  *		This information determines the size of the data
3185  *		structures that need to be allocated.
3186  *
3187  *	Pass 3) Initialize the data structures just allocated with the
3188  *		information in the ACPI table about the available AMD
3189  *		IOMMUs in the system. This also maps the PCI devices in
3190  *		the system to specific IOMMUs.
3191  *
3192  *	Pass 4) After the basic data structures are allocated and
3193  *		initialized, update them with information about the memory
3194  *		remapping requirements parsed out of the ACPI table in
3195  *		this last pass.
3196  *
3197  * After everything is set up the IOMMUs are enabled and the necessary
3198  * hotplug and suspend notifiers are registered.
3199  */
3200 static int __init early_amd_iommu_init(void)
3201 {
3202 	struct acpi_table_header *ivrs_base;
3203 	int ret;
3204 	acpi_status status;
3205 	u8 efr_hats;
3206 
3207 	if (!amd_iommu_detected)
3208 		return -ENODEV;
3209 
3210 	status = acpi_get_table("IVRS", 0, &ivrs_base);
3211 	if (status == AE_NOT_FOUND)
3212 		return -ENODEV;
3213 	else if (ACPI_FAILURE(status)) {
3214 		const char *err = acpi_format_exception(status);
3215 		pr_err("IVRS table error: %s\n", err);
3216 		return -EINVAL;
3217 	}
3218 
3219 	if (!boot_cpu_has(X86_FEATURE_CX16)) {
3220 		pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
3221 		ret = -EINVAL;
3222 		goto out;
3223 	}
3224 
3225 	/*
3226 	 * Validate checksum here so we don't need to do it when
3227 	 * we actually parse the table
3228 	 */
3229 	ret = check_ivrs_checksum(ivrs_base);
3230 	if (ret)
3231 		goto out;
3232 
3233 	ivinfo_init(ivrs_base);
3234 
3235 	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
3236 	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
3237 
3238 	/*
3239 	 * Now that the data structures are allocated and basically
3240 	 * initialized, start the real ACPI table scan.
3241 	 */
3242 	ret = init_iommu_all(ivrs_base);
3243 	if (ret)
3244 		goto out;
3245 
3246 	/* 5 level guest page table */
3247 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3248 	    FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
3249 		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
3250 
3251 	efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr);
3252 	if (efr_hats != 0x3) {
3253 		/*
3254 		 * efr[HATS] bits specify the maximum host translation level
3255 		 * supported, with LEVEL 4 being initial max level.
3256 		 */
3257 		amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL;
3258 	} else {
3259 		pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n",
3260 			     efr_hats);
3261 		amd_iommu_hatdis = true;
3262 	}
3263 
3264 	if (amd_iommu_pgtable == PD_MODE_V2) {
3265 		if (!amd_iommu_v2_pgtbl_supported()) {
3266 			pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
3267 			amd_iommu_pgtable = PD_MODE_V1;
3268 		}
3269 	}
3270 
3271 	if (amd_iommu_hatdis) {
3272 		/*
3273 		 * Host (v1) page table is not available. Attempt to use
3274 		 * Guest (v2) page table.
3275 		 */
3276 		if (amd_iommu_v2_pgtbl_supported())
3277 			amd_iommu_pgtable = PD_MODE_V2;
3278 		else
3279 			amd_iommu_pgtable = PD_MODE_NONE;
3280 	}
3281 
3282 	/* Disable any previously enabled IOMMUs */
3283 	if (!is_kdump_kernel() || amd_iommu_disabled)
3284 		disable_iommus();
3285 
3286 	if (amd_iommu_irq_remap)
3287 		amd_iommu_irq_remap = check_ioapic_information();
3288 
3289 	if (amd_iommu_irq_remap) {
3290 		struct amd_iommu_pci_seg *pci_seg;
3291 		ret = -ENOMEM;
3292 		for_each_pci_segment(pci_seg) {
3293 			if (alloc_irq_lookup_table(pci_seg))
3294 				goto out;
3295 		}
3296 	}
3297 
3298 	ret = init_memory_definitions(ivrs_base);
3299 	if (ret)
3300 		goto out;
3301 
3302 	/* init the device table */
3303 	init_device_table();
3304 
3305 out:
3306 	/* Don't leak any ACPI memory */
3307 	acpi_put_table(ivrs_base);
3308 
3309 	return ret;
3310 }
3311 
3312 static int amd_iommu_enable_interrupts(void)
3313 {
3314 	struct amd_iommu *iommu;
3315 	int ret = 0;
3316 
3317 	for_each_iommu(iommu) {
3318 		ret = iommu_init_irq(iommu);
3319 		if (ret)
3320 			goto out;
3321 	}
3322 
3323 	/*
3324 	 * Interrupt handler is ready to process interrupts. Enable
3325 	 * PPR and GA log interrupt for all IOMMUs.
3326 	 */
3327 	enable_iommus_vapic();
3328 	enable_iommus_ppr();
3329 
3330 out:
3331 	return ret;
3332 }
3333 
3334 static bool __init detect_ivrs(void)
3335 {
3336 	struct acpi_table_header *ivrs_base;
3337 	acpi_status status;
3338 	int i;
3339 
3340 	status = acpi_get_table("IVRS", 0, &ivrs_base);
3341 	if (status == AE_NOT_FOUND)
3342 		return false;
3343 	else if (ACPI_FAILURE(status)) {
3344 		const char *err = acpi_format_exception(status);
3345 		pr_err("IVRS table error: %s\n", err);
3346 		return false;
3347 	}
3348 
3349 	acpi_put_table(ivrs_base);
3350 
3351 	if (amd_iommu_force_enable)
3352 		goto out;
3353 
3354 	/* Don't use IOMMU if there is Stoney Ridge graphics */
3355 	for (i = 0; i < 32; i++) {
3356 		u32 pci_id;
3357 
3358 		pci_id = read_pci_config(0, i, 0, 0);
3359 		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3360 			pr_info("Disable IOMMU on Stoney Ridge\n");
3361 			return false;
3362 		}
3363 	}
3364 
3365 out:
3366 	/* Make sure ACS will be enabled during PCI probe */
3367 	pci_request_acs();
3368 
3369 	return true;
3370 }
3371 
3372 static __init void iommu_snp_enable(void)
3373 {
3374 #ifdef CONFIG_KVM_AMD_SEV
3375 	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
3376 		return;
3377 	/*
3378 	 * SNP support requires that the IOMMU is enabled and configured
3379 	 * with the V1 page table (DTE[Mode] = 0 is not supported).
3380 	 */
3381 	if (no_iommu || iommu_default_passthrough()) {
3382 		pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
3383 		goto disable_snp;
3384 	}
3385 
3386 	if (amd_iommu_pgtable != PD_MODE_V1) {
3387 		pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
3388 		goto disable_snp;
3389 	}
3390 
3391 	amd_iommu_snp_en = check_feature(FEATURE_SNP);
3392 	if (!amd_iommu_snp_en) {
3393 		pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
3394 		goto disable_snp;
3395 	}
3396 
3397 	/*
3398 	 * Enable host SNP support once SNP support is checked on IOMMU.
3399 	 */
3400 	if (snp_rmptable_init()) {
3401 		pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n");
3402 		goto disable_snp;
3403 	}
3404 
3405 	pr_info("IOMMU SNP support enabled.\n");
3406 	return;
3407 
3408 disable_snp:
3409 	cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3410 #endif
3411 }
3412 
3413 static void amd_iommu_apply_erratum_snp(void)
3414 {
3415 #ifdef CONFIG_KVM_AMD_SEV
3416 	if (!amd_iommu_snp_en)
3417 		return;
3418 
3419 	/* Errata fix for Family 0x19 */
3420 	if (boot_cpu_data.x86 != 0x19)
3421 		return;
3422 
3423 	/* Set event log buffer size to max */
3424 	amd_iommu_evtlog_size = EVTLOG_SIZE_MAX;
3425 	pr_info("Applying erratum: Increase Event log size to 0x%x\n",
3426 		amd_iommu_evtlog_size);
3427 
3428 	/*
3429 	 * Set PPR log buffer size to max.
3430 	 * (Family 0x19, model < 0x10 doesn't support PPR when SNP is enabled).
3431 	 */
3432 	if (boot_cpu_data.x86_model >= 0x10) {
3433 		amd_iommu_pprlog_size = PPRLOG_SIZE_MAX;
3434 		pr_info("Applying erratum: Increase PPR log size to 0x%x\n",
3435 			amd_iommu_pprlog_size);
3436 	}
3437 #endif
3438 }
3439 
3440 /****************************************************************************
3441  *
3442  * AMD IOMMU Initialization State Machine
3443  *
3444  ****************************************************************************/
3445 
3446 static int __init state_next(void)
3447 {
3448 	int ret = 0;
3449 
3450 	switch (init_state) {
3451 	case IOMMU_START_STATE:
3452 		if (!detect_ivrs()) {
3453 			init_state	= IOMMU_NOT_FOUND;
3454 			ret		= -ENODEV;
3455 		} else {
3456 			init_state	= IOMMU_IVRS_DETECTED;
3457 		}
3458 		break;
3459 	case IOMMU_IVRS_DETECTED:
3460 		if (amd_iommu_disabled) {
3461 			init_state = IOMMU_CMDLINE_DISABLED;
3462 			ret = -EINVAL;
3463 		} else {
3464 			ret = early_amd_iommu_init();
3465 			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
3466 		}
3467 		break;
3468 	case IOMMU_ACPI_FINISHED:
3469 		early_enable_iommus();
3470 		x86_platform.iommu_shutdown = disable_iommus;
3471 		init_state = IOMMU_ENABLED;
3472 		break;
3473 	case IOMMU_ENABLED:
3474 		register_syscore(&amd_iommu_syscore);
3475 		iommu_snp_enable();
3476 
3477 		amd_iommu_apply_erratum_snp();
3478 
3479 		/* Allocate/enable event log buffer */
3480 		if (is_kdump_kernel())
3481 			ret = remap_event_buffer();
3482 		else
3483 			ret = alloc_event_buffer();
3484 
3485 		if (ret) {
3486 			init_state = IOMMU_INIT_ERROR;
3487 			break;
3488 		}
3489 		iommu_enable_event_buffer();
3490 
3491 		ret = amd_iommu_init_pci();
3492 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
3493 		break;
3494 	case IOMMU_PCI_INIT:
3495 		ret = amd_iommu_enable_interrupts();
3496 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
3497 		break;
3498 	case IOMMU_INTERRUPTS_EN:
3499 		init_state = IOMMU_INITIALIZED;
3500 		break;
3501 	case IOMMU_INITIALIZED:
3502 		/* Nothing to do */
3503 		break;
3504 	case IOMMU_NOT_FOUND:
3505 	case IOMMU_INIT_ERROR:
3506 	case IOMMU_CMDLINE_DISABLED:
3507 		/* Error states => do nothing */
3508 		ret = -EINVAL;
3509 		break;
3510 	default:
3511 		/* Unknown state */
3512 		BUG();
3513 	}
3514 
3515 	if (ret) {
3516 		free_dma_resources();
3517 		if (!irq_remapping_enabled) {
3518 			disable_iommus();
3519 			free_iommu_resources();
3520 		} else {
3521 			struct amd_iommu *iommu;
3522 			struct amd_iommu_pci_seg *pci_seg;
3523 
3524 			for_each_pci_segment(pci_seg)
3525 				uninit_device_table_dma(pci_seg);
3526 
3527 			for_each_iommu(iommu)
3528 				amd_iommu_flush_all_caches(iommu);
3529 		}
3530 	}
3531 	return ret;
3532 }
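
/*
 * Normal boot progression through the state machine (a sketch derived
 * from the switch above):
 *
 *	IOMMU_START_STATE -> IOMMU_IVRS_DETECTED -> IOMMU_ACPI_FINISHED
 *	  -> IOMMU_ENABLED -> IOMMU_PCI_INIT -> IOMMU_INTERRUPTS_EN
 *	  -> IOMMU_INITIALIZED
 *
 * Failures land in IOMMU_NOT_FOUND, IOMMU_INIT_ERROR or
 * IOMMU_CMDLINE_DISABLED, which iommu_go_to_state() treats as final.
 */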
3533 
3534 static int __init iommu_go_to_state(enum iommu_init_state state)
3535 {
3536 	int ret = -EINVAL;
3537 
3538 	while (init_state != state) {
3539 		if (init_state == IOMMU_NOT_FOUND         ||
3540 		    init_state == IOMMU_INIT_ERROR        ||
3541 		    init_state == IOMMU_CMDLINE_DISABLED)
3542 			break;
3543 		ret = state_next();
3544 	}
3545 
3546 	/*
3547 	 * SNP platform initialization requires IOMMUs to be fully configured.
3548 	 * If the SNP support on IOMMUs has NOT been checked, simply mark SNP
3549 	 * as unsupported. If the SNP support on IOMMUs has been checked and
3550 	 * host SNP support enabled but RMP enforcement has not been enabled
3551 	 * in IOMMUs, then the system is in a half-baked state, but can limp
3552 	 * along as all memory should be Hypervisor-Owned in the RMP. WARN,
3553 	 * but leave SNP as "supported" to avoid confusing the kernel.
3554 	 */
3555 	if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
3556 	    !WARN_ON_ONCE(amd_iommu_snp_en))
3557 		cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3558 
3559 	return ret;
3560 }
3561 
3562 #ifdef CONFIG_IRQ_REMAP
3563 int __init amd_iommu_prepare(void)
3564 {
3565 	int ret;
3566 
3567 	amd_iommu_irq_remap = true;
3568 
3569 	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3570 	if (ret) {
3571 		amd_iommu_irq_remap = false;
3572 		return ret;
3573 	}
3574 
3575 	return amd_iommu_irq_remap ? 0 : -ENODEV;
3576 }
3577 
3578 int __init amd_iommu_enable(void)
3579 {
3580 	int ret;
3581 
3582 	ret = iommu_go_to_state(IOMMU_ENABLED);
3583 	if (ret)
3584 		return ret;
3585 
3586 	irq_remapping_enabled = 1;
3587 	return amd_iommu_xt_mode;
3588 }
3589 
3590 void amd_iommu_disable(void)
3591 {
3592 	amd_iommu_suspend(NULL);
3593 }
3594 
3595 int amd_iommu_reenable(int mode)
3596 {
3597 	amd_iommu_resume(NULL);
3598 
3599 	return 0;
3600 }
3601 
3602 int amd_iommu_enable_faulting(unsigned int cpu)
3603 {
3604 	/* We enable MSI later when PCI is initialized */
3605 	return 0;
3606 }
3607 #endif
3608 
3609 /*
3610  * This is the core init function for AMD IOMMU hardware in the system.
3611  * This function is called from the generic x86 DMA layer initialization
3612  * code.
3613  */
3614 static int __init amd_iommu_init(void)
3615 {
3616 	int ret;
3617 
3618 	ret = iommu_go_to_state(IOMMU_INITIALIZED);
3619 #ifdef CONFIG_GART_IOMMU
3620 	if (ret && list_empty(&amd_iommu_list)) {
3621 		/*
3622 		 * We failed to initialize the AMD IOMMU - try fallback
3623 		 * to GART if possible.
3624 		 */
3625 		gart_iommu_init();
3626 	}
3627 #endif
3628 
3629 	if (!ret)
3630 		amd_iommu_debugfs_setup();
3631 
3632 	return ret;
3633 }
3634 
3635 static bool amd_iommu_sme_check(void)
3636 {
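	/* Only Fam17h with memory encryption active needs the microcode check */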
3637 	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3638 	    (boot_cpu_data.x86 != 0x17))
3639 		return true;
3640 
3641 	/* For Fam17h, a specific level of support is required */
3642 	if (boot_cpu_data.microcode >= 0x08001205)
3643 		return true;
3644 
3645 	if ((boot_cpu_data.microcode >= 0x08001126) &&
3646 	    (boot_cpu_data.microcode <= 0x080011ff))
3647 		return true;
3648 
3649 	pr_notice("IOMMU not currently supported when SME is active\n");
3650 
3651 	return false;
3652 }
3653 
3654 /****************************************************************************
3655  *
3656  * Early detect code. This code runs at IOMMU detection time in the DMA
3657  * layer. It just looks if there is an IVRS ACPI table to detect AMD
3658  * IOMMUs
3659  *
3660  ****************************************************************************/
3661 void __init amd_iommu_detect(void)
3662 {
3663 	int ret;
3664 
3665 	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3666 		goto disable_snp;
3667 
3668 	if (!amd_iommu_sme_check())
3669 		goto disable_snp;
3670 
3671 	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3672 	if (ret)
3673 		goto disable_snp;
3674 
3675 	amd_iommu_detected = true;
3676 	iommu_detected = 1;
3677 	x86_init.iommu.iommu_init = amd_iommu_init;
3678 	return;
3679 
3680 disable_snp:
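	/* SNP requires fully-configured IOMMUs, so clear host SNP support here */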
3681 	if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
3682 		cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3683 }
3684 
3685 /****************************************************************************
3686  *
3687  * Parsing functions for the AMD IOMMU specific kernel command line
3688  * options.
3689  *
3690  ****************************************************************************/
3691 
3692 static int __init parse_amd_iommu_dump(char *str)
3693 {
3694 	amd_iommu_dump = true;
3695 
3696 	return 1;
3697 }
3698 
3699 static int __init parse_amd_iommu_intr(char *str)
3700 {
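	/*
	 * "legacy" selects AMD_IOMMU_GUEST_IR_LEGACY_GA and "vapic" selects
	 * AMD_IOMMU_GUEST_IR_VAPIC (guest virtual APIC support).
	 */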
3701 	for (; *str; ++str) {
3702 		if (strncmp(str, "legacy", 6) == 0) {
3703 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3704 			break;
3705 		}
3706 		if (strncmp(str, "vapic", 5) == 0) {
3707 			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3708 			break;
3709 		}
3710 	}
3711 	return 1;
3712 }
3713 
3714 static int __init parse_amd_iommu_options(char *str)
3715 {
3716 	if (!str)
3717 		return -EINVAL;
3718 
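	/* Options are comma-separated; unknown tokens are reported and skipped */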
3719 	while (*str) {
3720 		if (strncmp(str, "fullflush", 9) == 0) {
3721 			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3722 			iommu_set_dma_strict();
3723 		} else if (strncmp(str, "force_enable", 12) == 0) {
3724 			amd_iommu_force_enable = true;
3725 		} else if (strncmp(str, "off", 3) == 0) {
3726 			amd_iommu_disabled = true;
3727 		} else if (strncmp(str, "force_isolation", 15) == 0) {
3728 			amd_iommu_force_isolation = true;
3729 		} else if (strncmp(str, "pgtbl_v1", 8) == 0) {
3730 			amd_iommu_pgtable = PD_MODE_V1;
3731 		} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
3732 			amd_iommu_pgtable = PD_MODE_V2;
3733 		} else if (strncmp(str, "irtcachedis", 11) == 0) {
3734 			amd_iommu_irtcachedis = true;
3735 		} else if (strncmp(str, "nohugepages", 11) == 0) {
3736 			pr_info("Restricting V1 page-sizes to 4KiB\n");
3737 			amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K;
3738 		} else if (strncmp(str, "v2_pgsizes_only", 15) == 0) {
3739 			pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB\n");
3740 			amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
3741 		} else {
3742 			pr_notice("Unknown option - '%s'\n", str);
3743 		}
3744 
3745 		str += strcspn(str, ",");
3746 		while (*str == ',')
3747 			str++;
3748 	}
3749 
3750 	return 1;
3751 }
3752 
3753 static int __init parse_ivrs_ioapic(char *str)
3754 {
3755 	u32 seg = 0, bus, dev, fn;
3756 	int id, i;
3757 	u32 devid;
3758 
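	/*
	 * Accepted formats: ivrs_ioapic=<id>@<bus>:<dev>.<fn> and
	 * ivrs_ioapic=<id>@<seg>:<bus>:<dev>.<fn>; the bracketed forms
	 * below are deprecated.
	 */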
3759 	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3760 	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3761 		goto found;
3762 
3763 	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3764 	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3765 		pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
3766 			str, id, seg, bus, dev, fn);
3767 		goto found;
3768 	}
3769 
3770 	pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3771 	return 1;
3772 
3773 found:
3774 	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3775 		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3776 			str);
3777 		return 1;
3778 	}
3779 
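	/* Pack seg/bus/dev/fn into the 32-bit SBDF id stored in the early map */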
3780 	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3781 
3782 	cmdline_maps			= true;
3783 	i				= early_ioapic_map_size++;
3784 	early_ioapic_map[i].id		= id;
3785 	early_ioapic_map[i].devid	= devid;
3786 	early_ioapic_map[i].cmd_line	= true;
3787 
3788 	return 1;
3789 }
3790 
3791 static int __init parse_ivrs_hpet(char *str)
3792 {
3793 	u32 seg = 0, bus, dev, fn;
3794 	int id, i;
3795 	u32 devid;
3796 
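	/* Same accepted formats as ivrs_ioapic, with the bracketed form deprecated */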
3797 	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3798 	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3799 		goto found;
3800 
3801 	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3802 	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3803 		pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
3804 			str, id, seg, bus, dev, fn);
3805 		goto found;
3806 	}
3807 
3808 	pr_err("Invalid command line: ivrs_hpet%s\n", str);
3809 	return 1;
3810 
3811 found:
3812 	if (early_hpet_map_size == EARLY_MAP_SIZE) {
3813 		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3814 			str);
3815 		return 1;
3816 	}
3817 
3818 	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3819 
3820 	cmdline_maps			= true;
3821 	i				= early_hpet_map_size++;
3822 	early_hpet_map[i].id		= id;
3823 	early_hpet_map[i].devid		= devid;
3824 	early_hpet_map[i].cmd_line	= true;
3825 
3826 	return 1;
3827 }
3828 
3829 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
3830 
3831 static int __init parse_ivrs_acpihid(char *str)
3832 {
3833 	u32 seg = 0, bus, dev, fn;
3834 	char *hid, *uid, *p, *addr;
3835 	char acpiid[ACPIID_LEN + 1] = { }; /* sized to include the terminating NUL */
3836 	int i;
3837 
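	/*
	 * Accepted format: ivrs_acpihid=<hid>:<uid>@[<seg>:]<bus>:<dev>.<fn>;
	 * the bracketed ivrs_acpihid[...]= form is deprecated but still parsed.
	 */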
3838 	addr = strchr(str, '@');
3839 	if (!addr) {
3840 		addr = strchr(str, '=');
3841 		if (!addr)
3842 			goto not_found;
3843 
3844 		++addr;
3845 
3846 		if (strlen(addr) > ACPIID_LEN)
3847 			goto not_found;
3848 
3849 		if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
3850 		    sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
3851 			pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
3852 				str, acpiid, seg, bus, dev, fn);
3853 			goto found;
3854 		}
3855 		goto not_found;
3856 	}
3857 
3858 	/* We have the '@', make it the terminator to get just the acpiid */
3859 	*addr++ = 0;
3860 
3861 	if (strlen(str) > ACPIID_LEN)
3862 		goto not_found;
3863 
3864 	if (sscanf(str, "=%s", acpiid) != 1)
3865 		goto not_found;
3866 
3867 	if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
3868 	    sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
3869 		goto found;
3870 
3871 not_found:
3872 	pr_err("Invalid command line: ivrs_acpihid%s\n", str);
3873 	return 1;
3874 
3875 found:
3876 	p = acpiid;
3877 	hid = strsep(&p, ":");
3878 	uid = p;
3879 
3880 	if (!hid || !(*hid) || !uid) {
3881 		pr_err("Invalid command line: hid or uid\n");
3882 		return 1;
3883 	}
3884 
3885 	/*
3886 	 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
3887 	 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
3888 	 */
3889 	while (*uid == '0' && *(uid + 1))
3890 		uid++;
3891 
3892 	if (strlen(hid) >= ACPIHID_HID_LEN) {
3893 		pr_err("Invalid command line: hid is too long\n");
3894 		return 1;
3895 	} else if (strlen(uid) >= ACPIHID_UID_LEN) {
3896 		pr_err("Invalid command line: uid is too long\n");
3897 		return 1;
3898 	}
3899 
3900 	if (early_acpihid_map_size == EARLY_MAP_SIZE) {
		pr_err("Early ACPIHID map overflow - ignoring ivrs_acpihid%s\n",
			str);
		return 1;
	}

	i = early_acpihid_map_size++;
3901 	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3902 	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3903 	early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3904 	early_acpihid_map[i].cmd_line	= true;
3905 
3906 	return 1;
3907 }
3908 
3909 __setup("amd_iommu_dump",	parse_amd_iommu_dump);
3910 __setup("amd_iommu=",		parse_amd_iommu_options);
3911 __setup("amd_iommu_intr=",	parse_amd_iommu_intr);
3912 __setup("ivrs_ioapic",		parse_ivrs_ioapic);
3913 __setup("ivrs_hpet",		parse_ivrs_hpet);
3914 __setup("ivrs_acpihid",		parse_ivrs_acpihid);
3915 
3916 bool amd_iommu_pasid_supported(void)
3917 {
3918 	/* CPU page table size should match IOMMU guest page table size */
3919 	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3920 	    amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
3921 		return false;
3922 
3923 	/*
3924 	 * Since DTE[Mode]=0 is prohibited on SNP-enabled system
3925 	 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
3926 	 * setting up IOMMUv1 page table.
3927 	 */
3928 	return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en;
3929 }
3930 
3931 struct amd_iommu *get_amd_iommu(unsigned int idx)
3932 {
3933 	unsigned int i = 0;
3934 	struct amd_iommu *iommu;
3935 
3936 	for_each_iommu(iommu)
3937 		if (i++ == idx)
3938 			return iommu;
3939 	return NULL;
3940 }
3941 
3942 /****************************************************************************
3943  *
3944  * IOMMU EFR Performance Counter support functionality. This code allows
3945  * access to the IOMMU PC functionality.
3946  *
3947  ****************************************************************************/
3948 
3949 u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3950 {
3951 	struct amd_iommu *iommu = get_amd_iommu(idx);
3952 
3953 	if (iommu)
3954 		return iommu->max_banks;
3955 
3956 	return 0;
3957 }
3958 
3959 bool amd_iommu_pc_supported(void)
3960 {
3961 	return amd_iommu_pc_present;
3962 }
3963 
3964 u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3965 {
3966 	struct amd_iommu *iommu = get_amd_iommu(idx);
3967 
3968 	if (iommu)
3969 		return iommu->max_counters;
3970 
3971 	return 0;
3972 }
3973 
3974 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3975 				u8 fxn, u64 *value, bool is_write)
3976 {
3977 	u32 offset;
3978 	u32 max_offset_lim;
3979 
3980 	/* Make sure the IOMMU PC resource is available */
3981 	if (!amd_iommu_pc_present)
3982 		return -ENODEV;
3983 
3984 	/* Check for valid iommu and pc register indexing */
3985 	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3986 		return -ENODEV;
3987 
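	/*
	 * Counter banks start at MMIO offset 0x40000, one 4K page per bank
	 * with 0x100 bytes of register space per counter; fxn selects the
	 * register within the counter.
	 */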
3988 	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3989 
3990 	/* Limit the offset to the hw defined mmio region aperture */
3991 	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3992 				(iommu->max_counters << 8) | 0x28);
3993 	if ((offset < MMIO_CNTR_REG_OFFSET) ||
3994 	    (offset > max_offset_lim))
3995 		return -EINVAL;
3996 
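	/* Counters are 48 bits wide and accessed as two 32-bit MMIO words */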
3997 	if (is_write) {
3998 		u64 val = *value & GENMASK_ULL(47, 0);
3999 
4000 		writel((u32)val, iommu->mmio_base + offset);
4001 		writel((val >> 32), iommu->mmio_base + offset + 4);
4002 	} else {
4003 		*value = readl(iommu->mmio_base + offset + 4);
4004 		*value <<= 32;
4005 		*value |= readl(iommu->mmio_base + offset);
4006 		*value &= GENMASK_ULL(47, 0);
4007 	}
4008 
4009 	return 0;
4010 }
4011 
4012 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
4013 {
4014 	if (!iommu)
4015 		return -EINVAL;
4016 
4017 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
4018 }
4019 
4020 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
4021 {
4022 	if (!iommu)
4023 		return -EINVAL;
4024 
4025 	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
4026 }
4027 
4028 #ifdef CONFIG_KVM_AMD_SEV
4029 static int iommu_page_make_shared(void *page)
4030 {
4031 	unsigned long paddr, pfn;
4032 
4033 	paddr = iommu_virt_to_phys(page);
4034 	/* The C-bit may be set in the paddr */
4035 	pfn = __sme_clr(paddr) >> PAGE_SHIFT;
4036 
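	/*
	 * A 2M-aligned PFN may be covered by a huge RMP entry; PSMASH it
	 * into 4K entries before making the page shared.
	 */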
4037 	if (!(pfn % PTRS_PER_PMD)) {
4038 		int ret, level;
4039 		bool assigned;
4040 
4041 		ret = snp_lookup_rmpentry(pfn, &assigned, &level);
4042 		if (ret) {
4043 			pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret);
4044 			return ret;
4045 		}
4046 
4047 		if (!assigned) {
4048 			pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn);
4049 			return -EINVAL;
4050 		}
4051 
4052 		if (level > PG_LEVEL_4K) {
4053 			ret = psmash(pfn);
4054 			if (!ret)
4055 				goto done;
4056 
4057 			pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n",
4058 				pfn, ret, level);
4059 			return ret;
4060 		}
4061 	}
4062 
4063 done:
4064 	return rmp_make_shared(pfn, PG_LEVEL_4K);
4065 }
4066 
4067 static int iommu_make_shared(void *va, size_t size)
4068 {
4069 	void *page;
4070 	int ret;
4071 
4072 	if (!va)
4073 		return 0;
4074 
4075 	for (page = va; page < (va + size); page += PAGE_SIZE) {
4076 		ret = iommu_page_make_shared(page);
4077 		if (ret)
4078 			return ret;
4079 	}
4080 
4081 	return 0;
4082 }
4083 
4084 int amd_iommu_snp_disable(void)
4085 {
4086 	struct amd_iommu *iommu;
4087 	int ret;
4088 
4089 	if (!amd_iommu_snp_en)
4090 		return 0;
4091 
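	/*
	 * Return the event log, PPR log and completion-wait semaphore of
	 * each IOMMU to the shared (hypervisor-owned) state in the RMP.
	 */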
4092 	for_each_iommu(iommu) {
4093 		ret = iommu_make_shared(iommu->evt_buf, amd_iommu_evtlog_size);
4094 		if (ret)
4095 			return ret;
4096 
4097 		ret = iommu_make_shared(iommu->ppr_log, amd_iommu_pprlog_size);
4098 		if (ret)
4099 			return ret;
4100 
4101 		ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE);
4102 		if (ret)
4103 			return ret;
4104 	}
4105 
4106 	return 0;
4107 }
4108 EXPORT_SYMBOL_GPL(amd_iommu_snp_disable);
4109 
4110 bool amd_iommu_sev_tio_supported(void)
4111 {
4112 	return check_feature2(FEATURE_SEVSNPIO_SUP);
4113 }
4114 EXPORT_SYMBOL_GPL(amd_iommu_sev_tio_supported);
4115 #endif
4116