1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
4 * Author: Joerg Roedel <jroedel@suse.de>
5 * Leo Duran <leo.duran@amd.com>
6 */
7
8 #define pr_fmt(fmt) "AMD-Vi: " fmt
9 #define dev_fmt(fmt) pr_fmt(fmt)
10
11 #include <linux/pci.h>
12 #include <linux/acpi.h>
13 #include <linux/list.h>
14 #include <linux/bitmap.h>
15 #include <linux/syscore_ops.h>
16 #include <linux/interrupt.h>
17 #include <linux/msi.h>
18 #include <linux/irq.h>
19 #include <linux/amd-iommu.h>
20 #include <linux/export.h>
21 #include <linux/kmemleak.h>
22 #include <linux/cc_platform.h>
23 #include <linux/iopoll.h>
24 #include <asm/pci-direct.h>
25 #include <asm/iommu.h>
26 #include <asm/apic.h>
27 #include <asm/gart.h>
28 #include <asm/x86_init.h>
29 #include <asm/io_apic.h>
30 #include <asm/irq_remapping.h>
31 #include <asm/set_memory.h>
32 #include <asm/sev.h>
33
34 #include <linux/crash_dump.h>
35
36 #include "amd_iommu.h"
37 #include "../irq_remapping.h"
38 #include "../iommu-pages.h"
39
40 /*
41 * definitions for the ACPI scanning code
42 */
43 #define IVRS_HEADER_LENGTH 48
44
45 #define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40
46 #define ACPI_IVMD_TYPE_ALL 0x20
47 #define ACPI_IVMD_TYPE 0x21
48 #define ACPI_IVMD_TYPE_RANGE 0x22
49
50 #define IVHD_DEV_ALL 0x01
51 #define IVHD_DEV_SELECT 0x02
52 #define IVHD_DEV_SELECT_RANGE_START 0x03
53 #define IVHD_DEV_RANGE_END 0x04
54 #define IVHD_DEV_ALIAS 0x42
55 #define IVHD_DEV_ALIAS_RANGE 0x43
56 #define IVHD_DEV_EXT_SELECT 0x46
57 #define IVHD_DEV_EXT_SELECT_RANGE 0x47
58 #define IVHD_DEV_SPECIAL 0x48
59 #define IVHD_DEV_ACPI_HID 0xf0
60
61 #define UID_NOT_PRESENT 0
62 #define UID_IS_INTEGER 1
63 #define UID_IS_CHARACTER 2
64
65 #define IVHD_SPECIAL_IOAPIC 1
66 #define IVHD_SPECIAL_HPET 2
67
68 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01
69 #define IVHD_FLAG_PASSPW_EN_MASK 0x02
70 #define IVHD_FLAG_RESPASSPW_EN_MASK 0x04
71 #define IVHD_FLAG_ISOC_EN_MASK 0x08
72
73 #define IVMD_FLAG_EXCL_RANGE 0x08
74 #define IVMD_FLAG_IW 0x04
75 #define IVMD_FLAG_IR 0x02
76 #define IVMD_FLAG_UNITY_MAP 0x01
77
78 #define ACPI_DEVFLAG_INITPASS 0x01
79 #define ACPI_DEVFLAG_EXTINT 0x02
80 #define ACPI_DEVFLAG_NMI 0x04
81 #define ACPI_DEVFLAG_SYSMGT1 0x10
82 #define ACPI_DEVFLAG_SYSMGT2 0x20
83 #define ACPI_DEVFLAG_LINT0 0x40
84 #define ACPI_DEVFLAG_LINT1 0x80
85 #define ACPI_DEVFLAG_ATSDIS 0x10000000
86
87 #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
88 | ((dev & 0x1f) << 3) | (fn & 0x7))
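/* For illustration: seg 0x0000, bus 0x40, dev 0x1f, fn 0x7 packs to 0x40ff. */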
89
90 /*
91 * ACPI table definitions
92 *
93 * These data structures are laid over the table to parse the important values
94 * out of it.
95 */
96
97 /*
98 * structure describing one IOMMU in the ACPI table. Typically followed by one
99 * or more ivhd_entry structures.
100 */
101 struct ivhd_header {
102 u8 type;
103 u8 flags;
104 u16 length;
105 u16 devid;
106 u16 cap_ptr;
107 u64 mmio_phys;
108 u16 pci_seg;
109 u16 info;
110 u32 efr_attr;
111
112 /* Following only valid on IVHD type 11h and 40h */
113 u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
114 u64 efr_reg2;
115 } __attribute__((packed));
116
117 /*
118 * A device entry describing which devices a specific IOMMU translates and
119 * which requestor ids they use.
120 */
121 struct ivhd_entry {
122 u8 type;
123 u16 devid;
124 u8 flags;
125 struct_group(ext_hid,
126 u32 ext;
127 u32 hidh;
128 );
129 u64 cid;
130 u8 uidf;
131 u8 uidl;
132 u8 uid;
133 } __attribute__((packed));
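/*
 * Note: for IVHD_DEV_ACPI_HID entries 'uid' only marks the start of a
 * variable-length UID field; 'uidf' gives its format and 'uidl' its length
 * in bytes (see the IVHD_DEV_ACPI_HID parsing below).
 */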
134
135 /*
136 * An AMD IOMMU memory definition structure. It defines things like exclusion
137 * ranges for devices and regions that should be unity mapped.
138 */
139 struct ivmd_header {
140 u8 type;
141 u8 flags;
142 u16 length;
143 u16 devid;
144 u16 aux;
145 u16 pci_seg;
146 u8 resv[6];
147 u64 range_start;
148 u64 range_length;
149 } __attribute__((packed));
150
151 bool amd_iommu_dump;
152 bool amd_iommu_irq_remap __read_mostly;
153
154 enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
155 /* Host page table level */
156 u8 amd_iommu_hpt_level;
157 /* Guest page table level */
158 int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
159
160 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
161 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
162
163 static bool amd_iommu_detected;
164 static bool amd_iommu_disabled __initdata;
165 static bool amd_iommu_force_enable __initdata;
166 static bool amd_iommu_irtcachedis;
167 static int amd_iommu_target_ivhd_type;
168
169 /* Global EFR and EFR2 registers */
170 u64 amd_iommu_efr;
171 u64 amd_iommu_efr2;
172
173 /* Host (v1) page table is not supported */
174 bool amd_iommu_hatdis;
175
176 /* SNP is enabled on the system? */
177 bool amd_iommu_snp_en;
178 EXPORT_SYMBOL(amd_iommu_snp_en);
179
180 LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */
181 LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */
182 LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */
183
184 /* Number of IOMMUs present in the system */
185 static int amd_iommus_present;
186
187 /* IOMMUs have a non-present cache? */
188 bool amd_iommu_np_cache __read_mostly;
189 bool amd_iommu_iotlb_sup __read_mostly = true;
190
191 static bool amd_iommu_pc_present __read_mostly;
192 bool amdr_ivrs_remap_support __read_mostly;
193
194 bool amd_iommu_force_isolation __read_mostly;
195
196 unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;
197
198 enum iommu_init_state {
199 IOMMU_START_STATE,
200 IOMMU_IVRS_DETECTED,
201 IOMMU_ACPI_FINISHED,
202 IOMMU_ENABLED,
203 IOMMU_PCI_INIT,
204 IOMMU_INTERRUPTS_EN,
205 IOMMU_INITIALIZED,
206 IOMMU_NOT_FOUND,
207 IOMMU_INIT_ERROR,
208 IOMMU_CMDLINE_DISABLED,
209 };
210
211 /* Early ioapic and hpet maps from kernel command line */
212 #define EARLY_MAP_SIZE 4
213 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
214 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
215 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
216
217 static int __initdata early_ioapic_map_size;
218 static int __initdata early_hpet_map_size;
219 static int __initdata early_acpihid_map_size;
220
221 static bool __initdata cmdline_maps;
222
223 static enum iommu_init_state init_state = IOMMU_START_STATE;
224
225 static int amd_iommu_enable_interrupts(void);
226 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
227
228 static bool amd_iommu_pre_enabled = true;
229
230 static u32 amd_iommu_ivinfo __initdata;
231
232 bool translation_pre_enabled(struct amd_iommu *iommu)
233 {
234 return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
235 }
236
237 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
238 {
239 iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
240 }
241
242 static void init_translation_status(struct amd_iommu *iommu)
243 {
244 u64 ctrl;
245
246 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
247 if (ctrl & (1<<CONTROL_IOMMU_EN))
248 iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
249 }
250
251 int amd_iommu_get_num_iommus(void)
252 {
253 return amd_iommus_present;
254 }
255
256 bool amd_iommu_ht_range_ignore(void)
257 {
258 return check_feature2(FEATURE_HT_RANGE_IGNORE);
259 }
260
261 /*
262 * Iterate through all the IOMMUs to compute the common EFR/EFR2 masks
263 * and warn if any inconsistency among them is found.
264 */
265 static __init void get_global_efr(void)
266 {
267 struct amd_iommu *iommu;
268
269 for_each_iommu(iommu) {
270 u64 tmp = iommu->features;
271 u64 tmp2 = iommu->features2;
272
273 if (list_is_first(&iommu->list, &amd_iommu_list)) {
274 amd_iommu_efr = tmp;
275 amd_iommu_efr2 = tmp2;
276 continue;
277 }
278
279 if (amd_iommu_efr == tmp &&
280 amd_iommu_efr2 == tmp2)
281 continue;
282
283 pr_err(FW_BUG
284 "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
285 tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
286 iommu->index, iommu->pci_seg->id,
287 PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
288 PCI_FUNC(iommu->devid));
289
290 amd_iommu_efr &= tmp;
291 amd_iommu_efr2 &= tmp2;
292 }
293
294 pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
295 }
296
297 /*
298 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
299 * Default to IVHD EFR since it is available sooner
300 * (i.e. before PCI init).
301 */
302 static void __init early_iommu_features_init(struct amd_iommu *iommu,
303 struct ivhd_header *h)
304 {
305 if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
306 iommu->features = h->efr_reg;
307 iommu->features2 = h->efr_reg2;
308 }
309 if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
310 amdr_ivrs_remap_support = true;
311 }
312
313 /* Access to l1 and l2 indexed register spaces */
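/*
 * Both spaces are reached indirectly through PCI config space: write the
 * index (plus a write-enable bit for stores) to the address register at
 * 0xf8 (L1) or 0xf0 (L2), then access the data register at 0xfc or 0xf4.
 */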
314
315 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
316 {
317 u32 val;
318
319 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
320 pci_read_config_dword(iommu->dev, 0xfc, &val);
321 return val;
322 }
323
324 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
325 {
326 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
327 pci_write_config_dword(iommu->dev, 0xfc, val);
328 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
329 }
330
331 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
332 {
333 u32 val;
334
335 pci_write_config_dword(iommu->dev, 0xf0, address);
336 pci_read_config_dword(iommu->dev, 0xf4, &val);
337 return val;
338 }
339
340 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
341 {
342 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
343 pci_write_config_dword(iommu->dev, 0xf4, val);
344 }
345
346 /****************************************************************************
347 *
348 * AMD IOMMU MMIO register space handling functions
349 *
350 * These functions are used to program the IOMMU device registers in
351 * MMIO space required for that driver.
352 *
353 ****************************************************************************/
354
355 /*
356 * This function sets the exclusion range in the IOMMU. DMA accesses to the
357 * exclusion range are passed through untranslated.
358 */
359 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
360 {
361 u64 start = iommu->exclusion_start & PAGE_MASK;
362 u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
363 u64 entry;
364
365 if (!iommu->exclusion_start)
366 return;
367
368 entry = start | MMIO_EXCL_ENABLE_MASK;
369 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
370 &entry, sizeof(entry));
371
372 entry = limit;
373 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
374 &entry, sizeof(entry));
375 }
376
377 static void iommu_set_cwwb_range(struct amd_iommu *iommu)
378 {
379 u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
380 u64 entry = start & PM_ADDR_MASK;
381
382 if (!check_feature(FEATURE_SNP))
383 return;
384
385 /* Note:
386 * Re-purpose Exclusion base/limit registers for Completion wait
387 * write-back base/limit.
388 */
389 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
390 &entry, sizeof(entry));
391
392 /* Note:
393 * Default to 4 Kbytes, which can be specified by setting base
394 * address equal to the limit address.
395 */
396 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
397 &entry, sizeof(entry));
398 }
399
400 /* Programs the physical address of the device table into the IOMMU hardware */
401 static void iommu_set_device_table(struct amd_iommu *iommu)
402 {
403 u64 entry;
404 u32 dev_table_size = iommu->pci_seg->dev_table_size;
405 void *dev_table = (void *)get_dev_table(iommu);
406
407 BUG_ON(iommu->mmio_base == NULL);
408
409 if (is_kdump_kernel())
410 return;
411
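/*
 * The Device Table Base Address register takes the table's physical base
 * address plus a size field encoded as the number of 4K pages minus one.
 */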
412 entry = iommu_virt_to_phys(dev_table);
413 entry |= (dev_table_size >> 12) - 1;
414 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
415 &entry, sizeof(entry));
416 }
417
418 static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift)
419 {
420 u64 ctrl;
421
422 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
423 mask <<= shift;
424 ctrl &= ~mask;
425 ctrl |= (val << shift) & mask;
426 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
427 }
428
429 /* Generic functions to enable/disable certain features of the IOMMU. */
430 void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
431 {
432 iommu_feature_set(iommu, 1ULL, 1ULL, bit);
433 }
434
435 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
436 {
437 iommu_feature_set(iommu, 0ULL, 1ULL, bit);
438 }
439
440 /* Function to enable the hardware */
441 static void iommu_enable(struct amd_iommu *iommu)
442 {
443 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
444 }
445
446 static void iommu_disable(struct amd_iommu *iommu)
447 {
448 if (!iommu->mmio_base)
449 return;
450
451 /* Disable command buffer */
452 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
453
454 /* Disable event logging and event interrupts */
455 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
456 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
457
458 /* Disable IOMMU GA_LOG */
459 iommu_feature_disable(iommu, CONTROL_GALOG_EN);
460 iommu_feature_disable(iommu, CONTROL_GAINT_EN);
461
462 /* Disable IOMMU PPR logging */
463 iommu_feature_disable(iommu, CONTROL_PPRLOG_EN);
464 iommu_feature_disable(iommu, CONTROL_PPRINT_EN);
465
466 /* Disable IOMMU hardware itself */
467 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
468
469 /* Clear IRTE cache disabling bit */
470 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
471 }
472
473 /*
474 * Mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
475 * the system has its own.
476 */
477 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
478 {
479 if (!request_mem_region(address, end, "amd_iommu")) {
480 pr_err("Can not reserve memory region %llx-%llx for mmio\n",
481 address, end);
482 pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
483 return NULL;
484 }
485
486 return (u8 __iomem *)ioremap(address, end);
487 }
488
489 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
490 {
491 if (iommu->mmio_base)
492 iounmap(iommu->mmio_base);
493 release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
494 }
495
496 static inline u32 get_ivhd_header_size(struct ivhd_header *h)
497 {
498 u32 size = 0;
499
500 switch (h->type) {
501 case 0x10:
502 size = 24;
503 break;
504 case 0x11:
505 case 0x40:
506 size = 40;
507 break;
508 }
509 return size;
510 }
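/*
 * Type 10h is the 24-byte legacy IVHD header; types 11h and 40h carry the
 * EFR image (see struct ivhd_header) and use the 40-byte header.
 */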
511
512 /****************************************************************************
513 *
514 * The functions below belong to the first pass of AMD IOMMU ACPI table
515 * parsing. In this pass we try to find out the highest device id this
516 * code has to handle. Based on this information the size of the shared data
517 * structures is determined later.
518 *
519 ****************************************************************************/
520
521 /*
522 * This function calculates the length of a given IVHD entry
523 */
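/*
 * Bits 7:6 of the entry type encode the entry size: e.g. IVHD_DEV_SELECT
 * (0x02) is a 4-byte entry and IVHD_DEV_ALIAS (0x42) an 8-byte entry.
 * IVHD_DEV_ACPI_HID entries are variable length.
 */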
524 static inline int ivhd_entry_length(u8 *ivhd)
525 {
526 u32 type = ((struct ivhd_entry *)ivhd)->type;
527
528 if (type < 0x80) {
529 return 0x04 << (*ivhd >> 6);
530 } else if (type == IVHD_DEV_ACPI_HID) {
531 /* For ACPI_HID, offset 21 is uid len */
532 return *((u8 *)ivhd + 21) + 22;
533 }
534 return 0;
535 }
536
537 /*
538 * After reading the highest device id from the IOMMU PCI capability header
539 * this function checks whether a higher device id is defined in the ACPI table.
540 */
541 static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
542 {
543 u8 *p = (void *)h, *end = (void *)h;
544 struct ivhd_entry *dev;
545 int last_devid = -EINVAL;
546
547 u32 ivhd_size = get_ivhd_header_size(h);
548
549 if (!ivhd_size) {
550 pr_err("Unsupported IVHD type %#x\n", h->type);
551 return -EINVAL;
552 }
553
554 p += ivhd_size;
555 end += h->length;
556
557 while (p < end) {
558 dev = (struct ivhd_entry *)p;
559 switch (dev->type) {
560 case IVHD_DEV_ALL:
561 /* Use maximum BDF value for DEV_ALL */
562 return 0xffff;
563 case IVHD_DEV_SELECT:
564 case IVHD_DEV_RANGE_END:
565 case IVHD_DEV_ALIAS:
566 case IVHD_DEV_EXT_SELECT:
567 /* all the above subfield types refer to device ids */
568 if (dev->devid > last_devid)
569 last_devid = dev->devid;
570 break;
571 default:
572 break;
573 }
574 p += ivhd_entry_length(p);
575 }
576
577 WARN_ON(p != end);
578
579 return last_devid;
580 }
581
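/*
 * Per the ACPI spec, the byte sum of the whole table (including the
 * checksum field itself) must be zero modulo 256.
 */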
582 static int __init check_ivrs_checksum(struct acpi_table_header *table)
583 {
584 int i;
585 u8 checksum = 0, *p = (u8 *)table;
586
587 for (i = 0; i < table->length; ++i)
588 checksum += p[i];
589 if (checksum != 0) {
590 /* ACPI table corrupt */
591 pr_err(FW_BUG "IVRS invalid checksum\n");
592 return -ENODEV;
593 }
594
595 return 0;
596 }
597
598 /*
599 * Iterate over all IVHD entries in the ACPI table and find the highest device
600 * id which we need to handle. This is the first of three functions which parse
601 * the ACPI table. So we check the checksum here.
602 */
603 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
604 {
605 u8 *p = (u8 *)table, *end = (u8 *)table;
606 struct ivhd_header *h;
607 int last_devid, last_bdf = 0;
608
609 p += IVRS_HEADER_LENGTH;
610
611 end += table->length;
612 while (p < end) {
613 h = (struct ivhd_header *)p;
614 if (h->pci_seg == pci_seg &&
615 h->type == amd_iommu_target_ivhd_type) {
616 last_devid = find_last_devid_from_ivhd(h);
617
618 if (last_devid < 0)
619 return -EINVAL;
620 if (last_devid > last_bdf)
621 last_bdf = last_devid;
622 }
623 p += h->length;
624 }
625 WARN_ON(p != end);
626
627 return last_bdf;
628 }
629
630 /****************************************************************************
631 *
632 * The following functions belong to the code path which parses the ACPI table
633 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
634 * data structures, initialize the per PCI segment device/alias/rlookup table
635 * and also perform basic hardware initialization.
636 *
637 ****************************************************************************/
638
639 /* Allocate per PCI segment device table */
640 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
641 {
642 pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32,
643 pci_seg->dev_table_size);
644 if (!pci_seg->dev_table)
645 return -ENOMEM;
646
647 return 0;
648 }
649
650 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
651 {
652 if (is_kdump_kernel())
653 memunmap((void *)pci_seg->dev_table);
654 else
655 iommu_free_pages(pci_seg->dev_table);
656 pci_seg->dev_table = NULL;
657 }
658
659 /* Allocate per PCI segment IOMMU rlookup table. */
660 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
661 {
662 pci_seg->rlookup_table = kvcalloc(pci_seg->last_bdf + 1,
663 sizeof(*pci_seg->rlookup_table),
664 GFP_KERNEL);
665 if (pci_seg->rlookup_table == NULL)
666 return -ENOMEM;
667
668 return 0;
669 }
670
671 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
672 {
673 kvfree(pci_seg->rlookup_table);
674 pci_seg->rlookup_table = NULL;
675 }
676
677 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
678 {
679 pci_seg->irq_lookup_table = kvcalloc(pci_seg->last_bdf + 1,
680 sizeof(*pci_seg->irq_lookup_table),
681 GFP_KERNEL);
682 if (pci_seg->irq_lookup_table == NULL)
683 return -ENOMEM;
684
685 return 0;
686 }
687
688 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
689 {
690 kvfree(pci_seg->irq_lookup_table);
691 pci_seg->irq_lookup_table = NULL;
692 }
693
694 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
695 {
696 int i;
697
698 pci_seg->alias_table = kvmalloc_array(pci_seg->last_bdf + 1,
699 sizeof(*pci_seg->alias_table),
700 GFP_KERNEL);
701 if (!pci_seg->alias_table)
702 return -ENOMEM;
703
704 /*
705 * Let every alias entry point to itself (i.e. no aliasing by default).
706 */
707 for (i = 0; i <= pci_seg->last_bdf; ++i)
708 pci_seg->alias_table[i] = i;
709
710 return 0;
711 }
712
713 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
714 {
715 kvfree(pci_seg->alias_table);
716 pci_seg->alias_table = NULL;
717 }
718
719 static inline void *iommu_memremap(unsigned long paddr, size_t size)
720 {
721 phys_addr_t phys;
722
723 if (!paddr)
724 return NULL;
725
726 /*
727 * Obtain the true physical address in the kdump kernel when SME is enabled.
728 * A previous kernel with SME enabled combined with a kdump kernel built
729 * without SME support is currently not supported.
730 */
731 phys = __sme_clr(paddr);
732
733 if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
734 return (__force void *)ioremap_encrypted(phys, size);
735 else
736 return memremap(phys, size, MEMREMAP_WB);
737 }
738
739 /*
740 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
741 * write commands to that buffer later and the IOMMU will execute them
742 * asynchronously
743 */
744 static int __init alloc_command_buffer(struct amd_iommu *iommu)
745 {
746 iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE);
747
748 return iommu->cmd_buf ? 0 : -ENOMEM;
749 }
750
751 /*
752 * The interrupt handler has processed all pending events and adjusted the
753 * head and tail pointers. Clear the overflow mask and restart logging.
754 */
755 void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
756 u8 cntrl_intr, u8 cntrl_log,
757 u32 status_run_mask, u32 status_overflow_mask)
758 {
759 u32 status;
760
761 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
762 if (status & status_run_mask)
763 return;
764
765 pr_info_ratelimited("IOMMU %s log restarting\n", evt_type);
766
767 iommu_feature_disable(iommu, cntrl_log);
768 iommu_feature_disable(iommu, cntrl_intr);
769
770 writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET);
771
772 iommu_feature_enable(iommu, cntrl_intr);
773 iommu_feature_enable(iommu, cntrl_log);
774 }
775
776 /*
777 * This function restarts event logging in case the IOMMU experienced
778 * an event log buffer overflow.
779 */
780 void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
781 {
782 amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN,
783 CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK,
784 MMIO_STATUS_EVT_OVERFLOW_MASK);
785 }
786
787 /*
788 * This function restarts guest vAPIC (GA) logging in case the IOMMU
789 * experienced a GA log overflow.
790 */
791 void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
792 {
793 amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN,
794 CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK,
795 MMIO_STATUS_GALOG_OVERFLOW_MASK);
796 }
797
798 /*
799 * This function resets the command buffer if the IOMMU stopped fetching
800 * commands from it.
801 */
802 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
803 {
804 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
805
806 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
807 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
808 iommu->cmd_buf_head = 0;
809 iommu->cmd_buf_tail = 0;
810
811 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
812 }
813
814 /*
815 * This function writes the command buffer address to the hardware and
816 * enables it.
817 */
818 static void iommu_enable_command_buffer(struct amd_iommu *iommu)
819 {
820 u64 entry;
821
822 BUG_ON(iommu->cmd_buf == NULL);
823
824 if (!is_kdump_kernel()) {
825 /*
826 * The command buffer is re-used in the kdump kernel, so the MMIO
827 * register does not need to be reprogrammed.
828 */
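/*
 * MMIO_CMD_SIZE_512 encodes a command buffer length of 512 16-byte
 * entries (8 KB) in the register's size field.
 */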
829 entry = iommu_virt_to_phys(iommu->cmd_buf);
830 entry |= MMIO_CMD_SIZE_512;
831 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
832 &entry, sizeof(entry));
833 }
834
835 amd_iommu_reset_cmd_buffer(iommu);
836 }
837
838 /*
839 * This function disables the command buffer
840 */
841 static void iommu_disable_command_buffer(struct amd_iommu *iommu)
842 {
843 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
844 }
845
846 static void __init free_command_buffer(struct amd_iommu *iommu)
847 {
848 iommu_free_pages(iommu->cmd_buf);
849 }
850
851 void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
852 size_t size)
853 {
854 void *buf;
855
856 size = PAGE_ALIGN(size);
857 buf = iommu_alloc_pages_sz(gfp, size);
858 if (!buf)
859 return NULL;
860 if (check_feature(FEATURE_SNP) &&
861 set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) {
862 iommu_free_pages(buf);
863 return NULL;
864 }
865
866 return buf;
867 }
868
869 /* allocates the memory where the IOMMU will log its events to */
870 static int __init alloc_event_buffer(struct amd_iommu *iommu)
871 {
872 iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL,
873 EVT_BUFFER_SIZE);
874
875 return iommu->evt_buf ? 0 : -ENOMEM;
876 }
877
878 static void iommu_enable_event_buffer(struct amd_iommu *iommu)
879 {
880 u64 entry;
881
882 BUG_ON(iommu->evt_buf == NULL);
883
884 if (!is_kdump_kernel()) {
885 /*
886 * The event buffer is re-used in the kdump kernel, so the MMIO
887 * register does not need to be reprogrammed.
888 */
889 entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
890 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
891 &entry, sizeof(entry));
892 }
893
894 /* set head and tail to zero manually */
895 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
896 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
897
898 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
899 }
900
901 /*
902 * This function disables the event log buffer
903 */
904 static void iommu_disable_event_buffer(struct amd_iommu *iommu)
905 {
906 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
907 }
908
909 static void __init free_event_buffer(struct amd_iommu *iommu)
910 {
911 iommu_free_pages(iommu->evt_buf);
912 }
913
914 static void free_ga_log(struct amd_iommu *iommu)
915 {
916 #ifdef CONFIG_IRQ_REMAP
917 iommu_free_pages(iommu->ga_log);
918 iommu_free_pages(iommu->ga_log_tail);
919 #endif
920 }
921
922 #ifdef CONFIG_IRQ_REMAP
923 static int iommu_ga_log_enable(struct amd_iommu *iommu)
924 {
925 u32 status, i;
926 u64 entry;
927
928 if (!iommu->ga_log)
929 return -EINVAL;
930
931 entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
932 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
933 &entry, sizeof(entry));
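/*
 * The GA log tail pointer register takes an 8-byte aligned physical
 * address below 2^52, hence the masking below.
 */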
934 entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
935 (BIT_ULL(52)-1)) & ~7ULL;
936 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
937 &entry, sizeof(entry));
938 writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
939 writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
940
941
942 iommu_feature_enable(iommu, CONTROL_GAINT_EN);
943 iommu_feature_enable(iommu, CONTROL_GALOG_EN);
944
945 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
946 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
947 if (status & (MMIO_STATUS_GALOG_RUN_MASK))
948 break;
949 udelay(10);
950 }
951
952 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
953 return -EINVAL;
954
955 return 0;
956 }
957
958 static int iommu_init_ga_log(struct amd_iommu *iommu)
959 {
960 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
961 return 0;
962
963 iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE);
964 if (!iommu->ga_log)
965 goto err_out;
966
967 iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8);
968 if (!iommu->ga_log_tail)
969 goto err_out;
970
971 return 0;
972 err_out:
973 free_ga_log(iommu);
974 return -EINVAL;
975 }
976 #endif /* CONFIG_IRQ_REMAP */
977
978 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
979 {
980 iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1);
981 if (!iommu->cmd_sem)
982 return -ENOMEM;
983 iommu->cmd_sem_paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
984 return 0;
985 }
986
987 static int __init remap_event_buffer(struct amd_iommu *iommu)
988 {
989 u64 paddr;
990
991 pr_info_once("Re-using event buffer from the previous kernel\n");
992 paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK;
993 iommu->evt_buf = iommu_memremap(paddr, EVT_BUFFER_SIZE);
994
995 return iommu->evt_buf ? 0 : -ENOMEM;
996 }
997
998 static int __init remap_command_buffer(struct amd_iommu *iommu)
999 {
1000 u64 paddr;
1001
1002 pr_info_once("Re-using command buffer from the previous kernel\n");
1003 paddr = readq(iommu->mmio_base + MMIO_CMD_BUF_OFFSET) & PM_ADDR_MASK;
1004 iommu->cmd_buf = iommu_memremap(paddr, CMD_BUFFER_SIZE);
1005
1006 return iommu->cmd_buf ? 0 : -ENOMEM;
1007 }
1008
1009 static int __init remap_or_alloc_cwwb_sem(struct amd_iommu *iommu)
1010 {
1011 u64 paddr;
1012
1013 if (check_feature(FEATURE_SNP)) {
1014 /*
1015 * When SNP is enabled, the exclusion base register is used for the
1016 * completion wait buffer (CWB) address. Read and re-use it.
1017 */
1018 pr_info_once("Re-using CWB buffers from the previous kernel\n");
1019 paddr = readq(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET) & PM_ADDR_MASK;
1020 iommu->cmd_sem = iommu_memremap(paddr, PAGE_SIZE);
1021 if (!iommu->cmd_sem)
1022 return -ENOMEM;
1023 iommu->cmd_sem_paddr = paddr;
1024 } else {
1025 return alloc_cwwb_sem(iommu);
1026 }
1027
1028 return 0;
1029 }
1030
1031 static int __init alloc_iommu_buffers(struct amd_iommu *iommu)
1032 {
1033 int ret;
1034
1035 /*
1036 * Reuse/remap the previous kernel's completion-wait semaphore,
1037 * command and event buffers for a kdump boot.
1038 */
1039 if (is_kdump_kernel()) {
1040 ret = remap_or_alloc_cwwb_sem(iommu);
1041 if (ret)
1042 return ret;
1043
1044 ret = remap_command_buffer(iommu);
1045 if (ret)
1046 return ret;
1047
1048 ret = remap_event_buffer(iommu);
1049 if (ret)
1050 return ret;
1051 } else {
1052 ret = alloc_cwwb_sem(iommu);
1053 if (ret)
1054 return ret;
1055
1056 ret = alloc_command_buffer(iommu);
1057 if (ret)
1058 return ret;
1059
1060 ret = alloc_event_buffer(iommu);
1061 if (ret)
1062 return ret;
1063 }
1064
1065 return 0;
1066 }
1067
1068 static void __init free_cwwb_sem(struct amd_iommu *iommu)
1069 {
1070 if (iommu->cmd_sem)
1071 iommu_free_pages((void *)iommu->cmd_sem);
1072 }
1073 static void __init unmap_cwwb_sem(struct amd_iommu *iommu)
1074 {
1075 if (iommu->cmd_sem) {
1076 if (check_feature(FEATURE_SNP))
1077 memunmap((void *)iommu->cmd_sem);
1078 else
1079 iommu_free_pages((void *)iommu->cmd_sem);
1080 }
1081 }
1082
1083 static void __init unmap_command_buffer(struct amd_iommu *iommu)
1084 {
1085 memunmap((void *)iommu->cmd_buf);
1086 }
1087
1088 static void __init unmap_event_buffer(struct amd_iommu *iommu)
1089 {
1090 memunmap(iommu->evt_buf);
1091 }
1092
1093 static void __init free_iommu_buffers(struct amd_iommu *iommu)
1094 {
1095 if (is_kdump_kernel()) {
1096 unmap_cwwb_sem(iommu);
1097 unmap_command_buffer(iommu);
1098 unmap_event_buffer(iommu);
1099 } else {
1100 free_cwwb_sem(iommu);
1101 free_command_buffer(iommu);
1102 free_event_buffer(iommu);
1103 }
1104 }
1105
1106 static void iommu_enable_xt(struct amd_iommu *iommu)
1107 {
1108 #ifdef CONFIG_IRQ_REMAP
1109 /*
1110 * XT mode (32-bit APIC destination ID) requires
1111 * GA mode (128-bit IRTE support) as a prerequisite.
1112 */
1113 if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
1114 amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
1115 iommu_feature_enable(iommu, CONTROL_XT_EN);
1116 #endif /* CONFIG_IRQ_REMAP */
1117 }
1118
1119 static void iommu_enable_gt(struct amd_iommu *iommu)
1120 {
1121 if (!check_feature(FEATURE_GT))
1122 return;
1123
1124 iommu_feature_enable(iommu, CONTROL_GT_EN);
1125 }
1126
1127 /* sets a specific bit in the device table entry. */
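/*
 * The DTE is 256 bits wide (four 64-bit words); bits 7:6 of 'bit' select
 * the word and bits 5:0 the position within that word.
 */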
1128 static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
1129 {
1130 int i = (bit >> 6) & 0x03;
1131 int _bit = bit & 0x3f;
1132
1133 dte->data[i] |= (1UL << _bit);
1134 }
1135
1136 static bool __reuse_device_table(struct amd_iommu *iommu)
1137 {
1138 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1139 struct dev_table_entry *old_dev_tbl_entry;
1140 u32 lo, hi, old_devtb_size, devid;
1141 phys_addr_t old_devtb_phys;
1142 u16 dom_id;
1143 bool dte_v;
1144 u64 entry;
1145
1146 /* Each IOMMU uses a separate device table of the same size */
1147 lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1148 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1149 entry = (((u64) hi) << 32) + lo;
1150
1151 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1152 if (old_devtb_size != pci_seg->dev_table_size) {
1153 pr_err("The device table size of IOMMU:%d is not expected!\n",
1154 iommu->index);
1155 return false;
1156 }
1157
1158 /*
1159 * When SME is enabled in the first kernel, the entry includes the
1160 * memory encryption mask (sme_me_mask); we must clear it to obtain
1161 * the true physical address in the kdump kernel.
1162 */
1163 old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1164
1165 if (old_devtb_phys >= 0x100000000ULL) {
1166 pr_err("The address of old device table is above 4G, not trustworthy!\n");
1167 return false;
1168 }
1169
1170 /*
1171 * Re-use the previous kernel's device table for kdump.
1172 */
1173 pci_seg->old_dev_tbl_cpy = iommu_memremap(old_devtb_phys, pci_seg->dev_table_size);
1174 if (pci_seg->old_dev_tbl_cpy == NULL) {
1175 pr_err("Failed to remap memory for reusing old device table!\n");
1176 return false;
1177 }
1178
1179 for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
1180 old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
1181 dte_v = FIELD_GET(DTE_FLAG_V, old_dev_tbl_entry->data[0]);
1182 dom_id = FIELD_GET(DEV_DOMID_MASK, old_dev_tbl_entry->data[1]);
1183
1184 if (!dte_v || !dom_id)
1185 continue;
1186 /*
1187 * ID reservation can fail with -ENOSPC when there
1188 * are multiple devices present in the same domain,
1189 * hence check only for -ENOMEM.
1190 */
1191 if (amd_iommu_pdom_id_reserve(dom_id, GFP_KERNEL) == -ENOMEM)
1192 return false;
1193 }
1194
1195 return true;
1196 }
1197
1198 static bool reuse_device_table(void)
1199 {
1200 struct amd_iommu *iommu;
1201 struct amd_iommu_pci_seg *pci_seg;
1202
1203 if (!amd_iommu_pre_enabled)
1204 return false;
1205
1206 pr_warn("Translation is already enabled - trying to reuse translation structures\n");
1207
1208 /*
1209 * All IOMMUs within a PCI segment share a common device table.
1210 * Hence, reuse the device table only once per PCI segment.
1211 */
1212 for_each_pci_segment(pci_seg) {
1213 for_each_iommu(iommu) {
1214 if (pci_seg->id != iommu->pci_seg->id)
1215 continue;
1216 if (!__reuse_device_table(iommu))
1217 return false;
1218 break;
1219 }
1220 }
1221
1222 return true;
1223 }
1224
1225 struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid)
1226 {
1227 struct ivhd_dte_flags *e;
1228 unsigned int best_len = UINT_MAX;
1229 struct dev_table_entry *dte = NULL;
1230
1231 for_each_ivhd_dte_flags(e) {
1232 /*
1233 * Need to go through the whole list to find the smallest range
1234 * that contains the devid.
1235 */
1236 if ((e->segid == segid) &&
1237 (e->devid_first <= devid) && (devid <= e->devid_last)) {
1238 unsigned int len = e->devid_last - e->devid_first;
1239
1240 if (len < best_len) {
1241 dte = &(e->dte);
1242 best_len = len;
1243 }
1244 }
1245 }
1246 return dte;
1247 }
1248
1249 static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last)
1250 {
1251 struct ivhd_dte_flags *e;
1252
1253 for_each_ivhd_dte_flags(e) {
1254 if ((e->segid == segid) &&
1255 (e->devid_first == first) &&
1256 (e->devid_last == last))
1257 return true;
1258 }
1259 return false;
1260 }
1261
1262 /*
1263 * This function takes the device-specific flags read from the ACPI
1264 * table and sets up the device table entry with that information.
1265 */
1266 static void __init
1267 set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last,
1268 u32 flags, u32 ext_flags)
1269 {
1270 int i;
1271 struct dev_table_entry dte = {};
1272
1273 /* Parse IVHD DTE setting flags and store information */
1274 if (flags) {
1275 struct ivhd_dte_flags *d;
1276
1277 if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last))
1278 return;
1279
1280 d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL);
1281 if (!d)
1282 return;
1283
1284 pr_debug("%s: devid range %#x:%#x\n", __func__, first, last);
1285
1286 if (flags & ACPI_DEVFLAG_INITPASS)
1287 set_dte_bit(&dte, DEV_ENTRY_INIT_PASS);
1288 if (flags & ACPI_DEVFLAG_EXTINT)
1289 set_dte_bit(&dte, DEV_ENTRY_EINT_PASS);
1290 if (flags & ACPI_DEVFLAG_NMI)
1291 set_dte_bit(&dte, DEV_ENTRY_NMI_PASS);
1292 if (flags & ACPI_DEVFLAG_SYSMGT1)
1293 set_dte_bit(&dte, DEV_ENTRY_SYSMGT1);
1294 if (flags & ACPI_DEVFLAG_SYSMGT2)
1295 set_dte_bit(&dte, DEV_ENTRY_SYSMGT2);
1296 if (flags & ACPI_DEVFLAG_LINT0)
1297 set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS);
1298 if (flags & ACPI_DEVFLAG_LINT1)
1299 set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS);
1300
1301 /* Apply erratum 63, which needs info in initial_dte */
1302 if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1)
1303 dte.data[0] |= DTE_FLAG_IW;
1304
1305 memcpy(&d->dte, &dte, sizeof(dte));
1306 d->segid = iommu->pci_seg->id;
1307 d->devid_first = first;
1308 d->devid_last = last;
1309 list_add_tail(&d->list, &amd_ivhd_dev_flags_list);
1310 }
1311
1312 for (i = first; i <= last; i++) {
1313 if (flags) {
1314 struct dev_table_entry *dev_table = get_dev_table(iommu);
1315
1316 memcpy(&dev_table[i], &dte, sizeof(dte));
1317 }
1318 amd_iommu_set_rlookup_table(iommu, i);
1319 }
1320 }
1321
1322 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1323 u16 devid, u32 flags, u32 ext_flags)
1324 {
1325 set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags);
1326 }
1327
1328 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1329 {
1330 struct devid_map *entry;
1331 struct list_head *list;
1332
1333 if (type == IVHD_SPECIAL_IOAPIC)
1334 list = &ioapic_map;
1335 else if (type == IVHD_SPECIAL_HPET)
1336 list = &hpet_map;
1337 else
1338 return -EINVAL;
1339
1340 list_for_each_entry(entry, list, list) {
1341 if (!(entry->id == id && entry->cmd_line))
1342 continue;
1343
1344 pr_info("Command-line override present for %s id %d - ignoring\n",
1345 type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1346
1347 *devid = entry->devid;
1348
1349 return 0;
1350 }
1351
1352 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1353 if (!entry)
1354 return -ENOMEM;
1355
1356 entry->id = id;
1357 entry->devid = *devid;
1358 entry->cmd_line = cmd_line;
1359
1360 list_add_tail(&entry->list, list);
1361
1362 return 0;
1363 }
1364
1365 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1366 bool cmd_line)
1367 {
1368 struct acpihid_map_entry *entry;
1369 struct list_head *list = &acpihid_map;
1370
1371 list_for_each_entry(entry, list, list) {
1372 if (strcmp(entry->hid, hid) ||
1373 (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1374 !entry->cmd_line)
1375 continue;
1376
1377 pr_info("Command-line override for hid:%s uid:%s\n",
1378 hid, uid);
1379 *devid = entry->devid;
1380 return 0;
1381 }
1382
1383 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1384 if (!entry)
1385 return -ENOMEM;
1386
1387 memcpy(entry->uid, uid, strlen(uid));
1388 memcpy(entry->hid, hid, strlen(hid));
1389 entry->devid = *devid;
1390 entry->cmd_line = cmd_line;
1391 entry->root_devid = (entry->devid & (~0x7));
1392
1393 pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n",
1394 entry->cmd_line ? "cmd" : "ivrs",
1395 entry->hid, entry->uid, entry->root_devid);
1396
1397 list_add_tail(&entry->list, list);
1398 return 0;
1399 }
1400
1401 static int __init add_early_maps(void)
1402 {
1403 int i, ret;
1404
1405 for (i = 0; i < early_ioapic_map_size; ++i) {
1406 ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1407 early_ioapic_map[i].id,
1408 &early_ioapic_map[i].devid,
1409 early_ioapic_map[i].cmd_line);
1410 if (ret)
1411 return ret;
1412 }
1413
1414 for (i = 0; i < early_hpet_map_size; ++i) {
1415 ret = add_special_device(IVHD_SPECIAL_HPET,
1416 early_hpet_map[i].id,
1417 &early_hpet_map[i].devid,
1418 early_hpet_map[i].cmd_line);
1419 if (ret)
1420 return ret;
1421 }
1422
1423 for (i = 0; i < early_acpihid_map_size; ++i) {
1424 ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1425 early_acpihid_map[i].uid,
1426 &early_acpihid_map[i].devid,
1427 early_acpihid_map[i].cmd_line);
1428 if (ret)
1429 return ret;
1430 }
1431
1432 return 0;
1433 }
1434
1435 /*
1436 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1437 * initializes the hardware and our data structures with it.
1438 */
1439 static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1440 struct ivhd_header *h)
1441 {
1442 u8 *p = (u8 *)h;
1443 u8 *end = p, flags = 0;
1444 u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1445 u32 dev_i, ext_flags = 0;
1446 bool alias = false;
1447 struct ivhd_entry *e;
1448 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1449 u32 ivhd_size;
1450 int ret;
1451
1452
1453 ret = add_early_maps();
1454 if (ret)
1455 return ret;
1456
1457 amd_iommu_apply_ivrs_quirks();
1458
1459 /*
1460 * First save the recommended feature enable bits from ACPI
1461 */
1462 iommu->acpi_flags = h->flags;
1463
1464 /*
1465 * Done. Now parse the device entries
1466 */
1467 ivhd_size = get_ivhd_header_size(h);
1468 if (!ivhd_size) {
1469 pr_err("Unsupported IVHD type %#x\n", h->type);
1470 return -EINVAL;
1471 }
1472
1473 p += ivhd_size;
1474
1475 end += h->length;
1476
1477
1478 while (p < end) {
1479 e = (struct ivhd_entry *)p;
1480 seg_id = pci_seg->id;
1481
1482 switch (e->type) {
1483 case IVHD_DEV_ALL:
1484
1485 DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags);
1486 set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0);
1487 break;
1488 case IVHD_DEV_SELECT:
1489
1490 DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
1491 seg_id, PCI_BUS_NUM(e->devid),
1492 PCI_SLOT(e->devid),
1493 PCI_FUNC(e->devid),
1494 e->flags);
1495
1496 devid = e->devid;
1497 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1498 break;
1499 case IVHD_DEV_SELECT_RANGE_START:
1500
1501 DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
1502 seg_id, PCI_BUS_NUM(e->devid),
1503 PCI_SLOT(e->devid),
1504 PCI_FUNC(e->devid),
1505 e->flags);
1506
1507 devid_start = e->devid;
1508 flags = e->flags;
1509 ext_flags = 0;
1510 alias = false;
1511 break;
1512 case IVHD_DEV_ALIAS:
1513
1514 DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n",
1515 seg_id, PCI_BUS_NUM(e->devid),
1516 PCI_SLOT(e->devid),
1517 PCI_FUNC(e->devid),
1518 e->flags,
1519 PCI_BUS_NUM(e->ext >> 8),
1520 PCI_SLOT(e->ext >> 8),
1521 PCI_FUNC(e->ext >> 8));
1522
1523 devid = e->devid;
1524 devid_to = e->ext >> 8;
1525 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1526 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1527 pci_seg->alias_table[devid] = devid_to;
1528 break;
1529 case IVHD_DEV_ALIAS_RANGE:
1530
1531 DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n",
1532 seg_id, PCI_BUS_NUM(e->devid),
1533 PCI_SLOT(e->devid),
1534 PCI_FUNC(e->devid),
1535 e->flags,
1536 seg_id, PCI_BUS_NUM(e->ext >> 8),
1537 PCI_SLOT(e->ext >> 8),
1538 PCI_FUNC(e->ext >> 8));
1539
1540 devid_start = e->devid;
1541 flags = e->flags;
1542 devid_to = e->ext >> 8;
1543 ext_flags = 0;
1544 alias = true;
1545 break;
1546 case IVHD_DEV_EXT_SELECT:
1547
1548 DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
1549 seg_id, PCI_BUS_NUM(e->devid),
1550 PCI_SLOT(e->devid),
1551 PCI_FUNC(e->devid),
1552 e->flags, e->ext);
1553
1554 devid = e->devid;
1555 set_dev_entry_from_acpi(iommu, devid, e->flags,
1556 e->ext);
1557 break;
1558 case IVHD_DEV_EXT_SELECT_RANGE:
1559
1560 DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
1561 seg_id, PCI_BUS_NUM(e->devid),
1562 PCI_SLOT(e->devid),
1563 PCI_FUNC(e->devid),
1564 e->flags, e->ext);
1565
1566 devid_start = e->devid;
1567 flags = e->flags;
1568 ext_flags = e->ext;
1569 alias = false;
1570 break;
1571 case IVHD_DEV_RANGE_END:
1572
1573 DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n",
1574 seg_id, PCI_BUS_NUM(e->devid),
1575 PCI_SLOT(e->devid),
1576 PCI_FUNC(e->devid));
1577
1578 devid = e->devid;
1579 if (alias) {
1580 for (dev_i = devid_start; dev_i <= devid; ++dev_i)
1581 pci_seg->alias_table[dev_i] = devid_to;
1582 set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags);
1583 }
1584 set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags);
1585 break;
1586 case IVHD_DEV_SPECIAL: {
1587 u8 handle, type;
1588 const char *var;
1589 u32 devid;
1590 int ret;
1591
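/*
 * A special device entry packs the handle in ext[7:0], the source BDF in
 * ext[23:8] and the variety (IOAPIC or HPET) in ext[31:24].
 */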
1592 handle = e->ext & 0xff;
1593 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1594 type = (e->ext >> 24) & 0xff;
1595
1596 if (type == IVHD_SPECIAL_IOAPIC)
1597 var = "IOAPIC";
1598 else if (type == IVHD_SPECIAL_HPET)
1599 var = "HPET";
1600 else
1601 var = "UNKNOWN";
1602
1603 DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
1604 var, (int)handle,
1605 seg_id, PCI_BUS_NUM(devid),
1606 PCI_SLOT(devid),
1607 PCI_FUNC(devid),
1608 e->flags);
1609
1610 ret = add_special_device(type, handle, &devid, false);
1611 if (ret)
1612 return ret;
1613
1614 /*
1615 * add_special_device might update the devid in case a
1616 * command-line override is present. So call
1617 * set_dev_entry_from_acpi after add_special_device.
1618 */
1619 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1620
1621 break;
1622 }
1623 case IVHD_DEV_ACPI_HID: {
1624 u32 devid;
1625 u8 hid[ACPIHID_HID_LEN];
1626 u8 uid[ACPIHID_UID_LEN];
1627 int ret;
1628
1629 if (h->type != 0x40) {
1630 pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1631 e->type);
1632 break;
1633 }
1634
1635 BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1636 memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1637 hid[ACPIHID_HID_LEN - 1] = '\0';
1638
1639 if (!(*hid)) {
1640 pr_err(FW_BUG "Invalid HID.\n");
1641 break;
1642 }
1643
1644 uid[0] = '\0';
1645 switch (e->uidf) {
1646 case UID_NOT_PRESENT:
1647
1648 if (e->uidl != 0)
1649 pr_warn(FW_BUG "Invalid UID length.\n");
1650
1651 break;
1652 case UID_IS_INTEGER:
1653
1654 sprintf(uid, "%d", e->uid);
1655
1656 break;
1657 case UID_IS_CHARACTER:
1658
1659 memcpy(uid, &e->uid, e->uidl);
1660 uid[e->uidl] = '\0';
1661
1662 break;
1663 default:
1664 break;
1665 }
1666
1667 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1668 DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
1669 hid, uid, seg_id,
1670 PCI_BUS_NUM(devid),
1671 PCI_SLOT(devid),
1672 PCI_FUNC(devid),
1673 e->flags);
1674
1675 flags = e->flags;
1676
1677 ret = add_acpi_hid_device(hid, uid, &devid, false);
1678 if (ret)
1679 return ret;
1680
1681 /*
1682 * add_acpi_hid_device might update the devid in case a
1683 * command-line override is present. So call
1684 * set_dev_entry_from_acpi after add_acpi_hid_device.
1685 */
1686 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1687
1688 break;
1689 }
1690 default:
1691 break;
1692 }
1693
1694 p += ivhd_entry_length(p);
1695 }
1696
1697 return 0;
1698 }
1699
1700 /* Allocate PCI segment data structure */
1701 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1702 struct acpi_table_header *ivrs_base)
1703 {
1704 struct amd_iommu_pci_seg *pci_seg;
1705 int last_bdf;
1706
1707 /*
1708 * First parse the ACPI table to find the largest Bus/Dev/Func we need to
1709 * handle in this PCI segment. Based on this information the shared data
1710 * structures for the PCI segments in the system will be allocated.
1711 */
1712 last_bdf = find_last_devid_acpi(ivrs_base, id);
1713 if (last_bdf < 0)
1714 return NULL;
1715
1716 pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
1717 if (pci_seg == NULL)
1718 return NULL;
1719
1720 pci_seg->last_bdf = last_bdf;
1721 DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1722 pci_seg->dev_table_size =
1723 max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE),
1724 SZ_4K);
1725
1726 pci_seg->id = id;
1727 init_llist_head(&pci_seg->dev_data_list);
1728 INIT_LIST_HEAD(&pci_seg->unity_map);
1729 list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1730
1731 if (alloc_dev_table(pci_seg))
1732 goto err_free_pci_seg;
1733 if (alloc_alias_table(pci_seg))
1734 goto err_free_dev_table;
1735 if (alloc_rlookup_table(pci_seg))
1736 goto err_free_alias_table;
1737
1738 return pci_seg;
1739
1740 err_free_alias_table:
1741 free_alias_table(pci_seg);
1742 err_free_dev_table:
1743 free_dev_table(pci_seg);
1744 err_free_pci_seg:
1745 list_del(&pci_seg->list);
1746 kfree(pci_seg);
1747 return NULL;
1748 }
1749
1750 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1751 struct acpi_table_header *ivrs_base)
1752 {
1753 struct amd_iommu_pci_seg *pci_seg;
1754
1755 for_each_pci_segment(pci_seg) {
1756 if (pci_seg->id == id)
1757 return pci_seg;
1758 }
1759
1760 return alloc_pci_segment(id, ivrs_base);
1761 }
1762
1763 static void __init free_pci_segments(void)
1764 {
1765 struct amd_iommu_pci_seg *pci_seg, *next;
1766
1767 for_each_pci_segment_safe(pci_seg, next) {
1768 list_del(&pci_seg->list);
1769 free_irq_lookup_table(pci_seg);
1770 free_rlookup_table(pci_seg);
1771 free_alias_table(pci_seg);
1772 free_dev_table(pci_seg);
1773 kfree(pci_seg);
1774 }
1775 }
1776
1777 static void __init free_sysfs(struct amd_iommu *iommu)
1778 {
1779 if (iommu->iommu.dev) {
1780 iommu_device_unregister(&iommu->iommu);
1781 iommu_device_sysfs_remove(&iommu->iommu);
1782 }
1783 }
1784
1785 static void __init free_iommu_one(struct amd_iommu *iommu)
1786 {
1787 free_sysfs(iommu);
1788 free_iommu_buffers(iommu);
1789 amd_iommu_free_ppr_log(iommu);
1790 free_ga_log(iommu);
1791 iommu_unmap_mmio_space(iommu);
1792 amd_iommu_iopf_uninit(iommu);
1793 }
1794
1795 static void __init free_iommu_all(void)
1796 {
1797 struct amd_iommu *iommu, *next;
1798
1799 for_each_iommu_safe(iommu, next) {
1800 list_del(&iommu->list);
1801 free_iommu_one(iommu);
1802 kfree(iommu);
1803 }
1804 }
1805
1806 /*
1807 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1808 * Workaround:
1809 * BIOS should disable L2B miscellaneous clock gating by setting
1810 * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1811 */
1812 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1813 {
1814 u32 value;
1815
1816 if ((boot_cpu_data.x86 != 0x15) ||
1817 (boot_cpu_data.x86_model < 0x10) ||
1818 (boot_cpu_data.x86_model > 0x1f))
1819 return;
1820
1821 pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1822 pci_read_config_dword(iommu->dev, 0xf4, &value);
1823
1824 if (value & BIT(2))
1825 return;
1826
1827 /* Select NB indirect register 0x90 and enable writing */
1828 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1829
1830 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1831 pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1832
1833 /* Clear the enable writing bit */
1834 pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1835 }
1836
1837 /*
1838 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1839 * Workaround:
1840 * BIOS should enable ATS write permission check by setting
1841 * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1842 */
1843 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1844 {
1845 u32 value;
1846
1847 if ((boot_cpu_data.x86 != 0x15) ||
1848 (boot_cpu_data.x86_model < 0x30) ||
1849 (boot_cpu_data.x86_model > 0x3f))
1850 return;
1851
1852 /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1853 value = iommu_read_l2(iommu, 0x47);
1854
1855 if (value & BIT(0))
1856 return;
1857
1858 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1859 iommu_write_l2(iommu, 0x47, value | BIT(0));
1860
1861 pci_info(iommu->dev, "Applying ATS write check workaround\n");
1862 }
1863
1864 /*
1865 * This function glues together the initialization of one IOMMU. It also
1866 * allocates the command buffer and programs the hardware. It does NOT
1867 * enable the IOMMU. This is done afterwards.
1868 */
1869 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1870 struct acpi_table_header *ivrs_base)
1871 {
1872 struct amd_iommu_pci_seg *pci_seg;
1873
1874 pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1875 if (pci_seg == NULL)
1876 return -ENOMEM;
1877 iommu->pci_seg = pci_seg;
1878
1879 raw_spin_lock_init(&iommu->lock);
1880 atomic64_set(&iommu->cmd_sem_val, 0);
1881
1882 /* Add IOMMU to internal data structures */
1883 list_add_tail(&iommu->list, &amd_iommu_list);
1884 iommu->index = amd_iommus_present++;
1885
1886 if (unlikely(iommu->index >= MAX_IOMMUS)) {
1887 WARN(1, "System has more IOMMUs than supported by this driver\n");
1888 return -ENOSYS;
1889 }
1890
1891 /*
1892 * Copy data from ACPI table entry to the iommu struct
1893 */
1894 iommu->devid = h->devid;
1895 iommu->cap_ptr = h->cap_ptr;
1896 iommu->mmio_phys = h->mmio_phys;
1897
1898 switch (h->type) {
1899 case 0x10:
1900 /* Check if IVHD EFR contains proper max banks/counters */
1901 if ((h->efr_attr != 0) &&
1902 ((h->efr_attr & (0xF << 13)) != 0) &&
1903 ((h->efr_attr & (0x3F << 17)) != 0))
1904 iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1905 else
1906 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1907
1908 /* GAM requires GA mode. */
1909 if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)
1910 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1911 break;
1912 case 0x11:
1913 case 0x40:
1914 if (h->efr_reg & (1 << 9))
1915 iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1916 else
1917 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1918
1919 /* XT and GAM require GA mode. */
1920 if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) {
1921 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1922 break;
1923 }
1924
1925 if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1926 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1927
1928 if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) {
1929 pr_warn_once("Host Address Translation is not supported.\n");
1930 amd_iommu_hatdis = true;
1931 }
1932
1933 early_iommu_features_init(iommu, h);
1934
1935 break;
1936 default:
1937 return -EINVAL;
1938 }
1939
1940 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1941 iommu->mmio_phys_end);
1942 if (!iommu->mmio_base)
1943 return -ENOMEM;
1944
1945 return init_iommu_from_acpi(iommu, h);
1946 }
1947
1948 static int __init init_iommu_one_late(struct amd_iommu *iommu)
1949 {
1950 int ret;
1951
1952 ret = alloc_iommu_buffers(iommu);
1953 if (ret)
1954 return ret;
1955
1956 iommu->int_enabled = false;
1957
1958 init_translation_status(iommu);
1959 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1960 iommu_disable(iommu);
1961 clear_translation_pre_enabled(iommu);
1962 pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1963 iommu->index);
1964 }
1965 if (amd_iommu_pre_enabled)
1966 amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1967
1968 if (amd_iommu_irq_remap) {
1969 ret = amd_iommu_create_irq_domain(iommu);
1970 if (ret)
1971 return ret;
1972 }
1973
1974 /*
1975 * Make sure IOMMU is not considered to translate itself. The IVRS
1976 * table tells us so, but this is a lie!
1977 */
1978 iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1979
1980 return 0;
1981 }
1982
1983 /**
1984 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1985 * @ivrs: Pointer to the IVRS header
1986 *
1987 * This function searches through all IVHD blocks and returns the highest supported IVHD type found
1988 */
1989 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1990 {
1991 u8 *base = (u8 *)ivrs;
1992 struct ivhd_header *ivhd = (struct ivhd_header *)
1993 (base + IVRS_HEADER_LENGTH);
1994 u8 last_type = ivhd->type;
1995 u16 devid = ivhd->devid;
1996
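	/*
	 * Walk every IVHD block whose type does not exceed the maximum the
	 * driver supports; the last block that describes the same IOMMU
	 * (matching devid) determines the IVHD type used for parsing.
	 */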
1997 while (((u8 *)ivhd - base < ivrs->length) &&
1998 (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1999 u8 *p = (u8 *) ivhd;
2000
2001 if (ivhd->devid == devid)
2002 last_type = ivhd->type;
2003 ivhd = (struct ivhd_header *)(p + ivhd->length);
2004 }
2005
2006 return last_type;
2007 }
2008
2009 /*
2010 * Iterates over all IOMMU entries in the ACPI table, allocates the
2011 * IOMMU structure and initializes it with init_iommu_one()
2012 */
2013 static int __init init_iommu_all(struct acpi_table_header *table)
2014 {
2015 u8 *p = (u8 *)table, *end = (u8 *)table;
2016 struct ivhd_header *h;
2017 struct amd_iommu *iommu;
2018 int ret;
2019
2020 end += table->length;
2021 p += IVRS_HEADER_LENGTH;
2022
2023 /* Phase 1: Process all IVHD blocks */
2024 while (p < end) {
2025 h = (struct ivhd_header *)p;
2026 if (*p == amd_iommu_target_ivhd_type) {
2027
2028 DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
2029 "flags: %01x info %04x\n",
2030 h->pci_seg, PCI_BUS_NUM(h->devid),
2031 PCI_SLOT(h->devid), PCI_FUNC(h->devid),
2032 h->cap_ptr, h->flags, h->info);
2033 DUMP_printk(" mmio-addr: %016llx\n",
2034 h->mmio_phys);
2035
2036 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
2037 if (iommu == NULL)
2038 return -ENOMEM;
2039
2040 ret = init_iommu_one(iommu, h, table);
2041 if (ret)
2042 return ret;
2043 }
2044 p += h->length;
2045
2046 }
2047 WARN_ON(p != end);
2048
2049 /* Phase 2 : Early feature support check */
2050 get_global_efr();
2051
2052 /* Phase 3 : Enabling IOMMU features */
2053 for_each_iommu(iommu) {
2054 ret = init_iommu_one_late(iommu);
2055 if (ret)
2056 return ret;
2057 }
2058
2059 return 0;
2060 }
2061
2062 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
2063 {
2064 u64 val;
2065 struct pci_dev *pdev = iommu->dev;
2066
2067 if (!check_feature(FEATURE_PC))
2068 return;
2069
2070 amd_iommu_pc_present = true;
2071
2072 pci_info(pdev, "IOMMU performance counters supported\n");
2073
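	/*
	 * The counter configuration register reports the number of banks in
	 * bits [17:12] and counters per bank in bits [10:7], as decoded by
	 * the shifts and masks below.
	 */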
2074 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
2075 iommu->max_banks = (u8) ((val >> 12) & 0x3f);
2076 iommu->max_counters = (u8) ((val >> 7) & 0xf);
2077
2078 return;
2079 }
2080
2081 static ssize_t amd_iommu_show_cap(struct device *dev,
2082 struct device_attribute *attr,
2083 char *buf)
2084 {
2085 struct amd_iommu *iommu = dev_to_amd_iommu(dev);
2086 return sysfs_emit(buf, "%x\n", iommu->cap);
2087 }
2088 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
2089
2090 static ssize_t amd_iommu_show_features(struct device *dev,
2091 struct device_attribute *attr,
2092 char *buf)
2093 {
2094 return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2);
2095 }
2096 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
2097
2098 static struct attribute *amd_iommu_attrs[] = {
2099 &dev_attr_cap.attr,
2100 &dev_attr_features.attr,
2101 NULL,
2102 };
2103
2104 static struct attribute_group amd_iommu_group = {
2105 .name = "amd-iommu",
2106 .attrs = amd_iommu_attrs,
2107 };
2108
2109 static const struct attribute_group *amd_iommu_groups[] = {
2110 &amd_iommu_group,
2111 NULL,
2112 };
2113
2114 /*
2115 * Note: IVHD types 0x11 and 0x40 also contain an exact copy
2116 * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
2117 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
2118 */
2119 static void __init late_iommu_features_init(struct amd_iommu *iommu)
2120 {
2121 u64 features, features2;
2122
2123 if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
2124 return;
2125
2126 /* read extended feature bits */
2127 features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
2128 features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
2129
2130 if (!amd_iommu_efr) {
2131 amd_iommu_efr = features;
2132 amd_iommu_efr2 = features2;
2133 return;
2134 }
2135
2136 /*
2137 * Sanity check and warn if EFR values from
2138 * IVHD and MMIO conflict.
2139 */
2140 if (features != amd_iommu_efr ||
2141 features2 != amd_iommu_efr2) {
2142 pr_warn(FW_WARN
2143 "EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2144 features, amd_iommu_efr,
2145 features2, amd_iommu_efr2);
2146 }
2147 }
2148
2149 static int __init iommu_init_pci(struct amd_iommu *iommu)
2150 {
2151 int cap_ptr = iommu->cap_ptr;
2152 int ret;
2153
2154 iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2155 PCI_BUS_NUM(iommu->devid),
2156 iommu->devid & 0xff);
2157 if (!iommu->dev)
2158 return -ENODEV;
2159
2160 /* ACPI _PRT won't have an IRQ for IOMMU */
2161 iommu->dev->irq_managed = 1;
2162
2163 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2164 &iommu->cap);
2165
2166 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2167 amd_iommu_iotlb_sup = false;
2168
2169 late_iommu_features_init(iommu);
2170
2171 if (check_feature(FEATURE_GT)) {
2172 int glxval;
2173 u64 pasmax;
2174
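		/*
		 * EFR[PASmax] encodes the PASID width; the largest usable
		 * PASID is 2^(PASmax + 1) - 1, as computed below.
		 */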
2175 pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr);
2176 iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
2177
2178 BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
2179
2180 glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr);
2181
2182 if (amd_iommu_max_glx_val == -1)
2183 amd_iommu_max_glx_val = glxval;
2184 else
2185 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2186
2187 iommu_enable_gt(iommu);
2188 }
2189
2190 if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu))
2191 return -ENOMEM;
2192
2193 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2194 pr_info("Using strict mode due to virtualization\n");
2195 iommu_set_dma_strict();
2196 amd_iommu_np_cache = true;
2197 }
2198
2199 init_iommu_perf_ctr(iommu);
2200
2201 if (is_rd890_iommu(iommu->dev)) {
2202 int i, j;
2203
2204 iommu->root_pdev =
2205 pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2206 iommu->dev->bus->number,
2207 PCI_DEVFN(0, 0));
2208
2209 /*
2210 * Some rd890 systems may not be fully reconfigured by the
2211 * BIOS, so it's necessary for us to store this information so
2212 * it can be reprogrammed on resume
2213 */
2214 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2215 &iommu->stored_addr_lo);
2216 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2217 &iommu->stored_addr_hi);
2218
2219 /* Low bit locks writes to configuration space */
2220 iommu->stored_addr_lo &= ~1;
2221
2222 for (i = 0; i < 6; i++)
2223 for (j = 0; j < 0x12; j++)
2224 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2225
2226 for (i = 0; i < 0x83; i++)
2227 iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2228 }
2229
2230 amd_iommu_erratum_746_workaround(iommu);
2231 amd_iommu_ats_write_check_workaround(iommu);
2232
2233 ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2234 amd_iommu_groups, "ivhd%d", iommu->index);
2235 if (ret)
2236 return ret;
2237
2238 /*
2239 * Allocate the per-IOMMU IOPF queue here so that PRI-capable devices
2240 * can be added to it in the device attach path.
2241 */
2242 if (amd_iommu_gt_ppr_supported()) {
2243 ret = amd_iommu_iopf_init(iommu);
2244 if (ret)
2245 return ret;
2246 }
2247
2248 ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2249 if (ret || amd_iommu_pgtable == PD_MODE_NONE) {
2250 /*
2251 * Remove sysfs if DMA translation is not supported by the
2252 * IOMMU. Do not return an error so that IRQ remapping can still
2253 * be enabled in state_next(); DTE[V, TV] must eventually be set to 0.
2254 */
2255 iommu_device_sysfs_remove(&iommu->iommu);
2256 }
2257
2258 return pci_enable_device(iommu->dev);
2259 }
2260
2261 static void print_iommu_info(void)
2262 {
2263 int i;
2264 static const char * const feat_str[] = {
2265 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2266 "IA", "GA", "HE", "PC"
2267 };
2268
2269 if (amd_iommu_efr) {
2270 pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2);
2271
2272 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2273 if (check_feature(1ULL << i))
2274 pr_cont(" %s", feat_str[i]);
2275 }
2276
2277 if (check_feature(FEATURE_GAM_VAPIC))
2278 pr_cont(" GA_vAPIC");
2279
2280 if (check_feature(FEATURE_SNP))
2281 pr_cont(" SNP");
2282
2283 if (check_feature2(FEATURE_SEVSNPIO_SUP))
2284 pr_cont(" SEV-TIO");
2285
2286 pr_cont("\n");
2287 }
2288
2289 if (irq_remapping_enabled) {
2290 pr_info("Interrupt remapping enabled\n");
2291 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2292 pr_info("X2APIC enabled\n");
2293 }
2294 if (amd_iommu_pgtable == PD_MODE_V2) {
2295 pr_info("V2 page table enabled (Paging mode : %d level)\n",
2296 amd_iommu_gpt_level);
2297 }
2298 }
2299
2300 static int __init amd_iommu_init_pci(void)
2301 {
2302 struct amd_iommu *iommu;
2303 struct amd_iommu_pci_seg *pci_seg;
2304 int ret;
2305
2306 /* Init global identity domain before registering IOMMU */
2307 amd_iommu_init_identity_domain();
2308
2309 for_each_iommu(iommu) {
2310 ret = iommu_init_pci(iommu);
2311 if (ret) {
2312 pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2313 iommu->index, ret);
2314 goto out;
2315 }
2316 /* Need to setup range after PCI init */
2317 iommu_set_cwwb_range(iommu);
2318 }
2319
2320 /*
2321 * Order is important here to make sure any unity map requirements are
2322 * fulfilled. The unity mappings are created and written to the device
2323 * table during the iommu_init_pci() call.
2324 *
2325 * After that we call init_device_table_dma() to make sure any
2326 * uninitialized DTE will block DMA, and in the end we flush the caches
2327 * of all IOMMUs to make sure the changes to the device table are
2328 * active.
2329 */
2330 for_each_pci_segment(pci_seg)
2331 init_device_table_dma(pci_seg);
2332
2333 for_each_iommu(iommu)
2334 amd_iommu_flush_all_caches(iommu);
2335
2336 print_iommu_info();
2337
2338 out:
2339 return ret;
2340 }
2341
2342 /****************************************************************************
2343 *
2344 * The following functions initialize the MSI interrupts for all IOMMUs
2345 * in the system. It's a bit challenging because there could be multiple
2346 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2347 * pci_dev.
2348 *
2349 ****************************************************************************/
2350
2351 static int iommu_setup_msi(struct amd_iommu *iommu)
2352 {
2353 int r;
2354
2355 r = pci_enable_msi(iommu->dev);
2356 if (r)
2357 return r;
2358
2359 r = request_threaded_irq(iommu->dev->irq,
2360 amd_iommu_int_handler,
2361 amd_iommu_int_thread,
2362 0, "AMD-Vi",
2363 iommu);
2364
2365 if (r) {
2366 pci_disable_msi(iommu->dev);
2367 return r;
2368 }
2369
2370 return 0;
2371 }
2372
2373 union intcapxt {
2374 u64 capxt;
2375 struct {
2376 u64 reserved_0 : 2,
2377 dest_mode_logical : 1,
2378 reserved_1 : 5,
2379 destid_0_23 : 24,
2380 vector : 8,
2381 reserved_2 : 16,
2382 destid_24_31 : 8;
2383 };
2384 } __attribute__ ((packed));
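/*
 * Illustrative example (hypothetical values): for vector 0x40 targeting
 * APIC ID 0x12345678, intcapxt_unmask_irq() below would program
 * destid_0_23 = 0x345678, destid_24_31 = 0x12 and vector = 0x40 into the
 * per-log XT register.
 */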
2385
2386
2387 static struct irq_chip intcapxt_controller;
2388
2389 static int intcapxt_irqdomain_activate(struct irq_domain *domain,
2390 struct irq_data *irqd, bool reserve)
2391 {
2392 return 0;
2393 }
2394
2395 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
2396 struct irq_data *irqd)
2397 {
2398 }
2399
2400
2401 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2402 unsigned int nr_irqs, void *arg)
2403 {
2404 struct irq_alloc_info *info = arg;
2405 int i, ret;
2406
2407 if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2408 return -EINVAL;
2409
2410 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2411 if (ret < 0)
2412 return ret;
2413
2414 for (i = virq; i < virq + nr_irqs; i++) {
2415 struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2416
2417 irqd->chip = &intcapxt_controller;
2418 irqd->hwirq = info->hwirq;
2419 irqd->chip_data = info->data;
2420 __irq_set_handler(i, handle_edge_irq, 0, "edge");
2421 }
2422
2423 return ret;
2424 }
2425
2426 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2427 unsigned int nr_irqs)
2428 {
2429 irq_domain_free_irqs_top(domain, virq, nr_irqs);
2430 }
2431
2432
2433 static void intcapxt_unmask_irq(struct irq_data *irqd)
2434 {
2435 struct amd_iommu *iommu = irqd->chip_data;
2436 struct irq_cfg *cfg = irqd_cfg(irqd);
2437 union intcapxt xt;
2438
2439 xt.capxt = 0ULL;
2440 xt.dest_mode_logical = apic->dest_mode_logical;
2441 xt.vector = cfg->vector;
2442 xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2443 xt.destid_24_31 = cfg->dest_apicid >> 24;
2444
2445 writeq(xt.capxt, iommu->mmio_base + irqd->hwirq);
2446 }
2447
2448 static void intcapxt_mask_irq(struct irq_data *irqd)
2449 {
2450 struct amd_iommu *iommu = irqd->chip_data;
2451
2452 writeq(0, iommu->mmio_base + irqd->hwirq);
2453 }
2454
2455
2456 static int intcapxt_set_affinity(struct irq_data *irqd,
2457 const struct cpumask *mask, bool force)
2458 {
2459 struct irq_data *parent = irqd->parent_data;
2460 int ret;
2461
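	/*
	 * Only the parent (x86 vector) domain state is updated here; the new
	 * vector/destination is written to the XT register the next time
	 * intcapxt_unmask_irq() runs, since IRQCHIP_MOVE_DEFERRED masks and
	 * unmasks the interrupt around the move.
	 */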
2462 ret = parent->chip->irq_set_affinity(parent, mask, force);
2463 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2464 return ret;
2465 return 0;
2466 }
2467
2468 static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
2469 {
2470 return on ? -EOPNOTSUPP : 0;
2471 }
2472
2473 static struct irq_chip intcapxt_controller = {
2474 .name = "IOMMU-MSI",
2475 .irq_unmask = intcapxt_unmask_irq,
2476 .irq_mask = intcapxt_mask_irq,
2477 .irq_ack = irq_chip_ack_parent,
2478 .irq_retrigger = irq_chip_retrigger_hierarchy,
2479 .irq_set_affinity = intcapxt_set_affinity,
2480 .irq_set_wake = intcapxt_set_wake,
2481 .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED,
2482 };
2483
2484 static const struct irq_domain_ops intcapxt_domain_ops = {
2485 .alloc = intcapxt_irqdomain_alloc,
2486 .free = intcapxt_irqdomain_free,
2487 .activate = intcapxt_irqdomain_activate,
2488 .deactivate = intcapxt_irqdomain_deactivate,
2489 };
2490
2491
2492 static struct irq_domain *iommu_irqdomain;
2493
2494 static struct irq_domain *iommu_get_irqdomain(void)
2495 {
2496 struct fwnode_handle *fn;
2497
2498 /* No need for locking here (yet) as the init is single-threaded */
2499 if (iommu_irqdomain)
2500 return iommu_irqdomain;
2501
2502 fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2503 if (!fn)
2504 return NULL;
2505
2506 iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2507 fn, &intcapxt_domain_ops,
2508 NULL);
2509 if (!iommu_irqdomain)
2510 irq_domain_free_fwnode(fn);
2511
2512 return iommu_irqdomain;
2513 }
2514
2515 static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname,
2516 int hwirq, irq_handler_t thread_fn)
2517 {
2518 struct irq_domain *domain;
2519 struct irq_alloc_info info;
2520 int irq, ret;
2521 int node = dev_to_node(&iommu->dev->dev);
2522
2523 domain = iommu_get_irqdomain();
2524 if (!domain)
2525 return -ENXIO;
2526
2527 init_irq_alloc_info(&info, NULL);
2528 info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2529 info.data = iommu;
2530 info.hwirq = hwirq;
2531
2532 irq = irq_domain_alloc_irqs(domain, 1, node, &info);
2533 if (irq < 0) {
2534 irq_domain_remove(domain);
2535 return irq;
2536 }
2537
2538 ret = request_threaded_irq(irq, amd_iommu_int_handler,
2539 thread_fn, 0, devname, iommu);
2540 if (ret) {
2541 irq_domain_free_irqs(irq, 1);
2542 irq_domain_remove(domain);
2543 return ret;
2544 }
2545
2546 return 0;
2547 }
2548
2549 static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2550 {
2551 int ret;
2552
2553 snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name),
2554 "AMD-Vi%d-Evt", iommu->index);
2555 ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name,
2556 MMIO_INTCAPXT_EVT_OFFSET,
2557 amd_iommu_int_thread_evtlog);
2558 if (ret)
2559 return ret;
2560
2561 snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name),
2562 "AMD-Vi%d-PPR", iommu->index);
2563 ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name,
2564 MMIO_INTCAPXT_PPR_OFFSET,
2565 amd_iommu_int_thread_pprlog);
2566 if (ret)
2567 return ret;
2568
2569 #ifdef CONFIG_IRQ_REMAP
2570 snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name),
2571 "AMD-Vi%d-GA", iommu->index);
2572 ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name,
2573 MMIO_INTCAPXT_GALOG_OFFSET,
2574 amd_iommu_int_thread_galog);
2575 #endif
2576
2577 return ret;
2578 }
2579
2580 static int iommu_init_irq(struct amd_iommu *iommu)
2581 {
2582 int ret;
2583
2584 if (iommu->int_enabled)
2585 goto enable_faults;
2586
2587 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2588 ret = iommu_setup_intcapxt(iommu);
2589 else if (iommu->dev->msi_cap)
2590 ret = iommu_setup_msi(iommu);
2591 else
2592 ret = -ENODEV;
2593
2594 if (ret)
2595 return ret;
2596
2597 iommu->int_enabled = true;
2598 enable_faults:
2599
2600 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2601 iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2602
2603 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2604
2605 return 0;
2606 }
2607
2608 /****************************************************************************
2609 *
2610 * The next functions belong to the last pass of parsing the ACPI
2611 * table. In this pass the memory mapping requirements are
2612 * gathered (like exclusion and unity mapping ranges).
2613 *
2614 ****************************************************************************/
2615
2616 static void __init free_unity_maps(void)
2617 {
2618 struct unity_map_entry *entry, *next;
2619 struct amd_iommu_pci_seg *p, *pci_seg;
2620
2621 for_each_pci_segment_safe(pci_seg, p) {
2622 list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2623 list_del(&entry->list);
2624 kfree(entry);
2625 }
2626 }
2627 }
2628
2629 /* called for unity map ACPI definition */
2630 static int __init init_unity_map_range(struct ivmd_header *m,
2631 struct acpi_table_header *ivrs_base)
2632 {
2633 struct unity_map_entry *e = NULL;
2634 struct amd_iommu_pci_seg *pci_seg;
2635 char *s;
2636
2637 pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2638 if (pci_seg == NULL)
2639 return -ENOMEM;
2640
2641 e = kzalloc(sizeof(*e), GFP_KERNEL);
2642 if (e == NULL)
2643 return -ENOMEM;
2644
2645 switch (m->type) {
2646 default:
2647 kfree(e);
2648 return 0;
2649 case ACPI_IVMD_TYPE:
2650 s = "IVMD_TYPE\t\t\t";
2651 e->devid_start = e->devid_end = m->devid;
2652 break;
2653 case ACPI_IVMD_TYPE_ALL:
2654 s = "IVMD_TYPE_ALL\t\t";
2655 e->devid_start = 0;
2656 e->devid_end = pci_seg->last_bdf;
2657 break;
2658 case ACPI_IVMD_TYPE_RANGE:
2659 s = "IVMD_TYPE_RANGE\t\t";
2660 e->devid_start = m->devid;
2661 e->devid_end = m->aux;
2662 break;
2663 }
2664 e->address_start = PAGE_ALIGN(m->range_start);
2665 e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2666 e->prot = m->flags >> 1;
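	/*
	 * m->flags carries IR in bit 1 and IW in bit 2; shifting right by one
	 * lines the value up with the IOMMU_PROT_IR/IOMMU_PROT_IW bits used
	 * for unity mappings.
	 */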
2667
2668 /*
2669 * Treat per-device exclusion ranges as r/w unity-mapped regions,
2670 * since some buggy BIOSes may overwrite the exclusion range
2671 * (the exclusion_start and exclusion_length members). This
2672 * happens when multiple exclusion ranges (IVMD entries) are
2673 * defined in the ACPI table.
2674 */
2675 if (m->flags & IVMD_FLAG_EXCL_RANGE)
2676 e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2677
2678 DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2679 "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2680 " flags: %x\n", s, m->pci_seg,
2681 PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2682 PCI_FUNC(e->devid_start), m->pci_seg,
2683 PCI_BUS_NUM(e->devid_end),
2684 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2685 e->address_start, e->address_end, m->flags);
2686
2687 list_add_tail(&e->list, &pci_seg->unity_map);
2688
2689 return 0;
2690 }
2691
2692 /* iterates over all memory definitions we find in the ACPI table */
2693 static int __init init_memory_definitions(struct acpi_table_header *table)
2694 {
2695 u8 *p = (u8 *)table, *end = (u8 *)table;
2696 struct ivmd_header *m;
2697
2698 end += table->length;
2699 p += IVRS_HEADER_LENGTH;
2700
2701 while (p < end) {
2702 m = (struct ivmd_header *)p;
2703 if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2704 init_unity_map_range(m, table);
2705
2706 p += m->length;
2707 }
2708
2709 return 0;
2710 }
2711
2712 /*
2713 * Init the device table to not allow DMA access for devices
2714 */
2715 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2716 {
2717 u32 devid;
2718 struct dev_table_entry *dev_table = pci_seg->dev_table;
2719
2720 if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE)
2721 return;
2722
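	/*
	 * A DTE that is valid and has translation enabled but uses host
	 * paging mode 0 blocks all DMA from the device until a domain is
	 * attached. With SNP enabled, only the valid bit is set here.
	 */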
2723 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2724 set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID);
2725 if (!amd_iommu_snp_en)
2726 set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION);
2727 }
2728 }
2729
2730 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2731 {
2732 u32 devid;
2733 struct dev_table_entry *dev_table = pci_seg->dev_table;
2734
2735 if (dev_table == NULL)
2736 return;
2737
2738 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2739 dev_table[devid].data[0] = 0ULL;
2740 dev_table[devid].data[1] = 0ULL;
2741 }
2742 }
2743
2744 static void init_device_table(void)
2745 {
2746 struct amd_iommu_pci_seg *pci_seg;
2747 u32 devid;
2748
2749 if (!amd_iommu_irq_remap)
2750 return;
2751
2752 for_each_pci_segment(pci_seg) {
2753 for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2754 set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN);
2755 }
2756 }
2757
2758 static void iommu_init_flags(struct amd_iommu *iommu)
2759 {
2760 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2761 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2762 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2763
2764 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2765 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2766 iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2767
2768 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2769 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2770 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2771
2772 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2773 iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2774 iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2775
2776 /*
2777 * make IOMMU memory accesses cache coherent
2778 */
2779 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2780
2781 /* Set IOTLB invalidation timeout to 1s */
2782 iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT);
2783
2784 /* Enable Enhanced Peripheral Page Request Handling */
2785 if (check_feature(FEATURE_EPHSUP))
2786 iommu_feature_enable(iommu, CONTROL_EPH_EN);
2787 }
2788
2789 static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2790 {
2791 int i, j;
2792 u32 ioc_feature_control;
2793 struct pci_dev *pdev = iommu->root_pdev;
2794
2795 /* RD890 BIOSes may not have completely reconfigured the iommu */
2796 if (!is_rd890_iommu(iommu->dev) || !pdev)
2797 return;
2798
2799 /*
2800 * First, we need to ensure that the iommu is enabled. This is
2801 * controlled by a register in the northbridge
2802 */
2803
2804 /* Select Northbridge indirect register 0x75 and enable writing */
2805 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2806 pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2807
2808 /* Enable the iommu */
2809 if (!(ioc_feature_control & 0x1))
2810 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2811
2812 /* Restore the iommu BAR */
2813 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2814 iommu->stored_addr_lo);
2815 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2816 iommu->stored_addr_hi);
2817
2818 /* Restore the l1 indirect regs for each of the 6 l1s */
2819 for (i = 0; i < 6; i++)
2820 for (j = 0; j < 0x12; j++)
2821 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2822
2823 /* Restore the l2 indirect regs */
2824 for (i = 0; i < 0x83; i++)
2825 iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2826
2827 /* Lock PCI setup registers */
2828 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2829 iommu->stored_addr_lo | 1);
2830 }
2831
2832 static void iommu_enable_ga(struct amd_iommu *iommu)
2833 {
2834 #ifdef CONFIG_IRQ_REMAP
2835 switch (amd_iommu_guest_ir) {
2836 case AMD_IOMMU_GUEST_IR_VAPIC:
2837 case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2838 iommu_feature_enable(iommu, CONTROL_GA_EN);
2839 iommu->irte_ops = &irte_128_ops;
2840 break;
2841 default:
2842 iommu->irte_ops = &irte_32_ops;
2843 break;
2844 }
2845 #endif
2846 }
2847
2848 static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
2849 {
2850 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
2851 }
2852
2853 static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
2854 {
2855 u64 ctrl;
2856
2857 if (!amd_iommu_irtcachedis)
2858 return;
2859
2860 /*
2861 * Note:
2862 * Support for the IRTCacheDis feature is determined by
2863 * checking whether the bit is writable.
2864 */
2865 iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
2866 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
2867 ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
2868 if (ctrl)
2869 iommu->irtcachedis_enabled = true;
2870 pr_info("iommu%d (%#06x) : IRT cache is %s\n",
2871 iommu->index, iommu->devid,
2872 iommu->irtcachedis_enabled ? "disabled" : "enabled");
2873 }
2874
2875 static void iommu_enable_2k_int(struct amd_iommu *iommu)
2876 {
2877 if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
2878 return;
2879
2880 iommu_feature_set(iommu,
2881 CONTROL_NUM_INT_REMAP_MODE_2K,
2882 CONTROL_NUM_INT_REMAP_MODE_MASK,
2883 CONTROL_NUM_INT_REMAP_MODE);
2884 }
2885
2886 static void early_enable_iommu(struct amd_iommu *iommu)
2887 {
2888 iommu_disable(iommu);
2889 iommu_init_flags(iommu);
2890 iommu_set_device_table(iommu);
2891 iommu_enable_command_buffer(iommu);
2892 iommu_enable_event_buffer(iommu);
2893 iommu_set_exclusion_range(iommu);
2894 iommu_enable_gt(iommu);
2895 iommu_enable_ga(iommu);
2896 iommu_enable_xt(iommu);
2897 iommu_enable_irtcachedis(iommu);
2898 iommu_enable_2k_int(iommu);
2899 iommu_enable(iommu);
2900 amd_iommu_flush_all_caches(iommu);
2901 }
2902
2903 /*
2904 * This function finally enables all IOMMUs found in the system after
2905 * they have been initialized.
2906 *
2907 * Or, if running in a kdump kernel with all IOMMUs pre-enabled, try to
2908 * reuse the old content of the device table entries. If that is not the
2909 * case, or reuse fails, just continue as a normal kernel does.
2910 */
2911 static void early_enable_iommus(void)
2912 {
2913 struct amd_iommu *iommu;
2914 struct amd_iommu_pci_seg *pci_seg;
2915
2916 if (!reuse_device_table()) {
2917 /*
2918 * If we get here because reusing the device table from the old
2919 * kernel failed even though all IOMMUs were pre-enabled, print an
2920 * error message and free the allocated old_dev_tbl_cpy.
2921 */
2922 if (amd_iommu_pre_enabled) {
2923 pr_err("Failed to reuse DEV table from previous kernel.\n");
2924 /*
2925 * Bail out early if we are unable to remap/reuse the DEV table
2926 * from the previous kernel while SNP is enabled, as IOMMU commands
2927 * will time out without a DEV table and cause a kdump boot panic.
2928 */
2929 BUG_ON(check_feature(FEATURE_SNP));
2930 }
2931
2932 for_each_pci_segment(pci_seg) {
2933 if (pci_seg->old_dev_tbl_cpy != NULL) {
2934 memunmap((void *)pci_seg->old_dev_tbl_cpy);
2935 pci_seg->old_dev_tbl_cpy = NULL;
2936 }
2937 }
2938
2939 for_each_iommu(iommu) {
2940 clear_translation_pre_enabled(iommu);
2941 early_enable_iommu(iommu);
2942 }
2943 } else {
2944 pr_info("Reused DEV table from previous kernel.\n");
2945
2946 for_each_pci_segment(pci_seg) {
2947 iommu_free_pages(pci_seg->dev_table);
2948 pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
2949 }
2950
2951 for_each_iommu(iommu) {
2952 iommu_disable_command_buffer(iommu);
2953 iommu_disable_event_buffer(iommu);
2954 iommu_disable_irtcachedis(iommu);
2955 iommu_enable_command_buffer(iommu);
2956 iommu_enable_event_buffer(iommu);
2957 iommu_enable_ga(iommu);
2958 iommu_enable_xt(iommu);
2959 iommu_enable_irtcachedis(iommu);
2960 iommu_enable_2k_int(iommu);
2961 iommu_set_device_table(iommu);
2962 amd_iommu_flush_all_caches(iommu);
2963 }
2964 }
2965 }
2966
2967 static void enable_iommus_ppr(void)
2968 {
2969 struct amd_iommu *iommu;
2970
2971 if (!amd_iommu_gt_ppr_supported())
2972 return;
2973
2974 for_each_iommu(iommu)
2975 amd_iommu_enable_ppr_log(iommu);
2976 }
2977
2978 static void enable_iommus_vapic(void)
2979 {
2980 #ifdef CONFIG_IRQ_REMAP
2981 u32 status, i;
2982 struct amd_iommu *iommu;
2983
2984 for_each_iommu(iommu) {
2985 /*
2986 * Disable GALog if already running. It could have been enabled
2987 * in the previous boot before kdump.
2988 */
2989 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2990 if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2991 continue;
2992
2993 iommu_feature_disable(iommu, CONTROL_GALOG_EN);
2994 iommu_feature_disable(iommu, CONTROL_GAINT_EN);
2995
2996 /*
2997 * Wait for the GALogRun bit to read back as zero before we
2998 * can safely set or modify the GA Log registers.
2999 */
3000 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
3001 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
3002 if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
3003 break;
3004 udelay(10);
3005 }
3006
3007 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
3008 return;
3009 }
3010
3011 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
3012 !check_feature(FEATURE_GAM_VAPIC)) {
3013 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3014 return;
3015 }
3016
3017 if (amd_iommu_snp_en &&
3018 !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
3019 pr_warn("Force to disable Virtual APIC due to SNP\n");
3020 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3021 return;
3022 }
3023
3024 /* Enabling GAM and SNPAVIC support */
3025 for_each_iommu(iommu) {
3026 if (iommu_init_ga_log(iommu) ||
3027 iommu_ga_log_enable(iommu))
3028 return;
3029
3030 iommu_feature_enable(iommu, CONTROL_GAM_EN);
3031 if (amd_iommu_snp_en)
3032 iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
3033 }
3034
3035 amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
3036 pr_info("Virtual APIC enabled\n");
3037 #endif
3038 }
3039
3040 static void disable_iommus(void)
3041 {
3042 struct amd_iommu *iommu;
3043
3044 for_each_iommu(iommu)
3045 iommu_disable(iommu);
3046
3047 #ifdef CONFIG_IRQ_REMAP
3048 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
3049 amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
3050 #endif
3051 }
3052
3053 /*
3054 * Suspend/Resume support
3055 * disable suspend until real resume implemented
3056 */
3057
3058 static void amd_iommu_resume(void *data)
3059 {
3060 struct amd_iommu *iommu;
3061
3062 for_each_iommu(iommu)
3063 iommu_apply_resume_quirks(iommu);
3064
3065 /* re-load the hardware */
3066 for_each_iommu(iommu)
3067 early_enable_iommu(iommu);
3068
3069 amd_iommu_enable_interrupts();
3070 }
3071
3072 static int amd_iommu_suspend(void *data)
3073 {
3074 /* disable IOMMUs to go out of the way for BIOS */
3075 disable_iommus();
3076
3077 return 0;
3078 }
3079
3080 static const struct syscore_ops amd_iommu_syscore_ops = {
3081 .suspend = amd_iommu_suspend,
3082 .resume = amd_iommu_resume,
3083 };
3084
3085 static struct syscore amd_iommu_syscore = {
3086 .ops = &amd_iommu_syscore_ops,
3087 };
3088
3089 static void __init free_iommu_resources(void)
3090 {
3091 free_iommu_all();
3092 free_pci_segments();
3093 }
3094
3095 /* SB IOAPIC is always on this device in AMD systems */
3096 #define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0))
3097
3098 static bool __init check_ioapic_information(void)
3099 {
3100 const char *fw_bug = FW_BUG;
3101 bool ret, has_sb_ioapic;
3102 int idx;
3103
3104 has_sb_ioapic = false;
3105 ret = false;
3106
3107 /*
3108 * If we have map overrides on the kernel command line, the
3109 * messages in this function might not describe firmware bugs
3110 * anymore - so be careful
3111 */
3112 if (cmdline_maps)
3113 fw_bug = "";
3114
3115 for (idx = 0; idx < nr_ioapics; idx++) {
3116 int devid, id = mpc_ioapic_id(idx);
3117
3118 devid = get_ioapic_devid(id);
3119 if (devid < 0) {
3120 pr_err("%s: IOAPIC[%d] not in IVRS table\n",
3121 fw_bug, id);
3122 ret = false;
3123 } else if (devid == IOAPIC_SB_DEVID) {
3124 has_sb_ioapic = true;
3125 ret = true;
3126 }
3127 }
3128
3129 if (!has_sb_ioapic) {
3130 /*
3131 * We expect the SB IOAPIC to be listed in the IVRS
3132 * table. The system timer is connected to the SB IOAPIC
3133 * and if we don't have it in the list the system will
3134 * panic at boot time. This situation usually happens
3135 * when the BIOS is buggy and provides us the wrong
3136 * device id for the IOAPIC in the system.
3137 */
3138 pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
3139 }
3140
3141 if (!ret)
3142 pr_err("Disabling interrupt remapping\n");
3143
3144 return ret;
3145 }
3146
3147 static void __init free_dma_resources(void)
3148 {
3149 amd_iommu_pdom_id_destroy();
3150 free_unity_maps();
3151 }
3152
3153 static void __init ivinfo_init(void *ivrs)
3154 {
3155 amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
3156 }
3157
3158 /*
3159 * This is the hardware init function for AMD IOMMU in the system.
3160 * This function is called either from amd_iommu_init or from the interrupt
3161 * remapping setup code.
3162 *
3163 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
3164 * four times:
3165 *
3166 * 1 pass) Discover the most comprehensive IVHD type to use.
3167 *
3168 * 2 pass) Find the highest PCI device id the driver has to handle.
3169 * Upon this information the size of the data structures is
3170 * determined that needs to be allocated.
3171 *
3172 * 3 pass) Initialize the data structures just allocated with the
3173 * information in the ACPI table about available AMD IOMMUs
3174 * in the system. It also maps the PCI devices in the
3175 * system to specific IOMMUs
3176 *
3177 * 4 pass) After the basic data structures are allocated and
3178 * initialized we update them with information about memory
3179 * remapping requirements parsed out of the ACPI table in
3180 * this last pass.
3181 *
3182 * After everything is set up the IOMMUs are enabled and the necessary
3183 * hotplug and suspend notifiers are registered.
3184 */
3185 static int __init early_amd_iommu_init(void)
3186 {
3187 struct acpi_table_header *ivrs_base;
3188 int ret;
3189 acpi_status status;
3190 u8 efr_hats;
3191
3192 if (!amd_iommu_detected)
3193 return -ENODEV;
3194
3195 status = acpi_get_table("IVRS", 0, &ivrs_base);
3196 if (status == AE_NOT_FOUND)
3197 return -ENODEV;
3198 else if (ACPI_FAILURE(status)) {
3199 const char *err = acpi_format_exception(status);
3200 pr_err("IVRS table error: %s\n", err);
3201 return -EINVAL;
3202 }
3203
3204 if (!boot_cpu_has(X86_FEATURE_CX16)) {
3205 pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
3206 ret = -EINVAL;
3207 goto out;
3208 }
3209
3210 /*
3211 * Validate checksum here so we don't need to do it when
3212 * we actually parse the table
3213 */
3214 ret = check_ivrs_checksum(ivrs_base);
3215 if (ret)
3216 goto out;
3217
3218 ivinfo_init(ivrs_base);
3219
3220 amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
3221 DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
3222
3223 /*
3224 * now the data structures are allocated and basically initialized
3225 * start the real acpi table scan
3226 */
3227 ret = init_iommu_all(ivrs_base);
3228 if (ret)
3229 goto out;
3230
3231 /* 5 level guest page table */
3232 if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3233 FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
3234 amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
3235
3236 efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr);
3237 if (efr_hats != 0x3) {
3238 /*
3239 * The EFR[HATS] field specifies the maximum host translation level
3240 * supported; a value of 0 means 4-level paging is the maximum.
3241 */
3242 amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL;
3243 } else {
3244 pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n",
3245 efr_hats);
3246 amd_iommu_hatdis = true;
3247 }
3248
3249 if (amd_iommu_pgtable == PD_MODE_V2) {
3250 if (!amd_iommu_v2_pgtbl_supported()) {
3251 pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
3252 amd_iommu_pgtable = PD_MODE_V1;
3253 }
3254 }
3255
3256 if (amd_iommu_hatdis) {
3257 /*
3258 * Host (v1) page table is not available. Attempt to use
3259 * Guest (v2) page table.
3260 */
3261 if (amd_iommu_v2_pgtbl_supported())
3262 amd_iommu_pgtable = PD_MODE_V2;
3263 else
3264 amd_iommu_pgtable = PD_MODE_NONE;
3265 }
3266
3267 /* Disable any previously enabled IOMMUs */
3268 if (!is_kdump_kernel() || amd_iommu_disabled)
3269 disable_iommus();
3270
3271 if (amd_iommu_irq_remap)
3272 amd_iommu_irq_remap = check_ioapic_information();
3273
3274 if (amd_iommu_irq_remap) {
3275 struct amd_iommu_pci_seg *pci_seg;
3276 ret = -ENOMEM;
3277 for_each_pci_segment(pci_seg) {
3278 if (alloc_irq_lookup_table(pci_seg))
3279 goto out;
3280 }
3281 }
3282
3283 ret = init_memory_definitions(ivrs_base);
3284 if (ret)
3285 goto out;
3286
3287 /* init the device table */
3288 init_device_table();
3289
3290 out:
3291 /* Don't leak any ACPI memory */
3292 acpi_put_table(ivrs_base);
3293
3294 return ret;
3295 }
3296
3297 static int amd_iommu_enable_interrupts(void)
3298 {
3299 struct amd_iommu *iommu;
3300 int ret = 0;
3301
3302 for_each_iommu(iommu) {
3303 ret = iommu_init_irq(iommu);
3304 if (ret)
3305 goto out;
3306 }
3307
3308 /*
3309 * Interrupt handler is ready to process interrupts. Enable
3310 * PPR and GA log interrupt for all IOMMUs.
3311 */
3312 enable_iommus_vapic();
3313 enable_iommus_ppr();
3314
3315 out:
3316 return ret;
3317 }
3318
3319 static bool __init detect_ivrs(void)
3320 {
3321 struct acpi_table_header *ivrs_base;
3322 acpi_status status;
3323 int i;
3324
3325 status = acpi_get_table("IVRS", 0, &ivrs_base);
3326 if (status == AE_NOT_FOUND)
3327 return false;
3328 else if (ACPI_FAILURE(status)) {
3329 const char *err = acpi_format_exception(status);
3330 pr_err("IVRS table error: %s\n", err);
3331 return false;
3332 }
3333
3334 acpi_put_table(ivrs_base);
3335
3336 if (amd_iommu_force_enable)
3337 goto out;
3338
3339 /* Don't use IOMMU if there is Stoney Ridge graphics */
3340 for (i = 0; i < 32; i++) {
3341 u32 pci_id;
3342
3343 pci_id = read_pci_config(0, i, 0, 0);
3344 if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3345 pr_info("Disable IOMMU on Stoney Ridge\n");
3346 return false;
3347 }
3348 }
3349
3350 out:
3351 /* Make sure ACS will be enabled during PCI probe */
3352 pci_request_acs();
3353
3354 return true;
3355 }
3356
3357 static __init void iommu_snp_enable(void)
3358 {
3359 #ifdef CONFIG_KVM_AMD_SEV
3360 if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
3361 return;
3362 /*
3363 * SNP support requires that the IOMMU is enabled and configured
3364 * with the V1 page table (DTE[Mode] = 0 is not supported).
3365 */
3366 if (no_iommu || iommu_default_passthrough()) {
3367 pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
3368 goto disable_snp;
3369 }
3370
3371 if (amd_iommu_pgtable != PD_MODE_V1) {
3372 pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
3373 goto disable_snp;
3374 }
3375
3376 amd_iommu_snp_en = check_feature(FEATURE_SNP);
3377 if (!amd_iommu_snp_en) {
3378 pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
3379 goto disable_snp;
3380 }
3381
3382 /*
3383 * Enable host SNP support once SNP support has been verified on the IOMMU.
3384 */
3385 if (snp_rmptable_init()) {
3386 pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n");
3387 goto disable_snp;
3388 }
3389
3390 pr_info("IOMMU SNP support enabled.\n");
3391 return;
3392
3393 disable_snp:
3394 cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3395 #endif
3396 }
3397
3398 /****************************************************************************
3399 *
3400 * AMD IOMMU Initialization State Machine
3401 *
3402 ****************************************************************************/
3403
3404 static int __init state_next(void)
3405 {
3406 int ret = 0;
3407
3408 switch (init_state) {
3409 case IOMMU_START_STATE:
3410 if (!detect_ivrs()) {
3411 init_state = IOMMU_NOT_FOUND;
3412 ret = -ENODEV;
3413 } else {
3414 init_state = IOMMU_IVRS_DETECTED;
3415 }
3416 break;
3417 case IOMMU_IVRS_DETECTED:
3418 if (amd_iommu_disabled) {
3419 init_state = IOMMU_CMDLINE_DISABLED;
3420 ret = -EINVAL;
3421 } else {
3422 ret = early_amd_iommu_init();
3423 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
3424 }
3425 break;
3426 case IOMMU_ACPI_FINISHED:
3427 early_enable_iommus();
3428 x86_platform.iommu_shutdown = disable_iommus;
3429 init_state = IOMMU_ENABLED;
3430 break;
3431 case IOMMU_ENABLED:
3432 register_syscore(&amd_iommu_syscore);
3433 iommu_snp_enable();
3434 ret = amd_iommu_init_pci();
3435 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
3436 break;
3437 case IOMMU_PCI_INIT:
3438 ret = amd_iommu_enable_interrupts();
3439 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
3440 break;
3441 case IOMMU_INTERRUPTS_EN:
3442 init_state = IOMMU_INITIALIZED;
3443 break;
3444 case IOMMU_INITIALIZED:
3445 /* Nothing to do */
3446 break;
3447 case IOMMU_NOT_FOUND:
3448 case IOMMU_INIT_ERROR:
3449 case IOMMU_CMDLINE_DISABLED:
3450 /* Error states => do nothing */
3451 ret = -EINVAL;
3452 break;
3453 default:
3454 /* Unknown state */
3455 BUG();
3456 }
3457
3458 if (ret) {
3459 free_dma_resources();
3460 if (!irq_remapping_enabled) {
3461 disable_iommus();
3462 free_iommu_resources();
3463 } else {
3464 struct amd_iommu *iommu;
3465 struct amd_iommu_pci_seg *pci_seg;
3466
3467 for_each_pci_segment(pci_seg)
3468 uninit_device_table_dma(pci_seg);
3469
3470 for_each_iommu(iommu)
3471 amd_iommu_flush_all_caches(iommu);
3472 }
3473 }
3474 return ret;
3475 }
3476
3477 static int __init iommu_go_to_state(enum iommu_init_state state)
3478 {
3479 int ret = -EINVAL;
3480
3481 while (init_state != state) {
3482 if (init_state == IOMMU_NOT_FOUND ||
3483 init_state == IOMMU_INIT_ERROR ||
3484 init_state == IOMMU_CMDLINE_DISABLED)
3485 break;
3486 ret = state_next();
3487 }
3488
3489 /*
3490 * SNP platform initialization requires IOMMUs to be fully configured.
3491 * If the SNP support on IOMMUs has NOT been checked, simply mark SNP
3492 * as unsupported. If the SNP support on IOMMUs has been checked and
3493 * host SNP support enabled but RMP enforcement has not been enabled
3494 * in IOMMUs, then the system is in a half-baked state, but can limp
3495 * along as all memory should be Hypervisor-Owned in the RMP. WARN,
3496 * but leave SNP as "supported" to avoid confusing the kernel.
3497 */
3498 if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
3499 !WARN_ON_ONCE(amd_iommu_snp_en))
3500 cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3501
3502 return ret;
3503 }
3504
3505 #ifdef CONFIG_IRQ_REMAP
3506 int __init amd_iommu_prepare(void)
3507 {
3508 int ret;
3509
3510 amd_iommu_irq_remap = true;
3511
3512 ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3513 if (ret) {
3514 amd_iommu_irq_remap = false;
3515 return ret;
3516 }
3517
3518 return amd_iommu_irq_remap ? 0 : -ENODEV;
3519 }
3520
3521 int __init amd_iommu_enable(void)
3522 {
3523 int ret;
3524
3525 ret = iommu_go_to_state(IOMMU_ENABLED);
3526 if (ret)
3527 return ret;
3528
3529 irq_remapping_enabled = 1;
3530 return amd_iommu_xt_mode;
3531 }
3532
3533 void amd_iommu_disable(void)
3534 {
3535 amd_iommu_suspend(NULL);
3536 }
3537
3538 int amd_iommu_reenable(int mode)
3539 {
3540 amd_iommu_resume(NULL);
3541
3542 return 0;
3543 }
3544
3545 int amd_iommu_enable_faulting(unsigned int cpu)
3546 {
3547 /* We enable MSI later when PCI is initialized */
3548 return 0;
3549 }
3550 #endif
3551
3552 /*
3553 * This is the core init function for AMD IOMMU hardware in the system.
3554 * This function is called from the generic x86 DMA layer initialization
3555 * code.
3556 */
3557 static int __init amd_iommu_init(void)
3558 {
3559 int ret;
3560
3561 ret = iommu_go_to_state(IOMMU_INITIALIZED);
3562 #ifdef CONFIG_GART_IOMMU
3563 if (ret && list_empty(&amd_iommu_list)) {
3564 /*
3565 * We failed to initialize the AMD IOMMU - try fallback
3566 * to GART if possible.
3567 */
3568 gart_iommu_init();
3569 }
3570 #endif
3571
3572 if (!ret)
3573 amd_iommu_debugfs_setup();
3574
3575 return ret;
3576 }
3577
3578 static bool amd_iommu_sme_check(void)
3579 {
3580 if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3581 (boot_cpu_data.x86 != 0x17))
3582 return true;
3583
3584 /* For Fam17h, a specific level of support is required */
3585 if (boot_cpu_data.microcode >= 0x08001205)
3586 return true;
3587
3588 if ((boot_cpu_data.microcode >= 0x08001126) &&
3589 (boot_cpu_data.microcode <= 0x080011ff))
3590 return true;
3591
3592 pr_notice("IOMMU not currently supported when SME is active\n");
3593
3594 return false;
3595 }
3596
3597 /****************************************************************************
3598 *
3599 * Early detect code. This code runs at IOMMU detection time in the DMA
3600 * layer. It just looks if there is an IVRS ACPI table to detect AMD
3601 * IOMMUs
3602 *
3603 ****************************************************************************/
3604 void __init amd_iommu_detect(void)
3605 {
3606 int ret;
3607
3608 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3609 goto disable_snp;
3610
3611 if (!amd_iommu_sme_check())
3612 goto disable_snp;
3613
3614 ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3615 if (ret)
3616 goto disable_snp;
3617
3618 amd_iommu_detected = true;
3619 iommu_detected = 1;
3620 x86_init.iommu.iommu_init = amd_iommu_init;
3621 return;
3622
3623 disable_snp:
3624 if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
3625 cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3626 }
3627
3628 /****************************************************************************
3629 *
3630 * Parsing functions for the AMD IOMMU specific kernel command line
3631 * options.
3632 *
3633 ****************************************************************************/
3634
3635 static int __init parse_amd_iommu_dump(char *str)
3636 {
3637 amd_iommu_dump = true;
3638
3639 return 1;
3640 }
3641
3642 static int __init parse_amd_iommu_intr(char *str)
3643 {
3644 for (; *str; ++str) {
3645 if (strncmp(str, "legacy", 6) == 0) {
3646 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3647 break;
3648 }
3649 if (strncmp(str, "vapic", 5) == 0) {
3650 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3651 break;
3652 }
3653 }
3654 return 1;
3655 }
3656
3657 static int __init parse_amd_iommu_options(char *str)
3658 {
3659 if (!str)
3660 return -EINVAL;
3661
3662 while (*str) {
3663 if (strncmp(str, "fullflush", 9) == 0) {
3664 pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3665 iommu_set_dma_strict();
3666 } else if (strncmp(str, "force_enable", 12) == 0) {
3667 amd_iommu_force_enable = true;
3668 } else if (strncmp(str, "off", 3) == 0) {
3669 amd_iommu_disabled = true;
3670 } else if (strncmp(str, "force_isolation", 15) == 0) {
3671 amd_iommu_force_isolation = true;
3672 } else if (strncmp(str, "pgtbl_v1", 8) == 0) {
3673 amd_iommu_pgtable = PD_MODE_V1;
3674 } else if (strncmp(str, "pgtbl_v2", 8) == 0) {
3675 amd_iommu_pgtable = PD_MODE_V2;
3676 } else if (strncmp(str, "irtcachedis", 11) == 0) {
3677 amd_iommu_irtcachedis = true;
3678 } else if (strncmp(str, "nohugepages", 11) == 0) {
3679 pr_info("Restricting V1 page-sizes to 4KiB");
3680 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K;
3681 } else if (strncmp(str, "v2_pgsizes_only", 15) == 0) {
3682 pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB");
3683 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
3684 } else {
3685 pr_notice("Unknown option - '%s'\n", str);
3686 }
3687
3688 str += strcspn(str, ",");
3689 while (*str == ',')
3690 str++;
3691 }
3692
3693 return 1;
3694 }
3695
3696 static int __init parse_ivrs_ioapic(char *str)
3697 {
3698 u32 seg = 0, bus, dev, fn;
3699 int id, i;
3700 u32 devid;
3701
3702 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3703 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3704 goto found;
3705
3706 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3707 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3708 pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
3709 str, id, seg, bus, dev, fn);
3710 goto found;
3711 }
3712
3713 pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3714 return 1;
3715
3716 found:
3717 if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3718 pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3719 str);
3720 return 1;
3721 }
3722
3723 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3724
3725 cmdline_maps = true;
3726 i = early_ioapic_map_size++;
3727 early_ioapic_map[i].id = id;
3728 early_ioapic_map[i].devid = devid;
3729 early_ioapic_map[i].cmd_line = true;
3730
3731 return 1;
3732 }
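
/*
 * Example (hypothetical values): ivrs_ioapic=32@0000:00:14.0 overrides the
 * IVRS entry so that IOAPIC ID 32 maps to the device at segment 0000,
 * bus 00, device 14h, function 0.
 */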
3733
3734 static int __init parse_ivrs_hpet(char *str)
3735 {
3736 u32 seg = 0, bus, dev, fn;
3737 int id, i;
3738 u32 devid;
3739
3740 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3741 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3742 goto found;
3743
3744 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3745 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3746 pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
3747 str, id, seg, bus, dev, fn);
3748 goto found;
3749 }
3750
3751 pr_err("Invalid command line: ivrs_hpet%s\n", str);
3752 return 1;
3753
3754 found:
3755 if (early_hpet_map_size == EARLY_MAP_SIZE) {
3756 pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3757 str);
3758 return 1;
3759 }
3760
3761 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3762
3763 cmdline_maps = true;
3764 i = early_hpet_map_size++;
3765 early_hpet_map[i].id = id;
3766 early_hpet_map[i].devid = devid;
3767 early_hpet_map[i].cmd_line = true;
3768
3769 return 1;
3770 }
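
/*
 * Example (hypothetical values): ivrs_hpet=0@0000:00:14.0 maps HPET ID 0 to
 * the device at segment 0000, bus 00, device 14h, function 0.
 */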
3771
3772 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
3773
3774 static int __init parse_ivrs_acpihid(char *str)
3775 {
3776 u32 seg = 0, bus, dev, fn;
3777 char *hid, *uid, *p, *addr;
3778 char acpiid[ACPIID_LEN + 1] = { }; /* size with NULL terminator */
3779 int i;
3780
3781 addr = strchr(str, '@');
3782 if (!addr) {
3783 addr = strchr(str, '=');
3784 if (!addr)
3785 goto not_found;
3786
3787 ++addr;
3788
3789 if (strlen(addr) > ACPIID_LEN)
3790 goto not_found;
3791
3792 if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
3793 sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
3794 pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
3795 str, acpiid, seg, bus, dev, fn);
3796 goto found;
3797 }
3798 goto not_found;
3799 }
3800
3801 /* We have the '@', make it the terminator to get just the acpiid */
3802 *addr++ = 0;
3803
3804 if (strlen(str) > ACPIID_LEN)
3805 goto not_found;
3806
3807 if (sscanf(str, "=%s", acpiid) != 1)
3808 goto not_found;
3809
3810 if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
3811 sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
3812 goto found;
3813
3814 not_found:
3815 pr_err("Invalid command line: ivrs_acpihid%s\n", str);
3816 return 1;
3817
3818 found:
3819 p = acpiid;
3820 hid = strsep(&p, ":");
3821 uid = p;
3822
3823 if (!hid || !(*hid) || !uid) {
3824 pr_err("Invalid command line: hid or uid\n");
3825 return 1;
3826 }
3827
3828 /*
3829 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
3830 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
3831 */
3832 while (*uid == '0' && *(uid + 1))
3833 uid++;
3834
3835 if (strlen(hid) >= ACPIHID_HID_LEN) {
3836 pr_err("Invalid command line: hid is too long\n");
3837 return 1;
3838 } else if (strlen(uid) >= ACPIHID_UID_LEN) {
3839 pr_err("Invalid command line: uid is too long\n");
3840 return 1;
3841 }
3842
3843 i = early_acpihid_map_size++;
3844 memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3845 memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3846 early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3847 early_acpihid_map[i].cmd_line = true;
3848
3849 return 1;
3850 }

__setup("amd_iommu_dump", parse_amd_iommu_dump);
__setup("amd_iommu=", parse_amd_iommu_options);
__setup("amd_iommu_intr=", parse_amd_iommu_intr);
__setup("ivrs_ioapic", parse_ivrs_ioapic);
__setup("ivrs_hpet", parse_ivrs_hpet);
__setup("ivrs_acpihid", parse_ivrs_acpihid);

bool amd_iommu_pasid_supported(void)
{
	/* CPU page table size should match IOMMU guest page table size */
	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
		return false;

	/*
	 * Since DTE[Mode]=0 is prohibited on SNP-enabled systems
	 * (i.e. EFR[SNPSup]=1), the IOMMUv2 page table cannot be used without
	 * setting up an IOMMUv1 page table.
	 */
	return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en;
}

struct amd_iommu *get_amd_iommu(unsigned int idx)
{
	unsigned int i = 0;
	struct amd_iommu *iommu;

	for_each_iommu(iommu)
		if (i++ == idx)
			return iommu;
	return NULL;
}

/****************************************************************************
 *
 * IOMMU EFR Performance Counter support. This code provides access to the
 * IOMMU PC registers.
 *
 ****************************************************************************/
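
/*
 * Illustrative use of the counter API defined in this section (the bank,
 * counter and register-function values below are arbitrary examples):
 *
 *	u64 val;
 *	struct amd_iommu *iommu = get_amd_iommu(0);
 *
 *	if (iommu && !amd_iommu_pc_get_reg(iommu, 0, 0, 0, &val))
 *		pr_info("IOMMU PC bank 0, counter 0, fxn 0: 0x%llx\n", val);
 */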

u8 amd_iommu_pc_get_max_banks(unsigned int idx)
{
	struct amd_iommu *iommu = get_amd_iommu(idx);

	if (iommu)
		return iommu->max_banks;

	return 0;
}

bool amd_iommu_pc_supported(void)
{
	return amd_iommu_pc_present;
}

u8 amd_iommu_pc_get_max_counters(unsigned int idx)
{
	struct amd_iommu *iommu = get_amd_iommu(idx);

	if (iommu)
		return iommu->max_counters;

	return 0;
}

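/*
 * Register layout, as used by the offset arithmetic below: the offset is
 * ((0x40 | bank) << 12) | (cntr << 8) | fxn, i.e. each bank occupies a 4K
 * page inside the counter aperture, each counter a 256-byte block within
 * that page, and fxn selects one 64-bit register in the block (0x00 to
 * 0x28, 8-byte aligned). Counter values are 48 bits wide, hence the
 * GENMASK_ULL(47, 0) masking.
 */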
static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
				u8 fxn, u64 *value, bool is_write)
{
	u32 offset;
	u32 max_offset_lim;

	/* Make sure the IOMMU PC resource is available */
	if (!amd_iommu_pc_present)
		return -ENODEV;

	/* Check for valid iommu and pc register indexing */
	if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
		return -ENODEV;

	offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);

	/* Limit the offset to the hw defined mmio region aperture */
	max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
			       (iommu->max_counters << 8) | 0x28);
	if ((offset < MMIO_CNTR_REG_OFFSET) ||
	    (offset > max_offset_lim))
		return -EINVAL;

	if (is_write) {
		u64 val = *value & GENMASK_ULL(47, 0);

		writel((u32)val, iommu->mmio_base + offset);
		writel((val >> 32), iommu->mmio_base + offset + 4);
	} else {
		*value = readl(iommu->mmio_base + offset + 4);
		*value <<= 32;
		*value |= readl(iommu->mmio_base + offset);
		*value &= GENMASK_ULL(47, 0);
	}

	return 0;
}

int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
{
	if (!iommu)
		return -EINVAL;

	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
}

int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
{
	if (!iommu)
		return -EINVAL;

	return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
}

#ifdef CONFIG_KVM_AMD_SEV
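/*
 * Helpers for amd_iommu_snp_disable() below: each IOMMU buffer page is
 * converted back to a shared (hypervisor-accessible) page in the RMP. For
 * a 2M-aligned pfn the RMP entry is looked up first and a huge entry is
 * split via psmash() before the individual 4K page is marked shared.
 */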
static int iommu_page_make_shared(void *page)
{
	unsigned long paddr, pfn;

	paddr = iommu_virt_to_phys(page);
	/* The C-bit may be set in the paddr */
	pfn = __sme_clr(paddr) >> PAGE_SHIFT;

	if (!(pfn % PTRS_PER_PMD)) {
		int ret, level;
		bool assigned;

		ret = snp_lookup_rmpentry(pfn, &assigned, &level);
		if (ret) {
			pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret);
			return ret;
		}

		if (!assigned) {
			pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn);
			return -EINVAL;
		}

		if (level > PG_LEVEL_4K) {
			ret = psmash(pfn);
			if (!ret)
				goto done;

			pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n",
				pfn, ret, level);
			return ret;
		}
	}

done:
	return rmp_make_shared(pfn, PG_LEVEL_4K);
}

static int iommu_make_shared(void *va, size_t size)
{
	void *page;
	int ret;

	if (!va)
		return 0;

	for (page = va; page < (va + size); page += PAGE_SIZE) {
		ret = iommu_page_make_shared(page);
		if (ret)
			return ret;
	}

	return 0;
}

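/*
 * Convert the per-IOMMU event buffer, PPR log and command-completion
 * semaphore pages back to shared state. Exported for the SEV side, which
 * decides when SNP has to be disabled; the call site is outside this file.
 */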
int amd_iommu_snp_disable(void)
{
	struct amd_iommu *iommu;
	int ret;

	if (!amd_iommu_snp_en)
		return 0;

	for_each_iommu(iommu) {
		ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE);
		if (ret)
			return ret;

		ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE);
		if (ret)
			return ret;

		ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE);
		if (ret)
			return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(amd_iommu_snp_disable);

bool amd_iommu_sev_tio_supported(void)
{
	return check_feature2(FEATURE_SEVSNPIO_SUP);
}
EXPORT_SYMBOL_GPL(amd_iommu_sev_tio_supported);
#endif
