1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 * Leo Duran <leo.duran@amd.com> 6 */ 7 8 #define pr_fmt(fmt) "AMD-Vi: " fmt 9 #define dev_fmt(fmt) pr_fmt(fmt) 10 11 #include <linux/pci.h> 12 #include <linux/acpi.h> 13 #include <linux/list.h> 14 #include <linux/bitmap.h> 15 #include <linux/syscore_ops.h> 16 #include <linux/interrupt.h> 17 #include <linux/msi.h> 18 #include <linux/irq.h> 19 #include <linux/amd-iommu.h> 20 #include <linux/export.h> 21 #include <linux/kmemleak.h> 22 #include <linux/cc_platform.h> 23 #include <linux/iopoll.h> 24 #include <asm/pci-direct.h> 25 #include <asm/iommu.h> 26 #include <asm/apic.h> 27 #include <asm/gart.h> 28 #include <asm/x86_init.h> 29 #include <asm/io_apic.h> 30 #include <asm/irq_remapping.h> 31 #include <asm/set_memory.h> 32 #include <asm/sev.h> 33 34 #include <linux/crash_dump.h> 35 36 #include "amd_iommu.h" 37 #include "../irq_remapping.h" 38 #include "../iommu-pages.h" 39 40 /* 41 * definitions for the ACPI scanning code 42 */ 43 #define IVRS_HEADER_LENGTH 48 44 45 #define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40 46 #define ACPI_IVMD_TYPE_ALL 0x20 47 #define ACPI_IVMD_TYPE 0x21 48 #define ACPI_IVMD_TYPE_RANGE 0x22 49 50 #define IVHD_DEV_ALL 0x01 51 #define IVHD_DEV_SELECT 0x02 52 #define IVHD_DEV_SELECT_RANGE_START 0x03 53 #define IVHD_DEV_RANGE_END 0x04 54 #define IVHD_DEV_ALIAS 0x42 55 #define IVHD_DEV_ALIAS_RANGE 0x43 56 #define IVHD_DEV_EXT_SELECT 0x46 57 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 58 #define IVHD_DEV_SPECIAL 0x48 59 #define IVHD_DEV_ACPI_HID 0xf0 60 61 #define UID_NOT_PRESENT 0 62 #define UID_IS_INTEGER 1 63 #define UID_IS_CHARACTER 2 64 65 #define IVHD_SPECIAL_IOAPIC 1 66 #define IVHD_SPECIAL_HPET 2 67 68 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 69 #define IVHD_FLAG_PASSPW_EN_MASK 0x02 70 #define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 71 #define IVHD_FLAG_ISOC_EN_MASK 0x08 72 73 #define IVMD_FLAG_EXCL_RANGE 0x08 74 
#define IVMD_FLAG_IW 0x04 75 #define IVMD_FLAG_IR 0x02 76 #define IVMD_FLAG_UNITY_MAP 0x01 77 78 #define ACPI_DEVFLAG_INITPASS 0x01 79 #define ACPI_DEVFLAG_EXTINT 0x02 80 #define ACPI_DEVFLAG_NMI 0x04 81 #define ACPI_DEVFLAG_SYSMGT1 0x10 82 #define ACPI_DEVFLAG_SYSMGT2 0x20 83 #define ACPI_DEVFLAG_LINT0 0x40 84 #define ACPI_DEVFLAG_LINT1 0x80 85 #define ACPI_DEVFLAG_ATSDIS 0x10000000 86 87 #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ 88 | ((dev & 0x1f) << 3) | (fn & 0x7)) 89 90 /* 91 * ACPI table definitions 92 * 93 * These data structures are laid over the table to parse the important values 94 * out of it. 95 */ 96 97 /* 98 * structure describing one IOMMU in the ACPI table. Typically followed by one 99 * or more ivhd_entrys. 100 */ 101 struct ivhd_header { 102 u8 type; 103 u8 flags; 104 u16 length; 105 u16 devid; 106 u16 cap_ptr; 107 u64 mmio_phys; 108 u16 pci_seg; 109 u16 info; 110 u32 efr_attr; 111 112 /* Following only valid on IVHD type 11h and 40h */ 113 u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ 114 u64 efr_reg2; 115 } __attribute__((packed)); 116 117 /* 118 * A device entry describing which devices a specific IOMMU translates and 119 * which requestor ids they use. 120 */ 121 struct ivhd_entry { 122 u8 type; 123 u16 devid; 124 u8 flags; 125 struct_group(ext_hid, 126 u32 ext; 127 u32 hidh; 128 ); 129 u64 cid; 130 u8 uidf; 131 u8 uidl; 132 u8 uid; 133 } __attribute__((packed)); 134 135 int amd_iommu_evtlog_size = EVTLOG_SIZE_DEF; 136 int amd_iommu_pprlog_size = PPRLOG_SIZE_DEF; 137 138 /* 139 * An AMD IOMMU memory definition structure. It defines things like exclusion 140 * ranges for devices and regions that should be unity mapped. 
141 */ 142 struct ivmd_header { 143 u8 type; 144 u8 flags; 145 u16 length; 146 u16 devid; 147 u16 aux; 148 u16 pci_seg; 149 u8 resv[6]; 150 u64 range_start; 151 u64 range_length; 152 } __attribute__((packed)); 153 154 bool amd_iommu_dump; 155 bool amd_iommu_irq_remap __read_mostly; 156 157 enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1; 158 /* Virtual address size */ 159 u8 amd_iommu_hpt_vasize; 160 /* Guest page table level */ 161 int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; 162 163 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 164 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; 165 166 static bool amd_iommu_detected; 167 static bool amd_iommu_disabled __initdata; 168 static bool amd_iommu_force_enable __initdata; 169 static bool amd_iommu_irtcachedis; 170 static int amd_iommu_target_ivhd_type; 171 172 /* Global EFR and EFR2 registers */ 173 u64 amd_iommu_efr; 174 u64 amd_iommu_efr2; 175 176 /* Host (v1) page table is not supported*/ 177 bool amd_iommu_hatdis; 178 179 /* SNP is enabled on the system? */ 180 bool amd_iommu_snp_en; 181 EXPORT_SYMBOL(amd_iommu_snp_en); 182 183 LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ 184 LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ 185 LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */ 186 187 /* Number of IOMMUs present in the system */ 188 static int amd_iommus_present; 189 190 /* IOMMUs have a non-present cache? 
*/ 191 bool amd_iommu_np_cache __read_mostly; 192 bool amd_iommu_iotlb_sup __read_mostly = true; 193 194 static bool amd_iommu_pc_present __read_mostly; 195 bool amdr_ivrs_remap_support __read_mostly; 196 197 bool amd_iommu_force_isolation __read_mostly; 198 199 unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES; 200 201 enum iommu_init_state { 202 IOMMU_START_STATE, 203 IOMMU_IVRS_DETECTED, 204 IOMMU_ACPI_FINISHED, 205 IOMMU_ENABLED, 206 IOMMU_PCI_INIT, 207 IOMMU_INTERRUPTS_EN, 208 IOMMU_INITIALIZED, 209 IOMMU_NOT_FOUND, 210 IOMMU_INIT_ERROR, 211 IOMMU_CMDLINE_DISABLED, 212 }; 213 214 /* Early ioapic and hpet maps from kernel command line */ 215 #define EARLY_MAP_SIZE 4 216 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE]; 217 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE]; 218 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE]; 219 220 static int __initdata early_ioapic_map_size; 221 static int __initdata early_hpet_map_size; 222 static int __initdata early_acpihid_map_size; 223 224 static bool __initdata cmdline_maps; 225 226 static enum iommu_init_state init_state = IOMMU_START_STATE; 227 228 static int amd_iommu_enable_interrupts(void); 229 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); 230 231 static bool amd_iommu_pre_enabled = true; 232 233 static u32 amd_iommu_ivinfo __initdata; 234 235 bool translation_pre_enabled(struct amd_iommu *iommu) 236 { 237 return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); 238 } 239 240 static void clear_translation_pre_enabled(struct amd_iommu *iommu) 241 { 242 iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 243 } 244 245 static void init_translation_status(struct amd_iommu *iommu) 246 { 247 u64 ctrl; 248 249 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 250 if (ctrl & (1<<CONTROL_IOMMU_EN)) 251 iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 252 } 253 254 int amd_iommu_get_num_iommus(void) 255 { 256 
return amd_iommus_present; 257 } 258 259 bool amd_iommu_ht_range_ignore(void) 260 { 261 return check_feature2(FEATURE_HT_RANGE_IGNORE); 262 } 263 264 /* 265 * Iterate through all the IOMMUs to get common EFR 266 * masks among all IOMMUs and warn if found inconsistency. 267 */ 268 static __init void get_global_efr(void) 269 { 270 struct amd_iommu *iommu; 271 272 for_each_iommu(iommu) { 273 u64 tmp = iommu->features; 274 u64 tmp2 = iommu->features2; 275 276 if (list_is_first(&iommu->list, &amd_iommu_list)) { 277 amd_iommu_efr = tmp; 278 amd_iommu_efr2 = tmp2; 279 continue; 280 } 281 282 if (amd_iommu_efr == tmp && 283 amd_iommu_efr2 == tmp2) 284 continue; 285 286 pr_err(FW_BUG 287 "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n", 288 tmp, tmp2, amd_iommu_efr, amd_iommu_efr2, 289 iommu->index, iommu->pci_seg->id, 290 PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid), 291 PCI_FUNC(iommu->devid)); 292 293 amd_iommu_efr &= tmp; 294 amd_iommu_efr2 &= tmp2; 295 } 296 297 pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); 298 } 299 300 /* 301 * For IVHD type 0x11/0x40, EFR is also available via IVHD. 302 * Default to IVHD EFR since it is available sooner 303 * (i.e. before PCI init). 
304 */ 305 static void __init early_iommu_features_init(struct amd_iommu *iommu, 306 struct ivhd_header *h) 307 { 308 if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) { 309 iommu->features = h->efr_reg; 310 iommu->features2 = h->efr_reg2; 311 } 312 if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP) 313 amdr_ivrs_remap_support = true; 314 } 315 316 /* Access to l1 and l2 indexed register spaces */ 317 318 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) 319 { 320 u32 val; 321 322 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 323 pci_read_config_dword(iommu->dev, 0xfc, &val); 324 return val; 325 } 326 327 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) 328 { 329 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); 330 pci_write_config_dword(iommu->dev, 0xfc, val); 331 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 332 } 333 334 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) 335 { 336 u32 val; 337 338 pci_write_config_dword(iommu->dev, 0xf0, address); 339 pci_read_config_dword(iommu->dev, 0xf4, &val); 340 return val; 341 } 342 343 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) 344 { 345 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); 346 pci_write_config_dword(iommu->dev, 0xf4, val); 347 } 348 349 /**************************************************************************** 350 * 351 * AMD IOMMU MMIO register space handling functions 352 * 353 * These functions are used to program the IOMMU device registers in 354 * MMIO space required for that driver. 
355 * 356 ****************************************************************************/ 357 358 static void iommu_set_cwwb_range(struct amd_iommu *iommu) 359 { 360 u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); 361 u64 entry = start & PM_ADDR_MASK; 362 363 if (!check_feature(FEATURE_SNP)) 364 return; 365 366 /* Note: 367 * Re-purpose Exclusion base/limit registers for Completion wait 368 * write-back base/limit. 369 */ 370 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, 371 &entry, sizeof(entry)); 372 373 /* Note: 374 * Default to 4 Kbytes, which can be specified by setting base 375 * address equal to the limit address. 376 */ 377 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, 378 &entry, sizeof(entry)); 379 } 380 381 /* Programs the physical address of the device table into the IOMMU hardware */ 382 static void iommu_set_device_table(struct amd_iommu *iommu) 383 { 384 u64 entry; 385 u32 dev_table_size = iommu->pci_seg->dev_table_size; 386 void *dev_table = (void *)get_dev_table(iommu); 387 388 BUG_ON(iommu->mmio_base == NULL); 389 390 if (is_kdump_kernel()) 391 return; 392 393 entry = iommu_virt_to_phys(dev_table); 394 entry |= (dev_table_size >> 12) - 1; 395 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, 396 &entry, sizeof(entry)); 397 } 398 399 static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift) 400 { 401 u64 ctrl; 402 403 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 404 mask <<= shift; 405 ctrl &= ~mask; 406 ctrl |= (val << shift) & mask; 407 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 408 } 409 410 /* Generic functions to enable/disable certain features of the IOMMU. 
*/ 411 void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 412 { 413 iommu_feature_set(iommu, 1ULL, 1ULL, bit); 414 } 415 416 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) 417 { 418 iommu_feature_set(iommu, 0ULL, 1ULL, bit); 419 } 420 421 /* Function to enable the hardware */ 422 static void iommu_enable(struct amd_iommu *iommu) 423 { 424 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 425 } 426 427 static void iommu_disable(struct amd_iommu *iommu) 428 { 429 if (!iommu->mmio_base) 430 return; 431 432 /* Disable command buffer */ 433 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 434 435 /* Disable event logging and event interrupts */ 436 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); 437 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 438 439 /* Disable IOMMU GA_LOG */ 440 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 441 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 442 443 /* Disable IOMMU PPR logging */ 444 iommu_feature_disable(iommu, CONTROL_PPRLOG_EN); 445 iommu_feature_disable(iommu, CONTROL_PPRINT_EN); 446 447 /* Disable IOMMU hardware itself */ 448 iommu_feature_disable(iommu, CONTROL_IOMMU_EN); 449 450 /* Clear IRTE cache disabling bit */ 451 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); 452 } 453 454 /* 455 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in 456 * the system has one. 457 */ 458 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) 459 { 460 if (!request_mem_region(address, end, "amd_iommu")) { 461 pr_err("Can not reserve memory region %llx-%llx for mmio\n", 462 address, end); 463 pr_err("This is a BIOS bug. 
Please contact your hardware vendor\n"); 464 return NULL; 465 } 466 467 return (u8 __iomem *)ioremap(address, end); 468 } 469 470 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) 471 { 472 if (iommu->mmio_base) 473 iounmap(iommu->mmio_base); 474 release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); 475 } 476 477 static inline u32 get_ivhd_header_size(struct ivhd_header *h) 478 { 479 u32 size = 0; 480 481 switch (h->type) { 482 case 0x10: 483 size = 24; 484 break; 485 case 0x11: 486 case 0x40: 487 size = 40; 488 break; 489 } 490 return size; 491 } 492 493 /**************************************************************************** 494 * 495 * The functions below belong to the first pass of AMD IOMMU ACPI table 496 * parsing. In this pass we try to find out the highest device id this 497 * code has to handle. Upon this information the size of the shared data 498 * structures is determined later. 499 * 500 ****************************************************************************/ 501 502 /* 503 * This function calculates the length of a given IVHD entry 504 */ 505 static inline int ivhd_entry_length(u8 *ivhd) 506 { 507 u32 type = ((struct ivhd_entry *)ivhd)->type; 508 509 if (type < 0x80) { 510 return 0x04 << (*ivhd >> 6); 511 } else if (type == IVHD_DEV_ACPI_HID) { 512 /* For ACPI_HID, offset 21 is uid len */ 513 return *((u8 *)ivhd + 21) + 22; 514 } 515 return 0; 516 } 517 518 /* 519 * After reading the highest device id from the IOMMU PCI capability header 520 * this function looks if there is a higher device id defined in the ACPI table 521 */ 522 static int __init find_last_devid_from_ivhd(struct ivhd_header *h) 523 { 524 u8 *p = (void *)h, *end = (void *)h; 525 struct ivhd_entry *dev; 526 int last_devid = -EINVAL; 527 528 u32 ivhd_size = get_ivhd_header_size(h); 529 530 if (!ivhd_size) { 531 pr_err("Unsupported IVHD type %#x\n", h->type); 532 return -EINVAL; 533 } 534 535 p += ivhd_size; 536 end += h->length; 537 538 while (p < end) { 
539 dev = (struct ivhd_entry *)p; 540 switch (dev->type) { 541 case IVHD_DEV_ALL: 542 /* Use maximum BDF value for DEV_ALL */ 543 return 0xffff; 544 case IVHD_DEV_SELECT: 545 case IVHD_DEV_RANGE_END: 546 case IVHD_DEV_ALIAS: 547 case IVHD_DEV_EXT_SELECT: 548 /* all the above subfield types refer to device ids */ 549 if (dev->devid > last_devid) 550 last_devid = dev->devid; 551 break; 552 default: 553 break; 554 } 555 p += ivhd_entry_length(p); 556 } 557 558 WARN_ON(p != end); 559 560 return last_devid; 561 } 562 563 static int __init check_ivrs_checksum(struct acpi_table_header *table) 564 { 565 int i; 566 u8 checksum = 0, *p = (u8 *)table; 567 568 for (i = 0; i < table->length; ++i) 569 checksum += p[i]; 570 if (checksum != 0) { 571 /* ACPI table corrupt */ 572 pr_err(FW_BUG "IVRS invalid checksum\n"); 573 return -ENODEV; 574 } 575 576 return 0; 577 } 578 579 /* 580 * Iterate over all IVHD entries in the ACPI table and find the highest device 581 * id which we need to handle. This is the first of three functions which parse 582 * the ACPI table. So we check the checksum here. 583 */ 584 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg) 585 { 586 u8 *p = (u8 *)table, *end = (u8 *)table; 587 struct ivhd_header *h; 588 int last_devid, last_bdf = 0; 589 590 p += IVRS_HEADER_LENGTH; 591 592 end += table->length; 593 while (p < end) { 594 h = (struct ivhd_header *)p; 595 if (h->pci_seg == pci_seg && 596 h->type == amd_iommu_target_ivhd_type) { 597 last_devid = find_last_devid_from_ivhd(h); 598 599 if (last_devid < 0) 600 return -EINVAL; 601 if (last_devid > last_bdf) 602 last_bdf = last_devid; 603 } 604 p += h->length; 605 } 606 WARN_ON(p != end); 607 608 return last_bdf; 609 } 610 611 /**************************************************************************** 612 * 613 * The following functions belong to the code path which parses the ACPI table 614 * the second time. 
In this ACPI parsing iteration we allocate IOMMU specific 615 * data structures, initialize the per PCI segment device/alias/rlookup table 616 * and also basically initialize the hardware. 617 * 618 ****************************************************************************/ 619 620 /* Allocate per PCI segment device table */ 621 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) 622 { 623 pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, 624 pci_seg->dev_table_size); 625 if (!pci_seg->dev_table) 626 return -ENOMEM; 627 628 return 0; 629 } 630 631 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) 632 { 633 if (is_kdump_kernel()) 634 memunmap((void *)pci_seg->dev_table); 635 else 636 iommu_free_pages(pci_seg->dev_table); 637 pci_seg->dev_table = NULL; 638 } 639 640 /* Allocate per PCI segment IOMMU rlookup table. */ 641 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 642 { 643 pci_seg->rlookup_table = kvzalloc_objs(*pci_seg->rlookup_table, 644 pci_seg->last_bdf + 1); 645 if (pci_seg->rlookup_table == NULL) 646 return -ENOMEM; 647 648 return 0; 649 } 650 651 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 652 { 653 kvfree(pci_seg->rlookup_table); 654 pci_seg->rlookup_table = NULL; 655 } 656 657 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 658 { 659 pci_seg->irq_lookup_table = kvzalloc_objs(*pci_seg->irq_lookup_table, 660 pci_seg->last_bdf + 1); 661 if (pci_seg->irq_lookup_table == NULL) 662 return -ENOMEM; 663 664 return 0; 665 } 666 667 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 668 { 669 kvfree(pci_seg->irq_lookup_table); 670 pci_seg->irq_lookup_table = NULL; 671 } 672 673 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) 674 { 675 int i; 676 677 pci_seg->alias_table = kvmalloc_objs(*pci_seg->alias_table, 678 pci_seg->last_bdf + 1); 679 if 
(!pci_seg->alias_table) 680 return -ENOMEM; 681 682 /* 683 * let all alias entries point to itself 684 */ 685 for (i = 0; i <= pci_seg->last_bdf; ++i) 686 pci_seg->alias_table[i] = i; 687 688 return 0; 689 } 690 691 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) 692 { 693 kvfree(pci_seg->alias_table); 694 pci_seg->alias_table = NULL; 695 } 696 697 static inline void *iommu_memremap(unsigned long paddr, size_t size) 698 { 699 phys_addr_t phys; 700 701 if (!paddr) 702 return NULL; 703 704 /* 705 * Obtain true physical address in kdump kernel when SME is enabled. 706 * Currently, previous kernel with SME enabled and kdump kernel 707 * with SME support disabled is not supported. 708 */ 709 phys = __sme_clr(paddr); 710 711 if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) 712 return (__force void *)ioremap_encrypted(phys, size); 713 else 714 return memremap(phys, size, MEMREMAP_WB); 715 } 716 717 /* 718 * Allocates the command buffer. This buffer is per AMD IOMMU. We can 719 * write commands to that buffer later and the IOMMU will execute them 720 * asynchronously 721 */ 722 static int __init alloc_command_buffer(struct amd_iommu *iommu) 723 { 724 iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE); 725 726 return iommu->cmd_buf ? 0 : -ENOMEM; 727 } 728 729 /* 730 * Interrupt handler has processed all pending events and adjusted head 731 * and tail pointer. Reset overflow mask and restart logging again. 
732 */ 733 void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, 734 u8 cntrl_intr, u8 cntrl_log, 735 u32 status_run_mask, u32 status_overflow_mask) 736 { 737 u32 status; 738 739 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 740 if (status & status_run_mask) 741 return; 742 743 pr_info_ratelimited("IOMMU %s log restarting\n", evt_type); 744 745 iommu_feature_disable(iommu, cntrl_log); 746 iommu_feature_disable(iommu, cntrl_intr); 747 748 writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET); 749 750 iommu_feature_enable(iommu, cntrl_intr); 751 iommu_feature_enable(iommu, cntrl_log); 752 } 753 754 /* 755 * This function restarts event logging in case the IOMMU experienced 756 * an event log buffer overflow. 757 */ 758 void amd_iommu_restart_event_logging(struct amd_iommu *iommu) 759 { 760 amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN, 761 CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK, 762 MMIO_STATUS_EVT_OVERFLOW_MASK); 763 } 764 765 /* 766 * This function restarts event logging in case the IOMMU experienced 767 * GA log overflow. 768 */ 769 void amd_iommu_restart_ga_log(struct amd_iommu *iommu) 770 { 771 amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN, 772 CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK, 773 MMIO_STATUS_GALOG_OVERFLOW_MASK); 774 } 775 776 /* 777 * This function resets the command buffer if the IOMMU stopped fetching 778 * commands from it. 779 */ 780 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) 781 { 782 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 783 784 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 785 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 786 iommu->cmd_buf_head = 0; 787 iommu->cmd_buf_tail = 0; 788 789 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 790 } 791 792 /* 793 * This function writes the command buffer address to the hardware and 794 * enables it. 
 */
static void iommu_enable_command_buffer(struct amd_iommu *iommu)
{
	u64 entry;

	BUG_ON(iommu->cmd_buf == NULL);

	if (!is_kdump_kernel()) {
		/*
		 * Command buffer is re-used for kdump kernel and setting
		 * of MMIO register is not required.
		 */
		entry = iommu_virt_to_phys(iommu->cmd_buf);
		entry |= MMIO_CMD_SIZE_512;
		memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
			    &entry, sizeof(entry));
	}

	/* Zero head/tail and (re-)enable the buffer in both cases. */
	amd_iommu_reset_cmd_buffer(iommu);
}

/*
 * This function disables the command buffer
 */
static void iommu_disable_command_buffer(struct amd_iommu *iommu)
{
	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
}

/* Free the command buffer pages of this IOMMU. */
static void __init free_command_buffer(struct amd_iommu *iommu)
{
	iommu_free_pages(iommu->cmd_buf);
}

/*
 * Allocate a page-aligned buffer of at least @size bytes on the IOMMU's NUMA
 * node (NUMA_NO_NODE if iommu->dev is not set). When FEATURE_SNP is present
 * the buffer is additionally passed through set_memory_4k(); if that fails
 * the buffer is freed. Returns the buffer or NULL on any failure.
 */
void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
				  size_t size)
{
	int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
	void *buf;

	size = PAGE_ALIGN(size);
	buf = iommu_alloc_pages_node_sz(nid, gfp, size);
	if (!buf)
		return NULL;
	if (check_feature(FEATURE_SNP) &&
	    set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) {
		iommu_free_pages(buf);
		return NULL;
	}

	return buf;
}

/*
 * allocates the memory where the IOMMU will log its events to
 *
 * One buffer of amd_iommu_evtlog_size bytes is allocated per IOMMU. On the
 * first failure -ENOMEM is returned; buffers already allocated for earlier
 * IOMMUs are not freed here.
 */
static int __init alloc_event_buffer(void)
{
	struct amd_iommu *iommu;

	for_each_iommu(iommu) {
		iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL,
						      amd_iommu_evtlog_size);
		if (!iommu->evt_buf)
			return -ENOMEM;
	}

	return 0;
}

/*
 * For every IOMMU: program the event buffer address and length (skipped in
 * a kdump kernel), zero the head and tail registers and enable event
 * logging.
 */
static void iommu_enable_event_buffer(void)
{
	struct amd_iommu *iommu;
	u64 entry;

	for_each_iommu(iommu) {
		BUG_ON(iommu->evt_buf == NULL);

		if (!is_kdump_kernel()) {
			/*
			 * Event buffer is re-used for kdump kernel and setting
			 * of MMIO register is not required.
			 */
			entry = iommu_virt_to_phys(iommu->evt_buf);
			/* Any size other than the default selects the MAX mask. */
			entry |= (amd_iommu_evtlog_size == EVTLOG_SIZE_DEF) ?
				 EVTLOG_LEN_MASK_DEF : EVTLOG_LEN_MASK_MAX;

			memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
				    &entry, sizeof(entry));
		}

		/* set head and tail to zero manually */
		writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
		writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

		iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
	}
}

/*
 * This function disables the event log buffer
 */
static void iommu_disable_event_buffer(struct amd_iommu *iommu)
{
	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
}

/* Free the event buffer pages of this IOMMU. */
static void __init free_event_buffer(struct amd_iommu *iommu)
{
	iommu_free_pages(iommu->evt_buf);
}

/*
 * Free the GA log and its tail buffer. Compiles to an empty function
 * without CONFIG_IRQ_REMAP. The pointers are not cleared afterwards.
 */
static void free_ga_log(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	iommu_free_pages(iommu->ga_log);
	iommu_free_pages(iommu->ga_log_tail);
#endif
}

#ifdef CONFIG_IRQ_REMAP
/*
 * Program the GA log base and tail addresses, zero head/tail, enable the GA
 * interrupt and GA logging, then poll the status register until the GA log
 * run bit shows up.
 *
 * Returns 0 on success, -EINVAL if no GA log was allocated or the run bit
 * was not observed within MMIO_STATUS_TIMEOUT polls.
 */
static int iommu_ga_log_enable(struct amd_iommu *iommu)
{
	u32 status, i;
	u64 entry;

	if (!iommu->ga_log)
		return -EINVAL;

	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
		    &entry, sizeof(entry));
	/* Keep address bits 51:3 only: 8-byte aligned, below bit 52. */
	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
		 (BIT_ULL(52)-1)) & ~7ULL;
	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
		    &entry, sizeof(entry));
	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
	writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);


	iommu_feature_enable(iommu, CONTROL_GAINT_EN);
	iommu_feature_enable(iommu, CONTROL_GALOG_EN);

	/* Poll with a 10us delay between reads. */
	for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
		if (status & (MMIO_STATUS_GALOG_RUN_MASK))
			break;
		udelay(10);
	}

	if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
		return -EINVAL;

	return 0;
}

/*
 * Allocate the GA log (GA_LOG_SIZE bytes) and its 8-byte tail buffer on the
 * IOMMU's NUMA node. Warns once and fails if the guest IR mode is not
 * vAPIC. Returns 0 on success and -EINVAL on any failure.
 */
static int iommu_init_ga_log(struct amd_iommu *iommu)
{
	int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;

	if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
		return -EINVAL;

	iommu->ga_log = iommu_alloc_pages_node_sz(nid, GFP_KERNEL, GA_LOG_SIZE);
	if (!iommu->ga_log)
		goto err_out;

	iommu->ga_log_tail = iommu_alloc_pages_node_sz(nid, GFP_KERNEL, 8);
	if (!iommu->ga_log_tail)
		goto err_out;

	return 0;
err_out:
	/* NOTE(review): ga_log/ga_log_tail keep their old values after this. */
	free_ga_log(iommu);
	return -EINVAL;
}
#endif /* CONFIG_IRQ_REMAP */

/*
 * Allocate the completion-wait semaphore buffer and record its physical
 * address in cmd_sem_paddr. Returns 0 or -ENOMEM.
 */
static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
{
	iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1);
	if (!iommu->cmd_sem)
		return -ENOMEM;
	iommu->cmd_sem_paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
	return 0;
}

/*
 * For every IOMMU, read the event buffer address back from the hardware and
 * map it instead of allocating a new buffer. Returns 0 or -ENOMEM.
 */
static int __init remap_event_buffer(void)
{
	struct amd_iommu *iommu;
	u64 paddr;

	pr_info_once("Re-using event buffer from the previous kernel\n");
	for_each_iommu(iommu) {
		paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK;
		iommu->evt_buf = iommu_memremap(paddr, amd_iommu_evtlog_size);
		if (!iommu->evt_buf)
			return -ENOMEM;
	}

	return 0;
}

/*
 * Read the command buffer address back from the hardware and map it instead
 * of allocating a new buffer. Returns 0 or -ENOMEM.
 */
static int __init remap_command_buffer(struct amd_iommu *iommu)
{
	u64 paddr;

	pr_info_once("Re-using command buffer from the previous kernel\n");
	paddr = readq(iommu->mmio_base + MMIO_CMD_BUF_OFFSET) & PM_ADDR_MASK;
	iommu->cmd_buf = iommu_memremap(paddr, CMD_BUFFER_SIZE);

	return iommu->cmd_buf ? 0 : -ENOMEM;
}

/*
 * With FEATURE_SNP, map the completion-wait buffer whose address is read
 * from the exclusion base register; without it, allocate a fresh one.
 * Returns 0 or -ENOMEM.
 */
static int __init remap_or_alloc_cwwb_sem(struct amd_iommu *iommu)
{
	u64 paddr;

	if (check_feature(FEATURE_SNP)) {
		/*
		 * When SNP is enabled, the exclusion base register is used for the
		 * completion wait buffer (CWB) address. Read and re-use it.
		 */
		pr_info_once("Re-using CWB buffers from the previous kernel\n");
		paddr = readq(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET) & PM_ADDR_MASK;
		iommu->cmd_sem = iommu_memremap(paddr, PAGE_SIZE);
		if (!iommu->cmd_sem)
			return -ENOMEM;
		iommu->cmd_sem_paddr = paddr;
	} else {
		return alloc_cwwb_sem(iommu);
	}

	return 0;
}

/*
 * Set up the completion-wait semaphore and the command buffer of one IOMMU,
 * remapping the previous kernel's buffers in a kdump kernel and allocating
 * new ones otherwise. Returns 0 or the first error.
 */
static int __init alloc_iommu_buffers(struct amd_iommu *iommu)
{
	int ret;

	/*
	 * Reuse/Remap the previous kernel's allocated completion wait
	 * command and event buffers for kdump boot.
	 */
	if (is_kdump_kernel()) {
		ret = remap_or_alloc_cwwb_sem(iommu);
		if (ret)
			return ret;

		ret = remap_command_buffer(iommu);
		if (ret)
			return ret;
	} else {
		ret = alloc_cwwb_sem(iommu);
		if (ret)
			return ret;

		ret = alloc_command_buffer(iommu);
		if (ret)
			return ret;
	}

	return 0;
}

/* Free an allocated completion-wait semaphore buffer, if there is one. */
static void __init free_cwwb_sem(struct amd_iommu *iommu)
{
	if (iommu->cmd_sem)
		iommu_free_pages((void *)iommu->cmd_sem);
}
/*
 * Release the completion-wait semaphore in the kdump case: it was remapped
 * when FEATURE_SNP is present and allocated otherwise (mirrors
 * remap_or_alloc_cwwb_sem()).
 */
static void __init unmap_cwwb_sem(struct amd_iommu *iommu)
{
	if (iommu->cmd_sem) {
		if (check_feature(FEATURE_SNP))
			memunmap((void *)iommu->cmd_sem);
		else
			iommu_free_pages((void *)iommu->cmd_sem);
	}
}

/* Unmap a command buffer that was remapped from the previous kernel. */
static void __init unmap_command_buffer(struct amd_iommu *iommu)
{
	memunmap((void *)iommu->cmd_buf);
}

/* Unmap an event buffer that was remapped from the previous kernel. */
static void __init unmap_event_buffer(struct amd_iommu *iommu)
{
	memunmap(iommu->evt_buf);
}

/*
 * Release the semaphore, command and event buffers of one IOMMU, using the
 * unmap helpers in a kdump kernel and the free helpers otherwise.
 */
static void __init free_iommu_buffers(struct amd_iommu *iommu)
{
	if (is_kdump_kernel()) {
		unmap_cwwb_sem(iommu);
		unmap_command_buffer(iommu);
		unmap_event_buffer(iommu);
	} else {
		free_cwwb_sem(iommu);
		free_command_buffer(iommu);
		free_event_buffer(iommu);
	}
}

/* Enable CONTROL_XT_EN when GA mode and x2APIC mode are both in use. */
static void iommu_enable_xt(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	/*
	 * XT mode (32-bit APIC destination ID) requires
	 * GA mode (128-bit IRTE support) as a prerequisite.
	 */
	if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
	    amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
		iommu_feature_enable(iommu, CONTROL_XT_EN);
#endif /* CONFIG_IRQ_REMAP */
}

/*
 * Enable CONTROL_GT_EN if FEATURE_GT is present, and additionally
 * CONTROL_GCR3TRPMODE if FEATURE_GCR3TRPMODE is present.
 */
static void iommu_enable_gt(struct amd_iommu *iommu)
{
	if (!check_feature(FEATURE_GT))
		return;

	iommu_feature_enable(iommu, CONTROL_GT_EN);

	/*
	 * This feature needs to be enabled prior to a call
	 * to iommu_snp_enable(). Since this function is called
	 * in early_enable_iommu(), it is safe to enable here.
	 */
	if (check_feature2(FEATURE_GCR3TRPMODE))
		iommu_feature_enable(iommu, CONTROL_GCR3TRPMODE);
}

/*
 * sets a specific bit in the device table entry.
 *
 * Bits 7:6 of @bit select one of the four data[] words, bits 5:0 the bit
 * position within that word.
 */
static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
{
	int i = (bit >> 6) & 0x03;
	int _bit = bit & 0x3f;

	dte->data[i] |= (1UL << _bit);
}

/*
 * Map the device table that the hardware is currently programmed with and
 * reserve the domain ids used by its valid entries.
 *
 * Returns false if the programmed table size differs from the expected one,
 * the table lies at or above 4G, the mapping fails, or a domain id
 * reservation fails with -ENOMEM; true otherwise.
 */
static bool __reuse_device_table(struct amd_iommu *iommu)
{
	struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
	struct dev_table_entry *old_dev_tbl_entry;
	u32 lo, hi, old_devtb_size, devid;
	phys_addr_t old_devtb_phys;
	u16 dom_id;
	bool dte_v;
	u64 entry;

	/* Each IOMMU use separate device table with the same size */
	lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
	hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
	entry = (((u64) hi) << 32) + lo;

	/* Inverse of the encoding used by iommu_set_device_table(). */
	old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
	if (old_devtb_size != pci_seg->dev_table_size) {
		pr_err("The device table size of IOMMU:%d is not expected!\n",
			iommu->index);
		return false;
	}

	/*
	 * When SME is enabled in the first kernel, the entry includes the
	 * memory encryption mask(sme_me_mask), we must remove the memory
	 * encryption mask to obtain the true physical address in kdump kernel.
	 */
	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;

	if (old_devtb_phys >= 0x100000000ULL) {
		pr_err("The address of old device table is above 4G, not trustworthy!\n");
		return false;
	}

	/*
	 * Re-use the previous kernel's device table for kdump.
	 */
	pci_seg->old_dev_tbl_cpy = iommu_memremap(old_devtb_phys, pci_seg->dev_table_size);
	if (pci_seg->old_dev_tbl_cpy == NULL) {
		pr_err("Failed to remap memory for reusing old device table!\n");
		return false;
	}

	for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
		old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
		dte_v = FIELD_GET(DTE_FLAG_V, old_dev_tbl_entry->data[0]);
		dom_id = FIELD_GET(DTE_DOMID_MASK, old_dev_tbl_entry->data[1]);

		/* Skip entries that are not valid or use domain id 0. */
		if (!dte_v || !dom_id)
			continue;
		/*
		 * ID reservation can fail with -ENOSPC when there
		 * are multiple devices present in the same domain,
		 * hence check only for -ENOMEM.
		 */
		if (amd_iommu_pdom_id_reserve(dom_id, GFP_KERNEL) == -ENOMEM)
			return false;
	}

	return true;
}

/*
 * Try to reuse the device tables left programmed in the hardware. Returns
 * false right away if amd_iommu_pre_enabled is not set, and false if reuse
 * fails for any PCI segment.
 */
static bool reuse_device_table(void)
{
	struct amd_iommu *iommu;
	struct amd_iommu_pci_seg *pci_seg;

	if (!amd_iommu_pre_enabled)
		return false;

	pr_warn("Translation is already enabled - trying to reuse translation structures\n");

	/*
	 * All IOMMUs within PCI segment shares common device table.
	 * Hence reuse device table only once per PCI segment.
	 */
	for_each_pci_segment(pci_seg) {
		for_each_iommu(iommu) {
			if (pci_seg->id != iommu->pci_seg->id)
				continue;
			if (!__reuse_device_table(iommu))
				return false;
			/* First matching IOMMU is enough for this segment. */
			break;
		}
	}

	return true;
}

/*
 * Look up the IVHD DTE flags that apply to @devid in segment @segid.
 * Returns the dte of the narrowest matching range, or NULL if no range
 * contains the device.
 */
struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid)
{
	struct ivhd_dte_flags *e;
	unsigned int best_len = UINT_MAX;
	struct dev_table_entry *dte = NULL;

	for_each_ivhd_dte_flags(e) {
		/*
		 * Need to go through the whole list to find the smallest range,
		 * which contains the devid.
		 */
		if ((e->segid == segid) &&
		    (e->devid_first <= devid) && (devid <= e->devid_last)) {
			unsigned int len = e->devid_last - e->devid_first;

			if (len < best_len) {
				dte = &(e->dte);
				best_len = len;
			}
		}
	}
	return dte;
}

/*
 * Return true if an entry with exactly this segment and devid range is
 * already on the IVHD DTE flags list.
 */
static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last)
{
	struct ivhd_dte_flags *e;

	for_each_ivhd_dte_flags(e) {
		if ((e->segid == segid) &&
		    (e->devid_first == first) &&
		    (e->devid_last == last))
			return true;
	}
	return false;
}

/*
 * This function takes the device specific flags read from the ACPI
 * table and sets up the device table entry with that information
 */
static void __init
set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last,
			      u32 flags, u32 ext_flags)
{
	int i;
	struct dev_table_entry dte = {};

	/* Parse IVHD DTE setting flags and store information */
	if (flags) {
		struct ivhd_dte_flags *d;

		if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last))
			return;

		d = kzalloc_obj(struct ivhd_dte_flags);
		if (!d)
			return;

		pr_debug("%s: devid range %#x:%#x\n", __func__, first, last);

		if (flags & ACPI_DEVFLAG_INITPASS)
			set_dte_bit(&dte, DEV_ENTRY_INIT_PASS);
		if (flags &
ACPI_DEVFLAG_EXTINT) 1287 set_dte_bit(&dte, DEV_ENTRY_EINT_PASS); 1288 if (flags & ACPI_DEVFLAG_NMI) 1289 set_dte_bit(&dte, DEV_ENTRY_NMI_PASS); 1290 if (flags & ACPI_DEVFLAG_SYSMGT1) 1291 set_dte_bit(&dte, DEV_ENTRY_SYSMGT1); 1292 if (flags & ACPI_DEVFLAG_SYSMGT2) 1293 set_dte_bit(&dte, DEV_ENTRY_SYSMGT2); 1294 if (flags & ACPI_DEVFLAG_LINT0) 1295 set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS); 1296 if (flags & ACPI_DEVFLAG_LINT1) 1297 set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS); 1298 1299 /* Apply erratum 63, which needs info in initial_dte */ 1300 if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1) 1301 dte.data[0] |= DTE_FLAG_IW; 1302 1303 memcpy(&d->dte, &dte, sizeof(dte)); 1304 d->segid = iommu->pci_seg->id; 1305 d->devid_first = first; 1306 d->devid_last = last; 1307 list_add_tail(&d->list, &amd_ivhd_dev_flags_list); 1308 } 1309 1310 for (i = first; i <= last; i++) { 1311 if (flags) { 1312 struct dev_table_entry *dev_table = get_dev_table(iommu); 1313 1314 memcpy(&dev_table[i], &dte, sizeof(dte)); 1315 } 1316 amd_iommu_set_rlookup_table(iommu, i); 1317 } 1318 } 1319 1320 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, 1321 u16 devid, u32 flags, u32 ext_flags) 1322 { 1323 set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags); 1324 } 1325 1326 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) 1327 { 1328 struct devid_map *entry; 1329 struct list_head *list; 1330 1331 if (type == IVHD_SPECIAL_IOAPIC) 1332 list = &ioapic_map; 1333 else if (type == IVHD_SPECIAL_HPET) 1334 list = &hpet_map; 1335 else 1336 return -EINVAL; 1337 1338 list_for_each_entry(entry, list, list) { 1339 if (!(entry->id == id && entry->cmd_line)) 1340 continue; 1341 1342 pr_info("Command-line override present for %s id %d - ignoring\n", 1343 type == IVHD_SPECIAL_IOAPIC ? 
"IOAPIC" : "HPET", id); 1344 1345 *devid = entry->devid; 1346 1347 return 0; 1348 } 1349 1350 entry = kzalloc_obj(*entry); 1351 if (!entry) 1352 return -ENOMEM; 1353 1354 entry->id = id; 1355 entry->devid = *devid; 1356 entry->cmd_line = cmd_line; 1357 1358 list_add_tail(&entry->list, list); 1359 1360 return 0; 1361 } 1362 1363 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, 1364 bool cmd_line) 1365 { 1366 struct acpihid_map_entry *entry; 1367 struct list_head *list = &acpihid_map; 1368 1369 list_for_each_entry(entry, list, list) { 1370 if (strcmp(entry->hid, hid) || 1371 (*uid && *entry->uid && strcmp(entry->uid, uid)) || 1372 !entry->cmd_line) 1373 continue; 1374 1375 pr_info("Command-line override for hid:%s uid:%s\n", 1376 hid, uid); 1377 *devid = entry->devid; 1378 return 0; 1379 } 1380 1381 entry = kzalloc_obj(*entry); 1382 if (!entry) 1383 return -ENOMEM; 1384 1385 memcpy(entry->uid, uid, strlen(uid)); 1386 memcpy(entry->hid, hid, strlen(hid)); 1387 entry->devid = *devid; 1388 entry->cmd_line = cmd_line; 1389 entry->root_devid = (entry->devid & (~0x7)); 1390 1391 pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n", 1392 entry->cmd_line ? 
"cmd" : "ivrs", 1393 entry->hid, entry->uid, entry->root_devid); 1394 1395 list_add_tail(&entry->list, list); 1396 return 0; 1397 } 1398 1399 static int __init add_early_maps(void) 1400 { 1401 int i, ret; 1402 1403 for (i = 0; i < early_ioapic_map_size; ++i) { 1404 ret = add_special_device(IVHD_SPECIAL_IOAPIC, 1405 early_ioapic_map[i].id, 1406 &early_ioapic_map[i].devid, 1407 early_ioapic_map[i].cmd_line); 1408 if (ret) 1409 return ret; 1410 } 1411 1412 for (i = 0; i < early_hpet_map_size; ++i) { 1413 ret = add_special_device(IVHD_SPECIAL_HPET, 1414 early_hpet_map[i].id, 1415 &early_hpet_map[i].devid, 1416 early_hpet_map[i].cmd_line); 1417 if (ret) 1418 return ret; 1419 } 1420 1421 for (i = 0; i < early_acpihid_map_size; ++i) { 1422 ret = add_acpi_hid_device(early_acpihid_map[i].hid, 1423 early_acpihid_map[i].uid, 1424 &early_acpihid_map[i].devid, 1425 early_acpihid_map[i].cmd_line); 1426 if (ret) 1427 return ret; 1428 } 1429 1430 return 0; 1431 } 1432 1433 /* 1434 * Takes a pointer to an AMD IOMMU entry in the ACPI table and 1435 * initializes the hardware and our data structures with it. 1436 */ 1437 static int __init init_iommu_from_acpi(struct amd_iommu *iommu, 1438 struct ivhd_header *h) 1439 { 1440 u8 *p = (u8 *)h; 1441 u8 *end = p, flags = 0; 1442 u16 devid = 0, devid_start = 0, devid_to = 0, seg_id; 1443 u32 dev_i, ext_flags = 0; 1444 bool alias = false; 1445 struct ivhd_entry *e; 1446 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1447 u32 ivhd_size; 1448 int ret; 1449 1450 1451 ret = add_early_maps(); 1452 if (ret) 1453 return ret; 1454 1455 amd_iommu_apply_ivrs_quirks(); 1456 1457 /* 1458 * First save the recommended feature enable bits from ACPI 1459 */ 1460 iommu->acpi_flags = h->flags; 1461 1462 /* 1463 * Done. 
Now parse the device entries 1464 */ 1465 ivhd_size = get_ivhd_header_size(h); 1466 if (!ivhd_size) { 1467 pr_err("Unsupported IVHD type %#x\n", h->type); 1468 return -EINVAL; 1469 } 1470 1471 p += ivhd_size; 1472 1473 end += h->length; 1474 1475 1476 while (p < end) { 1477 e = (struct ivhd_entry *)p; 1478 seg_id = pci_seg->id; 1479 1480 switch (e->type) { 1481 case IVHD_DEV_ALL: 1482 1483 DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags); 1484 set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0); 1485 break; 1486 case IVHD_DEV_SELECT: 1487 1488 DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", 1489 seg_id, PCI_BUS_NUM(e->devid), 1490 PCI_SLOT(e->devid), 1491 PCI_FUNC(e->devid), 1492 e->flags); 1493 1494 devid = e->devid; 1495 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1496 break; 1497 case IVHD_DEV_SELECT_RANGE_START: 1498 1499 DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", 1500 seg_id, PCI_BUS_NUM(e->devid), 1501 PCI_SLOT(e->devid), 1502 PCI_FUNC(e->devid), 1503 e->flags); 1504 1505 devid_start = e->devid; 1506 flags = e->flags; 1507 ext_flags = 0; 1508 alias = false; 1509 break; 1510 case IVHD_DEV_ALIAS: 1511 1512 DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n", 1513 seg_id, PCI_BUS_NUM(e->devid), 1514 PCI_SLOT(e->devid), 1515 PCI_FUNC(e->devid), 1516 e->flags, 1517 PCI_BUS_NUM(e->ext >> 8), 1518 PCI_SLOT(e->ext >> 8), 1519 PCI_FUNC(e->ext >> 8)); 1520 1521 devid = e->devid; 1522 devid_to = e->ext >> 8; 1523 set_dev_entry_from_acpi(iommu, devid , e->flags, 0); 1524 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); 1525 pci_seg->alias_table[devid] = devid_to; 1526 break; 1527 case IVHD_DEV_ALIAS_RANGE: 1528 1529 DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n", 1530 seg_id, PCI_BUS_NUM(e->devid), 1531 PCI_SLOT(e->devid), 1532 PCI_FUNC(e->devid), 1533 e->flags, 1534 seg_id, 
PCI_BUS_NUM(e->ext >> 8), 1535 PCI_SLOT(e->ext >> 8), 1536 PCI_FUNC(e->ext >> 8)); 1537 1538 devid_start = e->devid; 1539 flags = e->flags; 1540 devid_to = e->ext >> 8; 1541 ext_flags = 0; 1542 alias = true; 1543 break; 1544 case IVHD_DEV_EXT_SELECT: 1545 1546 DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", 1547 seg_id, PCI_BUS_NUM(e->devid), 1548 PCI_SLOT(e->devid), 1549 PCI_FUNC(e->devid), 1550 e->flags, e->ext); 1551 1552 devid = e->devid; 1553 set_dev_entry_from_acpi(iommu, devid, e->flags, 1554 e->ext); 1555 break; 1556 case IVHD_DEV_EXT_SELECT_RANGE: 1557 1558 DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", 1559 seg_id, PCI_BUS_NUM(e->devid), 1560 PCI_SLOT(e->devid), 1561 PCI_FUNC(e->devid), 1562 e->flags, e->ext); 1563 1564 devid_start = e->devid; 1565 flags = e->flags; 1566 ext_flags = e->ext; 1567 alias = false; 1568 break; 1569 case IVHD_DEV_RANGE_END: 1570 1571 DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n", 1572 seg_id, PCI_BUS_NUM(e->devid), 1573 PCI_SLOT(e->devid), 1574 PCI_FUNC(e->devid)); 1575 1576 devid = e->devid; 1577 if (alias) { 1578 for (dev_i = devid_start; dev_i <= devid; ++dev_i) 1579 pci_seg->alias_table[dev_i] = devid_to; 1580 set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); 1581 } 1582 set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags); 1583 break; 1584 case IVHD_DEV_SPECIAL: { 1585 u8 handle, type; 1586 const char *var; 1587 u32 devid; 1588 int ret; 1589 1590 handle = e->ext & 0xff; 1591 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8)); 1592 type = (e->ext >> 24) & 0xff; 1593 1594 if (type == IVHD_SPECIAL_IOAPIC) 1595 var = "IOAPIC"; 1596 else if (type == IVHD_SPECIAL_HPET) 1597 var = "HPET"; 1598 else 1599 var = "UNKNOWN"; 1600 1601 DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", 1602 var, (int)handle, 1603 seg_id, PCI_BUS_NUM(devid), 1604 PCI_SLOT(devid), 1605 PCI_FUNC(devid), 
1606 e->flags); 1607 1608 ret = add_special_device(type, handle, &devid, false); 1609 if (ret) 1610 return ret; 1611 1612 /* 1613 * add_special_device might update the devid in case a 1614 * command-line override is present. So call 1615 * set_dev_entry_from_acpi after add_special_device. 1616 */ 1617 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1618 1619 break; 1620 } 1621 case IVHD_DEV_ACPI_HID: { 1622 u32 devid; 1623 u8 hid[ACPIHID_HID_LEN]; 1624 u8 uid[ACPIHID_UID_LEN]; 1625 int ret; 1626 1627 if (h->type != 0x40) { 1628 pr_err(FW_BUG "Invalid IVHD device type %#x\n", 1629 e->type); 1630 break; 1631 } 1632 1633 BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1); 1634 memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1); 1635 hid[ACPIHID_HID_LEN - 1] = '\0'; 1636 1637 if (!(*hid)) { 1638 pr_err(FW_BUG "Invalid HID.\n"); 1639 break; 1640 } 1641 1642 uid[0] = '\0'; 1643 switch (e->uidf) { 1644 case UID_NOT_PRESENT: 1645 1646 if (e->uidl != 0) 1647 pr_warn(FW_BUG "Invalid UID length.\n"); 1648 1649 break; 1650 case UID_IS_INTEGER: 1651 1652 sprintf(uid, "%d", e->uid); 1653 1654 break; 1655 case UID_IS_CHARACTER: 1656 1657 memcpy(uid, &e->uid, e->uidl); 1658 uid[e->uidl] = '\0'; 1659 1660 break; 1661 default: 1662 break; 1663 } 1664 1665 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); 1666 DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", 1667 hid, uid, seg_id, 1668 PCI_BUS_NUM(devid), 1669 PCI_SLOT(devid), 1670 PCI_FUNC(devid), 1671 e->flags); 1672 1673 flags = e->flags; 1674 1675 ret = add_acpi_hid_device(hid, uid, &devid, false); 1676 if (ret) 1677 return ret; 1678 1679 /* 1680 * add_special_device might update the devid in case a 1681 * command-line override is present. So call 1682 * set_dev_entry_from_acpi after add_special_device. 
1683 */ 1684 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1685 1686 break; 1687 } 1688 default: 1689 break; 1690 } 1691 1692 p += ivhd_entry_length(p); 1693 } 1694 1695 return 0; 1696 } 1697 1698 /* Allocate PCI segment data structure */ 1699 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, 1700 struct acpi_table_header *ivrs_base) 1701 { 1702 struct amd_iommu_pci_seg *pci_seg; 1703 int last_bdf; 1704 1705 /* 1706 * First parse ACPI tables to find the largest Bus/Dev/Func we need to 1707 * handle in this PCI segment. Upon this information the shared data 1708 * structures for the PCI segments in the system will be allocated. 1709 */ 1710 last_bdf = find_last_devid_acpi(ivrs_base, id); 1711 if (last_bdf < 0) 1712 return NULL; 1713 1714 pci_seg = kzalloc_obj(struct amd_iommu_pci_seg); 1715 if (pci_seg == NULL) 1716 return NULL; 1717 1718 pci_seg->last_bdf = last_bdf; 1719 DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); 1720 pci_seg->dev_table_size = 1721 max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE), 1722 SZ_4K); 1723 1724 pci_seg->id = id; 1725 init_llist_head(&pci_seg->dev_data_list); 1726 INIT_LIST_HEAD(&pci_seg->unity_map); 1727 list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); 1728 1729 if (alloc_dev_table(pci_seg)) 1730 goto err_free_pci_seg; 1731 if (alloc_alias_table(pci_seg)) 1732 goto err_free_dev_table; 1733 if (alloc_rlookup_table(pci_seg)) 1734 goto err_free_alias_table; 1735 1736 return pci_seg; 1737 1738 err_free_alias_table: 1739 free_alias_table(pci_seg); 1740 err_free_dev_table: 1741 free_dev_table(pci_seg); 1742 err_free_pci_seg: 1743 list_del(&pci_seg->list); 1744 kfree(pci_seg); 1745 return NULL; 1746 } 1747 1748 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, 1749 struct acpi_table_header *ivrs_base) 1750 { 1751 struct amd_iommu_pci_seg *pci_seg; 1752 1753 for_each_pci_segment(pci_seg) { 1754 if (pci_seg->id == id) 1755 return pci_seg; 1756 } 1757 1758 return 
alloc_pci_segment(id, ivrs_base); 1759 } 1760 1761 static void __init free_pci_segments(void) 1762 { 1763 struct amd_iommu_pci_seg *pci_seg, *next; 1764 1765 for_each_pci_segment_safe(pci_seg, next) { 1766 list_del(&pci_seg->list); 1767 free_irq_lookup_table(pci_seg); 1768 free_rlookup_table(pci_seg); 1769 free_alias_table(pci_seg); 1770 free_dev_table(pci_seg); 1771 kfree(pci_seg); 1772 } 1773 } 1774 1775 static void __init free_sysfs(struct amd_iommu *iommu) 1776 { 1777 if (iommu->iommu.dev) { 1778 iommu_device_unregister(&iommu->iommu); 1779 iommu_device_sysfs_remove(&iommu->iommu); 1780 } 1781 } 1782 1783 static void __init free_iommu_one(struct amd_iommu *iommu) 1784 { 1785 free_sysfs(iommu); 1786 free_iommu_buffers(iommu); 1787 amd_iommu_free_ppr_log(iommu); 1788 free_ga_log(iommu); 1789 iommu_unmap_mmio_space(iommu); 1790 amd_iommu_iopf_uninit(iommu); 1791 } 1792 1793 static void __init free_iommu_all(void) 1794 { 1795 struct amd_iommu *iommu, *next; 1796 1797 for_each_iommu_safe(iommu, next) { 1798 list_del(&iommu->list); 1799 free_iommu_one(iommu); 1800 kfree(iommu); 1801 } 1802 } 1803 1804 /* 1805 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations) 1806 * Workaround: 1807 * BIOS should disable L2B micellaneous clock gating by setting 1808 * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b 1809 */ 1810 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) 1811 { 1812 u32 value; 1813 1814 if ((boot_cpu_data.x86 != 0x15) || 1815 (boot_cpu_data.x86_model < 0x10) || 1816 (boot_cpu_data.x86_model > 0x1f)) 1817 return; 1818 1819 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1820 pci_read_config_dword(iommu->dev, 0xf4, &value); 1821 1822 if (value & BIT(2)) 1823 return; 1824 1825 /* Select NB indirect register 0x90 and enable writing */ 1826 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); 1827 1828 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); 1829 pci_info(iommu->dev, "Applying 
erratum 746 workaround\n"); 1830 1831 /* Clear the enable writing bit */ 1832 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1833 } 1834 1835 /* 1836 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) 1837 * Workaround: 1838 * BIOS should enable ATS write permission check by setting 1839 * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b 1840 */ 1841 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) 1842 { 1843 u32 value; 1844 1845 if ((boot_cpu_data.x86 != 0x15) || 1846 (boot_cpu_data.x86_model < 0x30) || 1847 (boot_cpu_data.x86_model > 0x3f)) 1848 return; 1849 1850 /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ 1851 value = iommu_read_l2(iommu, 0x47); 1852 1853 if (value & BIT(0)) 1854 return; 1855 1856 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ 1857 iommu_write_l2(iommu, 0x47, value | BIT(0)); 1858 1859 pci_info(iommu->dev, "Applying ATS write check workaround\n"); 1860 } 1861 1862 /* 1863 * This function glues the initialization function for one IOMMU 1864 * together and also allocates the command buffer and programs the 1865 * hardware. It does NOT enable the IOMMU. This is done afterwards. 
1866 */ 1867 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, 1868 struct acpi_table_header *ivrs_base) 1869 { 1870 struct amd_iommu_pci_seg *pci_seg; 1871 1872 pci_seg = get_pci_segment(h->pci_seg, ivrs_base); 1873 if (pci_seg == NULL) 1874 return -ENOMEM; 1875 iommu->pci_seg = pci_seg; 1876 1877 raw_spin_lock_init(&iommu->lock); 1878 iommu->cmd_sem_val = 0; 1879 1880 /* Add IOMMU to internal data structures */ 1881 list_add_tail(&iommu->list, &amd_iommu_list); 1882 iommu->index = amd_iommus_present++; 1883 1884 if (unlikely(iommu->index >= MAX_IOMMUS)) { 1885 WARN(1, "System has more IOMMUs than supported by this driver\n"); 1886 return -ENOSYS; 1887 } 1888 1889 /* 1890 * Copy data from ACPI table entry to the iommu struct 1891 */ 1892 iommu->devid = h->devid; 1893 iommu->cap_ptr = h->cap_ptr; 1894 iommu->mmio_phys = h->mmio_phys; 1895 1896 switch (h->type) { 1897 case 0x10: 1898 /* Check if IVHD EFR contains proper max banks/counters */ 1899 if ((h->efr_attr != 0) && 1900 ((h->efr_attr & (0xF << 13)) != 0) && 1901 ((h->efr_attr & (0x3F << 17)) != 0)) 1902 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1903 else 1904 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1905 1906 /* GAM requires GA mode. */ 1907 if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0) 1908 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1909 break; 1910 case 0x11: 1911 case 0x40: 1912 if (h->efr_reg & (1 << 9)) 1913 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1914 else 1915 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1916 1917 /* XT and GAM require GA mode. 
*/ 1918 if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) { 1919 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1920 } else { 1921 if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) 1922 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; 1923 } 1924 1925 if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) { 1926 pr_warn_once("Host Address Translation is not supported.\n"); 1927 amd_iommu_hatdis = true; 1928 } 1929 1930 early_iommu_features_init(iommu, h); 1931 1932 break; 1933 default: 1934 return -EINVAL; 1935 } 1936 1937 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, 1938 iommu->mmio_phys_end); 1939 if (!iommu->mmio_base) 1940 return -ENOMEM; 1941 1942 return init_iommu_from_acpi(iommu, h); 1943 } 1944 1945 static int __init init_iommu_one_late(struct amd_iommu *iommu) 1946 { 1947 int ret; 1948 1949 ret = alloc_iommu_buffers(iommu); 1950 if (ret) 1951 return ret; 1952 1953 iommu->int_enabled = false; 1954 1955 init_translation_status(iommu); 1956 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 1957 iommu_disable(iommu); 1958 clear_translation_pre_enabled(iommu); 1959 pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n", 1960 iommu->index); 1961 } 1962 if (amd_iommu_pre_enabled) 1963 amd_iommu_pre_enabled = translation_pre_enabled(iommu); 1964 1965 if (amd_iommu_irq_remap) { 1966 ret = amd_iommu_create_irq_domain(iommu); 1967 if (ret) 1968 return ret; 1969 } 1970 1971 /* 1972 * Make sure IOMMU is not considered to translate itself. The IVRS 1973 * table tells us so, but this is a lie! 
1974 */ 1975 iommu->pci_seg->rlookup_table[iommu->devid] = NULL; 1976 1977 return 0; 1978 } 1979 1980 /** 1981 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type 1982 * @ivrs: Pointer to the IVRS header 1983 * 1984 * This function search through all IVDB of the maximum supported IVHD 1985 */ 1986 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) 1987 { 1988 u8 *base = (u8 *)ivrs; 1989 struct ivhd_header *ivhd = (struct ivhd_header *) 1990 (base + IVRS_HEADER_LENGTH); 1991 u8 last_type = ivhd->type; 1992 u16 devid = ivhd->devid; 1993 1994 while (((u8 *)ivhd - base < ivrs->length) && 1995 (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) { 1996 u8 *p = (u8 *) ivhd; 1997 1998 if (ivhd->devid == devid) 1999 last_type = ivhd->type; 2000 ivhd = (struct ivhd_header *)(p + ivhd->length); 2001 } 2002 2003 return last_type; 2004 } 2005 2006 /* 2007 * Iterates over all IOMMU entries in the ACPI table, allocates the 2008 * IOMMU structure and initializes it with init_iommu_one() 2009 */ 2010 static int __init init_iommu_all(struct acpi_table_header *table) 2011 { 2012 u8 *p = (u8 *)table, *end = (u8 *)table; 2013 struct ivhd_header *h; 2014 struct amd_iommu *iommu; 2015 int ret; 2016 2017 end += table->length; 2018 p += IVRS_HEADER_LENGTH; 2019 2020 /* Phase 1: Process all IVHD blocks */ 2021 while (p < end) { 2022 h = (struct ivhd_header *)p; 2023 if (*p == amd_iommu_target_ivhd_type) { 2024 2025 DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x " 2026 "flags: %01x info %04x\n", 2027 h->pci_seg, PCI_BUS_NUM(h->devid), 2028 PCI_SLOT(h->devid), PCI_FUNC(h->devid), 2029 h->cap_ptr, h->flags, h->info); 2030 DUMP_printk(" mmio-addr: %016llx\n", 2031 h->mmio_phys); 2032 2033 iommu = kzalloc_obj(struct amd_iommu); 2034 if (iommu == NULL) 2035 return -ENOMEM; 2036 2037 ret = init_iommu_one(iommu, h, table); 2038 if (ret) 2039 return ret; 2040 } 2041 p += h->length; 2042 2043 } 2044 WARN_ON(p != end); 2045 2046 /* Phase 2 : Early feature support 
check */ 2047 get_global_efr(); 2048 2049 /* Phase 3 : Enabling IOMMU features */ 2050 for_each_iommu(iommu) { 2051 ret = init_iommu_one_late(iommu); 2052 if (ret) 2053 return ret; 2054 } 2055 2056 return 0; 2057 } 2058 2059 static void init_iommu_perf_ctr(struct amd_iommu *iommu) 2060 { 2061 u64 val; 2062 struct pci_dev *pdev = iommu->dev; 2063 2064 if (!check_feature(FEATURE_PC)) 2065 return; 2066 2067 amd_iommu_pc_present = true; 2068 2069 pci_info(pdev, "IOMMU performance counters supported\n"); 2070 2071 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); 2072 iommu->max_banks = (u8) ((val >> 12) & 0x3f); 2073 iommu->max_counters = (u8) ((val >> 7) & 0xf); 2074 2075 return; 2076 } 2077 2078 static ssize_t amd_iommu_show_cap(struct device *dev, 2079 struct device_attribute *attr, 2080 char *buf) 2081 { 2082 struct amd_iommu *iommu = dev_to_amd_iommu(dev); 2083 return sysfs_emit(buf, "%x\n", iommu->cap); 2084 } 2085 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); 2086 2087 static ssize_t amd_iommu_show_features(struct device *dev, 2088 struct device_attribute *attr, 2089 char *buf) 2090 { 2091 return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2); 2092 } 2093 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); 2094 2095 static struct attribute *amd_iommu_attrs[] = { 2096 &dev_attr_cap.attr, 2097 &dev_attr_features.attr, 2098 NULL, 2099 }; 2100 2101 static struct attribute_group amd_iommu_group = { 2102 .name = "amd-iommu", 2103 .attrs = amd_iommu_attrs, 2104 }; 2105 2106 static const struct attribute_group *amd_iommu_groups[] = { 2107 &amd_iommu_group, 2108 NULL, 2109 }; 2110 2111 /* 2112 * Note: IVHD 0x11 and 0x40 also contains exact copy 2113 * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. 2114 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). 
2115 */ 2116 static void __init late_iommu_features_init(struct amd_iommu *iommu) 2117 { 2118 u64 features, features2; 2119 2120 if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) 2121 return; 2122 2123 /* read extended feature bits */ 2124 features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); 2125 features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); 2126 2127 if (!amd_iommu_efr) { 2128 amd_iommu_efr = features; 2129 amd_iommu_efr2 = features2; 2130 return; 2131 } 2132 2133 /* 2134 * Sanity check and warn if EFR values from 2135 * IVHD and MMIO conflict. 2136 */ 2137 if (features != amd_iommu_efr || 2138 features2 != amd_iommu_efr2) { 2139 pr_warn(FW_WARN 2140 "EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", 2141 features, amd_iommu_efr, 2142 features2, amd_iommu_efr2); 2143 } 2144 } 2145 2146 static int __init iommu_init_pci(struct amd_iommu *iommu) 2147 { 2148 int cap_ptr = iommu->cap_ptr; 2149 int ret; 2150 2151 iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2152 PCI_BUS_NUM(iommu->devid), 2153 iommu->devid & 0xff); 2154 if (!iommu->dev) 2155 return -ENODEV; 2156 2157 /* ACPI _PRT won't have an IRQ for IOMMU */ 2158 iommu->dev->irq_managed = 1; 2159 2160 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 2161 &iommu->cap); 2162 2163 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) 2164 amd_iommu_iotlb_sup = false; 2165 2166 late_iommu_features_init(iommu); 2167 2168 if (check_feature(FEATURE_GT)) { 2169 int glxval; 2170 u64 pasmax; 2171 2172 pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr); 2173 iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; 2174 2175 BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); 2176 2177 glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr); 2178 2179 if (amd_iommu_max_glx_val == -1) 2180 amd_iommu_max_glx_val = glxval; 2181 else 2182 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); 2183 2184 iommu_enable_gt(iommu); 2185 } 2186 2187 if (check_feature(FEATURE_PPR) && 
amd_iommu_alloc_ppr_log(iommu)) 2188 return -ENOMEM; 2189 2190 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { 2191 pr_info("Using strict mode due to virtualization\n"); 2192 iommu_set_dma_strict(); 2193 amd_iommu_np_cache = true; 2194 } 2195 2196 init_iommu_perf_ctr(iommu); 2197 2198 if (is_rd890_iommu(iommu->dev)) { 2199 int i, j; 2200 2201 iommu->root_pdev = 2202 pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2203 iommu->dev->bus->number, 2204 PCI_DEVFN(0, 0)); 2205 2206 /* 2207 * Some rd890 systems may not be fully reconfigured by the 2208 * BIOS, so it's necessary for us to store this information so 2209 * it can be reprogrammed on resume 2210 */ 2211 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, 2212 &iommu->stored_addr_lo); 2213 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, 2214 &iommu->stored_addr_hi); 2215 2216 /* Low bit locks writes to configuration space */ 2217 iommu->stored_addr_lo &= ~1; 2218 2219 for (i = 0; i < 6; i++) 2220 for (j = 0; j < 0x12; j++) 2221 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); 2222 2223 for (i = 0; i < 0x83; i++) 2224 iommu->stored_l2[i] = iommu_read_l2(iommu, i); 2225 } 2226 2227 amd_iommu_erratum_746_workaround(iommu); 2228 amd_iommu_ats_write_check_workaround(iommu); 2229 2230 ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, 2231 amd_iommu_groups, "ivhd%d", iommu->index); 2232 if (ret) 2233 return ret; 2234 2235 /* 2236 * Allocate per IOMMU IOPF queue here so that in attach device path, 2237 * PRI capable device can be added to IOPF queue 2238 */ 2239 if (amd_iommu_gt_ppr_supported()) { 2240 ret = amd_iommu_iopf_init(iommu); 2241 if (ret) 2242 return ret; 2243 } 2244 2245 ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); 2246 if (ret || amd_iommu_pgtable == PD_MODE_NONE) { 2247 /* 2248 * Remove sysfs if DMA translation is not supported by the 2249 * IOMMU. Do not return an error to enable IRQ remapping 2250 * in state_next(), DTE[V, TV] must eventually be set to 0. 
2251 */ 2252 iommu_device_sysfs_remove(&iommu->iommu); 2253 } 2254 2255 return pci_enable_device(iommu->dev); 2256 } 2257 2258 static void print_iommu_info(void) 2259 { 2260 int i; 2261 static const char * const feat_str[] = { 2262 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", 2263 "IA", "GA", "HE", "PC" 2264 }; 2265 2266 if (amd_iommu_efr) { 2267 pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2); 2268 2269 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { 2270 if (check_feature(1ULL << i)) 2271 pr_cont(" %s", feat_str[i]); 2272 } 2273 2274 if (check_feature(FEATURE_GAM_VAPIC)) 2275 pr_cont(" GA_vAPIC"); 2276 2277 if (check_feature(FEATURE_SNP)) 2278 pr_cont(" SNP"); 2279 2280 if (check_feature2(FEATURE_SEVSNPIO_SUP)) 2281 pr_cont(" SEV-TIO"); 2282 2283 pr_cont("\n"); 2284 } 2285 2286 if (irq_remapping_enabled) { 2287 pr_info("Interrupt remapping enabled\n"); 2288 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2289 pr_info("X2APIC enabled\n"); 2290 } 2291 if (amd_iommu_pgtable == PD_MODE_V2) { 2292 pr_info("V2 page table enabled (Paging mode : %d level)\n", 2293 amd_iommu_gpt_level); 2294 } 2295 } 2296 2297 static int __init amd_iommu_init_pci(void) 2298 { 2299 struct amd_iommu *iommu; 2300 struct amd_iommu_pci_seg *pci_seg; 2301 int ret; 2302 2303 /* Init global identity domain before registering IOMMU */ 2304 amd_iommu_init_identity_domain(); 2305 2306 for_each_iommu(iommu) { 2307 ret = iommu_init_pci(iommu); 2308 if (ret) { 2309 pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n", 2310 iommu->index, ret); 2311 goto out; 2312 } 2313 /* Need to setup range after PCI init */ 2314 iommu_set_cwwb_range(iommu); 2315 } 2316 2317 /* 2318 * Order is important here to make sure any unity map requirements are 2319 * fulfilled. The unity mappings are created and written to the device 2320 * table during the iommu_init_pci() call. 
2321 * 2322 * After that we call init_device_table_dma() to make sure any 2323 * uninitialized DTE will block DMA, and in the end we flush the caches 2324 * of all IOMMUs to make sure the changes to the device table are 2325 * active. 2326 */ 2327 for_each_pci_segment(pci_seg) 2328 init_device_table_dma(pci_seg); 2329 2330 for_each_iommu(iommu) 2331 amd_iommu_flush_all_caches(iommu); 2332 2333 print_iommu_info(); 2334 2335 out: 2336 return ret; 2337 } 2338 2339 /**************************************************************************** 2340 * 2341 * The following functions initialize the MSI interrupts for all IOMMUs 2342 * in the system. It's a bit challenging because there could be multiple 2343 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per 2344 * pci_dev. 2345 * 2346 ****************************************************************************/ 2347 2348 static int iommu_setup_msi(struct amd_iommu *iommu) 2349 { 2350 int r; 2351 2352 r = pci_enable_msi(iommu->dev); 2353 if (r) 2354 return r; 2355 2356 r = request_threaded_irq(iommu->dev->irq, NULL, amd_iommu_int_thread, 2357 IRQF_ONESHOT, "AMD-Vi", iommu); 2358 if (r) { 2359 pci_disable_msi(iommu->dev); 2360 return r; 2361 } 2362 2363 return 0; 2364 } 2365 2366 union intcapxt { 2367 u64 capxt; 2368 struct { 2369 u64 reserved_0 : 2, 2370 dest_mode_logical : 1, 2371 reserved_1 : 5, 2372 destid_0_23 : 24, 2373 vector : 8, 2374 reserved_2 : 16, 2375 destid_24_31 : 8; 2376 }; 2377 } __attribute__ ((packed)); 2378 2379 2380 static struct irq_chip intcapxt_controller; 2381 2382 static int intcapxt_irqdomain_activate(struct irq_domain *domain, 2383 struct irq_data *irqd, bool reserve) 2384 { 2385 return 0; 2386 } 2387 2388 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, 2389 struct irq_data *irqd) 2390 { 2391 } 2392 2393 2394 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, 2395 unsigned int nr_irqs, void *arg) 2396 { 2397 struct 
irq_alloc_info *info = arg; 2398 int i, ret; 2399 2400 if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) 2401 return -EINVAL; 2402 2403 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 2404 if (ret < 0) 2405 return ret; 2406 2407 for (i = virq; i < virq + nr_irqs; i++) { 2408 struct irq_data *irqd = irq_domain_get_irq_data(domain, i); 2409 2410 irqd->chip = &intcapxt_controller; 2411 irqd->hwirq = info->hwirq; 2412 irqd->chip_data = info->data; 2413 __irq_set_handler(i, handle_edge_irq, 0, "edge"); 2414 } 2415 2416 return ret; 2417 } 2418 2419 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, 2420 unsigned int nr_irqs) 2421 { 2422 irq_domain_free_irqs_top(domain, virq, nr_irqs); 2423 } 2424 2425 2426 static void intcapxt_unmask_irq(struct irq_data *irqd) 2427 { 2428 struct amd_iommu *iommu = irqd->chip_data; 2429 struct irq_cfg *cfg = irqd_cfg(irqd); 2430 union intcapxt xt; 2431 2432 xt.capxt = 0ULL; 2433 xt.dest_mode_logical = apic->dest_mode_logical; 2434 xt.vector = cfg->vector; 2435 xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2436 xt.destid_24_31 = cfg->dest_apicid >> 24; 2437 2438 writeq(xt.capxt, iommu->mmio_base + irqd->hwirq); 2439 } 2440 2441 static void intcapxt_mask_irq(struct irq_data *irqd) 2442 { 2443 struct amd_iommu *iommu = irqd->chip_data; 2444 2445 writeq(0, iommu->mmio_base + irqd->hwirq); 2446 } 2447 2448 2449 static int intcapxt_set_affinity(struct irq_data *irqd, 2450 const struct cpumask *mask, bool force) 2451 { 2452 struct irq_data *parent = irqd->parent_data; 2453 int ret; 2454 2455 ret = parent->chip->irq_set_affinity(parent, mask, force); 2456 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 2457 return ret; 2458 return 0; 2459 } 2460 2461 static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on) 2462 { 2463 return on ? 
-EOPNOTSUPP : 0; 2464 } 2465 2466 static struct irq_chip intcapxt_controller = { 2467 .name = "IOMMU-MSI", 2468 .irq_unmask = intcapxt_unmask_irq, 2469 .irq_mask = intcapxt_mask_irq, 2470 .irq_ack = irq_chip_ack_parent, 2471 .irq_retrigger = irq_chip_retrigger_hierarchy, 2472 .irq_set_affinity = intcapxt_set_affinity, 2473 .irq_set_wake = intcapxt_set_wake, 2474 .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED, 2475 }; 2476 2477 static const struct irq_domain_ops intcapxt_domain_ops = { 2478 .alloc = intcapxt_irqdomain_alloc, 2479 .free = intcapxt_irqdomain_free, 2480 .activate = intcapxt_irqdomain_activate, 2481 .deactivate = intcapxt_irqdomain_deactivate, 2482 }; 2483 2484 2485 static struct irq_domain *iommu_irqdomain; 2486 2487 static struct irq_domain *iommu_get_irqdomain(void) 2488 { 2489 struct fwnode_handle *fn; 2490 2491 /* No need for locking here (yet) as the init is single-threaded */ 2492 if (iommu_irqdomain) 2493 return iommu_irqdomain; 2494 2495 fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); 2496 if (!fn) 2497 return NULL; 2498 2499 iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, 2500 fn, &intcapxt_domain_ops, 2501 NULL); 2502 if (!iommu_irqdomain) 2503 irq_domain_free_fwnode(fn); 2504 2505 return iommu_irqdomain; 2506 } 2507 2508 static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname, 2509 int hwirq, irq_handler_t thread_fn) 2510 { 2511 struct irq_domain *domain; 2512 struct irq_alloc_info info; 2513 int irq, ret; 2514 int node = dev_to_node(&iommu->dev->dev); 2515 2516 domain = iommu_get_irqdomain(); 2517 if (!domain) 2518 return -ENXIO; 2519 2520 init_irq_alloc_info(&info, NULL); 2521 info.type = X86_IRQ_ALLOC_TYPE_AMDVI; 2522 info.data = iommu; 2523 info.hwirq = hwirq; 2524 2525 irq = irq_domain_alloc_irqs(domain, 1, node, &info); 2526 if (irq < 0) { 2527 irq_domain_remove(domain); 2528 return irq; 2529 } 2530 2531 ret = request_threaded_irq(irq, NULL, thread_fn, IRQF_ONESHOT, devname, 
2532 iommu); 2533 if (ret) { 2534 irq_domain_free_irqs(irq, 1); 2535 irq_domain_remove(domain); 2536 return ret; 2537 } 2538 2539 return 0; 2540 } 2541 2542 static int iommu_setup_intcapxt(struct amd_iommu *iommu) 2543 { 2544 int ret; 2545 2546 snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name), 2547 "AMD-Vi%d-Evt", iommu->index); 2548 ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name, 2549 MMIO_INTCAPXT_EVT_OFFSET, 2550 amd_iommu_int_thread_evtlog); 2551 if (ret) 2552 return ret; 2553 2554 snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name), 2555 "AMD-Vi%d-PPR", iommu->index); 2556 ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name, 2557 MMIO_INTCAPXT_PPR_OFFSET, 2558 amd_iommu_int_thread_pprlog); 2559 if (ret) 2560 return ret; 2561 2562 #ifdef CONFIG_IRQ_REMAP 2563 snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name), 2564 "AMD-Vi%d-GA", iommu->index); 2565 ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name, 2566 MMIO_INTCAPXT_GALOG_OFFSET, 2567 amd_iommu_int_thread_galog); 2568 #endif 2569 2570 return ret; 2571 } 2572 2573 static int iommu_init_irq(struct amd_iommu *iommu) 2574 { 2575 int ret; 2576 2577 if (iommu->int_enabled) 2578 goto enable_faults; 2579 2580 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2581 ret = iommu_setup_intcapxt(iommu); 2582 else if (iommu->dev->msi_cap) 2583 ret = iommu_setup_msi(iommu); 2584 else 2585 ret = -ENODEV; 2586 2587 if (ret) 2588 return ret; 2589 2590 iommu->int_enabled = true; 2591 enable_faults: 2592 2593 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2594 iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2595 2596 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 2597 2598 return 0; 2599 } 2600 2601 /**************************************************************************** 2602 * 2603 * The next functions belong to the third pass of parsing the ACPI 2604 * table. In this last pass the memory mapping requirements are 2605 * gathered (like exclusion and unity mapping ranges). 
2606 * 2607 ****************************************************************************/ 2608 2609 static void __init free_unity_maps(void) 2610 { 2611 struct unity_map_entry *entry, *next; 2612 struct amd_iommu_pci_seg *p, *pci_seg; 2613 2614 for_each_pci_segment_safe(pci_seg, p) { 2615 list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) { 2616 list_del(&entry->list); 2617 kfree(entry); 2618 } 2619 } 2620 } 2621 2622 /* called for unity map ACPI definition */ 2623 static int __init init_unity_map_range(struct ivmd_header *m, 2624 struct acpi_table_header *ivrs_base) 2625 { 2626 struct unity_map_entry *e = NULL; 2627 struct amd_iommu_pci_seg *pci_seg; 2628 char *s; 2629 2630 pci_seg = get_pci_segment(m->pci_seg, ivrs_base); 2631 if (pci_seg == NULL) 2632 return -ENOMEM; 2633 2634 e = kzalloc_obj(*e); 2635 if (e == NULL) 2636 return -ENOMEM; 2637 2638 switch (m->type) { 2639 default: 2640 kfree(e); 2641 return 0; 2642 case ACPI_IVMD_TYPE: 2643 s = "IVMD_TYPEi\t\t\t"; 2644 e->devid_start = e->devid_end = m->devid; 2645 break; 2646 case ACPI_IVMD_TYPE_ALL: 2647 s = "IVMD_TYPE_ALL\t\t"; 2648 e->devid_start = 0; 2649 e->devid_end = pci_seg->last_bdf; 2650 break; 2651 case ACPI_IVMD_TYPE_RANGE: 2652 s = "IVMD_TYPE_RANGE\t\t"; 2653 e->devid_start = m->devid; 2654 e->devid_end = m->aux; 2655 break; 2656 } 2657 e->address_start = PAGE_ALIGN(m->range_start); 2658 e->address_end = e->address_start + PAGE_ALIGN(m->range_length); 2659 e->prot = m->flags >> 1; 2660 2661 /* 2662 * Treat per-device exclusion ranges as r/w unity-mapped regions 2663 * since some buggy BIOSes might lead to the overwritten exclusion 2664 * range (exclusion_start and exclusion_length members). This 2665 * happens when there are multiple exclusion ranges (IVMD entries) 2666 * defined in ACPI table. 
2667 */ 2668 if (m->flags & IVMD_FLAG_EXCL_RANGE) 2669 e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; 2670 2671 DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: " 2672 "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx" 2673 " flags: %x\n", s, m->pci_seg, 2674 PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), 2675 PCI_FUNC(e->devid_start), m->pci_seg, 2676 PCI_BUS_NUM(e->devid_end), 2677 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), 2678 e->address_start, e->address_end, m->flags); 2679 2680 list_add_tail(&e->list, &pci_seg->unity_map); 2681 2682 return 0; 2683 } 2684 2685 /* iterates over all memory definitions we find in the ACPI table */ 2686 static int __init init_memory_definitions(struct acpi_table_header *table) 2687 { 2688 u8 *p = (u8 *)table, *end = (u8 *)table; 2689 struct ivmd_header *m; 2690 2691 end += table->length; 2692 p += IVRS_HEADER_LENGTH; 2693 2694 while (p < end) { 2695 m = (struct ivmd_header *)p; 2696 if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) 2697 init_unity_map_range(m, table); 2698 2699 p += m->length; 2700 } 2701 2702 return 0; 2703 } 2704 2705 /* 2706 * Init the device table to not allow DMA access for devices 2707 */ 2708 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2709 { 2710 u32 devid; 2711 struct dev_table_entry *dev_table = pci_seg->dev_table; 2712 2713 if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE) 2714 return; 2715 2716 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2717 set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID); 2718 if (!amd_iommu_snp_en) 2719 set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION); 2720 } 2721 } 2722 2723 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2724 { 2725 u32 devid; 2726 struct dev_table_entry *dev_table = pci_seg->dev_table; 2727 2728 if (dev_table == NULL) 2729 return; 2730 2731 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2732 dev_table[devid].data[0] = 0ULL; 2733 
dev_table[devid].data[1] = 0ULL; 2734 } 2735 } 2736 2737 static void init_device_table(void) 2738 { 2739 struct amd_iommu_pci_seg *pci_seg; 2740 u32 devid; 2741 2742 if (!amd_iommu_irq_remap) 2743 return; 2744 2745 for_each_pci_segment(pci_seg) { 2746 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) 2747 set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN); 2748 } 2749 } 2750 2751 static void iommu_init_flags(struct amd_iommu *iommu) 2752 { 2753 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? 2754 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : 2755 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); 2756 2757 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? 2758 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : 2759 iommu_feature_disable(iommu, CONTROL_PASSPW_EN); 2760 2761 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? 2762 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : 2763 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); 2764 2765 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? 2766 iommu_feature_enable(iommu, CONTROL_ISOC_EN) : 2767 iommu_feature_disable(iommu, CONTROL_ISOC_EN); 2768 2769 /* 2770 * make IOMMU memory accesses cache coherent 2771 */ 2772 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 2773 2774 /* Set IOTLB invalidation timeout to 1s */ 2775 iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT); 2776 2777 /* Enable Enhanced Peripheral Page Request Handling */ 2778 if (check_feature(FEATURE_EPHSUP)) 2779 iommu_feature_enable(iommu, CONTROL_EPH_EN); 2780 } 2781 2782 static void iommu_apply_resume_quirks(struct amd_iommu *iommu) 2783 { 2784 int i, j; 2785 u32 ioc_feature_control; 2786 struct pci_dev *pdev = iommu->root_pdev; 2787 2788 /* RD890 BIOSes may not have completely reconfigured the iommu */ 2789 if (!is_rd890_iommu(iommu->dev) || !pdev) 2790 return; 2791 2792 /* 2793 * First, we need to ensure that the iommu is enabled. 
This is 2794 * controlled by a register in the northbridge 2795 */ 2796 2797 /* Select Northbridge indirect register 0x75 and enable writing */ 2798 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); 2799 pci_read_config_dword(pdev, 0x64, &ioc_feature_control); 2800 2801 /* Enable the iommu */ 2802 if (!(ioc_feature_control & 0x1)) 2803 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); 2804 2805 /* Restore the iommu BAR */ 2806 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2807 iommu->stored_addr_lo); 2808 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, 2809 iommu->stored_addr_hi); 2810 2811 /* Restore the l1 indirect regs for each of the 6 l1s */ 2812 for (i = 0; i < 6; i++) 2813 for (j = 0; j < 0x12; j++) 2814 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); 2815 2816 /* Restore the l2 indirect regs */ 2817 for (i = 0; i < 0x83; i++) 2818 iommu_write_l2(iommu, i, iommu->stored_l2[i]); 2819 2820 /* Lock PCI setup registers */ 2821 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2822 iommu->stored_addr_lo | 1); 2823 } 2824 2825 static void iommu_enable_ga(struct amd_iommu *iommu) 2826 { 2827 #ifdef CONFIG_IRQ_REMAP 2828 switch (amd_iommu_guest_ir) { 2829 case AMD_IOMMU_GUEST_IR_VAPIC: 2830 case AMD_IOMMU_GUEST_IR_LEGACY_GA: 2831 iommu_feature_enable(iommu, CONTROL_GA_EN); 2832 iommu->irte_ops = &irte_128_ops; 2833 break; 2834 default: 2835 iommu->irte_ops = &irte_32_ops; 2836 break; 2837 } 2838 #endif 2839 } 2840 2841 static void iommu_disable_irtcachedis(struct amd_iommu *iommu) 2842 { 2843 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); 2844 } 2845 2846 static void iommu_enable_irtcachedis(struct amd_iommu *iommu) 2847 { 2848 u64 ctrl; 2849 2850 if (!amd_iommu_irtcachedis) 2851 return; 2852 2853 /* 2854 * Note: 2855 * The support for IRTCacheDis feature is dertermined by 2856 * checking if the bit is writable. 
2857 */ 2858 iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS); 2859 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 2860 ctrl &= (1ULL << CONTROL_IRTCACHEDIS); 2861 if (ctrl) 2862 iommu->irtcachedis_enabled = true; 2863 pr_info("iommu%d (%#06x) : IRT cache is %s\n", 2864 iommu->index, iommu->devid, 2865 iommu->irtcachedis_enabled ? "disabled" : "enabled"); 2866 } 2867 2868 static void iommu_enable_2k_int(struct amd_iommu *iommu) 2869 { 2870 if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) 2871 return; 2872 2873 iommu_feature_set(iommu, 2874 CONTROL_NUM_INT_REMAP_MODE_2K, 2875 CONTROL_NUM_INT_REMAP_MODE_MASK, 2876 CONTROL_NUM_INT_REMAP_MODE); 2877 } 2878 2879 static void early_enable_iommu(struct amd_iommu *iommu) 2880 { 2881 iommu_disable(iommu); 2882 iommu_init_flags(iommu); 2883 iommu_set_device_table(iommu); 2884 iommu_enable_command_buffer(iommu); 2885 iommu_enable_gt(iommu); 2886 iommu_enable_ga(iommu); 2887 iommu_enable_xt(iommu); 2888 iommu_enable_irtcachedis(iommu); 2889 iommu_enable_2k_int(iommu); 2890 iommu_enable(iommu); 2891 amd_iommu_flush_all_caches(iommu); 2892 } 2893 2894 /* 2895 * This function finally enables all IOMMUs found in the system after 2896 * they have been initialized. 2897 * 2898 * Or if in kdump kernel and IOMMUs are all pre-enabled, try to reuse 2899 * the old content of device table entries. Not this case or reuse failed, 2900 * just continue as normal kernel does. 2901 */ 2902 static void early_enable_iommus(void) 2903 { 2904 struct amd_iommu *iommu; 2905 struct amd_iommu_pci_seg *pci_seg; 2906 2907 if (!reuse_device_table()) { 2908 /* 2909 * If come here because of failure in reusing device table from old 2910 * kernel with all IOMMUs enabled, print error message and try to 2911 * free allocated old_dev_tbl_cpy. 
2912 */ 2913 if (amd_iommu_pre_enabled) { 2914 pr_err("Failed to reuse DEV table from previous kernel.\n"); 2915 /* 2916 * Bail out early if unable to remap/reuse DEV table from 2917 * previous kernel if SNP enabled as IOMMU commands will 2918 * time out without DEV table and cause kdump boot panic. 2919 */ 2920 BUG_ON(check_feature(FEATURE_SNP)); 2921 } 2922 2923 for_each_pci_segment(pci_seg) { 2924 if (pci_seg->old_dev_tbl_cpy != NULL) { 2925 memunmap((void *)pci_seg->old_dev_tbl_cpy); 2926 pci_seg->old_dev_tbl_cpy = NULL; 2927 } 2928 } 2929 2930 for_each_iommu(iommu) { 2931 clear_translation_pre_enabled(iommu); 2932 early_enable_iommu(iommu); 2933 } 2934 } else { 2935 pr_info("Reused DEV table from previous kernel.\n"); 2936 2937 for_each_pci_segment(pci_seg) { 2938 iommu_free_pages(pci_seg->dev_table); 2939 pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; 2940 } 2941 2942 for_each_iommu(iommu) { 2943 iommu_disable_command_buffer(iommu); 2944 iommu_disable_event_buffer(iommu); 2945 iommu_disable_irtcachedis(iommu); 2946 iommu_enable_command_buffer(iommu); 2947 iommu_enable_ga(iommu); 2948 iommu_enable_xt(iommu); 2949 iommu_enable_irtcachedis(iommu); 2950 iommu_enable_2k_int(iommu); 2951 iommu_set_device_table(iommu); 2952 amd_iommu_flush_all_caches(iommu); 2953 } 2954 } 2955 } 2956 2957 static void enable_iommus_ppr(void) 2958 { 2959 struct amd_iommu *iommu; 2960 2961 if (!amd_iommu_gt_ppr_supported()) 2962 return; 2963 2964 for_each_iommu(iommu) 2965 amd_iommu_enable_ppr_log(iommu); 2966 } 2967 2968 static void enable_iommus_vapic(void) 2969 { 2970 #ifdef CONFIG_IRQ_REMAP 2971 u32 status, i; 2972 struct amd_iommu *iommu; 2973 2974 for_each_iommu(iommu) { 2975 /* 2976 * Disable GALog if already running. It could have been enabled 2977 * in the previous boot before kdump. 
2978 */ 2979 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2980 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2981 continue; 2982 2983 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 2984 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 2985 2986 /* 2987 * Need to set and poll check the GALOGRun bit to zero before 2988 * we can set/ modify GA Log registers safely. 2989 */ 2990 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { 2991 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2992 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2993 break; 2994 udelay(10); 2995 } 2996 2997 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) 2998 return; 2999 } 3000 3001 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 3002 return; 3003 3004 if (!check_feature(FEATURE_GAM_VAPIC)) { 3005 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3006 return; 3007 } 3008 3009 if (amd_iommu_snp_en && 3010 !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) { 3011 pr_warn("Force to disable Virtual APIC due to SNP\n"); 3012 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3013 return; 3014 } 3015 3016 /* Enabling GAM and SNPAVIC support */ 3017 for_each_iommu(iommu) { 3018 if (iommu_init_ga_log(iommu) || 3019 iommu_ga_log_enable(iommu)) 3020 return; 3021 3022 iommu_feature_enable(iommu, CONTROL_GAM_EN); 3023 if (amd_iommu_snp_en) 3024 iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN); 3025 } 3026 3027 amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); 3028 pr_info("Virtual APIC enabled\n"); 3029 #endif 3030 } 3031 3032 static void disable_iommus(void) 3033 { 3034 struct amd_iommu *iommu; 3035 3036 for_each_iommu(iommu) 3037 iommu_disable(iommu); 3038 3039 #ifdef CONFIG_IRQ_REMAP 3040 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 3041 amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP); 3042 #endif 3043 } 3044 3045 /* 3046 * Suspend/Resume support 3047 * disable suspend until real resume implemented 3048 */ 3049 3050 static void amd_iommu_resume(void *data) 3051 { 3052 struct amd_iommu *iommu; 3053 
3054 for_each_iommu(iommu) 3055 iommu_apply_resume_quirks(iommu); 3056 3057 /* re-load the hardware */ 3058 for_each_iommu(iommu) 3059 early_enable_iommu(iommu); 3060 3061 iommu_enable_event_buffer(); 3062 amd_iommu_enable_interrupts(); 3063 } 3064 3065 static int amd_iommu_suspend(void *data) 3066 { 3067 /* disable IOMMUs to go out of the way for BIOS */ 3068 disable_iommus(); 3069 3070 return 0; 3071 } 3072 3073 static const struct syscore_ops amd_iommu_syscore_ops = { 3074 .suspend = amd_iommu_suspend, 3075 .resume = amd_iommu_resume, 3076 }; 3077 3078 static struct syscore amd_iommu_syscore = { 3079 .ops = &amd_iommu_syscore_ops, 3080 }; 3081 3082 static void __init free_iommu_resources(void) 3083 { 3084 free_iommu_all(); 3085 free_pci_segments(); 3086 } 3087 3088 /* SB IOAPIC is always on this device in AMD systems */ 3089 #define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0)) 3090 3091 /* SB IOAPIC for Hygon family 18h model 4h is on the device 0xb */ 3092 #define IOAPIC_SB_DEVID_FAM18H_M4H ((0x00 << 8) | PCI_DEVFN(0xb, 0)) 3093 3094 static bool __init check_ioapic_information(void) 3095 { 3096 const char *fw_bug = FW_BUG; 3097 bool ret, has_sb_ioapic; 3098 int idx; 3099 3100 has_sb_ioapic = false; 3101 ret = false; 3102 3103 /* 3104 * If we have map overrides on the kernel command line the 3105 * messages in this function might not describe firmware bugs 3106 * anymore - so be careful 3107 */ 3108 if (cmdline_maps) 3109 fw_bug = ""; 3110 3111 for (idx = 0; idx < nr_ioapics; idx++) { 3112 int devid, id = mpc_ioapic_id(idx); 3113 3114 devid = get_ioapic_devid(id); 3115 if (devid < 0) { 3116 pr_err("%s: IOAPIC[%d] not in IVRS table\n", 3117 fw_bug, id); 3118 ret = false; 3119 } else if (devid == IOAPIC_SB_DEVID || 3120 (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && 3121 boot_cpu_data.x86 == 0x18 && 3122 boot_cpu_data.x86_model >= 0x4 && 3123 boot_cpu_data.x86_model <= 0xf && 3124 devid == IOAPIC_SB_DEVID_FAM18H_M4H)) { 3125 has_sb_ioapic = true; 3126 ret 
= true; 3127 } 3128 } 3129 3130 if (!has_sb_ioapic) { 3131 /* 3132 * We expect the SB IOAPIC to be listed in the IVRS 3133 * table. The system timer is connected to the SB IOAPIC 3134 * and if we don't have it in the list the system will 3135 * panic at boot time. This situation usually happens 3136 * when the BIOS is buggy and provides us the wrong 3137 * device id for the IOAPIC in the system. 3138 */ 3139 pr_err("%s: No southbridge IOAPIC found\n", fw_bug); 3140 } 3141 3142 if (!ret) 3143 pr_err("Disabling interrupt remapping\n"); 3144 3145 return ret; 3146 } 3147 3148 static void __init free_dma_resources(void) 3149 { 3150 amd_iommu_pdom_id_destroy(); 3151 free_unity_maps(); 3152 } 3153 3154 static void __init ivinfo_init(void *ivrs) 3155 { 3156 amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); 3157 } 3158 3159 /* 3160 * This is the hardware init function for AMD IOMMU in the system. 3161 * This function is called either from amd_iommu_init or from the interrupt 3162 * remapping setup code. 3163 * 3164 * This function basically parses the ACPI table for AMD IOMMU (IVRS) 3165 * four times: 3166 * 3167 * 1 pass) Discover the most comprehensive IVHD type to use. 3168 * 3169 * 2 pass) Find the highest PCI device id the driver has to handle. 3170 * Upon this information the size of the data structures is 3171 * determined that needs to be allocated. 3172 * 3173 * 3 pass) Initialize the data structures just allocated with the 3174 * information in the ACPI table about available AMD IOMMUs 3175 * in the system. It also maps the PCI devices in the 3176 * system to specific IOMMUs 3177 * 3178 * 4 pass) After the basic data structures are allocated and 3179 * initialized we update them with information about memory 3180 * remapping requirements parsed out of the ACPI table in 3181 * this last pass. 3182 * 3183 * After everything is set up the IOMMUs are enabled and the necessary 3184 * hotplug and suspend notifiers are registered. 
*/
static int __init early_amd_iommu_init(void)
{
	struct acpi_table_header *ivrs_base;
	int ret;
	acpi_status status;
	u8 efr_hats, max_vasize;

	if (!amd_iommu_detected)
		return -ENODEV;

	status = acpi_get_table("IVRS", 0, &ivrs_base);
	if (status == AE_NOT_FOUND)
		return -ENODEV;
	else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("IVRS table error: %s\n", err);
		return -EINVAL;
	}

	/* From here on the table reference must be dropped via the out label */
	if (!boot_cpu_has(X86_FEATURE_CX16)) {
		pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Validate checksum here so we don't need to do it when
	 * we actually parse the table
	 */
	ret = check_ivrs_checksum(ivrs_base);
	if (ret)
		goto out;

	ivinfo_init(ivrs_base);

	/* An IVinfo VASize of zero is treated as 64 */
	max_vasize = FIELD_GET(IOMMU_IVINFO_VASIZE, amd_iommu_ivinfo);
	if (!max_vasize)
		max_vasize = 64;

	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);

	/*
	 * now the data structures are allocated and basically initialized
	 * start the real acpi table scan
	 */
	ret = init_iommu_all(ivrs_base);
	if (ret)
		goto out;

	/* 5 level guest page table */
	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;

	efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr);
	if (efr_hats != 0x3) {
		/*
		 * efr[HATS] bits specify the maximum host translation level
		 * supported, with LEVEL 4 being initial max level.
		 */
		amd_iommu_hpt_vasize = min_t(unsigned int, max_vasize,
					     (efr_hats + PAGE_MODE_4_LEVEL - 1) * 9 + 21);
	} else {
		pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n",
			     efr_hats);
		amd_iommu_hatdis = true;
	}

	if (amd_iommu_pgtable == PD_MODE_V2) {
		if (!amd_iommu_v2_pgtbl_supported()) {
			pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
			amd_iommu_pgtable = PD_MODE_V1;
		}
	}

	if (amd_iommu_hatdis) {
		/*
		 * Host (v1) page table is not available. Attempt to use
		 * Guest (v2) page table.
		 */
		if (amd_iommu_v2_pgtbl_supported())
			amd_iommu_pgtable = PD_MODE_V2;
		else
			amd_iommu_pgtable = PD_MODE_NONE;
	}

	/* Disable any previously enabled IOMMUs */
	if (!is_kdump_kernel() || amd_iommu_disabled)
		disable_iommus();

	/* Interrupt remapping is dropped if the IOAPIC information is bad */
	if (amd_iommu_irq_remap)
		amd_iommu_irq_remap = check_ioapic_information();

	if (amd_iommu_irq_remap) {
		struct amd_iommu_pci_seg *pci_seg;
		/* Error reported if any of the allocations below fails */
		ret = -ENOMEM;
		for_each_pci_segment(pci_seg) {
			if (alloc_irq_lookup_table(pci_seg))
				goto out;
		}
	}

	ret = init_memory_definitions(ivrs_base);
	if (ret)
		goto out;

	/* init the device table */
	init_device_table();

out:
	/* Don't leak any ACPI memory */
	acpi_put_table(ivrs_base);

	return ret;
}

/*
 * Set up the interrupt of every IOMMU through iommu_init_irq() and then
 * enable the vAPIC and PPR log handling. Stops at the first IOMMU whose
 * interrupt setup fails and returns that error, otherwise returns 0.
 */
static int amd_iommu_enable_interrupts(void)
{
	struct amd_iommu *iommu;
	int ret = 0;

	for_each_iommu(iommu) {
		ret = iommu_init_irq(iommu);
		if (ret)
			goto out;
	}

	/*
	 * Interrupt handler is ready to process interrupts. Enable
	 * PPR and GA log interrupt for all IOMMUs.
	 */
	enable_iommus_vapic();
	enable_iommus_ppr();

out:
	return ret;
}

/*
 * Decide whether the AMD IOMMU should be used at all. Returns false if the
 * IVRS ACPI table is missing or cannot be read, or if Stoney Ridge graphics
 * is found and amd_iommu_force_enable is not set. Otherwise requests ACS and
 * returns true.
 */
static bool __init detect_ivrs(void)
{
	struct acpi_table_header *ivrs_base;
	acpi_status status;
	int i;

	status = acpi_get_table("IVRS", 0, &ivrs_base);
	if (status == AE_NOT_FOUND)
		return false;
	else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("IVRS table error: %s\n", err);
		return false;
	}

	/* Only the presence of the table matters here */
	acpi_put_table(ivrs_base);

	if (amd_iommu_force_enable)
		goto out;

	/* Don't use IOMMU if there is Stoney Ridge graphics */
	for (i = 0; i < 32; i++) {
		u32 pci_id;

		/* Low 16 bits are compared with 0x1002, high 16 bits with 0x98e4 */
		pci_id = read_pci_config(0, i, 0, 0);
		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
			pr_info("Disable IOMMU on Stoney Ridge\n");
			return false;
		}
	}

out:
	/* Make sure ACS will be enabled during PCI probe */
	pci_request_acs();

	return true;
}

/*
 * On a host with CC_ATTR_HOST_SEV_SNP, check that the IOMMU configuration
 * allows SNP and initialize the RMP table. If any check fails the
 * CC_ATTR_HOST_SEV_SNP attribute is cleared. No-op without
 * CONFIG_KVM_AMD_SEV.
 */
static __init void iommu_snp_enable(void)
{
#ifdef CONFIG_KVM_AMD_SEV
	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
		return;
	/*
	 * The SNP support requires that IOMMU must be enabled, and is
	 * configured with V1 page table (DTE[Mode] = 0 is not supported).
	 */
	if (no_iommu || iommu_default_passthrough()) {
		pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
		goto disable_snp;
	}

	if (amd_iommu_pgtable != PD_MODE_V1) {
		pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
		goto disable_snp;
	}

	amd_iommu_snp_en = check_feature(FEATURE_SNP);
	if (!amd_iommu_snp_en) {
		pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
		goto disable_snp;
	}

	/*
	 * Enable host SNP support once SNP support is checked on IOMMU.
	 */
	if (snp_rmptable_init()) {
		pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n");
		goto disable_snp;
	}

	pr_info("IOMMU SNP support enabled.\n");
	return;

disable_snp:
	cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
#endif
}

/*
 * With SNP enabled on a family 0x19 CPU, raise the event log size to its
 * maximum, and for models >= 0x10 the PPR log size as well. No-op without
 * CONFIG_KVM_AMD_SEV.
 */
static void amd_iommu_apply_erratum_snp(void)
{
#ifdef CONFIG_KVM_AMD_SEV
	if (!amd_iommu_snp_en)
		return;

	/* Errata fix for Family 0x19 */
	if (boot_cpu_data.x86 != 0x19)
		return;

	/* Set event log buffer size to max */
	amd_iommu_evtlog_size = EVTLOG_SIZE_MAX;
	pr_info("Applying erratum: Increase Event log size to 0x%x\n",
		amd_iommu_evtlog_size);

	/*
	 * Set PPR log buffer size to max.
	 * (Family 0x19, model < 0x10 doesn't support PPR when SNP is enabled).
	 */
	if (boot_cpu_data.x86_model >= 0x10) {
		amd_iommu_pprlog_size = PPRLOG_SIZE_MAX;
		pr_info("Applying erratum: Increase PPR log size to 0x%x\n",
			amd_iommu_pprlog_size);
	}
#endif
}

/****************************************************************************
 *
 * AMD IOMMU Initialization State Machine
 *
 ****************************************************************************/

/*
 * Perform the work of the current init_state and advance init_state to the
 * next state, or to one of the error states on failure. Returns 0 on
 * success or a negative error code. On error the DMA resources are freed;
 * without interrupt remapping the IOMMUs are disabled and their resources
 * released as well, otherwise the device tables are cleared and the caches
 * flushed.
 */
static int __init state_next(void)
{
	int ret = 0;

	switch (init_state) {
	case IOMMU_START_STATE:
		if (!detect_ivrs()) {
			init_state	= IOMMU_NOT_FOUND;
			ret		= -ENODEV;
		} else {
			init_state	= IOMMU_IVRS_DETECTED;
		}
		break;
	case IOMMU_IVRS_DETECTED:
		if (amd_iommu_disabled) {
			init_state = IOMMU_CMDLINE_DISABLED;
			ret = -EINVAL;
		} else {
			ret = early_amd_iommu_init();
			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
		}
		break;
	case IOMMU_ACPI_FINISHED:
		early_enable_iommus();
		x86_platform.iommu_shutdown = disable_iommus;
		init_state = IOMMU_ENABLED;
		break;
	case IOMMU_ENABLED:
		register_syscore(&amd_iommu_syscore);
		iommu_snp_enable();

		/* Must run before the log buffers are set up below */
		amd_iommu_apply_erratum_snp();

		/* Allocate/enable event log buffer */
		if (is_kdump_kernel())
			ret = remap_event_buffer();
		else
			ret = alloc_event_buffer();

		if (ret) {
			init_state = IOMMU_INIT_ERROR;
			break;
		}
		iommu_enable_event_buffer();

		ret = amd_iommu_init_pci();
		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
		break;
	case IOMMU_PCI_INIT:
		ret = amd_iommu_enable_interrupts();
		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
		break;
	case IOMMU_INTERRUPTS_EN:
		init_state = IOMMU_INITIALIZED;
		break;
	case IOMMU_INITIALIZED:
		/* Nothing to do */
		break;
	case IOMMU_NOT_FOUND:
	case IOMMU_INIT_ERROR:
	case IOMMU_CMDLINE_DISABLED:
		/* Error states => do nothing */
		ret = -EINVAL;
		break;
	default:
		/* Unknown state */
		BUG();
	}

	if (ret) {
		free_dma_resources();
		if (!irq_remapping_enabled) {
			disable_iommus();
			free_iommu_resources();
		} else {
			struct amd_iommu *iommu;
			struct amd_iommu_pci_seg *pci_seg;

			for_each_pci_segment(pci_seg)
				uninit_device_table_dma(pci_seg);

			for_each_iommu(iommu)
				amd_iommu_flush_all_caches(iommu);
		}
	}
	return ret;
}

/*
 * Step the init state machine until init_state equals @state or one of the
 * error states is reached. Returns the result of the last state_next() call.
 *
 * NOTE(review): if init_state already equals @state on entry, the loop body
 * never runs and the initial -EINVAL is returned - confirm that callers never
 * request the current state.
 */
static int __init iommu_go_to_state(enum iommu_init_state state)
{
	int ret = -EINVAL;

	while (init_state != state) {
		if (init_state == IOMMU_NOT_FOUND         ||
		    init_state == IOMMU_INIT_ERROR        ||
		    init_state == IOMMU_CMDLINE_DISABLED)
			break;
		ret = state_next();
	}

	/*
	 * SNP platform initialization requires IOMMUs to be fully configured.
	 * If the SNP support on IOMMUs has NOT been checked, simply mark SNP
	 * as unsupported. If the SNP support on IOMMUs has been checked and
	 * host SNP support enabled but RMP enforcement has not been enabled
	 * in IOMMUs, then the system is in a half-baked state, but can limp
	 * along as all memory should be Hypervisor-Owned in the RMP. WARN,
	 * but leave SNP as "supported" to avoid confusing the kernel.
	 */
	if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
	    !WARN_ON_ONCE(amd_iommu_snp_en))
		cc_platform_clear(CC_ATTR_HOST_SEV_SNP);

	return ret;
}

#ifdef CONFIG_IRQ_REMAP
/*
 * Request interrupt remapping and run the state machine up to
 * IOMMU_ACPI_FINISHED. Returns 0 if interrupt remapping is still requested
 * afterwards, -ENODEV if it was dropped during init, or the error from the
 * state machine.
 */
int __init amd_iommu_prepare(void)
{
	int ret;

	amd_iommu_irq_remap = true;

	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
	if (ret) {
		amd_iommu_irq_remap = false;
		return ret;
	}

	return amd_iommu_irq_remap ? 0 : -ENODEV;
}

/*
 * Run the state machine up to IOMMU_ENABLED and mark interrupt remapping as
 * enabled. Returns amd_iommu_xt_mode on success or the error from the state
 * machine.
 */
int __init amd_iommu_enable(void)
{
	int ret;

	ret = iommu_go_to_state(IOMMU_ENABLED);
	if (ret)
		return ret;

	irq_remapping_enabled = 1;
	return amd_iommu_xt_mode;
}

/* Disable the IOMMUs by running the suspend path */
void amd_iommu_disable(void)
{
	amd_iommu_suspend(NULL);
}

/* Re-enable the IOMMUs by running the resume path; @mode is not used */
int amd_iommu_reenable(int mode)
{
	amd_iommu_resume(NULL);

	return 0;
}

/* Nothing to do per CPU, always returns 0 */
int amd_iommu_enable_faulting(unsigned int cpu)
{
	/* We enable MSI later when PCI is initialized */
	return 0;
}
#endif

/*
 * This is the core init function for AMD IOMMU hardware in the system.
 * This function is called from the generic x86 DMA layer initialization
 * code.
3604 */ 3605 static int __init amd_iommu_init(void) 3606 { 3607 int ret; 3608 3609 ret = iommu_go_to_state(IOMMU_INITIALIZED); 3610 #ifdef CONFIG_GART_IOMMU 3611 if (ret && list_empty(&amd_iommu_list)) { 3612 /* 3613 * We failed to initialize the AMD IOMMU - try fallback 3614 * to GART if possible. 3615 */ 3616 gart_iommu_init(); 3617 } 3618 #endif 3619 3620 if (!ret) 3621 amd_iommu_debugfs_setup(); 3622 3623 return ret; 3624 } 3625 3626 static bool amd_iommu_sme_check(void) 3627 { 3628 if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) || 3629 (boot_cpu_data.x86 != 0x17)) 3630 return true; 3631 3632 /* For Fam17h, a specific level of support is required */ 3633 if (boot_cpu_data.microcode >= 0x08001205) 3634 return true; 3635 3636 if ((boot_cpu_data.microcode >= 0x08001126) && 3637 (boot_cpu_data.microcode <= 0x080011ff)) 3638 return true; 3639 3640 pr_notice("IOMMU not currently supported when SME is active\n"); 3641 3642 return false; 3643 } 3644 3645 /**************************************************************************** 3646 * 3647 * Early detect code. This code runs at IOMMU detection time in the DMA 3648 * layer. 
It just looks if there is an IVRS ACPI table to detect AMD 3649 * IOMMUs 3650 * 3651 ****************************************************************************/ 3652 void __init amd_iommu_detect(void) 3653 { 3654 int ret; 3655 3656 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 3657 goto disable_snp; 3658 3659 if (!amd_iommu_sme_check()) 3660 goto disable_snp; 3661 3662 ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); 3663 if (ret) 3664 goto disable_snp; 3665 3666 amd_iommu_detected = true; 3667 iommu_detected = 1; 3668 x86_init.iommu.iommu_init = amd_iommu_init; 3669 return; 3670 3671 disable_snp: 3672 if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) 3673 cc_platform_clear(CC_ATTR_HOST_SEV_SNP); 3674 } 3675 3676 /**************************************************************************** 3677 * 3678 * Parsing functions for the AMD IOMMU specific kernel command line 3679 * options. 3680 * 3681 ****************************************************************************/ 3682 3683 static int __init parse_amd_iommu_dump(char *str) 3684 { 3685 amd_iommu_dump = true; 3686 3687 return 1; 3688 } 3689 3690 static int __init parse_amd_iommu_intr(char *str) 3691 { 3692 for (; *str; ++str) { 3693 if (strncmp(str, "legacy", 6) == 0) { 3694 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3695 break; 3696 } 3697 if (strncmp(str, "vapic", 5) == 0) { 3698 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 3699 break; 3700 } 3701 } 3702 return 1; 3703 } 3704 3705 static int __init parse_amd_iommu_options(char *str) 3706 { 3707 if (!str) 3708 return -EINVAL; 3709 3710 while (*str) { 3711 if (strncmp(str, "fullflush", 9) == 0) { 3712 pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); 3713 iommu_set_dma_strict(); 3714 } else if (strncmp(str, "force_enable", 12) == 0) { 3715 amd_iommu_force_enable = true; 3716 } else if (strncmp(str, "off", 3) == 0) { 3717 amd_iommu_disabled = true; 3718 } else if (strncmp(str, "force_isolation", 15) == 0) { 3719 
amd_iommu_force_isolation = true; 3720 } else if (strncmp(str, "pgtbl_v1", 8) == 0) { 3721 amd_iommu_pgtable = PD_MODE_V1; 3722 } else if (strncmp(str, "pgtbl_v2", 8) == 0) { 3723 amd_iommu_pgtable = PD_MODE_V2; 3724 } else if (strncmp(str, "irtcachedis", 11) == 0) { 3725 amd_iommu_irtcachedis = true; 3726 } else if (strncmp(str, "nohugepages", 11) == 0) { 3727 pr_info("Restricting V1 page-sizes to 4KiB"); 3728 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K; 3729 } else if (strncmp(str, "v2_pgsizes_only", 15) == 0) { 3730 pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB"); 3731 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; 3732 } else { 3733 pr_notice("Unknown option - '%s'\n", str); 3734 } 3735 3736 str += strcspn(str, ","); 3737 while (*str == ',') 3738 str++; 3739 } 3740 3741 return 1; 3742 } 3743 3744 static int __init parse_ivrs_ioapic(char *str) 3745 { 3746 u32 seg = 0, bus, dev, fn; 3747 int id, i; 3748 u32 devid; 3749 3750 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3751 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3752 goto found; 3753 3754 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3755 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3756 pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", 3757 str, id, seg, bus, dev, fn); 3758 goto found; 3759 } 3760 3761 pr_err("Invalid command line: ivrs_ioapic%s\n", str); 3762 return 1; 3763 3764 found: 3765 if (early_ioapic_map_size == EARLY_MAP_SIZE) { 3766 pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", 3767 str); 3768 return 1; 3769 } 3770 3771 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3772 3773 cmdline_maps = true; 3774 i = early_ioapic_map_size++; 3775 early_ioapic_map[i].id = id; 3776 early_ioapic_map[i].devid = devid; 3777 early_ioapic_map[i].cmd_line = true; 3778 3779 return 1; 3780 } 3781 3782 static int __init parse_ivrs_hpet(char *str) 3783 { 3784 u32 seg = 0, 
bus, dev, fn; 3785 int id, i; 3786 u32 devid; 3787 3788 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3789 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3790 goto found; 3791 3792 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3793 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3794 pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", 3795 str, id, seg, bus, dev, fn); 3796 goto found; 3797 } 3798 3799 pr_err("Invalid command line: ivrs_hpet%s\n", str); 3800 return 1; 3801 3802 found: 3803 if (early_hpet_map_size == EARLY_MAP_SIZE) { 3804 pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", 3805 str); 3806 return 1; 3807 } 3808 3809 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3810 3811 cmdline_maps = true; 3812 i = early_hpet_map_size++; 3813 early_hpet_map[i].id = id; 3814 early_hpet_map[i].devid = devid; 3815 early_hpet_map[i].cmd_line = true; 3816 3817 return 1; 3818 } 3819 3820 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN) 3821 3822 static int __init parse_ivrs_acpihid(char *str) 3823 { 3824 u32 seg = 0, bus, dev, fn; 3825 char *hid, *uid, *p, *addr; 3826 char acpiid[ACPIID_LEN + 1] = { }; /* size with NULL terminator */ 3827 int i; 3828 3829 addr = strchr(str, '@'); 3830 if (!addr) { 3831 addr = strchr(str, '='); 3832 if (!addr) 3833 goto not_found; 3834 3835 ++addr; 3836 3837 if (strlen(addr) > ACPIID_LEN) 3838 goto not_found; 3839 3840 if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || 3841 sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { 3842 pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", 3843 str, acpiid, seg, bus, dev, fn); 3844 goto found; 3845 } 3846 goto not_found; 3847 } 3848 3849 /* We have the '@', make it the terminator to get just the acpiid */ 3850 *addr++ = 0; 3851 3852 if (strlen(str) > ACPIID_LEN) 3853 goto not_found; 3854 3855 
if (sscanf(str, "=%s", acpiid) != 1) 3856 goto not_found; 3857 3858 if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 || 3859 sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) 3860 goto found; 3861 3862 not_found: 3863 pr_err("Invalid command line: ivrs_acpihid%s\n", str); 3864 return 1; 3865 3866 found: 3867 p = acpiid; 3868 hid = strsep(&p, ":"); 3869 uid = p; 3870 3871 if (!hid || !(*hid) || !uid) { 3872 pr_err("Invalid command line: hid or uid\n"); 3873 return 1; 3874 } 3875 3876 /* 3877 * Ignore leading zeroes after ':', so e.g., AMDI0095:00 3878 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match 3879 */ 3880 while (*uid == '0' && *(uid + 1)) 3881 uid++; 3882 3883 if (strlen(hid) >= ACPIHID_HID_LEN) { 3884 pr_err("Invalid command line: hid is too long\n"); 3885 return 1; 3886 } else if (strlen(uid) >= ACPIHID_UID_LEN) { 3887 pr_err("Invalid command line: uid is too long\n"); 3888 return 1; 3889 } 3890 3891 i = early_acpihid_map_size++; 3892 memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); 3893 memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); 3894 early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3895 early_acpihid_map[i].cmd_line = true; 3896 3897 return 1; 3898 } 3899 3900 __setup("amd_iommu_dump", parse_amd_iommu_dump); 3901 __setup("amd_iommu=", parse_amd_iommu_options); 3902 __setup("amd_iommu_intr=", parse_amd_iommu_intr); 3903 __setup("ivrs_ioapic", parse_ivrs_ioapic); 3904 __setup("ivrs_hpet", parse_ivrs_hpet); 3905 __setup("ivrs_acpihid", parse_ivrs_acpihid); 3906 3907 bool amd_iommu_pasid_supported(void) 3908 { 3909 /* CPU page table size should match IOMMU guest page table size */ 3910 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3911 amd_iommu_gpt_level != PAGE_MODE_5_LEVEL) 3912 return false; 3913 3914 /* 3915 * Since DTE[Mode]=0 is prohibited on SNP-enabled system 3916 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without 3917 * setting up IOMMUv1 page table. 
3918 */ 3919 return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en; 3920 } 3921 3922 struct amd_iommu *get_amd_iommu(unsigned int idx) 3923 { 3924 unsigned int i = 0; 3925 struct amd_iommu *iommu; 3926 3927 for_each_iommu(iommu) 3928 if (i++ == idx) 3929 return iommu; 3930 return NULL; 3931 } 3932 3933 /**************************************************************************** 3934 * 3935 * IOMMU EFR Performance Counter support functionality. This code allows 3936 * access to the IOMMU PC functionality. 3937 * 3938 ****************************************************************************/ 3939 3940 u8 amd_iommu_pc_get_max_banks(unsigned int idx) 3941 { 3942 struct amd_iommu *iommu = get_amd_iommu(idx); 3943 3944 if (iommu) 3945 return iommu->max_banks; 3946 3947 return 0; 3948 } 3949 3950 bool amd_iommu_pc_supported(void) 3951 { 3952 return amd_iommu_pc_present; 3953 } 3954 3955 u8 amd_iommu_pc_get_max_counters(unsigned int idx) 3956 { 3957 struct amd_iommu *iommu = get_amd_iommu(idx); 3958 3959 if (iommu) 3960 return iommu->max_counters; 3961 3962 return 0; 3963 } 3964 3965 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, 3966 u8 fxn, u64 *value, bool is_write) 3967 { 3968 u32 offset; 3969 u32 max_offset_lim; 3970 3971 /* Make sure the IOMMU PC resource is available */ 3972 if (!amd_iommu_pc_present) 3973 return -ENODEV; 3974 3975 /* Check for valid iommu and pc register indexing */ 3976 if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7))) 3977 return -ENODEV; 3978 3979 offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); 3980 3981 /* Limit the offset to the hw defined mmio region aperture */ 3982 max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) | 3983 (iommu->max_counters << 8) | 0x28); 3984 if ((offset < MMIO_CNTR_REG_OFFSET) || 3985 (offset > max_offset_lim)) 3986 return -EINVAL; 3987 3988 if (is_write) { 3989 u64 val = *value & GENMASK_ULL(47, 0); 3990 3991 writel((u32)val, iommu->mmio_base + offset); 3992 
writel((val >> 32), iommu->mmio_base + offset + 4); 3993 } else { 3994 *value = readl(iommu->mmio_base + offset + 4); 3995 *value <<= 32; 3996 *value |= readl(iommu->mmio_base + offset); 3997 *value &= GENMASK_ULL(47, 0); 3998 } 3999 4000 return 0; 4001 } 4002 4003 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 4004 { 4005 if (!iommu) 4006 return -EINVAL; 4007 4008 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); 4009 } 4010 4011 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 4012 { 4013 if (!iommu) 4014 return -EINVAL; 4015 4016 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); 4017 } 4018 4019 #ifdef CONFIG_KVM_AMD_SEV 4020 static int iommu_page_make_shared(void *page) 4021 { 4022 unsigned long paddr, pfn; 4023 4024 paddr = iommu_virt_to_phys(page); 4025 /* Cbit maybe set in the paddr */ 4026 pfn = __sme_clr(paddr) >> PAGE_SHIFT; 4027 4028 if (!(pfn % PTRS_PER_PMD)) { 4029 int ret, level; 4030 bool assigned; 4031 4032 ret = snp_lookup_rmpentry(pfn, &assigned, &level); 4033 if (ret) { 4034 pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret); 4035 return ret; 4036 } 4037 4038 if (!assigned) { 4039 pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn); 4040 return -EINVAL; 4041 } 4042 4043 if (level > PG_LEVEL_4K) { 4044 ret = psmash(pfn); 4045 if (!ret) 4046 goto done; 4047 4048 pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n", 4049 pfn, ret, level); 4050 return ret; 4051 } 4052 } 4053 4054 done: 4055 return rmp_make_shared(pfn, PG_LEVEL_4K); 4056 } 4057 4058 static int iommu_make_shared(void *va, size_t size) 4059 { 4060 void *page; 4061 int ret; 4062 4063 if (!va) 4064 return 0; 4065 4066 for (page = va; page < (va + size); page += PAGE_SIZE) { 4067 ret = iommu_page_make_shared(page); 4068 if (ret) 4069 return ret; 4070 } 4071 4072 return 0; 4073 } 4074 4075 int amd_iommu_snp_disable(void) 4076 { 4077 
struct amd_iommu *iommu; 4078 int ret; 4079 4080 if (!amd_iommu_snp_en) 4081 return 0; 4082 4083 for_each_iommu(iommu) { 4084 ret = iommu_make_shared(iommu->evt_buf, amd_iommu_evtlog_size); 4085 if (ret) 4086 return ret; 4087 4088 ret = iommu_make_shared(iommu->ppr_log, amd_iommu_pprlog_size); 4089 if (ret) 4090 return ret; 4091 4092 ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE); 4093 if (ret) 4094 return ret; 4095 } 4096 4097 return 0; 4098 } 4099 EXPORT_SYMBOL_GPL(amd_iommu_snp_disable); 4100 4101 bool amd_iommu_sev_tio_supported(void) 4102 { 4103 return check_feature2(FEATURE_SEVSNPIO_SUP); 4104 } 4105 EXPORT_SYMBOL_GPL(amd_iommu_sev_tio_supported); 4106 #endif 4107