// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 *         Leo Duran <leo.duran@amd.com>
 */

#define pr_fmt(fmt)	"AMD-Vi: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/syscore_ops.h>
#include <linux/interrupt.h>
#include <linux/msi.h>
#include <linux/irq.h>
#include <linux/amd-iommu.h>
#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/cc_platform.h>
#include <linux/iopoll.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/apic.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
#include <asm/io_apic.h>
#include <asm/irq_remapping.h>
#include <asm/set_memory.h>
#include <asm/sev.h>

#include <linux/crash_dump.h>

#include "amd_iommu.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"

/*
 * definitions for the ACPI scanning code
 */
#define IVRS_HEADER_LENGTH		48

#define ACPI_IVHD_TYPE_MAX_SUPPORTED	0x40
#define ACPI_IVMD_TYPE_ALL		0x20
#define ACPI_IVMD_TYPE			0x21
#define ACPI_IVMD_TYPE_RANGE		0x22

#define IVHD_DEV_ALL			0x01
#define IVHD_DEV_SELECT			0x02
#define IVHD_DEV_SELECT_RANGE_START	0x03
#define IVHD_DEV_RANGE_END		0x04
#define IVHD_DEV_ALIAS			0x42
#define IVHD_DEV_ALIAS_RANGE		0x43
#define IVHD_DEV_EXT_SELECT		0x46
#define IVHD_DEV_EXT_SELECT_RANGE	0x47
#define IVHD_DEV_SPECIAL		0x48
#define IVHD_DEV_ACPI_HID		0xf0

#define UID_NOT_PRESENT			0
#define UID_IS_INTEGER			1
#define UID_IS_CHARACTER		2

#define IVHD_SPECIAL_IOAPIC		1
#define IVHD_SPECIAL_HPET		2

#define IVHD_FLAG_HT_TUN_EN_MASK	0x01
#define IVHD_FLAG_PASSPW_EN_MASK	0x02
#define IVHD_FLAG_RESPASSPW_EN_MASK	0x04
#define IVHD_FLAG_ISOC_EN_MASK		0x08

#define IVMD_FLAG_EXCL_RANGE		0x08
#define IVMD_FLAG_IW			0x04
#define IVMD_FLAG_IR			0x02
#define IVMD_FLAG_UNITY_MAP		0x01

#define ACPI_DEVFLAG_INITPASS		0x01
#define ACPI_DEVFLAG_EXTINT		0x02
#define ACPI_DEVFLAG_NMI		0x04
#define ACPI_DEVFLAG_SYSMGT1		0x10
#define ACPI_DEVFLAG_SYSMGT2		0x20
#define ACPI_DEVFLAG_LINT0		0x40
#define ACPI_DEVFLAG_LINT1		0x80
#define ACPI_DEVFLAG_ATSDIS		0x10000000

#define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	(((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
						 | ((dev & 0x1f) << 3) | (fn & 0x7))

/*
 * ACPI table definitions
 *
 * These data structures are laid over the table to parse the important values
 * out of it.
 */

/*
 * structure describing one IOMMU in the ACPI table. Typically followed by one
 * or more ivhd_entrys.
 */
struct ivhd_header {
	u8 type;
	u8 flags;
	u16 length;
	u16 devid;
	u16 cap_ptr;
	u64 mmio_phys;
	u16 pci_seg;
	u16 info;
	u32 efr_attr;

	/* Following only valid on IVHD type 11h and 40h */
	u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
	u64 efr_reg2;
} __attribute__((packed));

/*
 * A device entry describing which devices a specific IOMMU translates and
 * which requestor ids they use.
 */
struct ivhd_entry {
	u8 type;
	u16 devid;
	u8 flags;
	struct_group(ext_hid,
		u32 ext;
		u32 hidh;
	);
	u64 cid;
	u8 uidf;
	u8 uidl;
	u8 uid;
} __attribute__((packed));
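/*
 * Illustrative sketch only (not part of the original driver): a worked
 * example of how IVRS_GET_SBDF_ID() above packs a PCI segment, bus, device
 * and function into the 32-bit IDs used for command-line overrides such as
 * the early IOAPIC/HPET/ACPIHID maps below. The helper name is hypothetical.
 */
static inline u32 example_sbdf_for_0000_40_1f_7(void)
{
	/* (0x0000 << 16) | (0x40 << 8) | (0x1f << 3) | 0x7 == 0x000040ff */
	return IVRS_GET_SBDF_ID(0x0000, 0x40, 0x1f, 0x7);
}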
/*
 * An AMD IOMMU memory definition structure. It defines things like exclusion
 * ranges for devices and regions that should be unity mapped.
 */
struct ivmd_header {
	u8 type;
	u8 flags;
	u16 length;
	u16 devid;
	u16 aux;
	u16 pci_seg;
	u8 resv[6];
	u64 range_start;
	u64 range_length;
} __attribute__((packed));

bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;

enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;

int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;

static bool amd_iommu_detected;
static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
static bool amd_iommu_irtcachedis;
static int amd_iommu_target_ivhd_type;

/* Global EFR and EFR2 registers */
u64 amd_iommu_efr;
u64 amd_iommu_efr2;

/* SNP is enabled on the system? */
bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);

LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the system */
LIST_HEAD(amd_ivhd_dev_flags_list);	/* list of all IVHD device entry settings */

/* Number of IOMMUs present in the system */
static int amd_iommus_present;

/* IOMMUs have a non-present cache? */
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;

static bool amd_iommu_pc_present __read_mostly;
bool amdr_ivrs_remap_support __read_mostly;

bool amd_iommu_force_isolation __read_mostly;

unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;

enum iommu_init_state {
	IOMMU_START_STATE,
	IOMMU_IVRS_DETECTED,
	IOMMU_ACPI_FINISHED,
	IOMMU_ENABLED,
	IOMMU_PCI_INIT,
	IOMMU_INTERRUPTS_EN,
	IOMMU_INITIALIZED,
	IOMMU_NOT_FOUND,
	IOMMU_INIT_ERROR,
	IOMMU_CMDLINE_DISABLED,
};

/* Early ioapic and hpet maps from kernel command line */
#define EARLY_MAP_SIZE		4
static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];

static int __initdata early_ioapic_map_size;
static int __initdata early_hpet_map_size;
static int __initdata early_acpihid_map_size;

static bool __initdata cmdline_maps;

static enum iommu_init_state init_state = IOMMU_START_STATE;

static int amd_iommu_enable_interrupts(void);
static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);

static bool amd_iommu_pre_enabled = true;

static u32 amd_iommu_ivinfo __initdata;

bool translation_pre_enabled(struct amd_iommu *iommu)
{
	return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct amd_iommu *iommu)
{
	iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct amd_iommu *iommu)
{
	u64 ctrl;

	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
	if (ctrl & (1<<CONTROL_IOMMU_EN))
		iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
}

static inline unsigned long tbl_size(int entry_size, int last_bdf)
{
	unsigned shift = PAGE_SHIFT +
			 get_order((last_bdf + 1) * entry_size);

	return 1UL << shift;
}
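/*
 * Illustrative sketch only (not part of the original driver): worked example
 * of tbl_size(). Assuming a 32-byte device table entry (see
 * DEV_TABLE_ENTRY_SIZE) and last_bdf = 0xffff, (0xffff + 1) * 32 = 2 MiB;
 * with 4K pages get_order(2 MiB) = 9, so the result is
 * 1UL << (PAGE_SHIFT + 9) = 2 MiB. The helper name is hypothetical.
 */
static inline unsigned long example_dev_table_bytes(void)
{
	/* 32 is an assumed entry size used here purely for illustration */
	return tbl_size(32, 0xffff);
}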
int amd_iommu_get_num_iommus(void)
{
	return amd_iommus_present;
}

/*
 * Iterate through all the IOMMUs to compute the common EFR
 * masks and warn if an inconsistency is found.
 */
static __init void get_global_efr(void)
{
	struct amd_iommu *iommu;

	for_each_iommu(iommu) {
		u64 tmp = iommu->features;
		u64 tmp2 = iommu->features2;

		if (list_is_first(&iommu->list, &amd_iommu_list)) {
			amd_iommu_efr = tmp;
			amd_iommu_efr2 = tmp2;
			continue;
		}

		if (amd_iommu_efr == tmp &&
		    amd_iommu_efr2 == tmp2)
			continue;

		pr_err(FW_BUG
		       "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
		       tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
		       iommu->index, iommu->pci_seg->id,
		       PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
		       PCI_FUNC(iommu->devid));

		amd_iommu_efr &= tmp;
		amd_iommu_efr2 &= tmp2;
	}

	pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
}

/*
 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
 * Default to IVHD EFR since it is available sooner
 * (i.e. before PCI init).
 */
static void __init early_iommu_features_init(struct amd_iommu *iommu,
					     struct ivhd_header *h)
{
	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
		iommu->features = h->efr_reg;
		iommu->features2 = h->efr_reg2;
	}
	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
		amdr_ivrs_remap_support = true;
}

/* Access to l1 and l2 indexed register spaces */

static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
{
	u32 val;

	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
	pci_read_config_dword(iommu->dev, 0xfc, &val);
	return val;
}

static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
{
	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
	pci_write_config_dword(iommu->dev, 0xfc, val);
	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
}

static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
{
	u32 val;

	pci_write_config_dword(iommu->dev, 0xf0, address);
	pci_read_config_dword(iommu->dev, 0xf4, &val);
	return val;
}

static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
{
	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
	pci_write_config_dword(iommu->dev, 0xf4, val);
}
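/*
 * Illustrative sketch only (not part of the original driver): a typical
 * read-modify-write through the indirect L2 register space above. The same
 * pattern is used by the ATS write-check erratum workaround further down,
 * which reads L2_DEBUG_3 (address 0x47) and sets bit 0.
 */
static inline void example_l2_set_bits(struct amd_iommu *iommu, u8 address, u32 bits)
{
	u32 val = iommu_read_l2(iommu, address);

	iommu_write_l2(iommu, address, val | bits);
}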
/****************************************************************************
 *
 * AMD IOMMU MMIO register space handling functions
 *
 * These functions are used to program the IOMMU device registers in
 * MMIO space required by this driver.
 *
 ****************************************************************************/

/*
 * This function sets the exclusion range in the IOMMU. DMA accesses to the
 * exclusion range are passed through untranslated.
 */
static void iommu_set_exclusion_range(struct amd_iommu *iommu)
{
	u64 start = iommu->exclusion_start & PAGE_MASK;
	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
	u64 entry;

	if (!iommu->exclusion_start)
		return;

	entry = start | MMIO_EXCL_ENABLE_MASK;
	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
		    &entry, sizeof(entry));

	entry = limit;
	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
		    &entry, sizeof(entry));
}

static void iommu_set_cwwb_range(struct amd_iommu *iommu)
{
	u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
	u64 entry = start & PM_ADDR_MASK;

	if (!check_feature(FEATURE_SNP))
		return;

	/* Note:
	 * Re-purpose Exclusion base/limit registers for Completion wait
	 * write-back base/limit.
	 */
	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
		    &entry, sizeof(entry));

	/* Note:
	 * Default to 4 Kbytes, which can be specified by setting base
	 * address equal to the limit address.
	 */
	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
		    &entry, sizeof(entry));
}

/* Programs the physical address of the device table into the IOMMU hardware */
static void iommu_set_device_table(struct amd_iommu *iommu)
{
	u64 entry;
	u32 dev_table_size = iommu->pci_seg->dev_table_size;
	void *dev_table = (void *)get_dev_table(iommu);

	BUG_ON(iommu->mmio_base == NULL);

	entry = iommu_virt_to_phys(dev_table);
	entry |= (dev_table_size >> 12) - 1;
	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
		    &entry, sizeof(entry));
}

static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift)
{
	u64 ctrl;

	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
	mask <<= shift;
	ctrl &= ~mask;
	ctrl |= (val << shift) & mask;
	writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}

/* Generic functions to enable/disable certain features of the IOMMU. */
void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
{
	iommu_feature_set(iommu, 1ULL, 1ULL, bit);
}

static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
{
	iommu_feature_set(iommu, 0ULL, 1ULL, bit);
}

/* Function to enable the hardware */
static void iommu_enable(struct amd_iommu *iommu)
{
	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}

static void iommu_disable(struct amd_iommu *iommu)
{
	if (!iommu->mmio_base)
		return;

	/* Disable command buffer */
	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);

	/* Disable event logging and event interrupts */
	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);

	/* Disable IOMMU GA_LOG */
	iommu_feature_disable(iommu, CONTROL_GALOG_EN);
	iommu_feature_disable(iommu, CONTROL_GAINT_EN);

	/* Disable IOMMU PPR logging */
	iommu_feature_disable(iommu, CONTROL_PPRLOG_EN);
	iommu_feature_disable(iommu, CONTROL_PPRINT_EN);

	/* Disable IOMMU hardware itself */
	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);

	/* Clear IRTE cache disabling bit */
	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
}

/*
 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
 * the system has one.
471 */ 472 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) 473 { 474 if (!request_mem_region(address, end, "amd_iommu")) { 475 pr_err("Can not reserve memory region %llx-%llx for mmio\n", 476 address, end); 477 pr_err("This is a BIOS bug. Please contact your hardware vendor\n"); 478 return NULL; 479 } 480 481 return (u8 __iomem *)ioremap(address, end); 482 } 483 484 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) 485 { 486 if (iommu->mmio_base) 487 iounmap(iommu->mmio_base); 488 release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); 489 } 490 491 static inline u32 get_ivhd_header_size(struct ivhd_header *h) 492 { 493 u32 size = 0; 494 495 switch (h->type) { 496 case 0x10: 497 size = 24; 498 break; 499 case 0x11: 500 case 0x40: 501 size = 40; 502 break; 503 } 504 return size; 505 } 506 507 /**************************************************************************** 508 * 509 * The functions below belong to the first pass of AMD IOMMU ACPI table 510 * parsing. In this pass we try to find out the highest device id this 511 * code has to handle. Upon this information the size of the shared data 512 * structures is determined later. 513 * 514 ****************************************************************************/ 515 516 /* 517 * This function calculates the length of a given IVHD entry 518 */ 519 static inline int ivhd_entry_length(u8 *ivhd) 520 { 521 u32 type = ((struct ivhd_entry *)ivhd)->type; 522 523 if (type < 0x80) { 524 return 0x04 << (*ivhd >> 6); 525 } else if (type == IVHD_DEV_ACPI_HID) { 526 /* For ACPI_HID, offset 21 is uid len */ 527 return *((u8 *)ivhd + 21) + 22; 528 } 529 return 0; 530 } 531 532 /* 533 * After reading the highest device id from the IOMMU PCI capability header 534 * this function looks if there is a higher device id defined in the ACPI table 535 */ 536 static int __init find_last_devid_from_ivhd(struct ivhd_header *h) 537 { 538 u8 *p = (void *)h, *end = (void *)h; 539 struct ivhd_entry *dev; 540 int last_devid = -EINVAL; 541 542 u32 ivhd_size = get_ivhd_header_size(h); 543 544 if (!ivhd_size) { 545 pr_err("Unsupported IVHD type %#x\n", h->type); 546 return -EINVAL; 547 } 548 549 p += ivhd_size; 550 end += h->length; 551 552 while (p < end) { 553 dev = (struct ivhd_entry *)p; 554 switch (dev->type) { 555 case IVHD_DEV_ALL: 556 /* Use maximum BDF value for DEV_ALL */ 557 return 0xffff; 558 case IVHD_DEV_SELECT: 559 case IVHD_DEV_RANGE_END: 560 case IVHD_DEV_ALIAS: 561 case IVHD_DEV_EXT_SELECT: 562 /* all the above subfield types refer to device ids */ 563 if (dev->devid > last_devid) 564 last_devid = dev->devid; 565 break; 566 default: 567 break; 568 } 569 p += ivhd_entry_length(p); 570 } 571 572 WARN_ON(p != end); 573 574 return last_devid; 575 } 576 577 static int __init check_ivrs_checksum(struct acpi_table_header *table) 578 { 579 int i; 580 u8 checksum = 0, *p = (u8 *)table; 581 582 for (i = 0; i < table->length; ++i) 583 checksum += p[i]; 584 if (checksum != 0) { 585 /* ACPI table corrupt */ 586 pr_err(FW_BUG "IVRS invalid checksum\n"); 587 return -ENODEV; 588 } 589 590 return 0; 591 } 592 593 /* 594 * Iterate over all IVHD entries in the ACPI table and find the highest device 595 * id which we need to handle. This is the first of three functions which parse 596 * the ACPI table. So we check the checksum here. 
597 */ 598 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg) 599 { 600 u8 *p = (u8 *)table, *end = (u8 *)table; 601 struct ivhd_header *h; 602 int last_devid, last_bdf = 0; 603 604 p += IVRS_HEADER_LENGTH; 605 606 end += table->length; 607 while (p < end) { 608 h = (struct ivhd_header *)p; 609 if (h->pci_seg == pci_seg && 610 h->type == amd_iommu_target_ivhd_type) { 611 last_devid = find_last_devid_from_ivhd(h); 612 613 if (last_devid < 0) 614 return -EINVAL; 615 if (last_devid > last_bdf) 616 last_bdf = last_devid; 617 } 618 p += h->length; 619 } 620 WARN_ON(p != end); 621 622 return last_bdf; 623 } 624 625 /**************************************************************************** 626 * 627 * The following functions belong to the code path which parses the ACPI table 628 * the second time. In this ACPI parsing iteration we allocate IOMMU specific 629 * data structures, initialize the per PCI segment device/alias/rlookup table 630 * and also basically initialize the hardware. 631 * 632 ****************************************************************************/ 633 634 /* Allocate per PCI segment device table */ 635 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) 636 { 637 pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, 638 get_order(pci_seg->dev_table_size)); 639 if (!pci_seg->dev_table) 640 return -ENOMEM; 641 642 return 0; 643 } 644 645 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) 646 { 647 iommu_free_pages(pci_seg->dev_table, 648 get_order(pci_seg->dev_table_size)); 649 pci_seg->dev_table = NULL; 650 } 651 652 /* Allocate per PCI segment IOMMU rlookup table. */ 653 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 654 { 655 pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL, 656 get_order(pci_seg->rlookup_table_size)); 657 if (pci_seg->rlookup_table == NULL) 658 return -ENOMEM; 659 660 return 0; 661 } 662 663 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 664 { 665 iommu_free_pages(pci_seg->rlookup_table, 666 get_order(pci_seg->rlookup_table_size)); 667 pci_seg->rlookup_table = NULL; 668 } 669 670 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 671 { 672 pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL, 673 get_order(pci_seg->rlookup_table_size)); 674 kmemleak_alloc(pci_seg->irq_lookup_table, 675 pci_seg->rlookup_table_size, 1, GFP_KERNEL); 676 if (pci_seg->irq_lookup_table == NULL) 677 return -ENOMEM; 678 679 return 0; 680 } 681 682 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 683 { 684 kmemleak_free(pci_seg->irq_lookup_table); 685 iommu_free_pages(pci_seg->irq_lookup_table, 686 get_order(pci_seg->rlookup_table_size)); 687 pci_seg->irq_lookup_table = NULL; 688 } 689 690 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) 691 { 692 int i; 693 694 pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL, 695 get_order(pci_seg->alias_table_size)); 696 if (!pci_seg->alias_table) 697 return -ENOMEM; 698 699 /* 700 * let all alias entries point to itself 701 */ 702 for (i = 0; i <= pci_seg->last_bdf; ++i) 703 pci_seg->alias_table[i] = i; 704 705 return 0; 706 } 707 708 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) 709 { 710 iommu_free_pages(pci_seg->alias_table, 711 get_order(pci_seg->alias_table_size)); 712 pci_seg->alias_table = NULL; 713 } 714 715 /* 716 * Allocates the command buffer. This buffer is per AMD IOMMU. 
We can 717 * write commands to that buffer later and the IOMMU will execute them 718 * asynchronously 719 */ 720 static int __init alloc_command_buffer(struct amd_iommu *iommu) 721 { 722 iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL, 723 get_order(CMD_BUFFER_SIZE)); 724 725 return iommu->cmd_buf ? 0 : -ENOMEM; 726 } 727 728 /* 729 * Interrupt handler has processed all pending events and adjusted head 730 * and tail pointer. Reset overflow mask and restart logging again. 731 */ 732 void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, 733 u8 cntrl_intr, u8 cntrl_log, 734 u32 status_run_mask, u32 status_overflow_mask) 735 { 736 u32 status; 737 738 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 739 if (status & status_run_mask) 740 return; 741 742 pr_info_ratelimited("IOMMU %s log restarting\n", evt_type); 743 744 iommu_feature_disable(iommu, cntrl_log); 745 iommu_feature_disable(iommu, cntrl_intr); 746 747 writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET); 748 749 iommu_feature_enable(iommu, cntrl_intr); 750 iommu_feature_enable(iommu, cntrl_log); 751 } 752 753 /* 754 * This function restarts event logging in case the IOMMU experienced 755 * an event log buffer overflow. 756 */ 757 void amd_iommu_restart_event_logging(struct amd_iommu *iommu) 758 { 759 amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN, 760 CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK, 761 MMIO_STATUS_EVT_OVERFLOW_MASK); 762 } 763 764 /* 765 * This function restarts event logging in case the IOMMU experienced 766 * GA log overflow. 767 */ 768 void amd_iommu_restart_ga_log(struct amd_iommu *iommu) 769 { 770 amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN, 771 CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK, 772 MMIO_STATUS_GALOG_OVERFLOW_MASK); 773 } 774 775 /* 776 * This function resets the command buffer if the IOMMU stopped fetching 777 * commands from it. 778 */ 779 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) 780 { 781 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 782 783 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 784 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 785 iommu->cmd_buf_head = 0; 786 iommu->cmd_buf_tail = 0; 787 788 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 789 } 790 791 /* 792 * This function writes the command buffer address to the hardware and 793 * enables it. 
794 */ 795 static void iommu_enable_command_buffer(struct amd_iommu *iommu) 796 { 797 u64 entry; 798 799 BUG_ON(iommu->cmd_buf == NULL); 800 801 entry = iommu_virt_to_phys(iommu->cmd_buf); 802 entry |= MMIO_CMD_SIZE_512; 803 804 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 805 &entry, sizeof(entry)); 806 807 amd_iommu_reset_cmd_buffer(iommu); 808 } 809 810 /* 811 * This function disables the command buffer 812 */ 813 static void iommu_disable_command_buffer(struct amd_iommu *iommu) 814 { 815 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 816 } 817 818 static void __init free_command_buffer(struct amd_iommu *iommu) 819 { 820 iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); 821 } 822 823 void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, 824 size_t size) 825 { 826 int order = get_order(size); 827 void *buf = iommu_alloc_pages(gfp, order); 828 829 if (buf && 830 check_feature(FEATURE_SNP) && 831 set_memory_4k((unsigned long)buf, (1 << order))) { 832 iommu_free_pages(buf, order); 833 buf = NULL; 834 } 835 836 return buf; 837 } 838 839 /* allocates the memory where the IOMMU will log its events to */ 840 static int __init alloc_event_buffer(struct amd_iommu *iommu) 841 { 842 iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 843 EVT_BUFFER_SIZE); 844 845 return iommu->evt_buf ? 0 : -ENOMEM; 846 } 847 848 static void iommu_enable_event_buffer(struct amd_iommu *iommu) 849 { 850 u64 entry; 851 852 BUG_ON(iommu->evt_buf == NULL); 853 854 entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; 855 856 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 857 &entry, sizeof(entry)); 858 859 /* set head and tail to zero manually */ 860 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); 861 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); 862 863 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 864 } 865 866 /* 867 * This function disables the event log buffer 868 */ 869 static void iommu_disable_event_buffer(struct amd_iommu *iommu) 870 { 871 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 872 } 873 874 static void __init free_event_buffer(struct amd_iommu *iommu) 875 { 876 iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); 877 } 878 879 static void free_ga_log(struct amd_iommu *iommu) 880 { 881 #ifdef CONFIG_IRQ_REMAP 882 iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE)); 883 iommu_free_pages(iommu->ga_log_tail, get_order(8)); 884 #endif 885 } 886 887 #ifdef CONFIG_IRQ_REMAP 888 static int iommu_ga_log_enable(struct amd_iommu *iommu) 889 { 890 u32 status, i; 891 u64 entry; 892 893 if (!iommu->ga_log) 894 return -EINVAL; 895 896 entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; 897 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, 898 &entry, sizeof(entry)); 899 entry = (iommu_virt_to_phys(iommu->ga_log_tail) & 900 (BIT_ULL(52)-1)) & ~7ULL; 901 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, 902 &entry, sizeof(entry)); 903 writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); 904 writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); 905 906 907 iommu_feature_enable(iommu, CONTROL_GAINT_EN); 908 iommu_feature_enable(iommu, CONTROL_GALOG_EN); 909 910 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { 911 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 912 if (status & (MMIO_STATUS_GALOG_RUN_MASK)) 913 break; 914 udelay(10); 915 } 916 917 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) 918 return -EINVAL; 919 920 return 0; 921 } 922 923 static int iommu_init_ga_log(struct amd_iommu *iommu) 924 { 925 if 
(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 926 return 0; 927 928 iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE)); 929 if (!iommu->ga_log) 930 goto err_out; 931 932 iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8)); 933 if (!iommu->ga_log_tail) 934 goto err_out; 935 936 return 0; 937 err_out: 938 free_ga_log(iommu); 939 return -EINVAL; 940 } 941 #endif /* CONFIG_IRQ_REMAP */ 942 943 static int __init alloc_cwwb_sem(struct amd_iommu *iommu) 944 { 945 iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1); 946 947 return iommu->cmd_sem ? 0 : -ENOMEM; 948 } 949 950 static void __init free_cwwb_sem(struct amd_iommu *iommu) 951 { 952 if (iommu->cmd_sem) 953 iommu_free_page((void *)iommu->cmd_sem); 954 } 955 956 static void iommu_enable_xt(struct amd_iommu *iommu) 957 { 958 #ifdef CONFIG_IRQ_REMAP 959 /* 960 * XT mode (32-bit APIC destination ID) requires 961 * GA mode (128-bit IRTE support) as a prerequisite. 962 */ 963 if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) && 964 amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 965 iommu_feature_enable(iommu, CONTROL_XT_EN); 966 #endif /* CONFIG_IRQ_REMAP */ 967 } 968 969 static void iommu_enable_gt(struct amd_iommu *iommu) 970 { 971 if (!check_feature(FEATURE_GT)) 972 return; 973 974 iommu_feature_enable(iommu, CONTROL_GT_EN); 975 } 976 977 /* sets a specific bit in the device table entry. */ 978 static void set_dte_bit(struct dev_table_entry *dte, u8 bit) 979 { 980 int i = (bit >> 6) & 0x03; 981 int _bit = bit & 0x3f; 982 983 dte->data[i] |= (1UL << _bit); 984 } 985 986 static bool __copy_device_table(struct amd_iommu *iommu) 987 { 988 u64 int_ctl, int_tab_len, entry = 0; 989 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 990 struct dev_table_entry *old_devtb = NULL; 991 u32 lo, hi, devid, old_devtb_size; 992 phys_addr_t old_devtb_phys; 993 u16 dom_id, dte_v, irq_v; 994 u64 tmp; 995 996 /* Each IOMMU use separate device table with the same size */ 997 lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); 998 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); 999 entry = (((u64) hi) << 32) + lo; 1000 1001 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; 1002 if (old_devtb_size != pci_seg->dev_table_size) { 1003 pr_err("The device table size of IOMMU:%d is not expected!\n", 1004 iommu->index); 1005 return false; 1006 } 1007 1008 /* 1009 * When SME is enabled in the first kernel, the entry includes the 1010 * memory encryption mask(sme_me_mask), we must remove the memory 1011 * encryption mask to obtain the true physical address in kdump kernel. 1012 */ 1013 old_devtb_phys = __sme_clr(entry) & PAGE_MASK; 1014 1015 if (old_devtb_phys >= 0x100000000ULL) { 1016 pr_err("The address of old device table is above 4G, not trustworthy!\n"); 1017 return false; 1018 } 1019 old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel()) 1020 ? 
(__force void *)ioremap_encrypted(old_devtb_phys, 1021 pci_seg->dev_table_size) 1022 : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB); 1023 1024 if (!old_devtb) 1025 return false; 1026 1027 pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, 1028 get_order(pci_seg->dev_table_size)); 1029 if (pci_seg->old_dev_tbl_cpy == NULL) { 1030 pr_err("Failed to allocate memory for copying old device table!\n"); 1031 memunmap(old_devtb); 1032 return false; 1033 } 1034 1035 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 1036 pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid]; 1037 dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; 1038 dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; 1039 1040 if (dte_v && dom_id) { 1041 pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; 1042 pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; 1043 /* Reserve the Domain IDs used by previous kernel */ 1044 if (ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC) != dom_id) { 1045 pr_err("Failed to reserve domain ID 0x%x\n", dom_id); 1046 memunmap(old_devtb); 1047 return false; 1048 } 1049 /* If gcr3 table existed, mask it out */ 1050 if (old_devtb[devid].data[0] & DTE_FLAG_GV) { 1051 tmp = (DTE_GCR3_30_15 | DTE_GCR3_51_31); 1052 pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; 1053 tmp = (DTE_GCR3_14_12 | DTE_FLAG_GV); 1054 pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; 1055 } 1056 } 1057 1058 irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; 1059 int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; 1060 int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; 1061 if (irq_v && (int_ctl || int_tab_len)) { 1062 if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || 1063 (int_tab_len != DTE_INTTABLEN_512 && 1064 int_tab_len != DTE_INTTABLEN_2K)) { 1065 pr_err("Wrong old irq remapping flag: %#x\n", devid); 1066 memunmap(old_devtb); 1067 return false; 1068 } 1069 1070 pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; 1071 } 1072 } 1073 memunmap(old_devtb); 1074 1075 return true; 1076 } 1077 1078 static bool copy_device_table(void) 1079 { 1080 struct amd_iommu *iommu; 1081 struct amd_iommu_pci_seg *pci_seg; 1082 1083 if (!amd_iommu_pre_enabled) 1084 return false; 1085 1086 pr_warn("Translation is already enabled - trying to copy translation structures\n"); 1087 1088 /* 1089 * All IOMMUs within PCI segment shares common device table. 1090 * Hence copy device table only once per PCI segment. 1091 */ 1092 for_each_pci_segment(pci_seg) { 1093 for_each_iommu(iommu) { 1094 if (pci_seg->id != iommu->pci_seg->id) 1095 continue; 1096 if (!__copy_device_table(iommu)) 1097 return false; 1098 break; 1099 } 1100 } 1101 1102 return true; 1103 } 1104 1105 struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid) 1106 { 1107 struct ivhd_dte_flags *e; 1108 unsigned int best_len = UINT_MAX; 1109 struct dev_table_entry *dte = NULL; 1110 1111 for_each_ivhd_dte_flags(e) { 1112 /* 1113 * Need to go through the whole list to find the smallest range, 1114 * which contains the devid. 
1115 */ 1116 if ((e->segid == segid) && 1117 (e->devid_first <= devid) && (devid <= e->devid_last)) { 1118 unsigned int len = e->devid_last - e->devid_first; 1119 1120 if (len < best_len) { 1121 dte = &(e->dte); 1122 best_len = len; 1123 } 1124 } 1125 } 1126 return dte; 1127 } 1128 1129 static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last) 1130 { 1131 struct ivhd_dte_flags *e; 1132 1133 for_each_ivhd_dte_flags(e) { 1134 if ((e->segid == segid) && 1135 (e->devid_first == first) && 1136 (e->devid_last == last)) 1137 return true; 1138 } 1139 return false; 1140 } 1141 1142 /* 1143 * This function takes the device specific flags read from the ACPI 1144 * table and sets up the device table entry with that information 1145 */ 1146 static void __init 1147 set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last, 1148 u32 flags, u32 ext_flags) 1149 { 1150 int i; 1151 struct dev_table_entry dte = {}; 1152 1153 /* Parse IVHD DTE setting flags and store information */ 1154 if (flags) { 1155 struct ivhd_dte_flags *d; 1156 1157 if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last)) 1158 return; 1159 1160 d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL); 1161 if (!d) 1162 return; 1163 1164 pr_debug("%s: devid range %#x:%#x\n", __func__, first, last); 1165 1166 if (flags & ACPI_DEVFLAG_INITPASS) 1167 set_dte_bit(&dte, DEV_ENTRY_INIT_PASS); 1168 if (flags & ACPI_DEVFLAG_EXTINT) 1169 set_dte_bit(&dte, DEV_ENTRY_EINT_PASS); 1170 if (flags & ACPI_DEVFLAG_NMI) 1171 set_dte_bit(&dte, DEV_ENTRY_NMI_PASS); 1172 if (flags & ACPI_DEVFLAG_SYSMGT1) 1173 set_dte_bit(&dte, DEV_ENTRY_SYSMGT1); 1174 if (flags & ACPI_DEVFLAG_SYSMGT2) 1175 set_dte_bit(&dte, DEV_ENTRY_SYSMGT2); 1176 if (flags & ACPI_DEVFLAG_LINT0) 1177 set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS); 1178 if (flags & ACPI_DEVFLAG_LINT1) 1179 set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS); 1180 1181 /* Apply erratum 63, which needs info in initial_dte */ 1182 if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1) 1183 dte.data[0] |= DTE_FLAG_IW; 1184 1185 memcpy(&d->dte, &dte, sizeof(dte)); 1186 d->segid = iommu->pci_seg->id; 1187 d->devid_first = first; 1188 d->devid_last = last; 1189 list_add_tail(&d->list, &amd_ivhd_dev_flags_list); 1190 } 1191 1192 for (i = first; i <= last; i++) { 1193 if (flags) { 1194 struct dev_table_entry *dev_table = get_dev_table(iommu); 1195 1196 memcpy(&dev_table[i], &dte, sizeof(dte)); 1197 } 1198 amd_iommu_set_rlookup_table(iommu, i); 1199 } 1200 } 1201 1202 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, 1203 u16 devid, u32 flags, u32 ext_flags) 1204 { 1205 set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags); 1206 } 1207 1208 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) 1209 { 1210 struct devid_map *entry; 1211 struct list_head *list; 1212 1213 if (type == IVHD_SPECIAL_IOAPIC) 1214 list = &ioapic_map; 1215 else if (type == IVHD_SPECIAL_HPET) 1216 list = &hpet_map; 1217 else 1218 return -EINVAL; 1219 1220 list_for_each_entry(entry, list, list) { 1221 if (!(entry->id == id && entry->cmd_line)) 1222 continue; 1223 1224 pr_info("Command-line override present for %s id %d - ignoring\n", 1225 type == IVHD_SPECIAL_IOAPIC ? 
"IOAPIC" : "HPET", id); 1226 1227 *devid = entry->devid; 1228 1229 return 0; 1230 } 1231 1232 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1233 if (!entry) 1234 return -ENOMEM; 1235 1236 entry->id = id; 1237 entry->devid = *devid; 1238 entry->cmd_line = cmd_line; 1239 1240 list_add_tail(&entry->list, list); 1241 1242 return 0; 1243 } 1244 1245 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, 1246 bool cmd_line) 1247 { 1248 struct acpihid_map_entry *entry; 1249 struct list_head *list = &acpihid_map; 1250 1251 list_for_each_entry(entry, list, list) { 1252 if (strcmp(entry->hid, hid) || 1253 (*uid && *entry->uid && strcmp(entry->uid, uid)) || 1254 !entry->cmd_line) 1255 continue; 1256 1257 pr_info("Command-line override for hid:%s uid:%s\n", 1258 hid, uid); 1259 *devid = entry->devid; 1260 return 0; 1261 } 1262 1263 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1264 if (!entry) 1265 return -ENOMEM; 1266 1267 memcpy(entry->uid, uid, strlen(uid)); 1268 memcpy(entry->hid, hid, strlen(hid)); 1269 entry->devid = *devid; 1270 entry->cmd_line = cmd_line; 1271 entry->root_devid = (entry->devid & (~0x7)); 1272 1273 pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n", 1274 entry->cmd_line ? "cmd" : "ivrs", 1275 entry->hid, entry->uid, entry->root_devid); 1276 1277 list_add_tail(&entry->list, list); 1278 return 0; 1279 } 1280 1281 static int __init add_early_maps(void) 1282 { 1283 int i, ret; 1284 1285 for (i = 0; i < early_ioapic_map_size; ++i) { 1286 ret = add_special_device(IVHD_SPECIAL_IOAPIC, 1287 early_ioapic_map[i].id, 1288 &early_ioapic_map[i].devid, 1289 early_ioapic_map[i].cmd_line); 1290 if (ret) 1291 return ret; 1292 } 1293 1294 for (i = 0; i < early_hpet_map_size; ++i) { 1295 ret = add_special_device(IVHD_SPECIAL_HPET, 1296 early_hpet_map[i].id, 1297 &early_hpet_map[i].devid, 1298 early_hpet_map[i].cmd_line); 1299 if (ret) 1300 return ret; 1301 } 1302 1303 for (i = 0; i < early_acpihid_map_size; ++i) { 1304 ret = add_acpi_hid_device(early_acpihid_map[i].hid, 1305 early_acpihid_map[i].uid, 1306 &early_acpihid_map[i].devid, 1307 early_acpihid_map[i].cmd_line); 1308 if (ret) 1309 return ret; 1310 } 1311 1312 return 0; 1313 } 1314 1315 /* 1316 * Takes a pointer to an AMD IOMMU entry in the ACPI table and 1317 * initializes the hardware and our data structures with it. 1318 */ 1319 static int __init init_iommu_from_acpi(struct amd_iommu *iommu, 1320 struct ivhd_header *h) 1321 { 1322 u8 *p = (u8 *)h; 1323 u8 *end = p, flags = 0; 1324 u16 devid = 0, devid_start = 0, devid_to = 0, seg_id; 1325 u32 dev_i, ext_flags = 0; 1326 bool alias = false; 1327 struct ivhd_entry *e; 1328 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1329 u32 ivhd_size; 1330 int ret; 1331 1332 1333 ret = add_early_maps(); 1334 if (ret) 1335 return ret; 1336 1337 amd_iommu_apply_ivrs_quirks(); 1338 1339 /* 1340 * First save the recommended feature enable bits from ACPI 1341 */ 1342 iommu->acpi_flags = h->flags; 1343 1344 /* 1345 * Done. 
Now parse the device entries 1346 */ 1347 ivhd_size = get_ivhd_header_size(h); 1348 if (!ivhd_size) { 1349 pr_err("Unsupported IVHD type %#x\n", h->type); 1350 return -EINVAL; 1351 } 1352 1353 p += ivhd_size; 1354 1355 end += h->length; 1356 1357 1358 while (p < end) { 1359 e = (struct ivhd_entry *)p; 1360 seg_id = pci_seg->id; 1361 1362 switch (e->type) { 1363 case IVHD_DEV_ALL: 1364 1365 DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags); 1366 set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0); 1367 break; 1368 case IVHD_DEV_SELECT: 1369 1370 DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", 1371 seg_id, PCI_BUS_NUM(e->devid), 1372 PCI_SLOT(e->devid), 1373 PCI_FUNC(e->devid), 1374 e->flags); 1375 1376 devid = e->devid; 1377 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1378 break; 1379 case IVHD_DEV_SELECT_RANGE_START: 1380 1381 DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", 1382 seg_id, PCI_BUS_NUM(e->devid), 1383 PCI_SLOT(e->devid), 1384 PCI_FUNC(e->devid), 1385 e->flags); 1386 1387 devid_start = e->devid; 1388 flags = e->flags; 1389 ext_flags = 0; 1390 alias = false; 1391 break; 1392 case IVHD_DEV_ALIAS: 1393 1394 DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n", 1395 seg_id, PCI_BUS_NUM(e->devid), 1396 PCI_SLOT(e->devid), 1397 PCI_FUNC(e->devid), 1398 e->flags, 1399 PCI_BUS_NUM(e->ext >> 8), 1400 PCI_SLOT(e->ext >> 8), 1401 PCI_FUNC(e->ext >> 8)); 1402 1403 devid = e->devid; 1404 devid_to = e->ext >> 8; 1405 set_dev_entry_from_acpi(iommu, devid , e->flags, 0); 1406 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); 1407 pci_seg->alias_table[devid] = devid_to; 1408 break; 1409 case IVHD_DEV_ALIAS_RANGE: 1410 1411 DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n", 1412 seg_id, PCI_BUS_NUM(e->devid), 1413 PCI_SLOT(e->devid), 1414 PCI_FUNC(e->devid), 1415 e->flags, 1416 seg_id, PCI_BUS_NUM(e->ext >> 8), 1417 PCI_SLOT(e->ext >> 8), 1418 PCI_FUNC(e->ext >> 8)); 1419 1420 devid_start = e->devid; 1421 flags = e->flags; 1422 devid_to = e->ext >> 8; 1423 ext_flags = 0; 1424 alias = true; 1425 break; 1426 case IVHD_DEV_EXT_SELECT: 1427 1428 DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", 1429 seg_id, PCI_BUS_NUM(e->devid), 1430 PCI_SLOT(e->devid), 1431 PCI_FUNC(e->devid), 1432 e->flags, e->ext); 1433 1434 devid = e->devid; 1435 set_dev_entry_from_acpi(iommu, devid, e->flags, 1436 e->ext); 1437 break; 1438 case IVHD_DEV_EXT_SELECT_RANGE: 1439 1440 DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", 1441 seg_id, PCI_BUS_NUM(e->devid), 1442 PCI_SLOT(e->devid), 1443 PCI_FUNC(e->devid), 1444 e->flags, e->ext); 1445 1446 devid_start = e->devid; 1447 flags = e->flags; 1448 ext_flags = e->ext; 1449 alias = false; 1450 break; 1451 case IVHD_DEV_RANGE_END: 1452 1453 DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n", 1454 seg_id, PCI_BUS_NUM(e->devid), 1455 PCI_SLOT(e->devid), 1456 PCI_FUNC(e->devid)); 1457 1458 devid = e->devid; 1459 for (dev_i = devid_start; dev_i <= devid; ++dev_i) { 1460 if (alias) 1461 pci_seg->alias_table[dev_i] = devid_to; 1462 } 1463 set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags); 1464 set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); 1465 break; 1466 case IVHD_DEV_SPECIAL: { 1467 u8 handle, type; 1468 const char *var; 1469 u32 devid; 1470 int ret; 1471 1472 handle = e->ext & 
0xff; 1473 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8)); 1474 type = (e->ext >> 24) & 0xff; 1475 1476 if (type == IVHD_SPECIAL_IOAPIC) 1477 var = "IOAPIC"; 1478 else if (type == IVHD_SPECIAL_HPET) 1479 var = "HPET"; 1480 else 1481 var = "UNKNOWN"; 1482 1483 DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", 1484 var, (int)handle, 1485 seg_id, PCI_BUS_NUM(devid), 1486 PCI_SLOT(devid), 1487 PCI_FUNC(devid), 1488 e->flags); 1489 1490 ret = add_special_device(type, handle, &devid, false); 1491 if (ret) 1492 return ret; 1493 1494 /* 1495 * add_special_device might update the devid in case a 1496 * command-line override is present. So call 1497 * set_dev_entry_from_acpi after add_special_device. 1498 */ 1499 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1500 1501 break; 1502 } 1503 case IVHD_DEV_ACPI_HID: { 1504 u32 devid; 1505 u8 hid[ACPIHID_HID_LEN]; 1506 u8 uid[ACPIHID_UID_LEN]; 1507 int ret; 1508 1509 if (h->type != 0x40) { 1510 pr_err(FW_BUG "Invalid IVHD device type %#x\n", 1511 e->type); 1512 break; 1513 } 1514 1515 BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1); 1516 memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1); 1517 hid[ACPIHID_HID_LEN - 1] = '\0'; 1518 1519 if (!(*hid)) { 1520 pr_err(FW_BUG "Invalid HID.\n"); 1521 break; 1522 } 1523 1524 uid[0] = '\0'; 1525 switch (e->uidf) { 1526 case UID_NOT_PRESENT: 1527 1528 if (e->uidl != 0) 1529 pr_warn(FW_BUG "Invalid UID length.\n"); 1530 1531 break; 1532 case UID_IS_INTEGER: 1533 1534 sprintf(uid, "%d", e->uid); 1535 1536 break; 1537 case UID_IS_CHARACTER: 1538 1539 memcpy(uid, &e->uid, e->uidl); 1540 uid[e->uidl] = '\0'; 1541 1542 break; 1543 default: 1544 break; 1545 } 1546 1547 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); 1548 DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", 1549 hid, uid, seg_id, 1550 PCI_BUS_NUM(devid), 1551 PCI_SLOT(devid), 1552 PCI_FUNC(devid), 1553 e->flags); 1554 1555 flags = e->flags; 1556 1557 ret = add_acpi_hid_device(hid, uid, &devid, false); 1558 if (ret) 1559 return ret; 1560 1561 /* 1562 * add_special_device might update the devid in case a 1563 * command-line override is present. So call 1564 * set_dev_entry_from_acpi after add_special_device. 1565 */ 1566 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1567 1568 break; 1569 } 1570 default: 1571 break; 1572 } 1573 1574 p += ivhd_entry_length(p); 1575 } 1576 1577 return 0; 1578 } 1579 1580 /* Allocate PCI segment data structure */ 1581 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, 1582 struct acpi_table_header *ivrs_base) 1583 { 1584 struct amd_iommu_pci_seg *pci_seg; 1585 int last_bdf; 1586 1587 /* 1588 * First parse ACPI tables to find the largest Bus/Dev/Func we need to 1589 * handle in this PCI segment. Upon this information the shared data 1590 * structures for the PCI segments in the system will be allocated. 
1591 */ 1592 last_bdf = find_last_devid_acpi(ivrs_base, id); 1593 if (last_bdf < 0) 1594 return NULL; 1595 1596 pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL); 1597 if (pci_seg == NULL) 1598 return NULL; 1599 1600 pci_seg->last_bdf = last_bdf; 1601 DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); 1602 pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); 1603 pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); 1604 pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); 1605 1606 pci_seg->id = id; 1607 init_llist_head(&pci_seg->dev_data_list); 1608 INIT_LIST_HEAD(&pci_seg->unity_map); 1609 list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); 1610 1611 if (alloc_dev_table(pci_seg)) 1612 return NULL; 1613 if (alloc_alias_table(pci_seg)) 1614 return NULL; 1615 if (alloc_rlookup_table(pci_seg)) 1616 return NULL; 1617 1618 return pci_seg; 1619 } 1620 1621 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, 1622 struct acpi_table_header *ivrs_base) 1623 { 1624 struct amd_iommu_pci_seg *pci_seg; 1625 1626 for_each_pci_segment(pci_seg) { 1627 if (pci_seg->id == id) 1628 return pci_seg; 1629 } 1630 1631 return alloc_pci_segment(id, ivrs_base); 1632 } 1633 1634 static void __init free_pci_segments(void) 1635 { 1636 struct amd_iommu_pci_seg *pci_seg, *next; 1637 1638 for_each_pci_segment_safe(pci_seg, next) { 1639 list_del(&pci_seg->list); 1640 free_irq_lookup_table(pci_seg); 1641 free_rlookup_table(pci_seg); 1642 free_alias_table(pci_seg); 1643 free_dev_table(pci_seg); 1644 kfree(pci_seg); 1645 } 1646 } 1647 1648 static void __init free_sysfs(struct amd_iommu *iommu) 1649 { 1650 if (iommu->iommu.dev) { 1651 iommu_device_unregister(&iommu->iommu); 1652 iommu_device_sysfs_remove(&iommu->iommu); 1653 } 1654 } 1655 1656 static void __init free_iommu_one(struct amd_iommu *iommu) 1657 { 1658 free_sysfs(iommu); 1659 free_cwwb_sem(iommu); 1660 free_command_buffer(iommu); 1661 free_event_buffer(iommu); 1662 amd_iommu_free_ppr_log(iommu); 1663 free_ga_log(iommu); 1664 iommu_unmap_mmio_space(iommu); 1665 amd_iommu_iopf_uninit(iommu); 1666 } 1667 1668 static void __init free_iommu_all(void) 1669 { 1670 struct amd_iommu *iommu, *next; 1671 1672 for_each_iommu_safe(iommu, next) { 1673 list_del(&iommu->list); 1674 free_iommu_one(iommu); 1675 kfree(iommu); 1676 } 1677 } 1678 1679 /* 1680 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations) 1681 * Workaround: 1682 * BIOS should disable L2B micellaneous clock gating by setting 1683 * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b 1684 */ 1685 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) 1686 { 1687 u32 value; 1688 1689 if ((boot_cpu_data.x86 != 0x15) || 1690 (boot_cpu_data.x86_model < 0x10) || 1691 (boot_cpu_data.x86_model > 0x1f)) 1692 return; 1693 1694 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1695 pci_read_config_dword(iommu->dev, 0xf4, &value); 1696 1697 if (value & BIT(2)) 1698 return; 1699 1700 /* Select NB indirect register 0x90 and enable writing */ 1701 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); 1702 1703 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); 1704 pci_info(iommu->dev, "Applying erratum 746 workaround\n"); 1705 1706 /* Clear the enable writing bit */ 1707 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1708 } 1709 1710 /* 1711 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) 1712 * Workaround: 1713 * BIOS should enable ATS write 
permission check by setting 1714 * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b 1715 */ 1716 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) 1717 { 1718 u32 value; 1719 1720 if ((boot_cpu_data.x86 != 0x15) || 1721 (boot_cpu_data.x86_model < 0x30) || 1722 (boot_cpu_data.x86_model > 0x3f)) 1723 return; 1724 1725 /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ 1726 value = iommu_read_l2(iommu, 0x47); 1727 1728 if (value & BIT(0)) 1729 return; 1730 1731 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ 1732 iommu_write_l2(iommu, 0x47, value | BIT(0)); 1733 1734 pci_info(iommu->dev, "Applying ATS write check workaround\n"); 1735 } 1736 1737 /* 1738 * This function glues the initialization function for one IOMMU 1739 * together and also allocates the command buffer and programs the 1740 * hardware. It does NOT enable the IOMMU. This is done afterwards. 1741 */ 1742 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, 1743 struct acpi_table_header *ivrs_base) 1744 { 1745 struct amd_iommu_pci_seg *pci_seg; 1746 1747 pci_seg = get_pci_segment(h->pci_seg, ivrs_base); 1748 if (pci_seg == NULL) 1749 return -ENOMEM; 1750 iommu->pci_seg = pci_seg; 1751 1752 raw_spin_lock_init(&iommu->lock); 1753 atomic64_set(&iommu->cmd_sem_val, 0); 1754 1755 /* Add IOMMU to internal data structures */ 1756 list_add_tail(&iommu->list, &amd_iommu_list); 1757 iommu->index = amd_iommus_present++; 1758 1759 if (unlikely(iommu->index >= MAX_IOMMUS)) { 1760 WARN(1, "System has more IOMMUs than supported by this driver\n"); 1761 return -ENOSYS; 1762 } 1763 1764 /* 1765 * Copy data from ACPI table entry to the iommu struct 1766 */ 1767 iommu->devid = h->devid; 1768 iommu->cap_ptr = h->cap_ptr; 1769 iommu->mmio_phys = h->mmio_phys; 1770 1771 switch (h->type) { 1772 case 0x10: 1773 /* Check if IVHD EFR contains proper max banks/counters */ 1774 if ((h->efr_attr != 0) && 1775 ((h->efr_attr & (0xF << 13)) != 0) && 1776 ((h->efr_attr & (0x3F << 17)) != 0)) 1777 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1778 else 1779 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1780 1781 /* GAM requires GA mode. */ 1782 if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0) 1783 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1784 break; 1785 case 0x11: 1786 case 0x40: 1787 if (h->efr_reg & (1 << 9)) 1788 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1789 else 1790 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1791 1792 /* XT and GAM require GA mode. 
*/ 1793 if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) { 1794 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1795 break; 1796 } 1797 1798 if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) 1799 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; 1800 1801 early_iommu_features_init(iommu, h); 1802 1803 break; 1804 default: 1805 return -EINVAL; 1806 } 1807 1808 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, 1809 iommu->mmio_phys_end); 1810 if (!iommu->mmio_base) 1811 return -ENOMEM; 1812 1813 return init_iommu_from_acpi(iommu, h); 1814 } 1815 1816 static int __init init_iommu_one_late(struct amd_iommu *iommu) 1817 { 1818 int ret; 1819 1820 if (alloc_cwwb_sem(iommu)) 1821 return -ENOMEM; 1822 1823 if (alloc_command_buffer(iommu)) 1824 return -ENOMEM; 1825 1826 if (alloc_event_buffer(iommu)) 1827 return -ENOMEM; 1828 1829 iommu->int_enabled = false; 1830 1831 init_translation_status(iommu); 1832 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 1833 iommu_disable(iommu); 1834 clear_translation_pre_enabled(iommu); 1835 pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n", 1836 iommu->index); 1837 } 1838 if (amd_iommu_pre_enabled) 1839 amd_iommu_pre_enabled = translation_pre_enabled(iommu); 1840 1841 if (amd_iommu_irq_remap) { 1842 ret = amd_iommu_create_irq_domain(iommu); 1843 if (ret) 1844 return ret; 1845 } 1846 1847 /* 1848 * Make sure IOMMU is not considered to translate itself. The IVRS 1849 * table tells us so, but this is a lie! 1850 */ 1851 iommu->pci_seg->rlookup_table[iommu->devid] = NULL; 1852 1853 return 0; 1854 } 1855 1856 /** 1857 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type 1858 * @ivrs: Pointer to the IVRS header 1859 * 1860 * This function search through all IVDB of the maximum supported IVHD 1861 */ 1862 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) 1863 { 1864 u8 *base = (u8 *)ivrs; 1865 struct ivhd_header *ivhd = (struct ivhd_header *) 1866 (base + IVRS_HEADER_LENGTH); 1867 u8 last_type = ivhd->type; 1868 u16 devid = ivhd->devid; 1869 1870 while (((u8 *)ivhd - base < ivrs->length) && 1871 (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) { 1872 u8 *p = (u8 *) ivhd; 1873 1874 if (ivhd->devid == devid) 1875 last_type = ivhd->type; 1876 ivhd = (struct ivhd_header *)(p + ivhd->length); 1877 } 1878 1879 return last_type; 1880 } 1881 1882 /* 1883 * Iterates over all IOMMU entries in the ACPI table, allocates the 1884 * IOMMU structure and initializes it with init_iommu_one() 1885 */ 1886 static int __init init_iommu_all(struct acpi_table_header *table) 1887 { 1888 u8 *p = (u8 *)table, *end = (u8 *)table; 1889 struct ivhd_header *h; 1890 struct amd_iommu *iommu; 1891 int ret; 1892 1893 end += table->length; 1894 p += IVRS_HEADER_LENGTH; 1895 1896 /* Phase 1: Process all IVHD blocks */ 1897 while (p < end) { 1898 h = (struct ivhd_header *)p; 1899 if (*p == amd_iommu_target_ivhd_type) { 1900 1901 DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x " 1902 "flags: %01x info %04x\n", 1903 h->pci_seg, PCI_BUS_NUM(h->devid), 1904 PCI_SLOT(h->devid), PCI_FUNC(h->devid), 1905 h->cap_ptr, h->flags, h->info); 1906 DUMP_printk(" mmio-addr: %016llx\n", 1907 h->mmio_phys); 1908 1909 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 1910 if (iommu == NULL) 1911 return -ENOMEM; 1912 1913 ret = init_iommu_one(iommu, h, table); 1914 if (ret) 1915 return ret; 1916 } 1917 p += h->length; 1918 1919 } 1920 WARN_ON(p != end); 1921 1922 /* Phase 2 : Early feature support check */ 1923 get_global_efr(); 1924 1925 
/* Phase 3 : Enabling IOMMU features */ 1926 for_each_iommu(iommu) { 1927 ret = init_iommu_one_late(iommu); 1928 if (ret) 1929 return ret; 1930 } 1931 1932 return 0; 1933 } 1934 1935 static void init_iommu_perf_ctr(struct amd_iommu *iommu) 1936 { 1937 u64 val; 1938 struct pci_dev *pdev = iommu->dev; 1939 1940 if (!check_feature(FEATURE_PC)) 1941 return; 1942 1943 amd_iommu_pc_present = true; 1944 1945 pci_info(pdev, "IOMMU performance counters supported\n"); 1946 1947 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); 1948 iommu->max_banks = (u8) ((val >> 12) & 0x3f); 1949 iommu->max_counters = (u8) ((val >> 7) & 0xf); 1950 1951 return; 1952 } 1953 1954 static ssize_t amd_iommu_show_cap(struct device *dev, 1955 struct device_attribute *attr, 1956 char *buf) 1957 { 1958 struct amd_iommu *iommu = dev_to_amd_iommu(dev); 1959 return sysfs_emit(buf, "%x\n", iommu->cap); 1960 } 1961 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); 1962 1963 static ssize_t amd_iommu_show_features(struct device *dev, 1964 struct device_attribute *attr, 1965 char *buf) 1966 { 1967 return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2); 1968 } 1969 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); 1970 1971 static struct attribute *amd_iommu_attrs[] = { 1972 &dev_attr_cap.attr, 1973 &dev_attr_features.attr, 1974 NULL, 1975 }; 1976 1977 static struct attribute_group amd_iommu_group = { 1978 .name = "amd-iommu", 1979 .attrs = amd_iommu_attrs, 1980 }; 1981 1982 static const struct attribute_group *amd_iommu_groups[] = { 1983 &amd_iommu_group, 1984 NULL, 1985 }; 1986 1987 /* 1988 * Note: IVHD 0x11 and 0x40 also contains exact copy 1989 * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. 1990 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). 1991 */ 1992 static void __init late_iommu_features_init(struct amd_iommu *iommu) 1993 { 1994 u64 features, features2; 1995 1996 if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) 1997 return; 1998 1999 /* read extended feature bits */ 2000 features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); 2001 features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); 2002 2003 if (!amd_iommu_efr) { 2004 amd_iommu_efr = features; 2005 amd_iommu_efr2 = features2; 2006 return; 2007 } 2008 2009 /* 2010 * Sanity check and warn if EFR values from 2011 * IVHD and MMIO conflict. 2012 */ 2013 if (features != amd_iommu_efr || 2014 features2 != amd_iommu_efr2) { 2015 pr_warn(FW_WARN 2016 "EFR mismatch. 
Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", 2017 features, amd_iommu_efr, 2018 features2, amd_iommu_efr2); 2019 } 2020 } 2021 2022 static int __init iommu_init_pci(struct amd_iommu *iommu) 2023 { 2024 int cap_ptr = iommu->cap_ptr; 2025 int ret; 2026 2027 iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2028 PCI_BUS_NUM(iommu->devid), 2029 iommu->devid & 0xff); 2030 if (!iommu->dev) 2031 return -ENODEV; 2032 2033 /* Prevent binding other PCI device drivers to IOMMU devices */ 2034 iommu->dev->match_driver = false; 2035 2036 /* ACPI _PRT won't have an IRQ for IOMMU */ 2037 iommu->dev->irq_managed = 1; 2038 2039 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 2040 &iommu->cap); 2041 2042 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) 2043 amd_iommu_iotlb_sup = false; 2044 2045 late_iommu_features_init(iommu); 2046 2047 if (check_feature(FEATURE_GT)) { 2048 int glxval; 2049 u64 pasmax; 2050 2051 pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr); 2052 iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; 2053 2054 BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); 2055 2056 glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr); 2057 2058 if (amd_iommu_max_glx_val == -1) 2059 amd_iommu_max_glx_val = glxval; 2060 else 2061 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); 2062 2063 iommu_enable_gt(iommu); 2064 } 2065 2066 if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu)) 2067 return -ENOMEM; 2068 2069 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { 2070 pr_info("Using strict mode due to virtualization\n"); 2071 iommu_set_dma_strict(); 2072 amd_iommu_np_cache = true; 2073 } 2074 2075 init_iommu_perf_ctr(iommu); 2076 2077 if (is_rd890_iommu(iommu->dev)) { 2078 int i, j; 2079 2080 iommu->root_pdev = 2081 pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2082 iommu->dev->bus->number, 2083 PCI_DEVFN(0, 0)); 2084 2085 /* 2086 * Some rd890 systems may not be fully reconfigured by the 2087 * BIOS, so it's necessary for us to store this information so 2088 * it can be reprogrammed on resume 2089 */ 2090 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, 2091 &iommu->stored_addr_lo); 2092 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, 2093 &iommu->stored_addr_hi); 2094 2095 /* Low bit locks writes to configuration space */ 2096 iommu->stored_addr_lo &= ~1; 2097 2098 for (i = 0; i < 6; i++) 2099 for (j = 0; j < 0x12; j++) 2100 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); 2101 2102 for (i = 0; i < 0x83; i++) 2103 iommu->stored_l2[i] = iommu_read_l2(iommu, i); 2104 } 2105 2106 amd_iommu_erratum_746_workaround(iommu); 2107 amd_iommu_ats_write_check_workaround(iommu); 2108 2109 ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, 2110 amd_iommu_groups, "ivhd%d", iommu->index); 2111 if (ret) 2112 return ret; 2113 2114 /* 2115 * Allocate per IOMMU IOPF queue here so that in attach device path, 2116 * PRI capable device can be added to IOPF queue 2117 */ 2118 if (amd_iommu_gt_ppr_supported()) { 2119 ret = amd_iommu_iopf_init(iommu); 2120 if (ret) 2121 return ret; 2122 } 2123 2124 iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); 2125 2126 return pci_enable_device(iommu->dev); 2127 } 2128 2129 static void print_iommu_info(void) 2130 { 2131 int i; 2132 static const char * const feat_str[] = { 2133 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", 2134 "IA", "GA", "HE", "PC" 2135 }; 2136 2137 if (amd_iommu_efr) { 2138 pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2); 2139 2140 for (i = 0; i < ARRAY_SIZE(feat_str); 
++i) { 2141 if (check_feature(1ULL << i)) 2142 pr_cont(" %s", feat_str[i]); 2143 } 2144 2145 if (check_feature(FEATURE_GAM_VAPIC)) 2146 pr_cont(" GA_vAPIC"); 2147 2148 if (check_feature(FEATURE_SNP)) 2149 pr_cont(" SNP"); 2150 2151 pr_cont("\n"); 2152 } 2153 2154 if (irq_remapping_enabled) { 2155 pr_info("Interrupt remapping enabled\n"); 2156 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2157 pr_info("X2APIC enabled\n"); 2158 } 2159 if (amd_iommu_pgtable == PD_MODE_V2) { 2160 pr_info("V2 page table enabled (Paging mode : %d level)\n", 2161 amd_iommu_gpt_level); 2162 } 2163 } 2164 2165 static int __init amd_iommu_init_pci(void) 2166 { 2167 struct amd_iommu *iommu; 2168 struct amd_iommu_pci_seg *pci_seg; 2169 int ret; 2170 2171 /* Init global identity domain before registering IOMMU */ 2172 amd_iommu_init_identity_domain(); 2173 2174 for_each_iommu(iommu) { 2175 ret = iommu_init_pci(iommu); 2176 if (ret) { 2177 pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n", 2178 iommu->index, ret); 2179 goto out; 2180 } 2181 /* Need to setup range after PCI init */ 2182 iommu_set_cwwb_range(iommu); 2183 } 2184 2185 /* 2186 * Order is important here to make sure any unity map requirements are 2187 * fulfilled. The unity mappings are created and written to the device 2188 * table during the iommu_init_pci() call. 2189 * 2190 * After that we call init_device_table_dma() to make sure any 2191 * uninitialized DTE will block DMA, and in the end we flush the caches 2192 * of all IOMMUs to make sure the changes to the device table are 2193 * active. 2194 */ 2195 for_each_pci_segment(pci_seg) 2196 init_device_table_dma(pci_seg); 2197 2198 for_each_iommu(iommu) 2199 amd_iommu_flush_all_caches(iommu); 2200 2201 print_iommu_info(); 2202 2203 out: 2204 return ret; 2205 } 2206 2207 /**************************************************************************** 2208 * 2209 * The following functions initialize the MSI interrupts for all IOMMUs 2210 * in the system. It's a bit challenging because there could be multiple 2211 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per 2212 * pci_dev. 
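 *
 * On x2APIC-capable systems the IOMMU interrupts are not delivered via
 * the PCI MSI capability at all; they are programmed through the
 * IntCapXT MMIO registers instead (see iommu_setup_intcapxt() and
 * iommu_init_irq() below for how the two methods are selected).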
2213 * 2214 ****************************************************************************/ 2215 2216 static int iommu_setup_msi(struct amd_iommu *iommu) 2217 { 2218 int r; 2219 2220 r = pci_enable_msi(iommu->dev); 2221 if (r) 2222 return r; 2223 2224 r = request_threaded_irq(iommu->dev->irq, 2225 amd_iommu_int_handler, 2226 amd_iommu_int_thread, 2227 0, "AMD-Vi", 2228 iommu); 2229 2230 if (r) { 2231 pci_disable_msi(iommu->dev); 2232 return r; 2233 } 2234 2235 return 0; 2236 } 2237 2238 union intcapxt { 2239 u64 capxt; 2240 struct { 2241 u64 reserved_0 : 2, 2242 dest_mode_logical : 1, 2243 reserved_1 : 5, 2244 destid_0_23 : 24, 2245 vector : 8, 2246 reserved_2 : 16, 2247 destid_24_31 : 8; 2248 }; 2249 } __attribute__ ((packed)); 2250 2251 2252 static struct irq_chip intcapxt_controller; 2253 2254 static int intcapxt_irqdomain_activate(struct irq_domain *domain, 2255 struct irq_data *irqd, bool reserve) 2256 { 2257 return 0; 2258 } 2259 2260 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, 2261 struct irq_data *irqd) 2262 { 2263 } 2264 2265 2266 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, 2267 unsigned int nr_irqs, void *arg) 2268 { 2269 struct irq_alloc_info *info = arg; 2270 int i, ret; 2271 2272 if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) 2273 return -EINVAL; 2274 2275 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 2276 if (ret < 0) 2277 return ret; 2278 2279 for (i = virq; i < virq + nr_irqs; i++) { 2280 struct irq_data *irqd = irq_domain_get_irq_data(domain, i); 2281 2282 irqd->chip = &intcapxt_controller; 2283 irqd->hwirq = info->hwirq; 2284 irqd->chip_data = info->data; 2285 __irq_set_handler(i, handle_edge_irq, 0, "edge"); 2286 } 2287 2288 return ret; 2289 } 2290 2291 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, 2292 unsigned int nr_irqs) 2293 { 2294 irq_domain_free_irqs_top(domain, virq, nr_irqs); 2295 } 2296 2297 2298 static void intcapxt_unmask_irq(struct irq_data *irqd) 2299 { 2300 struct amd_iommu *iommu = irqd->chip_data; 2301 struct irq_cfg *cfg = irqd_cfg(irqd); 2302 union intcapxt xt; 2303 2304 xt.capxt = 0ULL; 2305 xt.dest_mode_logical = apic->dest_mode_logical; 2306 xt.vector = cfg->vector; 2307 xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2308 xt.destid_24_31 = cfg->dest_apicid >> 24; 2309 2310 writeq(xt.capxt, iommu->mmio_base + irqd->hwirq); 2311 } 2312 2313 static void intcapxt_mask_irq(struct irq_data *irqd) 2314 { 2315 struct amd_iommu *iommu = irqd->chip_data; 2316 2317 writeq(0, iommu->mmio_base + irqd->hwirq); 2318 } 2319 2320 2321 static int intcapxt_set_affinity(struct irq_data *irqd, 2322 const struct cpumask *mask, bool force) 2323 { 2324 struct irq_data *parent = irqd->parent_data; 2325 int ret; 2326 2327 ret = parent->chip->irq_set_affinity(parent, mask, force); 2328 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 2329 return ret; 2330 return 0; 2331 } 2332 2333 static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on) 2334 { 2335 return on ? 
-EOPNOTSUPP : 0; 2336 } 2337 2338 static struct irq_chip intcapxt_controller = { 2339 .name = "IOMMU-MSI", 2340 .irq_unmask = intcapxt_unmask_irq, 2341 .irq_mask = intcapxt_mask_irq, 2342 .irq_ack = irq_chip_ack_parent, 2343 .irq_retrigger = irq_chip_retrigger_hierarchy, 2344 .irq_set_affinity = intcapxt_set_affinity, 2345 .irq_set_wake = intcapxt_set_wake, 2346 .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED, 2347 }; 2348 2349 static const struct irq_domain_ops intcapxt_domain_ops = { 2350 .alloc = intcapxt_irqdomain_alloc, 2351 .free = intcapxt_irqdomain_free, 2352 .activate = intcapxt_irqdomain_activate, 2353 .deactivate = intcapxt_irqdomain_deactivate, 2354 }; 2355 2356 2357 static struct irq_domain *iommu_irqdomain; 2358 2359 static struct irq_domain *iommu_get_irqdomain(void) 2360 { 2361 struct fwnode_handle *fn; 2362 2363 /* No need for locking here (yet) as the init is single-threaded */ 2364 if (iommu_irqdomain) 2365 return iommu_irqdomain; 2366 2367 fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); 2368 if (!fn) 2369 return NULL; 2370 2371 iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, 2372 fn, &intcapxt_domain_ops, 2373 NULL); 2374 if (!iommu_irqdomain) 2375 irq_domain_free_fwnode(fn); 2376 2377 return iommu_irqdomain; 2378 } 2379 2380 static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname, 2381 int hwirq, irq_handler_t thread_fn) 2382 { 2383 struct irq_domain *domain; 2384 struct irq_alloc_info info; 2385 int irq, ret; 2386 int node = dev_to_node(&iommu->dev->dev); 2387 2388 domain = iommu_get_irqdomain(); 2389 if (!domain) 2390 return -ENXIO; 2391 2392 init_irq_alloc_info(&info, NULL); 2393 info.type = X86_IRQ_ALLOC_TYPE_AMDVI; 2394 info.data = iommu; 2395 info.hwirq = hwirq; 2396 2397 irq = irq_domain_alloc_irqs(domain, 1, node, &info); 2398 if (irq < 0) { 2399 irq_domain_remove(domain); 2400 return irq; 2401 } 2402 2403 ret = request_threaded_irq(irq, amd_iommu_int_handler, 2404 thread_fn, 0, devname, iommu); 2405 if (ret) { 2406 irq_domain_free_irqs(irq, 1); 2407 irq_domain_remove(domain); 2408 return ret; 2409 } 2410 2411 return 0; 2412 } 2413 2414 static int iommu_setup_intcapxt(struct amd_iommu *iommu) 2415 { 2416 int ret; 2417 2418 snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name), 2419 "AMD-Vi%d-Evt", iommu->index); 2420 ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name, 2421 MMIO_INTCAPXT_EVT_OFFSET, 2422 amd_iommu_int_thread_evtlog); 2423 if (ret) 2424 return ret; 2425 2426 snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name), 2427 "AMD-Vi%d-PPR", iommu->index); 2428 ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name, 2429 MMIO_INTCAPXT_PPR_OFFSET, 2430 amd_iommu_int_thread_pprlog); 2431 if (ret) 2432 return ret; 2433 2434 #ifdef CONFIG_IRQ_REMAP 2435 snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name), 2436 "AMD-Vi%d-GA", iommu->index); 2437 ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name, 2438 MMIO_INTCAPXT_GALOG_OFFSET, 2439 amd_iommu_int_thread_galog); 2440 #endif 2441 2442 return ret; 2443 } 2444 2445 static int iommu_init_irq(struct amd_iommu *iommu) 2446 { 2447 int ret; 2448 2449 if (iommu->int_enabled) 2450 goto enable_faults; 2451 2452 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2453 ret = iommu_setup_intcapxt(iommu); 2454 else if (iommu->dev->msi_cap) 2455 ret = iommu_setup_msi(iommu); 2456 else 2457 ret = -ENODEV; 2458 2459 if (ret) 2460 return ret; 2461 2462 iommu->int_enabled = true; 2463 enable_faults: 2464 2465 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 
		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);

	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);

	return 0;
}

/****************************************************************************
 *
 * The next functions belong to the third pass of parsing the ACPI
 * table. In this last pass the memory mapping requirements are
 * gathered (like exclusion and unity mapping ranges).
 *
 ****************************************************************************/

static void __init free_unity_maps(void)
{
	struct unity_map_entry *entry, *next;
	struct amd_iommu_pci_seg *p, *pci_seg;

	for_each_pci_segment_safe(pci_seg, p) {
		list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
			list_del(&entry->list);
			kfree(entry);
		}
	}
}

/* called for unity map ACPI definition */
static int __init init_unity_map_range(struct ivmd_header *m,
				       struct acpi_table_header *ivrs_base)
{
	struct unity_map_entry *e = NULL;
	struct amd_iommu_pci_seg *pci_seg;
	char *s;

	pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
	if (pci_seg == NULL)
		return -ENOMEM;

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (e == NULL)
		return -ENOMEM;

	switch (m->type) {
	default:
		kfree(e);
		return 0;
	case ACPI_IVMD_TYPE:
		s = "IVMD_TYPE\t\t\t";
		e->devid_start = e->devid_end = m->devid;
		break;
	case ACPI_IVMD_TYPE_ALL:
		s = "IVMD_TYPE_ALL\t\t";
		e->devid_start = 0;
		e->devid_end = pci_seg->last_bdf;
		break;
	case ACPI_IVMD_TYPE_RANGE:
		s = "IVMD_TYPE_RANGE\t\t";
		e->devid_start = m->devid;
		e->devid_end = m->aux;
		break;
	}
	e->address_start = PAGE_ALIGN(m->range_start);
	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
	e->prot = m->flags >> 1;

	/*
	 * Treat per-device exclusion ranges as r/w unity-mapped regions,
	 * since a buggy BIOS may define multiple exclusion ranges (IVMD
	 * entries), which would overwrite the single per-IOMMU exclusion
	 * range (the exclusion_start and exclusion_length members).
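	 * Such ranges therefore get their protection forced to read/write
	 * (IVMD_FLAG_IW | IVMD_FLAG_IR) below before being added to the
	 * unity_map list.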
2539 */ 2540 if (m->flags & IVMD_FLAG_EXCL_RANGE) 2541 e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; 2542 2543 DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: " 2544 "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx" 2545 " flags: %x\n", s, m->pci_seg, 2546 PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), 2547 PCI_FUNC(e->devid_start), m->pci_seg, 2548 PCI_BUS_NUM(e->devid_end), 2549 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), 2550 e->address_start, e->address_end, m->flags); 2551 2552 list_add_tail(&e->list, &pci_seg->unity_map); 2553 2554 return 0; 2555 } 2556 2557 /* iterates over all memory definitions we find in the ACPI table */ 2558 static int __init init_memory_definitions(struct acpi_table_header *table) 2559 { 2560 u8 *p = (u8 *)table, *end = (u8 *)table; 2561 struct ivmd_header *m; 2562 2563 end += table->length; 2564 p += IVRS_HEADER_LENGTH; 2565 2566 while (p < end) { 2567 m = (struct ivmd_header *)p; 2568 if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) 2569 init_unity_map_range(m, table); 2570 2571 p += m->length; 2572 } 2573 2574 return 0; 2575 } 2576 2577 /* 2578 * Init the device table to not allow DMA access for devices 2579 */ 2580 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2581 { 2582 u32 devid; 2583 struct dev_table_entry *dev_table = pci_seg->dev_table; 2584 2585 if (dev_table == NULL) 2586 return; 2587 2588 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2589 set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID); 2590 if (!amd_iommu_snp_en) 2591 set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION); 2592 } 2593 } 2594 2595 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2596 { 2597 u32 devid; 2598 struct dev_table_entry *dev_table = pci_seg->dev_table; 2599 2600 if (dev_table == NULL) 2601 return; 2602 2603 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2604 dev_table[devid].data[0] = 0ULL; 2605 dev_table[devid].data[1] = 0ULL; 2606 } 2607 } 2608 2609 static void init_device_table(void) 2610 { 2611 struct amd_iommu_pci_seg *pci_seg; 2612 u32 devid; 2613 2614 if (!amd_iommu_irq_remap) 2615 return; 2616 2617 for_each_pci_segment(pci_seg) { 2618 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) 2619 set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN); 2620 } 2621 } 2622 2623 static void iommu_init_flags(struct amd_iommu *iommu) 2624 { 2625 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? 2626 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : 2627 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); 2628 2629 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? 2630 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : 2631 iommu_feature_disable(iommu, CONTROL_PASSPW_EN); 2632 2633 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? 2634 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : 2635 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); 2636 2637 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? 
		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
		iommu_feature_disable(iommu, CONTROL_ISOC_EN);

	/*
	 * make IOMMU memory accesses cache coherent
	 */
	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);

	/* Set IOTLB invalidation timeout to 1s */
	iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT);

	/* Enable Enhanced Peripheral Page Request Handling */
	if (check_feature(FEATURE_EPHSUP))
		iommu_feature_enable(iommu, CONTROL_EPH_EN);
}

static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
{
	int i, j;
	u32 ioc_feature_control;
	struct pci_dev *pdev = iommu->root_pdev;

	/* RD890 BIOSes may not have completely reconfigured the iommu */
	if (!is_rd890_iommu(iommu->dev) || !pdev)
		return;

	/*
	 * First, we need to ensure that the iommu is enabled. This is
	 * controlled by a register in the northbridge
	 */

	/* Select Northbridge indirect register 0x75 and enable writing */
	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);

	/* Enable the iommu */
	if (!(ioc_feature_control & 0x1))
		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);

	/* Restore the iommu BAR */
	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
			       iommu->stored_addr_lo);
	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
			       iommu->stored_addr_hi);

	/* Restore the l1 indirect regs for each of the 6 l1s */
	for (i = 0; i < 6; i++)
		for (j = 0; j < 0x12; j++)
			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);

	/* Restore the l2 indirect regs */
	for (i = 0; i < 0x83; i++)
		iommu_write_l2(iommu, i, iommu->stored_l2[i]);

	/* Lock PCI setup registers */
	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
			       iommu->stored_addr_lo | 1);
}

static void iommu_enable_ga(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	switch (amd_iommu_guest_ir) {
	case AMD_IOMMU_GUEST_IR_VAPIC:
	case AMD_IOMMU_GUEST_IR_LEGACY_GA:
		iommu_feature_enable(iommu, CONTROL_GA_EN);
		iommu->irte_ops = &irte_128_ops;
		break;
	default:
		iommu->irte_ops = &irte_32_ops;
		break;
	}
#endif
}

static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
{
	iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
}

static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
{
	u64 ctrl;

	if (!amd_iommu_irtcachedis)
		return;

	/*
	 * Note:
	 * Support for the IRTCacheDis feature is determined by checking
	 * whether the bit is writable.
	 */
	iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
	ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
	ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
	if (ctrl)
		iommu->irtcachedis_enabled = true;
	pr_info("iommu%d (%#06x) : IRT cache is %s\n",
		iommu->index, iommu->devid,
		iommu->irtcachedis_enabled ? "disabled" : "enabled");
}

static void iommu_enable_2k_int(struct amd_iommu *iommu)
{
	if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
		return;

	iommu_feature_set(iommu,
			  CONTROL_NUM_INT_REMAP_MODE_2K,
			  CONTROL_NUM_INT_REMAP_MODE_MASK,
			  CONTROL_NUM_INT_REMAP_MODE);
}

static void early_enable_iommu(struct amd_iommu *iommu)
{
	iommu_disable(iommu);
	iommu_init_flags(iommu);
	iommu_set_device_table(iommu);
	iommu_enable_command_buffer(iommu);
	iommu_enable_event_buffer(iommu);
	iommu_set_exclusion_range(iommu);
	iommu_enable_gt(iommu);
	iommu_enable_ga(iommu);
	iommu_enable_xt(iommu);
	iommu_enable_irtcachedis(iommu);
	iommu_enable_2k_int(iommu);
	iommu_enable(iommu);
	amd_iommu_flush_all_caches(iommu);
}

/*
 * This function finally enables all IOMMUs found in the system after
 * they have been initialized.
 *
 * Or, when running in a kdump kernel with all IOMMUs pre-enabled, it tries
 * to copy the old content of the device table entries. If that is not the
 * case, or if the copy fails, it just continues as a normal kernel would.
 */
static void early_enable_iommus(void)
{
	struct amd_iommu *iommu;
	struct amd_iommu_pci_seg *pci_seg;

	if (!copy_device_table()) {
		/*
		 * If we get here because copying the device table from the
		 * old kernel failed even though all IOMMUs were pre-enabled,
		 * print an error message and try to free the allocated
		 * old_dev_tbl_cpy.
		 */
		if (amd_iommu_pre_enabled)
			pr_err("Failed to copy DEV table from previous kernel.\n");

		for_each_pci_segment(pci_seg) {
			if (pci_seg->old_dev_tbl_cpy != NULL) {
				iommu_free_pages(pci_seg->old_dev_tbl_cpy,
						 get_order(pci_seg->dev_table_size));
				pci_seg->old_dev_tbl_cpy = NULL;
			}
		}

		for_each_iommu(iommu) {
			clear_translation_pre_enabled(iommu);
			early_enable_iommu(iommu);
		}
	} else {
		pr_info("Copied DEV table from previous kernel.\n");

		for_each_pci_segment(pci_seg) {
			iommu_free_pages(pci_seg->dev_table,
					 get_order(pci_seg->dev_table_size));
			pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
		}

		for_each_iommu(iommu) {
			iommu_disable_command_buffer(iommu);
			iommu_disable_event_buffer(iommu);
			iommu_disable_irtcachedis(iommu);
			iommu_enable_command_buffer(iommu);
			iommu_enable_event_buffer(iommu);
			iommu_enable_ga(iommu);
			iommu_enable_xt(iommu);
			iommu_enable_irtcachedis(iommu);
			iommu_enable_2k_int(iommu);
			iommu_set_device_table(iommu);
			amd_iommu_flush_all_caches(iommu);
		}
	}
}

static void enable_iommus_ppr(void)
{
	struct amd_iommu *iommu;

	if (!amd_iommu_gt_ppr_supported())
		return;

	for_each_iommu(iommu)
		amd_iommu_enable_ppr_log(iommu);
}

static void enable_iommus_vapic(void)
{
#ifdef CONFIG_IRQ_REMAP
	u32 status, i;
	struct amd_iommu *iommu;

	for_each_iommu(iommu) {
		/*
		 * Disable GALog if already running. It could have been enabled
		 * in the previous boot before kdump.
2848 */ 2849 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2850 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2851 continue; 2852 2853 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 2854 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 2855 2856 /* 2857 * Need to set and poll check the GALOGRun bit to zero before 2858 * we can set/ modify GA Log registers safely. 2859 */ 2860 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { 2861 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2862 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2863 break; 2864 udelay(10); 2865 } 2866 2867 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) 2868 return; 2869 } 2870 2871 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && 2872 !check_feature(FEATURE_GAM_VAPIC)) { 2873 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 2874 return; 2875 } 2876 2877 if (amd_iommu_snp_en && 2878 !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) { 2879 pr_warn("Force to disable Virtual APIC due to SNP\n"); 2880 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 2881 return; 2882 } 2883 2884 /* Enabling GAM and SNPAVIC support */ 2885 for_each_iommu(iommu) { 2886 if (iommu_init_ga_log(iommu) || 2887 iommu_ga_log_enable(iommu)) 2888 return; 2889 2890 iommu_feature_enable(iommu, CONTROL_GAM_EN); 2891 if (amd_iommu_snp_en) 2892 iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN); 2893 } 2894 2895 amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); 2896 pr_info("Virtual APIC enabled\n"); 2897 #endif 2898 } 2899 2900 static void disable_iommus(void) 2901 { 2902 struct amd_iommu *iommu; 2903 2904 for_each_iommu(iommu) 2905 iommu_disable(iommu); 2906 2907 #ifdef CONFIG_IRQ_REMAP 2908 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 2909 amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP); 2910 #endif 2911 } 2912 2913 /* 2914 * Suspend/Resume support 2915 * disable suspend until real resume implemented 2916 */ 2917 2918 static void amd_iommu_resume(void) 2919 { 2920 struct amd_iommu *iommu; 2921 2922 for_each_iommu(iommu) 2923 iommu_apply_resume_quirks(iommu); 2924 2925 /* re-load the hardware */ 2926 for_each_iommu(iommu) 2927 early_enable_iommu(iommu); 2928 2929 amd_iommu_enable_interrupts(); 2930 } 2931 2932 static int amd_iommu_suspend(void) 2933 { 2934 /* disable IOMMUs to go out of the way for BIOS */ 2935 disable_iommus(); 2936 2937 return 0; 2938 } 2939 2940 static struct syscore_ops amd_iommu_syscore_ops = { 2941 .suspend = amd_iommu_suspend, 2942 .resume = amd_iommu_resume, 2943 }; 2944 2945 static void __init free_iommu_resources(void) 2946 { 2947 free_iommu_all(); 2948 free_pci_segments(); 2949 } 2950 2951 /* SB IOAPIC is always on this device in AMD systems */ 2952 #define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0)) 2953 2954 static bool __init check_ioapic_information(void) 2955 { 2956 const char *fw_bug = FW_BUG; 2957 bool ret, has_sb_ioapic; 2958 int idx; 2959 2960 has_sb_ioapic = false; 2961 ret = false; 2962 2963 /* 2964 * If we have map overrides on the kernel command line the 2965 * messages in this function might not describe firmware bugs 2966 * anymore - so be careful 2967 */ 2968 if (cmdline_maps) 2969 fw_bug = ""; 2970 2971 for (idx = 0; idx < nr_ioapics; idx++) { 2972 int devid, id = mpc_ioapic_id(idx); 2973 2974 devid = get_ioapic_devid(id); 2975 if (devid < 0) { 2976 pr_err("%s: IOAPIC[%d] not in IVRS table\n", 2977 fw_bug, id); 2978 ret = false; 2979 } else if (devid == IOAPIC_SB_DEVID) { 2980 has_sb_ioapic = true; 2981 ret = true; 2982 } 2983 } 2984 2985 if (!has_sb_ioapic) { 2986 /* 2987 * We expect the SB 
IOAPIC to be listed in the IVRS 2988 * table. The system timer is connected to the SB IOAPIC 2989 * and if we don't have it in the list the system will 2990 * panic at boot time. This situation usually happens 2991 * when the BIOS is buggy and provides us the wrong 2992 * device id for the IOAPIC in the system. 2993 */ 2994 pr_err("%s: No southbridge IOAPIC found\n", fw_bug); 2995 } 2996 2997 if (!ret) 2998 pr_err("Disabling interrupt remapping\n"); 2999 3000 return ret; 3001 } 3002 3003 static void __init free_dma_resources(void) 3004 { 3005 ida_destroy(&pdom_ids); 3006 3007 free_unity_maps(); 3008 } 3009 3010 static void __init ivinfo_init(void *ivrs) 3011 { 3012 amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); 3013 } 3014 3015 /* 3016 * This is the hardware init function for AMD IOMMU in the system. 3017 * This function is called either from amd_iommu_init or from the interrupt 3018 * remapping setup code. 3019 * 3020 * This function basically parses the ACPI table for AMD IOMMU (IVRS) 3021 * four times: 3022 * 3023 * 1 pass) Discover the most comprehensive IVHD type to use. 3024 * 3025 * 2 pass) Find the highest PCI device id the driver has to handle. 3026 * Upon this information the size of the data structures is 3027 * determined that needs to be allocated. 3028 * 3029 * 3 pass) Initialize the data structures just allocated with the 3030 * information in the ACPI table about available AMD IOMMUs 3031 * in the system. It also maps the PCI devices in the 3032 * system to specific IOMMUs 3033 * 3034 * 4 pass) After the basic data structures are allocated and 3035 * initialized we update them with information about memory 3036 * remapping requirements parsed out of the ACPI table in 3037 * this last pass. 3038 * 3039 * After everything is set up the IOMMUs are enabled and the necessary 3040 * hotplug and suspend notifiers are registered. 3041 */ 3042 static int __init early_amd_iommu_init(void) 3043 { 3044 struct acpi_table_header *ivrs_base; 3045 int ret; 3046 acpi_status status; 3047 3048 if (!amd_iommu_detected) 3049 return -ENODEV; 3050 3051 status = acpi_get_table("IVRS", 0, &ivrs_base); 3052 if (status == AE_NOT_FOUND) 3053 return -ENODEV; 3054 else if (ACPI_FAILURE(status)) { 3055 const char *err = acpi_format_exception(status); 3056 pr_err("IVRS table error: %s\n", err); 3057 return -EINVAL; 3058 } 3059 3060 if (!boot_cpu_has(X86_FEATURE_CX16)) { 3061 pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n"); 3062 return -EINVAL; 3063 } 3064 3065 /* 3066 * Validate checksum here so we don't need to do it when 3067 * we actually parse the table 3068 */ 3069 ret = check_ivrs_checksum(ivrs_base); 3070 if (ret) 3071 goto out; 3072 3073 ivinfo_init(ivrs_base); 3074 3075 amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); 3076 DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); 3077 3078 /* 3079 * now the data structures are allocated and basically initialized 3080 * start the real acpi table scan 3081 */ 3082 ret = init_iommu_all(ivrs_base); 3083 if (ret) 3084 goto out; 3085 3086 /* 5 level guest page table */ 3087 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3088 FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) 3089 amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; 3090 3091 if (amd_iommu_pgtable == PD_MODE_V2) { 3092 if (!amd_iommu_v2_pgtbl_supported()) { 3093 pr_warn("Cannot enable v2 page table for DMA-API. 
Fallback to v1.\n"); 3094 amd_iommu_pgtable = PD_MODE_V1; 3095 } 3096 } 3097 3098 /* Disable any previously enabled IOMMUs */ 3099 if (!is_kdump_kernel() || amd_iommu_disabled) 3100 disable_iommus(); 3101 3102 if (amd_iommu_irq_remap) 3103 amd_iommu_irq_remap = check_ioapic_information(); 3104 3105 if (amd_iommu_irq_remap) { 3106 struct amd_iommu_pci_seg *pci_seg; 3107 ret = -ENOMEM; 3108 for_each_pci_segment(pci_seg) { 3109 if (alloc_irq_lookup_table(pci_seg)) 3110 goto out; 3111 } 3112 } 3113 3114 ret = init_memory_definitions(ivrs_base); 3115 if (ret) 3116 goto out; 3117 3118 /* init the device table */ 3119 init_device_table(); 3120 3121 out: 3122 /* Don't leak any ACPI memory */ 3123 acpi_put_table(ivrs_base); 3124 3125 return ret; 3126 } 3127 3128 static int amd_iommu_enable_interrupts(void) 3129 { 3130 struct amd_iommu *iommu; 3131 int ret = 0; 3132 3133 for_each_iommu(iommu) { 3134 ret = iommu_init_irq(iommu); 3135 if (ret) 3136 goto out; 3137 } 3138 3139 /* 3140 * Interrupt handler is ready to process interrupts. Enable 3141 * PPR and GA log interrupt for all IOMMUs. 3142 */ 3143 enable_iommus_vapic(); 3144 enable_iommus_ppr(); 3145 3146 out: 3147 return ret; 3148 } 3149 3150 static bool __init detect_ivrs(void) 3151 { 3152 struct acpi_table_header *ivrs_base; 3153 acpi_status status; 3154 int i; 3155 3156 status = acpi_get_table("IVRS", 0, &ivrs_base); 3157 if (status == AE_NOT_FOUND) 3158 return false; 3159 else if (ACPI_FAILURE(status)) { 3160 const char *err = acpi_format_exception(status); 3161 pr_err("IVRS table error: %s\n", err); 3162 return false; 3163 } 3164 3165 acpi_put_table(ivrs_base); 3166 3167 if (amd_iommu_force_enable) 3168 goto out; 3169 3170 /* Don't use IOMMU if there is Stoney Ridge graphics */ 3171 for (i = 0; i < 32; i++) { 3172 u32 pci_id; 3173 3174 pci_id = read_pci_config(0, i, 0, 0); 3175 if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { 3176 pr_info("Disable IOMMU on Stoney Ridge\n"); 3177 return false; 3178 } 3179 } 3180 3181 out: 3182 /* Make sure ACS will be enabled during PCI probe */ 3183 pci_request_acs(); 3184 3185 return true; 3186 } 3187 3188 static __init void iommu_snp_enable(void) 3189 { 3190 #ifdef CONFIG_KVM_AMD_SEV 3191 if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) 3192 return; 3193 /* 3194 * The SNP support requires that IOMMU must be enabled, and is 3195 * configured with V1 page table (DTE[Mode] = 0 is not supported). 3196 */ 3197 if (no_iommu || iommu_default_passthrough()) { 3198 pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n"); 3199 goto disable_snp; 3200 } 3201 3202 if (amd_iommu_pgtable != PD_MODE_V1) { 3203 pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); 3204 goto disable_snp; 3205 } 3206 3207 amd_iommu_snp_en = check_feature(FEATURE_SNP); 3208 if (!amd_iommu_snp_en) { 3209 pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n"); 3210 goto disable_snp; 3211 } 3212 3213 /* 3214 * Enable host SNP support once SNP support is checked on IOMMU. 
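	 *
	 * snp_rmptable_init() performs the RMP table initialization; if it
	 * fails, host SNP support is withdrawn below by clearing
	 * CC_ATTR_HOST_SEV_SNP.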
3215 */ 3216 if (snp_rmptable_init()) { 3217 pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); 3218 goto disable_snp; 3219 } 3220 3221 pr_info("IOMMU SNP support enabled.\n"); 3222 return; 3223 3224 disable_snp: 3225 cc_platform_clear(CC_ATTR_HOST_SEV_SNP); 3226 #endif 3227 } 3228 3229 /**************************************************************************** 3230 * 3231 * AMD IOMMU Initialization State Machine 3232 * 3233 ****************************************************************************/ 3234 3235 static int __init state_next(void) 3236 { 3237 int ret = 0; 3238 3239 switch (init_state) { 3240 case IOMMU_START_STATE: 3241 if (!detect_ivrs()) { 3242 init_state = IOMMU_NOT_FOUND; 3243 ret = -ENODEV; 3244 } else { 3245 init_state = IOMMU_IVRS_DETECTED; 3246 } 3247 break; 3248 case IOMMU_IVRS_DETECTED: 3249 if (amd_iommu_disabled) { 3250 init_state = IOMMU_CMDLINE_DISABLED; 3251 ret = -EINVAL; 3252 } else { 3253 ret = early_amd_iommu_init(); 3254 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; 3255 } 3256 break; 3257 case IOMMU_ACPI_FINISHED: 3258 early_enable_iommus(); 3259 x86_platform.iommu_shutdown = disable_iommus; 3260 init_state = IOMMU_ENABLED; 3261 break; 3262 case IOMMU_ENABLED: 3263 register_syscore_ops(&amd_iommu_syscore_ops); 3264 iommu_snp_enable(); 3265 ret = amd_iommu_init_pci(); 3266 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; 3267 break; 3268 case IOMMU_PCI_INIT: 3269 ret = amd_iommu_enable_interrupts(); 3270 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; 3271 break; 3272 case IOMMU_INTERRUPTS_EN: 3273 init_state = IOMMU_INITIALIZED; 3274 break; 3275 case IOMMU_INITIALIZED: 3276 /* Nothing to do */ 3277 break; 3278 case IOMMU_NOT_FOUND: 3279 case IOMMU_INIT_ERROR: 3280 case IOMMU_CMDLINE_DISABLED: 3281 /* Error states => do nothing */ 3282 ret = -EINVAL; 3283 break; 3284 default: 3285 /* Unknown state */ 3286 BUG(); 3287 } 3288 3289 if (ret) { 3290 free_dma_resources(); 3291 if (!irq_remapping_enabled) { 3292 disable_iommus(); 3293 free_iommu_resources(); 3294 } else { 3295 struct amd_iommu *iommu; 3296 struct amd_iommu_pci_seg *pci_seg; 3297 3298 for_each_pci_segment(pci_seg) 3299 uninit_device_table_dma(pci_seg); 3300 3301 for_each_iommu(iommu) 3302 amd_iommu_flush_all_caches(iommu); 3303 } 3304 } 3305 return ret; 3306 } 3307 3308 static int __init iommu_go_to_state(enum iommu_init_state state) 3309 { 3310 int ret = -EINVAL; 3311 3312 while (init_state != state) { 3313 if (init_state == IOMMU_NOT_FOUND || 3314 init_state == IOMMU_INIT_ERROR || 3315 init_state == IOMMU_CMDLINE_DISABLED) 3316 break; 3317 ret = state_next(); 3318 } 3319 3320 /* 3321 * SNP platform initilazation requires IOMMUs to be fully configured. 3322 * If the SNP support on IOMMUs has NOT been checked, simply mark SNP 3323 * as unsupported. If the SNP support on IOMMUs has been checked and 3324 * host SNP support enabled but RMP enforcement has not been enabled 3325 * in IOMMUs, then the system is in a half-baked state, but can limp 3326 * along as all memory should be Hypervisor-Owned in the RMP. WARN, 3327 * but leave SNP as "supported" to avoid confusing the kernel. 
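	 *
	 * (amd_iommu_snp_en is only set once iommu_snp_enable() has run in
	 * the IOMMU_ENABLED state above; the check below uses it to tell the
	 * two situations apart.)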
3328 */ 3329 if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) && 3330 !WARN_ON_ONCE(amd_iommu_snp_en)) 3331 cc_platform_clear(CC_ATTR_HOST_SEV_SNP); 3332 3333 return ret; 3334 } 3335 3336 #ifdef CONFIG_IRQ_REMAP 3337 int __init amd_iommu_prepare(void) 3338 { 3339 int ret; 3340 3341 amd_iommu_irq_remap = true; 3342 3343 ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); 3344 if (ret) { 3345 amd_iommu_irq_remap = false; 3346 return ret; 3347 } 3348 3349 return amd_iommu_irq_remap ? 0 : -ENODEV; 3350 } 3351 3352 int __init amd_iommu_enable(void) 3353 { 3354 int ret; 3355 3356 ret = iommu_go_to_state(IOMMU_ENABLED); 3357 if (ret) 3358 return ret; 3359 3360 irq_remapping_enabled = 1; 3361 return amd_iommu_xt_mode; 3362 } 3363 3364 void amd_iommu_disable(void) 3365 { 3366 amd_iommu_suspend(); 3367 } 3368 3369 int amd_iommu_reenable(int mode) 3370 { 3371 amd_iommu_resume(); 3372 3373 return 0; 3374 } 3375 3376 int amd_iommu_enable_faulting(unsigned int cpu) 3377 { 3378 /* We enable MSI later when PCI is initialized */ 3379 return 0; 3380 } 3381 #endif 3382 3383 /* 3384 * This is the core init function for AMD IOMMU hardware in the system. 3385 * This function is called from the generic x86 DMA layer initialization 3386 * code. 3387 */ 3388 static int __init amd_iommu_init(void) 3389 { 3390 struct amd_iommu *iommu; 3391 int ret; 3392 3393 ret = iommu_go_to_state(IOMMU_INITIALIZED); 3394 #ifdef CONFIG_GART_IOMMU 3395 if (ret && list_empty(&amd_iommu_list)) { 3396 /* 3397 * We failed to initialize the AMD IOMMU - try fallback 3398 * to GART if possible. 3399 */ 3400 gart_iommu_init(); 3401 } 3402 #endif 3403 3404 for_each_iommu(iommu) 3405 amd_iommu_debugfs_setup(iommu); 3406 3407 return ret; 3408 } 3409 3410 static bool amd_iommu_sme_check(void) 3411 { 3412 if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) || 3413 (boot_cpu_data.x86 != 0x17)) 3414 return true; 3415 3416 /* For Fam17h, a specific level of support is required */ 3417 if (boot_cpu_data.microcode >= 0x08001205) 3418 return true; 3419 3420 if ((boot_cpu_data.microcode >= 0x08001126) && 3421 (boot_cpu_data.microcode <= 0x080011ff)) 3422 return true; 3423 3424 pr_notice("IOMMU not currently supported when SME is active\n"); 3425 3426 return false; 3427 } 3428 3429 /**************************************************************************** 3430 * 3431 * Early detect code. This code runs at IOMMU detection time in the DMA 3432 * layer. It just looks if there is an IVRS ACPI table to detect AMD 3433 * IOMMUs 3434 * 3435 ****************************************************************************/ 3436 void __init amd_iommu_detect(void) 3437 { 3438 int ret; 3439 3440 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 3441 goto disable_snp; 3442 3443 if (!amd_iommu_sme_check()) 3444 goto disable_snp; 3445 3446 ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); 3447 if (ret) 3448 goto disable_snp; 3449 3450 amd_iommu_detected = true; 3451 iommu_detected = 1; 3452 x86_init.iommu.iommu_init = amd_iommu_init; 3453 return; 3454 3455 disable_snp: 3456 if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) 3457 cc_platform_clear(CC_ATTR_HOST_SEV_SNP); 3458 } 3459 3460 /**************************************************************************** 3461 * 3462 * Parsing functions for the AMD IOMMU specific kernel command line 3463 * options. 
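 *
 * Example usage (formats accepted by the parsers below):
 *
 *   amd_iommu=pgtbl_v2,force_isolation
 *   ivrs_ioapic=32@0000:00:14.0
 *   ivrs_hpet=0@0000:00:14.0
 *   ivrs_acpihid=AMDI0020:0@0000:00:13.1
 *
 * The device IDs and addresses above are only illustrative.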
3464 * 3465 ****************************************************************************/ 3466 3467 static int __init parse_amd_iommu_dump(char *str) 3468 { 3469 amd_iommu_dump = true; 3470 3471 return 1; 3472 } 3473 3474 static int __init parse_amd_iommu_intr(char *str) 3475 { 3476 for (; *str; ++str) { 3477 if (strncmp(str, "legacy", 6) == 0) { 3478 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3479 break; 3480 } 3481 if (strncmp(str, "vapic", 5) == 0) { 3482 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 3483 break; 3484 } 3485 } 3486 return 1; 3487 } 3488 3489 static int __init parse_amd_iommu_options(char *str) 3490 { 3491 if (!str) 3492 return -EINVAL; 3493 3494 while (*str) { 3495 if (strncmp(str, "fullflush", 9) == 0) { 3496 pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); 3497 iommu_set_dma_strict(); 3498 } else if (strncmp(str, "force_enable", 12) == 0) { 3499 amd_iommu_force_enable = true; 3500 } else if (strncmp(str, "off", 3) == 0) { 3501 amd_iommu_disabled = true; 3502 } else if (strncmp(str, "force_isolation", 15) == 0) { 3503 amd_iommu_force_isolation = true; 3504 } else if (strncmp(str, "pgtbl_v1", 8) == 0) { 3505 amd_iommu_pgtable = PD_MODE_V1; 3506 } else if (strncmp(str, "pgtbl_v2", 8) == 0) { 3507 amd_iommu_pgtable = PD_MODE_V2; 3508 } else if (strncmp(str, "irtcachedis", 11) == 0) { 3509 amd_iommu_irtcachedis = true; 3510 } else if (strncmp(str, "nohugepages", 11) == 0) { 3511 pr_info("Restricting V1 page-sizes to 4KiB"); 3512 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K; 3513 } else if (strncmp(str, "v2_pgsizes_only", 15) == 0) { 3514 pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB"); 3515 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; 3516 } else { 3517 pr_notice("Unknown option - '%s'\n", str); 3518 } 3519 3520 str += strcspn(str, ","); 3521 while (*str == ',') 3522 str++; 3523 } 3524 3525 return 1; 3526 } 3527 3528 static int __init parse_ivrs_ioapic(char *str) 3529 { 3530 u32 seg = 0, bus, dev, fn; 3531 int id, i; 3532 u32 devid; 3533 3534 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3535 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3536 goto found; 3537 3538 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3539 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3540 pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", 3541 str, id, seg, bus, dev, fn); 3542 goto found; 3543 } 3544 3545 pr_err("Invalid command line: ivrs_ioapic%s\n", str); 3546 return 1; 3547 3548 found: 3549 if (early_ioapic_map_size == EARLY_MAP_SIZE) { 3550 pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", 3551 str); 3552 return 1; 3553 } 3554 3555 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3556 3557 cmdline_maps = true; 3558 i = early_ioapic_map_size++; 3559 early_ioapic_map[i].id = id; 3560 early_ioapic_map[i].devid = devid; 3561 early_ioapic_map[i].cmd_line = true; 3562 3563 return 1; 3564 } 3565 3566 static int __init parse_ivrs_hpet(char *str) 3567 { 3568 u32 seg = 0, bus, dev, fn; 3569 int id, i; 3570 u32 devid; 3571 3572 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3573 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3574 goto found; 3575 3576 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3577 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3578 pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", 3579 str, id, seg, 
bus, dev, fn); 3580 goto found; 3581 } 3582 3583 pr_err("Invalid command line: ivrs_hpet%s\n", str); 3584 return 1; 3585 3586 found: 3587 if (early_hpet_map_size == EARLY_MAP_SIZE) { 3588 pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", 3589 str); 3590 return 1; 3591 } 3592 3593 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3594 3595 cmdline_maps = true; 3596 i = early_hpet_map_size++; 3597 early_hpet_map[i].id = id; 3598 early_hpet_map[i].devid = devid; 3599 early_hpet_map[i].cmd_line = true; 3600 3601 return 1; 3602 } 3603 3604 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN) 3605 3606 static int __init parse_ivrs_acpihid(char *str) 3607 { 3608 u32 seg = 0, bus, dev, fn; 3609 char *hid, *uid, *p, *addr; 3610 char acpiid[ACPIID_LEN] = {0}; 3611 int i; 3612 3613 addr = strchr(str, '@'); 3614 if (!addr) { 3615 addr = strchr(str, '='); 3616 if (!addr) 3617 goto not_found; 3618 3619 ++addr; 3620 3621 if (strlen(addr) > ACPIID_LEN) 3622 goto not_found; 3623 3624 if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || 3625 sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { 3626 pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", 3627 str, acpiid, seg, bus, dev, fn); 3628 goto found; 3629 } 3630 goto not_found; 3631 } 3632 3633 /* We have the '@', make it the terminator to get just the acpiid */ 3634 *addr++ = 0; 3635 3636 if (strlen(str) > ACPIID_LEN + 1) 3637 goto not_found; 3638 3639 if (sscanf(str, "=%s", acpiid) != 1) 3640 goto not_found; 3641 3642 if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 || 3643 sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) 3644 goto found; 3645 3646 not_found: 3647 pr_err("Invalid command line: ivrs_acpihid%s\n", str); 3648 return 1; 3649 3650 found: 3651 p = acpiid; 3652 hid = strsep(&p, ":"); 3653 uid = p; 3654 3655 if (!hid || !(*hid) || !uid) { 3656 pr_err("Invalid command line: hid or uid\n"); 3657 return 1; 3658 } 3659 3660 /* 3661 * Ignore leading zeroes after ':', so e.g., AMDI0095:00 3662 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match 3663 */ 3664 while (*uid == '0' && *(uid + 1)) 3665 uid++; 3666 3667 i = early_acpihid_map_size++; 3668 memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); 3669 memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); 3670 early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3671 early_acpihid_map[i].cmd_line = true; 3672 3673 return 1; 3674 } 3675 3676 __setup("amd_iommu_dump", parse_amd_iommu_dump); 3677 __setup("amd_iommu=", parse_amd_iommu_options); 3678 __setup("amd_iommu_intr=", parse_amd_iommu_intr); 3679 __setup("ivrs_ioapic", parse_ivrs_ioapic); 3680 __setup("ivrs_hpet", parse_ivrs_hpet); 3681 __setup("ivrs_acpihid", parse_ivrs_acpihid); 3682 3683 bool amd_iommu_pasid_supported(void) 3684 { 3685 /* CPU page table size should match IOMMU guest page table size */ 3686 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3687 amd_iommu_gpt_level != PAGE_MODE_5_LEVEL) 3688 return false; 3689 3690 /* 3691 * Since DTE[Mode]=0 is prohibited on SNP-enabled system 3692 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without 3693 * setting up IOMMUv1 page table. 
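	 * Hence PASID support is reported only when GT/PPR is supported and
	 * SNP is not enabled (iommu_snp_enable() only allows SNP together
	 * with the V1 page table).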
3694 */ 3695 return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en; 3696 } 3697 3698 struct amd_iommu *get_amd_iommu(unsigned int idx) 3699 { 3700 unsigned int i = 0; 3701 struct amd_iommu *iommu; 3702 3703 for_each_iommu(iommu) 3704 if (i++ == idx) 3705 return iommu; 3706 return NULL; 3707 } 3708 3709 /**************************************************************************** 3710 * 3711 * IOMMU EFR Performance Counter support functionality. This code allows 3712 * access to the IOMMU PC functionality. 3713 * 3714 ****************************************************************************/ 3715 3716 u8 amd_iommu_pc_get_max_banks(unsigned int idx) 3717 { 3718 struct amd_iommu *iommu = get_amd_iommu(idx); 3719 3720 if (iommu) 3721 return iommu->max_banks; 3722 3723 return 0; 3724 } 3725 3726 bool amd_iommu_pc_supported(void) 3727 { 3728 return amd_iommu_pc_present; 3729 } 3730 3731 u8 amd_iommu_pc_get_max_counters(unsigned int idx) 3732 { 3733 struct amd_iommu *iommu = get_amd_iommu(idx); 3734 3735 if (iommu) 3736 return iommu->max_counters; 3737 3738 return 0; 3739 } 3740 3741 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, 3742 u8 fxn, u64 *value, bool is_write) 3743 { 3744 u32 offset; 3745 u32 max_offset_lim; 3746 3747 /* Make sure the IOMMU PC resource is available */ 3748 if (!amd_iommu_pc_present) 3749 return -ENODEV; 3750 3751 /* Check for valid iommu and pc register indexing */ 3752 if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7))) 3753 return -ENODEV; 3754 3755 offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); 3756 3757 /* Limit the offset to the hw defined mmio region aperture */ 3758 max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) | 3759 (iommu->max_counters << 8) | 0x28); 3760 if ((offset < MMIO_CNTR_REG_OFFSET) || 3761 (offset > max_offset_lim)) 3762 return -EINVAL; 3763 3764 if (is_write) { 3765 u64 val = *value & GENMASK_ULL(47, 0); 3766 3767 writel((u32)val, iommu->mmio_base + offset); 3768 writel((val >> 32), iommu->mmio_base + offset + 4); 3769 } else { 3770 *value = readl(iommu->mmio_base + offset + 4); 3771 *value <<= 32; 3772 *value |= readl(iommu->mmio_base + offset); 3773 *value &= GENMASK_ULL(47, 0); 3774 } 3775 3776 return 0; 3777 } 3778 3779 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3780 { 3781 if (!iommu) 3782 return -EINVAL; 3783 3784 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); 3785 } 3786 3787 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3788 { 3789 if (!iommu) 3790 return -EINVAL; 3791 3792 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); 3793 } 3794 3795 #ifdef CONFIG_KVM_AMD_SEV 3796 static int iommu_page_make_shared(void *page) 3797 { 3798 unsigned long paddr, pfn; 3799 3800 paddr = iommu_virt_to_phys(page); 3801 /* Cbit maybe set in the paddr */ 3802 pfn = __sme_clr(paddr) >> PAGE_SHIFT; 3803 3804 if (!(pfn % PTRS_PER_PMD)) { 3805 int ret, level; 3806 bool assigned; 3807 3808 ret = snp_lookup_rmpentry(pfn, &assigned, &level); 3809 if (ret) { 3810 pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret); 3811 return ret; 3812 } 3813 3814 if (!assigned) { 3815 pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn); 3816 return -EINVAL; 3817 } 3818 3819 if (level > PG_LEVEL_4K) { 3820 ret = psmash(pfn); 3821 if (!ret) 3822 goto done; 3823 3824 pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n", 3825 pfn, ret, level); 3826 return ret; 3827 } 3828 } 
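	/*
	 * If the pfn is 2M-aligned, any huge RMP entry covering it has been
	 * split (psmash) above; the page can now be marked shared in the RMP.
	 */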
3829 3830 done: 3831 return rmp_make_shared(pfn, PG_LEVEL_4K); 3832 } 3833 3834 static int iommu_make_shared(void *va, size_t size) 3835 { 3836 void *page; 3837 int ret; 3838 3839 if (!va) 3840 return 0; 3841 3842 for (page = va; page < (va + size); page += PAGE_SIZE) { 3843 ret = iommu_page_make_shared(page); 3844 if (ret) 3845 return ret; 3846 } 3847 3848 return 0; 3849 } 3850 3851 int amd_iommu_snp_disable(void) 3852 { 3853 struct amd_iommu *iommu; 3854 int ret; 3855 3856 if (!amd_iommu_snp_en) 3857 return 0; 3858 3859 for_each_iommu(iommu) { 3860 ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE); 3861 if (ret) 3862 return ret; 3863 3864 ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE); 3865 if (ret) 3866 return ret; 3867 3868 ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE); 3869 if (ret) 3870 return ret; 3871 } 3872 3873 return 0; 3874 } 3875 EXPORT_SYMBOL_GPL(amd_iommu_snp_disable); 3876 #endif 3877