1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 * Leo Duran <leo.duran@amd.com> 6 */ 7 8 #define pr_fmt(fmt) "AMD-Vi: " fmt 9 #define dev_fmt(fmt) pr_fmt(fmt) 10 11 #include <linux/pci.h> 12 #include <linux/acpi.h> 13 #include <linux/list.h> 14 #include <linux/bitmap.h> 15 #include <linux/slab.h> 16 #include <linux/syscore_ops.h> 17 #include <linux/interrupt.h> 18 #include <linux/msi.h> 19 #include <linux/irq.h> 20 #include <linux/amd-iommu.h> 21 #include <linux/export.h> 22 #include <linux/kmemleak.h> 23 #include <linux/cc_platform.h> 24 #include <linux/iopoll.h> 25 #include <asm/pci-direct.h> 26 #include <asm/iommu.h> 27 #include <asm/apic.h> 28 #include <asm/gart.h> 29 #include <asm/x86_init.h> 30 #include <asm/io_apic.h> 31 #include <asm/irq_remapping.h> 32 #include <asm/set_memory.h> 33 #include <asm/sev.h> 34 35 #include <linux/crash_dump.h> 36 37 #include "amd_iommu.h" 38 #include "../irq_remapping.h" 39 #include "../iommu-pages.h" 40 41 /* 42 * definitions for the ACPI scanning code 43 */ 44 #define IVRS_HEADER_LENGTH 48 45 46 #define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40 47 #define ACPI_IVMD_TYPE_ALL 0x20 48 #define ACPI_IVMD_TYPE 0x21 49 #define ACPI_IVMD_TYPE_RANGE 0x22 50 51 #define IVHD_DEV_ALL 0x01 52 #define IVHD_DEV_SELECT 0x02 53 #define IVHD_DEV_SELECT_RANGE_START 0x03 54 #define IVHD_DEV_RANGE_END 0x04 55 #define IVHD_DEV_ALIAS 0x42 56 #define IVHD_DEV_ALIAS_RANGE 0x43 57 #define IVHD_DEV_EXT_SELECT 0x46 58 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 59 #define IVHD_DEV_SPECIAL 0x48 60 #define IVHD_DEV_ACPI_HID 0xf0 61 62 #define UID_NOT_PRESENT 0 63 #define UID_IS_INTEGER 1 64 #define UID_IS_CHARACTER 2 65 66 #define IVHD_SPECIAL_IOAPIC 1 67 #define IVHD_SPECIAL_HPET 2 68 69 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 70 #define IVHD_FLAG_PASSPW_EN_MASK 0x02 71 #define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 72 #define IVHD_FLAG_ISOC_EN_MASK 0x08 73 74 #define 
IVMD_FLAG_EXCL_RANGE 0x08 75 #define IVMD_FLAG_IW 0x04 76 #define IVMD_FLAG_IR 0x02 77 #define IVMD_FLAG_UNITY_MAP 0x01 78 79 #define ACPI_DEVFLAG_INITPASS 0x01 80 #define ACPI_DEVFLAG_EXTINT 0x02 81 #define ACPI_DEVFLAG_NMI 0x04 82 #define ACPI_DEVFLAG_SYSMGT1 0x10 83 #define ACPI_DEVFLAG_SYSMGT2 0x20 84 #define ACPI_DEVFLAG_LINT0 0x40 85 #define ACPI_DEVFLAG_LINT1 0x80 86 #define ACPI_DEVFLAG_ATSDIS 0x10000000 87 88 #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ 89 | ((dev & 0x1f) << 3) | (fn & 0x7)) 90 91 /* 92 * ACPI table definitions 93 * 94 * These data structures are laid over the table to parse the important values 95 * out of it. 96 */ 97 98 /* 99 * structure describing one IOMMU in the ACPI table. Typically followed by one 100 * or more ivhd_entrys. 101 */ 102 struct ivhd_header { 103 u8 type; 104 u8 flags; 105 u16 length; 106 u16 devid; 107 u16 cap_ptr; 108 u64 mmio_phys; 109 u16 pci_seg; 110 u16 info; 111 u32 efr_attr; 112 113 /* Following only valid on IVHD type 11h and 40h */ 114 u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ 115 u64 efr_reg2; 116 } __attribute__((packed)); 117 118 /* 119 * A device entry describing which devices a specific IOMMU translates and 120 * which requestor ids they use. 121 */ 122 struct ivhd_entry { 123 u8 type; 124 u16 devid; 125 u8 flags; 126 struct_group(ext_hid, 127 u32 ext; 128 u32 hidh; 129 ); 130 u64 cid; 131 u8 uidf; 132 u8 uidl; 133 u8 uid; 134 } __attribute__((packed)); 135 136 /* 137 * An AMD IOMMU memory definition structure. It defines things like exclusion 138 * ranges for devices and regions that should be unity mapped. 
139 */ 140 struct ivmd_header { 141 u8 type; 142 u8 flags; 143 u16 length; 144 u16 devid; 145 u16 aux; 146 u16 pci_seg; 147 u8 resv[6]; 148 u64 range_start; 149 u64 range_length; 150 } __attribute__((packed)); 151 152 bool amd_iommu_dump; 153 bool amd_iommu_irq_remap __read_mostly; 154 155 enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; 156 /* Guest page table level */ 157 int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; 158 159 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 160 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; 161 162 static bool amd_iommu_detected; 163 static bool amd_iommu_disabled __initdata; 164 static bool amd_iommu_force_enable __initdata; 165 static bool amd_iommu_irtcachedis; 166 static int amd_iommu_target_ivhd_type; 167 168 /* Global EFR and EFR2 registers */ 169 u64 amd_iommu_efr; 170 u64 amd_iommu_efr2; 171 172 /* SNP is enabled on the system? */ 173 bool amd_iommu_snp_en; 174 EXPORT_SYMBOL(amd_iommu_snp_en); 175 176 LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ 177 LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 178 system */ 179 180 /* Array to assign indices to IOMMUs*/ 181 struct amd_iommu *amd_iommus[MAX_IOMMUS]; 182 183 /* Number of IOMMUs present in the system */ 184 static int amd_iommus_present; 185 186 /* IOMMUs have a non-present cache? */ 187 bool amd_iommu_np_cache __read_mostly; 188 bool amd_iommu_iotlb_sup __read_mostly = true; 189 190 static bool amd_iommu_pc_present __read_mostly; 191 bool amdr_ivrs_remap_support __read_mostly; 192 193 bool amd_iommu_force_isolation __read_mostly; 194 195 /* 196 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap 197 * to know which ones are already in use. 
198 */ 199 unsigned long *amd_iommu_pd_alloc_bitmap; 200 201 enum iommu_init_state { 202 IOMMU_START_STATE, 203 IOMMU_IVRS_DETECTED, 204 IOMMU_ACPI_FINISHED, 205 IOMMU_ENABLED, 206 IOMMU_PCI_INIT, 207 IOMMU_INTERRUPTS_EN, 208 IOMMU_INITIALIZED, 209 IOMMU_NOT_FOUND, 210 IOMMU_INIT_ERROR, 211 IOMMU_CMDLINE_DISABLED, 212 }; 213 214 /* Early ioapic and hpet maps from kernel command line */ 215 #define EARLY_MAP_SIZE 4 216 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE]; 217 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE]; 218 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE]; 219 220 static int __initdata early_ioapic_map_size; 221 static int __initdata early_hpet_map_size; 222 static int __initdata early_acpihid_map_size; 223 224 static bool __initdata cmdline_maps; 225 226 static enum iommu_init_state init_state = IOMMU_START_STATE; 227 228 static int amd_iommu_enable_interrupts(void); 229 static int __init iommu_go_to_state(enum iommu_init_state state); 230 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); 231 232 static bool amd_iommu_pre_enabled = true; 233 234 static u32 amd_iommu_ivinfo __initdata; 235 236 bool translation_pre_enabled(struct amd_iommu *iommu) 237 { 238 return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); 239 } 240 241 static void clear_translation_pre_enabled(struct amd_iommu *iommu) 242 { 243 iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 244 } 245 246 static void init_translation_status(struct amd_iommu *iommu) 247 { 248 u64 ctrl; 249 250 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 251 if (ctrl & (1<<CONTROL_IOMMU_EN)) 252 iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 253 } 254 255 static inline unsigned long tbl_size(int entry_size, int last_bdf) 256 { 257 unsigned shift = PAGE_SHIFT + 258 get_order((last_bdf + 1) * entry_size); 259 260 return 1UL << shift; 261 } 262 263 int amd_iommu_get_num_iommus(void) 264 { 265 return 
amd_iommus_present; 266 } 267 268 /* 269 * Iterate through all the IOMMUs to get common EFR 270 * masks among all IOMMUs and warn if found inconsistency. 271 */ 272 static __init void get_global_efr(void) 273 { 274 struct amd_iommu *iommu; 275 276 for_each_iommu(iommu) { 277 u64 tmp = iommu->features; 278 u64 tmp2 = iommu->features2; 279 280 if (list_is_first(&iommu->list, &amd_iommu_list)) { 281 amd_iommu_efr = tmp; 282 amd_iommu_efr2 = tmp2; 283 continue; 284 } 285 286 if (amd_iommu_efr == tmp && 287 amd_iommu_efr2 == tmp2) 288 continue; 289 290 pr_err(FW_BUG 291 "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n", 292 tmp, tmp2, amd_iommu_efr, amd_iommu_efr2, 293 iommu->index, iommu->pci_seg->id, 294 PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid), 295 PCI_FUNC(iommu->devid)); 296 297 amd_iommu_efr &= tmp; 298 amd_iommu_efr2 &= tmp2; 299 } 300 301 pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); 302 } 303 304 /* 305 * For IVHD type 0x11/0x40, EFR is also available via IVHD. 306 * Default to IVHD EFR since it is available sooner 307 * (i.e. before PCI init). 
308 */ 309 static void __init early_iommu_features_init(struct amd_iommu *iommu, 310 struct ivhd_header *h) 311 { 312 if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) { 313 iommu->features = h->efr_reg; 314 iommu->features2 = h->efr_reg2; 315 } 316 if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP) 317 amdr_ivrs_remap_support = true; 318 } 319 320 /* Access to l1 and l2 indexed register spaces */ 321 322 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) 323 { 324 u32 val; 325 326 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 327 pci_read_config_dword(iommu->dev, 0xfc, &val); 328 return val; 329 } 330 331 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) 332 { 333 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); 334 pci_write_config_dword(iommu->dev, 0xfc, val); 335 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 336 } 337 338 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) 339 { 340 u32 val; 341 342 pci_write_config_dword(iommu->dev, 0xf0, address); 343 pci_read_config_dword(iommu->dev, 0xf4, &val); 344 return val; 345 } 346 347 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) 348 { 349 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); 350 pci_write_config_dword(iommu->dev, 0xf4, val); 351 } 352 353 /**************************************************************************** 354 * 355 * AMD IOMMU MMIO register space handling functions 356 * 357 * These functions are used to program the IOMMU device registers in 358 * MMIO space required for that driver. 359 * 360 ****************************************************************************/ 361 362 /* 363 * This function set the exclusion range in the IOMMU. 
DMA accesses to the 364 * exclusion range are passed through untranslated 365 */ 366 static void iommu_set_exclusion_range(struct amd_iommu *iommu) 367 { 368 u64 start = iommu->exclusion_start & PAGE_MASK; 369 u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK; 370 u64 entry; 371 372 if (!iommu->exclusion_start) 373 return; 374 375 entry = start | MMIO_EXCL_ENABLE_MASK; 376 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, 377 &entry, sizeof(entry)); 378 379 entry = limit; 380 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, 381 &entry, sizeof(entry)); 382 } 383 384 static void iommu_set_cwwb_range(struct amd_iommu *iommu) 385 { 386 u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); 387 u64 entry = start & PM_ADDR_MASK; 388 389 if (!check_feature(FEATURE_SNP)) 390 return; 391 392 /* Note: 393 * Re-purpose Exclusion base/limit registers for Completion wait 394 * write-back base/limit. 395 */ 396 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, 397 &entry, sizeof(entry)); 398 399 /* Note: 400 * Default to 4 Kbytes, which can be specified by setting base 401 * address equal to the limit address. 402 */ 403 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, 404 &entry, sizeof(entry)); 405 } 406 407 /* Programs the physical address of the device table into the IOMMU hardware */ 408 static void iommu_set_device_table(struct amd_iommu *iommu) 409 { 410 u64 entry; 411 u32 dev_table_size = iommu->pci_seg->dev_table_size; 412 void *dev_table = (void *)get_dev_table(iommu); 413 414 BUG_ON(iommu->mmio_base == NULL); 415 416 entry = iommu_virt_to_phys(dev_table); 417 entry |= (dev_table_size >> 12) - 1; 418 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, 419 &entry, sizeof(entry)); 420 } 421 422 /* Generic functions to enable/disable certain features of the IOMMU. 
*/ 423 void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 424 { 425 u64 ctrl; 426 427 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 428 ctrl |= (1ULL << bit); 429 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 430 } 431 432 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) 433 { 434 u64 ctrl; 435 436 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 437 ctrl &= ~(1ULL << bit); 438 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 439 } 440 441 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) 442 { 443 u64 ctrl; 444 445 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 446 ctrl &= ~CTRL_INV_TO_MASK; 447 ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; 448 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 449 } 450 451 /* Function to enable the hardware */ 452 static void iommu_enable(struct amd_iommu *iommu) 453 { 454 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 455 } 456 457 static void iommu_disable(struct amd_iommu *iommu) 458 { 459 if (!iommu->mmio_base) 460 return; 461 462 /* Disable command buffer */ 463 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 464 465 /* Disable event logging and event interrupts */ 466 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); 467 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 468 469 /* Disable IOMMU GA_LOG */ 470 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 471 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 472 473 /* Disable IOMMU PPR logging */ 474 iommu_feature_disable(iommu, CONTROL_PPRLOG_EN); 475 iommu_feature_disable(iommu, CONTROL_PPRINT_EN); 476 477 /* Disable IOMMU hardware itself */ 478 iommu_feature_disable(iommu, CONTROL_IOMMU_EN); 479 480 /* Clear IRTE cache disabling bit */ 481 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); 482 } 483 484 /* 485 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in 486 * the system has one. 
487 */ 488 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) 489 { 490 if (!request_mem_region(address, end, "amd_iommu")) { 491 pr_err("Can not reserve memory region %llx-%llx for mmio\n", 492 address, end); 493 pr_err("This is a BIOS bug. Please contact your hardware vendor\n"); 494 return NULL; 495 } 496 497 return (u8 __iomem *)ioremap(address, end); 498 } 499 500 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) 501 { 502 if (iommu->mmio_base) 503 iounmap(iommu->mmio_base); 504 release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); 505 } 506 507 static inline u32 get_ivhd_header_size(struct ivhd_header *h) 508 { 509 u32 size = 0; 510 511 switch (h->type) { 512 case 0x10: 513 size = 24; 514 break; 515 case 0x11: 516 case 0x40: 517 size = 40; 518 break; 519 } 520 return size; 521 } 522 523 /**************************************************************************** 524 * 525 * The functions below belong to the first pass of AMD IOMMU ACPI table 526 * parsing. In this pass we try to find out the highest device id this 527 * code has to handle. Upon this information the size of the shared data 528 * structures is determined later. 
529 * 530 ****************************************************************************/ 531 532 /* 533 * This function calculates the length of a given IVHD entry 534 */ 535 static inline int ivhd_entry_length(u8 *ivhd) 536 { 537 u32 type = ((struct ivhd_entry *)ivhd)->type; 538 539 if (type < 0x80) { 540 return 0x04 << (*ivhd >> 6); 541 } else if (type == IVHD_DEV_ACPI_HID) { 542 /* For ACPI_HID, offset 21 is uid len */ 543 return *((u8 *)ivhd + 21) + 22; 544 } 545 return 0; 546 } 547 548 /* 549 * After reading the highest device id from the IOMMU PCI capability header 550 * this function looks if there is a higher device id defined in the ACPI table 551 */ 552 static int __init find_last_devid_from_ivhd(struct ivhd_header *h) 553 { 554 u8 *p = (void *)h, *end = (void *)h; 555 struct ivhd_entry *dev; 556 int last_devid = -EINVAL; 557 558 u32 ivhd_size = get_ivhd_header_size(h); 559 560 if (!ivhd_size) { 561 pr_err("Unsupported IVHD type %#x\n", h->type); 562 return -EINVAL; 563 } 564 565 p += ivhd_size; 566 end += h->length; 567 568 while (p < end) { 569 dev = (struct ivhd_entry *)p; 570 switch (dev->type) { 571 case IVHD_DEV_ALL: 572 /* Use maximum BDF value for DEV_ALL */ 573 return 0xffff; 574 case IVHD_DEV_SELECT: 575 case IVHD_DEV_RANGE_END: 576 case IVHD_DEV_ALIAS: 577 case IVHD_DEV_EXT_SELECT: 578 /* all the above subfield types refer to device ids */ 579 if (dev->devid > last_devid) 580 last_devid = dev->devid; 581 break; 582 default: 583 break; 584 } 585 p += ivhd_entry_length(p); 586 } 587 588 WARN_ON(p != end); 589 590 return last_devid; 591 } 592 593 static int __init check_ivrs_checksum(struct acpi_table_header *table) 594 { 595 int i; 596 u8 checksum = 0, *p = (u8 *)table; 597 598 for (i = 0; i < table->length; ++i) 599 checksum += p[i]; 600 if (checksum != 0) { 601 /* ACPI table corrupt */ 602 pr_err(FW_BUG "IVRS invalid checksum\n"); 603 return -ENODEV; 604 } 605 606 return 0; 607 } 608 609 /* 610 * Iterate over all IVHD entries in the ACPI table 
and find the highest device 611 * id which we need to handle. This is the first of three functions which parse 612 * the ACPI table. So we check the checksum here. 613 */ 614 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg) 615 { 616 u8 *p = (u8 *)table, *end = (u8 *)table; 617 struct ivhd_header *h; 618 int last_devid, last_bdf = 0; 619 620 p += IVRS_HEADER_LENGTH; 621 622 end += table->length; 623 while (p < end) { 624 h = (struct ivhd_header *)p; 625 if (h->pci_seg == pci_seg && 626 h->type == amd_iommu_target_ivhd_type) { 627 last_devid = find_last_devid_from_ivhd(h); 628 629 if (last_devid < 0) 630 return -EINVAL; 631 if (last_devid > last_bdf) 632 last_bdf = last_devid; 633 } 634 p += h->length; 635 } 636 WARN_ON(p != end); 637 638 return last_bdf; 639 } 640 641 /**************************************************************************** 642 * 643 * The following functions belong to the code path which parses the ACPI table 644 * the second time. In this ACPI parsing iteration we allocate IOMMU specific 645 * data structures, initialize the per PCI segment device/alias/rlookup table 646 * and also basically initialize the hardware. 647 * 648 ****************************************************************************/ 649 650 /* Allocate per PCI segment device table */ 651 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) 652 { 653 pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, 654 get_order(pci_seg->dev_table_size)); 655 if (!pci_seg->dev_table) 656 return -ENOMEM; 657 658 return 0; 659 } 660 661 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) 662 { 663 iommu_free_pages(pci_seg->dev_table, 664 get_order(pci_seg->dev_table_size)); 665 pci_seg->dev_table = NULL; 666 } 667 668 /* Allocate per PCI segment IOMMU rlookup table. 
*/ 669 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 670 { 671 pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL, 672 get_order(pci_seg->rlookup_table_size)); 673 if (pci_seg->rlookup_table == NULL) 674 return -ENOMEM; 675 676 return 0; 677 } 678 679 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 680 { 681 iommu_free_pages(pci_seg->rlookup_table, 682 get_order(pci_seg->rlookup_table_size)); 683 pci_seg->rlookup_table = NULL; 684 } 685 686 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 687 { 688 pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL, 689 get_order(pci_seg->rlookup_table_size)); 690 kmemleak_alloc(pci_seg->irq_lookup_table, 691 pci_seg->rlookup_table_size, 1, GFP_KERNEL); 692 if (pci_seg->irq_lookup_table == NULL) 693 return -ENOMEM; 694 695 return 0; 696 } 697 698 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 699 { 700 kmemleak_free(pci_seg->irq_lookup_table); 701 iommu_free_pages(pci_seg->irq_lookup_table, 702 get_order(pci_seg->rlookup_table_size)); 703 pci_seg->irq_lookup_table = NULL; 704 } 705 706 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) 707 { 708 int i; 709 710 pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL, 711 get_order(pci_seg->alias_table_size)); 712 if (!pci_seg->alias_table) 713 return -ENOMEM; 714 715 /* 716 * let all alias entries point to itself 717 */ 718 for (i = 0; i <= pci_seg->last_bdf; ++i) 719 pci_seg->alias_table[i] = i; 720 721 return 0; 722 } 723 724 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) 725 { 726 iommu_free_pages(pci_seg->alias_table, 727 get_order(pci_seg->alias_table_size)); 728 pci_seg->alias_table = NULL; 729 } 730 731 /* 732 * Allocates the command buffer. This buffer is per AMD IOMMU. 
We can 733 * write commands to that buffer later and the IOMMU will execute them 734 * asynchronously 735 */ 736 static int __init alloc_command_buffer(struct amd_iommu *iommu) 737 { 738 iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL, 739 get_order(CMD_BUFFER_SIZE)); 740 741 return iommu->cmd_buf ? 0 : -ENOMEM; 742 } 743 744 /* 745 * Interrupt handler has processed all pending events and adjusted head 746 * and tail pointer. Reset overflow mask and restart logging again. 747 */ 748 void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, 749 u8 cntrl_intr, u8 cntrl_log, 750 u32 status_run_mask, u32 status_overflow_mask) 751 { 752 u32 status; 753 754 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 755 if (status & status_run_mask) 756 return; 757 758 pr_info_ratelimited("IOMMU %s log restarting\n", evt_type); 759 760 iommu_feature_disable(iommu, cntrl_log); 761 iommu_feature_disable(iommu, cntrl_intr); 762 763 writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET); 764 765 iommu_feature_enable(iommu, cntrl_intr); 766 iommu_feature_enable(iommu, cntrl_log); 767 } 768 769 /* 770 * This function restarts event logging in case the IOMMU experienced 771 * an event log buffer overflow. 772 */ 773 void amd_iommu_restart_event_logging(struct amd_iommu *iommu) 774 { 775 amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN, 776 CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK, 777 MMIO_STATUS_EVT_OVERFLOW_MASK); 778 } 779 780 /* 781 * This function restarts event logging in case the IOMMU experienced 782 * GA log overflow. 783 */ 784 void amd_iommu_restart_ga_log(struct amd_iommu *iommu) 785 { 786 amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN, 787 CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK, 788 MMIO_STATUS_GALOG_OVERFLOW_MASK); 789 } 790 791 /* 792 * This function resets the command buffer if the IOMMU stopped fetching 793 * commands from it. 
794 */ 795 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) 796 { 797 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 798 799 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 800 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 801 iommu->cmd_buf_head = 0; 802 iommu->cmd_buf_tail = 0; 803 804 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 805 } 806 807 /* 808 * This function writes the command buffer address to the hardware and 809 * enables it. 810 */ 811 static void iommu_enable_command_buffer(struct amd_iommu *iommu) 812 { 813 u64 entry; 814 815 BUG_ON(iommu->cmd_buf == NULL); 816 817 entry = iommu_virt_to_phys(iommu->cmd_buf); 818 entry |= MMIO_CMD_SIZE_512; 819 820 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 821 &entry, sizeof(entry)); 822 823 amd_iommu_reset_cmd_buffer(iommu); 824 } 825 826 /* 827 * This function disables the command buffer 828 */ 829 static void iommu_disable_command_buffer(struct amd_iommu *iommu) 830 { 831 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 832 } 833 834 static void __init free_command_buffer(struct amd_iommu *iommu) 835 { 836 iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); 837 } 838 839 void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, 840 size_t size) 841 { 842 int order = get_order(size); 843 void *buf = iommu_alloc_pages(gfp, order); 844 845 if (buf && 846 check_feature(FEATURE_SNP) && 847 set_memory_4k((unsigned long)buf, (1 << order))) { 848 iommu_free_pages(buf, order); 849 buf = NULL; 850 } 851 852 return buf; 853 } 854 855 /* allocates the memory where the IOMMU will log its events to */ 856 static int __init alloc_event_buffer(struct amd_iommu *iommu) 857 { 858 iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 859 EVT_BUFFER_SIZE); 860 861 return iommu->evt_buf ? 
0 : -ENOMEM; 862 } 863 864 static void iommu_enable_event_buffer(struct amd_iommu *iommu) 865 { 866 u64 entry; 867 868 BUG_ON(iommu->evt_buf == NULL); 869 870 entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; 871 872 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 873 &entry, sizeof(entry)); 874 875 /* set head and tail to zero manually */ 876 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); 877 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); 878 879 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 880 } 881 882 /* 883 * This function disables the event log buffer 884 */ 885 static void iommu_disable_event_buffer(struct amd_iommu *iommu) 886 { 887 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 888 } 889 890 static void __init free_event_buffer(struct amd_iommu *iommu) 891 { 892 iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); 893 } 894 895 static void free_ga_log(struct amd_iommu *iommu) 896 { 897 #ifdef CONFIG_IRQ_REMAP 898 iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE)); 899 iommu_free_pages(iommu->ga_log_tail, get_order(8)); 900 #endif 901 } 902 903 #ifdef CONFIG_IRQ_REMAP 904 static int iommu_ga_log_enable(struct amd_iommu *iommu) 905 { 906 u32 status, i; 907 u64 entry; 908 909 if (!iommu->ga_log) 910 return -EINVAL; 911 912 entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; 913 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, 914 &entry, sizeof(entry)); 915 entry = (iommu_virt_to_phys(iommu->ga_log_tail) & 916 (BIT_ULL(52)-1)) & ~7ULL; 917 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, 918 &entry, sizeof(entry)); 919 writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); 920 writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); 921 922 923 iommu_feature_enable(iommu, CONTROL_GAINT_EN); 924 iommu_feature_enable(iommu, CONTROL_GALOG_EN); 925 926 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { 927 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 928 if (status & 
(MMIO_STATUS_GALOG_RUN_MASK)) 929 break; 930 udelay(10); 931 } 932 933 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) 934 return -EINVAL; 935 936 return 0; 937 } 938 939 static int iommu_init_ga_log(struct amd_iommu *iommu) 940 { 941 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 942 return 0; 943 944 iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE)); 945 if (!iommu->ga_log) 946 goto err_out; 947 948 iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8)); 949 if (!iommu->ga_log_tail) 950 goto err_out; 951 952 return 0; 953 err_out: 954 free_ga_log(iommu); 955 return -EINVAL; 956 } 957 #endif /* CONFIG_IRQ_REMAP */ 958 959 static int __init alloc_cwwb_sem(struct amd_iommu *iommu) 960 { 961 iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1); 962 963 return iommu->cmd_sem ? 0 : -ENOMEM; 964 } 965 966 static void __init free_cwwb_sem(struct amd_iommu *iommu) 967 { 968 if (iommu->cmd_sem) 969 iommu_free_page((void *)iommu->cmd_sem); 970 } 971 972 static void iommu_enable_xt(struct amd_iommu *iommu) 973 { 974 #ifdef CONFIG_IRQ_REMAP 975 /* 976 * XT mode (32-bit APIC destination ID) requires 977 * GA mode (128-bit IRTE support) as a prerequisite. 978 */ 979 if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) && 980 amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 981 iommu_feature_enable(iommu, CONTROL_XT_EN); 982 #endif /* CONFIG_IRQ_REMAP */ 983 } 984 985 static void iommu_enable_gt(struct amd_iommu *iommu) 986 { 987 if (!check_feature(FEATURE_GT)) 988 return; 989 990 iommu_feature_enable(iommu, CONTROL_GT_EN); 991 } 992 993 /* sets a specific bit in the device table entry. 
*/ 994 static void __set_dev_entry_bit(struct dev_table_entry *dev_table, 995 u16 devid, u8 bit) 996 { 997 int i = (bit >> 6) & 0x03; 998 int _bit = bit & 0x3f; 999 1000 dev_table[devid].data[i] |= (1UL << _bit); 1001 } 1002 1003 static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) 1004 { 1005 struct dev_table_entry *dev_table = get_dev_table(iommu); 1006 1007 return __set_dev_entry_bit(dev_table, devid, bit); 1008 } 1009 1010 static int __get_dev_entry_bit(struct dev_table_entry *dev_table, 1011 u16 devid, u8 bit) 1012 { 1013 int i = (bit >> 6) & 0x03; 1014 int _bit = bit & 0x3f; 1015 1016 return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; 1017 } 1018 1019 static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) 1020 { 1021 struct dev_table_entry *dev_table = get_dev_table(iommu); 1022 1023 return __get_dev_entry_bit(dev_table, devid, bit); 1024 } 1025 1026 static bool __copy_device_table(struct amd_iommu *iommu) 1027 { 1028 u64 int_ctl, int_tab_len, entry = 0; 1029 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1030 struct dev_table_entry *old_devtb = NULL; 1031 u32 lo, hi, devid, old_devtb_size; 1032 phys_addr_t old_devtb_phys; 1033 u16 dom_id, dte_v, irq_v; 1034 u64 tmp; 1035 1036 /* Each IOMMU use separate device table with the same size */ 1037 lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); 1038 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); 1039 entry = (((u64) hi) << 32) + lo; 1040 1041 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; 1042 if (old_devtb_size != pci_seg->dev_table_size) { 1043 pr_err("The device table size of IOMMU:%d is not expected!\n", 1044 iommu->index); 1045 return false; 1046 } 1047 1048 /* 1049 * When SME is enabled in the first kernel, the entry includes the 1050 * memory encryption mask(sme_me_mask), we must remove the memory 1051 * encryption mask to obtain the true physical address in kdump kernel. 
1052 */ 1053 old_devtb_phys = __sme_clr(entry) & PAGE_MASK; 1054 1055 if (old_devtb_phys >= 0x100000000ULL) { 1056 pr_err("The address of old device table is above 4G, not trustworthy!\n"); 1057 return false; 1058 } 1059 old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel()) 1060 ? (__force void *)ioremap_encrypted(old_devtb_phys, 1061 pci_seg->dev_table_size) 1062 : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB); 1063 1064 if (!old_devtb) 1065 return false; 1066 1067 pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, 1068 get_order(pci_seg->dev_table_size)); 1069 if (pci_seg->old_dev_tbl_cpy == NULL) { 1070 pr_err("Failed to allocate memory for copying old device table!\n"); 1071 memunmap(old_devtb); 1072 return false; 1073 } 1074 1075 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 1076 pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid]; 1077 dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; 1078 dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; 1079 1080 if (dte_v && dom_id) { 1081 pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; 1082 pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; 1083 __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); 1084 /* If gcr3 table existed, mask it out */ 1085 if (old_devtb[devid].data[0] & DTE_FLAG_GV) { 1086 tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; 1087 tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; 1088 pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; 1089 tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; 1090 tmp |= DTE_FLAG_GV; 1091 pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; 1092 } 1093 } 1094 1095 irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; 1096 int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; 1097 int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; 1098 if (irq_v && (int_ctl || int_tab_len)) { 1099 if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || 1100 (int_tab_len != DTE_INTTABLEN)) { 1101 
pr_err("Wrong old irq remapping flag: %#x\n", devid); 1102 memunmap(old_devtb); 1103 return false; 1104 } 1105 1106 pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; 1107 } 1108 } 1109 memunmap(old_devtb); 1110 1111 return true; 1112 } 1113 1114 static bool copy_device_table(void) 1115 { 1116 struct amd_iommu *iommu; 1117 struct amd_iommu_pci_seg *pci_seg; 1118 1119 if (!amd_iommu_pre_enabled) 1120 return false; 1121 1122 pr_warn("Translation is already enabled - trying to copy translation structures\n"); 1123 1124 /* 1125 * All IOMMUs within PCI segment shares common device table. 1126 * Hence copy device table only once per PCI segment. 1127 */ 1128 for_each_pci_segment(pci_seg) { 1129 for_each_iommu(iommu) { 1130 if (pci_seg->id != iommu->pci_seg->id) 1131 continue; 1132 if (!__copy_device_table(iommu)) 1133 return false; 1134 break; 1135 } 1136 } 1137 1138 return true; 1139 } 1140 1141 void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) 1142 { 1143 int sysmgt; 1144 1145 sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | 1146 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); 1147 1148 if (sysmgt == 0x01) 1149 set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); 1150 } 1151 1152 /* 1153 * This function takes the device specific flags read from the ACPI 1154 * table and sets up the device table entry with that information 1155 */ 1156 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, 1157 u16 devid, u32 flags, u32 ext_flags) 1158 { 1159 if (flags & ACPI_DEVFLAG_INITPASS) 1160 set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); 1161 if (flags & ACPI_DEVFLAG_EXTINT) 1162 set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); 1163 if (flags & ACPI_DEVFLAG_NMI) 1164 set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); 1165 if (flags & ACPI_DEVFLAG_SYSMGT1) 1166 set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); 1167 if (flags & ACPI_DEVFLAG_SYSMGT2) 1168 set_dev_entry_bit(iommu, devid, 
DEV_ENTRY_SYSMGT2); 1169 if (flags & ACPI_DEVFLAG_LINT0) 1170 set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); 1171 if (flags & ACPI_DEVFLAG_LINT1) 1172 set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); 1173 1174 amd_iommu_apply_erratum_63(iommu, devid); 1175 1176 amd_iommu_set_rlookup_table(iommu, devid); 1177 } 1178 1179 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) 1180 { 1181 struct devid_map *entry; 1182 struct list_head *list; 1183 1184 if (type == IVHD_SPECIAL_IOAPIC) 1185 list = &ioapic_map; 1186 else if (type == IVHD_SPECIAL_HPET) 1187 list = &hpet_map; 1188 else 1189 return -EINVAL; 1190 1191 list_for_each_entry(entry, list, list) { 1192 if (!(entry->id == id && entry->cmd_line)) 1193 continue; 1194 1195 pr_info("Command-line override present for %s id %d - ignoring\n", 1196 type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id); 1197 1198 *devid = entry->devid; 1199 1200 return 0; 1201 } 1202 1203 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1204 if (!entry) 1205 return -ENOMEM; 1206 1207 entry->id = id; 1208 entry->devid = *devid; 1209 entry->cmd_line = cmd_line; 1210 1211 list_add_tail(&entry->list, list); 1212 1213 return 0; 1214 } 1215 1216 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, 1217 bool cmd_line) 1218 { 1219 struct acpihid_map_entry *entry; 1220 struct list_head *list = &acpihid_map; 1221 1222 list_for_each_entry(entry, list, list) { 1223 if (strcmp(entry->hid, hid) || 1224 (*uid && *entry->uid && strcmp(entry->uid, uid)) || 1225 !entry->cmd_line) 1226 continue; 1227 1228 pr_info("Command-line override for hid:%s uid:%s\n", 1229 hid, uid); 1230 *devid = entry->devid; 1231 return 0; 1232 } 1233 1234 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1235 if (!entry) 1236 return -ENOMEM; 1237 1238 memcpy(entry->uid, uid, strlen(uid)); 1239 memcpy(entry->hid, hid, strlen(hid)); 1240 entry->devid = *devid; 1241 entry->cmd_line = cmd_line; 1242 entry->root_devid = (entry->devid & (~0x7)); 1243 
1244 pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n", 1245 entry->cmd_line ? "cmd" : "ivrs", 1246 entry->hid, entry->uid, entry->root_devid); 1247 1248 list_add_tail(&entry->list, list); 1249 return 0; 1250 } 1251 1252 static int __init add_early_maps(void) 1253 { 1254 int i, ret; 1255 1256 for (i = 0; i < early_ioapic_map_size; ++i) { 1257 ret = add_special_device(IVHD_SPECIAL_IOAPIC, 1258 early_ioapic_map[i].id, 1259 &early_ioapic_map[i].devid, 1260 early_ioapic_map[i].cmd_line); 1261 if (ret) 1262 return ret; 1263 } 1264 1265 for (i = 0; i < early_hpet_map_size; ++i) { 1266 ret = add_special_device(IVHD_SPECIAL_HPET, 1267 early_hpet_map[i].id, 1268 &early_hpet_map[i].devid, 1269 early_hpet_map[i].cmd_line); 1270 if (ret) 1271 return ret; 1272 } 1273 1274 for (i = 0; i < early_acpihid_map_size; ++i) { 1275 ret = add_acpi_hid_device(early_acpihid_map[i].hid, 1276 early_acpihid_map[i].uid, 1277 &early_acpihid_map[i].devid, 1278 early_acpihid_map[i].cmd_line); 1279 if (ret) 1280 return ret; 1281 } 1282 1283 return 0; 1284 } 1285 1286 /* 1287 * Takes a pointer to an AMD IOMMU entry in the ACPI table and 1288 * initializes the hardware and our data structures with it. 1289 */ 1290 static int __init init_iommu_from_acpi(struct amd_iommu *iommu, 1291 struct ivhd_header *h) 1292 { 1293 u8 *p = (u8 *)h; 1294 u8 *end = p, flags = 0; 1295 u16 devid = 0, devid_start = 0, devid_to = 0, seg_id; 1296 u32 dev_i, ext_flags = 0; 1297 bool alias = false; 1298 struct ivhd_entry *e; 1299 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1300 u32 ivhd_size; 1301 int ret; 1302 1303 1304 ret = add_early_maps(); 1305 if (ret) 1306 return ret; 1307 1308 amd_iommu_apply_ivrs_quirks(); 1309 1310 /* 1311 * First save the recommended feature enable bits from ACPI 1312 */ 1313 iommu->acpi_flags = h->flags; 1314 1315 /* 1316 * Done. 
Now parse the device entries 1317 */ 1318 ivhd_size = get_ivhd_header_size(h); 1319 if (!ivhd_size) { 1320 pr_err("Unsupported IVHD type %#x\n", h->type); 1321 return -EINVAL; 1322 } 1323 1324 p += ivhd_size; 1325 1326 end += h->length; 1327 1328 1329 while (p < end) { 1330 e = (struct ivhd_entry *)p; 1331 seg_id = pci_seg->id; 1332 1333 switch (e->type) { 1334 case IVHD_DEV_ALL: 1335 1336 DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); 1337 1338 for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) 1339 set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); 1340 break; 1341 case IVHD_DEV_SELECT: 1342 1343 DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " 1344 "flags: %02x\n", 1345 seg_id, PCI_BUS_NUM(e->devid), 1346 PCI_SLOT(e->devid), 1347 PCI_FUNC(e->devid), 1348 e->flags); 1349 1350 devid = e->devid; 1351 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1352 break; 1353 case IVHD_DEV_SELECT_RANGE_START: 1354 1355 DUMP_printk(" DEV_SELECT_RANGE_START\t " 1356 "devid: %04x:%02x:%02x.%x flags: %02x\n", 1357 seg_id, PCI_BUS_NUM(e->devid), 1358 PCI_SLOT(e->devid), 1359 PCI_FUNC(e->devid), 1360 e->flags); 1361 1362 devid_start = e->devid; 1363 flags = e->flags; 1364 ext_flags = 0; 1365 alias = false; 1366 break; 1367 case IVHD_DEV_ALIAS: 1368 1369 DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " 1370 "flags: %02x devid_to: %02x:%02x.%x\n", 1371 seg_id, PCI_BUS_NUM(e->devid), 1372 PCI_SLOT(e->devid), 1373 PCI_FUNC(e->devid), 1374 e->flags, 1375 PCI_BUS_NUM(e->ext >> 8), 1376 PCI_SLOT(e->ext >> 8), 1377 PCI_FUNC(e->ext >> 8)); 1378 1379 devid = e->devid; 1380 devid_to = e->ext >> 8; 1381 set_dev_entry_from_acpi(iommu, devid , e->flags, 0); 1382 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); 1383 pci_seg->alias_table[devid] = devid_to; 1384 break; 1385 case IVHD_DEV_ALIAS_RANGE: 1386 1387 DUMP_printk(" DEV_ALIAS_RANGE\t\t " 1388 "devid: %04x:%02x:%02x.%x flags: %02x " 1389 "devid_to: %04x:%02x:%02x.%x\n", 1390 seg_id, PCI_BUS_NUM(e->devid), 
1391 PCI_SLOT(e->devid), 1392 PCI_FUNC(e->devid), 1393 e->flags, 1394 seg_id, PCI_BUS_NUM(e->ext >> 8), 1395 PCI_SLOT(e->ext >> 8), 1396 PCI_FUNC(e->ext >> 8)); 1397 1398 devid_start = e->devid; 1399 flags = e->flags; 1400 devid_to = e->ext >> 8; 1401 ext_flags = 0; 1402 alias = true; 1403 break; 1404 case IVHD_DEV_EXT_SELECT: 1405 1406 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " 1407 "flags: %02x ext: %08x\n", 1408 seg_id, PCI_BUS_NUM(e->devid), 1409 PCI_SLOT(e->devid), 1410 PCI_FUNC(e->devid), 1411 e->flags, e->ext); 1412 1413 devid = e->devid; 1414 set_dev_entry_from_acpi(iommu, devid, e->flags, 1415 e->ext); 1416 break; 1417 case IVHD_DEV_EXT_SELECT_RANGE: 1418 1419 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " 1420 "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", 1421 seg_id, PCI_BUS_NUM(e->devid), 1422 PCI_SLOT(e->devid), 1423 PCI_FUNC(e->devid), 1424 e->flags, e->ext); 1425 1426 devid_start = e->devid; 1427 flags = e->flags; 1428 ext_flags = e->ext; 1429 alias = false; 1430 break; 1431 case IVHD_DEV_RANGE_END: 1432 1433 DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", 1434 seg_id, PCI_BUS_NUM(e->devid), 1435 PCI_SLOT(e->devid), 1436 PCI_FUNC(e->devid)); 1437 1438 devid = e->devid; 1439 for (dev_i = devid_start; dev_i <= devid; ++dev_i) { 1440 if (alias) { 1441 pci_seg->alias_table[dev_i] = devid_to; 1442 set_dev_entry_from_acpi(iommu, 1443 devid_to, flags, ext_flags); 1444 } 1445 set_dev_entry_from_acpi(iommu, dev_i, 1446 flags, ext_flags); 1447 } 1448 break; 1449 case IVHD_DEV_SPECIAL: { 1450 u8 handle, type; 1451 const char *var; 1452 u32 devid; 1453 int ret; 1454 1455 handle = e->ext & 0xff; 1456 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8)); 1457 type = (e->ext >> 24) & 0xff; 1458 1459 if (type == IVHD_SPECIAL_IOAPIC) 1460 var = "IOAPIC"; 1461 else if (type == IVHD_SPECIAL_HPET) 1462 var = "HPET"; 1463 else 1464 var = "UNKNOWN"; 1465 1466 DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", 1467 var, 
(int)handle, 1468 seg_id, PCI_BUS_NUM(devid), 1469 PCI_SLOT(devid), 1470 PCI_FUNC(devid)); 1471 1472 ret = add_special_device(type, handle, &devid, false); 1473 if (ret) 1474 return ret; 1475 1476 /* 1477 * add_special_device might update the devid in case a 1478 * command-line override is present. So call 1479 * set_dev_entry_from_acpi after add_special_device. 1480 */ 1481 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1482 1483 break; 1484 } 1485 case IVHD_DEV_ACPI_HID: { 1486 u32 devid; 1487 u8 hid[ACPIHID_HID_LEN]; 1488 u8 uid[ACPIHID_UID_LEN]; 1489 int ret; 1490 1491 if (h->type != 0x40) { 1492 pr_err(FW_BUG "Invalid IVHD device type %#x\n", 1493 e->type); 1494 break; 1495 } 1496 1497 BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1); 1498 memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1); 1499 hid[ACPIHID_HID_LEN - 1] = '\0'; 1500 1501 if (!(*hid)) { 1502 pr_err(FW_BUG "Invalid HID.\n"); 1503 break; 1504 } 1505 1506 uid[0] = '\0'; 1507 switch (e->uidf) { 1508 case UID_NOT_PRESENT: 1509 1510 if (e->uidl != 0) 1511 pr_warn(FW_BUG "Invalid UID length.\n"); 1512 1513 break; 1514 case UID_IS_INTEGER: 1515 1516 sprintf(uid, "%d", e->uid); 1517 1518 break; 1519 case UID_IS_CHARACTER: 1520 1521 memcpy(uid, &e->uid, e->uidl); 1522 uid[e->uidl] = '\0'; 1523 1524 break; 1525 default: 1526 break; 1527 } 1528 1529 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); 1530 DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", 1531 hid, uid, seg_id, 1532 PCI_BUS_NUM(devid), 1533 PCI_SLOT(devid), 1534 PCI_FUNC(devid)); 1535 1536 flags = e->flags; 1537 1538 ret = add_acpi_hid_device(hid, uid, &devid, false); 1539 if (ret) 1540 return ret; 1541 1542 /* 1543 * add_special_device might update the devid in case a 1544 * command-line override is present. So call 1545 * set_dev_entry_from_acpi after add_special_device. 
1546 */ 1547 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1548 1549 break; 1550 } 1551 default: 1552 break; 1553 } 1554 1555 p += ivhd_entry_length(p); 1556 } 1557 1558 return 0; 1559 } 1560 1561 /* Allocate PCI segment data structure */ 1562 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, 1563 struct acpi_table_header *ivrs_base) 1564 { 1565 struct amd_iommu_pci_seg *pci_seg; 1566 int last_bdf; 1567 1568 /* 1569 * First parse ACPI tables to find the largest Bus/Dev/Func we need to 1570 * handle in this PCI segment. Upon this information the shared data 1571 * structures for the PCI segments in the system will be allocated. 1572 */ 1573 last_bdf = find_last_devid_acpi(ivrs_base, id); 1574 if (last_bdf < 0) 1575 return NULL; 1576 1577 pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL); 1578 if (pci_seg == NULL) 1579 return NULL; 1580 1581 pci_seg->last_bdf = last_bdf; 1582 DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); 1583 pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); 1584 pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); 1585 pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); 1586 1587 pci_seg->id = id; 1588 init_llist_head(&pci_seg->dev_data_list); 1589 INIT_LIST_HEAD(&pci_seg->unity_map); 1590 list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); 1591 1592 if (alloc_dev_table(pci_seg)) 1593 return NULL; 1594 if (alloc_alias_table(pci_seg)) 1595 return NULL; 1596 if (alloc_rlookup_table(pci_seg)) 1597 return NULL; 1598 1599 return pci_seg; 1600 } 1601 1602 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, 1603 struct acpi_table_header *ivrs_base) 1604 { 1605 struct amd_iommu_pci_seg *pci_seg; 1606 1607 for_each_pci_segment(pci_seg) { 1608 if (pci_seg->id == id) 1609 return pci_seg; 1610 } 1611 1612 return alloc_pci_segment(id, ivrs_base); 1613 } 1614 1615 static void __init free_pci_segments(void) 1616 { 1617 struct 
amd_iommu_pci_seg *pci_seg, *next; 1618 1619 for_each_pci_segment_safe(pci_seg, next) { 1620 list_del(&pci_seg->list); 1621 free_irq_lookup_table(pci_seg); 1622 free_rlookup_table(pci_seg); 1623 free_alias_table(pci_seg); 1624 free_dev_table(pci_seg); 1625 kfree(pci_seg); 1626 } 1627 } 1628 1629 static void __init free_iommu_one(struct amd_iommu *iommu) 1630 { 1631 free_cwwb_sem(iommu); 1632 free_command_buffer(iommu); 1633 free_event_buffer(iommu); 1634 amd_iommu_free_ppr_log(iommu); 1635 free_ga_log(iommu); 1636 iommu_unmap_mmio_space(iommu); 1637 amd_iommu_iopf_uninit(iommu); 1638 } 1639 1640 static void __init free_iommu_all(void) 1641 { 1642 struct amd_iommu *iommu, *next; 1643 1644 for_each_iommu_safe(iommu, next) { 1645 list_del(&iommu->list); 1646 free_iommu_one(iommu); 1647 kfree(iommu); 1648 } 1649 } 1650 1651 /* 1652 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations) 1653 * Workaround: 1654 * BIOS should disable L2B micellaneous clock gating by setting 1655 * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b 1656 */ 1657 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) 1658 { 1659 u32 value; 1660 1661 if ((boot_cpu_data.x86 != 0x15) || 1662 (boot_cpu_data.x86_model < 0x10) || 1663 (boot_cpu_data.x86_model > 0x1f)) 1664 return; 1665 1666 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1667 pci_read_config_dword(iommu->dev, 0xf4, &value); 1668 1669 if (value & BIT(2)) 1670 return; 1671 1672 /* Select NB indirect register 0x90 and enable writing */ 1673 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); 1674 1675 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); 1676 pci_info(iommu->dev, "Applying erratum 746 workaround\n"); 1677 1678 /* Clear the enable writing bit */ 1679 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1680 } 1681 1682 /* 1683 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) 1684 * Workaround: 1685 * BIOS should enable ATS write permission check 
by setting 1686 * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b 1687 */ 1688 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) 1689 { 1690 u32 value; 1691 1692 if ((boot_cpu_data.x86 != 0x15) || 1693 (boot_cpu_data.x86_model < 0x30) || 1694 (boot_cpu_data.x86_model > 0x3f)) 1695 return; 1696 1697 /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ 1698 value = iommu_read_l2(iommu, 0x47); 1699 1700 if (value & BIT(0)) 1701 return; 1702 1703 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ 1704 iommu_write_l2(iommu, 0x47, value | BIT(0)); 1705 1706 pci_info(iommu->dev, "Applying ATS write check workaround\n"); 1707 } 1708 1709 /* 1710 * This function glues the initialization function for one IOMMU 1711 * together and also allocates the command buffer and programs the 1712 * hardware. It does NOT enable the IOMMU. This is done afterwards. 1713 */ 1714 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, 1715 struct acpi_table_header *ivrs_base) 1716 { 1717 struct amd_iommu_pci_seg *pci_seg; 1718 1719 pci_seg = get_pci_segment(h->pci_seg, ivrs_base); 1720 if (pci_seg == NULL) 1721 return -ENOMEM; 1722 iommu->pci_seg = pci_seg; 1723 1724 raw_spin_lock_init(&iommu->lock); 1725 atomic64_set(&iommu->cmd_sem_val, 0); 1726 1727 /* Add IOMMU to internal data structures */ 1728 list_add_tail(&iommu->list, &amd_iommu_list); 1729 iommu->index = amd_iommus_present++; 1730 1731 if (unlikely(iommu->index >= MAX_IOMMUS)) { 1732 WARN(1, "System has more IOMMUs than supported by this driver\n"); 1733 return -ENOSYS; 1734 } 1735 1736 /* Index is fine - add IOMMU to the array */ 1737 amd_iommus[iommu->index] = iommu; 1738 1739 /* 1740 * Copy data from ACPI table entry to the iommu struct 1741 */ 1742 iommu->devid = h->devid; 1743 iommu->cap_ptr = h->cap_ptr; 1744 iommu->mmio_phys = h->mmio_phys; 1745 1746 switch (h->type) { 1747 case 0x10: 1748 /* Check if IVHD EFR contains proper max banks/counters */ 1749 if ((h->efr_attr != 0) && 1750 
((h->efr_attr & (0xF << 13)) != 0) && 1751 ((h->efr_attr & (0x3F << 17)) != 0)) 1752 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1753 else 1754 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1755 1756 /* 1757 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. 1758 * GAM also requires GA mode. Therefore, we need to 1759 * check cmpxchg16b support before enabling it. 1760 */ 1761 if (!boot_cpu_has(X86_FEATURE_CX16) || 1762 ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) 1763 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1764 break; 1765 case 0x11: 1766 case 0x40: 1767 if (h->efr_reg & (1 << 9)) 1768 iommu->mmio_phys_end = MMIO_REG_END_OFFSET; 1769 else 1770 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; 1771 1772 /* 1773 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. 1774 * XT, GAM also requires GA mode. Therefore, we need to 1775 * check cmpxchg16b support before enabling them. 1776 */ 1777 if (!boot_cpu_has(X86_FEATURE_CX16) || 1778 ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { 1779 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1780 break; 1781 } 1782 1783 if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) 1784 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; 1785 1786 early_iommu_features_init(iommu, h); 1787 1788 break; 1789 default: 1790 return -EINVAL; 1791 } 1792 1793 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, 1794 iommu->mmio_phys_end); 1795 if (!iommu->mmio_base) 1796 return -ENOMEM; 1797 1798 return init_iommu_from_acpi(iommu, h); 1799 } 1800 1801 static int __init init_iommu_one_late(struct amd_iommu *iommu) 1802 { 1803 int ret; 1804 1805 if (alloc_cwwb_sem(iommu)) 1806 return -ENOMEM; 1807 1808 if (alloc_command_buffer(iommu)) 1809 return -ENOMEM; 1810 1811 if (alloc_event_buffer(iommu)) 1812 return -ENOMEM; 1813 1814 iommu->int_enabled = false; 1815 1816 init_translation_status(iommu); 1817 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 1818 iommu_disable(iommu); 1819 
clear_translation_pre_enabled(iommu); 1820 pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n", 1821 iommu->index); 1822 } 1823 if (amd_iommu_pre_enabled) 1824 amd_iommu_pre_enabled = translation_pre_enabled(iommu); 1825 1826 if (amd_iommu_irq_remap) { 1827 ret = amd_iommu_create_irq_domain(iommu); 1828 if (ret) 1829 return ret; 1830 } 1831 1832 /* 1833 * Make sure IOMMU is not considered to translate itself. The IVRS 1834 * table tells us so, but this is a lie! 1835 */ 1836 iommu->pci_seg->rlookup_table[iommu->devid] = NULL; 1837 1838 return 0; 1839 } 1840 1841 /** 1842 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type 1843 * @ivrs: Pointer to the IVRS header 1844 * 1845 * This function search through all IVDB of the maximum supported IVHD 1846 */ 1847 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) 1848 { 1849 u8 *base = (u8 *)ivrs; 1850 struct ivhd_header *ivhd = (struct ivhd_header *) 1851 (base + IVRS_HEADER_LENGTH); 1852 u8 last_type = ivhd->type; 1853 u16 devid = ivhd->devid; 1854 1855 while (((u8 *)ivhd - base < ivrs->length) && 1856 (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) { 1857 u8 *p = (u8 *) ivhd; 1858 1859 if (ivhd->devid == devid) 1860 last_type = ivhd->type; 1861 ivhd = (struct ivhd_header *)(p + ivhd->length); 1862 } 1863 1864 return last_type; 1865 } 1866 1867 /* 1868 * Iterates over all IOMMU entries in the ACPI table, allocates the 1869 * IOMMU structure and initializes it with init_iommu_one() 1870 */ 1871 static int __init init_iommu_all(struct acpi_table_header *table) 1872 { 1873 u8 *p = (u8 *)table, *end = (u8 *)table; 1874 struct ivhd_header *h; 1875 struct amd_iommu *iommu; 1876 int ret; 1877 1878 end += table->length; 1879 p += IVRS_HEADER_LENGTH; 1880 1881 /* Phase 1: Process all IVHD blocks */ 1882 while (p < end) { 1883 h = (struct ivhd_header *)p; 1884 if (*p == amd_iommu_target_ivhd_type) { 1885 1886 DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x " 1887 
"flags: %01x info %04x\n", 1888 h->pci_seg, PCI_BUS_NUM(h->devid), 1889 PCI_SLOT(h->devid), PCI_FUNC(h->devid), 1890 h->cap_ptr, h->flags, h->info); 1891 DUMP_printk(" mmio-addr: %016llx\n", 1892 h->mmio_phys); 1893 1894 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 1895 if (iommu == NULL) 1896 return -ENOMEM; 1897 1898 ret = init_iommu_one(iommu, h, table); 1899 if (ret) 1900 return ret; 1901 } 1902 p += h->length; 1903 1904 } 1905 WARN_ON(p != end); 1906 1907 /* Phase 2 : Early feature support check */ 1908 get_global_efr(); 1909 1910 /* Phase 3 : Enabling IOMMU features */ 1911 for_each_iommu(iommu) { 1912 ret = init_iommu_one_late(iommu); 1913 if (ret) 1914 return ret; 1915 } 1916 1917 return 0; 1918 } 1919 1920 static void init_iommu_perf_ctr(struct amd_iommu *iommu) 1921 { 1922 u64 val; 1923 struct pci_dev *pdev = iommu->dev; 1924 1925 if (!check_feature(FEATURE_PC)) 1926 return; 1927 1928 amd_iommu_pc_present = true; 1929 1930 pci_info(pdev, "IOMMU performance counters supported\n"); 1931 1932 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); 1933 iommu->max_banks = (u8) ((val >> 12) & 0x3f); 1934 iommu->max_counters = (u8) ((val >> 7) & 0xf); 1935 1936 return; 1937 } 1938 1939 static ssize_t amd_iommu_show_cap(struct device *dev, 1940 struct device_attribute *attr, 1941 char *buf) 1942 { 1943 struct amd_iommu *iommu = dev_to_amd_iommu(dev); 1944 return sysfs_emit(buf, "%x\n", iommu->cap); 1945 } 1946 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); 1947 1948 static ssize_t amd_iommu_show_features(struct device *dev, 1949 struct device_attribute *attr, 1950 char *buf) 1951 { 1952 return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2); 1953 } 1954 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); 1955 1956 static struct attribute *amd_iommu_attrs[] = { 1957 &dev_attr_cap.attr, 1958 &dev_attr_features.attr, 1959 NULL, 1960 }; 1961 1962 static struct attribute_group amd_iommu_group = { 1963 .name = 
"amd-iommu", 1964 .attrs = amd_iommu_attrs, 1965 }; 1966 1967 static const struct attribute_group *amd_iommu_groups[] = { 1968 &amd_iommu_group, 1969 NULL, 1970 }; 1971 1972 /* 1973 * Note: IVHD 0x11 and 0x40 also contains exact copy 1974 * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. 1975 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). 1976 */ 1977 static void __init late_iommu_features_init(struct amd_iommu *iommu) 1978 { 1979 u64 features, features2; 1980 1981 if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) 1982 return; 1983 1984 /* read extended feature bits */ 1985 features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); 1986 features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); 1987 1988 if (!amd_iommu_efr) { 1989 amd_iommu_efr = features; 1990 amd_iommu_efr2 = features2; 1991 return; 1992 } 1993 1994 /* 1995 * Sanity check and warn if EFR values from 1996 * IVHD and MMIO conflict. 1997 */ 1998 if (features != amd_iommu_efr || 1999 features2 != amd_iommu_efr2) { 2000 pr_warn(FW_WARN 2001 "EFR mismatch. 
Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", 2002 features, amd_iommu_efr, 2003 features2, amd_iommu_efr2); 2004 } 2005 } 2006 2007 static int __init iommu_init_pci(struct amd_iommu *iommu) 2008 { 2009 int cap_ptr = iommu->cap_ptr; 2010 int ret; 2011 2012 iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2013 PCI_BUS_NUM(iommu->devid), 2014 iommu->devid & 0xff); 2015 if (!iommu->dev) 2016 return -ENODEV; 2017 2018 /* Prevent binding other PCI device drivers to IOMMU devices */ 2019 iommu->dev->match_driver = false; 2020 2021 /* ACPI _PRT won't have an IRQ for IOMMU */ 2022 iommu->dev->irq_managed = 1; 2023 2024 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 2025 &iommu->cap); 2026 2027 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) 2028 amd_iommu_iotlb_sup = false; 2029 2030 late_iommu_features_init(iommu); 2031 2032 if (check_feature(FEATURE_GT)) { 2033 int glxval; 2034 u64 pasmax; 2035 2036 pasmax = amd_iommu_efr & FEATURE_PASID_MASK; 2037 pasmax >>= FEATURE_PASID_SHIFT; 2038 iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; 2039 2040 BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); 2041 2042 glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK; 2043 glxval >>= FEATURE_GLXVAL_SHIFT; 2044 2045 if (amd_iommu_max_glx_val == -1) 2046 amd_iommu_max_glx_val = glxval; 2047 else 2048 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); 2049 2050 iommu_enable_gt(iommu); 2051 } 2052 2053 if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu)) 2054 return -ENOMEM; 2055 2056 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { 2057 pr_info("Using strict mode due to virtualization\n"); 2058 iommu_set_dma_strict(); 2059 amd_iommu_np_cache = true; 2060 } 2061 2062 init_iommu_perf_ctr(iommu); 2063 2064 if (amd_iommu_pgtable == AMD_IOMMU_V2) { 2065 if (!check_feature(FEATURE_GIOSUP) || 2066 !check_feature(FEATURE_GT)) { 2067 pr_warn("Cannot enable v2 page table for DMA-API. 
Fallback to v1.\n"); 2068 amd_iommu_pgtable = AMD_IOMMU_V1; 2069 } 2070 } 2071 2072 if (is_rd890_iommu(iommu->dev)) { 2073 int i, j; 2074 2075 iommu->root_pdev = 2076 pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2077 iommu->dev->bus->number, 2078 PCI_DEVFN(0, 0)); 2079 2080 /* 2081 * Some rd890 systems may not be fully reconfigured by the 2082 * BIOS, so it's necessary for us to store this information so 2083 * it can be reprogrammed on resume 2084 */ 2085 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, 2086 &iommu->stored_addr_lo); 2087 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, 2088 &iommu->stored_addr_hi); 2089 2090 /* Low bit locks writes to configuration space */ 2091 iommu->stored_addr_lo &= ~1; 2092 2093 for (i = 0; i < 6; i++) 2094 for (j = 0; j < 0x12; j++) 2095 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); 2096 2097 for (i = 0; i < 0x83; i++) 2098 iommu->stored_l2[i] = iommu_read_l2(iommu, i); 2099 } 2100 2101 amd_iommu_erratum_746_workaround(iommu); 2102 amd_iommu_ats_write_check_workaround(iommu); 2103 2104 ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, 2105 amd_iommu_groups, "ivhd%d", iommu->index); 2106 if (ret) 2107 return ret; 2108 2109 /* 2110 * Allocate per IOMMU IOPF queue here so that in attach device path, 2111 * PRI capable device can be added to IOPF queue 2112 */ 2113 if (amd_iommu_gt_ppr_supported()) { 2114 ret = amd_iommu_iopf_init(iommu); 2115 if (ret) 2116 return ret; 2117 } 2118 2119 iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); 2120 2121 return pci_enable_device(iommu->dev); 2122 } 2123 2124 static void print_iommu_info(void) 2125 { 2126 int i; 2127 static const char * const feat_str[] = { 2128 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", 2129 "IA", "GA", "HE", "PC" 2130 }; 2131 2132 if (amd_iommu_efr) { 2133 pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2); 2134 2135 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { 2136 if (check_feature(1ULL << i)) 2137 
pr_cont(" %s", feat_str[i]); 2138 } 2139 2140 if (check_feature(FEATURE_GAM_VAPIC)) 2141 pr_cont(" GA_vAPIC"); 2142 2143 if (check_feature(FEATURE_SNP)) 2144 pr_cont(" SNP"); 2145 2146 pr_cont("\n"); 2147 } 2148 2149 if (irq_remapping_enabled) { 2150 pr_info("Interrupt remapping enabled\n"); 2151 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2152 pr_info("X2APIC enabled\n"); 2153 } 2154 if (amd_iommu_pgtable == AMD_IOMMU_V2) { 2155 pr_info("V2 page table enabled (Paging mode : %d level)\n", 2156 amd_iommu_gpt_level); 2157 } 2158 } 2159 2160 static int __init amd_iommu_init_pci(void) 2161 { 2162 struct amd_iommu *iommu; 2163 struct amd_iommu_pci_seg *pci_seg; 2164 int ret; 2165 2166 for_each_iommu(iommu) { 2167 ret = iommu_init_pci(iommu); 2168 if (ret) { 2169 pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n", 2170 iommu->index, ret); 2171 goto out; 2172 } 2173 /* Need to setup range after PCI init */ 2174 iommu_set_cwwb_range(iommu); 2175 } 2176 2177 /* 2178 * Order is important here to make sure any unity map requirements are 2179 * fulfilled. The unity mappings are created and written to the device 2180 * table during the iommu_init_pci() call. 2181 * 2182 * After that we call init_device_table_dma() to make sure any 2183 * uninitialized DTE will block DMA, and in the end we flush the caches 2184 * of all IOMMUs to make sure the changes to the device table are 2185 * active. 2186 */ 2187 for_each_pci_segment(pci_seg) 2188 init_device_table_dma(pci_seg); 2189 2190 for_each_iommu(iommu) 2191 amd_iommu_flush_all_caches(iommu); 2192 2193 print_iommu_info(); 2194 2195 out: 2196 return ret; 2197 } 2198 2199 /**************************************************************************** 2200 * 2201 * The following functions initialize the MSI interrupts for all IOMMUs 2202 * in the system. It's a bit challenging because there could be multiple 2203 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per 2204 * pci_dev. 
2205 * 2206 ****************************************************************************/ 2207 2208 static int iommu_setup_msi(struct amd_iommu *iommu) 2209 { 2210 int r; 2211 2212 r = pci_enable_msi(iommu->dev); 2213 if (r) 2214 return r; 2215 2216 r = request_threaded_irq(iommu->dev->irq, 2217 amd_iommu_int_handler, 2218 amd_iommu_int_thread, 2219 0, "AMD-Vi", 2220 iommu); 2221 2222 if (r) { 2223 pci_disable_msi(iommu->dev); 2224 return r; 2225 } 2226 2227 return 0; 2228 } 2229 2230 union intcapxt { 2231 u64 capxt; 2232 struct { 2233 u64 reserved_0 : 2, 2234 dest_mode_logical : 1, 2235 reserved_1 : 5, 2236 destid_0_23 : 24, 2237 vector : 8, 2238 reserved_2 : 16, 2239 destid_24_31 : 8; 2240 }; 2241 } __attribute__ ((packed)); 2242 2243 2244 static struct irq_chip intcapxt_controller; 2245 2246 static int intcapxt_irqdomain_activate(struct irq_domain *domain, 2247 struct irq_data *irqd, bool reserve) 2248 { 2249 return 0; 2250 } 2251 2252 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, 2253 struct irq_data *irqd) 2254 { 2255 } 2256 2257 2258 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, 2259 unsigned int nr_irqs, void *arg) 2260 { 2261 struct irq_alloc_info *info = arg; 2262 int i, ret; 2263 2264 if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) 2265 return -EINVAL; 2266 2267 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 2268 if (ret < 0) 2269 return ret; 2270 2271 for (i = virq; i < virq + nr_irqs; i++) { 2272 struct irq_data *irqd = irq_domain_get_irq_data(domain, i); 2273 2274 irqd->chip = &intcapxt_controller; 2275 irqd->hwirq = info->hwirq; 2276 irqd->chip_data = info->data; 2277 __irq_set_handler(i, handle_edge_irq, 0, "edge"); 2278 } 2279 2280 return ret; 2281 } 2282 2283 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, 2284 unsigned int nr_irqs) 2285 { 2286 irq_domain_free_irqs_top(domain, virq, nr_irqs); 2287 } 2288 2289 2290 static void 
intcapxt_unmask_irq(struct irq_data *irqd) 2291 { 2292 struct amd_iommu *iommu = irqd->chip_data; 2293 struct irq_cfg *cfg = irqd_cfg(irqd); 2294 union intcapxt xt; 2295 2296 xt.capxt = 0ULL; 2297 xt.dest_mode_logical = apic->dest_mode_logical; 2298 xt.vector = cfg->vector; 2299 xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2300 xt.destid_24_31 = cfg->dest_apicid >> 24; 2301 2302 writeq(xt.capxt, iommu->mmio_base + irqd->hwirq); 2303 } 2304 2305 static void intcapxt_mask_irq(struct irq_data *irqd) 2306 { 2307 struct amd_iommu *iommu = irqd->chip_data; 2308 2309 writeq(0, iommu->mmio_base + irqd->hwirq); 2310 } 2311 2312 2313 static int intcapxt_set_affinity(struct irq_data *irqd, 2314 const struct cpumask *mask, bool force) 2315 { 2316 struct irq_data *parent = irqd->parent_data; 2317 int ret; 2318 2319 ret = parent->chip->irq_set_affinity(parent, mask, force); 2320 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 2321 return ret; 2322 return 0; 2323 } 2324 2325 static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on) 2326 { 2327 return on ? 
-EOPNOTSUPP : 0; 2328 } 2329 2330 static struct irq_chip intcapxt_controller = { 2331 .name = "IOMMU-MSI", 2332 .irq_unmask = intcapxt_unmask_irq, 2333 .irq_mask = intcapxt_mask_irq, 2334 .irq_ack = irq_chip_ack_parent, 2335 .irq_retrigger = irq_chip_retrigger_hierarchy, 2336 .irq_set_affinity = intcapxt_set_affinity, 2337 .irq_set_wake = intcapxt_set_wake, 2338 .flags = IRQCHIP_MASK_ON_SUSPEND, 2339 }; 2340 2341 static const struct irq_domain_ops intcapxt_domain_ops = { 2342 .alloc = intcapxt_irqdomain_alloc, 2343 .free = intcapxt_irqdomain_free, 2344 .activate = intcapxt_irqdomain_activate, 2345 .deactivate = intcapxt_irqdomain_deactivate, 2346 }; 2347 2348 2349 static struct irq_domain *iommu_irqdomain; 2350 2351 static struct irq_domain *iommu_get_irqdomain(void) 2352 { 2353 struct fwnode_handle *fn; 2354 2355 /* No need for locking here (yet) as the init is single-threaded */ 2356 if (iommu_irqdomain) 2357 return iommu_irqdomain; 2358 2359 fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); 2360 if (!fn) 2361 return NULL; 2362 2363 iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, 2364 fn, &intcapxt_domain_ops, 2365 NULL); 2366 if (!iommu_irqdomain) 2367 irq_domain_free_fwnode(fn); 2368 2369 return iommu_irqdomain; 2370 } 2371 2372 static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname, 2373 int hwirq, irq_handler_t thread_fn) 2374 { 2375 struct irq_domain *domain; 2376 struct irq_alloc_info info; 2377 int irq, ret; 2378 int node = dev_to_node(&iommu->dev->dev); 2379 2380 domain = iommu_get_irqdomain(); 2381 if (!domain) 2382 return -ENXIO; 2383 2384 init_irq_alloc_info(&info, NULL); 2385 info.type = X86_IRQ_ALLOC_TYPE_AMDVI; 2386 info.data = iommu; 2387 info.hwirq = hwirq; 2388 2389 irq = irq_domain_alloc_irqs(domain, 1, node, &info); 2390 if (irq < 0) { 2391 irq_domain_remove(domain); 2392 return irq; 2393 } 2394 2395 ret = request_threaded_irq(irq, amd_iommu_int_handler, 2396 thread_fn, 0, devname, iommu); 2397 if 
(ret) { 2398 irq_domain_free_irqs(irq, 1); 2399 irq_domain_remove(domain); 2400 return ret; 2401 } 2402 2403 return 0; 2404 } 2405 2406 static int iommu_setup_intcapxt(struct amd_iommu *iommu) 2407 { 2408 int ret; 2409 2410 snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name), 2411 "AMD-Vi%d-Evt", iommu->index); 2412 ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name, 2413 MMIO_INTCAPXT_EVT_OFFSET, 2414 amd_iommu_int_thread_evtlog); 2415 if (ret) 2416 return ret; 2417 2418 snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name), 2419 "AMD-Vi%d-PPR", iommu->index); 2420 ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name, 2421 MMIO_INTCAPXT_PPR_OFFSET, 2422 amd_iommu_int_thread_pprlog); 2423 if (ret) 2424 return ret; 2425 2426 #ifdef CONFIG_IRQ_REMAP 2427 snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name), 2428 "AMD-Vi%d-GA", iommu->index); 2429 ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name, 2430 MMIO_INTCAPXT_GALOG_OFFSET, 2431 amd_iommu_int_thread_galog); 2432 #endif 2433 2434 return ret; 2435 } 2436 2437 static int iommu_init_irq(struct amd_iommu *iommu) 2438 { 2439 int ret; 2440 2441 if (iommu->int_enabled) 2442 goto enable_faults; 2443 2444 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2445 ret = iommu_setup_intcapxt(iommu); 2446 else if (iommu->dev->msi_cap) 2447 ret = iommu_setup_msi(iommu); 2448 else 2449 ret = -ENODEV; 2450 2451 if (ret) 2452 return ret; 2453 2454 iommu->int_enabled = true; 2455 enable_faults: 2456 2457 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2458 iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2459 2460 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 2461 2462 return 0; 2463 } 2464 2465 /**************************************************************************** 2466 * 2467 * The next functions belong to the third pass of parsing the ACPI 2468 * table. In this last pass the memory mapping requirements are 2469 * gathered (like exclusion and unity mapping ranges). 
2470 * 2471 ****************************************************************************/ 2472 2473 static void __init free_unity_maps(void) 2474 { 2475 struct unity_map_entry *entry, *next; 2476 struct amd_iommu_pci_seg *p, *pci_seg; 2477 2478 for_each_pci_segment_safe(pci_seg, p) { 2479 list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) { 2480 list_del(&entry->list); 2481 kfree(entry); 2482 } 2483 } 2484 } 2485 2486 /* called for unity map ACPI definition */ 2487 static int __init init_unity_map_range(struct ivmd_header *m, 2488 struct acpi_table_header *ivrs_base) 2489 { 2490 struct unity_map_entry *e = NULL; 2491 struct amd_iommu_pci_seg *pci_seg; 2492 char *s; 2493 2494 pci_seg = get_pci_segment(m->pci_seg, ivrs_base); 2495 if (pci_seg == NULL) 2496 return -ENOMEM; 2497 2498 e = kzalloc(sizeof(*e), GFP_KERNEL); 2499 if (e == NULL) 2500 return -ENOMEM; 2501 2502 switch (m->type) { 2503 default: 2504 kfree(e); 2505 return 0; 2506 case ACPI_IVMD_TYPE: 2507 s = "IVMD_TYPEi\t\t\t"; 2508 e->devid_start = e->devid_end = m->devid; 2509 break; 2510 case ACPI_IVMD_TYPE_ALL: 2511 s = "IVMD_TYPE_ALL\t\t"; 2512 e->devid_start = 0; 2513 e->devid_end = pci_seg->last_bdf; 2514 break; 2515 case ACPI_IVMD_TYPE_RANGE: 2516 s = "IVMD_TYPE_RANGE\t\t"; 2517 e->devid_start = m->devid; 2518 e->devid_end = m->aux; 2519 break; 2520 } 2521 e->address_start = PAGE_ALIGN(m->range_start); 2522 e->address_end = e->address_start + PAGE_ALIGN(m->range_length); 2523 e->prot = m->flags >> 1; 2524 2525 /* 2526 * Treat per-device exclusion ranges as r/w unity-mapped regions 2527 * since some buggy BIOSes might lead to the overwritten exclusion 2528 * range (exclusion_start and exclusion_length members). This 2529 * happens when there are multiple exclusion ranges (IVMD entries) 2530 * defined in ACPI table. 
2531 */ 2532 if (m->flags & IVMD_FLAG_EXCL_RANGE) 2533 e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; 2534 2535 DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: " 2536 "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx" 2537 " flags: %x\n", s, m->pci_seg, 2538 PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), 2539 PCI_FUNC(e->devid_start), m->pci_seg, 2540 PCI_BUS_NUM(e->devid_end), 2541 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), 2542 e->address_start, e->address_end, m->flags); 2543 2544 list_add_tail(&e->list, &pci_seg->unity_map); 2545 2546 return 0; 2547 } 2548 2549 /* iterates over all memory definitions we find in the ACPI table */ 2550 static int __init init_memory_definitions(struct acpi_table_header *table) 2551 { 2552 u8 *p = (u8 *)table, *end = (u8 *)table; 2553 struct ivmd_header *m; 2554 2555 end += table->length; 2556 p += IVRS_HEADER_LENGTH; 2557 2558 while (p < end) { 2559 m = (struct ivmd_header *)p; 2560 if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) 2561 init_unity_map_range(m, table); 2562 2563 p += m->length; 2564 } 2565 2566 return 0; 2567 } 2568 2569 /* 2570 * Init the device table to not allow DMA access for devices 2571 */ 2572 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2573 { 2574 u32 devid; 2575 struct dev_table_entry *dev_table = pci_seg->dev_table; 2576 2577 if (dev_table == NULL) 2578 return; 2579 2580 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2581 __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); 2582 if (!amd_iommu_snp_en) 2583 __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); 2584 } 2585 } 2586 2587 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2588 { 2589 u32 devid; 2590 struct dev_table_entry *dev_table = pci_seg->dev_table; 2591 2592 if (dev_table == NULL) 2593 return; 2594 2595 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2596 dev_table[devid].data[0] = 0ULL; 2597 dev_table[devid].data[1] = 
0ULL; 2598 } 2599 } 2600 2601 static void init_device_table(void) 2602 { 2603 struct amd_iommu_pci_seg *pci_seg; 2604 u32 devid; 2605 2606 if (!amd_iommu_irq_remap) 2607 return; 2608 2609 for_each_pci_segment(pci_seg) { 2610 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) 2611 __set_dev_entry_bit(pci_seg->dev_table, 2612 devid, DEV_ENTRY_IRQ_TBL_EN); 2613 } 2614 } 2615 2616 static void iommu_init_flags(struct amd_iommu *iommu) 2617 { 2618 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? 2619 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : 2620 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); 2621 2622 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? 2623 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : 2624 iommu_feature_disable(iommu, CONTROL_PASSPW_EN); 2625 2626 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? 2627 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : 2628 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); 2629 2630 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? 2631 iommu_feature_enable(iommu, CONTROL_ISOC_EN) : 2632 iommu_feature_disable(iommu, CONTROL_ISOC_EN); 2633 2634 /* 2635 * make IOMMU memory accesses cache coherent 2636 */ 2637 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 2638 2639 /* Set IOTLB invalidation timeout to 1s */ 2640 iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); 2641 } 2642 2643 static void iommu_apply_resume_quirks(struct amd_iommu *iommu) 2644 { 2645 int i, j; 2646 u32 ioc_feature_control; 2647 struct pci_dev *pdev = iommu->root_pdev; 2648 2649 /* RD890 BIOSes may not have completely reconfigured the iommu */ 2650 if (!is_rd890_iommu(iommu->dev) || !pdev) 2651 return; 2652 2653 /* 2654 * First, we need to ensure that the iommu is enabled. 
This is 2655 * controlled by a register in the northbridge 2656 */ 2657 2658 /* Select Northbridge indirect register 0x75 and enable writing */ 2659 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); 2660 pci_read_config_dword(pdev, 0x64, &ioc_feature_control); 2661 2662 /* Enable the iommu */ 2663 if (!(ioc_feature_control & 0x1)) 2664 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); 2665 2666 /* Restore the iommu BAR */ 2667 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2668 iommu->stored_addr_lo); 2669 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, 2670 iommu->stored_addr_hi); 2671 2672 /* Restore the l1 indirect regs for each of the 6 l1s */ 2673 for (i = 0; i < 6; i++) 2674 for (j = 0; j < 0x12; j++) 2675 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); 2676 2677 /* Restore the l2 indirect regs */ 2678 for (i = 0; i < 0x83; i++) 2679 iommu_write_l2(iommu, i, iommu->stored_l2[i]); 2680 2681 /* Lock PCI setup registers */ 2682 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, 2683 iommu->stored_addr_lo | 1); 2684 } 2685 2686 static void iommu_enable_ga(struct amd_iommu *iommu) 2687 { 2688 #ifdef CONFIG_IRQ_REMAP 2689 switch (amd_iommu_guest_ir) { 2690 case AMD_IOMMU_GUEST_IR_VAPIC: 2691 case AMD_IOMMU_GUEST_IR_LEGACY_GA: 2692 iommu_feature_enable(iommu, CONTROL_GA_EN); 2693 iommu->irte_ops = &irte_128_ops; 2694 break; 2695 default: 2696 iommu->irte_ops = &irte_32_ops; 2697 break; 2698 } 2699 #endif 2700 } 2701 2702 static void iommu_disable_irtcachedis(struct amd_iommu *iommu) 2703 { 2704 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); 2705 } 2706 2707 static void iommu_enable_irtcachedis(struct amd_iommu *iommu) 2708 { 2709 u64 ctrl; 2710 2711 if (!amd_iommu_irtcachedis) 2712 return; 2713 2714 /* 2715 * Note: 2716 * The support for IRTCacheDis feature is dertermined by 2717 * checking if the bit is writable. 
2718 */ 2719 iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS); 2720 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 2721 ctrl &= (1ULL << CONTROL_IRTCACHEDIS); 2722 if (ctrl) 2723 iommu->irtcachedis_enabled = true; 2724 pr_info("iommu%d (%#06x) : IRT cache is %s\n", 2725 iommu->index, iommu->devid, 2726 iommu->irtcachedis_enabled ? "disabled" : "enabled"); 2727 } 2728 2729 static void early_enable_iommu(struct amd_iommu *iommu) 2730 { 2731 iommu_disable(iommu); 2732 iommu_init_flags(iommu); 2733 iommu_set_device_table(iommu); 2734 iommu_enable_command_buffer(iommu); 2735 iommu_enable_event_buffer(iommu); 2736 iommu_set_exclusion_range(iommu); 2737 iommu_enable_ga(iommu); 2738 iommu_enable_xt(iommu); 2739 iommu_enable_irtcachedis(iommu); 2740 iommu_enable(iommu); 2741 amd_iommu_flush_all_caches(iommu); 2742 } 2743 2744 /* 2745 * This function finally enables all IOMMUs found in the system after 2746 * they have been initialized. 2747 * 2748 * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy 2749 * the old content of device table entries. Not this case or copy failed, 2750 * just continue as normal kernel does. 2751 */ 2752 static void early_enable_iommus(void) 2753 { 2754 struct amd_iommu *iommu; 2755 struct amd_iommu_pci_seg *pci_seg; 2756 2757 if (!copy_device_table()) { 2758 /* 2759 * If come here because of failure in copying device table from old 2760 * kernel with all IOMMUs enabled, print error message and try to 2761 * free allocated old_dev_tbl_cpy. 
2762 */ 2763 if (amd_iommu_pre_enabled) 2764 pr_err("Failed to copy DEV table from previous kernel.\n"); 2765 2766 for_each_pci_segment(pci_seg) { 2767 if (pci_seg->old_dev_tbl_cpy != NULL) { 2768 iommu_free_pages(pci_seg->old_dev_tbl_cpy, 2769 get_order(pci_seg->dev_table_size)); 2770 pci_seg->old_dev_tbl_cpy = NULL; 2771 } 2772 } 2773 2774 for_each_iommu(iommu) { 2775 clear_translation_pre_enabled(iommu); 2776 early_enable_iommu(iommu); 2777 } 2778 } else { 2779 pr_info("Copied DEV table from previous kernel.\n"); 2780 2781 for_each_pci_segment(pci_seg) { 2782 iommu_free_pages(pci_seg->dev_table, 2783 get_order(pci_seg->dev_table_size)); 2784 pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; 2785 } 2786 2787 for_each_iommu(iommu) { 2788 iommu_disable_command_buffer(iommu); 2789 iommu_disable_event_buffer(iommu); 2790 iommu_disable_irtcachedis(iommu); 2791 iommu_enable_command_buffer(iommu); 2792 iommu_enable_event_buffer(iommu); 2793 iommu_enable_ga(iommu); 2794 iommu_enable_xt(iommu); 2795 iommu_enable_irtcachedis(iommu); 2796 iommu_set_device_table(iommu); 2797 amd_iommu_flush_all_caches(iommu); 2798 } 2799 } 2800 } 2801 2802 static void enable_iommus_ppr(void) 2803 { 2804 struct amd_iommu *iommu; 2805 2806 if (!amd_iommu_gt_ppr_supported()) 2807 return; 2808 2809 for_each_iommu(iommu) 2810 amd_iommu_enable_ppr_log(iommu); 2811 } 2812 2813 static void enable_iommus_vapic(void) 2814 { 2815 #ifdef CONFIG_IRQ_REMAP 2816 u32 status, i; 2817 struct amd_iommu *iommu; 2818 2819 for_each_iommu(iommu) { 2820 /* 2821 * Disable GALog if already running. It could have been enabled 2822 * in the previous boot before kdump. 
2823 */ 2824 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2825 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2826 continue; 2827 2828 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 2829 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 2830 2831 /* 2832 * Need to set and poll check the GALOGRun bit to zero before 2833 * we can set/ modify GA Log registers safely. 2834 */ 2835 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { 2836 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2837 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2838 break; 2839 udelay(10); 2840 } 2841 2842 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) 2843 return; 2844 } 2845 2846 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && 2847 !check_feature(FEATURE_GAM_VAPIC)) { 2848 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 2849 return; 2850 } 2851 2852 if (amd_iommu_snp_en && 2853 !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) { 2854 pr_warn("Force to disable Virtual APIC due to SNP\n"); 2855 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 2856 return; 2857 } 2858 2859 /* Enabling GAM and SNPAVIC support */ 2860 for_each_iommu(iommu) { 2861 if (iommu_init_ga_log(iommu) || 2862 iommu_ga_log_enable(iommu)) 2863 return; 2864 2865 iommu_feature_enable(iommu, CONTROL_GAM_EN); 2866 if (amd_iommu_snp_en) 2867 iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN); 2868 } 2869 2870 amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); 2871 pr_info("Virtual APIC enabled\n"); 2872 #endif 2873 } 2874 2875 static void enable_iommus(void) 2876 { 2877 early_enable_iommus(); 2878 } 2879 2880 static void disable_iommus(void) 2881 { 2882 struct amd_iommu *iommu; 2883 2884 for_each_iommu(iommu) 2885 iommu_disable(iommu); 2886 2887 #ifdef CONFIG_IRQ_REMAP 2888 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 2889 amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP); 2890 #endif 2891 } 2892 2893 /* 2894 * Suspend/Resume support 2895 * disable suspend until real resume implemented 2896 */ 2897 2898 static void 
amd_iommu_resume(void) 2899 { 2900 struct amd_iommu *iommu; 2901 2902 for_each_iommu(iommu) 2903 iommu_apply_resume_quirks(iommu); 2904 2905 /* re-load the hardware */ 2906 enable_iommus(); 2907 2908 amd_iommu_enable_interrupts(); 2909 } 2910 2911 static int amd_iommu_suspend(void) 2912 { 2913 /* disable IOMMUs to go out of the way for BIOS */ 2914 disable_iommus(); 2915 2916 return 0; 2917 } 2918 2919 static struct syscore_ops amd_iommu_syscore_ops = { 2920 .suspend = amd_iommu_suspend, 2921 .resume = amd_iommu_resume, 2922 }; 2923 2924 static void __init free_iommu_resources(void) 2925 { 2926 kmem_cache_destroy(amd_iommu_irq_cache); 2927 amd_iommu_irq_cache = NULL; 2928 2929 free_iommu_all(); 2930 free_pci_segments(); 2931 } 2932 2933 /* SB IOAPIC is always on this device in AMD systems */ 2934 #define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0)) 2935 2936 static bool __init check_ioapic_information(void) 2937 { 2938 const char *fw_bug = FW_BUG; 2939 bool ret, has_sb_ioapic; 2940 int idx; 2941 2942 has_sb_ioapic = false; 2943 ret = false; 2944 2945 /* 2946 * If we have map overrides on the kernel command line the 2947 * messages in this function might not describe firmware bugs 2948 * anymore - so be careful 2949 */ 2950 if (cmdline_maps) 2951 fw_bug = ""; 2952 2953 for (idx = 0; idx < nr_ioapics; idx++) { 2954 int devid, id = mpc_ioapic_id(idx); 2955 2956 devid = get_ioapic_devid(id); 2957 if (devid < 0) { 2958 pr_err("%s: IOAPIC[%d] not in IVRS table\n", 2959 fw_bug, id); 2960 ret = false; 2961 } else if (devid == IOAPIC_SB_DEVID) { 2962 has_sb_ioapic = true; 2963 ret = true; 2964 } 2965 } 2966 2967 if (!has_sb_ioapic) { 2968 /* 2969 * We expect the SB IOAPIC to be listed in the IVRS 2970 * table. The system timer is connected to the SB IOAPIC 2971 * and if we don't have it in the list the system will 2972 * panic at boot time. 
This situation usually happens 2973 * when the BIOS is buggy and provides us the wrong 2974 * device id for the IOAPIC in the system. 2975 */ 2976 pr_err("%s: No southbridge IOAPIC found\n", fw_bug); 2977 } 2978 2979 if (!ret) 2980 pr_err("Disabling interrupt remapping\n"); 2981 2982 return ret; 2983 } 2984 2985 static void __init free_dma_resources(void) 2986 { 2987 iommu_free_pages(amd_iommu_pd_alloc_bitmap, 2988 get_order(MAX_DOMAIN_ID / 8)); 2989 amd_iommu_pd_alloc_bitmap = NULL; 2990 2991 free_unity_maps(); 2992 } 2993 2994 static void __init ivinfo_init(void *ivrs) 2995 { 2996 amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); 2997 } 2998 2999 /* 3000 * This is the hardware init function for AMD IOMMU in the system. 3001 * This function is called either from amd_iommu_init or from the interrupt 3002 * remapping setup code. 3003 * 3004 * This function basically parses the ACPI table for AMD IOMMU (IVRS) 3005 * four times: 3006 * 3007 * 1 pass) Discover the most comprehensive IVHD type to use. 3008 * 3009 * 2 pass) Find the highest PCI device id the driver has to handle. 3010 * Upon this information the size of the data structures is 3011 * determined that needs to be allocated. 3012 * 3013 * 3 pass) Initialize the data structures just allocated with the 3014 * information in the ACPI table about available AMD IOMMUs 3015 * in the system. It also maps the PCI devices in the 3016 * system to specific IOMMUs 3017 * 3018 * 4 pass) After the basic data structures are allocated and 3019 * initialized we update them with information about memory 3020 * remapping requirements parsed out of the ACPI table in 3021 * this last pass. 3022 * 3023 * After everything is set up the IOMMUs are enabled and the necessary 3024 * hotplug and suspend notifiers are registered. 
 */
static int __init early_amd_iommu_init(void)
{
	struct acpi_table_header *ivrs_base;
	int remap_cache_sz, ret;
	acpi_status status;

	if (!amd_iommu_detected)
		return -ENODEV;

	status = acpi_get_table("IVRS", 0, &ivrs_base);
	if (status == AE_NOT_FOUND)
		return -ENODEV;
	else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("IVRS table error: %s\n", err);
		return -EINVAL;
	}

	/*
	 * Validate checksum here so we don't need to do it when
	 * we actually parse the table
	 */
	ret = check_ivrs_checksum(ivrs_base);
	if (ret)
		goto out;

	ivinfo_init(ivrs_base);

	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);

	/* Preset the error code for the allocation below */
	ret = -ENOMEM;

	/* Protection domain ID bitmap, one bit per domain ID */
	amd_iommu_pd_alloc_bitmap = iommu_alloc_pages(GFP_KERNEL,
						      get_order(MAX_DOMAIN_ID / 8));
	if (amd_iommu_pd_alloc_bitmap == NULL)
		goto out;

	/*
	 * never allocate domain 0 because it's used as the non-allocated and
	 * error value placeholder
	 */
	__set_bit(0, amd_iommu_pd_alloc_bitmap);

	/*
	 * now the data structures are allocated and basically initialized
	 * start the real acpi table scan
	 */
	ret = init_iommu_all(ivrs_base);
	if (ret)
		goto out;

	/* 5 level guest page table */
	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
		amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;

	/* Disable any previously enabled IOMMUs */
	if (!is_kdump_kernel() || amd_iommu_disabled)
		disable_iommus();

	/* Interrupt remapping is dropped when the IOAPIC information is bad */
	if (amd_iommu_irq_remap)
		amd_iommu_irq_remap = check_ioapic_information();

	if (amd_iommu_irq_remap) {
		struct amd_iommu_pci_seg *pci_seg;
		/*
		 * Interrupt remapping enabled, create kmem_cache for the
		 * remapping tables.
		 */
		ret = -ENOMEM;
		/* Table entries are a u32 each, or two u64 in the GA modes */
		if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
			remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
		else
			remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
							remap_cache_sz,
							DTE_INTTAB_ALIGNMENT,
							0, NULL);
		if (!amd_iommu_irq_cache)
			goto out;

		for_each_pci_segment(pci_seg) {
			if (alloc_irq_lookup_table(pci_seg))
				goto out;
		}
	}

	ret = init_memory_definitions(ivrs_base);
	if (ret)
		goto out;

	/* init the device table */
	init_device_table();

out:
	/* Don't leak any ACPI memory */
	acpi_put_table(ivrs_base);

	return ret;
}

/*
 * Set up the interrupt of every IOMMU and then enable the GA log and PPR
 * log handling. Returns 0 or the error of the first IOMMU whose interrupt
 * setup fails.
 */
static int amd_iommu_enable_interrupts(void)
{
	struct amd_iommu *iommu;
	int ret = 0;

	for_each_iommu(iommu) {
		ret = iommu_init_irq(iommu);
		if (ret)
			goto out;
	}

	/*
	 * Interrupt handler is ready to process interrupts. Enable
	 * PPR and GA log interrupt for all IOMMUs.
	 */
	enable_iommus_vapic();
	enable_iommus_ppr();

out:
	return ret;
}

/*
 * Check whether an IVRS ACPI table exists and whether the IOMMU should be
 * used. Returns false when the table is missing or cannot be read, or when
 * Stoney Ridge graphics is found and amd_iommu_force_enable is not set.
 */
static bool __init detect_ivrs(void)
{
	struct acpi_table_header *ivrs_base;
	acpi_status status;
	int i;

	status = acpi_get_table("IVRS", 0, &ivrs_base);
	if (status == AE_NOT_FOUND)
		return false;
	else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);
		pr_err("IVRS table error: %s\n", err);
		return false;
	}

	acpi_put_table(ivrs_base);

	if (amd_iommu_force_enable)
		goto out;

	/* Don't use IOMMU if there is Stoney Ridge graphics */
	for (i = 0; i < 32; i++) {
		u32 pci_id;

		/* Low 16 bits are the vendor id, high 16 bits the device id */
		pci_id = read_pci_config(0, i, 0, 0);
		if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
			pr_info("Disable IOMMU on Stoney Ridge\n");
			return false;
		}
	}

out:
	/* Make sure ACS will be enabled during PCI probe */
	pci_request_acs();

	return true;
}

/*
 * Decide whether SNP can be supported with the current IOMMU
 * configuration and clear CC_ATTR_HOST_SEV_SNP when it cannot. Empty
 * without CONFIG_KVM_AMD_SEV.
 */
static void iommu_snp_enable(void)
{
#ifdef CONFIG_KVM_AMD_SEV
	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
		return;
	/*
	 * The SNP support requires that IOMMU must be enabled, and is
	 * configured with V1 page table (DTE[Mode] = 0 is not supported).
	 */
	if (no_iommu || iommu_default_passthrough()) {
		pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
		goto disable_snp;
	}

	if (amd_iommu_pgtable != AMD_IOMMU_V1) {
		pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
		goto disable_snp;
	}

	amd_iommu_snp_en = check_feature(FEATURE_SNP);
	if (!amd_iommu_snp_en) {
		pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
		goto disable_snp;
	}

	pr_info("IOMMU SNP support enabled.\n");
	return;

disable_snp:
	cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
#endif
}

/****************************************************************************
 *
 * AMD IOMMU Initialization State Machine
 *
 ****************************************************************************/

/*
 * Perform the work of the current init_state and advance init_state by one
 * step. Returns 0 when the step succeeded; on any error the resources set
 * up so far are released (see the cleanup at the end) and the error is
 * returned.
 */
static int __init state_next(void)
{
	int ret = 0;

	switch (init_state) {
	case IOMMU_START_STATE:
		if (!detect_ivrs()) {
			init_state	= IOMMU_NOT_FOUND;
			ret		= -ENODEV;
		} else {
			init_state	= IOMMU_IVRS_DETECTED;
		}
		break;
	case IOMMU_IVRS_DETECTED:
		if (amd_iommu_disabled) {
			init_state = IOMMU_CMDLINE_DISABLED;
			ret = -EINVAL;
		} else {
			ret = early_amd_iommu_init();
			init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
		}
		break;
	case IOMMU_ACPI_FINISHED:
		early_enable_iommus();
		x86_platform.iommu_shutdown = disable_iommus;
		init_state = IOMMU_ENABLED;
		break;
	case IOMMU_ENABLED:
		register_syscore_ops(&amd_iommu_syscore_ops);
		iommu_snp_enable();
		ret = amd_iommu_init_pci();
		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
		break;
	case IOMMU_PCI_INIT:
		ret = amd_iommu_enable_interrupts();
		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
		break;
	case IOMMU_INTERRUPTS_EN:
		init_state = IOMMU_INITIALIZED;
		break;
	case IOMMU_INITIALIZED:
		/* Nothing to do */
		break;
	case IOMMU_NOT_FOUND:
	case IOMMU_INIT_ERROR:
	case IOMMU_CMDLINE_DISABLED:
		/* Error states => do nothing */
		ret = -EINVAL;
		break;
	default:
		/* Unknown state */
		BUG();
	}

	if (ret) {
		free_dma_resources();
		if (!irq_remapping_enabled) {
			disable_iommus();
			free_iommu_resources();
		} else {
			struct amd_iommu *iommu;
			struct amd_iommu_pci_seg *pci_seg;

			/*
			 * With interrupt remapping enabled the IOMMUs are
			 * kept; only the device table entries are cleared.
			 */
			for_each_pci_segment(pci_seg)
				uninit_device_table_dma(pci_seg);

			for_each_iommu(iommu)
				amd_iommu_flush_all_caches(iommu);
		}
	}
	return ret;
}

/*
 * Step the state machine until @state or one of the error states is
 * reached. Returns the result of the last step, or -EINVAL when no step
 * was executed at all.
 */
static int __init iommu_go_to_state(enum iommu_init_state state)
{
	int ret = -EINVAL;

	while (init_state != state) {
		if (init_state == IOMMU_NOT_FOUND         ||
		    init_state == IOMMU_INIT_ERROR        ||
		    init_state == IOMMU_CMDLINE_DISABLED)
			break;
		ret = state_next();
	}

	return ret;
}

#ifdef CONFIG_IRQ_REMAP
/*
 * Request interrupt remapping and run the init state machine up to
 * IOMMU_ACPI_FINISHED. Returns 0 when remapping is still enabled
 * afterwards, -ENODEV when it was turned off during init, or the init
 * error.
 */
int __init amd_iommu_prepare(void)
{
	int ret;

	amd_iommu_irq_remap = true;

	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
	if (ret) {
		amd_iommu_irq_remap = false;
		return ret;
	}

	return amd_iommu_irq_remap ? 0 : -ENODEV;
}

/*
 * Advance to IOMMU_ENABLED and mark interrupt remapping as enabled.
 * Returns amd_iommu_xt_mode on success or the init error.
 */
int __init amd_iommu_enable(void)
{
	int ret;

	ret = iommu_go_to_state(IOMMU_ENABLED);
	if (ret)
		return ret;

	irq_remapping_enabled = 1;
	return amd_iommu_xt_mode;
}

void amd_iommu_disable(void)
{
	amd_iommu_suspend();
}

/* @mode is not used; the IOMMUs are re-enabled through the resume path. */
int amd_iommu_reenable(int mode)
{
	amd_iommu_resume();

	return 0;
}

int __init amd_iommu_enable_faulting(unsigned int cpu)
{
	/* We enable MSI later when PCI is initialized */
	return 0;
}
#endif

/*
 * This is the core init function for AMD IOMMU hardware in the system.
 * This function is called from the generic x86 DMA layer initialization
 * code.
 */
static int __init amd_iommu_init(void)
{
	struct amd_iommu *iommu;
	int ret;

	ret = iommu_go_to_state(IOMMU_INITIALIZED);
#ifdef CONFIG_GART_IOMMU
	if (ret && list_empty(&amd_iommu_list)) {
		/*
		 * We failed to initialize the AMD IOMMU - try fallback
		 * to GART if possible.
		 */
		gart_iommu_init();
	}
#endif

	for_each_iommu(iommu)
		amd_iommu_debugfs_setup(iommu);

	return ret;
}

/*
 * Returns true when the IOMMU may be used with respect to SME: either host
 * memory encryption is not active, the CPU is not family 17h, or the
 * microcode revision lies in one of the accepted ranges below.
 */
static bool amd_iommu_sme_check(void)
{
	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
	    (boot_cpu_data.x86 != 0x17))
		return true;

	/* For Fam17h, a specific level of support is required */
	if (boot_cpu_data.microcode >= 0x08001205)
		return true;

	if ((boot_cpu_data.microcode >= 0x08001126) &&
	    (boot_cpu_data.microcode <= 0x080011ff))
		return true;

	pr_notice("IOMMU not currently supported when SME is active\n");

	return false;
}

/****************************************************************************
 *
 * Early detect code. This code runs at IOMMU detection time in the DMA
 * layer.
 * It just looks if there is an IVRS ACPI table to detect AMD
 * IOMMUs
 *
 ****************************************************************************/
int __init amd_iommu_detect(void)
{
	int ret;

	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
		return -ENODEV;

	if (!amd_iommu_sme_check())
		return -ENODEV;

	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
	if (ret)
		return ret;

	amd_iommu_detected = true;
	iommu_detected = 1;
	/* The remaining initialization runs later through this hook */
	x86_init.iommu.iommu_init = amd_iommu_init;

	return 1;
}

/****************************************************************************
 *
 * Parsing functions for the AMD IOMMU specific kernel command line
 * options.
 *
 ****************************************************************************/

/* Turn on amd_iommu_dump; the option value is not looked at. */
static int __init parse_amd_iommu_dump(char *str)
{
	amd_iommu_dump = true;

	return 1;
}

/*
 * Select the guest interrupt remapping mode. The string is scanned from
 * every position, so "legacy" or "vapic" is recognized anywhere in it and
 * the first match wins.
 */
static int __init parse_amd_iommu_intr(char *str)
{
	for (; *str; ++str) {
		if (strncmp(str, "legacy", 6) == 0) {
			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
			break;
		}
		if (strncmp(str, "vapic", 5) == 0) {
			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
			break;
		}
	}
	return 1;
}

/*
 * Parse a comma separated list of options. Each item is matched by
 * prefix; unknown items are reported and skipped. Returns -EINVAL for a
 * NULL string, 1 otherwise.
 */
static int __init parse_amd_iommu_options(char *str)
{
	if (!str)
		return -EINVAL;

	while (*str) {
		if (strncmp(str, "fullflush", 9) == 0) {
			pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
			iommu_set_dma_strict();
		} else if (strncmp(str, "force_enable", 12) == 0) {
			amd_iommu_force_enable = true;
		} else if (strncmp(str, "off", 3) == 0) {
			amd_iommu_disabled = true;
		} else if (strncmp(str, "force_isolation", 15) == 0) {
			amd_iommu_force_isolation = true;
		} else if (strncmp(str, "pgtbl_v1", 8) == 0) {
			amd_iommu_pgtable = AMD_IOMMU_V1;
		} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
			amd_iommu_pgtable = AMD_IOMMU_V2;
		} else if (strncmp(str, "irtcachedis", 11) == 0) {
			amd_iommu_irtcachedis = true;
		} else {
			pr_notice("Unknown option - '%s'\n", str);
		}

		/* Advance to the next item, skipping the separating commas */
		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}

	return 1;
}

/*
 * Parse one IOAPIC override, "=<id>@[<seg>:]<bus>:<dev>.<fn>" or the
 * deprecated "[<id>]=[<seg>:]<bus>:<dev>.<fn>", and append it to
 * early_ioapic_map. Invalid input and a full map are reported and
 * ignored. Always returns 1.
 */
static int __init parse_ivrs_ioapic(char *str)
{
	u32 seg = 0, bus, dev, fn;
	int id, i;
	u32 devid;

	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
		goto found;

	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
		pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
			str, id, seg, bus, dev, fn);
		goto found;
	}

	pr_err("Invalid command line: ivrs_ioapic%s\n", str);
	return 1;

found:
	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
			str);
		return 1;
	}

	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);

	cmdline_maps			= true;
	i				= early_ioapic_map_size++;
	early_ioapic_map[i].id		= id;
	early_ioapic_map[i].devid	= devid;
	early_ioapic_map[i].cmd_line	= true;

	return 1;
}

/*
 * Parse one HPET override in the same formats as parse_ivrs_ioapic() and
 * append it to early_hpet_map. Invalid input and a full map are reported
 * and ignored. Always returns 1.
 */
static int __init parse_ivrs_hpet(char *str)
{
	u32 seg = 0, bus, dev, fn;
	int id, i;
	u32 devid;

	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
		goto found;

	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
		pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
			str, id, seg, bus, dev, fn);
		goto found;
	}

	pr_err("Invalid command line: ivrs_hpet%s\n", str);
	return 1;

found:
	if (early_hpet_map_size == EARLY_MAP_SIZE) {
		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
			str);
		return 1;
	}

	devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);

	cmdline_maps			= true;
	i				= early_hpet_map_size++;
	early_hpet_map[i].id		= id;
	early_hpet_map[i].devid		= devid;
	early_hpet_map[i].cmd_line	= true;

	return 1;
}

#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)

static int __init parse_ivrs_acpihid(char *str)
{
	u32 seg = 0, bus, dev, fn;
	char *hid, *uid, *p, *addr;
	char acpiid[ACPIID_LEN] = {0};
	int i;

	addr = strchr(str, '@');
	if (!addr) {
		/* No '@': try the deprecated "[...]=<hid>:<uid>" format */
		addr = strchr(str, '=');
		if (!addr)
			goto not_found;

		++addr;

		/*
		 * NOTE(review): an id of exactly ACPIID_LEN characters passes
		 * this check (and the similar one further down), yet the %s
		 * conversion also stores a terminating NUL into
		 * acpiid[ACPIID_LEN] - confirm whether this should be ">=".
		 */
		if (strlen(addr) > ACPIID_LEN)
			goto not_found;

		if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
		    sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
			pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
				str, acpiid, seg, bus, dev, fn);
			goto found;
		}
		goto not_found;
	}

	/* We have the '@', make it the terminator to get just the acpiid */
	*addr++ = 0;

	/* str still starts with '=', hence the + 1 */
	if (strlen(str) > ACPIID_LEN + 1)
		goto not_found;

	if (sscanf(str, "=%s", acpiid) != 1)
		goto not_found;

	if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
	    sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
		goto found;

not_found:
	pr_err("Invalid command line: ivrs_acpihid%s\n", str);
	return 1;

found:
	/* Split "<hid>:<uid>" at the first ':' */
	p = acpiid;
	hid = strsep(&p, ":");
	uid = p;

	if (!hid || !(*hid) || !uid) {
		pr_err("Invalid command line: hid or uid\n");
		return 1;
	}

3631 /* 3632 * Ignore leading zeroes after ':', so e.g., AMDI0095:00 3633 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match 3634 */ 3635 while (*uid == '0' && *(uid + 1)) 3636 uid++; 3637 3638 i = early_acpihid_map_size++; 3639 memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); 3640 memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); 3641 early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3642 early_acpihid_map[i].cmd_line = true; 3643 3644 return 1; 3645 } 3646 3647 __setup("amd_iommu_dump", parse_amd_iommu_dump); 3648 __setup("amd_iommu=", parse_amd_iommu_options); 3649 __setup("amd_iommu_intr=", parse_amd_iommu_intr); 3650 __setup("ivrs_ioapic", parse_ivrs_ioapic); 3651 __setup("ivrs_hpet", parse_ivrs_hpet); 3652 __setup("ivrs_acpihid", parse_ivrs_acpihid); 3653 3654 bool amd_iommu_pasid_supported(void) 3655 { 3656 /* CPU page table size should match IOMMU guest page table size */ 3657 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3658 amd_iommu_gpt_level != PAGE_MODE_5_LEVEL) 3659 return false; 3660 3661 /* 3662 * Since DTE[Mode]=0 is prohibited on SNP-enabled system 3663 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without 3664 * setting up IOMMUv1 page table. 3665 */ 3666 return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en; 3667 } 3668 3669 struct amd_iommu *get_amd_iommu(unsigned int idx) 3670 { 3671 unsigned int i = 0; 3672 struct amd_iommu *iommu; 3673 3674 for_each_iommu(iommu) 3675 if (i++ == idx) 3676 return iommu; 3677 return NULL; 3678 } 3679 3680 /**************************************************************************** 3681 * 3682 * IOMMU EFR Performance Counter support functionality. This code allows 3683 * access to the IOMMU PC functionality. 
3684 * 3685 ****************************************************************************/ 3686 3687 u8 amd_iommu_pc_get_max_banks(unsigned int idx) 3688 { 3689 struct amd_iommu *iommu = get_amd_iommu(idx); 3690 3691 if (iommu) 3692 return iommu->max_banks; 3693 3694 return 0; 3695 } 3696 3697 bool amd_iommu_pc_supported(void) 3698 { 3699 return amd_iommu_pc_present; 3700 } 3701 3702 u8 amd_iommu_pc_get_max_counters(unsigned int idx) 3703 { 3704 struct amd_iommu *iommu = get_amd_iommu(idx); 3705 3706 if (iommu) 3707 return iommu->max_counters; 3708 3709 return 0; 3710 } 3711 3712 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, 3713 u8 fxn, u64 *value, bool is_write) 3714 { 3715 u32 offset; 3716 u32 max_offset_lim; 3717 3718 /* Make sure the IOMMU PC resource is available */ 3719 if (!amd_iommu_pc_present) 3720 return -ENODEV; 3721 3722 /* Check for valid iommu and pc register indexing */ 3723 if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7))) 3724 return -ENODEV; 3725 3726 offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); 3727 3728 /* Limit the offset to the hw defined mmio region aperture */ 3729 max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) | 3730 (iommu->max_counters << 8) | 0x28); 3731 if ((offset < MMIO_CNTR_REG_OFFSET) || 3732 (offset > max_offset_lim)) 3733 return -EINVAL; 3734 3735 if (is_write) { 3736 u64 val = *value & GENMASK_ULL(47, 0); 3737 3738 writel((u32)val, iommu->mmio_base + offset); 3739 writel((val >> 32), iommu->mmio_base + offset + 4); 3740 } else { 3741 *value = readl(iommu->mmio_base + offset + 4); 3742 *value <<= 32; 3743 *value |= readl(iommu->mmio_base + offset); 3744 *value &= GENMASK_ULL(47, 0); 3745 } 3746 3747 return 0; 3748 } 3749 3750 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3751 { 3752 if (!iommu) 3753 return -EINVAL; 3754 3755 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); 3756 } 3757 3758 int 
amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3759 { 3760 if (!iommu) 3761 return -EINVAL; 3762 3763 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); 3764 } 3765 3766 #ifdef CONFIG_KVM_AMD_SEV 3767 static int iommu_page_make_shared(void *page) 3768 { 3769 unsigned long paddr, pfn; 3770 3771 paddr = iommu_virt_to_phys(page); 3772 /* Cbit maybe set in the paddr */ 3773 pfn = __sme_clr(paddr) >> PAGE_SHIFT; 3774 3775 if (!(pfn % PTRS_PER_PMD)) { 3776 int ret, level; 3777 bool assigned; 3778 3779 ret = snp_lookup_rmpentry(pfn, &assigned, &level); 3780 if (ret) { 3781 pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret); 3782 return ret; 3783 } 3784 3785 if (!assigned) { 3786 pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn); 3787 return -EINVAL; 3788 } 3789 3790 if (level > PG_LEVEL_4K) { 3791 ret = psmash(pfn); 3792 if (!ret) 3793 goto done; 3794 3795 pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n", 3796 pfn, ret, level); 3797 return ret; 3798 } 3799 } 3800 3801 done: 3802 return rmp_make_shared(pfn, PG_LEVEL_4K); 3803 } 3804 3805 static int iommu_make_shared(void *va, size_t size) 3806 { 3807 void *page; 3808 int ret; 3809 3810 if (!va) 3811 return 0; 3812 3813 for (page = va; page < (va + size); page += PAGE_SIZE) { 3814 ret = iommu_page_make_shared(page); 3815 if (ret) 3816 return ret; 3817 } 3818 3819 return 0; 3820 } 3821 3822 int amd_iommu_snp_disable(void) 3823 { 3824 struct amd_iommu *iommu; 3825 int ret; 3826 3827 if (!amd_iommu_snp_en) 3828 return 0; 3829 3830 for_each_iommu(iommu) { 3831 ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE); 3832 if (ret) 3833 return ret; 3834 3835 ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE); 3836 if (ret) 3837 return ret; 3838 3839 ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE); 3840 if (ret) 3841 return ret; 3842 } 3843 3844 return 0; 3845 } 3846 EXPORT_SYMBOL_GPL(amd_iommu_snp_disable); 3847 
#endif 3848