1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. 4 * Author: Joerg Roedel <jroedel@suse.de> 5 * Leo Duran <leo.duran@amd.com> 6 */ 7 8 #define pr_fmt(fmt) "AMD-Vi: " fmt 9 #define dev_fmt(fmt) pr_fmt(fmt) 10 11 #include <linux/pci.h> 12 #include <linux/acpi.h> 13 #include <linux/list.h> 14 #include <linux/bitmap.h> 15 #include <linux/slab.h> 16 #include <linux/syscore_ops.h> 17 #include <linux/interrupt.h> 18 #include <linux/msi.h> 19 #include <linux/irq.h> 20 #include <linux/amd-iommu.h> 21 #include <linux/export.h> 22 #include <linux/kmemleak.h> 23 #include <linux/cc_platform.h> 24 #include <linux/iopoll.h> 25 #include <asm/pci-direct.h> 26 #include <asm/iommu.h> 27 #include <asm/apic.h> 28 #include <asm/gart.h> 29 #include <asm/x86_init.h> 30 #include <asm/io_apic.h> 31 #include <asm/irq_remapping.h> 32 #include <asm/set_memory.h> 33 34 #include <linux/crash_dump.h> 35 36 #include "amd_iommu.h" 37 #include "../irq_remapping.h" 38 39 /* 40 * definitions for the ACPI scanning code 41 */ 42 #define IVRS_HEADER_LENGTH 48 43 44 #define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40 45 #define ACPI_IVMD_TYPE_ALL 0x20 46 #define ACPI_IVMD_TYPE 0x21 47 #define ACPI_IVMD_TYPE_RANGE 0x22 48 49 #define IVHD_DEV_ALL 0x01 50 #define IVHD_DEV_SELECT 0x02 51 #define IVHD_DEV_SELECT_RANGE_START 0x03 52 #define IVHD_DEV_RANGE_END 0x04 53 #define IVHD_DEV_ALIAS 0x42 54 #define IVHD_DEV_ALIAS_RANGE 0x43 55 #define IVHD_DEV_EXT_SELECT 0x46 56 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 57 #define IVHD_DEV_SPECIAL 0x48 58 #define IVHD_DEV_ACPI_HID 0xf0 59 60 #define UID_NOT_PRESENT 0 61 #define UID_IS_INTEGER 1 62 #define UID_IS_CHARACTER 2 63 64 #define IVHD_SPECIAL_IOAPIC 1 65 #define IVHD_SPECIAL_HPET 2 66 67 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 68 #define IVHD_FLAG_PASSPW_EN_MASK 0x02 69 #define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 70 #define IVHD_FLAG_ISOC_EN_MASK 0x08 71 72 #define IVMD_FLAG_EXCL_RANGE 0x08 73 #define IVMD_FLAG_IW 0x04 74 #define IVMD_FLAG_IR 0x02 75 #define IVMD_FLAG_UNITY_MAP 0x01 76 77 #define ACPI_DEVFLAG_INITPASS 0x01 78 #define ACPI_DEVFLAG_EXTINT 0x02 79 #define ACPI_DEVFLAG_NMI 0x04 80 #define ACPI_DEVFLAG_SYSMGT1 0x10 81 #define ACPI_DEVFLAG_SYSMGT2 0x20 82 #define ACPI_DEVFLAG_LINT0 0x40 83 #define ACPI_DEVFLAG_LINT1 0x80 84 #define ACPI_DEVFLAG_ATSDIS 0x10000000 85 86 #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ 87 | ((dev & 0x1f) << 3) | (fn & 0x7)) 88 89 /* 90 * ACPI table definitions 91 * 92 * These data structures are laid over the table to parse the important values 93 * out of it. 94 */ 95 96 /* 97 * structure describing one IOMMU in the ACPI table. Typically followed by one 98 * or more ivhd_entrys. 99 */ 100 struct ivhd_header { 101 u8 type; 102 u8 flags; 103 u16 length; 104 u16 devid; 105 u16 cap_ptr; 106 u64 mmio_phys; 107 u16 pci_seg; 108 u16 info; 109 u32 efr_attr; 110 111 /* Following only valid on IVHD type 11h and 40h */ 112 u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ 113 u64 efr_reg2; 114 } __attribute__((packed)); 115 116 /* 117 * A device entry describing which devices a specific IOMMU translates and 118 * which requestor ids they use. 119 */ 120 struct ivhd_entry { 121 u8 type; 122 u16 devid; 123 u8 flags; 124 struct_group(ext_hid, 125 u32 ext; 126 u32 hidh; 127 ); 128 u64 cid; 129 u8 uidf; 130 u8 uidl; 131 u8 uid; 132 } __attribute__((packed)); 133 134 /* 135 * An AMD IOMMU memory definition structure. 
It defines things like exclusion
 * ranges for devices and regions that should be unity mapped.
 */
struct ivmd_header {
	u8 type;
	u8 flags;
	u16 length;
	u16 devid;
	u16 aux;
	u16 pci_seg;
	u8 resv[6];
	u64 range_start;
	u64 range_length;
} __attribute__((packed));

bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;

enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;

int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;

static bool amd_iommu_detected;
static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
static bool amd_iommu_irtcachedis;
static int amd_iommu_target_ivhd_type;

/* Global EFR and EFR2 registers */
u64 amd_iommu_efr;
u64 amd_iommu_efr2;

/* SNP is enabled on the system? */
bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);

LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
					   system */

/* Array to assign indices to IOMMUs */
struct amd_iommu *amd_iommus[MAX_IOMMUS];

/* Number of IOMMUs present in the system */
static int amd_iommus_present;

/* IOMMUs have a non-present cache? */
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;

static bool amd_iommu_pc_present __read_mostly;
bool amdr_ivrs_remap_support __read_mostly;

bool amd_iommu_force_isolation __read_mostly;

/*
 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
 * to know which ones are already in use.
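 * With 2^16 domain IDs, the bitmap itself occupies 65536 bits, i.e. 8 KiB.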
196 */ 197 unsigned long *amd_iommu_pd_alloc_bitmap; 198 199 enum iommu_init_state { 200 IOMMU_START_STATE, 201 IOMMU_IVRS_DETECTED, 202 IOMMU_ACPI_FINISHED, 203 IOMMU_ENABLED, 204 IOMMU_PCI_INIT, 205 IOMMU_INTERRUPTS_EN, 206 IOMMU_INITIALIZED, 207 IOMMU_NOT_FOUND, 208 IOMMU_INIT_ERROR, 209 IOMMU_CMDLINE_DISABLED, 210 }; 211 212 /* Early ioapic and hpet maps from kernel command line */ 213 #define EARLY_MAP_SIZE 4 214 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE]; 215 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE]; 216 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE]; 217 218 static int __initdata early_ioapic_map_size; 219 static int __initdata early_hpet_map_size; 220 static int __initdata early_acpihid_map_size; 221 222 static bool __initdata cmdline_maps; 223 224 static enum iommu_init_state init_state = IOMMU_START_STATE; 225 226 static int amd_iommu_enable_interrupts(void); 227 static int __init iommu_go_to_state(enum iommu_init_state state); 228 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); 229 230 static bool amd_iommu_pre_enabled = true; 231 232 static u32 amd_iommu_ivinfo __initdata; 233 234 bool translation_pre_enabled(struct amd_iommu *iommu) 235 { 236 return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); 237 } 238 239 static void clear_translation_pre_enabled(struct amd_iommu *iommu) 240 { 241 iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 242 } 243 244 static void init_translation_status(struct amd_iommu *iommu) 245 { 246 u64 ctrl; 247 248 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 249 if (ctrl & (1<<CONTROL_IOMMU_EN)) 250 iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; 251 } 252 253 static inline unsigned long tbl_size(int entry_size, int last_bdf) 254 { 255 unsigned shift = PAGE_SHIFT + 256 get_order((last_bdf + 1) * entry_size); 257 258 return 1UL << shift; 259 } 260 261 int amd_iommu_get_num_iommus(void) 262 { 263 return amd_iommus_present; 264 } 265 266 /* 267 * Iterate through all the IOMMUs to get common EFR 268 * masks among all IOMMUs and warn if found inconsistency. 269 */ 270 static __init void get_global_efr(void) 271 { 272 struct amd_iommu *iommu; 273 274 for_each_iommu(iommu) { 275 u64 tmp = iommu->features; 276 u64 tmp2 = iommu->features2; 277 278 if (list_is_first(&iommu->list, &amd_iommu_list)) { 279 amd_iommu_efr = tmp; 280 amd_iommu_efr2 = tmp2; 281 continue; 282 } 283 284 if (amd_iommu_efr == tmp && 285 amd_iommu_efr2 == tmp2) 286 continue; 287 288 pr_err(FW_BUG 289 "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n", 290 tmp, tmp2, amd_iommu_efr, amd_iommu_efr2, 291 iommu->index, iommu->pci_seg->id, 292 PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid), 293 PCI_FUNC(iommu->devid)); 294 295 amd_iommu_efr &= tmp; 296 amd_iommu_efr2 &= tmp2; 297 } 298 299 pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); 300 } 301 302 /* 303 * For IVHD type 0x11/0x40, EFR is also available via IVHD. 304 * Default to IVHD EFR since it is available sooner 305 * (i.e. before PCI init). 
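 * These IVHD-provided values are later sanity-checked against the MMIO copy in
 * late_iommu_features_init(), which only warns (and keeps the IVHD values) on a
 * mismatch.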
306 */ 307 static void __init early_iommu_features_init(struct amd_iommu *iommu, 308 struct ivhd_header *h) 309 { 310 if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) { 311 iommu->features = h->efr_reg; 312 iommu->features2 = h->efr_reg2; 313 } 314 if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP) 315 amdr_ivrs_remap_support = true; 316 } 317 318 /* Access to l1 and l2 indexed register spaces */ 319 320 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) 321 { 322 u32 val; 323 324 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 325 pci_read_config_dword(iommu->dev, 0xfc, &val); 326 return val; 327 } 328 329 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) 330 { 331 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); 332 pci_write_config_dword(iommu->dev, 0xfc, val); 333 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); 334 } 335 336 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) 337 { 338 u32 val; 339 340 pci_write_config_dword(iommu->dev, 0xf0, address); 341 pci_read_config_dword(iommu->dev, 0xf4, &val); 342 return val; 343 } 344 345 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) 346 { 347 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); 348 pci_write_config_dword(iommu->dev, 0xf4, val); 349 } 350 351 /**************************************************************************** 352 * 353 * AMD IOMMU MMIO register space handling functions 354 * 355 * These functions are used to program the IOMMU device registers in 356 * MMIO space required for that driver. 357 * 358 ****************************************************************************/ 359 360 /* 361 * This function set the exclusion range in the IOMMU. DMA accesses to the 362 * exclusion range are passed through untranslated 363 */ 364 static void iommu_set_exclusion_range(struct amd_iommu *iommu) 365 { 366 u64 start = iommu->exclusion_start & PAGE_MASK; 367 u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK; 368 u64 entry; 369 370 if (!iommu->exclusion_start) 371 return; 372 373 entry = start | MMIO_EXCL_ENABLE_MASK; 374 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, 375 &entry, sizeof(entry)); 376 377 entry = limit; 378 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, 379 &entry, sizeof(entry)); 380 } 381 382 static void iommu_set_cwwb_range(struct amd_iommu *iommu) 383 { 384 u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); 385 u64 entry = start & PM_ADDR_MASK; 386 387 if (!check_feature(FEATURE_SNP)) 388 return; 389 390 /* Note: 391 * Re-purpose Exclusion base/limit registers for Completion wait 392 * write-back base/limit. 393 */ 394 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, 395 &entry, sizeof(entry)); 396 397 /* Note: 398 * Default to 4 Kbytes, which can be specified by setting base 399 * address equal to the limit address. 
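 * The completion-wait semaphore (iommu->cmd_sem) is a single zeroed 4K page
 * allocated in alloc_cwwb_sem(), so base == limit covers exactly that page.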
400 */ 401 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, 402 &entry, sizeof(entry)); 403 } 404 405 /* Programs the physical address of the device table into the IOMMU hardware */ 406 static void iommu_set_device_table(struct amd_iommu *iommu) 407 { 408 u64 entry; 409 u32 dev_table_size = iommu->pci_seg->dev_table_size; 410 void *dev_table = (void *)get_dev_table(iommu); 411 412 BUG_ON(iommu->mmio_base == NULL); 413 414 entry = iommu_virt_to_phys(dev_table); 415 entry |= (dev_table_size >> 12) - 1; 416 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, 417 &entry, sizeof(entry)); 418 } 419 420 /* Generic functions to enable/disable certain features of the IOMMU. */ 421 static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) 422 { 423 u64 ctrl; 424 425 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 426 ctrl |= (1ULL << bit); 427 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 428 } 429 430 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) 431 { 432 u64 ctrl; 433 434 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 435 ctrl &= ~(1ULL << bit); 436 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 437 } 438 439 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) 440 { 441 u64 ctrl; 442 443 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); 444 ctrl &= ~CTRL_INV_TO_MASK; 445 ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; 446 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 447 } 448 449 /* Function to enable the hardware */ 450 static void iommu_enable(struct amd_iommu *iommu) 451 { 452 iommu_feature_enable(iommu, CONTROL_IOMMU_EN); 453 } 454 455 static void iommu_disable(struct amd_iommu *iommu) 456 { 457 if (!iommu->mmio_base) 458 return; 459 460 /* Disable command buffer */ 461 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 462 463 /* Disable event logging and event interrupts */ 464 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); 465 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 466 467 /* Disable IOMMU GA_LOG */ 468 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 469 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 470 471 /* Disable IOMMU PPR logging */ 472 iommu_feature_disable(iommu, CONTROL_PPRLOG_EN); 473 iommu_feature_disable(iommu, CONTROL_PPRINT_EN); 474 475 /* Disable IOMMU hardware itself */ 476 iommu_feature_disable(iommu, CONTROL_IOMMU_EN); 477 478 /* Clear IRTE cache disabling bit */ 479 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); 480 } 481 482 /* 483 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in 484 * the system has one. 485 */ 486 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) 487 { 488 if (!request_mem_region(address, end, "amd_iommu")) { 489 pr_err("Can not reserve memory region %llx-%llx for mmio\n", 490 address, end); 491 pr_err("This is a BIOS bug. 
Please contact your hardware vendor\n"); 492 return NULL; 493 } 494 495 return (u8 __iomem *)ioremap(address, end); 496 } 497 498 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) 499 { 500 if (iommu->mmio_base) 501 iounmap(iommu->mmio_base); 502 release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); 503 } 504 505 static inline u32 get_ivhd_header_size(struct ivhd_header *h) 506 { 507 u32 size = 0; 508 509 switch (h->type) { 510 case 0x10: 511 size = 24; 512 break; 513 case 0x11: 514 case 0x40: 515 size = 40; 516 break; 517 } 518 return size; 519 } 520 521 /**************************************************************************** 522 * 523 * The functions below belong to the first pass of AMD IOMMU ACPI table 524 * parsing. In this pass we try to find out the highest device id this 525 * code has to handle. Upon this information the size of the shared data 526 * structures is determined later. 527 * 528 ****************************************************************************/ 529 530 /* 531 * This function calculates the length of a given IVHD entry 532 */ 533 static inline int ivhd_entry_length(u8 *ivhd) 534 { 535 u32 type = ((struct ivhd_entry *)ivhd)->type; 536 537 if (type < 0x80) { 538 return 0x04 << (*ivhd >> 6); 539 } else if (type == IVHD_DEV_ACPI_HID) { 540 /* For ACPI_HID, offset 21 is uid len */ 541 return *((u8 *)ivhd + 21) + 22; 542 } 543 return 0; 544 } 545 546 /* 547 * After reading the highest device id from the IOMMU PCI capability header 548 * this function looks if there is a higher device id defined in the ACPI table 549 */ 550 static int __init find_last_devid_from_ivhd(struct ivhd_header *h) 551 { 552 u8 *p = (void *)h, *end = (void *)h; 553 struct ivhd_entry *dev; 554 int last_devid = -EINVAL; 555 556 u32 ivhd_size = get_ivhd_header_size(h); 557 558 if (!ivhd_size) { 559 pr_err("Unsupported IVHD type %#x\n", h->type); 560 return -EINVAL; 561 } 562 563 p += ivhd_size; 564 end += h->length; 565 566 while (p < end) { 567 dev = (struct ivhd_entry *)p; 568 switch (dev->type) { 569 case IVHD_DEV_ALL: 570 /* Use maximum BDF value for DEV_ALL */ 571 return 0xffff; 572 case IVHD_DEV_SELECT: 573 case IVHD_DEV_RANGE_END: 574 case IVHD_DEV_ALIAS: 575 case IVHD_DEV_EXT_SELECT: 576 /* all the above subfield types refer to device ids */ 577 if (dev->devid > last_devid) 578 last_devid = dev->devid; 579 break; 580 default: 581 break; 582 } 583 p += ivhd_entry_length(p); 584 } 585 586 WARN_ON(p != end); 587 588 return last_devid; 589 } 590 591 static int __init check_ivrs_checksum(struct acpi_table_header *table) 592 { 593 int i; 594 u8 checksum = 0, *p = (u8 *)table; 595 596 for (i = 0; i < table->length; ++i) 597 checksum += p[i]; 598 if (checksum != 0) { 599 /* ACPI table corrupt */ 600 pr_err(FW_BUG "IVRS invalid checksum\n"); 601 return -ENODEV; 602 } 603 604 return 0; 605 } 606 607 /* 608 * Iterate over all IVHD entries in the ACPI table and find the highest device 609 * id which we need to handle. This is the first of three functions which parse 610 * the ACPI table. So we check the checksum here. 
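 * The highest BDF returned here is what alloc_pci_segment() later feeds into
 * tbl_size() to size the per-segment device, alias and rlookup tables.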
611 */ 612 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg) 613 { 614 u8 *p = (u8 *)table, *end = (u8 *)table; 615 struct ivhd_header *h; 616 int last_devid, last_bdf = 0; 617 618 p += IVRS_HEADER_LENGTH; 619 620 end += table->length; 621 while (p < end) { 622 h = (struct ivhd_header *)p; 623 if (h->pci_seg == pci_seg && 624 h->type == amd_iommu_target_ivhd_type) { 625 last_devid = find_last_devid_from_ivhd(h); 626 627 if (last_devid < 0) 628 return -EINVAL; 629 if (last_devid > last_bdf) 630 last_bdf = last_devid; 631 } 632 p += h->length; 633 } 634 WARN_ON(p != end); 635 636 return last_bdf; 637 } 638 639 /**************************************************************************** 640 * 641 * The following functions belong to the code path which parses the ACPI table 642 * the second time. In this ACPI parsing iteration we allocate IOMMU specific 643 * data structures, initialize the per PCI segment device/alias/rlookup table 644 * and also basically initialize the hardware. 645 * 646 ****************************************************************************/ 647 648 /* Allocate per PCI segment device table */ 649 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) 650 { 651 pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, 652 get_order(pci_seg->dev_table_size)); 653 if (!pci_seg->dev_table) 654 return -ENOMEM; 655 656 return 0; 657 } 658 659 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) 660 { 661 free_pages((unsigned long)pci_seg->dev_table, 662 get_order(pci_seg->dev_table_size)); 663 pci_seg->dev_table = NULL; 664 } 665 666 /* Allocate per PCI segment IOMMU rlookup table. */ 667 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 668 { 669 pci_seg->rlookup_table = (void *)__get_free_pages( 670 GFP_KERNEL | __GFP_ZERO, 671 get_order(pci_seg->rlookup_table_size)); 672 if (pci_seg->rlookup_table == NULL) 673 return -ENOMEM; 674 675 return 0; 676 } 677 678 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) 679 { 680 free_pages((unsigned long)pci_seg->rlookup_table, 681 get_order(pci_seg->rlookup_table_size)); 682 pci_seg->rlookup_table = NULL; 683 } 684 685 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 686 { 687 pci_seg->irq_lookup_table = (void *)__get_free_pages( 688 GFP_KERNEL | __GFP_ZERO, 689 get_order(pci_seg->rlookup_table_size)); 690 kmemleak_alloc(pci_seg->irq_lookup_table, 691 pci_seg->rlookup_table_size, 1, GFP_KERNEL); 692 if (pci_seg->irq_lookup_table == NULL) 693 return -ENOMEM; 694 695 return 0; 696 } 697 698 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) 699 { 700 kmemleak_free(pci_seg->irq_lookup_table); 701 free_pages((unsigned long)pci_seg->irq_lookup_table, 702 get_order(pci_seg->rlookup_table_size)); 703 pci_seg->irq_lookup_table = NULL; 704 } 705 706 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) 707 { 708 int i; 709 710 pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL, 711 get_order(pci_seg->alias_table_size)); 712 if (!pci_seg->alias_table) 713 return -ENOMEM; 714 715 /* 716 * let all alias entries point to itself 717 */ 718 for (i = 0; i <= pci_seg->last_bdf; ++i) 719 pci_seg->alias_table[i] = i; 720 721 return 0; 722 } 723 724 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) 725 { 726 free_pages((unsigned long)pci_seg->alias_table, 727 get_order(pci_seg->alias_table_size)); 
728 pci_seg->alias_table = NULL; 729 } 730 731 /* 732 * Allocates the command buffer. This buffer is per AMD IOMMU. We can 733 * write commands to that buffer later and the IOMMU will execute them 734 * asynchronously 735 */ 736 static int __init alloc_command_buffer(struct amd_iommu *iommu) 737 { 738 iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 739 get_order(CMD_BUFFER_SIZE)); 740 741 return iommu->cmd_buf ? 0 : -ENOMEM; 742 } 743 744 /* 745 * Interrupt handler has processed all pending events and adjusted head 746 * and tail pointer. Reset overflow mask and restart logging again. 747 */ 748 static void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, 749 u8 cntrl_intr, u8 cntrl_log, 750 u32 status_run_mask, u32 status_overflow_mask) 751 { 752 u32 status; 753 754 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 755 if (status & status_run_mask) 756 return; 757 758 pr_info_ratelimited("IOMMU %s log restarting\n", evt_type); 759 760 iommu_feature_disable(iommu, cntrl_log); 761 iommu_feature_disable(iommu, cntrl_intr); 762 763 writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET); 764 765 iommu_feature_enable(iommu, cntrl_intr); 766 iommu_feature_enable(iommu, cntrl_log); 767 } 768 769 /* 770 * This function restarts event logging in case the IOMMU experienced 771 * an event log buffer overflow. 772 */ 773 void amd_iommu_restart_event_logging(struct amd_iommu *iommu) 774 { 775 amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN, 776 CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK, 777 MMIO_STATUS_EVT_OVERFLOW_MASK); 778 } 779 780 /* 781 * This function restarts event logging in case the IOMMU experienced 782 * GA log overflow. 783 */ 784 void amd_iommu_restart_ga_log(struct amd_iommu *iommu) 785 { 786 amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN, 787 CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK, 788 MMIO_STATUS_GALOG_OVERFLOW_MASK); 789 } 790 791 /* 792 * This function restarts ppr logging in case the IOMMU experienced 793 * PPR log overflow. 794 */ 795 void amd_iommu_restart_ppr_log(struct amd_iommu *iommu) 796 { 797 amd_iommu_restart_log(iommu, "PPR", CONTROL_PPRINT_EN, 798 CONTROL_PPRLOG_EN, MMIO_STATUS_PPR_RUN_MASK, 799 MMIO_STATUS_PPR_OVERFLOW_MASK); 800 } 801 802 /* 803 * This function resets the command buffer if the IOMMU stopped fetching 804 * commands from it. 805 */ 806 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) 807 { 808 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 809 810 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 811 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 812 iommu->cmd_buf_head = 0; 813 iommu->cmd_buf_tail = 0; 814 815 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); 816 } 817 818 /* 819 * This function writes the command buffer address to the hardware and 820 * enables it. 
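 * The size field (MMIO_CMD_SIZE_512) is OR-ed into the same register value as
 * the physical base address; amd_iommu_reset_cmd_buffer() then zeroes the
 * head/tail pointers and re-enables command processing.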
821 */ 822 static void iommu_enable_command_buffer(struct amd_iommu *iommu) 823 { 824 u64 entry; 825 826 BUG_ON(iommu->cmd_buf == NULL); 827 828 entry = iommu_virt_to_phys(iommu->cmd_buf); 829 entry |= MMIO_CMD_SIZE_512; 830 831 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, 832 &entry, sizeof(entry)); 833 834 amd_iommu_reset_cmd_buffer(iommu); 835 } 836 837 /* 838 * This function disables the command buffer 839 */ 840 static void iommu_disable_command_buffer(struct amd_iommu *iommu) 841 { 842 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); 843 } 844 845 static void __init free_command_buffer(struct amd_iommu *iommu) 846 { 847 free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); 848 } 849 850 static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, 851 gfp_t gfp, size_t size) 852 { 853 int order = get_order(size); 854 void *buf = (void *)__get_free_pages(gfp, order); 855 856 if (buf && 857 check_feature(FEATURE_SNP) && 858 set_memory_4k((unsigned long)buf, (1 << order))) { 859 free_pages((unsigned long)buf, order); 860 buf = NULL; 861 } 862 863 return buf; 864 } 865 866 /* allocates the memory where the IOMMU will log its events to */ 867 static int __init alloc_event_buffer(struct amd_iommu *iommu) 868 { 869 iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 870 EVT_BUFFER_SIZE); 871 872 return iommu->evt_buf ? 0 : -ENOMEM; 873 } 874 875 static void iommu_enable_event_buffer(struct amd_iommu *iommu) 876 { 877 u64 entry; 878 879 BUG_ON(iommu->evt_buf == NULL); 880 881 entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; 882 883 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 884 &entry, sizeof(entry)); 885 886 /* set head and tail to zero manually */ 887 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); 888 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); 889 890 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 891 } 892 893 /* 894 * This function disables the event log buffer 895 */ 896 static void iommu_disable_event_buffer(struct amd_iommu *iommu) 897 { 898 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); 899 } 900 901 static void __init free_event_buffer(struct amd_iommu *iommu) 902 { 903 free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); 904 } 905 906 /* allocates the memory where the IOMMU will log its events to */ 907 static int __init alloc_ppr_log(struct amd_iommu *iommu) 908 { 909 iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 910 PPR_LOG_SIZE); 911 912 return iommu->ppr_log ? 
0 : -ENOMEM; 913 } 914 915 static void iommu_enable_ppr_log(struct amd_iommu *iommu) 916 { 917 u64 entry; 918 919 if (iommu->ppr_log == NULL) 920 return; 921 922 iommu_feature_enable(iommu, CONTROL_PPR_EN); 923 924 entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; 925 926 memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, 927 &entry, sizeof(entry)); 928 929 /* set head and tail to zero manually */ 930 writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); 931 writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); 932 933 iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); 934 iommu_feature_enable(iommu, CONTROL_PPRINT_EN); 935 } 936 937 static void __init free_ppr_log(struct amd_iommu *iommu) 938 { 939 free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE)); 940 } 941 942 static void free_ga_log(struct amd_iommu *iommu) 943 { 944 #ifdef CONFIG_IRQ_REMAP 945 free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE)); 946 free_pages((unsigned long)iommu->ga_log_tail, get_order(8)); 947 #endif 948 } 949 950 #ifdef CONFIG_IRQ_REMAP 951 static int iommu_ga_log_enable(struct amd_iommu *iommu) 952 { 953 u32 status, i; 954 u64 entry; 955 956 if (!iommu->ga_log) 957 return -EINVAL; 958 959 entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; 960 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, 961 &entry, sizeof(entry)); 962 entry = (iommu_virt_to_phys(iommu->ga_log_tail) & 963 (BIT_ULL(52)-1)) & ~7ULL; 964 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, 965 &entry, sizeof(entry)); 966 writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); 967 writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); 968 969 970 iommu_feature_enable(iommu, CONTROL_GAINT_EN); 971 iommu_feature_enable(iommu, CONTROL_GALOG_EN); 972 973 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { 974 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 975 if (status & (MMIO_STATUS_GALOG_RUN_MASK)) 976 break; 977 udelay(10); 978 } 979 980 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) 981 return -EINVAL; 982 983 return 0; 984 } 985 986 static int iommu_init_ga_log(struct amd_iommu *iommu) 987 { 988 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 989 return 0; 990 991 iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 992 get_order(GA_LOG_SIZE)); 993 if (!iommu->ga_log) 994 goto err_out; 995 996 iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 997 get_order(8)); 998 if (!iommu->ga_log_tail) 999 goto err_out; 1000 1001 return 0; 1002 err_out: 1003 free_ga_log(iommu); 1004 return -EINVAL; 1005 } 1006 #endif /* CONFIG_IRQ_REMAP */ 1007 1008 static int __init alloc_cwwb_sem(struct amd_iommu *iommu) 1009 { 1010 iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1); 1011 1012 return iommu->cmd_sem ? 0 : -ENOMEM; 1013 } 1014 1015 static void __init free_cwwb_sem(struct amd_iommu *iommu) 1016 { 1017 if (iommu->cmd_sem) 1018 free_page((unsigned long)iommu->cmd_sem); 1019 } 1020 1021 static void iommu_enable_xt(struct amd_iommu *iommu) 1022 { 1023 #ifdef CONFIG_IRQ_REMAP 1024 /* 1025 * XT mode (32-bit APIC destination ID) requires 1026 * GA mode (128-bit IRTE support) as a prerequisite. 
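 * amd_iommu_xt_mode is only switched to IRQ_REMAP_X2APIC_MODE when the IVHD
 * advertises XTSup (see init_iommu_one()), so both conditions below must hold
 * before CONTROL_XT_EN is set.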
1027 */ 1028 if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) && 1029 amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 1030 iommu_feature_enable(iommu, CONTROL_XT_EN); 1031 #endif /* CONFIG_IRQ_REMAP */ 1032 } 1033 1034 static void iommu_enable_gt(struct amd_iommu *iommu) 1035 { 1036 if (!check_feature(FEATURE_GT)) 1037 return; 1038 1039 iommu_feature_enable(iommu, CONTROL_GT_EN); 1040 } 1041 1042 /* sets a specific bit in the device table entry. */ 1043 static void __set_dev_entry_bit(struct dev_table_entry *dev_table, 1044 u16 devid, u8 bit) 1045 { 1046 int i = (bit >> 6) & 0x03; 1047 int _bit = bit & 0x3f; 1048 1049 dev_table[devid].data[i] |= (1UL << _bit); 1050 } 1051 1052 static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) 1053 { 1054 struct dev_table_entry *dev_table = get_dev_table(iommu); 1055 1056 return __set_dev_entry_bit(dev_table, devid, bit); 1057 } 1058 1059 static int __get_dev_entry_bit(struct dev_table_entry *dev_table, 1060 u16 devid, u8 bit) 1061 { 1062 int i = (bit >> 6) & 0x03; 1063 int _bit = bit & 0x3f; 1064 1065 return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; 1066 } 1067 1068 static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) 1069 { 1070 struct dev_table_entry *dev_table = get_dev_table(iommu); 1071 1072 return __get_dev_entry_bit(dev_table, devid, bit); 1073 } 1074 1075 static bool __copy_device_table(struct amd_iommu *iommu) 1076 { 1077 u64 int_ctl, int_tab_len, entry = 0; 1078 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1079 struct dev_table_entry *old_devtb = NULL; 1080 u32 lo, hi, devid, old_devtb_size; 1081 phys_addr_t old_devtb_phys; 1082 u16 dom_id, dte_v, irq_v; 1083 gfp_t gfp_flag; 1084 u64 tmp; 1085 1086 /* Each IOMMU use separate device table with the same size */ 1087 lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); 1088 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); 1089 entry = (((u64) hi) << 32) + lo; 1090 1091 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; 1092 if (old_devtb_size != pci_seg->dev_table_size) { 1093 pr_err("The device table size of IOMMU:%d is not expected!\n", 1094 iommu->index); 1095 return false; 1096 } 1097 1098 /* 1099 * When SME is enabled in the first kernel, the entry includes the 1100 * memory encryption mask(sme_me_mask), we must remove the memory 1101 * encryption mask to obtain the true physical address in kdump kernel. 1102 */ 1103 old_devtb_phys = __sme_clr(entry) & PAGE_MASK; 1104 1105 if (old_devtb_phys >= 0x100000000ULL) { 1106 pr_err("The address of old device table is above 4G, not trustworthy!\n"); 1107 return false; 1108 } 1109 old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel()) 1110 ? 
		    (__force void *)ioremap_encrypted(old_devtb_phys,
						      pci_seg->dev_table_size)
		    : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);

	if (!old_devtb)
		return false;

	gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
	pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
						get_order(pci_seg->dev_table_size));
	if (pci_seg->old_dev_tbl_cpy == NULL) {
		pr_err("Failed to allocate memory for copying old device table!\n");
		memunmap(old_devtb);
		return false;
	}

	for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
		pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
		dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
		dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;

		if (dte_v && dom_id) {
			pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
			pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
			__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
			/* If gcr3 table existed, mask it out */
			if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
				tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
				tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
				pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
				tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
				tmp |= DTE_FLAG_GV;
				pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
			}
		}

		irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
		int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
		int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
		if (irq_v && (int_ctl || int_tab_len)) {
			if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
			    (int_tab_len != DTE_INTTABLEN)) {
				pr_err("Wrong old irq remapping flag: %#x\n", devid);
				memunmap(old_devtb);
				return false;
			}

			pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
		}
	}
	memunmap(old_devtb);

	return true;
}

static bool copy_device_table(void)
{
	struct amd_iommu *iommu;
	struct amd_iommu_pci_seg *pci_seg;

	if (!amd_iommu_pre_enabled)
		return false;

	pr_warn("Translation is already enabled - trying to copy translation structures\n");

	/*
	 * All IOMMUs within a PCI segment share a common device table.
	 * Hence, copy the device table only once per PCI segment.
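	 * The first IOMMU found for a segment performs the copy; __copy_device_table()
	 * fails the whole operation if the old table's size or IRQ-remapping fields
	 * look inconsistent.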
1178 */ 1179 for_each_pci_segment(pci_seg) { 1180 for_each_iommu(iommu) { 1181 if (pci_seg->id != iommu->pci_seg->id) 1182 continue; 1183 if (!__copy_device_table(iommu)) 1184 return false; 1185 break; 1186 } 1187 } 1188 1189 return true; 1190 } 1191 1192 void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) 1193 { 1194 int sysmgt; 1195 1196 sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | 1197 (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); 1198 1199 if (sysmgt == 0x01) 1200 set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); 1201 } 1202 1203 /* 1204 * This function takes the device specific flags read from the ACPI 1205 * table and sets up the device table entry with that information 1206 */ 1207 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, 1208 u16 devid, u32 flags, u32 ext_flags) 1209 { 1210 if (flags & ACPI_DEVFLAG_INITPASS) 1211 set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); 1212 if (flags & ACPI_DEVFLAG_EXTINT) 1213 set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); 1214 if (flags & ACPI_DEVFLAG_NMI) 1215 set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); 1216 if (flags & ACPI_DEVFLAG_SYSMGT1) 1217 set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); 1218 if (flags & ACPI_DEVFLAG_SYSMGT2) 1219 set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2); 1220 if (flags & ACPI_DEVFLAG_LINT0) 1221 set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); 1222 if (flags & ACPI_DEVFLAG_LINT1) 1223 set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); 1224 1225 amd_iommu_apply_erratum_63(iommu, devid); 1226 1227 amd_iommu_set_rlookup_table(iommu, devid); 1228 } 1229 1230 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) 1231 { 1232 struct devid_map *entry; 1233 struct list_head *list; 1234 1235 if (type == IVHD_SPECIAL_IOAPIC) 1236 list = &ioapic_map; 1237 else if (type == IVHD_SPECIAL_HPET) 1238 list = &hpet_map; 1239 else 1240 return -EINVAL; 1241 1242 list_for_each_entry(entry, list, list) { 1243 if (!(entry->id == id && entry->cmd_line)) 1244 continue; 1245 1246 pr_info("Command-line override present for %s id %d - ignoring\n", 1247 type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id); 1248 1249 *devid = entry->devid; 1250 1251 return 0; 1252 } 1253 1254 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1255 if (!entry) 1256 return -ENOMEM; 1257 1258 entry->id = id; 1259 entry->devid = *devid; 1260 entry->cmd_line = cmd_line; 1261 1262 list_add_tail(&entry->list, list); 1263 1264 return 0; 1265 } 1266 1267 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, 1268 bool cmd_line) 1269 { 1270 struct acpihid_map_entry *entry; 1271 struct list_head *list = &acpihid_map; 1272 1273 list_for_each_entry(entry, list, list) { 1274 if (strcmp(entry->hid, hid) || 1275 (*uid && *entry->uid && strcmp(entry->uid, uid)) || 1276 !entry->cmd_line) 1277 continue; 1278 1279 pr_info("Command-line override for hid:%s uid:%s\n", 1280 hid, uid); 1281 *devid = entry->devid; 1282 return 0; 1283 } 1284 1285 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1286 if (!entry) 1287 return -ENOMEM; 1288 1289 memcpy(entry->uid, uid, strlen(uid)); 1290 memcpy(entry->hid, hid, strlen(hid)); 1291 entry->devid = *devid; 1292 entry->cmd_line = cmd_line; 1293 entry->root_devid = (entry->devid & (~0x7)); 1294 1295 pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n", 1296 entry->cmd_line ? 
"cmd" : "ivrs", 1297 entry->hid, entry->uid, entry->root_devid); 1298 1299 list_add_tail(&entry->list, list); 1300 return 0; 1301 } 1302 1303 static int __init add_early_maps(void) 1304 { 1305 int i, ret; 1306 1307 for (i = 0; i < early_ioapic_map_size; ++i) { 1308 ret = add_special_device(IVHD_SPECIAL_IOAPIC, 1309 early_ioapic_map[i].id, 1310 &early_ioapic_map[i].devid, 1311 early_ioapic_map[i].cmd_line); 1312 if (ret) 1313 return ret; 1314 } 1315 1316 for (i = 0; i < early_hpet_map_size; ++i) { 1317 ret = add_special_device(IVHD_SPECIAL_HPET, 1318 early_hpet_map[i].id, 1319 &early_hpet_map[i].devid, 1320 early_hpet_map[i].cmd_line); 1321 if (ret) 1322 return ret; 1323 } 1324 1325 for (i = 0; i < early_acpihid_map_size; ++i) { 1326 ret = add_acpi_hid_device(early_acpihid_map[i].hid, 1327 early_acpihid_map[i].uid, 1328 &early_acpihid_map[i].devid, 1329 early_acpihid_map[i].cmd_line); 1330 if (ret) 1331 return ret; 1332 } 1333 1334 return 0; 1335 } 1336 1337 /* 1338 * Takes a pointer to an AMD IOMMU entry in the ACPI table and 1339 * initializes the hardware and our data structures with it. 1340 */ 1341 static int __init init_iommu_from_acpi(struct amd_iommu *iommu, 1342 struct ivhd_header *h) 1343 { 1344 u8 *p = (u8 *)h; 1345 u8 *end = p, flags = 0; 1346 u16 devid = 0, devid_start = 0, devid_to = 0, seg_id; 1347 u32 dev_i, ext_flags = 0; 1348 bool alias = false; 1349 struct ivhd_entry *e; 1350 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; 1351 u32 ivhd_size; 1352 int ret; 1353 1354 1355 ret = add_early_maps(); 1356 if (ret) 1357 return ret; 1358 1359 amd_iommu_apply_ivrs_quirks(); 1360 1361 /* 1362 * First save the recommended feature enable bits from ACPI 1363 */ 1364 iommu->acpi_flags = h->flags; 1365 1366 /* 1367 * Done. Now parse the device entries 1368 */ 1369 ivhd_size = get_ivhd_header_size(h); 1370 if (!ivhd_size) { 1371 pr_err("Unsupported IVHD type %#x\n", h->type); 1372 return -EINVAL; 1373 } 1374 1375 p += ivhd_size; 1376 1377 end += h->length; 1378 1379 1380 while (p < end) { 1381 e = (struct ivhd_entry *)p; 1382 seg_id = pci_seg->id; 1383 1384 switch (e->type) { 1385 case IVHD_DEV_ALL: 1386 1387 DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); 1388 1389 for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) 1390 set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); 1391 break; 1392 case IVHD_DEV_SELECT: 1393 1394 DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " 1395 "flags: %02x\n", 1396 seg_id, PCI_BUS_NUM(e->devid), 1397 PCI_SLOT(e->devid), 1398 PCI_FUNC(e->devid), 1399 e->flags); 1400 1401 devid = e->devid; 1402 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1403 break; 1404 case IVHD_DEV_SELECT_RANGE_START: 1405 1406 DUMP_printk(" DEV_SELECT_RANGE_START\t " 1407 "devid: %04x:%02x:%02x.%x flags: %02x\n", 1408 seg_id, PCI_BUS_NUM(e->devid), 1409 PCI_SLOT(e->devid), 1410 PCI_FUNC(e->devid), 1411 e->flags); 1412 1413 devid_start = e->devid; 1414 flags = e->flags; 1415 ext_flags = 0; 1416 alias = false; 1417 break; 1418 case IVHD_DEV_ALIAS: 1419 1420 DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " 1421 "flags: %02x devid_to: %02x:%02x.%x\n", 1422 seg_id, PCI_BUS_NUM(e->devid), 1423 PCI_SLOT(e->devid), 1424 PCI_FUNC(e->devid), 1425 e->flags, 1426 PCI_BUS_NUM(e->ext >> 8), 1427 PCI_SLOT(e->ext >> 8), 1428 PCI_FUNC(e->ext >> 8)); 1429 1430 devid = e->devid; 1431 devid_to = e->ext >> 8; 1432 set_dev_entry_from_acpi(iommu, devid , e->flags, 0); 1433 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); 1434 pci_seg->alias_table[devid] = devid_to; 1435 
break; 1436 case IVHD_DEV_ALIAS_RANGE: 1437 1438 DUMP_printk(" DEV_ALIAS_RANGE\t\t " 1439 "devid: %04x:%02x:%02x.%x flags: %02x " 1440 "devid_to: %04x:%02x:%02x.%x\n", 1441 seg_id, PCI_BUS_NUM(e->devid), 1442 PCI_SLOT(e->devid), 1443 PCI_FUNC(e->devid), 1444 e->flags, 1445 seg_id, PCI_BUS_NUM(e->ext >> 8), 1446 PCI_SLOT(e->ext >> 8), 1447 PCI_FUNC(e->ext >> 8)); 1448 1449 devid_start = e->devid; 1450 flags = e->flags; 1451 devid_to = e->ext >> 8; 1452 ext_flags = 0; 1453 alias = true; 1454 break; 1455 case IVHD_DEV_EXT_SELECT: 1456 1457 DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " 1458 "flags: %02x ext: %08x\n", 1459 seg_id, PCI_BUS_NUM(e->devid), 1460 PCI_SLOT(e->devid), 1461 PCI_FUNC(e->devid), 1462 e->flags, e->ext); 1463 1464 devid = e->devid; 1465 set_dev_entry_from_acpi(iommu, devid, e->flags, 1466 e->ext); 1467 break; 1468 case IVHD_DEV_EXT_SELECT_RANGE: 1469 1470 DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " 1471 "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", 1472 seg_id, PCI_BUS_NUM(e->devid), 1473 PCI_SLOT(e->devid), 1474 PCI_FUNC(e->devid), 1475 e->flags, e->ext); 1476 1477 devid_start = e->devid; 1478 flags = e->flags; 1479 ext_flags = e->ext; 1480 alias = false; 1481 break; 1482 case IVHD_DEV_RANGE_END: 1483 1484 DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", 1485 seg_id, PCI_BUS_NUM(e->devid), 1486 PCI_SLOT(e->devid), 1487 PCI_FUNC(e->devid)); 1488 1489 devid = e->devid; 1490 for (dev_i = devid_start; dev_i <= devid; ++dev_i) { 1491 if (alias) { 1492 pci_seg->alias_table[dev_i] = devid_to; 1493 set_dev_entry_from_acpi(iommu, 1494 devid_to, flags, ext_flags); 1495 } 1496 set_dev_entry_from_acpi(iommu, dev_i, 1497 flags, ext_flags); 1498 } 1499 break; 1500 case IVHD_DEV_SPECIAL: { 1501 u8 handle, type; 1502 const char *var; 1503 u32 devid; 1504 int ret; 1505 1506 handle = e->ext & 0xff; 1507 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8)); 1508 type = (e->ext >> 24) & 0xff; 1509 1510 if (type == IVHD_SPECIAL_IOAPIC) 1511 var = "IOAPIC"; 1512 else if (type == IVHD_SPECIAL_HPET) 1513 var = "HPET"; 1514 else 1515 var = "UNKNOWN"; 1516 1517 DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", 1518 var, (int)handle, 1519 seg_id, PCI_BUS_NUM(devid), 1520 PCI_SLOT(devid), 1521 PCI_FUNC(devid)); 1522 1523 ret = add_special_device(type, handle, &devid, false); 1524 if (ret) 1525 return ret; 1526 1527 /* 1528 * add_special_device might update the devid in case a 1529 * command-line override is present. So call 1530 * set_dev_entry_from_acpi after add_special_device. 
1531 */ 1532 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1533 1534 break; 1535 } 1536 case IVHD_DEV_ACPI_HID: { 1537 u32 devid; 1538 u8 hid[ACPIHID_HID_LEN]; 1539 u8 uid[ACPIHID_UID_LEN]; 1540 int ret; 1541 1542 if (h->type != 0x40) { 1543 pr_err(FW_BUG "Invalid IVHD device type %#x\n", 1544 e->type); 1545 break; 1546 } 1547 1548 BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1); 1549 memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1); 1550 hid[ACPIHID_HID_LEN - 1] = '\0'; 1551 1552 if (!(*hid)) { 1553 pr_err(FW_BUG "Invalid HID.\n"); 1554 break; 1555 } 1556 1557 uid[0] = '\0'; 1558 switch (e->uidf) { 1559 case UID_NOT_PRESENT: 1560 1561 if (e->uidl != 0) 1562 pr_warn(FW_BUG "Invalid UID length.\n"); 1563 1564 break; 1565 case UID_IS_INTEGER: 1566 1567 sprintf(uid, "%d", e->uid); 1568 1569 break; 1570 case UID_IS_CHARACTER: 1571 1572 memcpy(uid, &e->uid, e->uidl); 1573 uid[e->uidl] = '\0'; 1574 1575 break; 1576 default: 1577 break; 1578 } 1579 1580 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); 1581 DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", 1582 hid, uid, seg_id, 1583 PCI_BUS_NUM(devid), 1584 PCI_SLOT(devid), 1585 PCI_FUNC(devid)); 1586 1587 flags = e->flags; 1588 1589 ret = add_acpi_hid_device(hid, uid, &devid, false); 1590 if (ret) 1591 return ret; 1592 1593 /* 1594 * add_special_device might update the devid in case a 1595 * command-line override is present. So call 1596 * set_dev_entry_from_acpi after add_special_device. 1597 */ 1598 set_dev_entry_from_acpi(iommu, devid, e->flags, 0); 1599 1600 break; 1601 } 1602 default: 1603 break; 1604 } 1605 1606 p += ivhd_entry_length(p); 1607 } 1608 1609 return 0; 1610 } 1611 1612 /* Allocate PCI segment data structure */ 1613 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, 1614 struct acpi_table_header *ivrs_base) 1615 { 1616 struct amd_iommu_pci_seg *pci_seg; 1617 int last_bdf; 1618 1619 /* 1620 * First parse ACPI tables to find the largest Bus/Dev/Func we need to 1621 * handle in this PCI segment. Upon this information the shared data 1622 * structures for the PCI segments in the system will be allocated. 
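 * For example, a segment whose IVHD contains an IVHD_DEV_ALL entry ends up with
 * last_bdf = 0xffff; assuming the usual 32-byte device table entry, tbl_size()
 * then yields a 2 MiB device table for that segment.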
1623 */ 1624 last_bdf = find_last_devid_acpi(ivrs_base, id); 1625 if (last_bdf < 0) 1626 return NULL; 1627 1628 pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL); 1629 if (pci_seg == NULL) 1630 return NULL; 1631 1632 pci_seg->last_bdf = last_bdf; 1633 DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); 1634 pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); 1635 pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); 1636 pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); 1637 1638 pci_seg->id = id; 1639 init_llist_head(&pci_seg->dev_data_list); 1640 INIT_LIST_HEAD(&pci_seg->unity_map); 1641 list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); 1642 1643 if (alloc_dev_table(pci_seg)) 1644 return NULL; 1645 if (alloc_alias_table(pci_seg)) 1646 return NULL; 1647 if (alloc_rlookup_table(pci_seg)) 1648 return NULL; 1649 1650 return pci_seg; 1651 } 1652 1653 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, 1654 struct acpi_table_header *ivrs_base) 1655 { 1656 struct amd_iommu_pci_seg *pci_seg; 1657 1658 for_each_pci_segment(pci_seg) { 1659 if (pci_seg->id == id) 1660 return pci_seg; 1661 } 1662 1663 return alloc_pci_segment(id, ivrs_base); 1664 } 1665 1666 static void __init free_pci_segments(void) 1667 { 1668 struct amd_iommu_pci_seg *pci_seg, *next; 1669 1670 for_each_pci_segment_safe(pci_seg, next) { 1671 list_del(&pci_seg->list); 1672 free_irq_lookup_table(pci_seg); 1673 free_rlookup_table(pci_seg); 1674 free_alias_table(pci_seg); 1675 free_dev_table(pci_seg); 1676 kfree(pci_seg); 1677 } 1678 } 1679 1680 static void __init free_iommu_one(struct amd_iommu *iommu) 1681 { 1682 free_cwwb_sem(iommu); 1683 free_command_buffer(iommu); 1684 free_event_buffer(iommu); 1685 free_ppr_log(iommu); 1686 free_ga_log(iommu); 1687 iommu_unmap_mmio_space(iommu); 1688 } 1689 1690 static void __init free_iommu_all(void) 1691 { 1692 struct amd_iommu *iommu, *next; 1693 1694 for_each_iommu_safe(iommu, next) { 1695 list_del(&iommu->list); 1696 free_iommu_one(iommu); 1697 kfree(iommu); 1698 } 1699 } 1700 1701 /* 1702 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations) 1703 * Workaround: 1704 * BIOS should disable L2B micellaneous clock gating by setting 1705 * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b 1706 */ 1707 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) 1708 { 1709 u32 value; 1710 1711 if ((boot_cpu_data.x86 != 0x15) || 1712 (boot_cpu_data.x86_model < 0x10) || 1713 (boot_cpu_data.x86_model > 0x1f)) 1714 return; 1715 1716 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1717 pci_read_config_dword(iommu->dev, 0xf4, &value); 1718 1719 if (value & BIT(2)) 1720 return; 1721 1722 /* Select NB indirect register 0x90 and enable writing */ 1723 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); 1724 1725 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); 1726 pci_info(iommu->dev, "Applying erratum 746 workaround\n"); 1727 1728 /* Clear the enable writing bit */ 1729 pci_write_config_dword(iommu->dev, 0xf0, 0x90); 1730 } 1731 1732 /* 1733 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) 1734 * Workaround: 1735 * BIOS should enable ATS write permission check by setting 1736 * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b 1737 */ 1738 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) 1739 { 1740 u32 value; 1741 1742 if ((boot_cpu_data.x86 != 0x15) || 1743 (boot_cpu_data.x86_model < 0x30) || 
	    (boot_cpu_data.x86_model > 0x3f))
		return;

	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
	value = iommu_read_l2(iommu, 0x47);

	if (value & BIT(0))
		return;

	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
	iommu_write_l2(iommu, 0x47, value | BIT(0));

	pci_info(iommu->dev, "Applying ATS write check workaround\n");
}

/*
 * This function glues the initialization function for one IOMMU
 * together and also allocates the command buffer and programs the
 * hardware. It does NOT enable the IOMMU. This is done afterwards.
 */
static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
				 struct acpi_table_header *ivrs_base)
{
	struct amd_iommu_pci_seg *pci_seg;

	pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
	if (pci_seg == NULL)
		return -ENOMEM;
	iommu->pci_seg = pci_seg;

	raw_spin_lock_init(&iommu->lock);
	atomic64_set(&iommu->cmd_sem_val, 0);

	/* Add IOMMU to internal data structures */
	list_add_tail(&iommu->list, &amd_iommu_list);
	iommu->index = amd_iommus_present++;

	if (unlikely(iommu->index >= MAX_IOMMUS)) {
		WARN(1, "System has more IOMMUs than supported by this driver\n");
		return -ENOSYS;
	}

	/* Index is fine - add IOMMU to the array */
	amd_iommus[iommu->index] = iommu;

	/*
	 * Copy data from ACPI table entry to the iommu struct
	 */
	iommu->devid = h->devid;
	iommu->cap_ptr = h->cap_ptr;
	iommu->mmio_phys = h->mmio_phys;

	switch (h->type) {
	case 0x10:
		/* Check if IVHD EFR contains proper max banks/counters */
		if ((h->efr_attr != 0) &&
		    ((h->efr_attr & (0xF << 13)) != 0) &&
		    ((h->efr_attr & (0x3F << 17)) != 0))
			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
		else
			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;

		/*
		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
		 * GAM also requires GA mode. Therefore, we need to
		 * check cmpxchg16b support before enabling it.
		 */
		if (!boot_cpu_has(X86_FEATURE_CX16) ||
		    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
			amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
		break;
	case 0x11:
	case 0x40:
		if (h->efr_reg & (1 << 9))
			iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
		else
			iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;

		/*
		 * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
		 * XT and GAM also require GA mode. Therefore, we need to
		 * check cmpxchg16b support before enabling them.
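		 * Without X86_FEATURE_CX16 the driver falls back to
		 * AMD_IOMMU_GUEST_IR_LEGACY and never reaches the XTSup check below.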
1826 */ 1827 if (!boot_cpu_has(X86_FEATURE_CX16) || 1828 ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { 1829 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; 1830 break; 1831 } 1832 1833 if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) 1834 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; 1835 1836 early_iommu_features_init(iommu, h); 1837 1838 break; 1839 default: 1840 return -EINVAL; 1841 } 1842 1843 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, 1844 iommu->mmio_phys_end); 1845 if (!iommu->mmio_base) 1846 return -ENOMEM; 1847 1848 return init_iommu_from_acpi(iommu, h); 1849 } 1850 1851 static int __init init_iommu_one_late(struct amd_iommu *iommu) 1852 { 1853 int ret; 1854 1855 if (alloc_cwwb_sem(iommu)) 1856 return -ENOMEM; 1857 1858 if (alloc_command_buffer(iommu)) 1859 return -ENOMEM; 1860 1861 if (alloc_event_buffer(iommu)) 1862 return -ENOMEM; 1863 1864 iommu->int_enabled = false; 1865 1866 init_translation_status(iommu); 1867 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { 1868 iommu_disable(iommu); 1869 clear_translation_pre_enabled(iommu); 1870 pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n", 1871 iommu->index); 1872 } 1873 if (amd_iommu_pre_enabled) 1874 amd_iommu_pre_enabled = translation_pre_enabled(iommu); 1875 1876 if (amd_iommu_irq_remap) { 1877 ret = amd_iommu_create_irq_domain(iommu); 1878 if (ret) 1879 return ret; 1880 } 1881 1882 /* 1883 * Make sure IOMMU is not considered to translate itself. The IVRS 1884 * table tells us so, but this is a lie! 1885 */ 1886 iommu->pci_seg->rlookup_table[iommu->devid] = NULL; 1887 1888 return 0; 1889 } 1890 1891 /** 1892 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type 1893 * @ivrs: Pointer to the IVRS header 1894 * 1895 * This function search through all IVDB of the maximum supported IVHD 1896 */ 1897 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) 1898 { 1899 u8 *base = (u8 *)ivrs; 1900 struct ivhd_header *ivhd = (struct ivhd_header *) 1901 (base + IVRS_HEADER_LENGTH); 1902 u8 last_type = ivhd->type; 1903 u16 devid = ivhd->devid; 1904 1905 while (((u8 *)ivhd - base < ivrs->length) && 1906 (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) { 1907 u8 *p = (u8 *) ivhd; 1908 1909 if (ivhd->devid == devid) 1910 last_type = ivhd->type; 1911 ivhd = (struct ivhd_header *)(p + ivhd->length); 1912 } 1913 1914 return last_type; 1915 } 1916 1917 /* 1918 * Iterates over all IOMMU entries in the ACPI table, allocates the 1919 * IOMMU structure and initializes it with init_iommu_one() 1920 */ 1921 static int __init init_iommu_all(struct acpi_table_header *table) 1922 { 1923 u8 *p = (u8 *)table, *end = (u8 *)table; 1924 struct ivhd_header *h; 1925 struct amd_iommu *iommu; 1926 int ret; 1927 1928 end += table->length; 1929 p += IVRS_HEADER_LENGTH; 1930 1931 /* Phase 1: Process all IVHD blocks */ 1932 while (p < end) { 1933 h = (struct ivhd_header *)p; 1934 if (*p == amd_iommu_target_ivhd_type) { 1935 1936 DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x " 1937 "flags: %01x info %04x\n", 1938 h->pci_seg, PCI_BUS_NUM(h->devid), 1939 PCI_SLOT(h->devid), PCI_FUNC(h->devid), 1940 h->cap_ptr, h->flags, h->info); 1941 DUMP_printk(" mmio-addr: %016llx\n", 1942 h->mmio_phys); 1943 1944 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 1945 if (iommu == NULL) 1946 return -ENOMEM; 1947 1948 ret = init_iommu_one(iommu, h, table); 1949 if (ret) 1950 return ret; 1951 } 1952 p += h->length; 1953 1954 } 1955 WARN_ON(p != end); 1956 1957 /* Phase 2 : Early feature 
support check */ 1958 get_global_efr(); 1959 1960 /* Phase 3 : Enabling IOMMU features */ 1961 for_each_iommu(iommu) { 1962 ret = init_iommu_one_late(iommu); 1963 if (ret) 1964 return ret; 1965 } 1966 1967 return 0; 1968 } 1969 1970 static void init_iommu_perf_ctr(struct amd_iommu *iommu) 1971 { 1972 u64 val; 1973 struct pci_dev *pdev = iommu->dev; 1974 1975 if (!check_feature(FEATURE_PC)) 1976 return; 1977 1978 amd_iommu_pc_present = true; 1979 1980 pci_info(pdev, "IOMMU performance counters supported\n"); 1981 1982 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); 1983 iommu->max_banks = (u8) ((val >> 12) & 0x3f); 1984 iommu->max_counters = (u8) ((val >> 7) & 0xf); 1985 1986 return; 1987 } 1988 1989 static ssize_t amd_iommu_show_cap(struct device *dev, 1990 struct device_attribute *attr, 1991 char *buf) 1992 { 1993 struct amd_iommu *iommu = dev_to_amd_iommu(dev); 1994 return sysfs_emit(buf, "%x\n", iommu->cap); 1995 } 1996 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); 1997 1998 static ssize_t amd_iommu_show_features(struct device *dev, 1999 struct device_attribute *attr, 2000 char *buf) 2001 { 2002 return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2); 2003 } 2004 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); 2005 2006 static struct attribute *amd_iommu_attrs[] = { 2007 &dev_attr_cap.attr, 2008 &dev_attr_features.attr, 2009 NULL, 2010 }; 2011 2012 static struct attribute_group amd_iommu_group = { 2013 .name = "amd-iommu", 2014 .attrs = amd_iommu_attrs, 2015 }; 2016 2017 static const struct attribute_group *amd_iommu_groups[] = { 2018 &amd_iommu_group, 2019 NULL, 2020 }; 2021 2022 /* 2023 * Note: IVHD 0x11 and 0x40 also contains exact copy 2024 * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. 2025 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). 2026 */ 2027 static void __init late_iommu_features_init(struct amd_iommu *iommu) 2028 { 2029 u64 features, features2; 2030 2031 if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) 2032 return; 2033 2034 /* read extended feature bits */ 2035 features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); 2036 features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); 2037 2038 if (!amd_iommu_efr) { 2039 amd_iommu_efr = features; 2040 amd_iommu_efr2 = features2; 2041 return; 2042 } 2043 2044 /* 2045 * Sanity check and warn if EFR values from 2046 * IVHD and MMIO conflict. 2047 */ 2048 if (features != amd_iommu_efr || 2049 features2 != amd_iommu_efr2) { 2050 pr_warn(FW_WARN 2051 "EFR mismatch. 
Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", 2052 features, amd_iommu_efr, 2053 features2, amd_iommu_efr2); 2054 } 2055 } 2056 2057 static int __init iommu_init_pci(struct amd_iommu *iommu) 2058 { 2059 int cap_ptr = iommu->cap_ptr; 2060 int ret; 2061 2062 iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2063 PCI_BUS_NUM(iommu->devid), 2064 iommu->devid & 0xff); 2065 if (!iommu->dev) 2066 return -ENODEV; 2067 2068 /* Prevent binding other PCI device drivers to IOMMU devices */ 2069 iommu->dev->match_driver = false; 2070 2071 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 2072 &iommu->cap); 2073 2074 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) 2075 amd_iommu_iotlb_sup = false; 2076 2077 late_iommu_features_init(iommu); 2078 2079 if (check_feature(FEATURE_GT)) { 2080 int glxval; 2081 u64 pasmax; 2082 2083 pasmax = amd_iommu_efr & FEATURE_PASID_MASK; 2084 pasmax >>= FEATURE_PASID_SHIFT; 2085 iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; 2086 2087 BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); 2088 2089 glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK; 2090 glxval >>= FEATURE_GLXVAL_SHIFT; 2091 2092 if (amd_iommu_max_glx_val == -1) 2093 amd_iommu_max_glx_val = glxval; 2094 else 2095 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); 2096 } 2097 2098 if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu)) 2099 return -ENOMEM; 2100 2101 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { 2102 pr_info("Using strict mode due to virtualization\n"); 2103 iommu_set_dma_strict(); 2104 amd_iommu_np_cache = true; 2105 } 2106 2107 init_iommu_perf_ctr(iommu); 2108 2109 if (amd_iommu_pgtable == AMD_IOMMU_V2) { 2110 if (!check_feature(FEATURE_GIOSUP) || 2111 !check_feature(FEATURE_GT)) { 2112 pr_warn("Cannot enable v2 page table for DMA-API. 
Fallback to v1.\n"); 2113 amd_iommu_pgtable = AMD_IOMMU_V1; 2114 } 2115 } 2116 2117 if (is_rd890_iommu(iommu->dev)) { 2118 int i, j; 2119 2120 iommu->root_pdev = 2121 pci_get_domain_bus_and_slot(iommu->pci_seg->id, 2122 iommu->dev->bus->number, 2123 PCI_DEVFN(0, 0)); 2124 2125 /* 2126 * Some rd890 systems may not be fully reconfigured by the 2127 * BIOS, so it's necessary for us to store this information so 2128 * it can be reprogrammed on resume 2129 */ 2130 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, 2131 &iommu->stored_addr_lo); 2132 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, 2133 &iommu->stored_addr_hi); 2134 2135 /* Low bit locks writes to configuration space */ 2136 iommu->stored_addr_lo &= ~1; 2137 2138 for (i = 0; i < 6; i++) 2139 for (j = 0; j < 0x12; j++) 2140 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); 2141 2142 for (i = 0; i < 0x83; i++) 2143 iommu->stored_l2[i] = iommu_read_l2(iommu, i); 2144 } 2145 2146 amd_iommu_erratum_746_workaround(iommu); 2147 amd_iommu_ats_write_check_workaround(iommu); 2148 2149 ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, 2150 amd_iommu_groups, "ivhd%d", iommu->index); 2151 if (ret) 2152 return ret; 2153 2154 iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); 2155 2156 return pci_enable_device(iommu->dev); 2157 } 2158 2159 static void print_iommu_info(void) 2160 { 2161 int i; 2162 static const char * const feat_str[] = { 2163 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", 2164 "IA", "GA", "HE", "PC" 2165 }; 2166 2167 if (amd_iommu_efr) { 2168 pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2); 2169 2170 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { 2171 if (check_feature(1ULL << i)) 2172 pr_cont(" %s", feat_str[i]); 2173 } 2174 2175 if (check_feature(FEATURE_GAM_VAPIC)) 2176 pr_cont(" GA_vAPIC"); 2177 2178 if (check_feature(FEATURE_SNP)) 2179 pr_cont(" SNP"); 2180 2181 pr_cont("\n"); 2182 } 2183 2184 if (irq_remapping_enabled) { 2185 pr_info("Interrupt remapping enabled\n"); 2186 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2187 pr_info("X2APIC enabled\n"); 2188 } 2189 if (amd_iommu_pgtable == AMD_IOMMU_V2) { 2190 pr_info("V2 page table enabled (Paging mode : %d level)\n", 2191 amd_iommu_gpt_level); 2192 } 2193 } 2194 2195 static int __init amd_iommu_init_pci(void) 2196 { 2197 struct amd_iommu *iommu; 2198 struct amd_iommu_pci_seg *pci_seg; 2199 int ret; 2200 2201 for_each_iommu(iommu) { 2202 ret = iommu_init_pci(iommu); 2203 if (ret) { 2204 pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n", 2205 iommu->index, ret); 2206 goto out; 2207 } 2208 /* Need to setup range after PCI init */ 2209 iommu_set_cwwb_range(iommu); 2210 } 2211 2212 /* 2213 * Order is important here to make sure any unity map requirements are 2214 * fulfilled. The unity mappings are created and written to the device 2215 * table during the iommu_init_pci() call. 2216 * 2217 * After that we call init_device_table_dma() to make sure any 2218 * uninitialized DTE will block DMA, and in the end we flush the caches 2219 * of all IOMMUs to make sure the changes to the device table are 2220 * active. 
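 *
 * init_device_table_dma() below walks every DTE of each PCI segment and
 * sets the VALID (and, unless SNP is enabled, TRANSLATION) bit, and
 * amd_iommu_flush_all_caches() then invalidates the per-IOMMU caches so
 * the updated entries take effect.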
2221 */ 2222 for_each_pci_segment(pci_seg) 2223 init_device_table_dma(pci_seg); 2224 2225 for_each_iommu(iommu) 2226 amd_iommu_flush_all_caches(iommu); 2227 2228 print_iommu_info(); 2229 2230 out: 2231 return ret; 2232 } 2233 2234 /**************************************************************************** 2235 * 2236 * The following functions initialize the MSI interrupts for all IOMMUs 2237 * in the system. It's a bit challenging because there could be multiple 2238 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per 2239 * pci_dev. 2240 * 2241 ****************************************************************************/ 2242 2243 static int iommu_setup_msi(struct amd_iommu *iommu) 2244 { 2245 int r; 2246 2247 r = pci_enable_msi(iommu->dev); 2248 if (r) 2249 return r; 2250 2251 r = request_threaded_irq(iommu->dev->irq, 2252 amd_iommu_int_handler, 2253 amd_iommu_int_thread, 2254 0, "AMD-Vi", 2255 iommu); 2256 2257 if (r) { 2258 pci_disable_msi(iommu->dev); 2259 return r; 2260 } 2261 2262 return 0; 2263 } 2264 2265 union intcapxt { 2266 u64 capxt; 2267 struct { 2268 u64 reserved_0 : 2, 2269 dest_mode_logical : 1, 2270 reserved_1 : 5, 2271 destid_0_23 : 24, 2272 vector : 8, 2273 reserved_2 : 16, 2274 destid_24_31 : 8; 2275 }; 2276 } __attribute__ ((packed)); 2277 2278 2279 static struct irq_chip intcapxt_controller; 2280 2281 static int intcapxt_irqdomain_activate(struct irq_domain *domain, 2282 struct irq_data *irqd, bool reserve) 2283 { 2284 return 0; 2285 } 2286 2287 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, 2288 struct irq_data *irqd) 2289 { 2290 } 2291 2292 2293 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, 2294 unsigned int nr_irqs, void *arg) 2295 { 2296 struct irq_alloc_info *info = arg; 2297 int i, ret; 2298 2299 if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) 2300 return -EINVAL; 2301 2302 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 2303 if (ret < 0) 2304 return ret; 2305 2306 for (i = virq; i < virq + nr_irqs; i++) { 2307 struct irq_data *irqd = irq_domain_get_irq_data(domain, i); 2308 2309 irqd->chip = &intcapxt_controller; 2310 irqd->hwirq = info->hwirq; 2311 irqd->chip_data = info->data; 2312 __irq_set_handler(i, handle_edge_irq, 0, "edge"); 2313 } 2314 2315 return ret; 2316 } 2317 2318 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, 2319 unsigned int nr_irqs) 2320 { 2321 irq_domain_free_irqs_top(domain, virq, nr_irqs); 2322 } 2323 2324 2325 static void intcapxt_unmask_irq(struct irq_data *irqd) 2326 { 2327 struct amd_iommu *iommu = irqd->chip_data; 2328 struct irq_cfg *cfg = irqd_cfg(irqd); 2329 union intcapxt xt; 2330 2331 xt.capxt = 0ULL; 2332 xt.dest_mode_logical = apic->dest_mode_logical; 2333 xt.vector = cfg->vector; 2334 xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); 2335 xt.destid_24_31 = cfg->dest_apicid >> 24; 2336 2337 writeq(xt.capxt, iommu->mmio_base + irqd->hwirq); 2338 } 2339 2340 static void intcapxt_mask_irq(struct irq_data *irqd) 2341 { 2342 struct amd_iommu *iommu = irqd->chip_data; 2343 2344 writeq(0, iommu->mmio_base + irqd->hwirq); 2345 } 2346 2347 2348 static int intcapxt_set_affinity(struct irq_data *irqd, 2349 const struct cpumask *mask, bool force) 2350 { 2351 struct irq_data *parent = irqd->parent_data; 2352 int ret; 2353 2354 ret = parent->chip->irq_set_affinity(parent, mask, force); 2355 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 2356 return ret; 2357 return 0; 2358 } 2359 2360 static int 
intcapxt_set_wake(struct irq_data *irqd, unsigned int on) 2361 { 2362 return on ? -EOPNOTSUPP : 0; 2363 } 2364 2365 static struct irq_chip intcapxt_controller = { 2366 .name = "IOMMU-MSI", 2367 .irq_unmask = intcapxt_unmask_irq, 2368 .irq_mask = intcapxt_mask_irq, 2369 .irq_ack = irq_chip_ack_parent, 2370 .irq_retrigger = irq_chip_retrigger_hierarchy, 2371 .irq_set_affinity = intcapxt_set_affinity, 2372 .irq_set_wake = intcapxt_set_wake, 2373 .flags = IRQCHIP_MASK_ON_SUSPEND, 2374 }; 2375 2376 static const struct irq_domain_ops intcapxt_domain_ops = { 2377 .alloc = intcapxt_irqdomain_alloc, 2378 .free = intcapxt_irqdomain_free, 2379 .activate = intcapxt_irqdomain_activate, 2380 .deactivate = intcapxt_irqdomain_deactivate, 2381 }; 2382 2383 2384 static struct irq_domain *iommu_irqdomain; 2385 2386 static struct irq_domain *iommu_get_irqdomain(void) 2387 { 2388 struct fwnode_handle *fn; 2389 2390 /* No need for locking here (yet) as the init is single-threaded */ 2391 if (iommu_irqdomain) 2392 return iommu_irqdomain; 2393 2394 fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); 2395 if (!fn) 2396 return NULL; 2397 2398 iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, 2399 fn, &intcapxt_domain_ops, 2400 NULL); 2401 if (!iommu_irqdomain) 2402 irq_domain_free_fwnode(fn); 2403 2404 return iommu_irqdomain; 2405 } 2406 2407 static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname, 2408 int hwirq, irq_handler_t thread_fn) 2409 { 2410 struct irq_domain *domain; 2411 struct irq_alloc_info info; 2412 int irq, ret; 2413 int node = dev_to_node(&iommu->dev->dev); 2414 2415 domain = iommu_get_irqdomain(); 2416 if (!domain) 2417 return -ENXIO; 2418 2419 init_irq_alloc_info(&info, NULL); 2420 info.type = X86_IRQ_ALLOC_TYPE_AMDVI; 2421 info.data = iommu; 2422 info.hwirq = hwirq; 2423 2424 irq = irq_domain_alloc_irqs(domain, 1, node, &info); 2425 if (irq < 0) { 2426 irq_domain_remove(domain); 2427 return irq; 2428 } 2429 2430 ret = request_threaded_irq(irq, amd_iommu_int_handler, 2431 thread_fn, 0, devname, iommu); 2432 if (ret) { 2433 irq_domain_free_irqs(irq, 1); 2434 irq_domain_remove(domain); 2435 return ret; 2436 } 2437 2438 return 0; 2439 } 2440 2441 static int iommu_setup_intcapxt(struct amd_iommu *iommu) 2442 { 2443 int ret; 2444 2445 snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name), 2446 "AMD-Vi%d-Evt", iommu->index); 2447 ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name, 2448 MMIO_INTCAPXT_EVT_OFFSET, 2449 amd_iommu_int_thread_evtlog); 2450 if (ret) 2451 return ret; 2452 2453 snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name), 2454 "AMD-Vi%d-PPR", iommu->index); 2455 ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name, 2456 MMIO_INTCAPXT_PPR_OFFSET, 2457 amd_iommu_int_thread_pprlog); 2458 if (ret) 2459 return ret; 2460 2461 #ifdef CONFIG_IRQ_REMAP 2462 snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name), 2463 "AMD-Vi%d-GA", iommu->index); 2464 ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name, 2465 MMIO_INTCAPXT_GALOG_OFFSET, 2466 amd_iommu_int_thread_galog); 2467 #endif 2468 2469 return ret; 2470 } 2471 2472 static int iommu_init_irq(struct amd_iommu *iommu) 2473 { 2474 int ret; 2475 2476 if (iommu->int_enabled) 2477 goto enable_faults; 2478 2479 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2480 ret = iommu_setup_intcapxt(iommu); 2481 else if (iommu->dev->msi_cap) 2482 ret = iommu_setup_msi(iommu); 2483 else 2484 ret = -ENODEV; 2485 2486 if (ret) 2487 return ret; 2488 2489 iommu->int_enabled = true; 2490 enable_faults: 
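	/*
	 * At this point the interrupt source is set up (either freshly above
	 * or from an earlier call), so (re-)enable event interrupt generation,
	 * plus the INTCAPXT path when running in x2APIC mode.
	 */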
2491 2492 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) 2493 iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); 2494 2495 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); 2496 2497 return 0; 2498 } 2499 2500 /**************************************************************************** 2501 * 2502 * The next functions belong to the third pass of parsing the ACPI 2503 * table. In this last pass the memory mapping requirements are 2504 * gathered (like exclusion and unity mapping ranges). 2505 * 2506 ****************************************************************************/ 2507 2508 static void __init free_unity_maps(void) 2509 { 2510 struct unity_map_entry *entry, *next; 2511 struct amd_iommu_pci_seg *p, *pci_seg; 2512 2513 for_each_pci_segment_safe(pci_seg, p) { 2514 list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) { 2515 list_del(&entry->list); 2516 kfree(entry); 2517 } 2518 } 2519 } 2520 2521 /* called for unity map ACPI definition */ 2522 static int __init init_unity_map_range(struct ivmd_header *m, 2523 struct acpi_table_header *ivrs_base) 2524 { 2525 struct unity_map_entry *e = NULL; 2526 struct amd_iommu_pci_seg *pci_seg; 2527 char *s; 2528 2529 pci_seg = get_pci_segment(m->pci_seg, ivrs_base); 2530 if (pci_seg == NULL) 2531 return -ENOMEM; 2532 2533 e = kzalloc(sizeof(*e), GFP_KERNEL); 2534 if (e == NULL) 2535 return -ENOMEM; 2536 2537 switch (m->type) { 2538 default: 2539 kfree(e); 2540 return 0; 2541 case ACPI_IVMD_TYPE: 2542 s = "IVMD_TYPEi\t\t\t"; 2543 e->devid_start = e->devid_end = m->devid; 2544 break; 2545 case ACPI_IVMD_TYPE_ALL: 2546 s = "IVMD_TYPE_ALL\t\t"; 2547 e->devid_start = 0; 2548 e->devid_end = pci_seg->last_bdf; 2549 break; 2550 case ACPI_IVMD_TYPE_RANGE: 2551 s = "IVMD_TYPE_RANGE\t\t"; 2552 e->devid_start = m->devid; 2553 e->devid_end = m->aux; 2554 break; 2555 } 2556 e->address_start = PAGE_ALIGN(m->range_start); 2557 e->address_end = e->address_start + PAGE_ALIGN(m->range_length); 2558 e->prot = m->flags >> 1; 2559 2560 /* 2561 * Treat per-device exclusion ranges as r/w unity-mapped regions 2562 * since some buggy BIOSes might lead to the overwritten exclusion 2563 * range (exclusion_start and exclusion_length members). This 2564 * happens when there are multiple exclusion ranges (IVMD entries) 2565 * defined in ACPI table. 
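 * Mapping the range read/write (IVMD_FLAG_IW | IVMD_FLAG_IR) keeps such
 * devices working even when the reported exclusion range itself can no
 * longer be trusted.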
2566 */ 2567 if (m->flags & IVMD_FLAG_EXCL_RANGE) 2568 e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; 2569 2570 DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: " 2571 "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx" 2572 " flags: %x\n", s, m->pci_seg, 2573 PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), 2574 PCI_FUNC(e->devid_start), m->pci_seg, 2575 PCI_BUS_NUM(e->devid_end), 2576 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), 2577 e->address_start, e->address_end, m->flags); 2578 2579 list_add_tail(&e->list, &pci_seg->unity_map); 2580 2581 return 0; 2582 } 2583 2584 /* iterates over all memory definitions we find in the ACPI table */ 2585 static int __init init_memory_definitions(struct acpi_table_header *table) 2586 { 2587 u8 *p = (u8 *)table, *end = (u8 *)table; 2588 struct ivmd_header *m; 2589 2590 end += table->length; 2591 p += IVRS_HEADER_LENGTH; 2592 2593 while (p < end) { 2594 m = (struct ivmd_header *)p; 2595 if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) 2596 init_unity_map_range(m, table); 2597 2598 p += m->length; 2599 } 2600 2601 return 0; 2602 } 2603 2604 /* 2605 * Init the device table to not allow DMA access for devices 2606 */ 2607 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2608 { 2609 u32 devid; 2610 struct dev_table_entry *dev_table = pci_seg->dev_table; 2611 2612 if (dev_table == NULL) 2613 return; 2614 2615 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2616 __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); 2617 if (!amd_iommu_snp_en) 2618 __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); 2619 } 2620 } 2621 2622 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) 2623 { 2624 u32 devid; 2625 struct dev_table_entry *dev_table = pci_seg->dev_table; 2626 2627 if (dev_table == NULL) 2628 return; 2629 2630 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { 2631 dev_table[devid].data[0] = 0ULL; 2632 dev_table[devid].data[1] = 0ULL; 2633 } 2634 } 2635 2636 static void init_device_table(void) 2637 { 2638 struct amd_iommu_pci_seg *pci_seg; 2639 u32 devid; 2640 2641 if (!amd_iommu_irq_remap) 2642 return; 2643 2644 for_each_pci_segment(pci_seg) { 2645 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) 2646 __set_dev_entry_bit(pci_seg->dev_table, 2647 devid, DEV_ENTRY_IRQ_TBL_EN); 2648 } 2649 } 2650 2651 static void iommu_init_flags(struct amd_iommu *iommu) 2652 { 2653 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? 2654 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : 2655 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); 2656 2657 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? 2658 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : 2659 iommu_feature_disable(iommu, CONTROL_PASSPW_EN); 2660 2661 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? 2662 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : 2663 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); 2664 2665 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? 
2666 iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2667 iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2668
2669 /*
2670 * make IOMMU memory accesses cache coherent
2671 */
2672 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2673
2674 /* Set IOTLB invalidation timeout to 1s */
2675 iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2676 }
2677
2678 static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2679 {
2680 int i, j;
2681 u32 ioc_feature_control;
2682 struct pci_dev *pdev = iommu->root_pdev;
2683
2684 /* RD890 BIOSes may not have completely reconfigured the iommu */
2685 if (!is_rd890_iommu(iommu->dev) || !pdev)
2686 return;
2687
2688 /*
2689 * First, we need to ensure that the iommu is enabled. This is
2690 * controlled by a register in the northbridge
2691 */
2692
2693 /* Select Northbridge indirect register 0x75 and enable writing */
2694 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2695 pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2696
2697 /* Enable the iommu */
2698 if (!(ioc_feature_control & 0x1))
2699 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2700
2701 /* Restore the iommu BAR */
2702 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2703 iommu->stored_addr_lo);
2704 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2705 iommu->stored_addr_hi);
2706
2707 /* Restore the l1 indirect regs for each of the 6 l1s */
2708 for (i = 0; i < 6; i++)
2709 for (j = 0; j < 0x12; j++)
2710 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2711
2712 /* Restore the l2 indirect regs */
2713 for (i = 0; i < 0x83; i++)
2714 iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2715
2716 /* Lock PCI setup registers */
2717 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2718 iommu->stored_addr_lo | 1);
2719 }
2720
2721 static void iommu_enable_ga(struct amd_iommu *iommu)
2722 {
2723 #ifdef CONFIG_IRQ_REMAP
2724 switch (amd_iommu_guest_ir) {
2725 case AMD_IOMMU_GUEST_IR_VAPIC:
2726 case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2727 iommu_feature_enable(iommu, CONTROL_GA_EN);
2728 iommu->irte_ops = &irte_128_ops;
2729 break;
2730 default:
2731 iommu->irte_ops = &irte_32_ops;
2732 break;
2733 }
2734 #endif
2735 }
2736
2737 static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
2738 {
2739 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
2740 }
2741
2742 static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
2743 {
2744 u64 ctrl;
2745
2746 if (!amd_iommu_irtcachedis)
2747 return;
2748
2749 /*
2750 * Note:
2751 * Support for the IRTCacheDis feature is determined by
2752 * checking whether the bit is writable.
2753 */
2754 iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
2755 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
2756 ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
2757 if (ctrl)
2758 iommu->irtcachedis_enabled = true;
2759 pr_info("iommu%d (%#06x) : IRT cache is %s\n",
2760 iommu->index, iommu->devid,
2761 iommu->irtcachedis_enabled ?
"disabled" : "enabled"); 2762 } 2763 2764 static void early_enable_iommu(struct amd_iommu *iommu) 2765 { 2766 iommu_disable(iommu); 2767 iommu_init_flags(iommu); 2768 iommu_set_device_table(iommu); 2769 iommu_enable_command_buffer(iommu); 2770 iommu_enable_event_buffer(iommu); 2771 iommu_set_exclusion_range(iommu); 2772 iommu_enable_ga(iommu); 2773 iommu_enable_xt(iommu); 2774 iommu_enable_irtcachedis(iommu); 2775 iommu_enable(iommu); 2776 amd_iommu_flush_all_caches(iommu); 2777 } 2778 2779 /* 2780 * This function finally enables all IOMMUs found in the system after 2781 * they have been initialized. 2782 * 2783 * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy 2784 * the old content of device table entries. Not this case or copy failed, 2785 * just continue as normal kernel does. 2786 */ 2787 static void early_enable_iommus(void) 2788 { 2789 struct amd_iommu *iommu; 2790 struct amd_iommu_pci_seg *pci_seg; 2791 2792 if (!copy_device_table()) { 2793 /* 2794 * If come here because of failure in copying device table from old 2795 * kernel with all IOMMUs enabled, print error message and try to 2796 * free allocated old_dev_tbl_cpy. 2797 */ 2798 if (amd_iommu_pre_enabled) 2799 pr_err("Failed to copy DEV table from previous kernel.\n"); 2800 2801 for_each_pci_segment(pci_seg) { 2802 if (pci_seg->old_dev_tbl_cpy != NULL) { 2803 free_pages((unsigned long)pci_seg->old_dev_tbl_cpy, 2804 get_order(pci_seg->dev_table_size)); 2805 pci_seg->old_dev_tbl_cpy = NULL; 2806 } 2807 } 2808 2809 for_each_iommu(iommu) { 2810 clear_translation_pre_enabled(iommu); 2811 early_enable_iommu(iommu); 2812 } 2813 } else { 2814 pr_info("Copied DEV table from previous kernel.\n"); 2815 2816 for_each_pci_segment(pci_seg) { 2817 free_pages((unsigned long)pci_seg->dev_table, 2818 get_order(pci_seg->dev_table_size)); 2819 pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; 2820 } 2821 2822 for_each_iommu(iommu) { 2823 iommu_disable_command_buffer(iommu); 2824 iommu_disable_event_buffer(iommu); 2825 iommu_disable_irtcachedis(iommu); 2826 iommu_enable_command_buffer(iommu); 2827 iommu_enable_event_buffer(iommu); 2828 iommu_enable_ga(iommu); 2829 iommu_enable_xt(iommu); 2830 iommu_enable_irtcachedis(iommu); 2831 iommu_set_device_table(iommu); 2832 amd_iommu_flush_all_caches(iommu); 2833 } 2834 } 2835 } 2836 2837 static void enable_iommus_v2(void) 2838 { 2839 struct amd_iommu *iommu; 2840 2841 for_each_iommu(iommu) { 2842 iommu_enable_ppr_log(iommu); 2843 iommu_enable_gt(iommu); 2844 } 2845 } 2846 2847 static void enable_iommus_vapic(void) 2848 { 2849 #ifdef CONFIG_IRQ_REMAP 2850 u32 status, i; 2851 struct amd_iommu *iommu; 2852 2853 for_each_iommu(iommu) { 2854 /* 2855 * Disable GALog if already running. It could have been enabled 2856 * in the previous boot before kdump. 2857 */ 2858 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2859 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2860 continue; 2861 2862 iommu_feature_disable(iommu, CONTROL_GALOG_EN); 2863 iommu_feature_disable(iommu, CONTROL_GAINT_EN); 2864 2865 /* 2866 * Need to set and poll check the GALOGRun bit to zero before 2867 * we can set/ modify GA Log registers safely. 
2868 */ 2869 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { 2870 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); 2871 if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) 2872 break; 2873 udelay(10); 2874 } 2875 2876 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) 2877 return; 2878 } 2879 2880 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && 2881 !check_feature(FEATURE_GAM_VAPIC)) { 2882 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 2883 return; 2884 } 2885 2886 if (amd_iommu_snp_en && 2887 !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) { 2888 pr_warn("Force to disable Virtual APIC due to SNP\n"); 2889 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 2890 return; 2891 } 2892 2893 /* Enabling GAM and SNPAVIC support */ 2894 for_each_iommu(iommu) { 2895 if (iommu_init_ga_log(iommu) || 2896 iommu_ga_log_enable(iommu)) 2897 return; 2898 2899 iommu_feature_enable(iommu, CONTROL_GAM_EN); 2900 if (amd_iommu_snp_en) 2901 iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN); 2902 } 2903 2904 amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); 2905 pr_info("Virtual APIC enabled\n"); 2906 #endif 2907 } 2908 2909 static void enable_iommus(void) 2910 { 2911 early_enable_iommus(); 2912 } 2913 2914 static void disable_iommus(void) 2915 { 2916 struct amd_iommu *iommu; 2917 2918 for_each_iommu(iommu) 2919 iommu_disable(iommu); 2920 2921 #ifdef CONFIG_IRQ_REMAP 2922 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) 2923 amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP); 2924 #endif 2925 } 2926 2927 /* 2928 * Suspend/Resume support 2929 * disable suspend until real resume implemented 2930 */ 2931 2932 static void amd_iommu_resume(void) 2933 { 2934 struct amd_iommu *iommu; 2935 2936 for_each_iommu(iommu) 2937 iommu_apply_resume_quirks(iommu); 2938 2939 /* re-load the hardware */ 2940 enable_iommus(); 2941 2942 amd_iommu_enable_interrupts(); 2943 } 2944 2945 static int amd_iommu_suspend(void) 2946 { 2947 /* disable IOMMUs to go out of the way for BIOS */ 2948 disable_iommus(); 2949 2950 return 0; 2951 } 2952 2953 static struct syscore_ops amd_iommu_syscore_ops = { 2954 .suspend = amd_iommu_suspend, 2955 .resume = amd_iommu_resume, 2956 }; 2957 2958 static void __init free_iommu_resources(void) 2959 { 2960 kmem_cache_destroy(amd_iommu_irq_cache); 2961 amd_iommu_irq_cache = NULL; 2962 2963 free_iommu_all(); 2964 free_pci_segments(); 2965 } 2966 2967 /* SB IOAPIC is always on this device in AMD systems */ 2968 #define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0)) 2969 2970 static bool __init check_ioapic_information(void) 2971 { 2972 const char *fw_bug = FW_BUG; 2973 bool ret, has_sb_ioapic; 2974 int idx; 2975 2976 has_sb_ioapic = false; 2977 ret = false; 2978 2979 /* 2980 * If we have map overrides on the kernel command line the 2981 * messages in this function might not describe firmware bugs 2982 * anymore - so be careful 2983 */ 2984 if (cmdline_maps) 2985 fw_bug = ""; 2986 2987 for (idx = 0; idx < nr_ioapics; idx++) { 2988 int devid, id = mpc_ioapic_id(idx); 2989 2990 devid = get_ioapic_devid(id); 2991 if (devid < 0) { 2992 pr_err("%s: IOAPIC[%d] not in IVRS table\n", 2993 fw_bug, id); 2994 ret = false; 2995 } else if (devid == IOAPIC_SB_DEVID) { 2996 has_sb_ioapic = true; 2997 ret = true; 2998 } 2999 } 3000 3001 if (!has_sb_ioapic) { 3002 /* 3003 * We expect the SB IOAPIC to be listed in the IVRS 3004 * table. The system timer is connected to the SB IOAPIC 3005 * and if we don't have it in the list the system will 3006 * panic at boot time. 
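 * Without that entry interrupt remapping cannot be set up for the
 * timer interrupt, which is why remapping is disabled further down.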
This situation usually happens 3007 * when the BIOS is buggy and provides us the wrong 3008 * device id for the IOAPIC in the system. 3009 */ 3010 pr_err("%s: No southbridge IOAPIC found\n", fw_bug); 3011 } 3012 3013 if (!ret) 3014 pr_err("Disabling interrupt remapping\n"); 3015 3016 return ret; 3017 } 3018 3019 static void __init free_dma_resources(void) 3020 { 3021 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 3022 get_order(MAX_DOMAIN_ID/8)); 3023 amd_iommu_pd_alloc_bitmap = NULL; 3024 3025 free_unity_maps(); 3026 } 3027 3028 static void __init ivinfo_init(void *ivrs) 3029 { 3030 amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); 3031 } 3032 3033 /* 3034 * This is the hardware init function for AMD IOMMU in the system. 3035 * This function is called either from amd_iommu_init or from the interrupt 3036 * remapping setup code. 3037 * 3038 * This function basically parses the ACPI table for AMD IOMMU (IVRS) 3039 * four times: 3040 * 3041 * 1 pass) Discover the most comprehensive IVHD type to use. 3042 * 3043 * 2 pass) Find the highest PCI device id the driver has to handle. 3044 * Upon this information the size of the data structures is 3045 * determined that needs to be allocated. 3046 * 3047 * 3 pass) Initialize the data structures just allocated with the 3048 * information in the ACPI table about available AMD IOMMUs 3049 * in the system. It also maps the PCI devices in the 3050 * system to specific IOMMUs 3051 * 3052 * 4 pass) After the basic data structures are allocated and 3053 * initialized we update them with information about memory 3054 * remapping requirements parsed out of the ACPI table in 3055 * this last pass. 3056 * 3057 * After everything is set up the IOMMUs are enabled and the necessary 3058 * hotplug and suspend notifiers are registered. 
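 *
 * In the code below, pass 1 corresponds to get_highest_supported_ivhd_type(),
 * pass 3 to init_iommu_all() and pass 4 to init_memory_definitions(), all
 * called from early_amd_iommu_init().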
3059 */ 3060 static int __init early_amd_iommu_init(void) 3061 { 3062 struct acpi_table_header *ivrs_base; 3063 int remap_cache_sz, ret; 3064 acpi_status status; 3065 3066 if (!amd_iommu_detected) 3067 return -ENODEV; 3068 3069 status = acpi_get_table("IVRS", 0, &ivrs_base); 3070 if (status == AE_NOT_FOUND) 3071 return -ENODEV; 3072 else if (ACPI_FAILURE(status)) { 3073 const char *err = acpi_format_exception(status); 3074 pr_err("IVRS table error: %s\n", err); 3075 return -EINVAL; 3076 } 3077 3078 /* 3079 * Validate checksum here so we don't need to do it when 3080 * we actually parse the table 3081 */ 3082 ret = check_ivrs_checksum(ivrs_base); 3083 if (ret) 3084 goto out; 3085 3086 ivinfo_init(ivrs_base); 3087 3088 amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); 3089 DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); 3090 3091 /* Device table - directly used by all IOMMUs */ 3092 ret = -ENOMEM; 3093 3094 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( 3095 GFP_KERNEL | __GFP_ZERO, 3096 get_order(MAX_DOMAIN_ID/8)); 3097 if (amd_iommu_pd_alloc_bitmap == NULL) 3098 goto out; 3099 3100 /* 3101 * never allocate domain 0 because its used as the non-allocated and 3102 * error value placeholder 3103 */ 3104 __set_bit(0, amd_iommu_pd_alloc_bitmap); 3105 3106 /* 3107 * now the data structures are allocated and basically initialized 3108 * start the real acpi table scan 3109 */ 3110 ret = init_iommu_all(ivrs_base); 3111 if (ret) 3112 goto out; 3113 3114 /* 5 level guest page table */ 3115 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3116 check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL) 3117 amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; 3118 3119 /* Disable any previously enabled IOMMUs */ 3120 if (!is_kdump_kernel() || amd_iommu_disabled) 3121 disable_iommus(); 3122 3123 if (amd_iommu_irq_remap) 3124 amd_iommu_irq_remap = check_ioapic_information(); 3125 3126 if (amd_iommu_irq_remap) { 3127 struct amd_iommu_pci_seg *pci_seg; 3128 /* 3129 * Interrupt remapping enabled, create kmem_cache for the 3130 * remapping tables. 3131 */ 3132 ret = -ENOMEM; 3133 if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) 3134 remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32); 3135 else 3136 remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); 3137 amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", 3138 remap_cache_sz, 3139 DTE_INTTAB_ALIGNMENT, 3140 0, NULL); 3141 if (!amd_iommu_irq_cache) 3142 goto out; 3143 3144 for_each_pci_segment(pci_seg) { 3145 if (alloc_irq_lookup_table(pci_seg)) 3146 goto out; 3147 } 3148 } 3149 3150 ret = init_memory_definitions(ivrs_base); 3151 if (ret) 3152 goto out; 3153 3154 /* init the device table */ 3155 init_device_table(); 3156 3157 out: 3158 /* Don't leak any ACPI memory */ 3159 acpi_put_table(ivrs_base); 3160 3161 return ret; 3162 } 3163 3164 static int amd_iommu_enable_interrupts(void) 3165 { 3166 struct amd_iommu *iommu; 3167 int ret = 0; 3168 3169 for_each_iommu(iommu) { 3170 ret = iommu_init_irq(iommu); 3171 if (ret) 3172 goto out; 3173 } 3174 3175 /* 3176 * Interrupt handler is ready to process interrupts. Enable 3177 * PPR and GA log interrupt for all IOMMUs. 
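 * enable_iommus_vapic() takes care of the GA log and virtual APIC mode,
 * enable_iommus_v2() of the PPR log and guest translation.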
3178 */ 3179 enable_iommus_vapic(); 3180 enable_iommus_v2(); 3181 3182 out: 3183 return ret; 3184 } 3185 3186 static bool __init detect_ivrs(void) 3187 { 3188 struct acpi_table_header *ivrs_base; 3189 acpi_status status; 3190 int i; 3191 3192 status = acpi_get_table("IVRS", 0, &ivrs_base); 3193 if (status == AE_NOT_FOUND) 3194 return false; 3195 else if (ACPI_FAILURE(status)) { 3196 const char *err = acpi_format_exception(status); 3197 pr_err("IVRS table error: %s\n", err); 3198 return false; 3199 } 3200 3201 acpi_put_table(ivrs_base); 3202 3203 if (amd_iommu_force_enable) 3204 goto out; 3205 3206 /* Don't use IOMMU if there is Stoney Ridge graphics */ 3207 for (i = 0; i < 32; i++) { 3208 u32 pci_id; 3209 3210 pci_id = read_pci_config(0, i, 0, 0); 3211 if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { 3212 pr_info("Disable IOMMU on Stoney Ridge\n"); 3213 return false; 3214 } 3215 } 3216 3217 out: 3218 /* Make sure ACS will be enabled during PCI probe */ 3219 pci_request_acs(); 3220 3221 return true; 3222 } 3223 3224 /**************************************************************************** 3225 * 3226 * AMD IOMMU Initialization State Machine 3227 * 3228 ****************************************************************************/ 3229 3230 static int __init state_next(void) 3231 { 3232 int ret = 0; 3233 3234 switch (init_state) { 3235 case IOMMU_START_STATE: 3236 if (!detect_ivrs()) { 3237 init_state = IOMMU_NOT_FOUND; 3238 ret = -ENODEV; 3239 } else { 3240 init_state = IOMMU_IVRS_DETECTED; 3241 } 3242 break; 3243 case IOMMU_IVRS_DETECTED: 3244 if (amd_iommu_disabled) { 3245 init_state = IOMMU_CMDLINE_DISABLED; 3246 ret = -EINVAL; 3247 } else { 3248 ret = early_amd_iommu_init(); 3249 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; 3250 } 3251 break; 3252 case IOMMU_ACPI_FINISHED: 3253 early_enable_iommus(); 3254 x86_platform.iommu_shutdown = disable_iommus; 3255 init_state = IOMMU_ENABLED; 3256 break; 3257 case IOMMU_ENABLED: 3258 register_syscore_ops(&amd_iommu_syscore_ops); 3259 ret = amd_iommu_init_pci(); 3260 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; 3261 break; 3262 case IOMMU_PCI_INIT: 3263 ret = amd_iommu_enable_interrupts(); 3264 init_state = ret ? 
IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; 3265 break; 3266 case IOMMU_INTERRUPTS_EN: 3267 init_state = IOMMU_INITIALIZED; 3268 break; 3269 case IOMMU_INITIALIZED: 3270 /* Nothing to do */ 3271 break; 3272 case IOMMU_NOT_FOUND: 3273 case IOMMU_INIT_ERROR: 3274 case IOMMU_CMDLINE_DISABLED: 3275 /* Error states => do nothing */ 3276 ret = -EINVAL; 3277 break; 3278 default: 3279 /* Unknown state */ 3280 BUG(); 3281 } 3282 3283 if (ret) { 3284 free_dma_resources(); 3285 if (!irq_remapping_enabled) { 3286 disable_iommus(); 3287 free_iommu_resources(); 3288 } else { 3289 struct amd_iommu *iommu; 3290 struct amd_iommu_pci_seg *pci_seg; 3291 3292 for_each_pci_segment(pci_seg) 3293 uninit_device_table_dma(pci_seg); 3294 3295 for_each_iommu(iommu) 3296 amd_iommu_flush_all_caches(iommu); 3297 } 3298 } 3299 return ret; 3300 } 3301 3302 static int __init iommu_go_to_state(enum iommu_init_state state) 3303 { 3304 int ret = -EINVAL; 3305 3306 while (init_state != state) { 3307 if (init_state == IOMMU_NOT_FOUND || 3308 init_state == IOMMU_INIT_ERROR || 3309 init_state == IOMMU_CMDLINE_DISABLED) 3310 break; 3311 ret = state_next(); 3312 } 3313 3314 return ret; 3315 } 3316 3317 #ifdef CONFIG_IRQ_REMAP 3318 int __init amd_iommu_prepare(void) 3319 { 3320 int ret; 3321 3322 amd_iommu_irq_remap = true; 3323 3324 ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); 3325 if (ret) { 3326 amd_iommu_irq_remap = false; 3327 return ret; 3328 } 3329 3330 return amd_iommu_irq_remap ? 0 : -ENODEV; 3331 } 3332 3333 int __init amd_iommu_enable(void) 3334 { 3335 int ret; 3336 3337 ret = iommu_go_to_state(IOMMU_ENABLED); 3338 if (ret) 3339 return ret; 3340 3341 irq_remapping_enabled = 1; 3342 return amd_iommu_xt_mode; 3343 } 3344 3345 void amd_iommu_disable(void) 3346 { 3347 amd_iommu_suspend(); 3348 } 3349 3350 int amd_iommu_reenable(int mode) 3351 { 3352 amd_iommu_resume(); 3353 3354 return 0; 3355 } 3356 3357 int __init amd_iommu_enable_faulting(void) 3358 { 3359 /* We enable MSI later when PCI is initialized */ 3360 return 0; 3361 } 3362 #endif 3363 3364 /* 3365 * This is the core init function for AMD IOMMU hardware in the system. 3366 * This function is called from the generic x86 DMA layer initialization 3367 * code. 3368 */ 3369 static int __init amd_iommu_init(void) 3370 { 3371 struct amd_iommu *iommu; 3372 int ret; 3373 3374 ret = iommu_go_to_state(IOMMU_INITIALIZED); 3375 #ifdef CONFIG_GART_IOMMU 3376 if (ret && list_empty(&amd_iommu_list)) { 3377 /* 3378 * We failed to initialize the AMD IOMMU - try fallback 3379 * to GART if possible. 3380 */ 3381 gart_iommu_init(); 3382 } 3383 #endif 3384 3385 for_each_iommu(iommu) 3386 amd_iommu_debugfs_setup(iommu); 3387 3388 return ret; 3389 } 3390 3391 static bool amd_iommu_sme_check(void) 3392 { 3393 if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) || 3394 (boot_cpu_data.x86 != 0x17)) 3395 return true; 3396 3397 /* For Fam17h, a specific level of support is required */ 3398 if (boot_cpu_data.microcode >= 0x08001205) 3399 return true; 3400 3401 if ((boot_cpu_data.microcode >= 0x08001126) && 3402 (boot_cpu_data.microcode <= 0x080011ff)) 3403 return true; 3404 3405 pr_notice("IOMMU not currently supported when SME is active\n"); 3406 3407 return false; 3408 } 3409 3410 /**************************************************************************** 3411 * 3412 * Early detect code. This code runs at IOMMU detection time in the DMA 3413 * layer. 
It just looks if there is an IVRS ACPI table to detect AMD 3414 * IOMMUs 3415 * 3416 ****************************************************************************/ 3417 int __init amd_iommu_detect(void) 3418 { 3419 int ret; 3420 3421 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 3422 return -ENODEV; 3423 3424 if (!amd_iommu_sme_check()) 3425 return -ENODEV; 3426 3427 ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); 3428 if (ret) 3429 return ret; 3430 3431 amd_iommu_detected = true; 3432 iommu_detected = 1; 3433 x86_init.iommu.iommu_init = amd_iommu_init; 3434 3435 return 1; 3436 } 3437 3438 /**************************************************************************** 3439 * 3440 * Parsing functions for the AMD IOMMU specific kernel command line 3441 * options. 3442 * 3443 ****************************************************************************/ 3444 3445 static int __init parse_amd_iommu_dump(char *str) 3446 { 3447 amd_iommu_dump = true; 3448 3449 return 1; 3450 } 3451 3452 static int __init parse_amd_iommu_intr(char *str) 3453 { 3454 for (; *str; ++str) { 3455 if (strncmp(str, "legacy", 6) == 0) { 3456 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; 3457 break; 3458 } 3459 if (strncmp(str, "vapic", 5) == 0) { 3460 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; 3461 break; 3462 } 3463 } 3464 return 1; 3465 } 3466 3467 static int __init parse_amd_iommu_options(char *str) 3468 { 3469 if (!str) 3470 return -EINVAL; 3471 3472 while (*str) { 3473 if (strncmp(str, "fullflush", 9) == 0) { 3474 pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n"); 3475 iommu_set_dma_strict(); 3476 } else if (strncmp(str, "force_enable", 12) == 0) { 3477 amd_iommu_force_enable = true; 3478 } else if (strncmp(str, "off", 3) == 0) { 3479 amd_iommu_disabled = true; 3480 } else if (strncmp(str, "force_isolation", 15) == 0) { 3481 amd_iommu_force_isolation = true; 3482 } else if (strncmp(str, "pgtbl_v1", 8) == 0) { 3483 amd_iommu_pgtable = AMD_IOMMU_V1; 3484 } else if (strncmp(str, "pgtbl_v2", 8) == 0) { 3485 amd_iommu_pgtable = AMD_IOMMU_V2; 3486 } else if (strncmp(str, "irtcachedis", 11) == 0) { 3487 amd_iommu_irtcachedis = true; 3488 } else { 3489 pr_notice("Unknown option - '%s'\n", str); 3490 } 3491 3492 str += strcspn(str, ","); 3493 while (*str == ',') 3494 str++; 3495 } 3496 3497 return 1; 3498 } 3499 3500 static int __init parse_ivrs_ioapic(char *str) 3501 { 3502 u32 seg = 0, bus, dev, fn; 3503 int id, i; 3504 u32 devid; 3505 3506 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3507 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3508 goto found; 3509 3510 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3511 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3512 pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", 3513 str, id, seg, bus, dev, fn); 3514 goto found; 3515 } 3516 3517 pr_err("Invalid command line: ivrs_ioapic%s\n", str); 3518 return 1; 3519 3520 found: 3521 if (early_ioapic_map_size == EARLY_MAP_SIZE) { 3522 pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", 3523 str); 3524 return 1; 3525 } 3526 3527 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3528 3529 cmdline_maps = true; 3530 i = early_ioapic_map_size++; 3531 early_ioapic_map[i].id = id; 3532 early_ioapic_map[i].devid = devid; 3533 early_ioapic_map[i].cmd_line = true; 3534 3535 return 1; 3536 } 3537 3538 static int __init parse_ivrs_hpet(char *str) 3539 { 3540 u32 seg = 0, bus, dev, fn; 3541 
int id, i; 3542 u32 devid; 3543 3544 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3545 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) 3546 goto found; 3547 3548 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || 3549 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { 3550 pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", 3551 str, id, seg, bus, dev, fn); 3552 goto found; 3553 } 3554 3555 pr_err("Invalid command line: ivrs_hpet%s\n", str); 3556 return 1; 3557 3558 found: 3559 if (early_hpet_map_size == EARLY_MAP_SIZE) { 3560 pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", 3561 str); 3562 return 1; 3563 } 3564 3565 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3566 3567 cmdline_maps = true; 3568 i = early_hpet_map_size++; 3569 early_hpet_map[i].id = id; 3570 early_hpet_map[i].devid = devid; 3571 early_hpet_map[i].cmd_line = true; 3572 3573 return 1; 3574 } 3575 3576 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN) 3577 3578 static int __init parse_ivrs_acpihid(char *str) 3579 { 3580 u32 seg = 0, bus, dev, fn; 3581 char *hid, *uid, *p, *addr; 3582 char acpiid[ACPIID_LEN] = {0}; 3583 int i; 3584 3585 addr = strchr(str, '@'); 3586 if (!addr) { 3587 addr = strchr(str, '='); 3588 if (!addr) 3589 goto not_found; 3590 3591 ++addr; 3592 3593 if (strlen(addr) > ACPIID_LEN) 3594 goto not_found; 3595 3596 if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || 3597 sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { 3598 pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", 3599 str, acpiid, seg, bus, dev, fn); 3600 goto found; 3601 } 3602 goto not_found; 3603 } 3604 3605 /* We have the '@', make it the terminator to get just the acpiid */ 3606 *addr++ = 0; 3607 3608 if (strlen(str) > ACPIID_LEN + 1) 3609 goto not_found; 3610 3611 if (sscanf(str, "=%s", acpiid) != 1) 3612 goto not_found; 3613 3614 if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 || 3615 sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) 3616 goto found; 3617 3618 not_found: 3619 pr_err("Invalid command line: ivrs_acpihid%s\n", str); 3620 return 1; 3621 3622 found: 3623 p = acpiid; 3624 hid = strsep(&p, ":"); 3625 uid = p; 3626 3627 if (!hid || !(*hid) || !uid) { 3628 pr_err("Invalid command line: hid or uid\n"); 3629 return 1; 3630 } 3631 3632 /* 3633 * Ignore leading zeroes after ':', so e.g., AMDI0095:00 3634 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match 3635 */ 3636 while (*uid == '0' && *(uid + 1)) 3637 uid++; 3638 3639 i = early_acpihid_map_size++; 3640 memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); 3641 memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); 3642 early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); 3643 early_acpihid_map[i].cmd_line = true; 3644 3645 return 1; 3646 } 3647 3648 __setup("amd_iommu_dump", parse_amd_iommu_dump); 3649 __setup("amd_iommu=", parse_amd_iommu_options); 3650 __setup("amd_iommu_intr=", parse_amd_iommu_intr); 3651 __setup("ivrs_ioapic", parse_ivrs_ioapic); 3652 __setup("ivrs_hpet", parse_ivrs_hpet); 3653 __setup("ivrs_acpihid", parse_ivrs_acpihid); 3654 3655 bool amd_iommu_v2_supported(void) 3656 { 3657 /* CPU page table size should match IOMMU guest page table size */ 3658 if (cpu_feature_enabled(X86_FEATURE_LA57) && 3659 amd_iommu_gpt_level != PAGE_MODE_5_LEVEL) 3660 return false; 3661 3662 /* 3663 * Since DTE[Mode]=0 is prohibited on SNP-enabled 
system 3664 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without 3665 * setting up IOMMUv1 page table. 3666 */ 3667 return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en; 3668 } 3669 3670 struct amd_iommu *get_amd_iommu(unsigned int idx) 3671 { 3672 unsigned int i = 0; 3673 struct amd_iommu *iommu; 3674 3675 for_each_iommu(iommu) 3676 if (i++ == idx) 3677 return iommu; 3678 return NULL; 3679 } 3680 3681 /**************************************************************************** 3682 * 3683 * IOMMU EFR Performance Counter support functionality. This code allows 3684 * access to the IOMMU PC functionality. 3685 * 3686 ****************************************************************************/ 3687 3688 u8 amd_iommu_pc_get_max_banks(unsigned int idx) 3689 { 3690 struct amd_iommu *iommu = get_amd_iommu(idx); 3691 3692 if (iommu) 3693 return iommu->max_banks; 3694 3695 return 0; 3696 } 3697 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); 3698 3699 bool amd_iommu_pc_supported(void) 3700 { 3701 return amd_iommu_pc_present; 3702 } 3703 EXPORT_SYMBOL(amd_iommu_pc_supported); 3704 3705 u8 amd_iommu_pc_get_max_counters(unsigned int idx) 3706 { 3707 struct amd_iommu *iommu = get_amd_iommu(idx); 3708 3709 if (iommu) 3710 return iommu->max_counters; 3711 3712 return 0; 3713 } 3714 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); 3715 3716 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, 3717 u8 fxn, u64 *value, bool is_write) 3718 { 3719 u32 offset; 3720 u32 max_offset_lim; 3721 3722 /* Make sure the IOMMU PC resource is available */ 3723 if (!amd_iommu_pc_present) 3724 return -ENODEV; 3725 3726 /* Check for valid iommu and pc register indexing */ 3727 if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7))) 3728 return -ENODEV; 3729 3730 offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn); 3731 3732 /* Limit the offset to the hw defined mmio region aperture */ 3733 max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) | 3734 (iommu->max_counters << 8) | 0x28); 3735 if ((offset < MMIO_CNTR_REG_OFFSET) || 3736 (offset > max_offset_lim)) 3737 return -EINVAL; 3738 3739 if (is_write) { 3740 u64 val = *value & GENMASK_ULL(47, 0); 3741 3742 writel((u32)val, iommu->mmio_base + offset); 3743 writel((val >> 32), iommu->mmio_base + offset + 4); 3744 } else { 3745 *value = readl(iommu->mmio_base + offset + 4); 3746 *value <<= 32; 3747 *value |= readl(iommu->mmio_base + offset); 3748 *value &= GENMASK_ULL(47, 0); 3749 } 3750 3751 return 0; 3752 } 3753 3754 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3755 { 3756 if (!iommu) 3757 return -EINVAL; 3758 3759 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); 3760 } 3761 3762 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) 3763 { 3764 if (!iommu) 3765 return -EINVAL; 3766 3767 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); 3768 } 3769 3770 #ifdef CONFIG_AMD_MEM_ENCRYPT 3771 int amd_iommu_snp_enable(void) 3772 { 3773 /* 3774 * The SNP support requires that IOMMU must be enabled, and is 3775 * not configured in the passthrough mode. 3776 */ 3777 if (no_iommu || iommu_default_passthrough()) { 3778 pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported"); 3779 return -EINVAL; 3780 } 3781 3782 /* 3783 * Prevent enabling SNP after IOMMU_ENABLED state because this process 3784 * affect how IOMMU driver sets up data structures and configures 3785 * IOMMU hardware. 
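 * The init_state check below therefore rejects the request once the
 * driver has moved past the IOMMU_ENABLED state.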
3786 */
3787 if (init_state > IOMMU_ENABLED) {
3788 pr_err("SNP: Too late to enable SNP for IOMMU.\n");
3789 return -EINVAL;
3790 }
3791
3792 amd_iommu_snp_en = check_feature(FEATURE_SNP);
3793 if (!amd_iommu_snp_en)
3794 return -EINVAL;
3795
3796 pr_info("SNP enabled\n");
3797
3798 /* Enforce IOMMU v1 page table when SNP is enabled. */
3799 if (amd_iommu_pgtable != AMD_IOMMU_V1) {
3800 pr_warn("Forcing AMD IOMMU v1 page table due to SNP\n");
3801 amd_iommu_pgtable = AMD_IOMMU_V1;
3802 }
3803
3804 return 0;
3805 }
3806 #endif
3807