1 // SPDX-License-Identifier: GPL-2.0 2 3 #define pr_fmt(fmt) "DMAR-IR: " fmt 4 5 #include <linux/interrupt.h> 6 #include <linux/dmar.h> 7 #include <linux/spinlock.h> 8 #include <linux/slab.h> 9 #include <linux/jiffies.h> 10 #include <linux/hpet.h> 11 #include <linux/pci.h> 12 #include <linux/irq.h> 13 #include <linux/acpi.h> 14 #include <linux/irqdomain.h> 15 #include <linux/crash_dump.h> 16 #include <asm/io_apic.h> 17 #include <asm/apic.h> 18 #include <asm/smp.h> 19 #include <asm/cpu.h> 20 #include <asm/irq_remapping.h> 21 #include <asm/pci-direct.h> 22 #include <asm/posted_intr.h> 23 24 #include "iommu.h" 25 #include "../irq_remapping.h" 26 #include "../iommu-pages.h" 27 28 enum irq_mode { 29 IRQ_REMAPPING, 30 IRQ_POSTING, 31 }; 32 33 struct ioapic_scope { 34 struct intel_iommu *iommu; 35 unsigned int id; 36 unsigned int bus; /* PCI bus number */ 37 unsigned int devfn; /* PCI devfn number */ 38 }; 39 40 struct hpet_scope { 41 struct intel_iommu *iommu; 42 u8 id; 43 unsigned int bus; 44 unsigned int devfn; 45 }; 46 47 struct irq_2_iommu { 48 struct intel_iommu *iommu; 49 u16 irte_index; 50 u16 sub_handle; 51 u8 irte_mask; 52 enum irq_mode mode; 53 bool posted_msi; 54 }; 55 56 struct intel_ir_data { 57 struct irq_2_iommu irq_2_iommu; 58 struct irte irte_entry; 59 union { 60 struct msi_msg msi_entry; 61 }; 62 }; 63 64 #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) 65 #define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8) 66 67 static int __read_mostly eim_mode; 68 static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; 69 static struct hpet_scope ir_hpet[MAX_HPET_TBS]; 70 71 /* 72 * Lock ordering: 73 * ->dmar_global_lock 74 * ->irq_2_ir_lock 75 * ->qi->q_lock 76 * ->iommu->register_lock 77 * Note: 78 * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called 79 * in single-threaded environment with interrupt disabled, so no need to tabke 80 * the dmar_global_lock. 81 */ 82 DEFINE_RAW_SPINLOCK(irq_2_ir_lock); 83 static const struct irq_domain_ops intel_ir_domain_ops; 84 85 static void iommu_disable_irq_remapping(struct intel_iommu *iommu); 86 static int __init parse_ioapics_under_ir(void); 87 static const struct msi_parent_ops dmar_msi_parent_ops; 88 89 static bool ir_pre_enabled(struct intel_iommu *iommu) 90 { 91 return (iommu->flags & VTD_FLAG_IRQ_REMAP_PRE_ENABLED); 92 } 93 94 static void clear_ir_pre_enabled(struct intel_iommu *iommu) 95 { 96 iommu->flags &= ~VTD_FLAG_IRQ_REMAP_PRE_ENABLED; 97 } 98 99 static void init_ir_status(struct intel_iommu *iommu) 100 { 101 u32 gsts; 102 103 gsts = readl(iommu->reg + DMAR_GSTS_REG); 104 if (gsts & DMA_GSTS_IRES) 105 iommu->flags |= VTD_FLAG_IRQ_REMAP_PRE_ENABLED; 106 } 107 108 static int alloc_irte(struct intel_iommu *iommu, 109 struct irq_2_iommu *irq_iommu, u16 count) 110 { 111 struct ir_table *table = iommu->ir_table; 112 unsigned int mask = 0; 113 unsigned long flags; 114 int index; 115 116 if (!count || !irq_iommu) 117 return -1; 118 119 if (count > 1) { 120 count = __roundup_pow_of_two(count); 121 mask = ilog2(count); 122 } 123 124 if (mask > ecap_max_handle_mask(iommu->ecap)) { 125 pr_err("Requested mask %x exceeds the max invalidation handle" 126 " mask value %Lx\n", mask, 127 ecap_max_handle_mask(iommu->ecap)); 128 return -1; 129 } 130 131 raw_spin_lock_irqsave(&irq_2_ir_lock, flags); 132 index = bitmap_find_free_region(table->bitmap, 133 INTR_REMAP_TABLE_ENTRIES, mask); 134 if (index < 0) { 135 pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id); 136 } else { 137 irq_iommu->iommu = iommu; 138 irq_iommu->irte_index = index; 139 irq_iommu->sub_handle = 0; 140 irq_iommu->irte_mask = mask; 141 irq_iommu->mode = IRQ_REMAPPING; 142 } 143 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); 144 145 return index; 146 } 147 148 static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) 149 { 150 struct qi_desc desc; 151 152 desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) 153 | QI_IEC_SELECTIVE; 154 desc.qw1 = 0; 155 desc.qw2 = 0; 156 desc.qw3 = 0; 157 158 return qi_submit_sync(iommu, &desc, 1, 0); 159 } 160 161 static int modify_irte(struct irq_2_iommu *irq_iommu, 162 struct irte *irte_modified) 163 { 164 struct intel_iommu *iommu; 165 unsigned long flags; 166 struct irte *irte; 167 int rc, index; 168 169 if (!irq_iommu) 170 return -1; 171 172 raw_spin_lock_irqsave(&irq_2_ir_lock, flags); 173 174 iommu = irq_iommu->iommu; 175 176 index = irq_iommu->irte_index + irq_iommu->sub_handle; 177 irte = &iommu->ir_table->base[index]; 178 179 if ((irte->pst == 1) || (irte_modified->pst == 1)) { 180 /* 181 * We use cmpxchg16 to atomically update the 128-bit IRTE, 182 * and it cannot be updated by the hardware or other processors 183 * behind us, so the return value of cmpxchg16 should be the 184 * same as the old value. 185 */ 186 u128 old = irte->irte; 187 WARN_ON(!try_cmpxchg128(&irte->irte, &old, irte_modified->irte)); 188 } else { 189 WRITE_ONCE(irte->low, irte_modified->low); 190 WRITE_ONCE(irte->high, irte_modified->high); 191 } 192 __iommu_flush_cache(iommu, irte, sizeof(*irte)); 193 194 rc = qi_flush_iec(iommu, index, 0); 195 196 /* Update iommu mode according to the IRTE mode */ 197 irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING; 198 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); 199 200 return rc; 201 } 202 203 static struct intel_iommu *map_hpet_to_iommu(u8 hpet_id) 204 { 205 int i; 206 207 for (i = 0; i < MAX_HPET_TBS; i++) { 208 if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu) 209 return ir_hpet[i].iommu; 210 } 211 return NULL; 212 } 213 214 static struct intel_iommu *map_ioapic_to_iommu(int apic) 215 { 216 int i; 217 218 for (i = 0; i < MAX_IO_APICS; i++) { 219 if (ir_ioapic[i].id == apic && ir_ioapic[i].iommu) 220 return ir_ioapic[i].iommu; 221 } 222 return NULL; 223 } 224 225 static struct irq_domain *map_dev_to_ir(struct pci_dev *dev) 226 { 227 struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(dev); 228 229 return drhd ? drhd->iommu->ir_domain : NULL; 230 } 231 232 static int clear_entries(struct irq_2_iommu *irq_iommu) 233 { 234 struct irte *start, *entry, *end; 235 struct intel_iommu *iommu; 236 int index; 237 238 if (irq_iommu->sub_handle) 239 return 0; 240 241 iommu = irq_iommu->iommu; 242 index = irq_iommu->irte_index; 243 244 start = iommu->ir_table->base + index; 245 end = start + (1 << irq_iommu->irte_mask); 246 247 for (entry = start; entry < end; entry++) { 248 WRITE_ONCE(entry->low, 0); 249 WRITE_ONCE(entry->high, 0); 250 } 251 bitmap_release_region(iommu->ir_table->bitmap, index, 252 irq_iommu->irte_mask); 253 254 return qi_flush_iec(iommu, index, irq_iommu->irte_mask); 255 } 256 257 /* 258 * source validation type 259 */ 260 #define SVT_NO_VERIFY 0x0 /* no verification is required */ 261 #define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fields */ 262 #define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */ 263 264 /* 265 * source-id qualifier 266 */ 267 #define SQ_ALL_16 0x0 /* verify all 16 bits of request-id */ 268 #define SQ_13_IGNORE_1 0x1 /* verify most significant 13 bits, ignore 269 * the third least significant bit 270 */ 271 #define SQ_13_IGNORE_2 0x2 /* verify most significant 13 bits, ignore 272 * the second and third least significant bits 273 */ 274 #define SQ_13_IGNORE_3 0x3 /* verify most significant 13 bits, ignore 275 * the least three significant bits 276 */ 277 278 /* 279 * set SVT, SQ and SID fields of irte to verify 280 * source ids of interrupt requests 281 */ 282 static void set_irte_sid(struct irte *irte, unsigned int svt, 283 unsigned int sq, unsigned int sid) 284 { 285 if (disable_sourceid_checking) 286 svt = SVT_NO_VERIFY; 287 irte->svt = svt; 288 irte->sq = sq; 289 irte->sid = sid; 290 } 291 292 /* 293 * Set an IRTE to match only the bus number. Interrupt requests that reference 294 * this IRTE must have a requester-id whose bus number is between or equal 295 * to the start_bus and end_bus arguments. 296 */ 297 static void set_irte_verify_bus(struct irte *irte, unsigned int start_bus, 298 unsigned int end_bus) 299 { 300 set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, 301 (start_bus << 8) | end_bus); 302 } 303 304 static int set_ioapic_sid(struct irte *irte, int apic) 305 { 306 int i; 307 u16 sid = 0; 308 309 if (!irte) 310 return -1; 311 312 for (i = 0; i < MAX_IO_APICS; i++) { 313 if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) { 314 sid = PCI_DEVID(ir_ioapic[i].bus, ir_ioapic[i].devfn); 315 break; 316 } 317 } 318 319 if (sid == 0) { 320 pr_warn("Failed to set source-id of IOAPIC (%d)\n", apic); 321 return -1; 322 } 323 324 set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid); 325 326 return 0; 327 } 328 329 static int set_hpet_sid(struct irte *irte, u8 id) 330 { 331 int i; 332 u16 sid = 0; 333 334 if (!irte) 335 return -1; 336 337 for (i = 0; i < MAX_HPET_TBS; i++) { 338 if (ir_hpet[i].iommu && ir_hpet[i].id == id) { 339 sid = PCI_DEVID(ir_hpet[i].bus, ir_hpet[i].devfn); 340 break; 341 } 342 } 343 344 if (sid == 0) { 345 pr_warn("Failed to set source-id of HPET block (%d)\n", id); 346 return -1; 347 } 348 349 /* 350 * Should really use SQ_ALL_16. Some platforms are broken. 351 * While we figure out the right quirks for these broken platforms, use 352 * SQ_13_IGNORE_3 for now. 353 */ 354 set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid); 355 356 return 0; 357 } 358 359 struct set_msi_sid_data { 360 struct pci_dev *pdev; 361 u16 alias; 362 int count; 363 int busmatch_count; 364 }; 365 366 static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque) 367 { 368 struct set_msi_sid_data *data = opaque; 369 370 if (data->count == 0 || PCI_BUS_NUM(alias) == PCI_BUS_NUM(data->alias)) 371 data->busmatch_count++; 372 373 data->pdev = pdev; 374 data->alias = alias; 375 data->count++; 376 377 return 0; 378 } 379 380 static int set_msi_sid(struct irte *irte, struct pci_dev *dev) 381 { 382 struct set_msi_sid_data data; 383 384 if (!irte || !dev) 385 return -1; 386 387 data.count = 0; 388 data.busmatch_count = 0; 389 pci_for_each_dma_alias(dev, set_msi_sid_cb, &data); 390 391 /* 392 * DMA alias provides us with a PCI device and alias. The only case 393 * where the it will return an alias on a different bus than the 394 * device is the case of a PCIe-to-PCI bridge, where the alias is for 395 * the subordinate bus. In this case we can only verify the bus. 396 * 397 * If there are multiple aliases, all with the same bus number, 398 * then all we can do is verify the bus. This is typical in NTB 399 * hardware which use proxy IDs where the device will generate traffic 400 * from multiple devfn numbers on the same bus. 401 * 402 * If the alias device is on a different bus than our source device 403 * then we have a topology based alias, use it. 404 * 405 * Otherwise, the alias is for a device DMA quirk and we cannot 406 * assume that MSI uses the same requester ID. Therefore use the 407 * original device. 408 */ 409 if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number) 410 set_irte_verify_bus(irte, PCI_BUS_NUM(data.alias), 411 dev->bus->number); 412 else if (data.count >= 2 && data.busmatch_count == data.count) 413 set_irte_verify_bus(irte, dev->bus->number, dev->bus->number); 414 else if (data.pdev->bus->number != dev->bus->number) 415 set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias); 416 else 417 set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, 418 pci_dev_id(dev)); 419 420 return 0; 421 } 422 423 static int iommu_load_old_irte(struct intel_iommu *iommu) 424 { 425 struct irte *old_ir_table; 426 phys_addr_t irt_phys; 427 unsigned int i; 428 size_t size; 429 u64 irta; 430 431 /* Check whether the old ir-table has the same size as ours */ 432 irta = dmar_readq(iommu->reg + DMAR_IRTA_REG); 433 if ((irta & INTR_REMAP_TABLE_REG_SIZE_MASK) 434 != INTR_REMAP_TABLE_REG_SIZE) 435 return -EINVAL; 436 437 irt_phys = irta & VTD_PAGE_MASK; 438 size = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte); 439 440 /* Map the old IR table */ 441 old_ir_table = memremap(irt_phys, size, MEMREMAP_WB); 442 if (!old_ir_table) 443 return -ENOMEM; 444 445 /* Copy data over */ 446 memcpy(iommu->ir_table->base, old_ir_table, size); 447 448 __iommu_flush_cache(iommu, iommu->ir_table->base, size); 449 450 /* 451 * Now check the table for used entries and mark those as 452 * allocated in the bitmap 453 */ 454 for (i = 0; i < INTR_REMAP_TABLE_ENTRIES; i++) { 455 if (iommu->ir_table->base[i].present) 456 bitmap_set(iommu->ir_table->bitmap, i, 1); 457 } 458 459 memunmap(old_ir_table); 460 461 return 0; 462 } 463 464 465 static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) 466 { 467 unsigned long flags; 468 u64 addr; 469 u32 sts; 470 471 addr = virt_to_phys((void *)iommu->ir_table->base); 472 473 raw_spin_lock_irqsave(&iommu->register_lock, flags); 474 475 dmar_writeq(iommu->reg + DMAR_IRTA_REG, 476 (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); 477 478 /* Set interrupt-remapping table pointer */ 479 writel(iommu->gcmd | DMA_GCMD_SIRTP, iommu->reg + DMAR_GCMD_REG); 480 481 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 482 readl, (sts & DMA_GSTS_IRTPS), sts); 483 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 484 485 /* 486 * Global invalidation of interrupt entry cache to make sure the 487 * hardware uses the new irq remapping table. 488 */ 489 if (!cap_esirtps(iommu->cap)) 490 qi_global_iec(iommu); 491 } 492 493 static void iommu_enable_irq_remapping(struct intel_iommu *iommu) 494 { 495 unsigned long flags; 496 u32 sts; 497 498 raw_spin_lock_irqsave(&iommu->register_lock, flags); 499 500 /* Enable interrupt-remapping */ 501 iommu->gcmd |= DMA_GCMD_IRE; 502 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 503 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 504 readl, (sts & DMA_GSTS_IRES), sts); 505 506 /* Block compatibility-format MSIs */ 507 if (sts & DMA_GSTS_CFIS) { 508 iommu->gcmd &= ~DMA_GCMD_CFI; 509 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 510 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 511 readl, !(sts & DMA_GSTS_CFIS), sts); 512 } 513 514 /* 515 * With CFI clear in the Global Command register, we should be 516 * protected from dangerous (i.e. compatibility) interrupts 517 * regardless of x2apic status. Check just to be sure. 518 */ 519 if (sts & DMA_GSTS_CFIS) 520 WARN(1, KERN_WARNING 521 "Compatibility-format IRQs enabled despite intr remapping;\n" 522 "you are vulnerable to IRQ injection.\n"); 523 524 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 525 } 526 527 static int intel_setup_irq_remapping(struct intel_iommu *iommu) 528 { 529 struct ir_table *ir_table; 530 struct fwnode_handle *fn; 531 unsigned long *bitmap; 532 void *ir_table_base; 533 534 if (iommu->ir_table) 535 return 0; 536 537 ir_table = kzalloc(sizeof(struct ir_table), GFP_KERNEL); 538 if (!ir_table) 539 return -ENOMEM; 540 541 ir_table_base = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, 542 INTR_REMAP_PAGE_ORDER); 543 if (!ir_table_base) { 544 pr_err("IR%d: failed to allocate pages of order %d\n", 545 iommu->seq_id, INTR_REMAP_PAGE_ORDER); 546 goto out_free_table; 547 } 548 549 bitmap = bitmap_zalloc(INTR_REMAP_TABLE_ENTRIES, GFP_KERNEL); 550 if (bitmap == NULL) { 551 pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id); 552 goto out_free_pages; 553 } 554 555 fn = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id); 556 if (!fn) 557 goto out_free_bitmap; 558 559 iommu->ir_domain = 560 irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 561 0, INTR_REMAP_TABLE_ENTRIES, 562 fn, &intel_ir_domain_ops, 563 iommu); 564 if (!iommu->ir_domain) { 565 pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); 566 goto out_free_fwnode; 567 } 568 569 irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR); 570 iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | 571 IRQ_DOMAIN_FLAG_ISOLATED_MSI; 572 iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops; 573 574 ir_table->base = ir_table_base; 575 ir_table->bitmap = bitmap; 576 iommu->ir_table = ir_table; 577 578 /* 579 * If the queued invalidation is already initialized, 580 * shouldn't disable it. 581 */ 582 if (!iommu->qi) { 583 /* 584 * Clear previous faults. 585 */ 586 dmar_fault(-1, iommu); 587 dmar_disable_qi(iommu); 588 589 if (dmar_enable_qi(iommu)) { 590 pr_err("Failed to enable queued invalidation\n"); 591 goto out_free_ir_domain; 592 } 593 } 594 595 init_ir_status(iommu); 596 597 if (ir_pre_enabled(iommu)) { 598 if (!is_kdump_kernel()) { 599 pr_info_once("IRQ remapping was enabled on %s but we are not in kdump mode\n", 600 iommu->name); 601 clear_ir_pre_enabled(iommu); 602 iommu_disable_irq_remapping(iommu); 603 } else if (iommu_load_old_irte(iommu)) 604 pr_err("Failed to copy IR table for %s from previous kernel\n", 605 iommu->name); 606 else 607 pr_info("Copied IR table for %s from previous kernel\n", 608 iommu->name); 609 } 610 611 iommu_set_irq_remapping(iommu, eim_mode); 612 613 return 0; 614 615 out_free_ir_domain: 616 irq_domain_remove(iommu->ir_domain); 617 iommu->ir_domain = NULL; 618 out_free_fwnode: 619 irq_domain_free_fwnode(fn); 620 out_free_bitmap: 621 bitmap_free(bitmap); 622 out_free_pages: 623 iommu_free_pages(ir_table_base, INTR_REMAP_PAGE_ORDER); 624 out_free_table: 625 kfree(ir_table); 626 627 iommu->ir_table = NULL; 628 629 return -ENOMEM; 630 } 631 632 static void intel_teardown_irq_remapping(struct intel_iommu *iommu) 633 { 634 struct fwnode_handle *fn; 635 636 if (iommu && iommu->ir_table) { 637 if (iommu->ir_domain) { 638 fn = iommu->ir_domain->fwnode; 639 640 irq_domain_remove(iommu->ir_domain); 641 irq_domain_free_fwnode(fn); 642 iommu->ir_domain = NULL; 643 } 644 iommu_free_pages(iommu->ir_table->base, INTR_REMAP_PAGE_ORDER); 645 bitmap_free(iommu->ir_table->bitmap); 646 kfree(iommu->ir_table); 647 iommu->ir_table = NULL; 648 } 649 } 650 651 /* 652 * Disable Interrupt Remapping. 653 */ 654 static void iommu_disable_irq_remapping(struct intel_iommu *iommu) 655 { 656 unsigned long flags; 657 u32 sts; 658 659 if (!ecap_ir_support(iommu->ecap)) 660 return; 661 662 /* 663 * global invalidation of interrupt entry cache before disabling 664 * interrupt-remapping. 665 */ 666 if (!cap_esirtps(iommu->cap)) 667 qi_global_iec(iommu); 668 669 raw_spin_lock_irqsave(&iommu->register_lock, flags); 670 671 sts = readl(iommu->reg + DMAR_GSTS_REG); 672 if (!(sts & DMA_GSTS_IRES)) 673 goto end; 674 675 iommu->gcmd &= ~DMA_GCMD_IRE; 676 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 677 678 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 679 readl, !(sts & DMA_GSTS_IRES), sts); 680 681 end: 682 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 683 } 684 685 static int __init dmar_x2apic_optout(void) 686 { 687 struct acpi_table_dmar *dmar; 688 dmar = (struct acpi_table_dmar *)dmar_tbl; 689 if (!dmar || no_x2apic_optout) 690 return 0; 691 return dmar->flags & DMAR_X2APIC_OPT_OUT; 692 } 693 694 static void __init intel_cleanup_irq_remapping(void) 695 { 696 struct dmar_drhd_unit *drhd; 697 struct intel_iommu *iommu; 698 699 for_each_iommu(iommu, drhd) { 700 if (ecap_ir_support(iommu->ecap)) { 701 iommu_disable_irq_remapping(iommu); 702 intel_teardown_irq_remapping(iommu); 703 } 704 } 705 706 if (x2apic_supported()) 707 pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n"); 708 } 709 710 static int __init intel_prepare_irq_remapping(void) 711 { 712 struct dmar_drhd_unit *drhd; 713 struct intel_iommu *iommu; 714 int eim = 0; 715 716 if (irq_remap_broken) { 717 pr_warn("This system BIOS has enabled interrupt remapping\n" 718 "on a chipset that contains an erratum making that\n" 719 "feature unstable. To maintain system stability\n" 720 "interrupt remapping is being disabled. Please\n" 721 "contact your BIOS vendor for an update\n"); 722 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 723 return -ENODEV; 724 } 725 726 if (dmar_table_init() < 0) 727 return -ENODEV; 728 729 if (!dmar_ir_support()) 730 return -ENODEV; 731 732 if (parse_ioapics_under_ir()) { 733 pr_info("Not enabling interrupt remapping\n"); 734 goto error; 735 } 736 737 /* First make sure all IOMMUs support IRQ remapping */ 738 for_each_iommu(iommu, drhd) 739 if (!ecap_ir_support(iommu->ecap)) 740 goto error; 741 742 /* Detect remapping mode: lapic or x2apic */ 743 if (x2apic_supported()) { 744 eim = !dmar_x2apic_optout(); 745 if (!eim) { 746 pr_info("x2apic is disabled because BIOS sets x2apic opt out bit."); 747 pr_info("Use 'intremap=no_x2apic_optout' to override the BIOS setting.\n"); 748 } 749 } 750 751 for_each_iommu(iommu, drhd) { 752 if (eim && !ecap_eim_support(iommu->ecap)) { 753 pr_info("%s does not support EIM\n", iommu->name); 754 eim = 0; 755 } 756 } 757 758 eim_mode = eim; 759 if (eim) 760 pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n"); 761 762 /* Do the initializations early */ 763 for_each_iommu(iommu, drhd) { 764 if (intel_setup_irq_remapping(iommu)) { 765 pr_err("Failed to setup irq remapping for %s\n", 766 iommu->name); 767 goto error; 768 } 769 } 770 771 return 0; 772 773 error: 774 intel_cleanup_irq_remapping(); 775 return -ENODEV; 776 } 777 778 /* 779 * Set Posted-Interrupts capability. 780 */ 781 static inline void set_irq_posting_cap(void) 782 { 783 struct dmar_drhd_unit *drhd; 784 struct intel_iommu *iommu; 785 786 if (!disable_irq_post) { 787 /* 788 * If IRTE is in posted format, the 'pda' field goes across the 789 * 64-bit boundary, we need use cmpxchg16b to atomically update 790 * it. We only expose posted-interrupt when X86_FEATURE_CX16 791 * is supported. Actually, hardware platforms supporting PI 792 * should have X86_FEATURE_CX16 support, this has been confirmed 793 * with Intel hardware guys. 794 */ 795 if (boot_cpu_has(X86_FEATURE_CX16)) 796 intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; 797 798 for_each_iommu(iommu, drhd) 799 if (!cap_pi_support(iommu->cap)) { 800 intel_irq_remap_ops.capability &= 801 ~(1 << IRQ_POSTING_CAP); 802 break; 803 } 804 } 805 } 806 807 static int __init intel_enable_irq_remapping(void) 808 { 809 struct dmar_drhd_unit *drhd; 810 struct intel_iommu *iommu; 811 bool setup = false; 812 813 /* 814 * Setup Interrupt-remapping for all the DRHD's now. 815 */ 816 for_each_iommu(iommu, drhd) { 817 if (!ir_pre_enabled(iommu)) 818 iommu_enable_irq_remapping(iommu); 819 setup = true; 820 } 821 822 if (!setup) 823 goto error; 824 825 irq_remapping_enabled = 1; 826 827 set_irq_posting_cap(); 828 829 pr_info("Enabled IRQ remapping in %s mode\n", eim_mode ? "x2apic" : "xapic"); 830 831 return eim_mode ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; 832 833 error: 834 intel_cleanup_irq_remapping(); 835 return -1; 836 } 837 838 static int ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope, 839 struct intel_iommu *iommu, 840 struct acpi_dmar_hardware_unit *drhd) 841 { 842 struct acpi_dmar_pci_path *path; 843 u8 bus; 844 int count, free = -1; 845 846 bus = scope->bus; 847 path = (struct acpi_dmar_pci_path *)(scope + 1); 848 count = (scope->length - sizeof(struct acpi_dmar_device_scope)) 849 / sizeof(struct acpi_dmar_pci_path); 850 851 while (--count > 0) { 852 /* 853 * Access PCI directly due to the PCI 854 * subsystem isn't initialized yet. 855 */ 856 bus = read_pci_config_byte(bus, path->device, path->function, 857 PCI_SECONDARY_BUS); 858 path++; 859 } 860 861 for (count = 0; count < MAX_HPET_TBS; count++) { 862 if (ir_hpet[count].iommu == iommu && 863 ir_hpet[count].id == scope->enumeration_id) 864 return 0; 865 else if (ir_hpet[count].iommu == NULL && free == -1) 866 free = count; 867 } 868 if (free == -1) { 869 pr_warn("Exceeded Max HPET blocks\n"); 870 return -ENOSPC; 871 } 872 873 ir_hpet[free].iommu = iommu; 874 ir_hpet[free].id = scope->enumeration_id; 875 ir_hpet[free].bus = bus; 876 ir_hpet[free].devfn = PCI_DEVFN(path->device, path->function); 877 pr_info("HPET id %d under DRHD base 0x%Lx\n", 878 scope->enumeration_id, drhd->address); 879 880 return 0; 881 } 882 883 static int ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope, 884 struct intel_iommu *iommu, 885 struct acpi_dmar_hardware_unit *drhd) 886 { 887 struct acpi_dmar_pci_path *path; 888 u8 bus; 889 int count, free = -1; 890 891 bus = scope->bus; 892 path = (struct acpi_dmar_pci_path *)(scope + 1); 893 count = (scope->length - sizeof(struct acpi_dmar_device_scope)) 894 / sizeof(struct acpi_dmar_pci_path); 895 896 while (--count > 0) { 897 /* 898 * Access PCI directly due to the PCI 899 * subsystem isn't initialized yet. 900 */ 901 bus = read_pci_config_byte(bus, path->device, path->function, 902 PCI_SECONDARY_BUS); 903 path++; 904 } 905 906 for (count = 0; count < MAX_IO_APICS; count++) { 907 if (ir_ioapic[count].iommu == iommu && 908 ir_ioapic[count].id == scope->enumeration_id) 909 return 0; 910 else if (ir_ioapic[count].iommu == NULL && free == -1) 911 free = count; 912 } 913 if (free == -1) { 914 pr_warn("Exceeded Max IO APICS\n"); 915 return -ENOSPC; 916 } 917 918 ir_ioapic[free].bus = bus; 919 ir_ioapic[free].devfn = PCI_DEVFN(path->device, path->function); 920 ir_ioapic[free].iommu = iommu; 921 ir_ioapic[free].id = scope->enumeration_id; 922 pr_info("IOAPIC id %d under DRHD base 0x%Lx IOMMU %d\n", 923 scope->enumeration_id, drhd->address, iommu->seq_id); 924 925 return 0; 926 } 927 928 static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header, 929 struct intel_iommu *iommu) 930 { 931 int ret = 0; 932 struct acpi_dmar_hardware_unit *drhd; 933 struct acpi_dmar_device_scope *scope; 934 void *start, *end; 935 936 drhd = (struct acpi_dmar_hardware_unit *)header; 937 start = (void *)(drhd + 1); 938 end = ((void *)drhd) + header->length; 939 940 while (start < end && ret == 0) { 941 scope = start; 942 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) 943 ret = ir_parse_one_ioapic_scope(scope, iommu, drhd); 944 else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) 945 ret = ir_parse_one_hpet_scope(scope, iommu, drhd); 946 start += scope->length; 947 } 948 949 return ret; 950 } 951 952 static void ir_remove_ioapic_hpet_scope(struct intel_iommu *iommu) 953 { 954 int i; 955 956 for (i = 0; i < MAX_HPET_TBS; i++) 957 if (ir_hpet[i].iommu == iommu) 958 ir_hpet[i].iommu = NULL; 959 960 for (i = 0; i < MAX_IO_APICS; i++) 961 if (ir_ioapic[i].iommu == iommu) 962 ir_ioapic[i].iommu = NULL; 963 } 964 965 /* 966 * Finds the assocaition between IOAPIC's and its Interrupt-remapping 967 * hardware unit. 968 */ 969 static int __init parse_ioapics_under_ir(void) 970 { 971 struct dmar_drhd_unit *drhd; 972 struct intel_iommu *iommu; 973 bool ir_supported = false; 974 int ioapic_idx; 975 976 for_each_iommu(iommu, drhd) { 977 int ret; 978 979 if (!ecap_ir_support(iommu->ecap)) 980 continue; 981 982 ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu); 983 if (ret) 984 return ret; 985 986 ir_supported = true; 987 } 988 989 if (!ir_supported) 990 return -ENODEV; 991 992 for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { 993 int ioapic_id = mpc_ioapic_id(ioapic_idx); 994 if (!map_ioapic_to_iommu(ioapic_id)) { 995 pr_err(FW_BUG "ioapic %d has no mapping iommu, " 996 "interrupt remapping will be disabled\n", 997 ioapic_id); 998 return -1; 999 } 1000 } 1001 1002 return 0; 1003 } 1004 1005 static int __init ir_dev_scope_init(void) 1006 { 1007 int ret; 1008 1009 if (!irq_remapping_enabled) 1010 return 0; 1011 1012 down_write(&dmar_global_lock); 1013 ret = dmar_dev_scope_init(); 1014 up_write(&dmar_global_lock); 1015 1016 return ret; 1017 } 1018 rootfs_initcall(ir_dev_scope_init); 1019 1020 static void disable_irq_remapping(void) 1021 { 1022 struct dmar_drhd_unit *drhd; 1023 struct intel_iommu *iommu = NULL; 1024 1025 /* 1026 * Disable Interrupt-remapping for all the DRHD's now. 1027 */ 1028 for_each_iommu(iommu, drhd) { 1029 if (!ecap_ir_support(iommu->ecap)) 1030 continue; 1031 1032 iommu_disable_irq_remapping(iommu); 1033 } 1034 1035 /* 1036 * Clear Posted-Interrupts capability. 1037 */ 1038 if (!disable_irq_post) 1039 intel_irq_remap_ops.capability &= ~(1 << IRQ_POSTING_CAP); 1040 } 1041 1042 static int reenable_irq_remapping(int eim) 1043 { 1044 struct dmar_drhd_unit *drhd; 1045 bool setup = false; 1046 struct intel_iommu *iommu = NULL; 1047 1048 for_each_iommu(iommu, drhd) 1049 if (iommu->qi) 1050 dmar_reenable_qi(iommu); 1051 1052 /* 1053 * Setup Interrupt-remapping for all the DRHD's now. 1054 */ 1055 for_each_iommu(iommu, drhd) { 1056 if (!ecap_ir_support(iommu->ecap)) 1057 continue; 1058 1059 /* Set up interrupt remapping for iommu.*/ 1060 iommu_set_irq_remapping(iommu, eim); 1061 iommu_enable_irq_remapping(iommu); 1062 setup = true; 1063 } 1064 1065 if (!setup) 1066 goto error; 1067 1068 set_irq_posting_cap(); 1069 1070 return 0; 1071 1072 error: 1073 /* 1074 * handle error condition gracefully here! 1075 */ 1076 return -1; 1077 } 1078 1079 /* 1080 * Store the MSI remapping domain pointer in the device if enabled. 1081 * 1082 * This is called from dmar_pci_bus_add_dev() so it works even when DMA 1083 * remapping is disabled. Only update the pointer if the device is not 1084 * already handled by a non default PCI/MSI interrupt domain. This protects 1085 * e.g. VMD devices. 1086 */ 1087 void intel_irq_remap_add_device(struct dmar_pci_notify_info *info) 1088 { 1089 if (!irq_remapping_enabled || !pci_dev_has_default_msi_parent_domain(info->dev)) 1090 return; 1091 1092 dev_set_msi_domain(&info->dev->dev, map_dev_to_ir(info->dev)); 1093 } 1094 1095 static void prepare_irte(struct irte *irte, int vector, unsigned int dest) 1096 { 1097 memset(irte, 0, sizeof(*irte)); 1098 1099 irte->present = 1; 1100 irte->dst_mode = apic->dest_mode_logical; 1101 /* 1102 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the 1103 * actual level or edge trigger will be setup in the IO-APIC 1104 * RTE. This will help simplify level triggered irq migration. 1105 * For more details, see the comments (in io_apic.c) explainig IO-APIC 1106 * irq migration in the presence of interrupt-remapping. 1107 */ 1108 irte->trigger_mode = 0; 1109 irte->dlvry_mode = APIC_DELIVERY_MODE_FIXED; 1110 irte->vector = vector; 1111 irte->dest_id = IRTE_DEST(dest); 1112 irte->redir_hint = 1; 1113 } 1114 1115 static void prepare_irte_posted(struct irte *irte) 1116 { 1117 memset(irte, 0, sizeof(*irte)); 1118 1119 irte->present = 1; 1120 irte->p_pst = 1; 1121 } 1122 1123 struct irq_remap_ops intel_irq_remap_ops = { 1124 .prepare = intel_prepare_irq_remapping, 1125 .enable = intel_enable_irq_remapping, 1126 .disable = disable_irq_remapping, 1127 .reenable = reenable_irq_remapping, 1128 .enable_faulting = enable_drhd_fault_handling, 1129 }; 1130 1131 #ifdef CONFIG_X86_POSTED_MSI 1132 1133 static phys_addr_t get_pi_desc_addr(struct irq_data *irqd) 1134 { 1135 int cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd)); 1136 1137 if (WARN_ON(cpu >= nr_cpu_ids)) 1138 return 0; 1139 1140 return __pa(per_cpu_ptr(&posted_msi_pi_desc, cpu)); 1141 } 1142 1143 static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) 1144 { 1145 struct intel_ir_data *ir_data = irqd->chip_data; 1146 struct irte *irte = &ir_data->irte_entry; 1147 struct irte irte_pi; 1148 u64 pid_addr; 1149 1150 pid_addr = get_pi_desc_addr(irqd); 1151 1152 if (!pid_addr) { 1153 pr_warn("Failed to setup IRQ %d for posted mode", irqd->irq); 1154 return; 1155 } 1156 1157 memset(&irte_pi, 0, sizeof(irte_pi)); 1158 1159 /* The shared IRTE already be set up as posted during alloc_irte */ 1160 dmar_copy_shared_irte(&irte_pi, irte); 1161 1162 irte_pi.pda_l = (pid_addr >> (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT); 1163 irte_pi.pda_h = (pid_addr >> 32) & ~(-1UL << PDA_HIGH_BIT); 1164 1165 modify_irte(&ir_data->irq_2_iommu, &irte_pi); 1166 } 1167 1168 #else 1169 static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {} 1170 #endif 1171 1172 static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) 1173 { 1174 struct intel_ir_data *ir_data = irqd->chip_data; 1175 struct irte *irte = &ir_data->irte_entry; 1176 struct irq_cfg *cfg = irqd_cfg(irqd); 1177 1178 /* 1179 * Atomically updates the IRTE with the new destination, vector 1180 * and flushes the interrupt entry cache. 1181 */ 1182 irte->vector = cfg->vector; 1183 irte->dest_id = IRTE_DEST(cfg->dest_apicid); 1184 1185 if (ir_data->irq_2_iommu.posted_msi) 1186 intel_ir_reconfigure_irte_posted(irqd); 1187 else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) 1188 modify_irte(&ir_data->irq_2_iommu, irte); 1189 } 1190 1191 /* 1192 * Migrate the IO-APIC irq in the presence of intr-remapping. 1193 * 1194 * For both level and edge triggered, irq migration is a simple atomic 1195 * update(of vector and cpu destination) of IRTE and flush the hardware cache. 1196 * 1197 * For level triggered, we eliminate the io-apic RTE modification (with the 1198 * updated vector information), by using a virtual vector (io-apic pin number). 1199 * Real vector that is used for interrupting cpu will be coming from 1200 * the interrupt-remapping table entry. 1201 * 1202 * As the migration is a simple atomic update of IRTE, the same mechanism 1203 * is used to migrate MSI irq's in the presence of interrupt-remapping. 1204 */ 1205 static int 1206 intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, 1207 bool force) 1208 { 1209 struct irq_data *parent = data->parent_data; 1210 struct irq_cfg *cfg = irqd_cfg(data); 1211 int ret; 1212 1213 ret = parent->chip->irq_set_affinity(parent, mask, force); 1214 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 1215 return ret; 1216 1217 intel_ir_reconfigure_irte(data, false); 1218 /* 1219 * After this point, all the interrupts will start arriving 1220 * at the new destination. So, time to cleanup the previous 1221 * vector allocation. 1222 */ 1223 vector_schedule_cleanup(cfg); 1224 1225 return IRQ_SET_MASK_OK_DONE; 1226 } 1227 1228 static void intel_ir_compose_msi_msg(struct irq_data *irq_data, 1229 struct msi_msg *msg) 1230 { 1231 struct intel_ir_data *ir_data = irq_data->chip_data; 1232 1233 *msg = ir_data->msi_entry; 1234 } 1235 1236 static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) 1237 { 1238 struct intel_ir_data *ir_data = data->chip_data; 1239 struct vcpu_data *vcpu_pi_info = info; 1240 1241 /* stop posting interrupts, back to the default mode */ 1242 if (!vcpu_pi_info) { 1243 modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); 1244 } else { 1245 struct irte irte_pi; 1246 1247 /* 1248 * We are not caching the posted interrupt entry. We 1249 * copy the data from the remapped entry and modify 1250 * the fields which are relevant for posted mode. The 1251 * cached remapped entry is used for switching back to 1252 * remapped mode. 1253 */ 1254 memset(&irte_pi, 0, sizeof(irte_pi)); 1255 dmar_copy_shared_irte(&irte_pi, &ir_data->irte_entry); 1256 1257 /* Update the posted mode fields */ 1258 irte_pi.p_pst = 1; 1259 irte_pi.p_urgent = 0; 1260 irte_pi.p_vector = vcpu_pi_info->vector; 1261 irte_pi.pda_l = (vcpu_pi_info->pi_desc_addr >> 1262 (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT); 1263 irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) & 1264 ~(-1UL << PDA_HIGH_BIT); 1265 1266 modify_irte(&ir_data->irq_2_iommu, &irte_pi); 1267 } 1268 1269 return 0; 1270 } 1271 1272 static struct irq_chip intel_ir_chip = { 1273 .name = "INTEL-IR", 1274 .irq_ack = apic_ack_irq, 1275 .irq_set_affinity = intel_ir_set_affinity, 1276 .irq_compose_msi_msg = intel_ir_compose_msi_msg, 1277 .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, 1278 }; 1279 1280 /* 1281 * With posted MSIs, all vectors are multiplexed into a single notification 1282 * vector. Devices MSIs are then dispatched in a demux loop where 1283 * EOIs can be coalesced as well. 1284 * 1285 * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack() 1286 * function. Instead EOI is performed by the posted interrupt notification 1287 * handler. 1288 * 1289 * For the example below, 3 MSIs are coalesced into one CPU notification. Only 1290 * one apic_eoi() is needed. 1291 * 1292 * __sysvec_posted_msi_notification() 1293 * irq_enter(); 1294 * handle_edge_irq() 1295 * irq_chip_ack_parent() 1296 * dummy(); // No EOI 1297 * handle_irq_event() 1298 * driver_handler() 1299 * handle_edge_irq() 1300 * irq_chip_ack_parent() 1301 * dummy(); // No EOI 1302 * handle_irq_event() 1303 * driver_handler() 1304 * handle_edge_irq() 1305 * irq_chip_ack_parent() 1306 * dummy(); // No EOI 1307 * handle_irq_event() 1308 * driver_handler() 1309 * apic_eoi() 1310 * irq_exit() 1311 */ 1312 1313 static void dummy_ack(struct irq_data *d) { } 1314 1315 static struct irq_chip intel_ir_chip_post_msi = { 1316 .name = "INTEL-IR-POST", 1317 .irq_ack = dummy_ack, 1318 .irq_set_affinity = intel_ir_set_affinity, 1319 .irq_compose_msi_msg = intel_ir_compose_msi_msg, 1320 .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, 1321 }; 1322 1323 static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle) 1324 { 1325 memset(msg, 0, sizeof(*msg)); 1326 1327 msg->arch_addr_lo.dmar_base_address = X86_MSI_BASE_ADDRESS_LOW; 1328 msg->arch_addr_lo.dmar_subhandle_valid = true; 1329 msg->arch_addr_lo.dmar_format = true; 1330 msg->arch_addr_lo.dmar_index_0_14 = index & 0x7FFF; 1331 msg->arch_addr_lo.dmar_index_15 = !!(index & 0x8000); 1332 1333 msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; 1334 1335 msg->arch_data.dmar_subhandle = subhandle; 1336 } 1337 1338 static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, 1339 struct irq_cfg *irq_cfg, 1340 struct irq_alloc_info *info, 1341 int index, int sub_handle) 1342 { 1343 struct irte *irte = &data->irte_entry; 1344 1345 prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid); 1346 1347 switch (info->type) { 1348 case X86_IRQ_ALLOC_TYPE_IOAPIC: 1349 /* Set source-id of interrupt request */ 1350 set_ioapic_sid(irte, info->devid); 1351 apic_pr_verbose("IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n", 1352 info->devid, irte->present, irte->fpd, irte->dst_mode, 1353 irte->redir_hint, irte->trigger_mode, irte->dlvry_mode, 1354 irte->avail, irte->vector, irte->dest_id, irte->sid, 1355 irte->sq, irte->svt); 1356 sub_handle = info->ioapic.pin; 1357 break; 1358 case X86_IRQ_ALLOC_TYPE_HPET: 1359 set_hpet_sid(irte, info->devid); 1360 break; 1361 case X86_IRQ_ALLOC_TYPE_PCI_MSI: 1362 case X86_IRQ_ALLOC_TYPE_PCI_MSIX: 1363 if (posted_msi_supported()) { 1364 prepare_irte_posted(irte); 1365 data->irq_2_iommu.posted_msi = 1; 1366 } 1367 1368 set_msi_sid(irte, 1369 pci_real_dma_dev(msi_desc_to_pci_dev(info->desc))); 1370 break; 1371 default: 1372 BUG_ON(1); 1373 break; 1374 } 1375 fill_msi_msg(&data->msi_entry, index, sub_handle); 1376 } 1377 1378 static void intel_free_irq_resources(struct irq_domain *domain, 1379 unsigned int virq, unsigned int nr_irqs) 1380 { 1381 struct irq_data *irq_data; 1382 struct intel_ir_data *data; 1383 struct irq_2_iommu *irq_iommu; 1384 unsigned long flags; 1385 int i; 1386 for (i = 0; i < nr_irqs; i++) { 1387 irq_data = irq_domain_get_irq_data(domain, virq + i); 1388 if (irq_data && irq_data->chip_data) { 1389 data = irq_data->chip_data; 1390 irq_iommu = &data->irq_2_iommu; 1391 raw_spin_lock_irqsave(&irq_2_ir_lock, flags); 1392 clear_entries(irq_iommu); 1393 raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); 1394 irq_domain_reset_irq_data(irq_data); 1395 kfree(data); 1396 } 1397 } 1398 } 1399 1400 static int intel_irq_remapping_alloc(struct irq_domain *domain, 1401 unsigned int virq, unsigned int nr_irqs, 1402 void *arg) 1403 { 1404 struct intel_iommu *iommu = domain->host_data; 1405 struct irq_alloc_info *info = arg; 1406 struct intel_ir_data *data, *ird; 1407 struct irq_data *irq_data; 1408 struct irq_cfg *irq_cfg; 1409 int i, ret, index; 1410 1411 if (!info || !iommu) 1412 return -EINVAL; 1413 if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI) 1414 return -EINVAL; 1415 1416 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 1417 if (ret < 0) 1418 return ret; 1419 1420 ret = -ENOMEM; 1421 data = kzalloc(sizeof(*data), GFP_KERNEL); 1422 if (!data) 1423 goto out_free_parent; 1424 1425 index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs); 1426 if (index < 0) { 1427 pr_warn("Failed to allocate IRTE\n"); 1428 kfree(data); 1429 goto out_free_parent; 1430 } 1431 1432 for (i = 0; i < nr_irqs; i++) { 1433 irq_data = irq_domain_get_irq_data(domain, virq + i); 1434 irq_cfg = irqd_cfg(irq_data); 1435 if (!irq_data || !irq_cfg) { 1436 if (!i) 1437 kfree(data); 1438 ret = -EINVAL; 1439 goto out_free_data; 1440 } 1441 1442 if (i > 0) { 1443 ird = kzalloc(sizeof(*ird), GFP_KERNEL); 1444 if (!ird) 1445 goto out_free_data; 1446 /* Initialize the common data */ 1447 ird->irq_2_iommu = data->irq_2_iommu; 1448 ird->irq_2_iommu.sub_handle = i; 1449 } else { 1450 ird = data; 1451 } 1452 1453 irq_data->hwirq = (index << 16) + i; 1454 irq_data->chip_data = ird; 1455 if (posted_msi_supported() && 1456 ((info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) || 1457 (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX))) 1458 irq_data->chip = &intel_ir_chip_post_msi; 1459 else 1460 irq_data->chip = &intel_ir_chip; 1461 intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i); 1462 } 1463 return 0; 1464 1465 out_free_data: 1466 intel_free_irq_resources(domain, virq, i); 1467 out_free_parent: 1468 irq_domain_free_irqs_common(domain, virq, nr_irqs); 1469 return ret; 1470 } 1471 1472 static void intel_irq_remapping_free(struct irq_domain *domain, 1473 unsigned int virq, unsigned int nr_irqs) 1474 { 1475 intel_free_irq_resources(domain, virq, nr_irqs); 1476 irq_domain_free_irqs_common(domain, virq, nr_irqs); 1477 } 1478 1479 static int intel_irq_remapping_activate(struct irq_domain *domain, 1480 struct irq_data *irq_data, bool reserve) 1481 { 1482 intel_ir_reconfigure_irte(irq_data, true); 1483 return 0; 1484 } 1485 1486 static void intel_irq_remapping_deactivate(struct irq_domain *domain, 1487 struct irq_data *irq_data) 1488 { 1489 struct intel_ir_data *data = irq_data->chip_data; 1490 struct irte entry; 1491 1492 memset(&entry, 0, sizeof(entry)); 1493 modify_irte(&data->irq_2_iommu, &entry); 1494 } 1495 1496 static int intel_irq_remapping_select(struct irq_domain *d, 1497 struct irq_fwspec *fwspec, 1498 enum irq_domain_bus_token bus_token) 1499 { 1500 struct intel_iommu *iommu = NULL; 1501 1502 if (x86_fwspec_is_ioapic(fwspec)) 1503 iommu = map_ioapic_to_iommu(fwspec->param[0]); 1504 else if (x86_fwspec_is_hpet(fwspec)) 1505 iommu = map_hpet_to_iommu(fwspec->param[0]); 1506 1507 return iommu && d == iommu->ir_domain; 1508 } 1509 1510 static const struct irq_domain_ops intel_ir_domain_ops = { 1511 .select = intel_irq_remapping_select, 1512 .alloc = intel_irq_remapping_alloc, 1513 .free = intel_irq_remapping_free, 1514 .activate = intel_irq_remapping_activate, 1515 .deactivate = intel_irq_remapping_deactivate, 1516 }; 1517 1518 static const struct msi_parent_ops dmar_msi_parent_ops = { 1519 .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI, 1520 .prefix = "IR-", 1521 .init_dev_msi_info = msi_parent_init_dev_msi_info, 1522 }; 1523 1524 /* 1525 * Support of Interrupt Remapping Unit Hotplug 1526 */ 1527 static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu) 1528 { 1529 int ret; 1530 int eim = x2apic_enabled(); 1531 1532 if (eim && !ecap_eim_support(iommu->ecap)) { 1533 pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n", 1534 iommu->reg_phys, iommu->ecap); 1535 return -ENODEV; 1536 } 1537 1538 if (ir_parse_ioapic_hpet_scope(dmaru->hdr, iommu)) { 1539 pr_warn("DRHD %Lx: failed to parse managed IOAPIC/HPET\n", 1540 iommu->reg_phys); 1541 return -ENODEV; 1542 } 1543 1544 /* TODO: check all IOAPICs are covered by IOMMU */ 1545 1546 /* Setup Interrupt-remapping now. */ 1547 ret = intel_setup_irq_remapping(iommu); 1548 if (ret) { 1549 pr_err("Failed to setup irq remapping for %s\n", 1550 iommu->name); 1551 intel_teardown_irq_remapping(iommu); 1552 ir_remove_ioapic_hpet_scope(iommu); 1553 } else { 1554 iommu_enable_irq_remapping(iommu); 1555 } 1556 1557 return ret; 1558 } 1559 1560 int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert) 1561 { 1562 int ret = 0; 1563 struct intel_iommu *iommu = dmaru->iommu; 1564 1565 if (!irq_remapping_enabled) 1566 return 0; 1567 if (iommu == NULL) 1568 return -EINVAL; 1569 if (!ecap_ir_support(iommu->ecap)) 1570 return 0; 1571 if (irq_remapping_cap(IRQ_POSTING_CAP) && 1572 !cap_pi_support(iommu->cap)) 1573 return -EBUSY; 1574 1575 if (insert) { 1576 if (!iommu->ir_table) 1577 ret = dmar_ir_add(dmaru, iommu); 1578 } else { 1579 if (iommu->ir_table) { 1580 if (!bitmap_empty(iommu->ir_table->bitmap, 1581 INTR_REMAP_TABLE_ENTRIES)) { 1582 ret = -EBUSY; 1583 } else { 1584 iommu_disable_irq_remapping(iommu); 1585 intel_teardown_irq_remapping(iommu); 1586 ir_remove_ioapic_hpet_scope(iommu); 1587 } 1588 } 1589 } 1590 1591 return ret; 1592 } 1593