1*c14efec1SMukesh R // SPDX-License-Identifier: GPL-2.0 2*c14efec1SMukesh R 3*c14efec1SMukesh R /* 4*c14efec1SMukesh R * Hyper-V stub IOMMU driver. 5*c14efec1SMukesh R * 6*c14efec1SMukesh R * Copyright (C) 2019, Microsoft, Inc. 7*c14efec1SMukesh R * 8*c14efec1SMukesh R * Author : Lan Tianyu <Tianyu.Lan@microsoft.com> 9*c14efec1SMukesh R */ 10*c14efec1SMukesh R 11*c14efec1SMukesh R #include <linux/types.h> 12*c14efec1SMukesh R #include <linux/interrupt.h> 13*c14efec1SMukesh R #include <linux/irq.h> 14*c14efec1SMukesh R #include <linux/iommu.h> 15*c14efec1SMukesh R #include <linux/module.h> 16*c14efec1SMukesh R 17*c14efec1SMukesh R #include <asm/apic.h> 18*c14efec1SMukesh R #include <asm/cpu.h> 19*c14efec1SMukesh R #include <asm/hw_irq.h> 20*c14efec1SMukesh R #include <asm/io_apic.h> 21*c14efec1SMukesh R #include <asm/irq_remapping.h> 22*c14efec1SMukesh R #include <asm/hypervisor.h> 23*c14efec1SMukesh R #include <asm/mshyperv.h> 24*c14efec1SMukesh R 25*c14efec1SMukesh R #include "../irq_remapping.h" 26*c14efec1SMukesh R 27*c14efec1SMukesh R /* 28*c14efec1SMukesh R * According 82093AA IO-APIC spec , IO APIC has a 24-entry Interrupt 29*c14efec1SMukesh R * Redirection Table. Hyper-V exposes one single IO-APIC and so define 30*c14efec1SMukesh R * 24 IO APIC remmapping entries. 31*c14efec1SMukesh R */ 32*c14efec1SMukesh R #define IOAPIC_REMAPPING_ENTRY 24 33*c14efec1SMukesh R 34*c14efec1SMukesh R static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE }; 35*c14efec1SMukesh R static struct irq_domain *ioapic_ir_domain; 36*c14efec1SMukesh R 37*c14efec1SMukesh R static int hyperv_ir_set_affinity(struct irq_data *data, 38*c14efec1SMukesh R const struct cpumask *mask, bool force) 39*c14efec1SMukesh R { 40*c14efec1SMukesh R struct irq_data *parent = data->parent_data; 41*c14efec1SMukesh R struct irq_cfg *cfg = irqd_cfg(data); 42*c14efec1SMukesh R int ret; 43*c14efec1SMukesh R 44*c14efec1SMukesh R /* Return error If new irq affinity is out of ioapic_max_cpumask. */ 45*c14efec1SMukesh R if (!cpumask_subset(mask, &ioapic_max_cpumask)) 46*c14efec1SMukesh R return -EINVAL; 47*c14efec1SMukesh R 48*c14efec1SMukesh R ret = parent->chip->irq_set_affinity(parent, mask, force); 49*c14efec1SMukesh R if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 50*c14efec1SMukesh R return ret; 51*c14efec1SMukesh R 52*c14efec1SMukesh R vector_schedule_cleanup(cfg); 53*c14efec1SMukesh R 54*c14efec1SMukesh R return 0; 55*c14efec1SMukesh R } 56*c14efec1SMukesh R 57*c14efec1SMukesh R static struct irq_chip hyperv_ir_chip = { 58*c14efec1SMukesh R .name = "HYPERV-IR", 59*c14efec1SMukesh R .irq_ack = apic_ack_irq, 60*c14efec1SMukesh R .irq_set_affinity = hyperv_ir_set_affinity, 61*c14efec1SMukesh R }; 62*c14efec1SMukesh R 63*c14efec1SMukesh R static int hyperv_irq_remapping_alloc(struct irq_domain *domain, 64*c14efec1SMukesh R unsigned int virq, unsigned int nr_irqs, 65*c14efec1SMukesh R void *arg) 66*c14efec1SMukesh R { 67*c14efec1SMukesh R struct irq_alloc_info *info = arg; 68*c14efec1SMukesh R struct irq_data *irq_data; 69*c14efec1SMukesh R int ret = 0; 70*c14efec1SMukesh R 71*c14efec1SMukesh R if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) 72*c14efec1SMukesh R return -EINVAL; 73*c14efec1SMukesh R 74*c14efec1SMukesh R ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 75*c14efec1SMukesh R if (ret < 0) 76*c14efec1SMukesh R return ret; 77*c14efec1SMukesh R 78*c14efec1SMukesh R irq_data = irq_domain_get_irq_data(domain, virq); 79*c14efec1SMukesh R if (!irq_data) { 80*c14efec1SMukesh R irq_domain_free_irqs_common(domain, virq, nr_irqs); 81*c14efec1SMukesh R return -EINVAL; 82*c14efec1SMukesh R } 83*c14efec1SMukesh R 84*c14efec1SMukesh R irq_data->chip = &hyperv_ir_chip; 85*c14efec1SMukesh R 86*c14efec1SMukesh R /* 87*c14efec1SMukesh R * Hypver-V IO APIC irq affinity should be in the scope of 88*c14efec1SMukesh R * ioapic_max_cpumask because no irq remapping support. 89*c14efec1SMukesh R */ 90*c14efec1SMukesh R irq_data_update_affinity(irq_data, &ioapic_max_cpumask); 91*c14efec1SMukesh R 92*c14efec1SMukesh R return 0; 93*c14efec1SMukesh R } 94*c14efec1SMukesh R 95*c14efec1SMukesh R static void hyperv_irq_remapping_free(struct irq_domain *domain, 96*c14efec1SMukesh R unsigned int virq, unsigned int nr_irqs) 97*c14efec1SMukesh R { 98*c14efec1SMukesh R irq_domain_free_irqs_common(domain, virq, nr_irqs); 99*c14efec1SMukesh R } 100*c14efec1SMukesh R 101*c14efec1SMukesh R static int hyperv_irq_remapping_select(struct irq_domain *d, 102*c14efec1SMukesh R struct irq_fwspec *fwspec, 103*c14efec1SMukesh R enum irq_domain_bus_token bus_token) 104*c14efec1SMukesh R { 105*c14efec1SMukesh R /* Claim the only I/O APIC emulated by Hyper-V */ 106*c14efec1SMukesh R return x86_fwspec_is_ioapic(fwspec); 107*c14efec1SMukesh R } 108*c14efec1SMukesh R 109*c14efec1SMukesh R static const struct irq_domain_ops hyperv_ir_domain_ops = { 110*c14efec1SMukesh R .select = hyperv_irq_remapping_select, 111*c14efec1SMukesh R .alloc = hyperv_irq_remapping_alloc, 112*c14efec1SMukesh R .free = hyperv_irq_remapping_free, 113*c14efec1SMukesh R }; 114*c14efec1SMukesh R 115*c14efec1SMukesh R static const struct irq_domain_ops hyperv_root_ir_domain_ops; 116*c14efec1SMukesh R static int __init hyperv_prepare_irq_remapping(void) 117*c14efec1SMukesh R { 118*c14efec1SMukesh R struct fwnode_handle *fn; 119*c14efec1SMukesh R int i; 120*c14efec1SMukesh R const char *name; 121*c14efec1SMukesh R const struct irq_domain_ops *ops; 122*c14efec1SMukesh R 123*c14efec1SMukesh R /* 124*c14efec1SMukesh R * For a Hyper-V root partition, ms_hyperv_msi_ext_dest_id() 125*c14efec1SMukesh R * will always return false. 126*c14efec1SMukesh R */ 127*c14efec1SMukesh R if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || 128*c14efec1SMukesh R x86_init.hyper.msi_ext_dest_id()) 129*c14efec1SMukesh R return -ENODEV; 130*c14efec1SMukesh R 131*c14efec1SMukesh R if (hv_root_partition()) { 132*c14efec1SMukesh R name = "HYPERV-ROOT-IR"; 133*c14efec1SMukesh R ops = &hyperv_root_ir_domain_ops; 134*c14efec1SMukesh R } else { 135*c14efec1SMukesh R name = "HYPERV-IR"; 136*c14efec1SMukesh R ops = &hyperv_ir_domain_ops; 137*c14efec1SMukesh R } 138*c14efec1SMukesh R 139*c14efec1SMukesh R fn = irq_domain_alloc_named_id_fwnode(name, 0); 140*c14efec1SMukesh R if (!fn) 141*c14efec1SMukesh R return -ENOMEM; 142*c14efec1SMukesh R 143*c14efec1SMukesh R ioapic_ir_domain = 144*c14efec1SMukesh R irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 145*c14efec1SMukesh R 0, IOAPIC_REMAPPING_ENTRY, fn, ops, NULL); 146*c14efec1SMukesh R 147*c14efec1SMukesh R if (!ioapic_ir_domain) { 148*c14efec1SMukesh R irq_domain_free_fwnode(fn); 149*c14efec1SMukesh R return -ENOMEM; 150*c14efec1SMukesh R } 151*c14efec1SMukesh R 152*c14efec1SMukesh R if (hv_root_partition()) 153*c14efec1SMukesh R return 0; /* The rest is only relevant to guests */ 154*c14efec1SMukesh R 155*c14efec1SMukesh R /* 156*c14efec1SMukesh R * Hyper-V doesn't provide irq remapping function for 157*c14efec1SMukesh R * IO-APIC and so IO-APIC only accepts 8-bit APIC ID. 158*c14efec1SMukesh R * Cpu's APIC ID is read from ACPI MADT table and APIC IDs 159*c14efec1SMukesh R * in the MADT table on Hyper-v are sorted monotonic increasingly. 160*c14efec1SMukesh R * APIC ID reflects cpu topology. There maybe some APIC ID 161*c14efec1SMukesh R * gaps when cpu number in a socket is not power of two. Prepare 162*c14efec1SMukesh R * max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu 163*c14efec1SMukesh R * into ioapic_max_cpumask if its APIC ID is less than 256. 164*c14efec1SMukesh R */ 165*c14efec1SMukesh R for (i = min_t(unsigned int, nr_cpu_ids - 1, 255); i >= 0; i--) 166*c14efec1SMukesh R if (cpu_possible(i) && cpu_physical_id(i) < 256) 167*c14efec1SMukesh R cpumask_set_cpu(i, &ioapic_max_cpumask); 168*c14efec1SMukesh R 169*c14efec1SMukesh R return 0; 170*c14efec1SMukesh R } 171*c14efec1SMukesh R 172*c14efec1SMukesh R static int __init hyperv_enable_irq_remapping(void) 173*c14efec1SMukesh R { 174*c14efec1SMukesh R if (x2apic_supported()) 175*c14efec1SMukesh R return IRQ_REMAP_X2APIC_MODE; 176*c14efec1SMukesh R return IRQ_REMAP_XAPIC_MODE; 177*c14efec1SMukesh R } 178*c14efec1SMukesh R 179*c14efec1SMukesh R struct irq_remap_ops hyperv_irq_remap_ops = { 180*c14efec1SMukesh R .prepare = hyperv_prepare_irq_remapping, 181*c14efec1SMukesh R .enable = hyperv_enable_irq_remapping, 182*c14efec1SMukesh R }; 183*c14efec1SMukesh R 184*c14efec1SMukesh R /* IRQ remapping domain when Linux runs as the root partition */ 185*c14efec1SMukesh R struct hyperv_root_ir_data { 186*c14efec1SMukesh R u8 ioapic_id; 187*c14efec1SMukesh R bool is_level; 188*c14efec1SMukesh R struct hv_interrupt_entry entry; 189*c14efec1SMukesh R }; 190*c14efec1SMukesh R 191*c14efec1SMukesh R static void 192*c14efec1SMukesh R hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) 193*c14efec1SMukesh R { 194*c14efec1SMukesh R struct hyperv_root_ir_data *data = irq_data->chip_data; 195*c14efec1SMukesh R struct hv_interrupt_entry entry; 196*c14efec1SMukesh R const struct cpumask *affinity; 197*c14efec1SMukesh R struct IO_APIC_route_entry e; 198*c14efec1SMukesh R struct irq_cfg *cfg; 199*c14efec1SMukesh R int cpu, ioapic_id; 200*c14efec1SMukesh R u32 vector; 201*c14efec1SMukesh R 202*c14efec1SMukesh R cfg = irqd_cfg(irq_data); 203*c14efec1SMukesh R affinity = irq_data_get_effective_affinity_mask(irq_data); 204*c14efec1SMukesh R cpu = cpumask_first_and(affinity, cpu_online_mask); 205*c14efec1SMukesh R 206*c14efec1SMukesh R vector = cfg->vector; 207*c14efec1SMukesh R ioapic_id = data->ioapic_id; 208*c14efec1SMukesh R 209*c14efec1SMukesh R if (data->entry.source == HV_DEVICE_TYPE_IOAPIC 210*c14efec1SMukesh R && data->entry.ioapic_rte.as_uint64) { 211*c14efec1SMukesh R entry = data->entry; 212*c14efec1SMukesh R 213*c14efec1SMukesh R (void)hv_unmap_ioapic_interrupt(ioapic_id, &entry); 214*c14efec1SMukesh R 215*c14efec1SMukesh R data->entry.ioapic_rte.as_uint64 = 0; 216*c14efec1SMukesh R data->entry.source = 0; /* Invalid source */ 217*c14efec1SMukesh R } 218*c14efec1SMukesh R 219*c14efec1SMukesh R 220*c14efec1SMukesh R if (hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, 221*c14efec1SMukesh R vector, &entry)) 222*c14efec1SMukesh R return; 223*c14efec1SMukesh R 224*c14efec1SMukesh R data->entry = entry; 225*c14efec1SMukesh R 226*c14efec1SMukesh R /* Turn it into an IO_APIC_route_entry, and generate MSI MSG. */ 227*c14efec1SMukesh R e.w1 = entry.ioapic_rte.low_uint32; 228*c14efec1SMukesh R e.w2 = entry.ioapic_rte.high_uint32; 229*c14efec1SMukesh R 230*c14efec1SMukesh R memset(msg, 0, sizeof(*msg)); 231*c14efec1SMukesh R msg->arch_data.vector = e.vector; 232*c14efec1SMukesh R msg->arch_data.delivery_mode = e.delivery_mode; 233*c14efec1SMukesh R msg->arch_addr_lo.dest_mode_logical = e.dest_mode_logical; 234*c14efec1SMukesh R msg->arch_addr_lo.dmar_format = e.ir_format; 235*c14efec1SMukesh R msg->arch_addr_lo.dmar_index_0_14 = e.ir_index_0_14; 236*c14efec1SMukesh R } 237*c14efec1SMukesh R 238*c14efec1SMukesh R static int hyperv_root_ir_set_affinity(struct irq_data *data, 239*c14efec1SMukesh R const struct cpumask *mask, bool force) 240*c14efec1SMukesh R { 241*c14efec1SMukesh R struct irq_data *parent = data->parent_data; 242*c14efec1SMukesh R struct irq_cfg *cfg = irqd_cfg(data); 243*c14efec1SMukesh R int ret; 244*c14efec1SMukesh R 245*c14efec1SMukesh R ret = parent->chip->irq_set_affinity(parent, mask, force); 246*c14efec1SMukesh R if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 247*c14efec1SMukesh R return ret; 248*c14efec1SMukesh R 249*c14efec1SMukesh R vector_schedule_cleanup(cfg); 250*c14efec1SMukesh R 251*c14efec1SMukesh R return 0; 252*c14efec1SMukesh R } 253*c14efec1SMukesh R 254*c14efec1SMukesh R static struct irq_chip hyperv_root_ir_chip = { 255*c14efec1SMukesh R .name = "HYPERV-ROOT-IR", 256*c14efec1SMukesh R .irq_ack = apic_ack_irq, 257*c14efec1SMukesh R .irq_set_affinity = hyperv_root_ir_set_affinity, 258*c14efec1SMukesh R .irq_compose_msi_msg = hyperv_root_ir_compose_msi_msg, 259*c14efec1SMukesh R }; 260*c14efec1SMukesh R 261*c14efec1SMukesh R static int hyperv_root_irq_remapping_alloc(struct irq_domain *domain, 262*c14efec1SMukesh R unsigned int virq, unsigned int nr_irqs, 263*c14efec1SMukesh R void *arg) 264*c14efec1SMukesh R { 265*c14efec1SMukesh R struct irq_alloc_info *info = arg; 266*c14efec1SMukesh R struct irq_data *irq_data; 267*c14efec1SMukesh R struct hyperv_root_ir_data *data; 268*c14efec1SMukesh R int ret = 0; 269*c14efec1SMukesh R 270*c14efec1SMukesh R if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) 271*c14efec1SMukesh R return -EINVAL; 272*c14efec1SMukesh R 273*c14efec1SMukesh R ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 274*c14efec1SMukesh R if (ret < 0) 275*c14efec1SMukesh R return ret; 276*c14efec1SMukesh R 277*c14efec1SMukesh R data = kzalloc_obj(*data); 278*c14efec1SMukesh R if (!data) { 279*c14efec1SMukesh R irq_domain_free_irqs_common(domain, virq, nr_irqs); 280*c14efec1SMukesh R return -ENOMEM; 281*c14efec1SMukesh R } 282*c14efec1SMukesh R 283*c14efec1SMukesh R irq_data = irq_domain_get_irq_data(domain, virq); 284*c14efec1SMukesh R if (!irq_data) { 285*c14efec1SMukesh R kfree(data); 286*c14efec1SMukesh R irq_domain_free_irqs_common(domain, virq, nr_irqs); 287*c14efec1SMukesh R return -EINVAL; 288*c14efec1SMukesh R } 289*c14efec1SMukesh R 290*c14efec1SMukesh R data->ioapic_id = info->devid; 291*c14efec1SMukesh R data->is_level = info->ioapic.is_level; 292*c14efec1SMukesh R 293*c14efec1SMukesh R irq_data->chip = &hyperv_root_ir_chip; 294*c14efec1SMukesh R irq_data->chip_data = data; 295*c14efec1SMukesh R 296*c14efec1SMukesh R return 0; 297*c14efec1SMukesh R } 298*c14efec1SMukesh R 299*c14efec1SMukesh R static void hyperv_root_irq_remapping_free(struct irq_domain *domain, 300*c14efec1SMukesh R unsigned int virq, unsigned int nr_irqs) 301*c14efec1SMukesh R { 302*c14efec1SMukesh R struct irq_data *irq_data; 303*c14efec1SMukesh R struct hyperv_root_ir_data *data; 304*c14efec1SMukesh R struct hv_interrupt_entry *e; 305*c14efec1SMukesh R int i; 306*c14efec1SMukesh R 307*c14efec1SMukesh R for (i = 0; i < nr_irqs; i++) { 308*c14efec1SMukesh R irq_data = irq_domain_get_irq_data(domain, virq + i); 309*c14efec1SMukesh R 310*c14efec1SMukesh R if (irq_data && irq_data->chip_data) { 311*c14efec1SMukesh R data = irq_data->chip_data; 312*c14efec1SMukesh R e = &data->entry; 313*c14efec1SMukesh R 314*c14efec1SMukesh R if (e->source == HV_DEVICE_TYPE_IOAPIC && 315*c14efec1SMukesh R e->ioapic_rte.as_uint64) 316*c14efec1SMukesh R (void)hv_unmap_ioapic_interrupt(data->ioapic_id, 317*c14efec1SMukesh R &data->entry); 318*c14efec1SMukesh R 319*c14efec1SMukesh R kfree(data); 320*c14efec1SMukesh R } 321*c14efec1SMukesh R } 322*c14efec1SMukesh R 323*c14efec1SMukesh R irq_domain_free_irqs_common(domain, virq, nr_irqs); 324*c14efec1SMukesh R } 325*c14efec1SMukesh R 326*c14efec1SMukesh R static const struct irq_domain_ops hyperv_root_ir_domain_ops = { 327*c14efec1SMukesh R .select = hyperv_irq_remapping_select, 328*c14efec1SMukesh R .alloc = hyperv_root_irq_remapping_alloc, 329*c14efec1SMukesh R .free = hyperv_root_irq_remapping_free, 330*c14efec1SMukesh R }; 331