1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor. 5 * 6 * Authors: 7 * Sunil Muthuswamy <sunilmut@microsoft.com> 8 * Wei Liu <wei.liu@kernel.org> 9 */ 10 11 #include <linux/pci.h> 12 #include <linux/irq.h> 13 #include <linux/export.h> 14 #include <asm/mshyperv.h> 15 16 static int hv_map_interrupt(union hv_device_id device_id, bool level, 17 int cpu, int vector, struct hv_interrupt_entry *entry) 18 { 19 struct hv_input_map_device_interrupt *input; 20 struct hv_output_map_device_interrupt *output; 21 struct hv_device_interrupt_descriptor *intr_desc; 22 unsigned long flags; 23 u64 status; 24 int nr_bank, var_size; 25 26 local_irq_save(flags); 27 28 input = *this_cpu_ptr(hyperv_pcpu_input_arg); 29 output = *this_cpu_ptr(hyperv_pcpu_output_arg); 30 31 intr_desc = &input->interrupt_descriptor; 32 memset(input, 0, sizeof(*input)); 33 input->partition_id = hv_current_partition_id; 34 input->device_id = device_id.as_uint64; 35 intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED; 36 intr_desc->vector_count = 1; 37 intr_desc->target.vector = vector; 38 39 if (level) 40 intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL; 41 else 42 intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE; 43 44 intr_desc->target.vp_set.valid_bank_mask = 0; 45 intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K; 46 nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu)); 47 if (nr_bank < 0) { 48 local_irq_restore(flags); 49 pr_err("%s: unable to generate VP set\n", __func__); 50 return -EINVAL; 51 } 52 intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; 53 54 /* 55 * var-sized hypercall, var-size starts after vp_mask (thus 56 * vp_set.format does not count, but vp_set.valid_bank_mask 57 * does). 58 */ 59 var_size = nr_bank + 1; 60 61 status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size, 62 input, output); 63 *entry = output->interrupt_entry; 64 65 local_irq_restore(flags); 66 67 if (!hv_result_success(status)) 68 hv_status_err(status, "\n"); 69 70 return hv_result_to_errno(status); 71 } 72 73 static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) 74 { 75 unsigned long flags; 76 struct hv_input_unmap_device_interrupt *input; 77 struct hv_interrupt_entry *intr_entry; 78 u64 status; 79 80 local_irq_save(flags); 81 input = *this_cpu_ptr(hyperv_pcpu_input_arg); 82 83 memset(input, 0, sizeof(*input)); 84 intr_entry = &input->interrupt_entry; 85 input->partition_id = hv_current_partition_id; 86 input->device_id = id; 87 *intr_entry = *old_entry; 88 89 status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); 90 local_irq_restore(flags); 91 92 if (!hv_result_success(status)) 93 hv_status_err(status, "\n"); 94 95 return hv_result_to_errno(status); 96 } 97 98 #ifdef CONFIG_PCI_MSI 99 struct rid_data { 100 struct pci_dev *bridge; 101 u32 rid; 102 }; 103 104 static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data) 105 { 106 struct rid_data *rd = data; 107 u8 bus = PCI_BUS_NUM(rd->rid); 108 109 if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) { 110 rd->bridge = pdev; 111 rd->rid = alias; 112 } 113 114 return 0; 115 } 116 117 static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev) 118 { 119 union hv_device_id dev_id; 120 struct rid_data data = { 121 .bridge = NULL, 122 .rid = PCI_DEVID(dev->bus->number, dev->devfn) 123 }; 124 125 pci_for_each_dma_alias(dev, get_rid_cb, &data); 126 127 dev_id.as_uint64 = 0; 128 dev_id.device_type = HV_DEVICE_TYPE_PCI; 129 dev_id.pci.segment = pci_domain_nr(dev->bus); 130 131 dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid); 132 dev_id.pci.bdf.device = PCI_SLOT(data.rid); 133 dev_id.pci.bdf.function = PCI_FUNC(data.rid); 134 dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE; 135 136 if (data.bridge) { 137 int pos; 138 139 /* 140 * Microsoft Hypervisor requires a bus range when the bridge is 141 * running in PCI-X mode. 142 * 143 * To distinguish conventional vs PCI-X bridge, we can check 144 * the bridge's PCI-X Secondary Status Register, Secondary Bus 145 * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge 146 * Specification Revision 1.0 5.2.2.1.3. 147 * 148 * Value zero means it is in conventional mode, otherwise it is 149 * in PCI-X mode. 150 */ 151 152 pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX); 153 if (pos) { 154 u16 status; 155 156 pci_read_config_word(data.bridge, pos + 157 PCI_X_BRIDGE_SSTATUS, &status); 158 159 if (status & PCI_X_SSTATUS_FREQ) { 160 /* Non-zero, PCI-X mode */ 161 u8 sec_bus, sub_bus; 162 163 dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE; 164 165 pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus); 166 dev_id.pci.shadow_bus_range.secondary_bus = sec_bus; 167 pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus); 168 dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus; 169 } 170 } 171 } 172 173 return dev_id; 174 } 175 176 /** 177 * hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor. 178 * @data: Describes the IRQ 179 * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL) 180 * 181 * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall. 182 * 183 * Return: 0 on success, -errno on failure 184 */ 185 int hv_map_msi_interrupt(struct irq_data *data, 186 struct hv_interrupt_entry *out_entry) 187 { 188 struct irq_cfg *cfg = irqd_cfg(data); 189 struct hv_interrupt_entry dummy; 190 union hv_device_id device_id; 191 struct msi_desc *msidesc; 192 struct pci_dev *dev; 193 int cpu; 194 195 msidesc = irq_data_get_msi_desc(data); 196 dev = msi_desc_to_pci_dev(msidesc); 197 device_id = hv_build_pci_dev_id(dev); 198 cpu = cpumask_first(irq_data_get_effective_affinity_mask(data)); 199 200 return hv_map_interrupt(device_id, false, cpu, cfg->vector, 201 out_entry ? out_entry : &dummy); 202 } 203 EXPORT_SYMBOL_GPL(hv_map_msi_interrupt); 204 205 static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg) 206 { 207 /* High address is always 0 */ 208 msg->address_hi = 0; 209 msg->address_lo = entry->msi_entry.address.as_uint32; 210 msg->data = entry->msi_entry.data.as_uint32; 211 } 212 213 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry); 214 static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) 215 { 216 struct hv_interrupt_entry *stored_entry; 217 struct irq_cfg *cfg = irqd_cfg(data); 218 struct msi_desc *msidesc; 219 struct pci_dev *dev; 220 int ret; 221 222 msidesc = irq_data_get_msi_desc(data); 223 dev = msi_desc_to_pci_dev(msidesc); 224 225 if (!cfg) { 226 pr_debug("%s: cfg is NULL", __func__); 227 return; 228 } 229 230 if (data->chip_data) { 231 /* 232 * This interrupt is already mapped. Let's unmap first. 233 * 234 * We don't use retarget interrupt hypercalls here because 235 * Microsoft Hypervisor doesn't allow root to change the vector 236 * or specify VPs outside of the set that is initially used 237 * during mapping. 238 */ 239 stored_entry = data->chip_data; 240 data->chip_data = NULL; 241 242 ret = hv_unmap_msi_interrupt(dev, stored_entry); 243 244 kfree(stored_entry); 245 246 if (ret) 247 return; 248 } 249 250 stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC); 251 if (!stored_entry) { 252 pr_debug("%s: failed to allocate chip data\n", __func__); 253 return; 254 } 255 256 ret = hv_map_msi_interrupt(data, stored_entry); 257 if (ret) { 258 kfree(stored_entry); 259 return; 260 } 261 262 data->chip_data = stored_entry; 263 entry_to_msi_msg(data->chip_data, msg); 264 265 return; 266 } 267 268 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry) 269 { 270 return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry); 271 } 272 273 static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) 274 { 275 struct hv_interrupt_entry old_entry; 276 struct msi_msg msg; 277 278 if (!irqd->chip_data) { 279 pr_debug("%s: no chip data\n!", __func__); 280 return; 281 } 282 283 old_entry = *(struct hv_interrupt_entry *)irqd->chip_data; 284 entry_to_msi_msg(&old_entry, &msg); 285 286 kfree(irqd->chip_data); 287 irqd->chip_data = NULL; 288 289 (void)hv_unmap_msi_interrupt(dev, &old_entry); 290 } 291 292 static void hv_msi_free_irq(struct irq_domain *domain, 293 struct msi_domain_info *info, unsigned int virq) 294 { 295 struct irq_data *irqd = irq_get_irq_data(virq); 296 struct msi_desc *desc; 297 298 if (!irqd) 299 return; 300 301 desc = irq_data_get_msi_desc(irqd); 302 if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev))) 303 return; 304 305 hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd); 306 } 307 308 /* 309 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, 310 * which implement the MSI or MSI-X Capability Structure. 311 */ 312 static struct irq_chip hv_pci_msi_controller = { 313 .name = "HV-PCI-MSI", 314 .irq_unmask = pci_msi_unmask_irq, 315 .irq_mask = pci_msi_mask_irq, 316 .irq_ack = irq_chip_ack_parent, 317 .irq_retrigger = irq_chip_retrigger_hierarchy, 318 .irq_compose_msi_msg = hv_irq_compose_msi_msg, 319 .irq_set_affinity = msi_domain_set_affinity, 320 .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED, 321 }; 322 323 static struct msi_domain_ops pci_msi_domain_ops = { 324 .msi_free = hv_msi_free_irq, 325 .msi_prepare = pci_msi_prepare, 326 }; 327 328 static struct msi_domain_info hv_pci_msi_domain_info = { 329 .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | 330 MSI_FLAG_PCI_MSIX, 331 .ops = &pci_msi_domain_ops, 332 .chip = &hv_pci_msi_controller, 333 .handler = handle_edge_irq, 334 .handler_name = "edge", 335 }; 336 337 struct irq_domain * __init hv_create_pci_msi_domain(void) 338 { 339 struct irq_domain *d = NULL; 340 struct fwnode_handle *fn; 341 342 fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI"); 343 if (fn) 344 d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain); 345 346 /* No point in going further if we can't get an irq domain */ 347 BUG_ON(!d); 348 349 return d; 350 } 351 352 #endif /* CONFIG_PCI_MSI */ 353 354 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry) 355 { 356 union hv_device_id device_id; 357 358 device_id.as_uint64 = 0; 359 device_id.device_type = HV_DEVICE_TYPE_IOAPIC; 360 device_id.ioapic.ioapic_id = (u8)ioapic_id; 361 362 return hv_unmap_interrupt(device_id.as_uint64, entry); 363 } 364 EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt); 365 366 int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector, 367 struct hv_interrupt_entry *entry) 368 { 369 union hv_device_id device_id; 370 371 device_id.as_uint64 = 0; 372 device_id.device_type = HV_DEVICE_TYPE_IOAPIC; 373 device_id.ioapic.ioapic_id = (u8)ioapic_id; 374 375 return hv_map_interrupt(device_id, level, cpu, vector, entry); 376 } 377 EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt); 378