xref: /linux/arch/x86/hyperv/irqdomain.c (revision 982cb55bec4a8aa73569dba4739a3de1ee2c25d9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
4  *
5  * Authors:
6  *  Sunil Muthuswamy <sunilmut@microsoft.com>
7  *  Wei Liu <wei.liu@kernel.org>
8  */
9 
10 #include <linux/pci.h>
11 #include <linux/irq.h>
12 #include <linux/export.h>
13 #include <linux/irqchip/irq-msi-lib.h>
14 #include <asm/mshyperv.h>
15 
16 static int hv_map_interrupt(union hv_device_id hv_devid, bool level,
17 		int cpu, int vector, struct hv_interrupt_entry *ret_entry)
18 {
19 	struct hv_input_map_device_interrupt *input;
20 	struct hv_output_map_device_interrupt *output;
21 	struct hv_device_interrupt_descriptor *intr_desc;
22 	unsigned long flags;
23 	u64 status;
24 	int nr_bank, var_size;
25 
26 	local_irq_save(flags);
27 
28 	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
29 	output = *this_cpu_ptr(hyperv_pcpu_output_arg);
30 
31 	intr_desc = &input->interrupt_descriptor;
32 	memset(input, 0, sizeof(*input));
33 	input->partition_id = hv_current_partition_id;
34 	input->device_id = hv_devid.as_uint64;
35 	intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
36 	intr_desc->vector_count = 1;
37 	intr_desc->target.vector = vector;
38 
39 	if (level)
40 		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
41 	else
42 		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;
43 
44 	intr_desc->target.vp_set.valid_bank_mask = 0;
45 	intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
46 	nr_bank = cpumask_to_vpset(&intr_desc->target.vp_set, cpumask_of(cpu));
47 	if (nr_bank < 0) {
48 		local_irq_restore(flags);
49 		pr_err("%s: unable to generate VP set\n", __func__);
50 		return -EINVAL;
51 	}
52 	intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
53 
54 	/*
55 	 * var-sized hypercall, var-size starts after vp_mask (thus
56 	 * vp_set.format does not count, but vp_set.valid_bank_mask
57 	 * does).
58 	 */
59 	var_size = nr_bank + 1;
60 
61 	status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
62 			input, output);
63 	*ret_entry = output->interrupt_entry;
64 
65 	local_irq_restore(flags);
66 
67 	if (!hv_result_success(status))
68 		hv_status_err(status, "\n");
69 
70 	return hv_result_to_errno(status);
71 }
72 
73 static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *irq_entry)
74 {
75 	unsigned long flags;
76 	struct hv_input_unmap_device_interrupt *input;
77 	u64 status;
78 
79 	local_irq_save(flags);
80 	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
81 
82 	memset(input, 0, sizeof(*input));
83 	input->partition_id = hv_current_partition_id;
84 	input->device_id = id;
85 	input->interrupt_entry = *irq_entry;
86 
87 	status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL);
88 	local_irq_restore(flags);
89 
90 	if (!hv_result_success(status))
91 		hv_status_err(status, "\n");
92 
93 	return hv_result_to_errno(status);
94 }
95 
96 #ifdef CONFIG_PCI_MSI
/*
 * Accumulator passed to get_rid_cb() while walking a device's DMA aliases.
 */
struct rid_data {
	/* Device recorded by get_rid_cb(); NULL until the callback sets it */
	struct pci_dev *bridge;
	/* Requester ID; starts as the device's own ID, may be replaced by an alias */
	u32 rid;
};
101 
102 static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
103 {
104 	struct rid_data *rd = data;
105 	u8 bus = PCI_BUS_NUM(rd->rid);
106 
107 	if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) {
108 		rd->bridge = pdev;
109 		rd->rid = alias;
110 	}
111 
112 	return 0;
113 }
114 
115 static union hv_device_id hv_build_devid_type_pci(struct pci_dev *pdev)
116 {
117 	int pos;
118 	union hv_device_id hv_devid;
119 	struct rid_data data = {
120 		.bridge = NULL,
121 		.rid = PCI_DEVID(pdev->bus->number, pdev->devfn)
122 	};
123 
124 	pci_for_each_dma_alias(pdev, get_rid_cb, &data);
125 
126 	hv_devid.as_uint64 = 0;
127 	hv_devid.device_type = HV_DEVICE_TYPE_PCI;
128 	hv_devid.pci.segment = pci_domain_nr(pdev->bus);
129 
130 	hv_devid.pci.bdf.bus = PCI_BUS_NUM(data.rid);
131 	hv_devid.pci.bdf.device = PCI_SLOT(data.rid);
132 	hv_devid.pci.bdf.function = PCI_FUNC(data.rid);
133 	hv_devid.pci.source_shadow = HV_SOURCE_SHADOW_NONE;
134 
135 	if (data.bridge == NULL)
136 		goto out;
137 
138 	/*
139 	 * Microsoft Hypervisor requires a bus range when the bridge is
140 	 * running in PCI-X mode.
141 	 *
142 	 * To distinguish conventional vs PCI-X bridge, we can check
143 	 * the bridge's PCI-X Secondary Status Register, Secondary Bus
144 	 * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
145 	 * Specification Revision 1.0 5.2.2.1.3.
146 	 *
147 	 * Value zero means it is in conventional mode, otherwise it is
148 	 * in PCI-X mode.
149 	 */
150 
151 	pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
152 	if (pos) {
153 		u16 status;
154 
155 		pci_read_config_word(data.bridge, pos + PCI_X_BRIDGE_SSTATUS,
156 				     &status);
157 
158 		if (status & PCI_X_SSTATUS_FREQ) {
159 			/* Non-zero, PCI-X mode */
160 			u8 sec_bus, sub_bus;
161 
162 			hv_devid.pci.source_shadow =
163 					     HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;
164 
165 			pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS,
166 					     &sec_bus);
167 			hv_devid.pci.shadow_bus_range.secondary_bus = sec_bus;
168 			pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS,
169 					     &sub_bus);
170 			hv_devid.pci.shadow_bus_range.subordinate_bus = sub_bus;
171 		}
172 	}
173 
174 out:
175 	return hv_devid;
176 }
177 
178 /*
179  * hv_map_msi_interrupt() - Map the MSI IRQ in the hypervisor.
180  * @data:      Describes the IRQ
181  * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL)
182  *
183  * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall.
184  *
185  * Return: 0 on success, -errno on failure
186  */
187 int hv_map_msi_interrupt(struct irq_data *data,
188 			 struct hv_interrupt_entry *out_entry)
189 {
190 	struct irq_cfg *cfg = irqd_cfg(data);
191 	struct hv_interrupt_entry dummy;
192 	union hv_device_id hv_devid;
193 	struct msi_desc *msidesc;
194 	struct pci_dev *pdev;
195 	int cpu;
196 
197 	msidesc = irq_data_get_msi_desc(data);
198 	pdev = msi_desc_to_pci_dev(msidesc);
199 	hv_devid = hv_build_devid_type_pci(pdev);
200 	cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
201 
202 	return hv_map_interrupt(hv_devid, false, cpu, cfg->vector,
203 				out_entry ? out_entry : &dummy);
204 }
205 EXPORT_SYMBOL_GPL(hv_map_msi_interrupt);
206 
207 static void entry_to_msi_msg(struct hv_interrupt_entry *entry,
208 			     struct msi_msg *msg)
209 {
210 	/* High address is always 0 */
211 	msg->address_hi = 0;
212 	msg->address_lo = entry->msi_entry.address.as_uint32;
213 	msg->data = entry->msi_entry.data.as_uint32;
214 }
215 
216 static int hv_unmap_msi_interrupt(struct pci_dev *pdev,
217 				  struct hv_interrupt_entry *irq_entry);
218 
219 static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
220 {
221 	struct hv_interrupt_entry *stored_entry;
222 	struct irq_cfg *cfg = irqd_cfg(data);
223 	struct msi_desc *msidesc;
224 	struct pci_dev *pdev;
225 	int ret;
226 
227 	msidesc = irq_data_get_msi_desc(data);
228 	pdev = msi_desc_to_pci_dev(msidesc);
229 
230 	if (!cfg) {
231 		pr_debug("%s: cfg is NULL", __func__);
232 		return;
233 	}
234 
235 	if (data->chip_data) {
236 		/*
237 		 * This interrupt is already mapped. Let's unmap first.
238 		 *
239 		 * We don't use retarget interrupt hypercalls here because
240 		 * Microsoft Hypervisor doesn't allow root to change the vector
241 		 * or specify VPs outside of the set that is initially used
242 		 * during mapping.
243 		 */
244 		stored_entry = data->chip_data;
245 		data->chip_data = NULL;
246 
247 		ret = hv_unmap_msi_interrupt(pdev, stored_entry);
248 
249 		kfree(stored_entry);
250 
251 		if (ret)
252 			return;
253 	}
254 
255 	stored_entry = kzalloc_obj(*stored_entry, GFP_ATOMIC);
256 	if (!stored_entry)
257 		return;
258 
259 	ret = hv_map_msi_interrupt(data, stored_entry);
260 	if (ret) {
261 		kfree(stored_entry);
262 		return;
263 	}
264 
265 	data->chip_data = stored_entry;
266 	entry_to_msi_msg(data->chip_data, msg);
267 }
268 
269 static int hv_unmap_msi_interrupt(struct pci_dev *pdev,
270 				  struct hv_interrupt_entry *irq_entry)
271 {
272 	union hv_device_id hv_devid;
273 
274 	hv_devid = hv_build_devid_type_pci(pdev);
275 	return hv_unmap_interrupt(hv_devid.as_uint64, irq_entry);
276 }
277 
278 /* NB: during map, hv_interrupt_entry is saved via data->chip_data */
279 static void hv_teardown_msi_irq(struct pci_dev *pdev, struct irq_data *irqd)
280 {
281 	struct hv_interrupt_entry irq_entry;
282 	struct msi_msg msg;
283 
284 	if (!irqd->chip_data) {
285 		pr_debug("%s: no chip data\n!", __func__);
286 		return;
287 	}
288 
289 	irq_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
290 	entry_to_msi_msg(&irq_entry, &msg);
291 
292 	kfree(irqd->chip_data);
293 	irqd->chip_data = NULL;
294 
295 	(void)hv_unmap_msi_interrupt(pdev, &irq_entry);
296 }
297 
298 /*
299  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
300  * which implement the MSI or MSI-X Capability Structure.
301  */
302 static struct irq_chip hv_pci_msi_controller = {
303 	.name			= "HV-PCI-MSI",
304 	.irq_ack		= irq_chip_ack_parent,
305 	.irq_compose_msi_msg	= hv_irq_compose_msi_msg,
306 	.irq_set_affinity	= irq_chip_set_affinity_parent,
307 };
308 
309 static bool hv_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
310 				 struct irq_domain *real_parent,
311 				 struct msi_domain_info *info)
312 {
313 	struct irq_chip *chip = info->chip;
314 
315 	if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
316 		return false;
317 
318 	chip->flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED;
319 
320 	info->ops->msi_prepare = pci_msi_prepare;
321 
322 	return true;
323 }
324 
325 #define HV_MSI_FLAGS_SUPPORTED	(MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX)
326 #define HV_MSI_FLAGS_REQUIRED	(MSI_FLAG_USE_DEF_DOM_OPS |	\
327 				 MSI_FLAG_USE_DEF_CHIP_OPS)
328 
329 static struct msi_parent_ops hv_msi_parent_ops = {
330 	.supported_flags	= HV_MSI_FLAGS_SUPPORTED,
331 	.required_flags		= HV_MSI_FLAGS_REQUIRED,
332 	.bus_select_token	= DOMAIN_BUS_NEXUS,
333 	.bus_select_mask	= MATCH_PCI_MSI,
334 	.chip_flags		= MSI_CHIP_FLAG_SET_ACK,
335 	.prefix			= "HV-",
336 	.init_dev_msi_info	= hv_init_dev_msi_info,
337 };
338 
339 /* Allocate nr_irqs IRQs for the given irq domain */
340 static int hv_msi_domain_alloc(struct irq_domain *d, unsigned int virq,
341 			       unsigned int nr_irqs, void *arg)
342 {
343 	/*
344 	 * TODO: The allocation bits of hv_irq_compose_msi_msg(), i.e.
345 	 *	 everything except entry_to_msi_msg() should be in here.
346 	 */
347 	int ret;
348 
349 	ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);
350 	if (ret)
351 		return ret;
352 
353 	for (int i = 0; i < nr_irqs; ++i) {
354 		irq_domain_set_info(d, virq + i, 0, &hv_pci_msi_controller,
355 				    NULL, handle_edge_irq, NULL, "edge");
356 	}
357 
358 	return 0;
359 }
360 
361 static void hv_msi_domain_free(struct irq_domain *d, unsigned int virq,
362 			       unsigned int nr_irqs)
363 {
364 	for (int i = 0; i < nr_irqs; ++i) {
365 		struct irq_data *irqd = irq_domain_get_irq_data(d, virq);
366 		struct msi_desc *desc;
367 
368 		desc = irq_data_get_msi_desc(irqd);
369 		if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
370 			continue;
371 
372 		hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
373 	}
374 
375 	irq_domain_free_irqs_top(d, virq, nr_irqs);
376 }
377 
378 static const struct irq_domain_ops hv_msi_domain_ops = {
379 	.select	= msi_lib_irq_domain_select,
380 	.alloc	= hv_msi_domain_alloc,
381 	.free	= hv_msi_domain_free,
382 };
383 
384 struct irq_domain * __init hv_create_pci_msi_domain(void)
385 {
386 	struct irq_domain *d = NULL;
387 
388 	struct irq_domain_info info = {
389 		.fwnode		= irq_domain_alloc_named_fwnode("HV-PCI-MSI"),
390 		.ops		= &hv_msi_domain_ops,
391 		.parent		= x86_vector_domain,
392 	};
393 
394 	if (info.fwnode)
395 		d = msi_create_parent_irq_domain(&info, &hv_msi_parent_ops);
396 
397 	/* No point in going further if we can't get an irq domain */
398 	BUG_ON(!d);
399 
400 	return d;
401 }
402 
403 #endif /* CONFIG_PCI_MSI */
404 
405 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
406 {
407 	union hv_device_id hv_devid;
408 
409 	hv_devid.as_uint64 = 0;
410 	hv_devid.device_type = HV_DEVICE_TYPE_IOAPIC;
411 	hv_devid.ioapic.ioapic_id = (u8)ioapic_id;
412 
413 	return hv_unmap_interrupt(hv_devid.as_uint64, entry);
414 }
415 EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
416 
417 int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
418 		struct hv_interrupt_entry *entry)
419 {
420 	union hv_device_id hv_devid;
421 
422 	hv_devid.as_uint64 = 0;
423 	hv_devid.device_type = HV_DEVICE_TYPE_IOAPIC;
424 	hv_devid.ioapic.ioapic_id = (u8)ioapic_id;
425 
426 	return hv_map_interrupt(hv_devid, level, cpu, vector, entry);
427 }
428 EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
429