xref: /linux/arch/x86/hyperv/irqdomain.c (revision 221533629550e920580ab428f13ffebf54063b95)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
5  *
6  * Authors:
7  *  Sunil Muthuswamy <sunilmut@microsoft.com>
8  *  Wei Liu <wei.liu@kernel.org>
9  */
10 
11 #include <linux/pci.h>
12 #include <linux/irq.h>
13 #include <linux/export.h>
14 #include <linux/irqchip/irq-msi-lib.h>
15 #include <asm/mshyperv.h>
16 
/*
 * hv_map_interrupt() - Map one device interrupt via HVCALL_MAP_DEVICE_INTERRUPT.
 * @device_id: Hypervisor device identifier the interrupt belongs to
 * @level:     true for level-triggered, false for edge-triggered
 * @cpu:       CPU the interrupt is targeted at
 * @vector:    Vector to deliver
 * @entry:     Filled with the interrupt entry returned by the hypervisor.
 *             Only written when the hypercall succeeds.
 *
 * Return: 0 on success, -EINVAL if no VP set can be generated for @cpu,
 * otherwise the errno translated from the hypercall status.
 */
static int hv_map_interrupt(union hv_device_id device_id, bool level,
		int cpu, int vector, struct hv_interrupt_entry *entry)
{
	struct hv_input_map_device_interrupt *input;
	struct hv_output_map_device_interrupt *output;
	struct hv_device_interrupt_descriptor *intr_desc;
	unsigned long flags;
	u64 status;
	int nr_bank, var_size;

	/* Interrupts stay off while the per-CPU hypercall pages are in use. */
	local_irq_save(flags);

	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
	output = *this_cpu_ptr(hyperv_pcpu_output_arg);

	intr_desc = &input->interrupt_descriptor;
	memset(input, 0, sizeof(*input));
	input->partition_id = hv_current_partition_id;
	input->device_id = device_id.as_uint64;
	intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
	intr_desc->vector_count = 1;
	intr_desc->target.vector = vector;

	if (level)
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
	else
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;

	intr_desc->target.vp_set.valid_bank_mask = 0;
	intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
	if (nr_bank < 0) {
		local_irq_restore(flags);
		pr_err("%s: unable to generate VP set\n", __func__);
		return -EINVAL;
	}
	intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;

	/*
	 * var-sized hypercall, var-size starts after vp_mask (thus
	 * vp_set.format does not count, but vp_set.valid_bank_mask
	 * does).
	 */
	var_size = nr_bank + 1;

	status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
			input, output);

	/*
	 * Only hand the entry back when the hypercall succeeded; on failure
	 * the output page does not describe a mapping for this request.
	 */
	if (hv_result_success(status))
		*entry = output->interrupt_entry;

	local_irq_restore(flags);

	if (!hv_result_success(status))
		hv_status_err(status, "\n");

	return hv_result_to_errno(status);
}
73 
/*
 * hv_unmap_interrupt() - Undo a mapping via HVCALL_UNMAP_DEVICE_INTERRUPT.
 * @id:        Hypervisor device id (the as_uint64 view of hv_device_id)
 * @old_entry: Interrupt entry that was returned when the mapping was created
 *
 * Return: the hypercall status translated to an errno.
 */
static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
{
	struct hv_input_unmap_device_interrupt *args;
	unsigned long irq_flags;
	u64 hv_ret;

	/* Interrupts stay off while the per-CPU input page is in use. */
	local_irq_save(irq_flags);

	args = *this_cpu_ptr(hyperv_pcpu_input_arg);
	memset(args, 0, sizeof(*args));

	args->partition_id = hv_current_partition_id;
	args->device_id = id;
	args->interrupt_entry = *old_entry;

	hv_ret = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, args, NULL);

	local_irq_restore(irq_flags);

	if (!hv_result_success(hv_ret))
		hv_status_err(hv_ret, "\n");

	return hv_result_to_errno(hv_ret);
}
98 
99 #ifdef CONFIG_PCI_MSI
100 struct rid_data {
101 	struct pci_dev *bridge;
102 	u32 rid;
103 };
104 
/*
 * pci_for_each_dma_alias() callback.
 *
 * Records @pdev and @alias in the rid_data passed through @data whenever
 * either @pdev's bus or the alias' bus differs from the bus of the requester
 * ID collected so far. Always returns 0.
 */
static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct rid_data *acc = data;
	u8 cur_bus = PCI_BUS_NUM(acc->rid);

	/* Device and alias both sit on the current bus: keep what we have. */
	if (pdev->bus->number == cur_bus && PCI_BUS_NUM(alias) == cur_bus)
		return 0;

	acc->bridge = pdev;
	acc->rid = alias;

	return 0;
}
117 
hv_build_pci_dev_id(struct pci_dev * dev)118 static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
119 {
120 	union hv_device_id dev_id;
121 	struct rid_data data = {
122 		.bridge = NULL,
123 		.rid = PCI_DEVID(dev->bus->number, dev->devfn)
124 	};
125 
126 	pci_for_each_dma_alias(dev, get_rid_cb, &data);
127 
128 	dev_id.as_uint64 = 0;
129 	dev_id.device_type = HV_DEVICE_TYPE_PCI;
130 	dev_id.pci.segment = pci_domain_nr(dev->bus);
131 
132 	dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
133 	dev_id.pci.bdf.device = PCI_SLOT(data.rid);
134 	dev_id.pci.bdf.function = PCI_FUNC(data.rid);
135 	dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;
136 
137 	if (data.bridge) {
138 		int pos;
139 
140 		/*
141 		 * Microsoft Hypervisor requires a bus range when the bridge is
142 		 * running in PCI-X mode.
143 		 *
144 		 * To distinguish conventional vs PCI-X bridge, we can check
145 		 * the bridge's PCI-X Secondary Status Register, Secondary Bus
146 		 * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
147 		 * Specification Revision 1.0 5.2.2.1.3.
148 		 *
149 		 * Value zero means it is in conventional mode, otherwise it is
150 		 * in PCI-X mode.
151 		 */
152 
153 		pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
154 		if (pos) {
155 			u16 status;
156 
157 			pci_read_config_word(data.bridge, pos +
158 					PCI_X_BRIDGE_SSTATUS, &status);
159 
160 			if (status & PCI_X_SSTATUS_FREQ) {
161 				/* Non-zero, PCI-X mode */
162 				u8 sec_bus, sub_bus;
163 
164 				dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;
165 
166 				pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
167 				dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
168 				pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
169 				dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
170 			}
171 		}
172 	}
173 
174 	return dev_id;
175 }
176 
177 /**
178  * hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor.
179  * @data:      Describes the IRQ
180  * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL)
181  *
182  * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall.
183  *
184  * Return: 0 on success, -errno on failure
185  */
hv_map_msi_interrupt(struct irq_data * data,struct hv_interrupt_entry * out_entry)186 int hv_map_msi_interrupt(struct irq_data *data,
187 			 struct hv_interrupt_entry *out_entry)
188 {
189 	struct irq_cfg *cfg = irqd_cfg(data);
190 	struct hv_interrupt_entry dummy;
191 	union hv_device_id device_id;
192 	struct msi_desc *msidesc;
193 	struct pci_dev *dev;
194 	int cpu;
195 
196 	msidesc = irq_data_get_msi_desc(data);
197 	dev = msi_desc_to_pci_dev(msidesc);
198 	device_id = hv_build_pci_dev_id(dev);
199 	cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
200 
201 	return hv_map_interrupt(device_id, false, cpu, cfg->vector,
202 				out_entry ? out_entry : &dummy);
203 }
204 EXPORT_SYMBOL_GPL(hv_map_msi_interrupt);
205 
entry_to_msi_msg(struct hv_interrupt_entry * entry,struct msi_msg * msg)206 static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
207 {
208 	/* High address is always 0 */
209 	msg->address_hi = 0;
210 	msg->address_lo = entry->msi_entry.address.as_uint32;
211 	msg->data = entry->msi_entry.data.as_uint32;
212 }
213 
214 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
hv_irq_compose_msi_msg(struct irq_data * data,struct msi_msg * msg)215 static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
216 {
217 	struct hv_interrupt_entry *stored_entry;
218 	struct irq_cfg *cfg = irqd_cfg(data);
219 	struct msi_desc *msidesc;
220 	struct pci_dev *dev;
221 	int ret;
222 
223 	msidesc = irq_data_get_msi_desc(data);
224 	dev = msi_desc_to_pci_dev(msidesc);
225 
226 	if (!cfg) {
227 		pr_debug("%s: cfg is NULL", __func__);
228 		return;
229 	}
230 
231 	if (data->chip_data) {
232 		/*
233 		 * This interrupt is already mapped. Let's unmap first.
234 		 *
235 		 * We don't use retarget interrupt hypercalls here because
236 		 * Microsoft Hypervisor doesn't allow root to change the vector
237 		 * or specify VPs outside of the set that is initially used
238 		 * during mapping.
239 		 */
240 		stored_entry = data->chip_data;
241 		data->chip_data = NULL;
242 
243 		ret = hv_unmap_msi_interrupt(dev, stored_entry);
244 
245 		kfree(stored_entry);
246 
247 		if (ret)
248 			return;
249 	}
250 
251 	stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
252 	if (!stored_entry) {
253 		pr_debug("%s: failed to allocate chip data\n", __func__);
254 		return;
255 	}
256 
257 	ret = hv_map_msi_interrupt(data, stored_entry);
258 	if (ret) {
259 		kfree(stored_entry);
260 		return;
261 	}
262 
263 	data->chip_data = stored_entry;
264 	entry_to_msi_msg(data->chip_data, msg);
265 
266 	return;
267 }
268 
hv_unmap_msi_interrupt(struct pci_dev * dev,struct hv_interrupt_entry * old_entry)269 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
270 {
271 	return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
272 }
273 
hv_teardown_msi_irq(struct pci_dev * dev,struct irq_data * irqd)274 static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
275 {
276 	struct hv_interrupt_entry old_entry;
277 	struct msi_msg msg;
278 
279 	if (!irqd->chip_data) {
280 		pr_debug("%s: no chip data\n!", __func__);
281 		return;
282 	}
283 
284 	old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
285 	entry_to_msi_msg(&old_entry, &msg);
286 
287 	kfree(irqd->chip_data);
288 	irqd->chip_data = NULL;
289 
290 	(void)hv_unmap_msi_interrupt(dev, &old_entry);
291 }
292 
293 /*
294  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
295  * which implement the MSI or MSI-X Capability Structure.
296  */
297 static struct irq_chip hv_pci_msi_controller = {
298 	.name			= "HV-PCI-MSI",
299 	.irq_ack		= irq_chip_ack_parent,
300 	.irq_compose_msi_msg	= hv_irq_compose_msi_msg,
301 	.irq_set_affinity	= irq_chip_set_affinity_parent,
302 };
303 
hv_init_dev_msi_info(struct device * dev,struct irq_domain * domain,struct irq_domain * real_parent,struct msi_domain_info * info)304 static bool hv_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
305 				 struct irq_domain *real_parent, struct msi_domain_info *info)
306 {
307 	struct irq_chip *chip = info->chip;
308 
309 	if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
310 		return false;
311 
312 	chip->flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED;
313 
314 	info->ops->msi_prepare = pci_msi_prepare;
315 
316 	return true;
317 }
318 
319 #define HV_MSI_FLAGS_SUPPORTED	(MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX)
320 #define HV_MSI_FLAGS_REQUIRED	(MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS)
321 
322 static struct msi_parent_ops hv_msi_parent_ops = {
323 	.supported_flags	= HV_MSI_FLAGS_SUPPORTED,
324 	.required_flags		= HV_MSI_FLAGS_REQUIRED,
325 	.bus_select_token	= DOMAIN_BUS_NEXUS,
326 	.bus_select_mask	= MATCH_PCI_MSI,
327 	.chip_flags		= MSI_CHIP_FLAG_SET_ACK,
328 	.prefix			= "HV-",
329 	.init_dev_msi_info	= hv_init_dev_msi_info,
330 };
331 
hv_msi_domain_alloc(struct irq_domain * d,unsigned int virq,unsigned int nr_irqs,void * arg)332 static int hv_msi_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs,
333 			       void *arg)
334 {
335 	/*
336 	 * TODO: The allocation bits of hv_irq_compose_msi_msg(), i.e. everything except
337 	 * entry_to_msi_msg() should be in here.
338 	 */
339 
340 	int ret;
341 
342 	ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);
343 	if (ret)
344 		return ret;
345 
346 	for (int i = 0; i < nr_irqs; ++i) {
347 		irq_domain_set_info(d, virq + i, 0, &hv_pci_msi_controller, NULL,
348 				    handle_edge_irq, NULL, "edge");
349 	}
350 	return 0;
351 }
352 
hv_msi_domain_free(struct irq_domain * d,unsigned int virq,unsigned int nr_irqs)353 static void hv_msi_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)
354 {
355 	for (int i = 0; i < nr_irqs; ++i) {
356 		struct irq_data *irqd = irq_domain_get_irq_data(d, virq);
357 		struct msi_desc *desc;
358 
359 		desc = irq_data_get_msi_desc(irqd);
360 		if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
361 			continue;
362 
363 		hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
364 	}
365 	irq_domain_free_irqs_top(d, virq, nr_irqs);
366 }
367 
368 static const struct irq_domain_ops hv_msi_domain_ops = {
369 	.select	= msi_lib_irq_domain_select,
370 	.alloc	= hv_msi_domain_alloc,
371 	.free	= hv_msi_domain_free,
372 };
373 
hv_create_pci_msi_domain(void)374 struct irq_domain * __init hv_create_pci_msi_domain(void)
375 {
376 	struct irq_domain *d = NULL;
377 
378 	struct irq_domain_info info = {
379 		.fwnode		= irq_domain_alloc_named_fwnode("HV-PCI-MSI"),
380 		.ops		= &hv_msi_domain_ops,
381 		.parent		= x86_vector_domain,
382 	};
383 
384 	if (info.fwnode)
385 		d = msi_create_parent_irq_domain(&info, &hv_msi_parent_ops);
386 
387 	/* No point in going further if we can't get an irq domain */
388 	BUG_ON(!d);
389 
390 	return d;
391 }
392 
393 #endif /* CONFIG_PCI_MSI */
394 
hv_unmap_ioapic_interrupt(int ioapic_id,struct hv_interrupt_entry * entry)395 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
396 {
397 	union hv_device_id device_id;
398 
399 	device_id.as_uint64 = 0;
400 	device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
401 	device_id.ioapic.ioapic_id = (u8)ioapic_id;
402 
403 	return hv_unmap_interrupt(device_id.as_uint64, entry);
404 }
405 EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
406 
/*
 * hv_map_ioapic_interrupt() - Map an IO-APIC interrupt in the hypervisor.
 * @ioapic_id: IO-APIC id; truncated to 8 bits for the device id
 * @level:     true for level-triggered, false for edge-triggered
 * @cpu:       CPU the interrupt is targeted at
 * @vector:    Vector to deliver
 * @entry:     Receives the interrupt entry from hv_map_interrupt()
 *
 * Return: the result of hv_map_interrupt().
 */
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
		struct hv_interrupt_entry *entry)
{
	union hv_device_id id;

	id.as_uint64 = 0;
	id.ioapic.ioapic_id = (u8)ioapic_id;
	id.device_type = HV_DEVICE_TYPE_IOAPIC;

	return hv_map_interrupt(id, level, cpu, vector, entry);
}
EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
419