xref: /linux/arch/x86/hyperv/irqdomain.c (revision 5f054ef2e0f1ca7d32ac48e275d08e2ac29d84f3)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
5  *
6  * Authors:
7  *  Sunil Muthuswamy <sunilmut@microsoft.com>
8  *  Wei Liu <wei.liu@kernel.org>
9  */
10 
11 #include <linux/pci.h>
12 #include <linux/irq.h>
13 #include <linux/export.h>
14 #include <asm/mshyperv.h>
15 
/*
 * hv_map_interrupt() - Issue HVCALL_MAP_DEVICE_INTERRUPT for one interrupt.
 * @device_id: Hypervisor device ID of the interrupt source
 * @level:     true selects level trigger mode, false selects edge
 * @cpu:       CPU whose VP set is used as the interrupt target
 * @vector:    Vector programmed into the interrupt target
 * @entry:     Receives the interrupt entry returned by the hypervisor; it is
 *             written only when the hypercall succeeds
 *
 * The request is assembled in this CPU's hypercall input buffer and the reply
 * is read from this CPU's output buffer, all with local interrupts disabled.
 *
 * Return: 0 on success, -EINVAL if no VP set could be generated for @cpu,
 * otherwise the errno derived from the hypercall status.
 */
static int hv_map_interrupt(union hv_device_id device_id, bool level,
		int cpu, int vector, struct hv_interrupt_entry *entry)
{
	struct hv_input_map_device_interrupt *input;
	struct hv_output_map_device_interrupt *output;
	struct hv_device_interrupt_descriptor *intr_desc;
	unsigned long flags;
	u64 status;
	int nr_bank, var_size;

	local_irq_save(flags);

	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
	output = *this_cpu_ptr(hyperv_pcpu_output_arg);

	intr_desc = &input->interrupt_descriptor;
	memset(input, 0, sizeof(*input));
	input->partition_id = hv_current_partition_id;
	input->device_id = device_id.as_uint64;
	intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
	intr_desc->vector_count = 1;
	intr_desc->target.vector = vector;

	if (level)
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
	else
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;

	intr_desc->target.vp_set.valid_bank_mask = 0;
	intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
	if (nr_bank < 0) {
		local_irq_restore(flags);
		pr_err("%s: unable to generate VP set\n", __func__);
		return -EINVAL;
	}
	intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;

	/*
	 * var-sized hypercall, var-size starts after vp_mask (thus
	 * vp_set.format does not count, but vp_set.valid_bank_mask
	 * does).
	 */
	var_size = nr_bank + 1;

	status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
			input, output);

	/*
	 * Copy the reply while interrupts are still off, and only when the
	 * call succeeded: on failure the output buffer holds nothing the
	 * caller should see, so leave *entry untouched.
	 */
	if (hv_result_success(status))
		*entry = output->interrupt_entry;

	local_irq_restore(flags);

	if (!hv_result_success(status))
		hv_status_err(status, "\n");

	return hv_result_to_errno(status);
}
72 
/*
 * Issue HVCALL_UNMAP_DEVICE_INTERRUPT for the interrupt described by
 * @old_entry on the device identified by @id.
 *
 * The request is built in this CPU's hypercall input buffer with local
 * interrupts disabled. Returns 0 on success or the errno derived from the
 * hypercall status.
 */
static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
{
	struct hv_input_unmap_device_interrupt *req;
	unsigned long irq_state;
	u64 status;

	local_irq_save(irq_state);

	req = *this_cpu_ptr(hyperv_pcpu_input_arg);
	memset(req, 0, sizeof(*req));

	req->partition_id = hv_current_partition_id;
	req->device_id = id;
	req->interrupt_entry = *old_entry;

	status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, req, NULL);

	local_irq_restore(irq_state);

	if (!hv_result_success(status))
		hv_status_err(status, "\n");

	return hv_result_to_errno(status);
}
97 
98 #ifdef CONFIG_PCI_MSI
/*
 * Accumulator handed to get_rid_cb() while walking a device's DMA aliases.
 */
struct rid_data {
	/* Device recorded by get_rid_cb() together with @rid; NULL if none. */
	struct pci_dev *bridge;
	/* Requester ID; starts as the device's own bus/devfn, may become an alias. */
	u32 rid;
};
103 
/*
 * pci_for_each_dma_alias() callback.
 *
 * Records @pdev and @alias in the struct rid_data passed via @data whenever
 * either @pdev's bus or the alias' bus differs from the bus of the requester
 * ID recorded so far. Always returns 0.
 */
static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct rid_data *ctx = data;
	u8 cur_bus = PCI_BUS_NUM(ctx->rid);

	/* Both still on the recorded bus: keep what we have. */
	if (pdev->bus->number == cur_bus && PCI_BUS_NUM(alias) == cur_bus)
		return 0;

	ctx->bridge = pdev;
	ctx->rid = alias;

	return 0;
}
116 
hv_build_pci_dev_id(struct pci_dev * dev)117 static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
118 {
119 	union hv_device_id dev_id;
120 	struct rid_data data = {
121 		.bridge = NULL,
122 		.rid = PCI_DEVID(dev->bus->number, dev->devfn)
123 	};
124 
125 	pci_for_each_dma_alias(dev, get_rid_cb, &data);
126 
127 	dev_id.as_uint64 = 0;
128 	dev_id.device_type = HV_DEVICE_TYPE_PCI;
129 	dev_id.pci.segment = pci_domain_nr(dev->bus);
130 
131 	dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
132 	dev_id.pci.bdf.device = PCI_SLOT(data.rid);
133 	dev_id.pci.bdf.function = PCI_FUNC(data.rid);
134 	dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;
135 
136 	if (data.bridge) {
137 		int pos;
138 
139 		/*
140 		 * Microsoft Hypervisor requires a bus range when the bridge is
141 		 * running in PCI-X mode.
142 		 *
143 		 * To distinguish conventional vs PCI-X bridge, we can check
144 		 * the bridge's PCI-X Secondary Status Register, Secondary Bus
145 		 * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
146 		 * Specification Revision 1.0 5.2.2.1.3.
147 		 *
148 		 * Value zero means it is in conventional mode, otherwise it is
149 		 * in PCI-X mode.
150 		 */
151 
152 		pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
153 		if (pos) {
154 			u16 status;
155 
156 			pci_read_config_word(data.bridge, pos +
157 					PCI_X_BRIDGE_SSTATUS, &status);
158 
159 			if (status & PCI_X_SSTATUS_FREQ) {
160 				/* Non-zero, PCI-X mode */
161 				u8 sec_bus, sub_bus;
162 
163 				dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;
164 
165 				pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
166 				dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
167 				pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
168 				dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
169 			}
170 		}
171 	}
172 
173 	return dev_id;
174 }
175 
176 /**
177  * hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor.
178  * @data:      Describes the IRQ
179  * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL)
180  *
181  * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall.
182  *
183  * Return: 0 on success, -errno on failure
184  */
hv_map_msi_interrupt(struct irq_data * data,struct hv_interrupt_entry * out_entry)185 int hv_map_msi_interrupt(struct irq_data *data,
186 			 struct hv_interrupt_entry *out_entry)
187 {
188 	struct irq_cfg *cfg = irqd_cfg(data);
189 	struct hv_interrupt_entry dummy;
190 	union hv_device_id device_id;
191 	struct msi_desc *msidesc;
192 	struct pci_dev *dev;
193 	int cpu;
194 
195 	msidesc = irq_data_get_msi_desc(data);
196 	dev = msi_desc_to_pci_dev(msidesc);
197 	device_id = hv_build_pci_dev_id(dev);
198 	cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
199 
200 	return hv_map_interrupt(device_id, false, cpu, cfg->vector,
201 				out_entry ? out_entry : &dummy);
202 }
203 EXPORT_SYMBOL_GPL(hv_map_msi_interrupt);
204 
entry_to_msi_msg(struct hv_interrupt_entry * entry,struct msi_msg * msg)205 static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
206 {
207 	/* High address is always 0 */
208 	msg->address_hi = 0;
209 	msg->address_lo = entry->msi_entry.address.as_uint32;
210 	msg->data = entry->msi_entry.data.as_uint32;
211 }
212 
213 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
hv_irq_compose_msi_msg(struct irq_data * data,struct msi_msg * msg)214 static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
215 {
216 	struct hv_interrupt_entry *stored_entry;
217 	struct irq_cfg *cfg = irqd_cfg(data);
218 	struct msi_desc *msidesc;
219 	struct pci_dev *dev;
220 	int ret;
221 
222 	msidesc = irq_data_get_msi_desc(data);
223 	dev = msi_desc_to_pci_dev(msidesc);
224 
225 	if (!cfg) {
226 		pr_debug("%s: cfg is NULL", __func__);
227 		return;
228 	}
229 
230 	if (data->chip_data) {
231 		/*
232 		 * This interrupt is already mapped. Let's unmap first.
233 		 *
234 		 * We don't use retarget interrupt hypercalls here because
235 		 * Microsoft Hypervisor doesn't allow root to change the vector
236 		 * or specify VPs outside of the set that is initially used
237 		 * during mapping.
238 		 */
239 		stored_entry = data->chip_data;
240 		data->chip_data = NULL;
241 
242 		ret = hv_unmap_msi_interrupt(dev, stored_entry);
243 
244 		kfree(stored_entry);
245 
246 		if (ret)
247 			return;
248 	}
249 
250 	stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
251 	if (!stored_entry) {
252 		pr_debug("%s: failed to allocate chip data\n", __func__);
253 		return;
254 	}
255 
256 	ret = hv_map_msi_interrupt(data, stored_entry);
257 	if (ret) {
258 		kfree(stored_entry);
259 		return;
260 	}
261 
262 	data->chip_data = stored_entry;
263 	entry_to_msi_msg(data->chip_data, msg);
264 
265 	return;
266 }
267 
hv_unmap_msi_interrupt(struct pci_dev * dev,struct hv_interrupt_entry * old_entry)268 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
269 {
270 	return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
271 }
272 
hv_teardown_msi_irq(struct pci_dev * dev,struct irq_data * irqd)273 static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
274 {
275 	struct hv_interrupt_entry old_entry;
276 	struct msi_msg msg;
277 
278 	if (!irqd->chip_data) {
279 		pr_debug("%s: no chip data\n!", __func__);
280 		return;
281 	}
282 
283 	old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
284 	entry_to_msi_msg(&old_entry, &msg);
285 
286 	kfree(irqd->chip_data);
287 	irqd->chip_data = NULL;
288 
289 	(void)hv_unmap_msi_interrupt(dev, &old_entry);
290 }
291 
hv_msi_free_irq(struct irq_domain * domain,struct msi_domain_info * info,unsigned int virq)292 static void hv_msi_free_irq(struct irq_domain *domain,
293 			    struct msi_domain_info *info, unsigned int virq)
294 {
295 	struct irq_data *irqd = irq_get_irq_data(virq);
296 	struct msi_desc *desc;
297 
298 	if (!irqd)
299 		return;
300 
301 	desc = irq_data_get_msi_desc(irqd);
302 	if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
303 		return;
304 
305 	hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
306 }
307 
308 /*
309  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
310  * which implement the MSI or MSI-X Capability Structure.
311  */
312 static struct irq_chip hv_pci_msi_controller = {
313 	.name			= "HV-PCI-MSI",
314 	.irq_unmask		= pci_msi_unmask_irq,
315 	.irq_mask		= pci_msi_mask_irq,
316 	.irq_ack		= irq_chip_ack_parent,
317 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
318 	.irq_compose_msi_msg	= hv_irq_compose_msi_msg,
319 	.irq_set_affinity	= msi_domain_set_affinity,
320 	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED,
321 };
322 
323 static struct msi_domain_ops pci_msi_domain_ops = {
324 	.msi_free		= hv_msi_free_irq,
325 	.msi_prepare		= pci_msi_prepare,
326 };
327 
328 static struct msi_domain_info hv_pci_msi_domain_info = {
329 	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
330 			  MSI_FLAG_PCI_MSIX,
331 	.ops		= &pci_msi_domain_ops,
332 	.chip		= &hv_pci_msi_controller,
333 	.handler	= handle_edge_irq,
334 	.handler_name	= "edge",
335 };
336 
hv_create_pci_msi_domain(void)337 struct irq_domain * __init hv_create_pci_msi_domain(void)
338 {
339 	struct irq_domain *d = NULL;
340 	struct fwnode_handle *fn;
341 
342 	fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
343 	if (fn)
344 		d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);
345 
346 	/* No point in going further if we can't get an irq domain */
347 	BUG_ON(!d);
348 
349 	return d;
350 }
351 
352 #endif /* CONFIG_PCI_MSI */
353 
hv_unmap_ioapic_interrupt(int ioapic_id,struct hv_interrupt_entry * entry)354 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
355 {
356 	union hv_device_id device_id;
357 
358 	device_id.as_uint64 = 0;
359 	device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
360 	device_id.ioapic.ioapic_id = (u8)ioapic_id;
361 
362 	return hv_unmap_interrupt(device_id.as_uint64, entry);
363 }
364 EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
365 
/*
 * Map an interrupt of the IO-APIC @ioapic_id in the hypervisor.
 * Only the low 8 bits of @ioapic_id are used; @level, @cpu, @vector and
 * @entry are passed straight through to hv_map_interrupt(), whose result is
 * returned.
 */
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
		struct hv_interrupt_entry *entry)
{
	union hv_device_id ioapic_dev;

	/* Start from an all-zero ID, then fill in the IO-APIC specifics. */
	ioapic_dev.as_uint64 = 0;
	ioapic_dev.device_type = HV_DEVICE_TYPE_IOAPIC;
	ioapic_dev.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_map_interrupt(ioapic_dev, level, cpu, vector, entry);
}
EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
378