xref: /linux/arch/x86/pci/xen.c (revision 7b49a3fb69e785a2425c8dc7dbd0779a0a4c0eb2)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and
4  * initial domain support. We also handle the DSDT _PRT callbacks for GSI's
5  * used in HVM and initial domain mode (PV does not parse ACPI, so it has no
 * concept of GSIs). Under PV we hook under the pcibios API for IRQs and
 * 0xcf8 PCI configuration read/write.
8  *
9  *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
10  *           Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
11  *           Stefano Stabellini <stefano.stabellini@eu.citrix.com>
12  */
13 #include <linux/export.h>
14 #include <linux/init.h>
15 #include <linux/pci.h>
16 #include <linux/acpi.h>
17 
18 #include <linux/io.h>
19 #include <asm/io_apic.h>
20 #include <asm/pci_x86.h>
21 #include <asm/cpuid/api.h>
22 
23 #include <asm/xen/hypervisor.h>
24 
25 #include <xen/features.h>
26 #include <xen/events.h>
27 #include <xen/pci.h>
28 #include <asm/xen/pci.h>
29 #include <asm/xen/cpuid.h>
30 #include <asm/apic.h>
31 #include <asm/acpi.h>
32 #include <asm/i8259.h>
33 
34 static int xen_pcifront_enable_irq(struct pci_dev *dev)
35 {
36 	int rc;
37 	int share = 1;
38 	int pirq;
39 	u8 gsi;
40 
41 	rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
42 	if (rc) {
43 		dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
44 			 rc);
45 		return pcibios_err_to_errno(rc);
46 	}
47 	/* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/
48 	pirq = gsi;
49 
50 	if (gsi < nr_legacy_irqs())
51 		share = 0;
52 
53 	rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
54 	if (rc < 0) {
55 		dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n",
56 			 gsi, pirq, rc);
57 		return rc;
58 	}
59 
60 	dev->irq = rc;
61 	dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
62 	return 0;
63 }
64 
65 #ifdef CONFIG_ACPI
66 static int xen_register_pirq(u32 gsi, int triggering, bool set_pirq)
67 {
68 	int rc, pirq = -1, irq;
69 	struct physdev_map_pirq map_irq;
70 	int shareable = 0;
71 	char *name;
72 
73 	irq = xen_irq_from_gsi(gsi);
74 	if (irq > 0)
75 		return irq;
76 
77 	if (set_pirq)
78 		pirq = gsi;
79 
80 	map_irq.domid = DOMID_SELF;
81 	map_irq.type = MAP_PIRQ_TYPE_GSI;
82 	map_irq.index = gsi;
83 	map_irq.pirq = pirq;
84 
85 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
86 	if (rc) {
87 		printk(KERN_WARNING "xen map irq failed %d\n", rc);
88 		return -1;
89 	}
90 
91 	if (triggering == ACPI_EDGE_SENSITIVE) {
92 		shareable = 0;
93 		name = "ioapic-edge";
94 	} else {
95 		shareable = 1;
96 		name = "ioapic-level";
97 	}
98 
99 	irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name);
100 	if (irq < 0)
101 		goto out;
102 
103 	printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi);
104 out:
105 	return irq;
106 }
107 
108 static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
109 				     int trigger, int polarity)
110 {
111 	if (!xen_hvm_domain())
112 		return -1;
113 
114 	return xen_register_pirq(gsi, trigger,
115 				 false /* no mapping of GSI to PIRQ */);
116 }
117 
118 #ifdef CONFIG_XEN_PV_DOM0
119 static int xen_register_gsi(u32 gsi, int triggering, int polarity)
120 {
121 	int rc, irq;
122 	struct physdev_setup_gsi setup_gsi;
123 
124 	if (!xen_pv_domain())
125 		return -1;
126 
127 	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
128 			gsi, triggering, polarity);
129 
130 	irq = xen_register_pirq(gsi, triggering, true);
131 
132 	setup_gsi.gsi = gsi;
133 	setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
134 	setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
135 
136 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
137 	if (rc == -EEXIST)
138 		printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
139 	else if (rc) {
140 		printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
141 				gsi, rc);
142 	}
143 
144 	return irq;
145 }
146 
147 static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
148 				 int trigger, int polarity)
149 {
150 	return xen_register_gsi(gsi, trigger, polarity);
151 }
152 #endif
153 #endif
154 
155 #if defined(CONFIG_PCI_MSI)
156 #include <linux/msi.h>
157 
/*
 * PCI frontend operations pointer, exported GPL-only. It is not assigned
 * anywhere in this file; presumably the PCI frontend driver installs it and
 * the xen_pci_frontend_*() helpers used below dispatch through it -- confirm
 * against <asm/xen/pci.h>.
 */
struct xen_pci_frontend_ops *xen_pci_frontend;
EXPORT_SYMBOL_GPL(xen_pci_frontend);
160 
161 struct xen_msi_ops {
162 	int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
163 	void (*teardown_msi_irqs)(struct pci_dev *dev);
164 };
165 
166 static struct xen_msi_ops xen_msi_ops __ro_after_init;
167 
168 static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
169 {
170 	int irq, ret, i;
171 	struct msi_desc *msidesc;
172 	int *v;
173 
174 	if (type == PCI_CAP_ID_MSI && nvec > 1)
175 		return 1;
176 
177 	v = kzalloc_objs(int, max(1, nvec));
178 	if (!v)
179 		return -ENOMEM;
180 
181 	if (type == PCI_CAP_ID_MSIX)
182 		ret = xen_pci_frontend_enable_msix(dev, v, nvec);
183 	else
184 		ret = xen_pci_frontend_enable_msi(dev, v);
185 	if (ret)
186 		goto error;
187 	i = 0;
188 	msi_for_each_desc(msidesc, &dev->dev, MSI_DESC_NOTASSOCIATED) {
189 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
190 					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
191 					       (type == PCI_CAP_ID_MSIX) ?
192 					       "pcifront-msi-x" :
193 					       "pcifront-msi",
194 						DOMID_SELF);
195 		if (irq < 0) {
196 			ret = irq;
197 			goto free;
198 		}
199 		i++;
200 	}
201 	kfree(v);
202 	return msi_device_populate_sysfs(&dev->dev);
203 
204 error:
205 	if (ret == -ENOSYS)
206 		dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
207 	else if (ret)
208 		dev_err(&dev->dev, "Xen PCI frontend error: %d!\n", ret);
209 free:
210 	kfree(v);
211 	return ret;
212 }
213 
214 static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
215 		struct msi_msg *msg)
216 {
217 	/*
218 	 * We set vector == 0 to tell the hypervisor we don't care about
219 	 * it, but we want a pirq setup instead.  We use the dest_id fields
220 	 * to pass the pirq that we want.
221 	 */
222 	memset(msg, 0, sizeof(*msg));
223 	msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
224 	msg->arch_addr_hi.destid_8_31 = pirq >> 8;
225 	msg->arch_addr_lo.destid_0_7 = pirq & 0xFF;
226 	msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
227 	msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_EXTINT;
228 }
229 
230 static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
231 {
232 	int irq, pirq;
233 	struct msi_desc *msidesc;
234 	struct msi_msg msg;
235 
236 	if (type == PCI_CAP_ID_MSI && nvec > 1)
237 		return 1;
238 
239 	msi_for_each_desc(msidesc, &dev->dev, MSI_DESC_NOTASSOCIATED) {
240 		pirq = xen_allocate_pirq_msi(dev, msidesc);
241 		if (pirq < 0) {
242 			irq = -ENODEV;
243 			goto error;
244 		}
245 		xen_msi_compose_msg(dev, pirq, &msg);
246 		__pci_write_msi_msg(msidesc, &msg);
247 		dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
248 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
249 					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
250 					       (type == PCI_CAP_ID_MSIX) ?
251 					       "msi-x" : "msi",
252 					       DOMID_SELF);
253 		if (irq < 0)
254 			goto error;
255 		dev_dbg(&dev->dev,
256 			"xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
257 	}
258 	return msi_device_populate_sysfs(&dev->dev);
259 
260 error:
261 	dev_err(&dev->dev, "Failed to create MSI%s! ret=%d!\n",
262 		type == PCI_CAP_ID_MSI ? "" : "-X", irq);
263 	return irq;
264 }
265 
266 #ifdef CONFIG_XEN_PV_DOM0
267 static bool __read_mostly pci_seg_supported = true;
268 
269 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
270 {
271 	int ret = 0;
272 	struct msi_desc *msidesc;
273 
274 	msi_for_each_desc(msidesc, &dev->dev, MSI_DESC_NOTASSOCIATED) {
275 		struct physdev_map_pirq map_irq;
276 		domid_t domid;
277 
278 		domid = ret = xen_find_device_domain_owner(dev);
279 		/* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
280 		 * hence check ret value for < 0. */
281 		if (ret < 0)
282 			domid = DOMID_SELF;
283 
284 		memset(&map_irq, 0, sizeof(map_irq));
285 		map_irq.domid = domid;
286 		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
287 		map_irq.index = -1;
288 		map_irq.pirq = -1;
289 		map_irq.bus = dev->bus->number |
290 			      (pci_domain_nr(dev->bus) << 16);
291 		map_irq.devfn = dev->devfn;
292 
293 		if (type == PCI_CAP_ID_MSI && nvec > 1) {
294 			map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI;
295 			map_irq.entry_nr = nvec;
296 		} else if (type == PCI_CAP_ID_MSIX) {
297 			int pos;
298 			unsigned long flags;
299 			u32 table_offset, bir;
300 
301 			pos = dev->msix_cap;
302 			pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
303 					      &table_offset);
304 			bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
305 			flags = pci_resource_flags(dev, bir);
306 			if (!flags || (flags & IORESOURCE_UNSET))
307 				return -EINVAL;
308 
309 			map_irq.table_base = pci_resource_start(dev, bir);
310 			map_irq.entry_nr = msidesc->msi_index;
311 		}
312 
313 		ret = -EINVAL;
314 		if (pci_seg_supported)
315 			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
316 						    &map_irq);
317 		if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) {
318 			/*
319 			 * If MAP_PIRQ_TYPE_MULTI_MSI is not available
320 			 * there's nothing else we can do in this case.
321 			 * Just set ret > 0 so driver can retry with
322 			 * single MSI.
323 			 */
324 			ret = 1;
325 			goto out;
326 		}
327 		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
328 			map_irq.type = MAP_PIRQ_TYPE_MSI;
329 			map_irq.index = -1;
330 			map_irq.pirq = -1;
331 			map_irq.bus = dev->bus->number;
332 			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
333 						    &map_irq);
334 			if (ret != -EINVAL)
335 				pci_seg_supported = false;
336 		}
337 		if (ret) {
338 			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
339 				 ret, domid);
340 			goto out;
341 		}
342 
343 		ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq,
344 		                               (type == PCI_CAP_ID_MSI) ? nvec : 1,
345 		                               (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi",
346 		                               domid);
347 		if (ret < 0)
348 			goto out;
349 	}
350 	ret = msi_device_populate_sysfs(&dev->dev);
351 out:
352 	return ret;
353 }
354 
355 bool xen_initdom_restore_msi(struct pci_dev *dev)
356 {
357 	int ret = 0;
358 
359 	if (!xen_initial_domain())
360 		return true;
361 
362 	if (pci_seg_supported) {
363 		struct physdev_pci_device restore_ext;
364 
365 		restore_ext.seg = pci_domain_nr(dev->bus);
366 		restore_ext.bus = dev->bus->number;
367 		restore_ext.devfn = dev->devfn;
368 		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
369 					&restore_ext);
370 		if (ret == -ENOSYS)
371 			pci_seg_supported = false;
372 		WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
373 	}
374 	if (!pci_seg_supported) {
375 		struct physdev_restore_msi restore;
376 
377 		restore.bus = dev->bus->number;
378 		restore.devfn = dev->devfn;
379 		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
380 		WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
381 	}
382 	return false;
383 }
384 #else /* CONFIG_XEN_PV_DOM0 */
/*
 * Without CONFIG_XEN_PV_DOM0 there is no initial-domain MSI setup routine;
 * the name expands to NULL so xen_setup_pci_msi() still compiles.
 */
#define xen_initdom_setup_msi_irqs	NULL
386 #endif /* !CONFIG_XEN_PV_DOM0 */
387 
388 static void xen_teardown_msi_irqs(struct pci_dev *dev)
389 {
390 	struct msi_desc *msidesc;
391 	int i;
392 
393 	msi_for_each_desc(msidesc, &dev->dev, MSI_DESC_ASSOCIATED) {
394 		for (i = 0; i < msidesc->nvec_used; i++)
395 			xen_destroy_irq(msidesc->irq + i);
396 		msidesc->irq = 0;
397 	}
398 
399 	msi_device_destroy_sysfs(&dev->dev);
400 }
401 
402 static void xen_pv_teardown_msi_irqs(struct pci_dev *dev)
403 {
404 	if (dev->msix_enabled)
405 		xen_pci_frontend_disable_msix(dev);
406 	else
407 		xen_pci_frontend_disable_msi(dev);
408 
409 	xen_teardown_msi_irqs(dev);
410 }
411 
412 static int xen_msi_domain_alloc_irqs(struct irq_domain *domain,
413 				     struct device *dev,  int nvec)
414 {
415 	int type;
416 
417 	if (WARN_ON_ONCE(!dev_is_pci(dev)))
418 		return -EINVAL;
419 
420 	type = to_pci_dev(dev)->msix_enabled ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI;
421 
422 	return xen_msi_ops.setup_msi_irqs(to_pci_dev(dev), nvec, type);
423 }
424 
425 static void xen_msi_domain_free_irqs(struct irq_domain *domain,
426 				     struct device *dev)
427 {
428 	if (WARN_ON_ONCE(!dev_is_pci(dev)))
429 		return;
430 
431 	xen_msi_ops.teardown_msi_irqs(to_pci_dev(dev));
432 }
433 
434 static struct msi_domain_ops xen_pci_msi_domain_ops = {
435 	.domain_alloc_irqs	= xen_msi_domain_alloc_irqs,
436 	.domain_free_irqs	= xen_msi_domain_free_irqs,
437 };
438 
439 static struct msi_domain_info xen_pci_msi_domain_info = {
440 	.flags			= MSI_FLAG_PCI_MSIX | MSI_FLAG_FREE_MSI_DESCS |
441 				  MSI_FLAG_DEV_SYSFS | MSI_FLAG_NO_MASK,
442 	.ops			= &xen_pci_msi_domain_ops,
443 };
444 
/*
 * This irq domain is a blatant violation of the irq domain design, but
 * disentangling XEN into real irq domains is not a job for mere mortals with
 * limited XENology. But it's the least dangerous way for a mere mortal to
 * get rid of the arch_*_msi_irqs() hackery in order to store the irq
 * domain pointer in struct device. This irq domain wrappery makes that
 * possible without breaking XEN terminally.
 */
453 static __init struct irq_domain *xen_create_pci_msi_domain(void)
454 {
455 	struct irq_domain *d = NULL;
456 	struct fwnode_handle *fn;
457 
458 	fn = irq_domain_alloc_named_fwnode("XEN-MSI");
459 	if (fn)
460 		d = msi_create_irq_domain(fn, &xen_pci_msi_domain_info, NULL);
461 
462 	/* FIXME: No idea how to survive if this fails */
463 	BUG_ON(!d);
464 
465 	return d;
466 }
467 
468 static __init void xen_setup_pci_msi(void)
469 {
470 	if (xen_pv_domain()) {
471 		if (xen_initial_domain())
472 			xen_msi_ops.setup_msi_irqs = xen_initdom_setup_msi_irqs;
473 		else
474 			xen_msi_ops.setup_msi_irqs = xen_setup_msi_irqs;
475 		xen_msi_ops.teardown_msi_irqs = xen_pv_teardown_msi_irqs;
476 	} else if (xen_hvm_domain()) {
477 		xen_msi_ops.setup_msi_irqs = xen_hvm_setup_msi_irqs;
478 		xen_msi_ops.teardown_msi_irqs = xen_teardown_msi_irqs;
479 	} else {
480 		WARN_ON_ONCE(1);
481 		return;
482 	}
483 
484 	/*
485 	 * Override the PCI/MSI irq domain init function. No point
486 	 * in allocating the native domain and never use it.
487 	 */
488 	x86_init.irqs.create_pci_msi_domain = xen_create_pci_msi_domain;
489 }
490 
491 #else /* CONFIG_PCI_MSI */
/* Without CONFIG_PCI_MSI there is nothing to set up. */
static inline void xen_setup_pci_msi(void)
{
}
493 #endif /* CONFIG_PCI_MSI */
494 
495 int __init pci_xen_init(void)
496 {
497 	if (!xen_pv_domain() || xen_initial_domain())
498 		return -ENODEV;
499 
500 	printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
501 
502 	pcibios_set_cache_line_size();
503 
504 	pcibios_enable_irq = xen_pcifront_enable_irq;
505 	pcibios_disable_irq = NULL;
506 
507 	/* Keep ACPI out of the picture */
508 	acpi_noirq_set();
509 
510 	xen_setup_pci_msi();
511 	return 0;
512 }
513 
514 #ifdef CONFIG_PCI_MSI
515 static void __init xen_hvm_msi_init(void)
516 {
517 	if (!apic_is_disabled) {
518 		/*
519 		 * If hardware supports (x2)APIC virtualization (as indicated
520 		 * by hypervisor's leaf 4) then we don't need to use pirqs/
521 		 * event channels for MSI handling and instead use regular
522 		 * APIC processing
523 		 */
524 		uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);
525 
526 		if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
527 		    ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC)))
528 			return;
529 	}
530 	xen_setup_pci_msi();
531 }
532 #endif
533 
534 int __init pci_xen_hvm_init(void)
535 {
536 	if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
537 		return 0;
538 
539 #ifdef CONFIG_ACPI
540 	/*
541 	 * We don't want to change the actual ACPI delivery model,
542 	 * just how GSIs get registered.
543 	 */
544 	__acpi_register_gsi = acpi_register_gsi_xen_hvm;
545 	__acpi_unregister_gsi = NULL;
546 #endif
547 
548 #ifdef CONFIG_PCI_MSI
549 	/*
550 	 * We need to wait until after x2apic is initialized
551 	 * before we can set MSI IRQ ops.
552 	 */
553 	x86_platform.apic_post_init = xen_hvm_msi_init;
554 #endif
555 	return 0;
556 }
557 
558 #ifdef CONFIG_XEN_PV_DOM0
559 int __init pci_xen_initial_domain(void)
560 {
561 	int irq;
562 
563 	xen_setup_pci_msi();
564 	__acpi_register_gsi = acpi_register_gsi_xen;
565 	__acpi_unregister_gsi = NULL;
566 	/*
567 	 * Pre-allocate the legacy IRQs.  Use NR_LEGACY_IRQS here
568 	 * because we don't have a PIC and thus nr_legacy_irqs() is zero.
569 	 */
570 	for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
571 		int trigger, polarity;
572 
573 		if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
574 			continue;
575 
576 		xen_register_pirq(irq,
577 			trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE,
578 			true /* Map GSI to PIRQ */);
579 	}
580 	if (0 == nr_ioapics) {
581 		for (irq = 0; irq < nr_legacy_irqs(); irq++)
582 			xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
583 	}
584 	return 0;
585 }
586 #endif
587 
588