xref: /linux/arch/um/drivers/virt-pci.c (revision 21ab7031cbff8c6b6f608234e18ffe0473e98f9d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020 Intel Corporation
4  * Author: Johannes Berg <johannes@sipsolutions.net>
5  */
6 #include <linux/module.h>
7 #include <linux/pci.h>
8 #include <linux/virtio.h>
9 #include <linux/virtio_config.h>
10 #include <linux/logic_iomem.h>
11 #include <linux/irqdomain.h>
12 #include <linux/virtio_pcidev.h>
13 #include <linux/virtio-uml.h>
14 #include <linux/delay.h>
15 #include <linux/msi.h>
16 #include <asm/unaligned.h>
17 #include <irq_kern.h>
18 
19 #define MAX_DEVICES 8
20 #define MAX_MSI_VECTORS 32
21 #define CFG_SPACE_SIZE 4096
22 
23 /* for MSI-X we have a 32-bit payload */
24 #define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32))
25 #define NUM_IRQ_MSGS	10
26 
27 #define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1))
28 #define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1)
29 
/* State for one virtio-backed PCI device exposed to the UML guest. */
struct um_pci_device {
	struct virtio_device *vdev;

	/* for now just standard BARs */
	/*
	 * Each entry stores its own index; a pointer to resptr[bar] is
	 * handed out as the logic_iomem priv cookie, from which both the
	 * device and the BAR number can be recovered (see
	 * um_pci_bar_copy_from()).
	 */
	u8 resptr[PCI_STD_NUM_BARS];

	struct virtqueue *cmd_vq, *irq_vq;

/* set while um_pci_send_cmd() is synchronously polling cmd_vq */
#define UM_PCI_STAT_WAITING	0
	unsigned long status;

	/* legacy INTx interrupt number (irq_alloc_desc()) */
	int irq;
};
43 
/* Registry slot: the device (NULL if absent) and its mapped config space. */
struct um_pci_device_reg {
	struct um_pci_device *dev;
	void __iomem *iomem;
};
48 
/* Host bridge and per-slot device registry, protected by um_pci_mtx. */
static struct pci_host_bridge *bridge;
static DEFINE_MUTEX(um_pci_mtx);
static struct um_pci_device_reg um_pci_devices[MAX_DEVICES];
/* MSI support: fwnode, hierarchical irq domains, and vector bitmap */
static struct fwnode_handle *um_pci_fwnode;
static struct irq_domain *um_pci_inner_domain;
static struct irq_domain *um_pci_msi_domain;
static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];

/* maximum number of 1us polling iterations in um_pci_send_cmd() */
#define UM_VIRT_PCI_MAXDELAY 40000
58 
/*
 * Per-CPU bounce buffer for commands; um_pci_send_cmd() copies the
 * command here before queueing it (presumably to avoid handing stack
 * memory to the virtqueue - TODO confirm).
 */
struct um_pci_message_buffer {
	struct virtio_pcidev_msg hdr;
	u8 data[8];
};

static struct um_pci_message_buffer __percpu *um_pci_msg_bufs;
65 
/*
 * Send one command to the device on the command virtqueue.
 *
 * @cmd (of @cmd_size bytes) is the message header plus any inline data,
 * @extra/@extra_size an optional additional out-payload (e.g. MMIO write
 * data), and @out/@out_size an optional buffer for the device's response.
 *
 * Posted operations (config/MMIO write, memset) are fire-and-forget;
 * everything else busy-polls the queue for completion (up to
 * UM_VIRT_PCI_MAXDELAY iterations) since callers cannot sleep.
 *
 * Returns 0 on success or a negative error code.
 */
static int um_pci_send_cmd(struct um_pci_device *dev,
			   struct virtio_pcidev_msg *cmd,
			   unsigned int cmd_size,
			   const void *extra, unsigned int extra_size,
			   void *out, unsigned int out_size)
{
	struct scatterlist out_sg, extra_sg, in_sg;
	/* out_sg [+ extra_sg] [+ in_sg], in virtqueue_add_sgs() order */
	struct scatterlist *sgs_list[] = {
		[0] = &out_sg,
		[1] = extra ? &extra_sg : &in_sg,
		[2] = extra ? &in_sg : NULL,
	};
	struct um_pci_message_buffer *buf;
	int delay_count = 0;
	int ret, len;
	bool posted;

	if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
		return -EINVAL;

	switch (cmd->op) {
	case VIRTIO_PCIDEV_OP_CFG_WRITE:
	case VIRTIO_PCIDEV_OP_MMIO_WRITE:
	case VIRTIO_PCIDEV_OP_MMIO_MEMSET:
		/* in PCI, writes are posted, so don't wait */
		posted = !out;
		WARN_ON(!posted);
		break;
	default:
		posted = false;
		break;
	}

	/* bounce the command into the per-CPU buffer (not stack memory) */
	buf = get_cpu_var(um_pci_msg_bufs);
	if (buf)
		memcpy(buf, cmd, cmd_size);

	if (posted) {
		/*
		 * For a posted write, allocate one buffer holding command
		 * plus payload so we can return immediately; the buffer is
		 * freed when its completion is reaped (here below, or in
		 * um_pci_cmd_vq_cb()).
		 */
		u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);

		if (ncmd) {
			memcpy(ncmd, cmd, cmd_size);
			if (extra)
				memcpy(ncmd + cmd_size, extra, extra_size);
			cmd = (void *)ncmd;
			cmd_size += extra_size;
			extra = NULL;
			extra_size = 0;
		} else {
			/* try without allocating memory */
			posted = false;
			cmd = (void *)buf;
		}
	} else {
		cmd = (void *)buf;
	}

	sg_init_one(&out_sg, cmd, cmd_size);
	if (extra)
		sg_init_one(&extra_sg, extra, extra_size);
	if (out)
		sg_init_one(&in_sg, out, out_size);

	/* add to internal virtio queue */
	ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
				extra ? 2 : 1,
				out ? 1 : 0,
				posted ? cmd : HANDLE_NO_FREE(cmd),
				GFP_ATOMIC);
	if (ret)
		goto out;

	if (posted) {
		virtqueue_kick(dev->cmd_vq);
		ret = 0;
		goto out;
	}

	/* kick and poll for getting a response on the queue */
	set_bit(UM_PCI_STAT_WAITING, &dev->status);
	virtqueue_kick(dev->cmd_vq);

	while (1) {
		void *completed = virtqueue_get_buf(dev->cmd_vq, &len);

		/* our own buffer is tagged NO_FREE and must not be freed */
		if (completed == HANDLE_NO_FREE(cmd))
			break;

		/* a posted-write buffer from earlier completed; free it */
		if (completed && !HANDLE_IS_NO_FREE(completed))
			kfree(completed);

		if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
			      ++delay_count > UM_VIRT_PCI_MAXDELAY,
			      "um virt-pci delay: %d", delay_count)) {
			ret = -EIO;
			break;
		}
		udelay(1);
	}
	clear_bit(UM_PCI_STAT_WAITING, &dev->status);

out:
	put_cpu_var(um_pci_msg_bufs);
	return ret;
}
171 
172 static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
173 					  int size)
174 {
175 	struct um_pci_device_reg *reg = priv;
176 	struct um_pci_device *dev = reg->dev;
177 	struct virtio_pcidev_msg hdr = {
178 		.op = VIRTIO_PCIDEV_OP_CFG_READ,
179 		.size = size,
180 		.addr = offset,
181 	};
182 	/* buf->data is maximum size - we may only use parts of it */
183 	struct um_pci_message_buffer *buf;
184 	u8 *data;
185 	unsigned long ret = ULONG_MAX;
186 	size_t bytes = sizeof(buf->data);
187 
188 	if (!dev)
189 		return ULONG_MAX;
190 
191 	buf = get_cpu_var(um_pci_msg_bufs);
192 	data = buf->data;
193 
194 	if (buf)
195 		memset(data, 0xff, bytes);
196 
197 	switch (size) {
198 	case 1:
199 	case 2:
200 	case 4:
201 #ifdef CONFIG_64BIT
202 	case 8:
203 #endif
204 		break;
205 	default:
206 		WARN(1, "invalid config space read size %d\n", size);
207 		goto out;
208 	}
209 
210 	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes))
211 		goto out;
212 
213 	switch (size) {
214 	case 1:
215 		ret = data[0];
216 		break;
217 	case 2:
218 		ret = le16_to_cpup((void *)data);
219 		break;
220 	case 4:
221 		ret = le32_to_cpup((void *)data);
222 		break;
223 #ifdef CONFIG_64BIT
224 	case 8:
225 		ret = le64_to_cpup((void *)data);
226 		break;
227 #endif
228 	default:
229 		break;
230 	}
231 
232 out:
233 	put_cpu_var(um_pci_msg_bufs);
234 	return ret;
235 }
236 
237 static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
238 				  unsigned long val)
239 {
240 	struct um_pci_device_reg *reg = priv;
241 	struct um_pci_device *dev = reg->dev;
242 	struct {
243 		struct virtio_pcidev_msg hdr;
244 		/* maximum size - we may only use parts of it */
245 		u8 data[8];
246 	} msg = {
247 		.hdr = {
248 			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
249 			.size = size,
250 			.addr = offset,
251 		},
252 	};
253 
254 	if (!dev)
255 		return;
256 
257 	switch (size) {
258 	case 1:
259 		msg.data[0] = (u8)val;
260 		break;
261 	case 2:
262 		put_unaligned_le16(val, (void *)msg.data);
263 		break;
264 	case 4:
265 		put_unaligned_le32(val, (void *)msg.data);
266 		break;
267 #ifdef CONFIG_64BIT
268 	case 8:
269 		put_unaligned_le64(val, (void *)msg.data);
270 		break;
271 #endif
272 	default:
273 		WARN(1, "invalid config space write size %d\n", size);
274 		return;
275 	}
276 
277 	WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0));
278 }
279 
/* logic_iomem ops backing each device's emulated config space. */
static const struct logic_iomem_ops um_pci_device_cfgspace_ops = {
	.read = um_pci_cfgspace_read,
	.write = um_pci_cfgspace_write,
};
284 
/*
 * Read @size bytes at @offset of a BAR into @buffer via an MMIO-read
 * command.
 *
 * @priv points at dev->resptr[bar]; each resptr entry stores its own
 * index, so subtracting the stored value recovers &resptr[0] and thus
 * the containing device - one pointer carries both (device, bar).
 */
static void um_pci_bar_copy_from(void *priv, void *buffer,
				 unsigned int offset, int size)
{
	u8 *resptr = priv;
	struct um_pci_device *dev = container_of(resptr - *resptr,
						 struct um_pci_device,
						 resptr[0]);
	struct virtio_pcidev_msg hdr = {
		.op = VIRTIO_PCIDEV_OP_MMIO_READ,
		.bar = *resptr,
		.size = size,
		.addr = offset,
	};

	/* pre-fill with all-ones, the PCI "error/no device" pattern */
	memset(buffer, 0xff, size);

	um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size);
}
303 
304 static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
305 				     int size)
306 {
307 	/* buf->data is maximum size - we may only use parts of it */
308 	struct um_pci_message_buffer *buf;
309 	u8 *data;
310 	unsigned long ret = ULONG_MAX;
311 
312 	buf = get_cpu_var(um_pci_msg_bufs);
313 	data = buf->data;
314 
315 	switch (size) {
316 	case 1:
317 	case 2:
318 	case 4:
319 #ifdef CONFIG_64BIT
320 	case 8:
321 #endif
322 		break;
323 	default:
324 		WARN(1, "invalid config space read size %d\n", size);
325 		goto out;
326 	}
327 
328 	um_pci_bar_copy_from(priv, data, offset, size);
329 
330 	switch (size) {
331 	case 1:
332 		ret = data[0];
333 		break;
334 	case 2:
335 		ret = le16_to_cpup((void *)data);
336 		break;
337 	case 4:
338 		ret = le32_to_cpup((void *)data);
339 		break;
340 #ifdef CONFIG_64BIT
341 	case 8:
342 		ret = le64_to_cpup((void *)data);
343 		break;
344 #endif
345 	default:
346 		break;
347 	}
348 
349 out:
350 	put_cpu_var(um_pci_msg_bufs);
351 	return ret;
352 }
353 
/*
 * Write @size bytes from @buffer at @offset of a BAR via an MMIO-write
 * command (posted - no completion wait).  @priv is the resptr cookie,
 * see um_pci_bar_copy_from() for how the device is recovered from it.
 */
static void um_pci_bar_copy_to(void *priv, unsigned int offset,
			       const void *buffer, int size)
{
	u8 *resptr = priv;
	struct um_pci_device *dev = container_of(resptr - *resptr,
						 struct um_pci_device,
						 resptr[0]);
	struct virtio_pcidev_msg hdr = {
		.op = VIRTIO_PCIDEV_OP_MMIO_WRITE,
		.bar = *resptr,
		.size = size,
		.addr = offset,
	};

	um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0);
}
370 
371 static void um_pci_bar_write(void *priv, unsigned int offset, int size,
372 			     unsigned long val)
373 {
374 	/* maximum size - we may only use parts of it */
375 	u8 data[8];
376 
377 	switch (size) {
378 	case 1:
379 		data[0] = (u8)val;
380 		break;
381 	case 2:
382 		put_unaligned_le16(val, (void *)data);
383 		break;
384 	case 4:
385 		put_unaligned_le32(val, (void *)data);
386 		break;
387 #ifdef CONFIG_64BIT
388 	case 8:
389 		put_unaligned_le64(val, (void *)data);
390 		break;
391 #endif
392 	default:
393 		WARN(1, "invalid config space write size %d\n", size);
394 		return;
395 	}
396 
397 	um_pci_bar_copy_to(priv, offset, data, size);
398 }
399 
400 static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size)
401 {
402 	u8 *resptr = priv;
403 	struct um_pci_device *dev = container_of(resptr - *resptr,
404 						 struct um_pci_device,
405 						 resptr[0]);
406 	struct {
407 		struct virtio_pcidev_msg hdr;
408 		u8 data;
409 	} msg = {
410 		.hdr = {
411 			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
412 			.bar = *resptr,
413 			.size = size,
414 			.addr = offset,
415 		},
416 		.data = value,
417 	};
418 
419 	um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0);
420 }
421 
/* logic_iomem ops backing a device's memory BARs (MMIO emulation). */
static const struct logic_iomem_ops um_pci_device_bar_ops = {
	.read = um_pci_bar_read,
	.write = um_pci_bar_write,
	.set = um_pci_bar_set,
	.copy_from = um_pci_bar_copy_from,
	.copy_to = um_pci_bar_copy_to,
};
429 
430 static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
431 				    int where)
432 {
433 	struct um_pci_device_reg *dev;
434 	unsigned int busn = bus->number;
435 
436 	if (busn > 0)
437 		return NULL;
438 
439 	/* not allowing functions for now ... */
440 	if (devfn % 8)
441 		return NULL;
442 
443 	if (devfn / 8 >= ARRAY_SIZE(um_pci_devices))
444 		return NULL;
445 
446 	dev = &um_pci_devices[devfn / 8];
447 	if (!dev)
448 		return NULL;
449 
450 	return (void __iomem *)((unsigned long)dev->iomem + where);
451 }
452 
/* Generic config accessors on top of the mapped emulated config space. */
static struct pci_ops um_pci_ops = {
	.map_bus = um_pci_map_bus,
	.read = pci_generic_config_read,
	.write = pci_generic_config_write,
};
458 
/* Re-enumerate the root bus after a device appeared or disappeared. */
static void um_pci_rescan(void)
{
	pci_lock_rescan_remove();
	pci_rescan_bus(bridge->bus);
	pci_unlock_rescan_remove();
}
465 
466 static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick)
467 {
468 	struct scatterlist sg[1];
469 
470 	sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE);
471 	if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC))
472 		kfree(buf);
473 	else if (kick)
474 		virtqueue_kick(vq);
475 }
476 
/*
 * Dispatch one interrupt message received on the IRQ virtqueue.
 *
 * NOTE(review): the caller only validates that the header fits; for
 * VIRTIO_PCIDEV_OP_MSI the payload in msg->data is read per msg->size
 * without checking it against the actually received length - this trusts
 * the device to send well-formed messages; verify against the
 * virtio-pcidev message format.
 */
static void um_pci_handle_irq_message(struct virtqueue *vq,
				      struct virtio_pcidev_msg *msg)
{
	struct virtio_device *vdev = vq->vdev;
	struct um_pci_device *dev = vdev->priv;

	/* we should properly chain interrupts, but on ARCH=um we don't care */

	switch (msg->op) {
	case VIRTIO_PCIDEV_OP_INT:
		/* legacy INTx: forward to the device's single IRQ */
		generic_handle_irq(dev->irq);
		break;
	case VIRTIO_PCIDEV_OP_MSI:
		/* our MSI message is just the interrupt number */
		if (msg->size == sizeof(u32))
			generic_handle_irq(le32_to_cpup((void *)msg->data));
		else
			generic_handle_irq(le16_to_cpup((void *)msg->data));
		break;
	case VIRTIO_PCIDEV_OP_PME:
		/* nothing to do - we already woke up due to the message */
		break;
	default:
		dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op);
		break;
	}
}
504 
/*
 * Command virtqueue callback: free the buffers of completed posted
 * writes.  While a synchronous command is in flight (UM_PCI_STAT_WAITING
 * set), um_pci_send_cmd() reaps the queue itself, so do nothing here to
 * avoid stealing its completion.
 */
static void um_pci_cmd_vq_cb(struct virtqueue *vq)
{
	struct virtio_device *vdev = vq->vdev;
	struct um_pci_device *dev = vdev->priv;
	void *cmd;
	int len;

	if (test_bit(UM_PCI_STAT_WAITING, &dev->status))
		return;

	while ((cmd = virtqueue_get_buf(vq, &len))) {
		/* NO_FREE-tagged buffers belong to a synchronous waiter */
		if (WARN_ON(HANDLE_IS_NO_FREE(cmd)))
			continue;
		kfree(cmd);
	}
}
521 
/* IRQ virtqueue callback: dispatch each received message, then recycle it. */
static void um_pci_irq_vq_cb(struct virtqueue *vq)
{
	struct virtio_pcidev_msg *msg;
	int len;

	while ((msg = virtqueue_get_buf(vq, &len))) {
		/* ignore runts that cannot even hold the header */
		if (len >= sizeof(*msg))
			um_pci_handle_irq_message(vq, msg);

		/* recycle the message buffer */
		um_pci_irq_vq_addbuf(vq, msg, true);
	}
}
535 
536 static int um_pci_init_vqs(struct um_pci_device *dev)
537 {
538 	struct virtqueue *vqs[2];
539 	static const char *const names[2] = { "cmd", "irq" };
540 	vq_callback_t *cbs[2] = { um_pci_cmd_vq_cb, um_pci_irq_vq_cb };
541 	int err, i;
542 
543 	err = virtio_find_vqs(dev->vdev, 2, vqs, cbs, names, NULL);
544 	if (err)
545 		return err;
546 
547 	dev->cmd_vq = vqs[0];
548 	dev->irq_vq = vqs[1];
549 
550 	virtio_device_ready(dev->vdev);
551 
552 	for (i = 0; i < NUM_IRQ_MSGS; i++) {
553 		void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);
554 
555 		if (msg)
556 			um_pci_irq_vq_addbuf(dev->irq_vq, msg, false);
557 	}
558 
559 	virtqueue_kick(dev->irq_vq);
560 
561 	return 0;
562 }
563 
564 static int um_pci_virtio_probe(struct virtio_device *vdev)
565 {
566 	struct um_pci_device *dev;
567 	int i, free = -1;
568 	int err = -ENOSPC;
569 
570 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
571 	if (!dev)
572 		return -ENOMEM;
573 
574 	dev->vdev = vdev;
575 	vdev->priv = dev;
576 
577 	mutex_lock(&um_pci_mtx);
578 	for (i = 0; i < MAX_DEVICES; i++) {
579 		if (um_pci_devices[i].dev)
580 			continue;
581 		free = i;
582 		break;
583 	}
584 
585 	if (free < 0)
586 		goto error;
587 
588 	err = um_pci_init_vqs(dev);
589 	if (err)
590 		goto error;
591 
592 	dev->irq = irq_alloc_desc(numa_node_id());
593 	if (dev->irq < 0) {
594 		err = dev->irq;
595 		goto err_reset;
596 	}
597 	um_pci_devices[free].dev = dev;
598 	vdev->priv = dev;
599 
600 	mutex_unlock(&um_pci_mtx);
601 
602 	device_set_wakeup_enable(&vdev->dev, true);
603 
604 	/*
605 	 * In order to do suspend-resume properly, don't allow VQs
606 	 * to be suspended.
607 	 */
608 	virtio_uml_set_no_vq_suspend(vdev, true);
609 
610 	um_pci_rescan();
611 	return 0;
612 err_reset:
613 	virtio_reset_device(vdev);
614 	vdev->config->del_vqs(vdev);
615 error:
616 	mutex_unlock(&um_pci_mtx);
617 	kfree(dev);
618 	return err;
619 }
620 
621 static void um_pci_virtio_remove(struct virtio_device *vdev)
622 {
623 	struct um_pci_device *dev = vdev->priv;
624 	int i;
625 
626         /* Stop all virtqueues */
627         virtio_reset_device(vdev);
628         vdev->config->del_vqs(vdev);
629 
630 	device_set_wakeup_enable(&vdev->dev, false);
631 
632 	mutex_lock(&um_pci_mtx);
633 	for (i = 0; i < MAX_DEVICES; i++) {
634 		if (um_pci_devices[i].dev != dev)
635 			continue;
636 		um_pci_devices[i].dev = NULL;
637 		irq_free_desc(dev->irq);
638 	}
639 	mutex_unlock(&um_pci_mtx);
640 
641 	um_pci_rescan();
642 
643 	kfree(dev);
644 }
645 
/* Match the virtio device ID chosen in Kconfig; any vendor. */
static struct virtio_device_id id_table[] = {
	{ CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
MODULE_DEVICE_TABLE(virtio, id_table);
651 
/* Driver binding virtio devices to the emulated PCI bus. */
static struct virtio_driver um_pci_virtio_driver = {
	.driver.name = "virtio-pci",
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.probe = um_pci_virtio_probe,
	.remove = um_pci_virtio_remove,
};
659 
/*
 * MAX_DEVICES config spaces of CFG_SPACE_SIZE each, placed directly
 * below the MMIO window at 0xf0000000.
 */
static struct resource virt_cfgspace_resource = {
	.name = "PCI config space",
	.start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE,
	.end = 0xf0000000 - 1,
	.flags = IORESOURCE_MEM,
};
666 
667 static long um_pci_map_cfgspace(unsigned long offset, size_t size,
668 				const struct logic_iomem_ops **ops,
669 				void **priv)
670 {
671 	if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE))
672 		return -EINVAL;
673 
674 	if (offset / CFG_SPACE_SIZE < MAX_DEVICES) {
675 		*ops = &um_pci_device_cfgspace_ops;
676 		*priv = &um_pci_devices[offset / CFG_SPACE_SIZE];
677 		return 0;
678 	}
679 
680 	WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size);
681 	return -ENOENT;
682 }
683 
/* Region ops for the config-space window. */
static const struct logic_iomem_region_ops um_pci_cfgspace_ops = {
	.map = um_pci_map_cfgspace,
};
687 
/* The MMIO window in which device BARs are allocated. */
static struct resource virt_iomem_resource = {
	.name = "PCI iomem",
	.start = 0xf0000000,
	.end = 0xffffffff,
	.flags = IORESOURCE_MEM,
};
694 
/* In/out parameters threaded through the pci_walk_bus() callback. */
struct um_pci_map_iomem_data {
	/* absolute bus address and size of the requested mapping */
	unsigned long offset;
	size_t size;
	/* outputs: ops/priv for logic_iomem */
	const struct logic_iomem_ops **ops;
	void **priv;
	/* offset within the matched BAR, or -ENOENT */
	long ret;
};
702 
703 static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data)
704 {
705 	struct um_pci_map_iomem_data *data = _data;
706 	struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
707 	struct um_pci_device *dev;
708 	int i;
709 
710 	if (!reg->dev)
711 		return 0;
712 
713 	for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) {
714 		struct resource *r = &pdev->resource[i];
715 
716 		if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM)
717 			continue;
718 
719 		/*
720 		 * must be the whole or part of the resource,
721 		 * not allowed to only overlap
722 		 */
723 		if (data->offset < r->start || data->offset > r->end)
724 			continue;
725 		if (data->offset + data->size - 1 > r->end)
726 			continue;
727 
728 		dev = reg->dev;
729 		*data->ops = &um_pci_device_bar_ops;
730 		dev->resptr[i] = i;
731 		*data->priv = &dev->resptr[i];
732 		data->ret = data->offset - r->start;
733 
734 		/* no need to continue */
735 		return 1;
736 	}
737 
738 	return 0;
739 }
740 
741 static long um_pci_map_iomem(unsigned long offset, size_t size,
742 			     const struct logic_iomem_ops **ops,
743 			     void **priv)
744 {
745 	struct um_pci_map_iomem_data data = {
746 		/* we want the full address here */
747 		.offset = offset + virt_iomem_resource.start,
748 		.size = size,
749 		.ops = ops,
750 		.priv = priv,
751 		.ret = -ENOENT,
752 	};
753 
754 	pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data);
755 	return data.ret;
756 }
757 
/* Region ops for the MMIO (BAR) window. */
static const struct logic_iomem_region_ops um_pci_iomem_ops = {
	.map = um_pci_map_iomem,
};
761 
/* Compose the MSI message (address/data pair) written into the device. */
static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	/*
	 * This is a very low address and not actually valid 'physical' memory
	 * in UML, so we can simply map MSI(-X) vectors to there, it cannot be
	 * legitimately written to by the device in any other way.
	 * We use the (virtual) IRQ number here as the message to simplify the
	 * code that receives the message, where for now we simply trust the
	 * device to send the correct message.
	 */
	msg->address_hi = 0;
	msg->address_lo = 0xa0000;
	msg->data = data->irq;
}
776 
/* Inner-domain chip: only needs to compose the MSI message. */
static struct irq_chip um_pci_msi_bottom_irq_chip = {
	.name = "UM virtio MSI",
	.irq_compose_msi_msg = um_pci_compose_msi_msg,
};
781 
/*
 * Allocate one MSI vector in the inner domain; the hwirq is the index
 * into the um_pci_msi_used bitmap.
 */
static int um_pci_inner_domain_alloc(struct irq_domain *domain,
				     unsigned int virq, unsigned int nr_irqs,
				     void *args)
{
	unsigned long bit;

	/* multi-vector allocation is not supported */
	WARN_ON(nr_irqs != 1);

	mutex_lock(&um_pci_mtx);
	bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS);
	if (bit >= MAX_MSI_VECTORS) {
		mutex_unlock(&um_pci_mtx);
		return -ENOSPC;
	}

	set_bit(bit, um_pci_msi_used);
	mutex_unlock(&um_pci_mtx);

	irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip,
			    domain->host_data, handle_simple_irq,
			    NULL, NULL);

	return 0;
}
806 
/*
 * Release the MSI vector backing @virq back to the bitmap.
 * The non-atomic __clear_bit() is fine here since the bitmap is only
 * modified under um_pci_mtx (the atomic set_bit() in the alloc path is
 * merely stricter than necessary).
 */
static void um_pci_inner_domain_free(struct irq_domain *domain,
				     unsigned int virq, unsigned int nr_irqs)
{
	struct irq_data *d = irq_domain_get_irq_data(domain, virq);

	mutex_lock(&um_pci_mtx);

	if (!test_bit(d->hwirq, um_pci_msi_used))
		pr_err("trying to free unused MSI#%lu\n", d->hwirq);
	else
		__clear_bit(d->hwirq, um_pci_msi_used);

	mutex_unlock(&um_pci_mtx);
}
821 
/* Inner (vector-allocation) domain operations. */
static const struct irq_domain_ops um_pci_inner_domain_ops = {
	.alloc = um_pci_inner_domain_alloc,
	.free = um_pci_inner_domain_free,
};
826 
/* Top-level MSI chip: standard PCI MSI mask/unmask. */
static struct irq_chip um_pci_msi_irq_chip = {
	.name = "UM virtio PCIe MSI",
	.irq_mask = pci_msi_mask_irq,
	.irq_unmask = pci_msi_unmask_irq,
};
832 
/* MSI domain: default ops/chip callbacks, MSI-X capable. */
static struct msi_domain_info um_pci_msi_domain_info = {
	.flags	= MSI_FLAG_USE_DEF_DOM_OPS |
		  MSI_FLAG_USE_DEF_CHIP_OPS |
		  MSI_FLAG_PCI_MSIX,
	.chip	= &um_pci_msi_irq_chip,
};
839 
/* Single bus number: only the root bus (0) exists. */
static struct resource busn_resource = {
	.name	= "PCI busn",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUS,
};
846 
847 static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
848 {
849 	struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
850 
851 	if (WARN_ON(!reg->dev))
852 		return -EINVAL;
853 
854 	/* Yes, we map all pins to the same IRQ ... doesn't matter for now. */
855 	return reg->dev->irq;
856 }
857 
/*
 * Return the fwnode of our MSI irq domains for the root bus.
 * NOTE(review): deliberately non-static - presumably satisfies a
 * declaration in the arch/PCI core so the MSI domain can be looked up;
 * confirm against the arch PCI code before changing linkage.
 */
void *pci_root_bus_fwnode(struct pci_bus *bus)
{
	return um_pci_fwnode;
}
862 
863 static int __init um_pci_init(void)
864 {
865 	int err, i;
866 
867 	WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
868 				       &um_pci_cfgspace_ops));
869 	WARN_ON(logic_iomem_add_region(&virt_iomem_resource,
870 				       &um_pci_iomem_ops));
871 
872 	if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
873 		 "No virtio device ID configured for PCI - no PCI support\n"))
874 		return 0;
875 
876 	um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer);
877 	if (!um_pci_msg_bufs)
878 		return -ENOMEM;
879 
880 	bridge = pci_alloc_host_bridge(0);
881 	if (!bridge) {
882 		err = -ENOMEM;
883 		goto free;
884 	}
885 
886 	um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci");
887 	if (!um_pci_fwnode) {
888 		err = -ENOMEM;
889 		goto free;
890 	}
891 
892 	um_pci_inner_domain = __irq_domain_add(um_pci_fwnode, MAX_MSI_VECTORS,
893 					       MAX_MSI_VECTORS, 0,
894 					       &um_pci_inner_domain_ops, NULL);
895 	if (!um_pci_inner_domain) {
896 		err = -ENOMEM;
897 		goto free;
898 	}
899 
900 	um_pci_msi_domain = pci_msi_create_irq_domain(um_pci_fwnode,
901 						      &um_pci_msi_domain_info,
902 						      um_pci_inner_domain);
903 	if (!um_pci_msi_domain) {
904 		err = -ENOMEM;
905 		goto free;
906 	}
907 
908 	pci_add_resource(&bridge->windows, &virt_iomem_resource);
909 	pci_add_resource(&bridge->windows, &busn_resource);
910 	bridge->ops = &um_pci_ops;
911 	bridge->map_irq = um_pci_map_irq;
912 
913 	for (i = 0; i < MAX_DEVICES; i++) {
914 		resource_size_t start;
915 
916 		start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE;
917 		um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE);
918 		if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) {
919 			err = -ENOMEM;
920 			goto free;
921 		}
922 	}
923 
924 	err = pci_host_probe(bridge);
925 	if (err)
926 		goto free;
927 
928 	err = register_virtio_driver(&um_pci_virtio_driver);
929 	if (err)
930 		goto free;
931 	return 0;
932 free:
933 	if (um_pci_inner_domain)
934 		irq_domain_remove(um_pci_inner_domain);
935 	if (um_pci_fwnode)
936 		irq_domain_free_fwnode(um_pci_fwnode);
937 	if (bridge) {
938 		pci_free_resource_list(&bridge->windows);
939 		pci_free_host_bridge(bridge);
940 	}
941 	free_percpu(um_pci_msg_bufs);
942 	return err;
943 }
944 module_init(um_pci_init);
945 
946 static void __exit um_pci_exit(void)
947 {
948 	unregister_virtio_driver(&um_pci_virtio_driver);
949 	irq_domain_remove(um_pci_msi_domain);
950 	irq_domain_remove(um_pci_inner_domain);
951 	pci_free_resource_list(&bridge->windows);
952 	pci_free_host_bridge(bridge);
953 	free_percpu(um_pci_msg_bufs);
954 }
955 module_exit(um_pci_exit);
956