xref: /freebsd/sys/dev/vmware/vmci/vmci.c (revision ae7e8a02e6e93455e026036132c4d053b2c12ad9)
1 /*-
2  * Copyright (c) 2018 VMware, Inc.
3  *
4  * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
5  */
6 
7 /* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */
8 
9 #include <sys/cdefs.h>
10 __FBSDID("$FreeBSD$");
11 
12 #include <sys/param.h>
13 #include <sys/bus.h>
14 #include <sys/kernel.h>
15 #include <sys/malloc.h>
16 #include <sys/module.h>
17 #include <sys/rman.h>
18 #include <sys/systm.h>
19 
20 #include <dev/pci/pcireg.h>
21 #include <dev/pci/pcivar.h>
22 
23 #include <machine/bus.h>
24 
25 #include "vmci.h"
26 #include "vmci_doorbell.h"
27 #include "vmci_driver.h"
28 #include "vmci_kernel_defs.h"
29 #include "vmci_queue_pair.h"
30 
31 static int	vmci_probe(device_t);
32 static int	vmci_attach(device_t);
33 static int	vmci_detach(device_t);
34 static int	vmci_shutdown(device_t);
35 
36 static int	vmci_map_bars(struct vmci_softc *);
37 static void	vmci_unmap_bars(struct vmci_softc *);
38 
39 static int	vmci_config_capabilities(struct vmci_softc *);
40 
41 static int	vmci_dma_malloc_int(struct vmci_softc *, bus_size_t,
42 		    bus_size_t, struct vmci_dma_alloc *);
43 static void	vmci_dma_free_int(struct vmci_softc *,
44 		    struct vmci_dma_alloc *);
45 
46 static int	vmci_config_interrupts(struct vmci_softc *);
47 static int	vmci_config_interrupt(struct vmci_softc *);
48 static int	vmci_check_intr_cnt(struct vmci_softc *);
49 static int	vmci_allocate_interrupt_resources(struct vmci_softc *);
50 static int	vmci_setup_interrupts(struct vmci_softc *);
51 static void	vmci_dismantle_interrupts(struct vmci_softc *);
52 static void	vmci_interrupt(void *);
53 static void	vmci_interrupt_bm(void *);
54 static void	dispatch_datagrams(void *, int);
55 static void	process_bitmap(void *, int);
56 
57 static void	vmci_delayed_work_fn_cb(void *context, int data);
58 
59 static device_method_t vmci_methods[] = {
60 	/* Device interface. */
61 	DEVMETHOD(device_probe,		vmci_probe),
62 	DEVMETHOD(device_attach,	vmci_attach),
63 	DEVMETHOD(device_detach,	vmci_detach),
64 	DEVMETHOD(device_shutdown,	vmci_shutdown),
65 
66 	DEVMETHOD_END
67 };
68 
69 static driver_t vmci_driver = {
70 	"vmci", vmci_methods, sizeof(struct vmci_softc)
71 };
72 
73 static devclass_t vmci_devclass;
74 DRIVER_MODULE(vmci, pci, vmci_driver, vmci_devclass, 0, 0);
75 MODULE_VERSION(vmci, VMCI_VERSION);
76 const struct {
77 	uint16_t vendor;
78 	uint16_t device;
79 	const char *desc;
80 } vmci_ids[] = {
81 	{ VMCI_VMWARE_VENDOR_ID, VMCI_VMWARE_DEVICE_ID,
82 	    "VMware Virtual Machine Communication Interface" },
83 };
84 MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, vmci, vmci_ids,
85     nitems(vmci_ids));
86 
87 MODULE_DEPEND(vmci, pci, 1, 1, 1);
88 
89 static struct vmci_softc *vmci_sc;
90 
91 #define LGPFX	"vmci: "
92 /*
93  * Allocate a buffer for incoming datagrams globally to avoid repeated
94  * allocation in the interrupt handler's atomic context.
95  */
96 static uint8_t *data_buffer = NULL;
97 static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE;
98 
/*
 * One queued unit of delayed work: the callback to invoke and its opaque
 * argument. Entries are linked into the softc's vmci_delayed_work_infos
 * list and drained by vmci_delayed_work_fn_cb().
 */
struct vmci_delayed_work_info {
	vmci_work_fn	*work_fn;	/* callback to run from the taskqueue */
	void		*data;		/* opaque argument passed to work_fn */
	vmci_list_item(vmci_delayed_work_info) entry;	/* list linkage */
};
104 
105 /*
106  *------------------------------------------------------------------------------
107  *
108  * vmci_probe --
109  *
110  *     Probe to see if the VMCI device is present.
111  *
112  * Results:
113  *     BUS_PROBE_DEFAULT if device exists, ENXIO otherwise.
114  *
115  * Side effects:
116  *     None.
117  *
118  *------------------------------------------------------------------------------
119  */
120 
121 static int
122 vmci_probe(device_t dev)
123 {
124 
125 	if (pci_get_vendor(dev) == vmci_ids[0].vendor &&
126 	    pci_get_device(dev) == vmci_ids[0].device) {
127 		device_set_desc(dev, vmci_ids[0].desc);
128 
129 		return (BUS_PROBE_DEFAULT);
130 	}
131 
132 	return (ENXIO);
133 }
134 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_attach --
 *
 *     Attach VMCI device to the system after vmci_probe() has been called and
 *     the device has been detected.
 *
 * Results:
 *     0 if success, ENXIO otherwise.
 *
 * Side effects:
 *     Softc, locks, tasks, BARs and interrupts are initialized. On failure,
 *     vmci_detach() is invoked to unwind whatever was partially set up.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_attach(device_t dev)
{
	struct vmci_softc *sc;
	int error, i;

	sc = device_get_softc(dev);
	sc->vmci_dev = dev;
	/* Only one VMCI device is supported; cache the softc globally. */
	vmci_sc = sc;

	data_buffer = NULL;
	sc->vmci_num_intr = 0;
	/* Clear interrupt slots so vmci_dismantle_interrupts() is safe. */
	for (i = 0; i < VMCI_MAX_INTRS; i++) {
		sc->vmci_intrs[i].vmci_irq = NULL;
		sc->vmci_intrs[i].vmci_handler = NULL;
	}

	/* Deferred work run from taskqueues, outside interrupt context. */
	TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc);
	TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc);

	TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc);

	pci_enable_busmaster(dev);

	mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN);
	mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock",
	    NULL, MTX_DEF);

	error = vmci_map_bars(sc);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n");
		goto fail;
	}

	error = vmci_config_capabilities(sc);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n");
		goto fail;
	}

	vmci_list_init(&sc->vmci_delayed_work_infos);

	vmci_components_init();
	vmci_util_init();
	error = vmci_qp_guest_endpoints_init();
	if (error) {
		VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n");
		goto fail;
	}

	error = vmci_config_interrupts(sc);
	if (error)
		VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n");

fail:
	if (error) {
		/* vmci_detach() tolerates partially-initialized state. */
		vmci_detach(dev);
		return (ENXIO);
	}

	return (0);
}
214 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_detach --
 *
 *     Detach the VMCI device. Also used by vmci_attach() as its error
 *     unwinding path, so each step must tolerate partially-initialized
 *     state.
 *
 * Results:
 *     0
 *
 * Side effects:
 *     Interrupts are dismantled, the device is reset and all resources
 *     are released.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_detach(device_t dev)
{
	struct vmci_softc *sc;

	sc = device_get_softc(dev);

	vmci_qp_guest_endpoints_exit();
	vmci_util_exit();

	/* Quiesce interrupts before tearing down the components they use. */
	vmci_dismantle_interrupts(sc);

	vmci_components_cleanup();

	/* Wait for any in-flight delayed-work callbacks to complete. */
	taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
	mtx_destroy(&sc->vmci_delayed_work_lock);

	/* Reset the device only if BAR0 was successfully mapped. */
	if (sc->vmci_res0 != NULL)
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET);

	if (sc->vmci_notifications_bitmap.dma_vaddr != NULL)
		vmci_dma_free(&sc->vmci_notifications_bitmap);

	vmci_unmap_bars(sc);

	mtx_destroy(&sc->vmci_spinlock);

	pci_disable_busmaster(dev);

	return (0);
}
263 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_shutdown --
 *
 *     This function is called during system shutdown. We don't do anything.
 *
 * Results:
 *     0
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_shutdown(device_t dev)
{

	/* Intentionally a no-op; nothing needs flushing at shutdown. */
	return (0);
}
286 
287 /*
288  *------------------------------------------------------------------------------
289  *
290  * vmci_map_bars --
291  *
292  *     Maps the PCI I/O and MMIO BARs.
293  *
294  * Results:
295  *     0 on success, ENXIO otherwise.
296  *
297  * Side effects:
298  *     None.
299  *
300  *------------------------------------------------------------------------------
301  */
302 
303 static int
304 vmci_map_bars(struct vmci_softc *sc)
305 {
306 	int rid;
307 
308 	/* Map the PCI I/O BAR: BAR0 */
309 	rid = PCIR_BAR(0);
310 	sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT,
311 	    &rid, RF_ACTIVE);
312 	if (sc->vmci_res0 == NULL) {
313 		VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n");
314 		return (ENXIO);
315 	}
316 
317 	sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0);
318 	sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0);
319 	sc->vmci_ioaddr = rman_get_start(sc->vmci_res0);
320 
321 	/* Map the PCI MMIO BAR: BAR1 */
322 	rid = PCIR_BAR(1);
323 	sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY,
324 	    &rid, RF_ACTIVE);
325 	if (sc->vmci_res1 == NULL) {
326 		VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n");
327 		return (ENXIO);
328 	}
329 
330 	sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1);
331 	sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1);
332 
333 	return (0);
334 }
335 
336 /*
337  *------------------------------------------------------------------------------
338  *
339  * vmci_unmap_bars --
340  *
341  *     Unmaps the VMCI PCI I/O and MMIO BARs.
342  *
343  * Results:
344  *     None.
345  *
346  * Side effects:
347  *     None.
348  *
349  *------------------------------------------------------------------------------
350  */
351 
352 static void
353 vmci_unmap_bars(struct vmci_softc *sc)
354 {
355 	int rid;
356 
357 	if (sc->vmci_res0 != NULL) {
358 		rid = PCIR_BAR(0);
359 		bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid,
360 		    sc->vmci_res0);
361 		sc->vmci_res0 = NULL;
362 	}
363 
364 	if (sc->vmci_res1 != NULL) {
365 		rid = PCIR_BAR(1);
366 		bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid,
367 		    sc->vmci_res1);
368 		sc->vmci_res1 = NULL;
369 	}
370 }
371 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_capabilities --
 *
 *     Check the VMCI device capabilities and configure the device accordingly.
 *     Datagram support is mandatory; notifications are enabled only when the
 *     device advertises them and the bitmap page can be allocated.
 *
 * Results:
 *     0 if success, ENODEV otherwise.
 *
 * Side effects:
 *     Device capabilities are enabled and, when notifications are used, the
 *     notification bitmap page is allocated and registered with the device.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_capabilities(struct vmci_softc *sc)
{
	unsigned long bitmap_PPN;
	int error;

	/*
	 * Verify that the VMCI device supports the capabilities that we
	 * need. Datagrams are necessary and notifications will be used
	 * if the device supports it.
	 */
	sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0,
	    VMCI_CAPS_ADDR);

	if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) {
		VMCI_LOG_ERROR(LGPFX"VMCI device does not support "
		    "datagrams.\n");
		return (ENODEV);
	}

	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
		/*
		 * Start from datagrams only; the notifications bit is added
		 * back below only if the bitmap page allocation succeeds.
		 */
		sc->capabilities = VMCI_CAPS_DATAGRAM;
		error = vmci_dma_malloc(PAGE_SIZE, 1,
		    &sc->vmci_notifications_bitmap);
		if (error)
			VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for "
			    "notification bitmap.\n");
		else {
			memset(sc->vmci_notifications_bitmap.dma_vaddr, 0,
			    PAGE_SIZE);
			sc->capabilities |= VMCI_CAPS_NOTIFICATIONS;
		}
	} else
		sc->capabilities = VMCI_CAPS_DATAGRAM;

	/* Let the host know which capabilities we intend to use. */
	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
	    VMCI_CAPS_ADDR, sc->capabilities);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
		/* The device wants a page frame number, not an address. */
		bitmap_PPN =
		    sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT;
		vmci_register_notification_bitmap(bitmap_PPN);
	}

	/* Check host capabilities. */
	if (!vmci_check_host_capabilities())
		return (ENODEV);

	return (0);
}
443 
444 /*
445  *------------------------------------------------------------------------------
446  *
447  * vmci_dmamap_cb --
448  *
449  *     Callback to receive mapping information resulting from the load of a
450  *     bus_dmamap_t via bus_dmamap_load()
451  *
452  * Results:
453  *     None.
454  *
455  * Side effects:
456  *     None.
457  *
458  *------------------------------------------------------------------------------
459  */
460 
461 static void
462 vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
463 {
464 	bus_addr_t *baddr = arg;
465 
466 	if (error == 0)
467 		*baddr = segs->ds_addr;
468 }
469 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_malloc_int --
 *
 *     Internal function that allocates DMA memory.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     EINPROGRESS if mapping is deferred.
 *     EINVAL if the request was invalid.
 *
 * Side effects:
 *     DMA memory is allocated. On failure, any partially-created state in
 *     *dma is released via vmci_dma_free() and *dma is zeroed.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align,
    struct vmci_dma_alloc *dma)
{
	int error;

	/* Start from a clean slate so the failure path can free safely. */
	bzero(dma, sizeof(struct vmci_dma_alloc));

	/* Single-segment tag: the whole buffer must be contiguous. */
	error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev),
	    align, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    size,		/* maxsize */
	    1,			/* nsegments */
	    size,		/* maxsegsize */
	    BUS_DMA_ALLOCNOW,	/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &dma->dma_tag);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error);
		goto fail;
	}

	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error);
		goto fail;
	}

	/* vmci_dmamap_cb() stores the bus address into dma->dma_paddr. */
	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
	if (error) {
		VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error);
		goto fail;
	}

	dma->dma_size = size;

fail:
	if (error)
		vmci_dma_free(dma);

	return (error);
}
536 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_malloc --
 *
 *     This function is a wrapper around vmci_dma_malloc_int for callers
 *     outside of this module. Since we only support a single VMCI device, this
 *     wrapper provides access to the device softc structure.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     EINPROGRESS if mapping is deferred.
 *     EINVAL if the request was invalid.
 *
 * Side effects:
 *     DMA memory is allocated.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma)
{

	/* vmci_sc is set during vmci_attach(); callers run after attach. */
	return (vmci_dma_malloc_int(vmci_sc, size, align, dma));
}
564 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_free_int --
 *
 *     Internal function that frees DMA memory. Safe to call on a partially-
 *     initialized or already-zeroed vmci_dma_alloc.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     Frees DMA memory and zeroes *dma.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma)
{

	if (dma->dma_tag != NULL) {
		/* Unload only if the map was actually loaded (paddr set). */
		if (dma->dma_paddr != 0) {
			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		}

		if (dma->dma_vaddr != NULL)
			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
			    dma->dma_map);

		bus_dma_tag_destroy(dma->dma_tag);
	}
	/* Reset so a repeated free is harmless. */
	bzero(dma, sizeof(struct vmci_dma_alloc));
}
600 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_dma_free --
 *
 *     This function is a wrapper around vmci_dma_free_int for callers outside
 *     of this module. Since we only support a single VMCI device, this wrapper
 *     provides access to the device softc structure.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     Frees DMA memory.
 *
 *------------------------------------------------------------------------------
 */

void
vmci_dma_free(struct vmci_dma_alloc *dma)
{

	/* vmci_sc is set during vmci_attach(); callers run after attach. */
	vmci_dma_free_int(vmci_sc, dma);
}
625 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_interrupts --
 *
 *     Configures and enables interrupts. Try to configure MSI-X. If this fails,
 *     try to configure MSI. If even this fails, try legacy interrupts.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     ENODEV if the device doesn't support interrupts.
 *     ENXIO if the device configuration failed.
 *
 * Side effects:
 *     Interrupts get enabled if successful. The global datagram buffer is
 *     allocated here and freed in vmci_dismantle_interrupts().
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_interrupts(struct vmci_softc *sc)
{
	int error;

	/*
	 * Allocate the datagram buffer up front so the interrupt path never
	 * has to allocate; M_NOWAIT because failure is handled gracefully.
	 */
	data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT);
	if (data_buffer == NULL)
		return (ENOMEM);

	/* Fall back MSI-X -> MSI -> INTx, keeping the first that works. */
	sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX;
	error = vmci_config_interrupt(sc);
	if (error) {
		sc->vmci_intr_type = VMCI_INTR_TYPE_MSI;
		error = vmci_config_interrupt(sc);
	}
	if (error) {
		sc->vmci_intr_type = VMCI_INTR_TYPE_INTX;
		error = vmci_config_interrupt(sc);
	}
	if (error)
		return (error);

	/* Enable specific interrupt bits. */
	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS)
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION);
	else
		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM);

	/* Enable interrupts. */
	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
	    VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE);

	return (0);
}
682 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_interrupt --
 *
 *     Check the number of interrupts supported, allocate resources and setup
 *     interrupts.
 *
 * Results:
 *     0 if success.
 *     ENOMEM if insufficient memory.
 *     ENODEV if the device doesn't support interrupts.
 *     ENXIO if the device configuration failed.
 *
 * Side effects:
 *     Resources get allocated and interrupts get setup (but not enabled) if
 *     successful.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_interrupt(struct vmci_softc *sc)
{
	int error;

	/* Three stages; the first failure aborts the attempt. */
	if ((error = vmci_check_intr_cnt(sc)) != 0)
		return (error);

	if ((error = vmci_allocate_interrupt_resources(sc)) != 0)
		return (error);

	return (vmci_setup_interrupts(sc));
}
723 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_check_intr_cnt --
 *
 *     Check the number of interrupts supported by the device and ask PCI bus
 *     to allocate appropriate number of interrupts.
 *
 * Results:
 *     0 if success.
 *     ENODEV if the device doesn't support any interrupts.
 *     ENXIO if the device configuration failed.
 *
 * Side effects:
 *     MSI/MSI-X messages get allocated on success; sc->vmci_num_intr is set
 *     to the number of vectors that will be used.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_check_intr_cnt(struct vmci_softc *sc)
{

	/* Legacy INTx always uses exactly one (shared) interrupt line. */
	if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) {
		sc->vmci_num_intr = 1;
		return (0);
	}

	/*
	 * Make sure that the device supports the required number of MSI/MSI-X
	 * messages. We try for 2 MSI-X messages but 1 is good too. We need at
	 * least 1 MSI message.
	 */
	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
	    pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev);

	if (!sc->vmci_num_intr) {
		VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt"
		    " messages");
		return (ENODEV);
	}

	/*
	 * Request the maximum we can use; pci_alloc_msix() may lower
	 * vmci_num_intr to what is actually available.
	 */
	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
	    VMCI_MAX_INTRS : 1;
	if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) {
		if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr))
			return (ENXIO);
	} else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) {
		if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr))
			return (ENXIO);
	}

	return (0);
}
778 
779 /*
780  *------------------------------------------------------------------------------
781  *
782  * vmci_allocate_interrupt_resources --
783  *
784  *     Allocate resources necessary for interrupts.
785  *
786  * Results:
787  *     0 if success, ENXIO otherwise.
788  *
789  * Side effects:
790  *     Resources get allocated on success.
791  *
792  *------------------------------------------------------------------------------
793  */
794 
795 static int
796 vmci_allocate_interrupt_resources(struct vmci_softc *sc)
797 {
798 	struct resource *irq;
799 	int flags, i, rid;
800 
801 	flags = RF_ACTIVE;
802 	flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0;
803 	rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1;
804 
805 	for (i = 0; i < sc->vmci_num_intr; i++, rid++) {
806 		irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid,
807 		    flags);
808 		if (irq == NULL)
809 			return (ENXIO);
810 		sc->vmci_intrs[i].vmci_irq = irq;
811 		sc->vmci_intrs[i].vmci_rid = rid;
812 	}
813 
814 	return (0);
815 }
816 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_setup_interrupts --
 *
 *     Sets up the interrupts. With two MSI-X vectors, vector 0 handles
 *     datagrams and vector 1 handles the notification bitmap; otherwise a
 *     single handler demultiplexes via the ICR register.
 *
 * Results:
 *     0 if success, appropriate error code from bus_setup_intr otherwise.
 *
 * Side effects:
 *     Interrupt handler gets attached.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_setup_interrupts(struct vmci_softc *sc)
{
	struct vmci_interrupt *intr;
	int error, flags;

	flags = INTR_TYPE_NET | INTR_MPSAFE;
	/* Multiple vectors are per-purpose and must not be shared. */
	if (sc->vmci_num_intr > 1)
		flags |= INTR_EXCL;

	/* Vector 0: datagrams (also the single-vector/legacy handler). */
	intr = &sc->vmci_intrs[0];
	error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL,
	    vmci_interrupt, NULL, &intr->vmci_handler);
	if (error)
		return (error);

	if (sc->vmci_num_intr == 2) {
		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
		    intr->vmci_handler, "dg");
		/* Vector 1: notification bitmap. */
		intr = &sc->vmci_intrs[1];
		error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags,
		    NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler);
		if (error)
			return (error);
		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
		    intr->vmci_handler, "bm");
	}

	return (0);
}
863 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_interrupt --
 *
 *     Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 *     interrupt (vector VMCI_INTR_DATAGRAM). Work is deferred to swi
 *     taskqueue tasks; nothing heavyweight runs in interrupt context.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_interrupt(void *arg)
{

	/*
	 * With two MSI-X vectors this one only ever signals datagrams;
	 * notifications arrive on vmci_interrupt_bm().
	 */
	if (vmci_sc->vmci_num_intr == 2)
		taskqueue_enqueue(taskqueue_swi,
		    &vmci_sc->vmci_interrupt_dq_task);
	else {
		unsigned int icr;

		/* Read the interrupt cause to find out what to service. */
		icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR);
		/* 0: not ours; all-ones: likely the device is gone. */
		if (icr == 0 || icr == 0xffffffff)
			return;
		if (icr & VMCI_ICR_DATAGRAM) {
			taskqueue_enqueue(taskqueue_swi,
			    &vmci_sc->vmci_interrupt_dq_task);
			icr &= ~VMCI_ICR_DATAGRAM;
		}
		if (icr & VMCI_ICR_NOTIFICATION) {
			taskqueue_enqueue(taskqueue_swi,
			    &vmci_sc->vmci_interrupt_bm_task);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}
		/* Any remaining bits are causes we do not understand. */
		if (icr != 0)
			VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt "
			    "cause");
	}
}
909 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_interrupt_bm --
 *
 *     Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 *     which is for the notification bitmap. Will only get called if we are
 *     using MSI-X with exclusive vectors.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_interrupt_bm(void *arg)
{

	/* Only installed when two exclusive MSI-X vectors are in use. */
	ASSERT(vmci_sc->vmci_num_intr == 2);
	taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task);
}
935 
936 /*
937  *------------------------------------------------------------------------------
938  *
939  * dispatch_datagrams --
940  *
941  *     Reads and dispatches incoming datagrams.
942  *
943  * Results:
944  *     None.
945  *
946  * Side effects:
947  *     Reads data from the device.
948  *
949  *------------------------------------------------------------------------------
950  */
951 
952 static void
953 dispatch_datagrams(void *context, int data)
954 {
955 
956 	if (data_buffer == NULL)
957 		VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer "
958 		    "present");
959 
960 	vmci_read_datagrams_from_port((vmci_io_handle) 0,
961 	    vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR,
962 	    data_buffer, data_buffer_size);
963 }
964 
965 /*
966  *------------------------------------------------------------------------------
967  *
968  * process_bitmap --
969  *
970  *     Scans the notification bitmap for raised flags, clears them and handles
971  *     the notifications.
972  *
973  * Results:
974  *     None.
975  *
976  * Side effects:
977  *     None.
978  *
979  *------------------------------------------------------------------------------
980  */
981 
982 static void
983 process_bitmap(void *context, int data)
984 {
985 
986 	if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL)
987 		VMCI_LOG_INFO(LGPFX"process_bitmaps(): no bitmap present");
988 
989 	vmci_scan_notification_bitmap(
990 	    vmci_sc->vmci_notifications_bitmap.dma_vaddr);
991 }
992 
993 /*
994  *------------------------------------------------------------------------------
995  *
996  * vmci_dismantle_interrupts --
997  *
998  *     Releases resources, detaches the interrupt handler and drains the task
999  *     queue.
1000  *
1001  * Results:
1002  *     None.
1003  *
1004  * Side effects:
1005  *     No more interrupts.
1006  *
1007  *------------------------------------------------------------------------------
1008  */
1009 
1010 static void
1011 vmci_dismantle_interrupts(struct vmci_softc *sc)
1012 {
1013 	struct vmci_interrupt *intr;
1014 	int i;
1015 
1016 	for (i = 0; i < sc->vmci_num_intr; i++) {
1017 		intr = &sc->vmci_intrs[i];
1018 		if (intr->vmci_handler != NULL) {
1019 			bus_teardown_intr(sc->vmci_dev, intr->vmci_irq,
1020 			    intr->vmci_handler);
1021 			intr->vmci_handler = NULL;
1022 		}
1023 		if (intr->vmci_irq != NULL) {
1024 			bus_release_resource(sc->vmci_dev, SYS_RES_IRQ,
1025 			    intr->vmci_rid, intr->vmci_irq);
1026 			intr->vmci_irq = NULL;
1027 			intr->vmci_rid = -1;
1028 		}
1029 	}
1030 
1031 	if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) &&
1032 	    (sc->vmci_num_intr))
1033 		pci_release_msi(sc->vmci_dev);
1034 
1035 	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task);
1036 	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task);
1037 
1038 	if (data_buffer != NULL)
1039 		free(data_buffer, M_DEVBUF);
1040 }
1041 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_delayed_work_fn_cb --
 *
 *     Callback function that executes the queued up delayed work functions.
 *     Runs from the thread taskqueue, so work functions may sleep.
 *
 * Results:
 *     None.
 *
 * Side effects:
 *     Frees each processed vmci_delayed_work_info entry.
 *
 *------------------------------------------------------------------------------
 */

static void
vmci_delayed_work_fn_cb(void *context, int data)
{
	vmci_list(vmci_delayed_work_info) temp_list;

	vmci_list_init(&temp_list);

	/*
	 * Swap vmci_delayed_work_infos list with the empty temp_list while
	 * holding a lock. vmci_delayed_work_infos would then be an empty list
	 * and temp_list would contain the elements from the original
	 * vmci_delayed_work_infos. Finally, iterate through temp_list
	 * executing the delayed callbacks.
	 */

	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
	vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos,
	    vmci_delayed_work_info, entry);
	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);

	/* The lock is dropped here, so work_fn callbacks may block. */
	while (!vmci_list_empty(&temp_list)) {
		struct vmci_delayed_work_info *delayed_work_info =
		    vmci_list_first(&temp_list);

		delayed_work_info->work_fn(delayed_work_info->data);

		vmci_list_remove(delayed_work_info, entry);
		vmci_free_kernel_mem(delayed_work_info,
		    sizeof(*delayed_work_info));
	}
}
1089 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_schedule_delayed_work_fn --
 *
 *     Schedule the specified callback to run later from the thread
 *     taskqueue (see vmci_delayed_work_fn_cb()).
 *
 * Results:
 *     VMCI_SUCCESS if queued, VMCI_ERROR_NO_MEM otherwise.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data)
{
	struct vmci_delayed_work_info *delayed_work_info;

	/* Atomic allocation: callers may be in a context that cannot sleep. */
	delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info),
	    VMCI_MEMORY_ATOMIC);

	if (!delayed_work_info)
		return (VMCI_ERROR_NO_MEM);

	delayed_work_info->work_fn = work_fn;
	delayed_work_info->data = data;
	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
	vmci_list_insert(&vmci_sc->vmci_delayed_work_infos,
	    delayed_work_info, entry);
	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);

	/* Kick the taskqueue; enqueueing an already-pending task is fine. */
	taskqueue_enqueue(taskqueue_thread,
	    &vmci_sc->vmci_delayed_work_task);

	return (VMCI_SUCCESS);
}
1129 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_send_datagram --
 *
 *     VM to hypervisor call mechanism. Writes the datagram to the device's
 *     data-out port with a "rep outsb" and reads back the result register,
 *     all under the device spin mutex.
 *
 * Results:
 *     The result of the hypercall.
 *
 * Side effects:
 *     None.
 *
 *------------------------------------------------------------------------------
 */

int
vmci_send_datagram(struct vmci_datagram *dg)
{
	int result;

	if (dg == NULL)
		return (VMCI_ERROR_INVALID_ARGS);

	/*
	 * Need to acquire spinlock on the device because
	 * the datagram data may be spread over multiple pages and the monitor
	 * may interleave device user rpc calls from multiple VCPUs. Acquiring
	 * the spinlock precludes that possibility. Disabling interrupts to
	 * avoid incoming datagrams during a "rep out" and possibly landing up
	 * in this function.
	 */
	mtx_lock_spin(&vmci_sc->vmci_spinlock);

	/*
	 * Send the datagram and retrieve the return value from the result
	 * register. The outsb transfers VMCI_DG_SIZE(dg) bytes starting at
	 * dg to the VMCI_DATA_OUT_ADDR I/O port.
	 */
	__asm__ __volatile__(
	    "cld\n\t"
	    "rep outsb\n\t"
	    : /* No output. */
	    : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR),
	    "c"(VMCI_DG_SIZE(dg)), "S"(dg)
	    );

	/*
	 * XXX: Should read result high port as well when updating handlers to
	 * return 64bit.
	 */

	result = bus_space_read_4(vmci_sc->vmci_iot0,
	    vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR);
	mtx_unlock_spin(&vmci_sc->vmci_spinlock);

	return (result);
}
1187