xref: /freebsd/sys/dev/vmware/vmci/vmci.c (revision 53120fbb68952b7d620c2c0e1cf05c5017fc1b27)
1 /*-
2  * Copyright (c) 2018 VMware, Inc.
3  *
4  * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
5  */
6 
7 /* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */
8 
9 #include <sys/param.h>
10 #include <sys/bus.h>
11 #include <sys/kernel.h>
12 #include <sys/malloc.h>
13 #include <sys/module.h>
14 #include <sys/rman.h>
15 #include <sys/systm.h>
16 
17 #include <dev/pci/pcireg.h>
18 #include <dev/pci/pcivar.h>
19 
20 #include <machine/bus.h>
21 
22 #include "vmci.h"
23 #include "vmci_doorbell.h"
24 #include "vmci_driver.h"
25 #include "vmci_kernel_defs.h"
26 #include "vmci_queue_pair.h"
27 
28 static int	vmci_probe(device_t);
29 static int	vmci_attach(device_t);
30 static int	vmci_detach(device_t);
31 static int	vmci_shutdown(device_t);
32 
33 static int	vmci_map_bars(struct vmci_softc *);
34 static void	vmci_unmap_bars(struct vmci_softc *);
35 
36 static int	vmci_config_capabilities(struct vmci_softc *);
37 
38 static int	vmci_dma_malloc_int(struct vmci_softc *, bus_size_t,
39 		    bus_size_t, struct vmci_dma_alloc *);
40 static void	vmci_dma_free_int(struct vmci_softc *,
41 		    struct vmci_dma_alloc *);
42 
43 static int	vmci_config_interrupts(struct vmci_softc *);
44 static int	vmci_config_interrupt(struct vmci_softc *);
45 static int	vmci_check_intr_cnt(struct vmci_softc *);
46 static int	vmci_allocate_interrupt_resources(struct vmci_softc *);
47 static int	vmci_setup_interrupts(struct vmci_softc *);
48 static void	vmci_dismantle_interrupts(struct vmci_softc *);
49 static void	vmci_interrupt(void *);
50 static void	vmci_interrupt_bm(void *);
51 static void	dispatch_datagrams(void *, int);
52 static void	process_bitmap(void *, int);
53 
54 static void	vmci_delayed_work_fn_cb(void *context, int data);
55 
/*
 * Method table handed to the bus framework through vmci_driver; it binds the
 * generic device interface methods to this file's implementations.
 */
static device_method_t vmci_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,		vmci_probe),
	DEVMETHOD(device_attach,	vmci_attach),
	DEVMETHOD(device_detach,	vmci_detach),
	DEVMETHOD(device_shutdown,	vmci_shutdown),

	DEVMETHOD_END
};
65 
/*
 * Driver description: driver name, method table, and the size of the
 * per-device softc (struct vmci_softc).
 */
static driver_t vmci_driver = {
	"vmci", vmci_methods, sizeof(struct vmci_softc)
};
69 
/* Register vmci_driver on the pci bus and publish the module version. */
DRIVER_MODULE(vmci, pci, vmci_driver, 0, 0);
MODULE_VERSION(vmci, VMCI_VERSION);
/*
 * PCI IDs handled by this driver. vmci_probe() matches devices against this
 * table, and the table is also exported through MODULE_PNP_INFO below.
 */
const struct {
	uint16_t vendor;	/* PCI vendor ID. */
	uint16_t device;	/* PCI device ID. */
	const char *desc;	/* Description set on a matching device. */
} vmci_ids[] = {
	{ VMCI_VMWARE_VENDOR_ID, VMCI_VMWARE_DEVICE_ID,
	    "VMware Virtual Machine Communication Interface" },
};
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, vmci, vmci_ids,
    nitems(vmci_ids));

/* Declare the dependency of this module on the pci module. */
MODULE_DEPEND(vmci, pci, 1, 1, 1);
84 
/*
 * Softc of the attached device, set by vmci_attach(). The interrupt handlers
 * and the exported helpers in this file reach the device through this
 * pointer instead of taking it as an argument.
 */
static struct vmci_softc *vmci_sc;

/* Prefix prepended to the log messages emitted by this file. */
#define LGPFX	"vmci: "
/*
 * Allocate a buffer for incoming datagrams globally to avoid repeated
 * allocation in the interrupt handler's atomic context.
 * The buffer is allocated in vmci_config_interrupts() and released in
 * vmci_dismantle_interrupts().
 */
static uint8_t *data_buffer = NULL;
static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE;
94 
/*
 * One queued delayed-work request. Entries are created by
 * vmci_schedule_delayed_work_fn() and consumed and freed by
 * vmci_delayed_work_fn_cb().
 */
struct vmci_delayed_work_info {
	vmci_work_fn	*work_fn;	/* Function to run. */
	void		*data;		/* Argument passed to work_fn. */
	vmci_list_item(vmci_delayed_work_info) entry;	/* List linkage. */
};
100 
101 /*
102  *------------------------------------------------------------------------------
103  *
104  * vmci_probe --
105  *
106  *     Probe to see if the VMCI device is present.
107  *
108  * Results:
109  *     BUS_PROBE_DEFAULT if device exists, ENXIO otherwise.
110  *
111  * Side effects:
112  *     None.
113  *
114  *------------------------------------------------------------------------------
115  */
116 
117 static int
118 vmci_probe(device_t dev)
119 {
120 
121 	if (pci_get_vendor(dev) == vmci_ids[0].vendor &&
122 	    pci_get_device(dev) == vmci_ids[0].device) {
123 		device_set_desc(dev, vmci_ids[0].desc);
124 
125 		return (BUS_PROBE_DEFAULT);
126 	}
127 
128 	return (ENXIO);
129 }
130 
131 /*
132  *------------------------------------------------------------------------------
133  *
134  * vmci_attach --
135  *
136  *     Attach VMCI device to the system after vmci_probe() has been called and
137  *     the device has been detected.
138  *
139  * Results:
140  *     0 if success, ENXIO otherwise.
141  *
142  * Side effects:
143  *     None.
144  *
145  *------------------------------------------------------------------------------
146  */
147 
148 static int
149 vmci_attach(device_t dev)
150 {
151 	struct vmci_softc *sc;
152 	int error, i;
153 
154 	sc = device_get_softc(dev);
155 	sc->vmci_dev = dev;
156 	vmci_sc = sc;
157 
158 	data_buffer = NULL;
159 	sc->vmci_num_intr = 0;
160 	for (i = 0; i < VMCI_MAX_INTRS; i++) {
161 		sc->vmci_intrs[i].vmci_irq = NULL;
162 		sc->vmci_intrs[i].vmci_handler = NULL;
163 	}
164 
165 	TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc);
166 	TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc);
167 
168 	TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc);
169 
170 	pci_enable_busmaster(dev);
171 
172 	mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN);
173 	mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock",
174 	    NULL, MTX_DEF);
175 
176 	error = vmci_map_bars(sc);
177 	if (error) {
178 		VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n");
179 		goto fail;
180 	}
181 
182 	error = vmci_config_capabilities(sc);
183 	if (error) {
184 		VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n");
185 		goto fail;
186 	}
187 
188 	vmci_list_init(&sc->vmci_delayed_work_infos);
189 
190 	vmci_components_init();
191 	vmci_util_init();
192 	error = vmci_qp_guest_endpoints_init();
193 	if (error) {
194 		VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n");
195 		goto fail;
196 	}
197 
198 	error = vmci_config_interrupts(sc);
199 	if (error)
200 		VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n");
201 
202 fail:
203 	if (error) {
204 		vmci_detach(dev);
205 		return (ENXIO);
206 	}
207 
208 	return (0);
209 }
210 
211 /*
212  *------------------------------------------------------------------------------
213  *
214  * vmci_detach --
215  *
216  *     Detach the VMCI device.
217  *
218  * Results:
219  *     0
220  *
221  * Side effects:
222  *     None.
223  *
224  *------------------------------------------------------------------------------
225  */
226 
227 static int
228 vmci_detach(device_t dev)
229 {
230 	struct vmci_softc *sc;
231 
232 	sc = device_get_softc(dev);
233 
234 	vmci_qp_guest_endpoints_exit();
235 	vmci_util_exit();
236 
237 	vmci_dismantle_interrupts(sc);
238 
239 	vmci_components_cleanup();
240 
241 	if mtx_initialized(&sc->vmci_spinlock) {
242 		taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
243 		mtx_destroy(&sc->vmci_delayed_work_lock);
244 	}
245 
246 	if (sc->vmci_res0 != NULL)
247 		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
248 		    VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET);
249 
250 	if (sc->vmci_notifications_bitmap.dma_vaddr != NULL)
251 		vmci_dma_free(&sc->vmci_notifications_bitmap);
252 
253 	vmci_unmap_bars(sc);
254 
255 	if mtx_initialized(&sc->vmci_spinlock)
256 		mtx_destroy(&sc->vmci_spinlock);
257 
258 	pci_disable_busmaster(dev);
259 
260 	return (0);
261 }
262 
263 /*
264  *------------------------------------------------------------------------------
265  *
266  * vmci_shutdown --
267  *
268  *     This function is called during system shutdown. We don't do anything.
269  *
270  * Results:
271  *     0
272  *
273  * Side effects:
274  *     None.
275  *
276  *------------------------------------------------------------------------------
277  */
278 
279 static int
280 vmci_shutdown(device_t dev)
281 {
282 
283 	return (0);
284 }
285 
286 /*
287  *------------------------------------------------------------------------------
288  *
289  * vmci_map_bars --
290  *
291  *     Maps the PCI I/O and MMIO BARs.
292  *
293  * Results:
294  *     0 on success, ENXIO otherwise.
295  *
296  * Side effects:
297  *     None.
298  *
299  *------------------------------------------------------------------------------
300  */
301 
302 static int
303 vmci_map_bars(struct vmci_softc *sc)
304 {
305 	int rid;
306 
307 	/* Map the PCI I/O BAR: BAR0 */
308 	rid = PCIR_BAR(0);
309 	sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT,
310 	    &rid, RF_ACTIVE);
311 	if (sc->vmci_res0 == NULL) {
312 		VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n");
313 		return (ENXIO);
314 	}
315 
316 	sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0);
317 	sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0);
318 	sc->vmci_ioaddr = rman_get_start(sc->vmci_res0);
319 
320 	/* Map the PCI MMIO BAR: BAR1 */
321 	rid = PCIR_BAR(1);
322 	sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY,
323 	    &rid, RF_ACTIVE);
324 	if (sc->vmci_res1 == NULL) {
325 		VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n");
326 		return (ENXIO);
327 	}
328 
329 	sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1);
330 	sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1);
331 
332 	return (0);
333 }
334 
335 /*
336  *------------------------------------------------------------------------------
337  *
338  * vmci_unmap_bars --
339  *
340  *     Unmaps the VMCI PCI I/O and MMIO BARs.
341  *
342  * Results:
343  *     None.
344  *
345  * Side effects:
346  *     None.
347  *
348  *------------------------------------------------------------------------------
349  */
350 
351 static void
352 vmci_unmap_bars(struct vmci_softc *sc)
353 {
354 	int rid;
355 
356 	if (sc->vmci_res0 != NULL) {
357 		rid = PCIR_BAR(0);
358 		bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid,
359 		    sc->vmci_res0);
360 		sc->vmci_res0 = NULL;
361 	}
362 
363 	if (sc->vmci_res1 != NULL) {
364 		rid = PCIR_BAR(1);
365 		bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid,
366 		    sc->vmci_res1);
367 		sc->vmci_res1 = NULL;
368 	}
369 }
370 
371 /*
372  *------------------------------------------------------------------------------
373  *
374  * vmci_config_capabilities --
375  *
376  *     Check the VMCI device capabilities and configure the device accordingly.
377  *
378  * Results:
379  *     0 if success, ENODEV otherwise.
380  *
381  * Side effects:
382  *     Device capabilities are enabled.
383  *
384  *------------------------------------------------------------------------------
385  */
386 
387 static int
388 vmci_config_capabilities(struct vmci_softc *sc)
389 {
390 	unsigned long bitmap_PPN;
391 	int error;
392 
393 	/*
394 	 * Verify that the VMCI device supports the capabilities that we
395 	 * need. Datagrams are necessary and notifications will be used
396 	 * if the device supports it.
397 	 */
398 	sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0,
399 	    VMCI_CAPS_ADDR);
400 
401 	if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) {
402 		VMCI_LOG_ERROR(LGPFX"VMCI device does not support "
403 		    "datagrams.\n");
404 		return (ENODEV);
405 	}
406 
407 	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
408 		sc->capabilities = VMCI_CAPS_DATAGRAM;
409 		error = vmci_dma_malloc(PAGE_SIZE, 1,
410 		    &sc->vmci_notifications_bitmap);
411 		if (error)
412 			VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for "
413 			    "notification bitmap.\n");
414 		else {
415 			memset(sc->vmci_notifications_bitmap.dma_vaddr, 0,
416 			    PAGE_SIZE);
417 			sc->capabilities |= VMCI_CAPS_NOTIFICATIONS;
418 		}
419 	} else
420 		sc->capabilities = VMCI_CAPS_DATAGRAM;
421 
422 	/* Let the host know which capabilities we intend to use. */
423 	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
424 	    VMCI_CAPS_ADDR, sc->capabilities);
425 
426 	/*
427 	 * Register notification bitmap with device if that capability is
428 	 * used.
429 	 */
430 	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
431 		bitmap_PPN =
432 		    sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT;
433 		vmci_register_notification_bitmap(bitmap_PPN);
434 	}
435 
436 	/* Check host capabilities. */
437 	if (!vmci_check_host_capabilities())
438 		return (ENODEV);
439 
440 	return (0);
441 }
442 
443 /*
444  *------------------------------------------------------------------------------
445  *
446  * vmci_dmamap_cb --
447  *
448  *     Callback to receive mapping information resulting from the load of a
449  *     bus_dmamap_t via bus_dmamap_load()
450  *
451  * Results:
452  *     None.
453  *
454  * Side effects:
455  *     None.
456  *
457  *------------------------------------------------------------------------------
458  */
459 
460 static void
461 vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
462 {
463 	bus_addr_t *baddr = arg;
464 
465 	if (error == 0)
466 		*baddr = segs->ds_addr;
467 }
468 
469 /*
470  *------------------------------------------------------------------------------
471  *
472  * vmci_dma_malloc_int --
473  *
474  *     Internal function that allocates DMA memory.
475  *
476  * Results:
477  *     0 if success.
478  *     ENOMEM if insufficient memory.
479  *     EINPROGRESS if mapping is deferred.
480  *     EINVAL if the request was invalid.
481  *
482  * Side effects:
483  *     DMA memory is allocated.
484  *
485  *------------------------------------------------------------------------------
486  */
487 
488 static int
489 vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align,
490     struct vmci_dma_alloc *dma)
491 {
492 	int error;
493 
494 	bzero(dma, sizeof(struct vmci_dma_alloc));
495 
496 	error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev),
497 	    align, 0,		/* alignment, bounds */
498 	    BUS_SPACE_MAXADDR,	/* lowaddr */
499 	    BUS_SPACE_MAXADDR,	/* highaddr */
500 	    NULL, NULL,		/* filter, filterarg */
501 	    size,		/* maxsize */
502 	    1,			/* nsegments */
503 	    size,		/* maxsegsize */
504 	    BUS_DMA_ALLOCNOW,	/* flags */
505 	    NULL,		/* lockfunc */
506 	    NULL,		/* lockfuncarg */
507 	    &dma->dma_tag);
508 	if (error) {
509 		VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error);
510 		goto fail;
511 	}
512 
513 	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
514 	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
515 	if (error) {
516 		VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error);
517 		goto fail;
518 	}
519 
520 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
521 	    size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
522 	if (error) {
523 		VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error);
524 		goto fail;
525 	}
526 
527 	dma->dma_size = size;
528 
529 fail:
530 	if (error)
531 		vmci_dma_free(dma);
532 
533 	return (error);
534 }
535 
536 /*
537  *------------------------------------------------------------------------------
538  *
539  * vmci_dma_malloc --
540  *
541  *     This function is a wrapper around vmci_dma_malloc_int for callers
542  *     outside of this module. Since we only support a single VMCI device, this
543  *     wrapper provides access to the device softc structure.
544  *
545  * Results:
546  *     0 if success.
547  *     ENOMEM if insufficient memory.
548  *     EINPROGRESS if mapping is deferred.
549  *     EINVAL if the request was invalid.
550  *
551  * Side effects:
552  *     DMA memory is allocated.
553  *
554  *------------------------------------------------------------------------------
555  */
556 
557 int
558 vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma)
559 {
560 
561 	return (vmci_dma_malloc_int(vmci_sc, size, align, dma));
562 }
563 
564 /*
565  *------------------------------------------------------------------------------
566  *
567  * vmci_dma_free_int --
568  *
569  *     Internal function that frees DMA memory.
570  *
571  * Results:
572  *     None.
573  *
574  * Side effects:
575  *     Frees DMA memory.
576  *
577  *------------------------------------------------------------------------------
578  */
579 
580 static void
581 vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma)
582 {
583 
584 	if (dma->dma_tag != NULL) {
585 		if (dma->dma_paddr != 0) {
586 			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
587 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
588 			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
589 		}
590 
591 		if (dma->dma_vaddr != NULL)
592 			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
593 			    dma->dma_map);
594 
595 		bus_dma_tag_destroy(dma->dma_tag);
596 	}
597 	bzero(dma, sizeof(struct vmci_dma_alloc));
598 }
599 
600 /*
601  *------------------------------------------------------------------------------
602  *
603  * vmci_dma_free --
604  *
605  *     This function is a wrapper around vmci_dma_free_int for callers outside
606  *     of this module. Since we only support a single VMCI device, this wrapper
607  *     provides access to the device softc structure.
608  *
609  * Results:
610  *     None.
611  *
612  * Side effects:
613  *     Frees DMA memory.
614  *
615  *------------------------------------------------------------------------------
616  */
617 
618 void
619 vmci_dma_free(struct vmci_dma_alloc *dma)
620 {
621 
622 	vmci_dma_free_int(vmci_sc, dma);
623 }
624 
625 /*
626  *------------------------------------------------------------------------------
627  *
628  * vmci_config_interrupts --
629  *
630  *     Configures and enables interrupts. Try to configure MSI-X. If this fails,
631  *     try to configure MSI. If even this fails, try legacy interrupts.
632  *
633  * Results:
634  *     0 if success.
635  *     ENOMEM if insufficient memory.
636  *     ENODEV if the device doesn't support interrupts.
637  *     ENXIO if the device configuration failed.
638  *
639  * Side effects:
640  *     Interrupts get enabled if successful.
641  *
642  *------------------------------------------------------------------------------
643  */
644 
645 static int
646 vmci_config_interrupts(struct vmci_softc *sc)
647 {
648 	int error;
649 
650 	data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT);
651 	if (data_buffer == NULL)
652 		return (ENOMEM);
653 
654 	sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX;
655 	error = vmci_config_interrupt(sc);
656 	if (error) {
657 		sc->vmci_intr_type = VMCI_INTR_TYPE_MSI;
658 		error = vmci_config_interrupt(sc);
659 	}
660 	if (error) {
661 		sc->vmci_intr_type = VMCI_INTR_TYPE_INTX;
662 		error = vmci_config_interrupt(sc);
663 	}
664 	if (error)
665 		return (error);
666 
667 	/* Enable specific interrupt bits. */
668 	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS)
669 		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
670 		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION);
671 	else
672 		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
673 		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM);
674 
675 	/* Enable interrupts. */
676 	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
677 	    VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE);
678 
679 	return (0);
680 }
681 
682 /*
683  *------------------------------------------------------------------------------
684  *
685  * vmci_config_interrupt --
686  *
687  *     Check the number of interrupts supported, allocate resources and setup
688  *     interrupts.
689  *
690  * Results:
691  *     0 if success.
692  *     ENOMEM if insufficient memory.
693  *     ENODEV if the device doesn't support interrupts.
694  *     ENXIO if the device configuration failed.
695  *
696  * Side effects:
697  *     Resources get allocated and interrupts get setup (but not enabled) if
698  *     successful.
699  *
700  *------------------------------------------------------------------------------
701  */
702 
703 static int
704 vmci_config_interrupt(struct vmci_softc *sc)
705 {
706 	int error;
707 
708 	error = vmci_check_intr_cnt(sc);
709 	if (error)
710 		return (error);
711 
712 	error = vmci_allocate_interrupt_resources(sc);
713 	if (error)
714 		return (error);
715 
716 	error = vmci_setup_interrupts(sc);
717 	if (error)
718 		return (error);
719 
720 	return (0);
721 }
722 
723 /*
724  *------------------------------------------------------------------------------
725  *
726  * vmci_check_intr_cnt --
727  *
728  *     Check the number of interrupts supported by the device and ask PCI bus
729  *     to allocate appropriate number of interrupts.
730  *
731  * Results:
732  *     0 if success.
733  *     ENODEV if the device doesn't support any interrupts.
734  *     ENXIO if the device configuration failed.
735  *
736  * Side effects:
737  *     Resources get allocated on success.
738  *
739  *------------------------------------------------------------------------------
740  */
741 
742 static int
743 vmci_check_intr_cnt(struct vmci_softc *sc)
744 {
745 
746 	if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) {
747 		sc->vmci_num_intr = 1;
748 		return (0);
749 	}
750 
751 	/*
752 	 * Make sure that the device supports the required number of MSI/MSI-X
753 	 * messages. We try for 2 MSI-X messages but 1 is good too. We need at
754 	 * least 1 MSI message.
755 	 */
756 	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
757 	    pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev);
758 
759 	if (!sc->vmci_num_intr) {
760 		VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt"
761 		    " messages");
762 		return (ENODEV);
763 	}
764 
765 	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
766 	    VMCI_MAX_INTRS : 1;
767 	if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) {
768 		if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr))
769 			return (ENXIO);
770 	} else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) {
771 		if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr))
772 			return (ENXIO);
773 	}
774 
775 	return (0);
776 }
777 
778 /*
779  *------------------------------------------------------------------------------
780  *
781  * vmci_allocate_interrupt_resources --
782  *
783  *     Allocate resources necessary for interrupts.
784  *
785  * Results:
786  *     0 if success, ENXIO otherwise.
787  *
788  * Side effects:
789  *     Resources get allocated on success.
790  *
791  *------------------------------------------------------------------------------
792  */
793 
794 static int
795 vmci_allocate_interrupt_resources(struct vmci_softc *sc)
796 {
797 	struct resource *irq;
798 	int flags, i, rid;
799 
800 	flags = RF_ACTIVE;
801 	flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0;
802 	rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1;
803 
804 	for (i = 0; i < sc->vmci_num_intr; i++, rid++) {
805 		irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid,
806 		    flags);
807 		if (irq == NULL)
808 			return (ENXIO);
809 		sc->vmci_intrs[i].vmci_irq = irq;
810 		sc->vmci_intrs[i].vmci_rid = rid;
811 	}
812 
813 	return (0);
814 }
815 
816 /*
817  *------------------------------------------------------------------------------
818  *
819  * vmci_setup_interrupts --
820  *
821  *     Sets up the interrupts.
822  *
823  * Results:
824  *     0 if success, appropriate error code from bus_setup_intr otherwise.
825  *
826  * Side effects:
827  *     Interrupt handler gets attached.
828  *
829  *------------------------------------------------------------------------------
830  */
831 
832 static int
833 vmci_setup_interrupts(struct vmci_softc *sc)
834 {
835 	struct vmci_interrupt *intr;
836 	int error, flags;
837 
838 	flags = INTR_TYPE_NET | INTR_MPSAFE;
839 	if (sc->vmci_num_intr > 1)
840 		flags |= INTR_EXCL;
841 
842 	intr = &sc->vmci_intrs[0];
843 	error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL,
844 	    vmci_interrupt, NULL, &intr->vmci_handler);
845 	if (error)
846 		return (error);
847 
848 	if (sc->vmci_num_intr == 2) {
849 		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
850 		    intr->vmci_handler, "dg");
851 		intr = &sc->vmci_intrs[1];
852 		error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags,
853 		    NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler);
854 		if (error)
855 			return (error);
856 		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
857 		    intr->vmci_handler, "bm");
858 	}
859 
860 	return (0);
861 }
862 
863 /*
864  *------------------------------------------------------------------------------
865  *
866  * vmci_interrupt --
867  *
868  *     Interrupt handler for legacy or MSI interrupt, or for first MSI-X
869  *     interrupt (vector VMCI_INTR_DATAGRAM).
870  *
871  * Results:
872  *     None.
873  *
874  * Side effects:
875  *     None.
876  *
877  *------------------------------------------------------------------------------
878  */
879 
880 static void
881 vmci_interrupt(void *arg)
882 {
883 
884 	if (vmci_sc->vmci_num_intr == 2)
885 		taskqueue_enqueue(taskqueue_swi,
886 		    &vmci_sc->vmci_interrupt_dq_task);
887 	else {
888 		unsigned int icr;
889 
890 		icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR);
891 		if (icr == 0 || icr == 0xffffffff)
892 			return;
893 		if (icr & VMCI_ICR_DATAGRAM) {
894 			taskqueue_enqueue(taskqueue_swi,
895 			    &vmci_sc->vmci_interrupt_dq_task);
896 			icr &= ~VMCI_ICR_DATAGRAM;
897 		}
898 		if (icr & VMCI_ICR_NOTIFICATION) {
899 			taskqueue_enqueue(taskqueue_swi,
900 			    &vmci_sc->vmci_interrupt_bm_task);
901 			icr &= ~VMCI_ICR_NOTIFICATION;
902 		}
903 		if (icr != 0)
904 			VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt "
905 			    "cause");
906 	}
907 }
908 
909 /*
910  *------------------------------------------------------------------------------
911  *
912  * vmci_interrupt_bm --
913  *
914  *     Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
915  *     which is for the notification bitmap. Will only get called if we are
916  *     using MSI-X with exclusive vectors.
917  *
918  * Results:
919  *     None.
920  *
921  * Side effects:
922  *     None.
923  *
924  *------------------------------------------------------------------------------
925  */
926 
927 static void
928 vmci_interrupt_bm(void *arg)
929 {
930 
931 	ASSERT(vmci_sc->vmci_num_intr == 2);
932 	taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task);
933 }
934 
935 /*
936  *------------------------------------------------------------------------------
937  *
938  * dispatch_datagrams --
939  *
940  *     Reads and dispatches incoming datagrams.
941  *
942  * Results:
943  *     None.
944  *
945  * Side effects:
946  *     Reads data from the device.
947  *
948  *------------------------------------------------------------------------------
949  */
950 
951 static void
952 dispatch_datagrams(void *context, int data)
953 {
954 
955 	if (data_buffer == NULL)
956 		VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer "
957 		    "present");
958 
959 	vmci_read_datagrams_from_port((vmci_io_handle) 0,
960 	    vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR,
961 	    data_buffer, data_buffer_size);
962 }
963 
964 /*
965  *------------------------------------------------------------------------------
966  *
967  * process_bitmap --
968  *
969  *     Scans the notification bitmap for raised flags, clears them and handles
970  *     the notifications.
971  *
972  * Results:
973  *     None.
974  *
975  * Side effects:
976  *     None.
977  *
978  *------------------------------------------------------------------------------
979  */
980 
981 static void
982 process_bitmap(void *context, int data)
983 {
984 
985 	if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL)
986 		VMCI_LOG_INFO(LGPFX"process_bitmaps(): no bitmap present");
987 
988 	vmci_scan_notification_bitmap(
989 	    vmci_sc->vmci_notifications_bitmap.dma_vaddr);
990 }
991 
992 /*
993  *------------------------------------------------------------------------------
994  *
995  * vmci_dismantle_interrupts --
996  *
997  *     Releases resources, detaches the interrupt handler and drains the task
998  *     queue.
999  *
1000  * Results:
1001  *     None.
1002  *
1003  * Side effects:
1004  *     No more interrupts.
1005  *
1006  *------------------------------------------------------------------------------
1007  */
1008 
1009 static void
1010 vmci_dismantle_interrupts(struct vmci_softc *sc)
1011 {
1012 	struct vmci_interrupt *intr;
1013 	int i;
1014 
1015 	for (i = 0; i < sc->vmci_num_intr; i++) {
1016 		intr = &sc->vmci_intrs[i];
1017 		if (intr->vmci_handler != NULL) {
1018 			bus_teardown_intr(sc->vmci_dev, intr->vmci_irq,
1019 			    intr->vmci_handler);
1020 			intr->vmci_handler = NULL;
1021 		}
1022 		if (intr->vmci_irq != NULL) {
1023 			bus_release_resource(sc->vmci_dev, SYS_RES_IRQ,
1024 			    intr->vmci_rid, intr->vmci_irq);
1025 			intr->vmci_irq = NULL;
1026 			intr->vmci_rid = -1;
1027 		}
1028 	}
1029 
1030 	if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) &&
1031 	    (sc->vmci_num_intr))
1032 		pci_release_msi(sc->vmci_dev);
1033 
1034 	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task);
1035 	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task);
1036 
1037 	if (data_buffer != NULL)
1038 		free(data_buffer, M_DEVBUF);
1039 }
1040 
1041 /*
1042  *------------------------------------------------------------------------------
1043  *
1044  * vmci_delayed_work_fn_cb --
1045  *
1046  *     Callback function that executes the queued up delayed work functions.
1047  *
1048  * Results:
1049  *     None.
1050  *
1051  * Side effects:
1052  *     None.
1053  *
1054  *------------------------------------------------------------------------------
1055  */
1056 
1057 static void
1058 vmci_delayed_work_fn_cb(void *context, int data)
1059 {
1060 	vmci_list(vmci_delayed_work_info) temp_list;
1061 
1062 	vmci_list_init(&temp_list);
1063 
1064 	/*
1065 	 * Swap vmci_delayed_work_infos list with the empty temp_list while
1066 	 * holding a lock. vmci_delayed_work_infos would then be an empty list
1067 	 * and temp_list would contain the elements from the original
1068 	 * vmci_delayed_work_infos. Finally, iterate through temp_list
1069 	 * executing the delayed callbacks.
1070 	 */
1071 
1072 	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
1073 	vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos,
1074 	    vmci_delayed_work_info, entry);
1075 	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
1076 
1077 	while (!vmci_list_empty(&temp_list)) {
1078 		struct vmci_delayed_work_info *delayed_work_info =
1079 		    vmci_list_first(&temp_list);
1080 
1081 		delayed_work_info->work_fn(delayed_work_info->data);
1082 
1083 		vmci_list_remove(delayed_work_info, entry);
1084 		vmci_free_kernel_mem(delayed_work_info,
1085 		    sizeof(*delayed_work_info));
1086 	}
1087 }
1088 
1089 /*
1090  *------------------------------------------------------------------------------
1091  *
1092  * vmci_schedule_delayed_work_fn --
1093  *
1094  *     Schedule the specified callback.
1095  *
1096  * Results:
1097  *     0 if success, error code otherwise.
1098  *
1099  * Side effects:
1100  *     None.
1101  *
1102  *------------------------------------------------------------------------------
1103  */
1104 
1105 int
1106 vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data)
1107 {
1108 	struct vmci_delayed_work_info *delayed_work_info;
1109 
1110 	delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info),
1111 	    VMCI_MEMORY_ATOMIC);
1112 
1113 	if (!delayed_work_info)
1114 		return (VMCI_ERROR_NO_MEM);
1115 
1116 	delayed_work_info->work_fn = work_fn;
1117 	delayed_work_info->data = data;
1118 	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
1119 	vmci_list_insert(&vmci_sc->vmci_delayed_work_infos,
1120 	    delayed_work_info, entry);
1121 	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
1122 
1123 	taskqueue_enqueue(taskqueue_thread,
1124 	    &vmci_sc->vmci_delayed_work_task);
1125 
1126 	return (VMCI_SUCCESS);
1127 }
1128 
1129 /*
1130  *------------------------------------------------------------------------------
1131  *
1132  * vmci_send_datagram --
1133  *
1134  *     VM to hypervisor call mechanism.
1135  *
1136  * Results:
1137  *     The result of the hypercall.
1138  *
1139  * Side effects:
1140  *     None.
1141  *
1142  *------------------------------------------------------------------------------
1143  */
1144 
1145 int
1146 vmci_send_datagram(struct vmci_datagram *dg)
1147 {
1148 	int result;
1149 
1150 	if (dg == NULL)
1151 		return (VMCI_ERROR_INVALID_ARGS);
1152 
1153 	/*
1154 	 * Need to acquire spinlock on the device because
1155 	 * the datagram data may be spread over multiple pages and the monitor
1156 	 * may interleave device user rpc calls from multiple VCPUs. Acquiring
1157 	 * the spinlock precludes that possibility. Disabling interrupts to
1158 	 * avoid incoming datagrams during a "rep out" and possibly landing up
1159 	 * in this function.
1160 	 */
1161 	mtx_lock_spin(&vmci_sc->vmci_spinlock);
1162 
1163 	/*
1164 	 * Send the datagram and retrieve the return value from the result
1165 	 * register.
1166 	 */
1167 	__asm__ __volatile__(
1168 	    "cld\n\t"
1169 	    "rep outsb\n\t"
1170 	    : /* No output. */
1171 	    : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR),
1172 	    "c"(VMCI_DG_SIZE(dg)), "S"(dg)
1173 	    );
1174 
1175 	/*
1176 	 * XXX: Should read result high port as well when updating handlers to
1177 	 * return 64bit.
1178 	 */
1179 
1180 	result = bus_space_read_4(vmci_sc->vmci_iot0,
1181 	    vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR);
1182 	mtx_unlock_spin(&vmci_sc->vmci_spinlock);
1183 
1184 	return (result);
1185 }
1186