xref: /freebsd/sys/dev/vmware/vmci/vmci.c (revision 6683132d54bd6d589889e43dabdc53d35e38a028)
1 /*-
2  * Copyright (c) 2018 VMware, Inc.
3  *
4  * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
5  */
6 
7 /* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */
8 
9 #include <sys/cdefs.h>
10 __FBSDID("$FreeBSD$");
11 
12 #include <sys/param.h>
13 #include <sys/bus.h>
14 #include <sys/kernel.h>
15 #include <sys/malloc.h>
16 #include <sys/module.h>
17 #include <sys/rman.h>
18 #include <sys/systm.h>
19 
20 #include <dev/pci/pcireg.h>
21 #include <dev/pci/pcivar.h>
22 
23 #include <machine/bus.h>
24 
25 #include "vmci.h"
26 #include "vmci_doorbell.h"
27 #include "vmci_driver.h"
28 #include "vmci_kernel_defs.h"
29 #include "vmci_queue_pair.h"
30 
31 static int	vmci_probe(device_t);
32 static int	vmci_attach(device_t);
33 static int	vmci_detach(device_t);
34 static int	vmci_shutdown(device_t);
35 
36 static int	vmci_map_bars(struct vmci_softc *);
37 static void	vmci_unmap_bars(struct vmci_softc *);
38 
39 static int	vmci_config_capabilities(struct vmci_softc *);
40 
41 static int	vmci_dma_malloc_int(struct vmci_softc *, bus_size_t,
42 		    bus_size_t, struct vmci_dma_alloc *);
43 static void	vmci_dma_free_int(struct vmci_softc *,
44 		    struct vmci_dma_alloc *);
45 
46 static int	vmci_config_interrupts(struct vmci_softc *);
47 static int	vmci_config_interrupt(struct vmci_softc *);
48 static int	vmci_check_intr_cnt(struct vmci_softc *);
49 static int	vmci_allocate_interrupt_resources(struct vmci_softc *);
50 static int	vmci_setup_interrupts(struct vmci_softc *);
51 static void	vmci_dismantle_interrupts(struct vmci_softc *);
52 static void	vmci_interrupt(void *);
53 static void	vmci_interrupt_bm(void *);
54 static void	dispatch_datagrams(void *, int);
55 static void	process_bitmap(void *, int);
56 
57 static void	vmci_delayed_work_fn_cb(void *context, int data);
58 
59 static device_method_t vmci_methods[] = {
60 	/* Device interface. */
61 	DEVMETHOD(device_probe,		vmci_probe),
62 	DEVMETHOD(device_attach,	vmci_attach),
63 	DEVMETHOD(device_detach,	vmci_detach),
64 	DEVMETHOD(device_shutdown,	vmci_shutdown),
65 
66 	DEVMETHOD_END
67 };
68 
69 static driver_t vmci_driver = {
70 	"vmci", vmci_methods, sizeof(struct vmci_softc)
71 };
72 
73 static devclass_t vmci_devclass;
74 DRIVER_MODULE(vmci, pci, vmci_driver, vmci_devclass, 0, 0);
75 MODULE_VERSION(vmci, VMCI_VERSION);
76 
77 MODULE_DEPEND(vmci, pci, 1, 1, 1);
78 
79 static struct vmci_softc *vmci_sc;
80 
81 #define LGPFX	"vmci: "
82 /*
83  * Allocate a buffer for incoming datagrams globally to avoid repeated
84  * allocation in the interrupt handler's atomic context.
85  */
86 static uint8_t *data_buffer = NULL;
87 static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE;
88 
89 struct vmci_delayed_work_info {
90 	vmci_work_fn	*work_fn;
91 	void		*data;
92 	vmci_list_item(vmci_delayed_work_info) entry;
93 };
94 
95 /*
96  *------------------------------------------------------------------------------
97  *
98  * vmci_probe --
99  *
100  *     Probe to see if the VMCI device is present.
101  *
102  * Results:
103  *     BUS_PROBE_DEFAULT if device exists, ENXIO otherwise.
104  *
105  * Side effects:
106  *     None.
107  *
108  *------------------------------------------------------------------------------
109  */
110 
111 static int
112 vmci_probe(device_t dev)
113 {
114 
115 	if (pci_get_vendor(dev) == VMCI_VMWARE_VENDOR_ID &&
116 	    pci_get_device(dev) == VMCI_VMWARE_DEVICE_ID) {
117 		device_set_desc(dev,
118 		    "VMware Virtual Machine Communication Interface");
119 
120 		return (BUS_PROBE_DEFAULT);
121 	}
122 
123 	return (ENXIO);
124 }
125 
126 /*
127  *------------------------------------------------------------------------------
128  *
129  * vmci_attach --
130  *
131  *     Attach VMCI device to the system after vmci_probe() has been called and
132  *     the device has been detected.
133  *
134  * Results:
135  *     0 if success, ENXIO otherwise.
136  *
137  * Side effects:
138  *     None.
139  *
140  *------------------------------------------------------------------------------
141  */
142 
143 static int
144 vmci_attach(device_t dev)
145 {
146 	struct vmci_softc *sc;
147 	int error, i;
148 
149 	sc = device_get_softc(dev);
150 	sc->vmci_dev = dev;
151 	vmci_sc = sc;
152 
153 	data_buffer = NULL;
154 	sc->vmci_num_intr = 0;
155 	for (i = 0; i < VMCI_MAX_INTRS; i++) {
156 		sc->vmci_intrs[i].vmci_irq = NULL;
157 		sc->vmci_intrs[i].vmci_handler = NULL;
158 	}
159 
160 	TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc);
161 	TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc);
162 
163 	TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc);
164 
165 	pci_enable_busmaster(dev);
166 
167 	mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN);
168 	mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock",
169 	    NULL, MTX_DEF);
170 
171 	error = vmci_map_bars(sc);
172 	if (error) {
173 		VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n");
174 		goto fail;
175 	}
176 
177 	error = vmci_config_capabilities(sc);
178 	if (error) {
179 		VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n");
180 		goto fail;
181 	}
182 
183 	vmci_list_init(&sc->vmci_delayed_work_infos);
184 
185 	vmci_components_init();
186 	vmci_util_init();
187 	error = vmci_qp_guest_endpoints_init();
188 	if (error) {
189 		VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n");
190 		goto fail;
191 	}
192 
193 	error = vmci_config_interrupts(sc);
194 	if (error)
195 		VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n");
196 
197 fail:
198 	if (error) {
199 		vmci_detach(dev);
200 		return (ENXIO);
201 	}
202 
203 	return (0);
204 }
205 
206 /*
207  *------------------------------------------------------------------------------
208  *
209  * vmci_detach --
210  *
211  *     Detach the VMCI device.
212  *
213  * Results:
214  *     0
215  *
216  * Side effects:
217  *     None.
218  *
219  *------------------------------------------------------------------------------
220  */
221 
222 static int
223 vmci_detach(device_t dev)
224 {
225 	struct vmci_softc *sc;
226 
227 	sc = device_get_softc(dev);
228 
229 	vmci_qp_guest_endpoints_exit();
230 	vmci_util_exit();
231 
232 	vmci_dismantle_interrupts(sc);
233 
234 	vmci_components_cleanup();
235 
236 	taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
237 	mtx_destroy(&sc->vmci_delayed_work_lock);
238 
239 	if (sc->vmci_res0 != NULL)
240 		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
241 		    VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET);
242 
243 	if (sc->vmci_notifications_bitmap.dma_vaddr != NULL)
244 		vmci_dma_free(&sc->vmci_notifications_bitmap);
245 
246 	vmci_unmap_bars(sc);
247 
248 	mtx_destroy(&sc->vmci_spinlock);
249 
250 	pci_disable_busmaster(dev);
251 
252 	return (0);
253 }
254 
255 /*
256  *------------------------------------------------------------------------------
257  *
258  * vmci_shutdown --
259  *
260  *     This function is called during system shutdown. We don't do anything.
261  *
262  * Results:
263  *     0
264  *
265  * Side effects:
266  *     None.
267  *
268  *------------------------------------------------------------------------------
269  */
270 
271 static int
272 vmci_shutdown(device_t dev)
273 {
274 
275 	return (0);
276 }
277 
278 /*
279  *------------------------------------------------------------------------------
280  *
281  * vmci_map_bars --
282  *
283  *     Maps the PCI I/O and MMIO BARs.
284  *
285  * Results:
286  *     0 on success, ENXIO otherwise.
287  *
288  * Side effects:
289  *     None.
290  *
291  *------------------------------------------------------------------------------
292  */
293 
294 static int
295 vmci_map_bars(struct vmci_softc *sc)
296 {
297 	int rid;
298 
299 	/* Map the PCI I/O BAR: BAR0 */
300 	rid = PCIR_BAR(0);
301 	sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT,
302 	    &rid, RF_ACTIVE);
303 	if (sc->vmci_res0 == NULL) {
304 		VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n");
305 		return (ENXIO);
306 	}
307 
308 	sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0);
309 	sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0);
310 	sc->vmci_ioaddr = rman_get_start(sc->vmci_res0);
311 
312 	/* Map the PCI MMIO BAR: BAR1 */
313 	rid = PCIR_BAR(1);
314 	sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY,
315 	    &rid, RF_ACTIVE);
316 	if (sc->vmci_res1 == NULL) {
317 		VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n");
318 		return (ENXIO);
319 	}
320 
321 	sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1);
322 	sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1);
323 
324 	return (0);
325 }
326 
327 /*
328  *------------------------------------------------------------------------------
329  *
330  * vmci_unmap_bars --
331  *
332  *     Unmaps the VMCI PCI I/O and MMIO BARs.
333  *
334  * Results:
335  *     None.
336  *
337  * Side effects:
338  *     None.
339  *
340  *------------------------------------------------------------------------------
341  */
342 
343 static void
344 vmci_unmap_bars(struct vmci_softc *sc)
345 {
346 	int rid;
347 
348 	if (sc->vmci_res0 != NULL) {
349 		rid = PCIR_BAR(0);
350 		bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid,
351 		    sc->vmci_res0);
352 		sc->vmci_res0 = NULL;
353 	}
354 
355 	if (sc->vmci_res1 != NULL) {
356 		rid = PCIR_BAR(1);
357 		bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid,
358 		    sc->vmci_res1);
359 		sc->vmci_res1 = NULL;
360 	}
361 }
362 
363 /*
364  *------------------------------------------------------------------------------
365  *
366  * vmci_config_capabilities --
367  *
368  *     Check the VMCI device capabilities and configure the device accordingly.
369  *
370  * Results:
371  *     0 if success, ENODEV otherwise.
372  *
373  * Side effects:
374  *     Device capabilities are enabled.
375  *
376  *------------------------------------------------------------------------------
377  */
378 
379 static int
380 vmci_config_capabilities(struct vmci_softc *sc)
381 {
382 	unsigned long bitmap_PPN;
383 	int error;
384 
385 	/*
386 	 * Verify that the VMCI device supports the capabilities that we
387 	 * need. Datagrams are necessary and notifications will be used
388 	 * if the device supports it.
389 	 */
390 	sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0,
391 	    VMCI_CAPS_ADDR);
392 
393 	if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) {
394 		VMCI_LOG_ERROR(LGPFX"VMCI device does not support "
395 		    "datagrams.\n");
396 		return (ENODEV);
397 	}
398 
399 	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
400 		sc->capabilities = VMCI_CAPS_DATAGRAM;
401 		error = vmci_dma_malloc(PAGE_SIZE, 1,
402 		    &sc->vmci_notifications_bitmap);
403 		if (error)
404 			VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for "
405 			    "notification bitmap.\n");
406 		else {
407 			memset(sc->vmci_notifications_bitmap.dma_vaddr, 0,
408 			    PAGE_SIZE);
409 			sc->capabilities |= VMCI_CAPS_NOTIFICATIONS;
410 		}
411 	} else
412 		sc->capabilities = VMCI_CAPS_DATAGRAM;
413 
414 	/* Let the host know which capabilities we intend to use. */
415 	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
416 	    VMCI_CAPS_ADDR, sc->capabilities);
417 
418 	/*
419 	 * Register notification bitmap with device if that capability is
420 	 * used.
421 	 */
422 	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
423 		bitmap_PPN =
424 		    sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT;
425 		vmci_register_notification_bitmap(bitmap_PPN);
426 	}
427 
428 	/* Check host capabilities. */
429 	if (!vmci_check_host_capabilities())
430 		return (ENODEV);
431 
432 	return (0);
433 }
434 
435 /*
436  *------------------------------------------------------------------------------
437  *
438  * vmci_dmamap_cb --
439  *
440  *     Callback to receive mapping information resulting from the load of a
441  *     bus_dmamap_t via bus_dmamap_load()
442  *
443  * Results:
444  *     None.
445  *
446  * Side effects:
447  *     None.
448  *
449  *------------------------------------------------------------------------------
450  */
451 
452 static void
453 vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
454 {
455 	bus_addr_t *baddr = arg;
456 
457 	if (error == 0)
458 		*baddr = segs->ds_addr;
459 }
460 
461 /*
462  *------------------------------------------------------------------------------
463  *
464  * vmci_dma_malloc_int --
465  *
466  *     Internal function that allocates DMA memory.
467  *
468  * Results:
469  *     0 if success.
470  *     ENOMEM if insufficient memory.
471  *     EINPROGRESS if mapping is deferred.
472  *     EINVAL if the request was invalid.
473  *
474  * Side effects:
475  *     DMA memory is allocated.
476  *
477  *------------------------------------------------------------------------------
478  */
479 
480 static int
481 vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align,
482     struct vmci_dma_alloc *dma)
483 {
484 	int error;
485 
486 	bzero(dma, sizeof(struct vmci_dma_alloc));
487 
488 	error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev),
489 	    align, 0,		/* alignment, bounds */
490 	    BUS_SPACE_MAXADDR,	/* lowaddr */
491 	    BUS_SPACE_MAXADDR,	/* highaddr */
492 	    NULL, NULL,		/* filter, filterarg */
493 	    size,		/* maxsize */
494 	    1,			/* nsegments */
495 	    size,		/* maxsegsize */
496 	    BUS_DMA_ALLOCNOW,	/* flags */
497 	    NULL,		/* lockfunc */
498 	    NULL,		/* lockfuncarg */
499 	    &dma->dma_tag);
500 	if (error) {
501 		VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error);
502 		goto fail;
503 	}
504 
505 	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
506 	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
507 	if (error) {
508 		VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error);
509 		goto fail;
510 	}
511 
512 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
513 	    size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
514 	if (error) {
515 		VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error);
516 		goto fail;
517 	}
518 
519 	dma->dma_size = size;
520 
521 fail:
522 	if (error)
523 		vmci_dma_free(dma);
524 
525 	return (error);
526 }
527 
528 /*
529  *------------------------------------------------------------------------------
530  *
531  * vmci_dma_malloc --
532  *
533  *     This function is a wrapper around vmci_dma_malloc_int for callers
534  *     outside of this module. Since we only support a single VMCI device, this
535  *     wrapper provides access to the device softc structure.
536  *
537  * Results:
538  *     0 if success.
539  *     ENOMEM if insufficient memory.
540  *     EINPROGRESS if mapping is deferred.
541  *     EINVAL if the request was invalid.
542  *
543  * Side effects:
544  *     DMA memory is allocated.
545  *
546  *------------------------------------------------------------------------------
547  */
548 
549 int
550 vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma)
551 {
552 
553 	return (vmci_dma_malloc_int(vmci_sc, size, align, dma));
554 }
555 
556 /*
557  *------------------------------------------------------------------------------
558  *
559  * vmci_dma_free_int --
560  *
561  *     Internal function that frees DMA memory.
562  *
563  * Results:
564  *     None.
565  *
566  * Side effects:
567  *     Frees DMA memory.
568  *
569  *------------------------------------------------------------------------------
570  */
571 
572 static void
573 vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma)
574 {
575 
576 	if (dma->dma_tag != NULL) {
577 		if (dma->dma_paddr != 0) {
578 			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
579 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
580 			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
581 		}
582 
583 		if (dma->dma_vaddr != NULL)
584 			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
585 			    dma->dma_map);
586 
587 		bus_dma_tag_destroy(dma->dma_tag);
588 	}
589 	bzero(dma, sizeof(struct vmci_dma_alloc));
590 }
591 
592 /*
593  *------------------------------------------------------------------------------
594  *
595  * vmci_dma_free --
596  *
597  *     This function is a wrapper around vmci_dma_free_int for callers outside
598  *     of this module. Since we only support a single VMCI device, this wrapper
599  *     provides access to the device softc structure.
600  *
601  * Results:
602  *     None.
603  *
604  * Side effects:
605  *     Frees DMA memory.
606  *
607  *------------------------------------------------------------------------------
608  */
609 
610 void
611 vmci_dma_free(struct vmci_dma_alloc *dma)
612 {
613 
614 	vmci_dma_free_int(vmci_sc, dma);
615 }
616 
617 /*
618  *------------------------------------------------------------------------------
619  *
620  * vmci_config_interrupts --
621  *
622  *     Configures and enables interrupts. Try to configure MSI-X. If this fails,
623  *     try to configure MSI. If even this fails, try legacy interrupts.
624  *
625  * Results:
626  *     0 if success.
627  *     ENOMEM if insufficient memory.
628  *     ENODEV if the device doesn't support interrupts.
629  *     ENXIO if the device configuration failed.
630  *
631  * Side effects:
632  *     Interrupts get enabled if successful.
633  *
634  *------------------------------------------------------------------------------
635  */
636 
637 static int
638 vmci_config_interrupts(struct vmci_softc *sc)
639 {
640 	int error;
641 
642 	data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT);
643 	if (data_buffer == NULL)
644 		return (ENOMEM);
645 
646 	sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX;
647 	error = vmci_config_interrupt(sc);
648 	if (error) {
649 		sc->vmci_intr_type = VMCI_INTR_TYPE_MSI;
650 		error = vmci_config_interrupt(sc);
651 	}
652 	if (error) {
653 		sc->vmci_intr_type = VMCI_INTR_TYPE_INTX;
654 		error = vmci_config_interrupt(sc);
655 	}
656 	if (error)
657 		return (error);
658 
659 	/* Enable specific interrupt bits. */
660 	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS)
661 		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
662 		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION);
663 	else
664 		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
665 		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM);
666 
667 	/* Enable interrupts. */
668 	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
669 	    VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE);
670 
671 	return (0);
672 }
673 
674 /*
675  *------------------------------------------------------------------------------
676  *
677  * vmci_config_interrupt --
678  *
679  *     Check the number of interrupts supported, allocate resources and setup
680  *     interrupts.
681  *
682  * Results:
683  *     0 if success.
684  *     ENOMEM if insufficient memory.
685  *     ENODEV if the device doesn't support interrupts.
686  *     ENXIO if the device configuration failed.
687  *
688  * Side effects:
689  *     Resources get allocated and interrupts get setup (but not enabled) if
690  *     successful.
691  *
692  *------------------------------------------------------------------------------
693  */
694 
695 static int
696 vmci_config_interrupt(struct vmci_softc *sc)
697 {
698 	int error;
699 
700 	error = vmci_check_intr_cnt(sc);
701 	if (error)
702 		return (error);
703 
704 	error = vmci_allocate_interrupt_resources(sc);
705 	if (error)
706 		return (error);
707 
708 	error = vmci_setup_interrupts(sc);
709 	if (error)
710 		return (error);
711 
712 	return (0);
713 }
714 
715 /*
716  *------------------------------------------------------------------------------
717  *
718  * vmci_check_intr_cnt --
719  *
720  *     Check the number of interrupts supported by the device and ask PCI bus
721  *     to allocate appropriate number of interrupts.
722  *
723  * Results:
724  *     0 if success.
725  *     ENODEV if the device doesn't support any interrupts.
726  *     ENXIO if the device configuration failed.
727  *
728  * Side effects:
729  *     Resources get allocated on success.
730  *
731  *------------------------------------------------------------------------------
732  */
733 
734 static int
735 vmci_check_intr_cnt(struct vmci_softc *sc)
736 {
737 
738 	if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) {
739 		sc->vmci_num_intr = 1;
740 		return (0);
741 	}
742 
743 	/*
744 	 * Make sure that the device supports the required number of MSI/MSI-X
745 	 * messages. We try for 2 MSI-X messages but 1 is good too. We need at
746 	 * least 1 MSI message.
747 	 */
748 	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
749 	    pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev);
750 
751 	if (!sc->vmci_num_intr) {
752 		VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt"
753 		    " messages");
754 		return (ENODEV);
755 	}
756 
757 	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
758 	    VMCI_MAX_INTRS : 1;
759 	if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) {
760 		if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr))
761 			return (ENXIO);
762 	} else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) {
763 		if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr))
764 			return (ENXIO);
765 	}
766 
767 	return (0);
768 }
769 
770 /*
771  *------------------------------------------------------------------------------
772  *
773  * vmci_allocate_interrupt_resources --
774  *
775  *     Allocate resources necessary for interrupts.
776  *
777  * Results:
778  *     0 if success, ENXIO otherwise.
779  *
780  * Side effects:
781  *     Resources get allocated on success.
782  *
783  *------------------------------------------------------------------------------
784  */
785 
786 static int
787 vmci_allocate_interrupt_resources(struct vmci_softc *sc)
788 {
789 	struct resource *irq;
790 	int flags, i, rid;
791 
792 	flags = RF_ACTIVE;
793 	flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0;
794 	rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1;
795 
796 	for (i = 0; i < sc->vmci_num_intr; i++, rid++) {
797 		irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid,
798 		    flags);
799 		if (irq == NULL)
800 			return (ENXIO);
801 		sc->vmci_intrs[i].vmci_irq = irq;
802 		sc->vmci_intrs[i].vmci_rid = rid;
803 	}
804 
805 	return (0);
806 }
807 
808 /*
809  *------------------------------------------------------------------------------
810  *
811  * vmci_setup_interrupts --
812  *
813  *     Sets up the interrupts.
814  *
815  * Results:
816  *     0 if success, appropriate error code from bus_setup_intr otherwise.
817  *
818  * Side effects:
819  *     Interrupt handler gets attached.
820  *
821  *------------------------------------------------------------------------------
822  */
823 
824 static int
825 vmci_setup_interrupts(struct vmci_softc *sc)
826 {
827 	struct vmci_interrupt *intr;
828 	int error, flags;
829 
830 	flags = INTR_TYPE_NET | INTR_MPSAFE;
831 	if (sc->vmci_num_intr > 1)
832 		flags |= INTR_EXCL;
833 
834 	intr = &sc->vmci_intrs[0];
835 	error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL,
836 	    vmci_interrupt, NULL, &intr->vmci_handler);
837 	if (error)
838 		return (error);
839 	bus_describe_intr(sc->vmci_dev, intr->vmci_irq, intr->vmci_handler,
840 	    "vmci_interrupt");
841 
842 	if (sc->vmci_num_intr == 2) {
843 		intr = &sc->vmci_intrs[1];
844 		error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags,
845 		    NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler);
846 		if (error)
847 			return (error);
848 		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
849 		    intr->vmci_handler, "vmci_interrupt_bm");
850 	}
851 
852 	return (0);
853 }
854 
855 /*
856  *------------------------------------------------------------------------------
857  *
858  * vmci_interrupt --
859  *
860  *     Interrupt handler for legacy or MSI interrupt, or for first MSI-X
861  *     interrupt (vector VMCI_INTR_DATAGRAM).
862  *
863  * Results:
864  *     None.
865  *
866  * Side effects:
867  *     None.
868  *
869  *------------------------------------------------------------------------------
870  */
871 
872 static void
873 vmci_interrupt(void *arg)
874 {
875 
876 	if (vmci_sc->vmci_num_intr == 2)
877 		taskqueue_enqueue(taskqueue_swi,
878 		    &vmci_sc->vmci_interrupt_dq_task);
879 	else {
880 		unsigned int icr;
881 
882 		icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR);
883 		if (icr == 0 || icr == 0xffffffff)
884 			return;
885 		if (icr & VMCI_ICR_DATAGRAM) {
886 			taskqueue_enqueue(taskqueue_swi,
887 			    &vmci_sc->vmci_interrupt_dq_task);
888 			icr &= ~VMCI_ICR_DATAGRAM;
889 		}
890 		if (icr & VMCI_ICR_NOTIFICATION) {
891 			taskqueue_enqueue(taskqueue_swi,
892 			    &vmci_sc->vmci_interrupt_bm_task);
893 			icr &= ~VMCI_ICR_NOTIFICATION;
894 		}
895 		if (icr != 0)
896 			VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt "
897 			    "cause");
898 	}
899 }
900 
901 /*
902  *------------------------------------------------------------------------------
903  *
904  * vmci_interrupt_bm --
905  *
906  *     Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
907  *     which is for the notification bitmap. Will only get called if we are
908  *     using MSI-X with exclusive vectors.
909  *
910  * Results:
911  *     None.
912  *
913  * Side effects:
914  *     None.
915  *
916  *------------------------------------------------------------------------------
917  */
918 
919 static void
920 vmci_interrupt_bm(void *arg)
921 {
922 
923 	ASSERT(vmci_sc->vmci_num_intr == 2);
924 	taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task);
925 }
926 
927 /*
928  *------------------------------------------------------------------------------
929  *
930  * dispatch_datagrams --
931  *
932  *     Reads and dispatches incoming datagrams.
933  *
934  * Results:
935  *     None.
936  *
937  * Side effects:
938  *     Reads data from the device.
939  *
940  *------------------------------------------------------------------------------
941  */
942 
943 static void
944 dispatch_datagrams(void *context, int data)
945 {
946 
947 	if (data_buffer == NULL)
948 		VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer "
949 		    "present");
950 
951 	vmci_read_datagrams_from_port((vmci_io_handle) 0,
952 	    vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR,
953 	    data_buffer, data_buffer_size);
954 }
955 
956 /*
957  *------------------------------------------------------------------------------
958  *
959  * process_bitmap --
960  *
961  *     Scans the notification bitmap for raised flags, clears them and handles
962  *     the notifications.
963  *
964  * Results:
965  *     None.
966  *
967  * Side effects:
968  *     None.
969  *
970  *------------------------------------------------------------------------------
971  */
972 
973 static void
974 process_bitmap(void *context, int data)
975 {
976 
977 	if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL)
978 		VMCI_LOG_INFO(LGPFX"process_bitmaps(): no bitmap present");
979 
980 	vmci_scan_notification_bitmap(
981 	    vmci_sc->vmci_notifications_bitmap.dma_vaddr);
982 }
983 
984 /*
985  *------------------------------------------------------------------------------
986  *
987  * vmci_dismantle_interrupts --
988  *
989  *     Releases resources, detaches the interrupt handler and drains the task
990  *     queue.
991  *
992  * Results:
993  *     None.
994  *
995  * Side effects:
996  *     No more interrupts.
997  *
998  *------------------------------------------------------------------------------
999  */
1000 
1001 static void
1002 vmci_dismantle_interrupts(struct vmci_softc *sc)
1003 {
1004 	struct vmci_interrupt *intr;
1005 	int i;
1006 
1007 	for (i = 0; i < sc->vmci_num_intr; i++) {
1008 		intr = &sc->vmci_intrs[i];
1009 		if (intr->vmci_handler != NULL) {
1010 			bus_teardown_intr(sc->vmci_dev, intr->vmci_irq,
1011 			    intr->vmci_handler);
1012 			intr->vmci_handler = NULL;
1013 		}
1014 		if (intr->vmci_irq != NULL) {
1015 			bus_release_resource(sc->vmci_dev, SYS_RES_IRQ,
1016 			    intr->vmci_rid, intr->vmci_irq);
1017 			intr->vmci_irq = NULL;
1018 			intr->vmci_rid = -1;
1019 		}
1020 	}
1021 
1022 	if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) &&
1023 	    (sc->vmci_num_intr))
1024 		pci_release_msi(sc->vmci_dev);
1025 
1026 	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task);
1027 	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task);
1028 
1029 	if (data_buffer != NULL)
1030 		free(data_buffer, M_DEVBUF);
1031 }
1032 
1033 /*
1034  *------------------------------------------------------------------------------
1035  *
1036  * vmci_delayed_work_fn_cb --
1037  *
1038  *     Callback function that executes the queued up delayed work functions.
1039  *
1040  * Results:
1041  *     None.
1042  *
1043  * Side effects:
1044  *     None.
1045  *
1046  *------------------------------------------------------------------------------
1047  */
1048 
1049 static void
1050 vmci_delayed_work_fn_cb(void *context, int data)
1051 {
1052 	vmci_list(vmci_delayed_work_info) temp_list;
1053 
1054 	vmci_list_init(&temp_list);
1055 
1056 	/*
1057 	 * Swap vmci_delayed_work_infos list with the empty temp_list while
1058 	 * holding a lock. vmci_delayed_work_infos would then be an empty list
1059 	 * and temp_list would contain the elements from the original
1060 	 * vmci_delayed_work_infos. Finally, iterate through temp_list
1061 	 * executing the delayed callbacks.
1062 	 */
1063 
1064 	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
1065 	vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos,
1066 	    vmci_delayed_work_info, entry);
1067 	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
1068 
1069 	while (!vmci_list_empty(&temp_list)) {
1070 		struct vmci_delayed_work_info *delayed_work_info =
1071 		    vmci_list_first(&temp_list);
1072 
1073 		delayed_work_info->work_fn(delayed_work_info->data);
1074 
1075 		vmci_list_remove(delayed_work_info, entry);
1076 		vmci_free_kernel_mem(delayed_work_info,
1077 		    sizeof(*delayed_work_info));
1078 	}
1079 }
1080 
1081 /*
1082  *------------------------------------------------------------------------------
1083  *
1084  * vmci_schedule_delayed_work_fn --
1085  *
1086  *     Schedule the specified callback.
1087  *
1088  * Results:
1089  *     0 if success, error code otherwise.
1090  *
1091  * Side effects:
1092  *     None.
1093  *
1094  *------------------------------------------------------------------------------
1095  */
1096 
1097 int
1098 vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data)
1099 {
1100 	struct vmci_delayed_work_info *delayed_work_info;
1101 
1102 	delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info),
1103 	    VMCI_MEMORY_ATOMIC);
1104 
1105 	if (!delayed_work_info)
1106 		return (VMCI_ERROR_NO_MEM);
1107 
1108 	delayed_work_info->work_fn = work_fn;
1109 	delayed_work_info->data = data;
1110 	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
1111 	vmci_list_insert(&vmci_sc->vmci_delayed_work_infos,
1112 	    delayed_work_info, entry);
1113 	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
1114 
1115 	taskqueue_enqueue(taskqueue_thread,
1116 	    &vmci_sc->vmci_delayed_work_task);
1117 
1118 	return (VMCI_SUCCESS);
1119 }
1120 
1121 /*
1122  *------------------------------------------------------------------------------
1123  *
1124  * vmci_send_datagram --
1125  *
1126  *     VM to hypervisor call mechanism.
1127  *
1128  * Results:
1129  *     The result of the hypercall.
1130  *
1131  * Side effects:
1132  *     None.
1133  *
1134  *------------------------------------------------------------------------------
1135  */
1136 
1137 int
1138 vmci_send_datagram(struct vmci_datagram *dg)
1139 {
1140 	int result;
1141 
1142 	if (dg == NULL)
1143 		return (VMCI_ERROR_INVALID_ARGS);
1144 
1145 	/*
1146 	 * Need to acquire spinlock on the device because
1147 	 * the datagram data may be spread over multiple pages and the monitor
1148 	 * may interleave device user rpc calls from multiple VCPUs. Acquiring
1149 	 * the spinlock precludes that possibility. Disabling interrupts to
1150 	 * avoid incoming datagrams during a "rep out" and possibly landing up
1151 	 * in this function.
1152 	 */
1153 	mtx_lock_spin(&vmci_sc->vmci_spinlock);
1154 
1155 	/*
1156 	 * Send the datagram and retrieve the return value from the result
1157 	 * register.
1158 	 */
1159 	__asm__ __volatile__(
1160 	    "cld\n\t"
1161 	    "rep outsb\n\t"
1162 	    : /* No output. */
1163 	    : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR),
1164 	    "c"(VMCI_DG_SIZE(dg)), "S"(dg)
1165 	    );
1166 
1167 	/*
1168 	 * XXX: Should read result high port as well when updating handlers to
1169 	 * return 64bit.
1170 	 */
1171 
1172 	result = bus_space_read_4(vmci_sc->vmci_iot0,
1173 	    vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR);
1174 	mtx_unlock_spin(&vmci_sc->vmci_spinlock);
1175 
1176 	return (result);
1177 }
1178