xref: /freebsd/sys/dev/vmware/vmci/vmci.c (revision 1165fc9a526630487a1feb63daef65c5aee1a583)
1 /*-
2  * Copyright (c) 2018 VMware, Inc.
3  *
4  * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
5  */
6 
7 /* Driver for VMware Virtual Machine Communication Interface (VMCI) device. */
8 
9 #include <sys/cdefs.h>
10 __FBSDID("$FreeBSD$");
11 
12 #include <sys/param.h>
13 #include <sys/bus.h>
14 #include <sys/kernel.h>
15 #include <sys/malloc.h>
16 #include <sys/module.h>
17 #include <sys/rman.h>
18 #include <sys/systm.h>
19 
20 #include <dev/pci/pcireg.h>
21 #include <dev/pci/pcivar.h>
22 
23 #include <machine/bus.h>
24 
25 #include "vmci.h"
26 #include "vmci_doorbell.h"
27 #include "vmci_driver.h"
28 #include "vmci_kernel_defs.h"
29 #include "vmci_queue_pair.h"
30 
/* Newbus device interface methods; installed through vmci_methods below. */
static int	vmci_probe(device_t);
static int	vmci_attach(device_t);
static int	vmci_detach(device_t);
static int	vmci_shutdown(device_t);

/* PCI BAR mapping helpers. */
static int	vmci_map_bars(struct vmci_softc *);
static void	vmci_unmap_bars(struct vmci_softc *);

/* Device capability negotiation. */
static int	vmci_config_capabilities(struct vmci_softc *);

/* Internal DMA helpers behind vmci_dma_malloc() and vmci_dma_free(). */
static int	vmci_dma_malloc_int(struct vmci_softc *, bus_size_t,
		    bus_size_t, struct vmci_dma_alloc *);
static void	vmci_dma_free_int(struct vmci_softc *,
		    struct vmci_dma_alloc *);

/* Interrupt configuration, interrupt handlers and the tasks they enqueue. */
static int	vmci_config_interrupts(struct vmci_softc *);
static int	vmci_config_interrupt(struct vmci_softc *);
static int	vmci_check_intr_cnt(struct vmci_softc *);
static int	vmci_allocate_interrupt_resources(struct vmci_softc *);
static int	vmci_setup_interrupts(struct vmci_softc *);
static void	vmci_dismantle_interrupts(struct vmci_softc *);
static void	vmci_interrupt(void *);
static void	vmci_interrupt_bm(void *);
static void	dispatch_datagrams(void *, int);
static void	process_bitmap(void *, int);

/* Task callback that runs work queued by vmci_schedule_delayed_work_fn(). */
static void	vmci_delayed_work_fn_cb(void *context, int data);
58 
/* Method table mapping the newbus device interface onto this driver. */
static device_method_t vmci_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,		vmci_probe),
	DEVMETHOD(device_attach,	vmci_attach),
	DEVMETHOD(device_detach,	vmci_detach),
	DEVMETHOD(device_shutdown,	vmci_shutdown),

	DEVMETHOD_END
};

/* Driver description: name, method table and size of the per-device softc. */
static driver_t vmci_driver = {
	"vmci", vmci_methods, sizeof(struct vmci_softc)
};

/* Register the driver on the pci bus and declare the module version. */
DRIVER_MODULE(vmci, pci, vmci_driver, 0, 0);
MODULE_VERSION(vmci, VMCI_VERSION);
/*
 * PCI vendor/device IDs handled by this driver. The table is consulted by
 * vmci_probe() and exported through MODULE_PNP_INFO below.
 */
const struct {
	uint16_t vendor;
	uint16_t device;
	const char *desc;
} vmci_ids[] = {
	{ VMCI_VMWARE_VENDOR_ID, VMCI_VMWARE_DEVICE_ID,
	    "VMware Virtual Machine Communication Interface" },
};
MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, vmci, vmci_ids,
    nitems(vmci_ids));

MODULE_DEPEND(vmci, pci, 1, 1, 1);

/*
 * Softc of the device set up by vmci_attach(). Interrupt handlers, task
 * callbacks and the exported functions in this file reach the device through
 * this pointer instead of taking a softc argument.
 */
static struct vmci_softc *vmci_sc;

/* Prefix for log messages emitted by this file. */
#define LGPFX	"vmci: "
/*
 * Allocate a buffer for incoming datagrams globally to avoid repeated
 * allocation in the interrupt handler's atomic context.
 */
static uint8_t *data_buffer = NULL;
static uint32_t data_buffer_size = VMCI_MAX_DG_SIZE;

/*
 * One queued delayed-work request: the callback to run, the argument to pass
 * to it, and the linkage into the softc's vmci_delayed_work_infos list.
 */
struct vmci_delayed_work_info {
	vmci_work_fn	*work_fn;
	void		*data;
	vmci_list_item(vmci_delayed_work_info) entry;
};
103 
104 /*
105  *------------------------------------------------------------------------------
106  *
107  * vmci_probe --
108  *
109  *     Probe to see if the VMCI device is present.
110  *
111  * Results:
112  *     BUS_PROBE_DEFAULT if device exists, ENXIO otherwise.
113  *
114  * Side effects:
115  *     None.
116  *
117  *------------------------------------------------------------------------------
118  */
119 
120 static int
121 vmci_probe(device_t dev)
122 {
123 
124 	if (pci_get_vendor(dev) == vmci_ids[0].vendor &&
125 	    pci_get_device(dev) == vmci_ids[0].device) {
126 		device_set_desc(dev, vmci_ids[0].desc);
127 
128 		return (BUS_PROBE_DEFAULT);
129 	}
130 
131 	return (ENXIO);
132 }
133 
134 /*
135  *------------------------------------------------------------------------------
136  *
137  * vmci_attach --
138  *
139  *     Attach VMCI device to the system after vmci_probe() has been called and
140  *     the device has been detected.
141  *
142  * Results:
143  *     0 if success, ENXIO otherwise.
144  *
145  * Side effects:
146  *     None.
147  *
148  *------------------------------------------------------------------------------
149  */
150 
151 static int
152 vmci_attach(device_t dev)
153 {
154 	struct vmci_softc *sc;
155 	int error, i;
156 
157 	sc = device_get_softc(dev);
158 	sc->vmci_dev = dev;
159 	vmci_sc = sc;
160 
161 	data_buffer = NULL;
162 	sc->vmci_num_intr = 0;
163 	for (i = 0; i < VMCI_MAX_INTRS; i++) {
164 		sc->vmci_intrs[i].vmci_irq = NULL;
165 		sc->vmci_intrs[i].vmci_handler = NULL;
166 	}
167 
168 	TASK_INIT(&sc->vmci_interrupt_dq_task, 0, dispatch_datagrams, sc);
169 	TASK_INIT(&sc->vmci_interrupt_bm_task, 0, process_bitmap, sc);
170 
171 	TASK_INIT(&sc->vmci_delayed_work_task, 0, vmci_delayed_work_fn_cb, sc);
172 
173 	pci_enable_busmaster(dev);
174 
175 	mtx_init(&sc->vmci_spinlock, "VMCI Spinlock", NULL, MTX_SPIN);
176 	mtx_init(&sc->vmci_delayed_work_lock, "VMCI Delayed Work Lock",
177 	    NULL, MTX_DEF);
178 
179 	error = vmci_map_bars(sc);
180 	if (error) {
181 		VMCI_LOG_ERROR(LGPFX"Failed to map PCI BARs.\n");
182 		goto fail;
183 	}
184 
185 	error = vmci_config_capabilities(sc);
186 	if (error) {
187 		VMCI_LOG_ERROR(LGPFX"Failed to configure capabilities.\n");
188 		goto fail;
189 	}
190 
191 	vmci_list_init(&sc->vmci_delayed_work_infos);
192 
193 	vmci_components_init();
194 	vmci_util_init();
195 	error = vmci_qp_guest_endpoints_init();
196 	if (error) {
197 		VMCI_LOG_ERROR(LGPFX"vmci_qp_guest_endpoints_init failed.\n");
198 		goto fail;
199 	}
200 
201 	error = vmci_config_interrupts(sc);
202 	if (error)
203 		VMCI_LOG_ERROR(LGPFX"Failed to enable interrupts.\n");
204 
205 fail:
206 	if (error) {
207 		vmci_detach(dev);
208 		return (ENXIO);
209 	}
210 
211 	return (0);
212 }
213 
214 /*
215  *------------------------------------------------------------------------------
216  *
217  * vmci_detach --
218  *
219  *     Detach the VMCI device.
220  *
221  * Results:
222  *     0
223  *
224  * Side effects:
225  *     None.
226  *
227  *------------------------------------------------------------------------------
228  */
229 
230 static int
231 vmci_detach(device_t dev)
232 {
233 	struct vmci_softc *sc;
234 
235 	sc = device_get_softc(dev);
236 
237 	vmci_qp_guest_endpoints_exit();
238 	vmci_util_exit();
239 
240 	vmci_dismantle_interrupts(sc);
241 
242 	vmci_components_cleanup();
243 
244 	if mtx_initialized(&sc->vmci_spinlock) {
245 		taskqueue_drain(taskqueue_thread, &sc->vmci_delayed_work_task);
246 		mtx_destroy(&sc->vmci_delayed_work_lock);
247 	}
248 
249 	if (sc->vmci_res0 != NULL)
250 		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
251 		    VMCI_CONTROL_ADDR, VMCI_CONTROL_RESET);
252 
253 	if (sc->vmci_notifications_bitmap.dma_vaddr != NULL)
254 		vmci_dma_free(&sc->vmci_notifications_bitmap);
255 
256 	vmci_unmap_bars(sc);
257 
258 	if mtx_initialized(&sc->vmci_spinlock)
259 		mtx_destroy(&sc->vmci_spinlock);
260 
261 	pci_disable_busmaster(dev);
262 
263 	return (0);
264 }
265 
266 /*
267  *------------------------------------------------------------------------------
268  *
269  * vmci_shutdown --
270  *
271  *     This function is called during system shutdown. We don't do anything.
272  *
273  * Results:
274  *     0
275  *
276  * Side effects:
277  *     None.
278  *
279  *------------------------------------------------------------------------------
280  */
281 
282 static int
283 vmci_shutdown(device_t dev)
284 {
285 
286 	return (0);
287 }
288 
289 /*
290  *------------------------------------------------------------------------------
291  *
292  * vmci_map_bars --
293  *
294  *     Maps the PCI I/O and MMIO BARs.
295  *
296  * Results:
297  *     0 on success, ENXIO otherwise.
298  *
299  * Side effects:
300  *     None.
301  *
302  *------------------------------------------------------------------------------
303  */
304 
305 static int
306 vmci_map_bars(struct vmci_softc *sc)
307 {
308 	int rid;
309 
310 	/* Map the PCI I/O BAR: BAR0 */
311 	rid = PCIR_BAR(0);
312 	sc->vmci_res0 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IOPORT,
313 	    &rid, RF_ACTIVE);
314 	if (sc->vmci_res0 == NULL) {
315 		VMCI_LOG_ERROR(LGPFX"Could not map: BAR0\n");
316 		return (ENXIO);
317 	}
318 
319 	sc->vmci_iot0 = rman_get_bustag(sc->vmci_res0);
320 	sc->vmci_ioh0 = rman_get_bushandle(sc->vmci_res0);
321 	sc->vmci_ioaddr = rman_get_start(sc->vmci_res0);
322 
323 	/* Map the PCI MMIO BAR: BAR1 */
324 	rid = PCIR_BAR(1);
325 	sc->vmci_res1 = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_MEMORY,
326 	    &rid, RF_ACTIVE);
327 	if (sc->vmci_res1 == NULL) {
328 		VMCI_LOG_ERROR(LGPFX"Could not map: BAR1\n");
329 		return (ENXIO);
330 	}
331 
332 	sc->vmci_iot1 = rman_get_bustag(sc->vmci_res1);
333 	sc->vmci_ioh1 = rman_get_bushandle(sc->vmci_res1);
334 
335 	return (0);
336 }
337 
338 /*
339  *------------------------------------------------------------------------------
340  *
341  * vmci_unmap_bars --
342  *
343  *     Unmaps the VMCI PCI I/O and MMIO BARs.
344  *
345  * Results:
346  *     None.
347  *
348  * Side effects:
349  *     None.
350  *
351  *------------------------------------------------------------------------------
352  */
353 
354 static void
355 vmci_unmap_bars(struct vmci_softc *sc)
356 {
357 	int rid;
358 
359 	if (sc->vmci_res0 != NULL) {
360 		rid = PCIR_BAR(0);
361 		bus_release_resource(sc->vmci_dev, SYS_RES_IOPORT, rid,
362 		    sc->vmci_res0);
363 		sc->vmci_res0 = NULL;
364 	}
365 
366 	if (sc->vmci_res1 != NULL) {
367 		rid = PCIR_BAR(1);
368 		bus_release_resource(sc->vmci_dev, SYS_RES_MEMORY, rid,
369 		    sc->vmci_res1);
370 		sc->vmci_res1 = NULL;
371 	}
372 }
373 
374 /*
375  *------------------------------------------------------------------------------
376  *
377  * vmci_config_capabilities --
378  *
379  *     Check the VMCI device capabilities and configure the device accordingly.
380  *
381  * Results:
382  *     0 if success, ENODEV otherwise.
383  *
384  * Side effects:
385  *     Device capabilities are enabled.
386  *
387  *------------------------------------------------------------------------------
388  */
389 
390 static int
391 vmci_config_capabilities(struct vmci_softc *sc)
392 {
393 	unsigned long bitmap_PPN;
394 	int error;
395 
396 	/*
397 	 * Verify that the VMCI device supports the capabilities that we
398 	 * need. Datagrams are necessary and notifications will be used
399 	 * if the device supports it.
400 	 */
401 	sc->capabilities = bus_space_read_4(sc->vmci_iot0, sc->vmci_ioh0,
402 	    VMCI_CAPS_ADDR);
403 
404 	if ((sc->capabilities & VMCI_CAPS_DATAGRAM) == 0) {
405 		VMCI_LOG_ERROR(LGPFX"VMCI device does not support "
406 		    "datagrams.\n");
407 		return (ENODEV);
408 	}
409 
410 	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
411 		sc->capabilities = VMCI_CAPS_DATAGRAM;
412 		error = vmci_dma_malloc(PAGE_SIZE, 1,
413 		    &sc->vmci_notifications_bitmap);
414 		if (error)
415 			VMCI_LOG_ERROR(LGPFX"Failed to alloc memory for "
416 			    "notification bitmap.\n");
417 		else {
418 			memset(sc->vmci_notifications_bitmap.dma_vaddr, 0,
419 			    PAGE_SIZE);
420 			sc->capabilities |= VMCI_CAPS_NOTIFICATIONS;
421 		}
422 	} else
423 		sc->capabilities = VMCI_CAPS_DATAGRAM;
424 
425 	/* Let the host know which capabilities we intend to use. */
426 	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
427 	    VMCI_CAPS_ADDR, sc->capabilities);
428 
429 	/*
430 	 * Register notification bitmap with device if that capability is
431 	 * used.
432 	 */
433 	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS) {
434 		bitmap_PPN =
435 		    sc->vmci_notifications_bitmap.dma_paddr >> PAGE_SHIFT;
436 		vmci_register_notification_bitmap(bitmap_PPN);
437 	}
438 
439 	/* Check host capabilities. */
440 	if (!vmci_check_host_capabilities())
441 		return (ENODEV);
442 
443 	return (0);
444 }
445 
446 /*
447  *------------------------------------------------------------------------------
448  *
449  * vmci_dmamap_cb --
450  *
451  *     Callback to receive mapping information resulting from the load of a
452  *     bus_dmamap_t via bus_dmamap_load()
453  *
454  * Results:
455  *     None.
456  *
457  * Side effects:
458  *     None.
459  *
460  *------------------------------------------------------------------------------
461  */
462 
463 static void
464 vmci_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
465 {
466 	bus_addr_t *baddr = arg;
467 
468 	if (error == 0)
469 		*baddr = segs->ds_addr;
470 }
471 
472 /*
473  *------------------------------------------------------------------------------
474  *
475  * vmci_dma_malloc_int --
476  *
477  *     Internal function that allocates DMA memory.
478  *
479  * Results:
480  *     0 if success.
481  *     ENOMEM if insufficient memory.
482  *     EINPROGRESS if mapping is deferred.
483  *     EINVAL if the request was invalid.
484  *
485  * Side effects:
486  *     DMA memory is allocated.
487  *
488  *------------------------------------------------------------------------------
489  */
490 
491 static int
492 vmci_dma_malloc_int(struct vmci_softc *sc, bus_size_t size, bus_size_t align,
493     struct vmci_dma_alloc *dma)
494 {
495 	int error;
496 
497 	bzero(dma, sizeof(struct vmci_dma_alloc));
498 
499 	error = bus_dma_tag_create(bus_get_dma_tag(vmci_sc->vmci_dev),
500 	    align, 0,		/* alignment, bounds */
501 	    BUS_SPACE_MAXADDR,	/* lowaddr */
502 	    BUS_SPACE_MAXADDR,	/* highaddr */
503 	    NULL, NULL,		/* filter, filterarg */
504 	    size,		/* maxsize */
505 	    1,			/* nsegments */
506 	    size,		/* maxsegsize */
507 	    BUS_DMA_ALLOCNOW,	/* flags */
508 	    NULL,		/* lockfunc */
509 	    NULL,		/* lockfuncarg */
510 	    &dma->dma_tag);
511 	if (error) {
512 		VMCI_LOG_ERROR(LGPFX"bus_dma_tag_create failed: %d\n", error);
513 		goto fail;
514 	}
515 
516 	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
517 	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
518 	if (error) {
519 		VMCI_LOG_ERROR(LGPFX"bus_dmamem_alloc failed: %d\n", error);
520 		goto fail;
521 	}
522 
523 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
524 	    size, vmci_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
525 	if (error) {
526 		VMCI_LOG_ERROR(LGPFX"bus_dmamap_load failed: %d\n", error);
527 		goto fail;
528 	}
529 
530 	dma->dma_size = size;
531 
532 fail:
533 	if (error)
534 		vmci_dma_free(dma);
535 
536 	return (error);
537 }
538 
539 /*
540  *------------------------------------------------------------------------------
541  *
542  * vmci_dma_malloc --
543  *
544  *     This function is a wrapper around vmci_dma_malloc_int for callers
545  *     outside of this module. Since we only support a single VMCI device, this
546  *     wrapper provides access to the device softc structure.
547  *
548  * Results:
549  *     0 if success.
550  *     ENOMEM if insufficient memory.
551  *     EINPROGRESS if mapping is deferred.
552  *     EINVAL if the request was invalid.
553  *
554  * Side effects:
555  *     DMA memory is allocated.
556  *
557  *------------------------------------------------------------------------------
558  */
559 
560 int
561 vmci_dma_malloc(bus_size_t size, bus_size_t align, struct vmci_dma_alloc *dma)
562 {
563 
564 	return (vmci_dma_malloc_int(vmci_sc, size, align, dma));
565 }
566 
567 /*
568  *------------------------------------------------------------------------------
569  *
570  * vmci_dma_free_int --
571  *
572  *     Internal function that frees DMA memory.
573  *
574  * Results:
575  *     None.
576  *
577  * Side effects:
578  *     Frees DMA memory.
579  *
580  *------------------------------------------------------------------------------
581  */
582 
583 static void
584 vmci_dma_free_int(struct vmci_softc *sc, struct vmci_dma_alloc *dma)
585 {
586 
587 	if (dma->dma_tag != NULL) {
588 		if (dma->dma_paddr != 0) {
589 			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
590 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
591 			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
592 		}
593 
594 		if (dma->dma_vaddr != NULL)
595 			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
596 			    dma->dma_map);
597 
598 		bus_dma_tag_destroy(dma->dma_tag);
599 	}
600 	bzero(dma, sizeof(struct vmci_dma_alloc));
601 }
602 
603 /*
604  *------------------------------------------------------------------------------
605  *
606  * vmci_dma_free --
607  *
608  *     This function is a wrapper around vmci_dma_free_int for callers outside
609  *     of this module. Since we only support a single VMCI device, this wrapper
610  *     provides access to the device softc structure.
611  *
612  * Results:
613  *     None.
614  *
615  * Side effects:
616  *     Frees DMA memory.
617  *
618  *------------------------------------------------------------------------------
619  */
620 
621 void
622 vmci_dma_free(struct vmci_dma_alloc *dma)
623 {
624 
625 	vmci_dma_free_int(vmci_sc, dma);
626 }
627 
628 /*
629  *------------------------------------------------------------------------------
630  *
631  * vmci_config_interrupts --
632  *
633  *     Configures and enables interrupts. Try to configure MSI-X. If this fails,
634  *     try to configure MSI. If even this fails, try legacy interrupts.
635  *
636  * Results:
637  *     0 if success.
638  *     ENOMEM if insufficient memory.
639  *     ENODEV if the device doesn't support interrupts.
640  *     ENXIO if the device configuration failed.
641  *
642  * Side effects:
643  *     Interrupts get enabled if successful.
644  *
645  *------------------------------------------------------------------------------
646  */
647 
648 static int
649 vmci_config_interrupts(struct vmci_softc *sc)
650 {
651 	int error;
652 
653 	data_buffer = malloc(data_buffer_size, M_DEVBUF, M_ZERO | M_NOWAIT);
654 	if (data_buffer == NULL)
655 		return (ENOMEM);
656 
657 	sc->vmci_intr_type = VMCI_INTR_TYPE_MSIX;
658 	error = vmci_config_interrupt(sc);
659 	if (error) {
660 		sc->vmci_intr_type = VMCI_INTR_TYPE_MSI;
661 		error = vmci_config_interrupt(sc);
662 	}
663 	if (error) {
664 		sc->vmci_intr_type = VMCI_INTR_TYPE_INTX;
665 		error = vmci_config_interrupt(sc);
666 	}
667 	if (error)
668 		return (error);
669 
670 	/* Enable specific interrupt bits. */
671 	if (sc->capabilities & VMCI_CAPS_NOTIFICATIONS)
672 		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
673 		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM | VMCI_IMR_NOTIFICATION);
674 	else
675 		bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
676 		    VMCI_IMR_ADDR, VMCI_IMR_DATAGRAM);
677 
678 	/* Enable interrupts. */
679 	bus_space_write_4(sc->vmci_iot0, sc->vmci_ioh0,
680 	    VMCI_CONTROL_ADDR, VMCI_CONTROL_INT_ENABLE);
681 
682 	return (0);
683 }
684 
/*
 *------------------------------------------------------------------------------
 *
 * vmci_config_interrupt --
 *
 *     Runs the three configuration stages for the interrupt type currently
 *     selected in sc->vmci_intr_type: vmci_check_intr_cnt(),
 *     vmci_allocate_interrupt_resources() and vmci_setup_interrupts(). The
 *     first failing stage stops the sequence.
 *
 * Results:
 *     0 if success, otherwise the error code of the stage that failed.
 *
 * Side effects:
 *     Resources get allocated and interrupts get setup (but not enabled) if
 *     successful.
 *
 *------------------------------------------------------------------------------
 */

static int
vmci_config_interrupt(struct vmci_softc *sc)
{
	int rc;

	rc = vmci_check_intr_cnt(sc);
	if (rc == 0)
		rc = vmci_allocate_interrupt_resources(sc);
	if (rc == 0)
		rc = vmci_setup_interrupts(sc);

	return (rc);
}
725 
726 /*
727  *------------------------------------------------------------------------------
728  *
729  * vmci_check_intr_cnt --
730  *
731  *     Check the number of interrupts supported by the device and ask PCI bus
732  *     to allocate appropriate number of interrupts.
733  *
734  * Results:
735  *     0 if success.
736  *     ENODEV if the device doesn't support any interrupts.
737  *     ENXIO if the device configuration failed.
738  *
739  * Side effects:
740  *     Resources get allocated on success.
741  *
742  *------------------------------------------------------------------------------
743  */
744 
745 static int
746 vmci_check_intr_cnt(struct vmci_softc *sc)
747 {
748 
749 	if (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) {
750 		sc->vmci_num_intr = 1;
751 		return (0);
752 	}
753 
754 	/*
755 	 * Make sure that the device supports the required number of MSI/MSI-X
756 	 * messages. We try for 2 MSI-X messages but 1 is good too. We need at
757 	 * least 1 MSI message.
758 	 */
759 	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
760 	    pci_msix_count(sc->vmci_dev) : pci_msi_count(sc->vmci_dev);
761 
762 	if (!sc->vmci_num_intr) {
763 		VMCI_LOG_ERROR(LGPFX"Device does not support any interrupt"
764 		    " messages");
765 		return (ENODEV);
766 	}
767 
768 	sc->vmci_num_intr = (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) ?
769 	    VMCI_MAX_INTRS : 1;
770 	if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSIX) {
771 		if (pci_alloc_msix(sc->vmci_dev, &sc->vmci_num_intr))
772 			return (ENXIO);
773 	} else if (sc->vmci_intr_type == VMCI_INTR_TYPE_MSI) {
774 		if (pci_alloc_msi(sc->vmci_dev, &sc->vmci_num_intr))
775 			return (ENXIO);
776 	}
777 
778 	return (0);
779 }
780 
781 /*
782  *------------------------------------------------------------------------------
783  *
784  * vmci_allocate_interrupt_resources --
785  *
786  *     Allocate resources necessary for interrupts.
787  *
788  * Results:
789  *     0 if success, ENXIO otherwise.
790  *
791  * Side effects:
792  *     Resources get allocated on success.
793  *
794  *------------------------------------------------------------------------------
795  */
796 
797 static int
798 vmci_allocate_interrupt_resources(struct vmci_softc *sc)
799 {
800 	struct resource *irq;
801 	int flags, i, rid;
802 
803 	flags = RF_ACTIVE;
804 	flags |= (sc->vmci_num_intr == 1) ? RF_SHAREABLE : 0;
805 	rid = (sc->vmci_intr_type == VMCI_INTR_TYPE_INTX) ? 0 : 1;
806 
807 	for (i = 0; i < sc->vmci_num_intr; i++, rid++) {
808 		irq = bus_alloc_resource_any(sc->vmci_dev, SYS_RES_IRQ, &rid,
809 		    flags);
810 		if (irq == NULL)
811 			return (ENXIO);
812 		sc->vmci_intrs[i].vmci_irq = irq;
813 		sc->vmci_intrs[i].vmci_rid = rid;
814 	}
815 
816 	return (0);
817 }
818 
819 /*
820  *------------------------------------------------------------------------------
821  *
822  * vmci_setup_interrupts --
823  *
824  *     Sets up the interrupts.
825  *
826  * Results:
827  *     0 if success, appropriate error code from bus_setup_intr otherwise.
828  *
829  * Side effects:
830  *     Interrupt handler gets attached.
831  *
832  *------------------------------------------------------------------------------
833  */
834 
835 static int
836 vmci_setup_interrupts(struct vmci_softc *sc)
837 {
838 	struct vmci_interrupt *intr;
839 	int error, flags;
840 
841 	flags = INTR_TYPE_NET | INTR_MPSAFE;
842 	if (sc->vmci_num_intr > 1)
843 		flags |= INTR_EXCL;
844 
845 	intr = &sc->vmci_intrs[0];
846 	error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags, NULL,
847 	    vmci_interrupt, NULL, &intr->vmci_handler);
848 	if (error)
849 		return (error);
850 
851 	if (sc->vmci_num_intr == 2) {
852 		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
853 		    intr->vmci_handler, "dg");
854 		intr = &sc->vmci_intrs[1];
855 		error = bus_setup_intr(sc->vmci_dev, intr->vmci_irq, flags,
856 		    NULL, vmci_interrupt_bm, NULL, &intr->vmci_handler);
857 		if (error)
858 			return (error);
859 		bus_describe_intr(sc->vmci_dev, intr->vmci_irq,
860 		    intr->vmci_handler, "bm");
861 	}
862 
863 	return (0);
864 }
865 
866 /*
867  *------------------------------------------------------------------------------
868  *
869  * vmci_interrupt --
870  *
871  *     Interrupt handler for legacy or MSI interrupt, or for first MSI-X
872  *     interrupt (vector VMCI_INTR_DATAGRAM).
873  *
874  * Results:
875  *     None.
876  *
877  * Side effects:
878  *     None.
879  *
880  *------------------------------------------------------------------------------
881  */
882 
883 static void
884 vmci_interrupt(void *arg)
885 {
886 
887 	if (vmci_sc->vmci_num_intr == 2)
888 		taskqueue_enqueue(taskqueue_swi,
889 		    &vmci_sc->vmci_interrupt_dq_task);
890 	else {
891 		unsigned int icr;
892 
893 		icr = inl(vmci_sc->vmci_ioaddr + VMCI_ICR_ADDR);
894 		if (icr == 0 || icr == 0xffffffff)
895 			return;
896 		if (icr & VMCI_ICR_DATAGRAM) {
897 			taskqueue_enqueue(taskqueue_swi,
898 			    &vmci_sc->vmci_interrupt_dq_task);
899 			icr &= ~VMCI_ICR_DATAGRAM;
900 		}
901 		if (icr & VMCI_ICR_NOTIFICATION) {
902 			taskqueue_enqueue(taskqueue_swi,
903 			    &vmci_sc->vmci_interrupt_bm_task);
904 			icr &= ~VMCI_ICR_NOTIFICATION;
905 		}
906 		if (icr != 0)
907 			VMCI_LOG_INFO(LGPFX"Ignoring unknown interrupt "
908 			    "cause");
909 	}
910 }
911 
912 /*
913  *------------------------------------------------------------------------------
914  *
915  * vmci_interrupt_bm --
916  *
917  *     Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
918  *     which is for the notification bitmap. Will only get called if we are
919  *     using MSI-X with exclusive vectors.
920  *
921  * Results:
922  *     None.
923  *
924  * Side effects:
925  *     None.
926  *
927  *------------------------------------------------------------------------------
928  */
929 
930 static void
931 vmci_interrupt_bm(void *arg)
932 {
933 
934 	ASSERT(vmci_sc->vmci_num_intr == 2);
935 	taskqueue_enqueue(taskqueue_swi, &vmci_sc->vmci_interrupt_bm_task);
936 }
937 
938 /*
939  *------------------------------------------------------------------------------
940  *
941  * dispatch_datagrams --
942  *
943  *     Reads and dispatches incoming datagrams.
944  *
945  * Results:
946  *     None.
947  *
948  * Side effects:
949  *     Reads data from the device.
950  *
951  *------------------------------------------------------------------------------
952  */
953 
954 static void
955 dispatch_datagrams(void *context, int data)
956 {
957 
958 	if (data_buffer == NULL)
959 		VMCI_LOG_INFO(LGPFX"dispatch_datagrams(): no buffer "
960 		    "present");
961 
962 	vmci_read_datagrams_from_port((vmci_io_handle) 0,
963 	    vmci_sc->vmci_ioaddr + VMCI_DATA_IN_ADDR,
964 	    data_buffer, data_buffer_size);
965 }
966 
967 /*
968  *------------------------------------------------------------------------------
969  *
970  * process_bitmap --
971  *
972  *     Scans the notification bitmap for raised flags, clears them and handles
973  *     the notifications.
974  *
975  * Results:
976  *     None.
977  *
978  * Side effects:
979  *     None.
980  *
981  *------------------------------------------------------------------------------
982  */
983 
984 static void
985 process_bitmap(void *context, int data)
986 {
987 
988 	if (vmci_sc->vmci_notifications_bitmap.dma_vaddr == NULL)
989 		VMCI_LOG_INFO(LGPFX"process_bitmaps(): no bitmap present");
990 
991 	vmci_scan_notification_bitmap(
992 	    vmci_sc->vmci_notifications_bitmap.dma_vaddr);
993 }
994 
995 /*
996  *------------------------------------------------------------------------------
997  *
998  * vmci_dismantle_interrupts --
999  *
1000  *     Releases resources, detaches the interrupt handler and drains the task
1001  *     queue.
1002  *
1003  * Results:
1004  *     None.
1005  *
1006  * Side effects:
1007  *     No more interrupts.
1008  *
1009  *------------------------------------------------------------------------------
1010  */
1011 
1012 static void
1013 vmci_dismantle_interrupts(struct vmci_softc *sc)
1014 {
1015 	struct vmci_interrupt *intr;
1016 	int i;
1017 
1018 	for (i = 0; i < sc->vmci_num_intr; i++) {
1019 		intr = &sc->vmci_intrs[i];
1020 		if (intr->vmci_handler != NULL) {
1021 			bus_teardown_intr(sc->vmci_dev, intr->vmci_irq,
1022 			    intr->vmci_handler);
1023 			intr->vmci_handler = NULL;
1024 		}
1025 		if (intr->vmci_irq != NULL) {
1026 			bus_release_resource(sc->vmci_dev, SYS_RES_IRQ,
1027 			    intr->vmci_rid, intr->vmci_irq);
1028 			intr->vmci_irq = NULL;
1029 			intr->vmci_rid = -1;
1030 		}
1031 	}
1032 
1033 	if ((sc->vmci_intr_type != VMCI_INTR_TYPE_INTX) &&
1034 	    (sc->vmci_num_intr))
1035 		pci_release_msi(sc->vmci_dev);
1036 
1037 	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_dq_task);
1038 	taskqueue_drain(taskqueue_swi, &sc->vmci_interrupt_bm_task);
1039 
1040 	if (data_buffer != NULL)
1041 		free(data_buffer, M_DEVBUF);
1042 }
1043 
1044 /*
1045  *------------------------------------------------------------------------------
1046  *
1047  * vmci_delayed_work_fn_cb --
1048  *
1049  *     Callback function that executes the queued up delayed work functions.
1050  *
1051  * Results:
1052  *     None.
1053  *
1054  * Side effects:
1055  *     None.
1056  *
1057  *------------------------------------------------------------------------------
1058  */
1059 
1060 static void
1061 vmci_delayed_work_fn_cb(void *context, int data)
1062 {
1063 	vmci_list(vmci_delayed_work_info) temp_list;
1064 
1065 	vmci_list_init(&temp_list);
1066 
1067 	/*
1068 	 * Swap vmci_delayed_work_infos list with the empty temp_list while
1069 	 * holding a lock. vmci_delayed_work_infos would then be an empty list
1070 	 * and temp_list would contain the elements from the original
1071 	 * vmci_delayed_work_infos. Finally, iterate through temp_list
1072 	 * executing the delayed callbacks.
1073 	 */
1074 
1075 	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
1076 	vmci_list_swap(&temp_list, &vmci_sc->vmci_delayed_work_infos,
1077 	    vmci_delayed_work_info, entry);
1078 	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
1079 
1080 	while (!vmci_list_empty(&temp_list)) {
1081 		struct vmci_delayed_work_info *delayed_work_info =
1082 		    vmci_list_first(&temp_list);
1083 
1084 		delayed_work_info->work_fn(delayed_work_info->data);
1085 
1086 		vmci_list_remove(delayed_work_info, entry);
1087 		vmci_free_kernel_mem(delayed_work_info,
1088 		    sizeof(*delayed_work_info));
1089 	}
1090 }
1091 
1092 /*
1093  *------------------------------------------------------------------------------
1094  *
1095  * vmci_schedule_delayed_work_fn --
1096  *
1097  *     Schedule the specified callback.
1098  *
1099  * Results:
1100  *     0 if success, error code otherwise.
1101  *
1102  * Side effects:
1103  *     None.
1104  *
1105  *------------------------------------------------------------------------------
1106  */
1107 
1108 int
1109 vmci_schedule_delayed_work_fn(vmci_work_fn *work_fn, void *data)
1110 {
1111 	struct vmci_delayed_work_info *delayed_work_info;
1112 
1113 	delayed_work_info = vmci_alloc_kernel_mem(sizeof(*delayed_work_info),
1114 	    VMCI_MEMORY_ATOMIC);
1115 
1116 	if (!delayed_work_info)
1117 		return (VMCI_ERROR_NO_MEM);
1118 
1119 	delayed_work_info->work_fn = work_fn;
1120 	delayed_work_info->data = data;
1121 	mtx_lock(&vmci_sc->vmci_delayed_work_lock);
1122 	vmci_list_insert(&vmci_sc->vmci_delayed_work_infos,
1123 	    delayed_work_info, entry);
1124 	mtx_unlock(&vmci_sc->vmci_delayed_work_lock);
1125 
1126 	taskqueue_enqueue(taskqueue_thread,
1127 	    &vmci_sc->vmci_delayed_work_task);
1128 
1129 	return (VMCI_SUCCESS);
1130 }
1131 
1132 /*
1133  *------------------------------------------------------------------------------
1134  *
1135  * vmci_send_datagram --
1136  *
1137  *     VM to hypervisor call mechanism.
1138  *
1139  * Results:
1140  *     The result of the hypercall.
1141  *
1142  * Side effects:
1143  *     None.
1144  *
1145  *------------------------------------------------------------------------------
1146  */
1147 
1148 int
1149 vmci_send_datagram(struct vmci_datagram *dg)
1150 {
1151 	int result;
1152 
1153 	if (dg == NULL)
1154 		return (VMCI_ERROR_INVALID_ARGS);
1155 
1156 	/*
1157 	 * Need to acquire spinlock on the device because
1158 	 * the datagram data may be spread over multiple pages and the monitor
1159 	 * may interleave device user rpc calls from multiple VCPUs. Acquiring
1160 	 * the spinlock precludes that possibility. Disabling interrupts to
1161 	 * avoid incoming datagrams during a "rep out" and possibly landing up
1162 	 * in this function.
1163 	 */
1164 	mtx_lock_spin(&vmci_sc->vmci_spinlock);
1165 
1166 	/*
1167 	 * Send the datagram and retrieve the return value from the result
1168 	 * register.
1169 	 */
1170 	__asm__ __volatile__(
1171 	    "cld\n\t"
1172 	    "rep outsb\n\t"
1173 	    : /* No output. */
1174 	    : "d"(vmci_sc->vmci_ioaddr + VMCI_DATA_OUT_ADDR),
1175 	    "c"(VMCI_DG_SIZE(dg)), "S"(dg)
1176 	    );
1177 
1178 	/*
1179 	 * XXX: Should read result high port as well when updating handlers to
1180 	 * return 64bit.
1181 	 */
1182 
1183 	result = bus_space_read_4(vmci_sc->vmci_iot0,
1184 	    vmci_sc->vmci_ioh0, VMCI_RESULT_LOW_ADDR);
1185 	mtx_unlock_spin(&vmci_sc->vmci_spinlock);
1186 
1187 	return (result);
1188 }
1189