xref: /freebsd/sys/dev/virtio/pci/virtio_pci.c (revision 8d20be1e22095c27faf8fe8b2f0d089739cc742e)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for the VirtIO PCI interface. */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/malloc.h>
38 
39 #include <machine/bus.h>
40 #include <machine/resource.h>
41 #include <sys/bus.h>
42 #include <sys/rman.h>
43 
44 #include <dev/pci/pcivar.h>
45 #include <dev/pci/pcireg.h>
46 
47 #include <dev/virtio/virtio.h>
48 #include <dev/virtio/virtqueue.h>
49 #include <dev/virtio/pci/virtio_pci.h>
50 
51 #include "virtio_bus_if.h"
52 #include "virtio_if.h"
53 
/*
 * Bookkeeping for one allocated interrupt: the IRQ resource, the resource
 * ID it was allocated with, and the handler cookie filled in by
 * bus_setup_intr().
 */
struct vtpci_interrupt {
	/* IRQ resource; NULL while nothing is allocated. */
	struct resource		*vti_irq;
	/* Resource ID of vti_irq; set to -1 when the resource is released. */
	int			 vti_rid;
	/* Cookie from bus_setup_intr(); NULL while no handler is installed. */
	void			*vti_handler;
};
59 
/* Per-virtqueue state tracked by the PCI transport. */
struct vtpci_virtqueue {
	/* The allocated virtqueue; NULL once it has been freed. */
	struct virtqueue	*vtv_vq;
	/* Non-zero when the child supplied no interrupt callback for it. */
	int			 vtv_no_intr;
};
64 
/* Driver instance state for one VirtIO PCI device. */
struct vtpci_softc {
	/* The PCI device this softc belongs to. */
	device_t			 vtpci_dev;
	/* BAR 0 I/O port resource holding the VirtIO registers. */
	struct resource			*vtpci_res;
	/* BAR 1 memory resource, mapped only if the MSIX capability exists. */
	struct resource			*vtpci_msix_res;
	/* Feature bits agreed on by vtpci_negotiate_features(). */
	uint64_t			 vtpci_features;
	/*
	 * The NO_* flags record capabilities found unusable at attach time.
	 * The flags covered by VTPCI_FLAG_ITYPE_MASK record the interrupt
	 * type currently selected and are cleared by vtpci_free_interrupts().
	 */
	uint32_t			 vtpci_flags;
#define VTPCI_FLAG_NO_MSI		0x0001
#define VTPCI_FLAG_NO_MSIX		0x0002
#define VTPCI_FLAG_LEGACY		0x1000
#define VTPCI_FLAG_MSI			0x2000
#define VTPCI_FLAG_MSIX			0x4000
#define VTPCI_FLAG_SHARED_MSIX		0x8000
#define VTPCI_FLAG_ITYPE_MASK		0xF000

	/* This "bus" will only ever have one child. */
	device_t			 vtpci_child_dev;
	/* Feature description table supplied by the child through an ivar. */
	struct virtio_feature_desc	*vtpci_child_feat_desc;

	/* Number of entries in vtpci_vqs that hold an allocated virtqueue. */
	int				 vtpci_nvqs;
	struct vtpci_virtqueue		*vtpci_vqs;

	/*
	 * Ideally, each virtqueue that the driver provides a callback for will
	 * receive its own MSIX vector. If there are not sufficient vectors
	 * available, then attempt to have all the VQs share one vector. For
	 * MSIX, the configuration changed notifications must be on their own
	 * vector.
	 *
	 * If MSIX is not available, we will attempt to have the whole device
	 * share one MSI vector, and then, finally, one legacy interrupt.
	 */
	struct vtpci_interrupt		 vtpci_device_interrupt;
	struct vtpci_interrupt		*vtpci_msix_vq_interrupts;
	/* MSIX vectors allocated, including the configuration change vector. */
	int				 vtpci_nmsix_resources;
};
100 
101 static int	vtpci_probe(device_t);
102 static int	vtpci_attach(device_t);
103 static int	vtpci_detach(device_t);
104 static int	vtpci_suspend(device_t);
105 static int	vtpci_resume(device_t);
106 static int	vtpci_shutdown(device_t);
107 static void	vtpci_driver_added(device_t, driver_t *);
108 static void	vtpci_child_detached(device_t, device_t);
109 static int	vtpci_read_ivar(device_t, device_t, int, uintptr_t *);
110 static int	vtpci_write_ivar(device_t, device_t, int, uintptr_t);
111 
112 static uint64_t	vtpci_negotiate_features(device_t, uint64_t);
113 static int	vtpci_with_feature(device_t, uint64_t);
114 static int	vtpci_alloc_virtqueues(device_t, int, int,
115 		    struct vq_alloc_info *);
116 static int	vtpci_setup_intr(device_t, enum intr_type);
117 static void	vtpci_stop(device_t);
118 static int	vtpci_reinit(device_t, uint64_t);
119 static void	vtpci_reinit_complete(device_t);
120 static void	vtpci_notify_virtqueue(device_t, uint16_t);
121 static uint8_t	vtpci_get_status(device_t);
122 static void	vtpci_set_status(device_t, uint8_t);
123 static void	vtpci_read_dev_config(device_t, bus_size_t, void *, int);
124 static void	vtpci_write_dev_config(device_t, bus_size_t, void *, int);
125 
126 static void	vtpci_describe_features(struct vtpci_softc *, const char *,
127 		    uint64_t);
128 static void	vtpci_probe_and_attach_child(struct vtpci_softc *);
129 
130 static int	vtpci_alloc_msix(struct vtpci_softc *, int);
131 static int	vtpci_alloc_msi(struct vtpci_softc *);
132 static int	vtpci_alloc_intr_msix_pervq(struct vtpci_softc *);
133 static int	vtpci_alloc_intr_msix_shared(struct vtpci_softc *);
134 static int	vtpci_alloc_intr_msi(struct vtpci_softc *);
135 static int	vtpci_alloc_intr_legacy(struct vtpci_softc *);
136 static int	vtpci_alloc_interrupt(struct vtpci_softc *, int, int,
137 		    struct vtpci_interrupt *);
138 static int	vtpci_alloc_intr_resources(struct vtpci_softc *);
139 
140 static int	vtpci_setup_legacy_interrupt(struct vtpci_softc *,
141 		    enum intr_type);
142 static int	vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *,
143 		    enum intr_type);
144 static int	vtpci_setup_msix_interrupts(struct vtpci_softc *,
145 		    enum intr_type);
146 static int	vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type);
147 
148 static int	vtpci_register_msix_vector(struct vtpci_softc *, int,
149 		    struct vtpci_interrupt *);
150 static int	vtpci_set_host_msix_vectors(struct vtpci_softc *);
151 static int	vtpci_reinit_virtqueue(struct vtpci_softc *, int);
152 
153 static void	vtpci_free_interrupt(struct vtpci_softc *,
154 		    struct vtpci_interrupt *);
155 static void	vtpci_free_interrupts(struct vtpci_softc *);
156 static void	vtpci_free_virtqueues(struct vtpci_softc *);
157 static void	vtpci_release_child_resources(struct vtpci_softc *);
158 static void	vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *);
159 static void	vtpci_reset(struct vtpci_softc *);
160 
161 static void	vtpci_select_virtqueue(struct vtpci_softc *, int);
162 
163 static void	vtpci_legacy_intr(void *);
164 static int	vtpci_vq_shared_intr_filter(void *);
165 static void	vtpci_vq_shared_intr(void *);
166 static int	vtpci_vq_intr_filter(void *);
167 static void	vtpci_vq_intr(void *);
168 static void	vtpci_config_intr(void *);
169 
/* An MSI interrupt is set up exactly like the legacy interrupt. */
#define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt

/*
 * I/O port read/write wrappers.
 *
 * Each takes the softc and a byte offset into the BAR 0 resource
 * (vtpci_res); the numeric suffix is the access width in bytes.
 */
#define vtpci_read_config_1(sc, o)	bus_read_1((sc)->vtpci_res, (o))
#define vtpci_read_config_2(sc, o)	bus_read_2((sc)->vtpci_res, (o))
#define vtpci_read_config_4(sc, o)	bus_read_4((sc)->vtpci_res, (o))
#define vtpci_write_config_1(sc, o, v)	bus_write_1((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_2(sc, o, v)	bus_write_2((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_4(sc, o, v)	bus_write_4((sc)->vtpci_res, (o), (v))
181 
/*
 * Tunables.
 *
 * hw.virtio.pci.disable_msix: when non-zero, both MSIX allocation
 * strategies (per-VQ and shared) are refused with ENOTSUP.
 */
static int vtpci_disable_msix = 0;
TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
185 
/*
 * Method table for the driver: the generic device and bus interfaces plus
 * the VirtIO bus interface that the child device calls into.
 */
static device_method_t vtpci_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,			  vtpci_probe),
	DEVMETHOD(device_attach,		  vtpci_attach),
	DEVMETHOD(device_detach,		  vtpci_detach),
	DEVMETHOD(device_suspend,		  vtpci_suspend),
	DEVMETHOD(device_resume,		  vtpci_resume),
	DEVMETHOD(device_shutdown,		  vtpci_shutdown),

	/* Bus interface. */
	DEVMETHOD(bus_driver_added,		  vtpci_driver_added),
	DEVMETHOD(bus_child_detached,		  vtpci_child_detached),
	DEVMETHOD(bus_read_ivar,		  vtpci_read_ivar),
	DEVMETHOD(bus_write_ivar,		  vtpci_write_ivar),

	/* VirtIO bus interface. */
	DEVMETHOD(virtio_bus_negotiate_features,  vtpci_negotiate_features),
	DEVMETHOD(virtio_bus_with_feature,	  vtpci_with_feature),
	DEVMETHOD(virtio_bus_alloc_virtqueues,	  vtpci_alloc_virtqueues),
	DEVMETHOD(virtio_bus_setup_intr,	  vtpci_setup_intr),
	DEVMETHOD(virtio_bus_stop,		  vtpci_stop),
	DEVMETHOD(virtio_bus_reinit,		  vtpci_reinit),
	DEVMETHOD(virtio_bus_reinit_complete,	  vtpci_reinit_complete),
	DEVMETHOD(virtio_bus_notify_vq,		  vtpci_notify_virtqueue),
	DEVMETHOD(virtio_bus_read_device_config,  vtpci_read_dev_config),
	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),

	DEVMETHOD_END
};
215 
/* Driver description: name, method table, and per-instance softc size. */
static driver_t vtpci_driver = {
	"virtio_pci",
	vtpci_methods,
	sizeof(struct vtpci_softc)
};

devclass_t vtpci_devclass;

/* Register on the pci bus; depends on the pci and virtio modules. */
DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, 0, 0);
MODULE_VERSION(virtio_pci, 1);
MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
228 
229 static int
230 vtpci_probe(device_t dev)
231 {
232 	char desc[36];
233 	const char *name;
234 
235 	if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID)
236 		return (ENXIO);
237 
238 	if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN ||
239 	    pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX)
240 		return (ENXIO);
241 
242 	if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION)
243 		return (ENXIO);
244 
245 	name = virtio_device_name(pci_get_subdevice(dev));
246 	if (name == NULL)
247 		name = "Unknown";
248 
249 	snprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name);
250 	device_set_desc_copy(dev, desc);
251 
252 	return (BUS_PROBE_DEFAULT);
253 }
254 
255 static int
256 vtpci_attach(device_t dev)
257 {
258 	struct vtpci_softc *sc;
259 	device_t child;
260 	int rid;
261 
262 	sc = device_get_softc(dev);
263 	sc->vtpci_dev = dev;
264 
265 	pci_enable_busmaster(dev);
266 
267 	rid = PCIR_BAR(0);
268 	sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
269 	    RF_ACTIVE);
270 	if (sc->vtpci_res == NULL) {
271 		device_printf(dev, "cannot map I/O space\n");
272 		return (ENXIO);
273 	}
274 
275 	if (pci_find_cap(dev, PCIY_MSI, NULL) != 0)
276 		sc->vtpci_flags |= VTPCI_FLAG_NO_MSI;
277 
278 	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
279 		rid = PCIR_BAR(1);
280 		sc->vtpci_msix_res = bus_alloc_resource_any(dev,
281 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
282 	}
283 
284 	if (sc->vtpci_msix_res == NULL)
285 		sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
286 
287 	vtpci_reset(sc);
288 
289 	/* Tell the host we've noticed this device. */
290 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
291 
292 	if ((child = device_add_child(dev, NULL, -1)) == NULL) {
293 		device_printf(dev, "cannot create child device\n");
294 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
295 		vtpci_detach(dev);
296 		return (ENOMEM);
297 	}
298 
299 	sc->vtpci_child_dev = child;
300 	vtpci_probe_and_attach_child(sc);
301 
302 	return (0);
303 }
304 
305 static int
306 vtpci_detach(device_t dev)
307 {
308 	struct vtpci_softc *sc;
309 	device_t child;
310 	int error;
311 
312 	sc = device_get_softc(dev);
313 
314 	if ((child = sc->vtpci_child_dev) != NULL) {
315 		error = device_delete_child(dev, child);
316 		if (error)
317 			return (error);
318 		sc->vtpci_child_dev = NULL;
319 	}
320 
321 	vtpci_reset(sc);
322 
323 	if (sc->vtpci_msix_res != NULL) {
324 		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1),
325 		    sc->vtpci_msix_res);
326 		sc->vtpci_msix_res = NULL;
327 	}
328 
329 	if (sc->vtpci_res != NULL) {
330 		bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0),
331 		    sc->vtpci_res);
332 		sc->vtpci_res = NULL;
333 	}
334 
335 	return (0);
336 }
337 
338 static int
339 vtpci_suspend(device_t dev)
340 {
341 
342 	return (bus_generic_suspend(dev));
343 }
344 
345 static int
346 vtpci_resume(device_t dev)
347 {
348 
349 	return (bus_generic_resume(dev));
350 }
351 
352 static int
353 vtpci_shutdown(device_t dev)
354 {
355 
356 	(void) bus_generic_shutdown(dev);
357 	/* Forcibly stop the host device. */
358 	vtpci_stop(dev);
359 
360 	return (0);
361 }
362 
363 static void
364 vtpci_driver_added(device_t dev, driver_t *driver)
365 {
366 	struct vtpci_softc *sc;
367 
368 	sc = device_get_softc(dev);
369 
370 	vtpci_probe_and_attach_child(sc);
371 }
372 
373 static void
374 vtpci_child_detached(device_t dev, device_t child)
375 {
376 	struct vtpci_softc *sc;
377 
378 	sc = device_get_softc(dev);
379 
380 	vtpci_reset(sc);
381 	vtpci_release_child_resources(sc);
382 }
383 
384 static int
385 vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
386 {
387 	struct vtpci_softc *sc;
388 
389 	sc = device_get_softc(dev);
390 
391 	if (sc->vtpci_child_dev != child)
392 		return (ENOENT);
393 
394 	switch (index) {
395 	case VIRTIO_IVAR_DEVTYPE:
396 	case VIRTIO_IVAR_SUBDEVICE:
397 		*result = pci_get_subdevice(dev);
398 		break;
399 	case VIRTIO_IVAR_VENDOR:
400 		*result = pci_get_vendor(dev);
401 		break;
402 	case VIRTIO_IVAR_DEVICE:
403 		*result = pci_get_device(dev);
404 		break;
405 	case VIRTIO_IVAR_SUBVENDOR:
406 		*result = pci_get_subdevice(dev);
407 		break;
408 	default:
409 		return (ENOENT);
410 	}
411 
412 	return (0);
413 }
414 
415 static int
416 vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
417 {
418 	struct vtpci_softc *sc;
419 
420 	sc = device_get_softc(dev);
421 
422 	if (sc->vtpci_child_dev != child)
423 		return (ENOENT);
424 
425 	switch (index) {
426 	case VIRTIO_IVAR_FEATURE_DESC:
427 		sc->vtpci_child_feat_desc = (void *) value;
428 		break;
429 	default:
430 		return (ENOENT);
431 	}
432 
433 	return (0);
434 }
435 
436 static uint64_t
437 vtpci_negotiate_features(device_t dev, uint64_t child_features)
438 {
439 	struct vtpci_softc *sc;
440 	uint64_t host_features, features;
441 
442 	sc = device_get_softc(dev);
443 
444 	host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES);
445 	vtpci_describe_features(sc, "host", host_features);
446 
447 	/*
448 	 * Limit negotiated features to what the driver, virtqueue, and
449 	 * host all support.
450 	 */
451 	features = host_features & child_features;
452 	features = virtqueue_filter_features(features);
453 	sc->vtpci_features = features;
454 
455 	vtpci_describe_features(sc, "negotiated", features);
456 	vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features);
457 
458 	return (features);
459 }
460 
461 static int
462 vtpci_with_feature(device_t dev, uint64_t feature)
463 {
464 	struct vtpci_softc *sc;
465 
466 	sc = device_get_softc(dev);
467 
468 	return ((sc->vtpci_features & feature) != 0);
469 }
470 
471 static int
472 vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs,
473     struct vq_alloc_info *vq_info)
474 {
475 	struct vtpci_softc *sc;
476 	struct virtqueue *vq;
477 	struct vtpci_virtqueue *vqx;
478 	struct vq_alloc_info *info;
479 	int idx, error;
480 	uint16_t size;
481 
482 	sc = device_get_softc(dev);
483 
484 	if (sc->vtpci_nvqs != 0)
485 		return (EALREADY);
486 	if (nvqs <= 0)
487 		return (EINVAL);
488 
489 	sc->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue),
490 	    M_DEVBUF, M_NOWAIT | M_ZERO);
491 	if (sc->vtpci_vqs == NULL)
492 		return (ENOMEM);
493 
494 	for (idx = 0; idx < nvqs; idx++) {
495 		vqx = &sc->vtpci_vqs[idx];
496 		info = &vq_info[idx];
497 
498 		vtpci_select_virtqueue(sc, idx);
499 		size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
500 
501 		error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN,
502 		    0xFFFFFFFFUL, info, &vq);
503 		if (error) {
504 			device_printf(dev,
505 			    "cannot allocate virtqueue %d: %d\n", idx, error);
506 			break;
507 		}
508 
509 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
510 		    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
511 
512 		vqx->vtv_vq = *info->vqai_vq = vq;
513 		vqx->vtv_no_intr = info->vqai_intr == NULL;
514 
515 		sc->vtpci_nvqs++;
516 	}
517 
518 	if (error)
519 		vtpci_free_virtqueues(sc);
520 
521 	return (error);
522 }
523 
524 static int
525 vtpci_setup_intr(device_t dev, enum intr_type type)
526 {
527 	struct vtpci_softc *sc;
528 	int attempt, error;
529 
530 	sc = device_get_softc(dev);
531 
532 	for (attempt = 0; attempt < 5; attempt++) {
533 		/*
534 		 * Start with the most desirable interrupt configuration and
535 		 * fallback towards less desirable ones.
536 		 */
537 		switch (attempt) {
538 		case 0:
539 			error = vtpci_alloc_intr_msix_pervq(sc);
540 			break;
541 		case 1:
542 			error = vtpci_alloc_intr_msix_shared(sc);
543 			break;
544 		case 2:
545 			error = vtpci_alloc_intr_msi(sc);
546 			break;
547 		case 3:
548 			error = vtpci_alloc_intr_legacy(sc);
549 			break;
550 		default:
551 			device_printf(dev,
552 			    "exhausted all interrupt allocation attempts\n");
553 			return (ENXIO);
554 		}
555 
556 		if (error == 0 && vtpci_setup_interrupts(sc, type) == 0)
557 			break;
558 
559 		vtpci_cleanup_setup_intr_attempt(sc);
560 	}
561 
562 	if (bootverbose) {
563 		if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
564 			device_printf(dev, "using legacy interrupt\n");
565 		else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
566 			device_printf(dev, "using MSI interrupt\n");
567 		else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
568 			device_printf(dev, "using shared MSIX interrupts\n");
569 		else
570 			device_printf(dev, "using per VQ MSIX interrupts\n");
571 	}
572 
573 	return (0);
574 }
575 
576 static void
577 vtpci_stop(device_t dev)
578 {
579 
580 	vtpci_reset(device_get_softc(dev));
581 }
582 
583 static int
584 vtpci_reinit(device_t dev, uint64_t features)
585 {
586 	struct vtpci_softc *sc;
587 	int idx, error;
588 
589 	sc = device_get_softc(dev);
590 
591 	/*
592 	 * Redrive the device initialization. This is a bit of an abuse of
593 	 * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to
594 	 * play nice.
595 	 *
596 	 * We do not allow the host device to change from what was originally
597 	 * negotiated beyond what the guest driver changed. MSIX state should
598 	 * not change, number of virtqueues and their size remain the same, etc.
599 	 * This will need to be rethought when we want to support migration.
600 	 */
601 
602 	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
603 		vtpci_stop(dev);
604 
605 	/*
606 	 * Quickly drive the status through ACK and DRIVER. The device
607 	 * does not become usable again until vtpci_reinit_complete().
608 	 */
609 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
610 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
611 
612 	vtpci_negotiate_features(dev, features);
613 
614 	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
615 		error = vtpci_reinit_virtqueue(sc, idx);
616 		if (error)
617 			return (error);
618 	}
619 
620 	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
621 		error = vtpci_set_host_msix_vectors(sc);
622 		if (error)
623 			return (error);
624 	}
625 
626 	return (0);
627 }
628 
629 static void
630 vtpci_reinit_complete(device_t dev)
631 {
632 
633 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
634 }
635 
636 static void
637 vtpci_notify_virtqueue(device_t dev, uint16_t queue)
638 {
639 	struct vtpci_softc *sc;
640 
641 	sc = device_get_softc(dev);
642 
643 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue);
644 }
645 
646 static uint8_t
647 vtpci_get_status(device_t dev)
648 {
649 	struct vtpci_softc *sc;
650 
651 	sc = device_get_softc(dev);
652 
653 	return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS));
654 }
655 
656 static void
657 vtpci_set_status(device_t dev, uint8_t status)
658 {
659 	struct vtpci_softc *sc;
660 
661 	sc = device_get_softc(dev);
662 
663 	if (status != VIRTIO_CONFIG_STATUS_RESET)
664 		status |= vtpci_get_status(dev);
665 
666 	vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status);
667 }
668 
669 static void
670 vtpci_read_dev_config(device_t dev, bus_size_t offset,
671     void *dst, int length)
672 {
673 	struct vtpci_softc *sc;
674 	bus_size_t off;
675 	uint8_t *d;
676 	int size;
677 
678 	sc = device_get_softc(dev);
679 	off = VIRTIO_PCI_CONFIG(sc) + offset;
680 
681 	for (d = dst; length > 0; d += size, off += size, length -= size) {
682 		if (length >= 4) {
683 			size = 4;
684 			*(uint32_t *)d = vtpci_read_config_4(sc, off);
685 		} else if (length >= 2) {
686 			size = 2;
687 			*(uint16_t *)d = vtpci_read_config_2(sc, off);
688 		} else {
689 			size = 1;
690 			*d = vtpci_read_config_1(sc, off);
691 		}
692 	}
693 }
694 
695 static void
696 vtpci_write_dev_config(device_t dev, bus_size_t offset,
697     void *src, int length)
698 {
699 	struct vtpci_softc *sc;
700 	bus_size_t off;
701 	uint8_t *s;
702 	int size;
703 
704 	sc = device_get_softc(dev);
705 	off = VIRTIO_PCI_CONFIG(sc) + offset;
706 
707 	for (s = src; length > 0; s += size, off += size, length -= size) {
708 		if (length >= 4) {
709 			size = 4;
710 			vtpci_write_config_4(sc, off, *(uint32_t *)s);
711 		} else if (length >= 2) {
712 			size = 2;
713 			vtpci_write_config_2(sc, off, *(uint16_t *)s);
714 		} else {
715 			size = 1;
716 			vtpci_write_config_1(sc, off, *s);
717 		}
718 	}
719 }
720 
721 static void
722 vtpci_describe_features(struct vtpci_softc *sc, const char *msg,
723     uint64_t features)
724 {
725 	device_t dev, child;
726 
727 	dev = sc->vtpci_dev;
728 	child = sc->vtpci_child_dev;
729 
730 	if (device_is_attached(child) && bootverbose == 0)
731 		return;
732 
733 	virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc);
734 }
735 
736 static void
737 vtpci_probe_and_attach_child(struct vtpci_softc *sc)
738 {
739 	device_t dev, child;
740 
741 	dev = sc->vtpci_dev;
742 	child = sc->vtpci_child_dev;
743 
744 	if (child == NULL)
745 		return;
746 
747 	if (device_get_state(child) != DS_NOTPRESENT)
748 		return;
749 
750 	if (device_probe(child) != 0)
751 		return;
752 
753 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
754 	if (device_attach(child) != 0) {
755 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
756 		vtpci_reset(sc);
757 		vtpci_release_child_resources(sc);
758 		/* Reset status for future attempt. */
759 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
760 	} else {
761 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
762 		VIRTIO_ATTACH_COMPLETED(child);
763 	}
764 }
765 
766 static int
767 vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
768 {
769 	device_t dev;
770 	int nmsix, cnt, required;
771 
772 	dev = sc->vtpci_dev;
773 
774 	/* Allocate an additional vector for the config changes. */
775 	required = nvectors + 1;
776 
777 	nmsix = pci_msix_count(dev);
778 	if (nmsix < required)
779 		return (1);
780 
781 	cnt = required;
782 	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
783 		sc->vtpci_nmsix_resources = required;
784 		return (0);
785 	}
786 
787 	pci_release_msi(dev);
788 
789 	return (1);
790 }
791 
792 static int
793 vtpci_alloc_msi(struct vtpci_softc *sc)
794 {
795 	device_t dev;
796 	int nmsi, cnt, required;
797 
798 	dev = sc->vtpci_dev;
799 	required = 1;
800 
801 	nmsi = pci_msi_count(dev);
802 	if (nmsi < required)
803 		return (1);
804 
805 	cnt = required;
806 	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required)
807 		return (0);
808 
809 	pci_release_msi(dev);
810 
811 	return (1);
812 }
813 
814 static int
815 vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc)
816 {
817 	int i, nvectors, error;
818 
819 	if (vtpci_disable_msix != 0 ||
820 	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
821 		return (ENOTSUP);
822 
823 	for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) {
824 		if (sc->vtpci_vqs[i].vtv_no_intr == 0)
825 			nvectors++;
826 	}
827 
828 	error = vtpci_alloc_msix(sc, nvectors);
829 	if (error)
830 		return (error);
831 
832 	sc->vtpci_flags |= VTPCI_FLAG_MSIX;
833 
834 	return (0);
835 }
836 
837 static int
838 vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc)
839 {
840 	int error;
841 
842 	if (vtpci_disable_msix != 0 ||
843 	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
844 		return (ENOTSUP);
845 
846 	error = vtpci_alloc_msix(sc, 1);
847 	if (error)
848 		return (error);
849 
850 	sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX;
851 
852 	return (0);
853 }
854 
855 static int
856 vtpci_alloc_intr_msi(struct vtpci_softc *sc)
857 {
858 	int error;
859 
860 	/* Only BHyVe supports MSI. */
861 	if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI)
862 		return (ENOTSUP);
863 
864 	error = vtpci_alloc_msi(sc);
865 	if (error)
866 		return (error);
867 
868 	sc->vtpci_flags |= VTPCI_FLAG_MSI;
869 
870 	return (0);
871 }
872 
873 static int
874 vtpci_alloc_intr_legacy(struct vtpci_softc *sc)
875 {
876 
877 	sc->vtpci_flags |= VTPCI_FLAG_LEGACY;
878 
879 	return (0);
880 }
881 
882 static int
883 vtpci_alloc_interrupt(struct vtpci_softc *sc, int rid, int flags,
884     struct vtpci_interrupt *intr)
885 {
886 	struct resource *irq;
887 
888 	irq = bus_alloc_resource_any(sc->vtpci_dev, SYS_RES_IRQ, &rid, flags);
889 	if (irq == NULL)
890 		return (ENXIO);
891 
892 	intr->vti_irq = irq;
893 	intr->vti_rid = rid;
894 
895 	return (0);
896 }
897 
898 static int
899 vtpci_alloc_intr_resources(struct vtpci_softc *sc)
900 {
901 	struct vtpci_interrupt *intr;
902 	int i, rid, flags, nvq_intrs, error;
903 
904 	rid = 0;
905 	flags = RF_ACTIVE;
906 
907 	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
908 		flags |= RF_SHAREABLE;
909 	else
910 		rid = 1;
911 
912 	/*
913 	 * For legacy and MSI interrupts, this single resource handles all
914 	 * interrupts. For MSIX, this resource is used for the configuration
915 	 * changed interrupt.
916 	 */
917 	intr = &sc->vtpci_device_interrupt;
918 	error = vtpci_alloc_interrupt(sc, rid, flags, intr);
919 	if (error || sc->vtpci_flags & (VTPCI_FLAG_LEGACY | VTPCI_FLAG_MSI))
920 		return (error);
921 
922 	/* Subtract one for the configuration changed interrupt. */
923 	nvq_intrs = sc->vtpci_nmsix_resources - 1;
924 
925 	intr = sc->vtpci_msix_vq_interrupts = malloc(nvq_intrs *
926 	    sizeof(struct vtpci_interrupt), M_DEVBUF, M_NOWAIT | M_ZERO);
927 	if (sc->vtpci_msix_vq_interrupts == NULL)
928 		return (ENOMEM);
929 
930 	for (i = 0, rid++; i < nvq_intrs; i++, rid++, intr++) {
931 		error = vtpci_alloc_interrupt(sc, rid, flags, intr);
932 		if (error)
933 			return (error);
934 	}
935 
936 	return (0);
937 }
938 
939 static int
940 vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type)
941 {
942 	struct vtpci_interrupt *intr;
943 	int error;
944 
945 	intr = &sc->vtpci_device_interrupt;
946 	error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type, NULL,
947 	    vtpci_legacy_intr, sc, &intr->vti_handler);
948 
949 	return (error);
950 }
951 
952 static int
953 vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *sc, enum intr_type type)
954 {
955 	struct vtpci_virtqueue *vqx;
956 	struct vtpci_interrupt *intr;
957 	int i, error;
958 
959 	intr = sc->vtpci_msix_vq_interrupts;
960 
961 	for (i = 0; i < sc->vtpci_nvqs; i++) {
962 		vqx = &sc->vtpci_vqs[i];
963 
964 		if (vqx->vtv_no_intr)
965 			continue;
966 
967 		error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type,
968 		    vtpci_vq_intr_filter, vtpci_vq_intr, vqx->vtv_vq,
969 		    &intr->vti_handler);
970 		if (error)
971 			return (error);
972 
973 		intr++;
974 	}
975 
976 	return (0);
977 }
978 
979 static int
980 vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type)
981 {
982 	device_t dev;
983 	struct vtpci_interrupt *intr;
984 	int error;
985 
986 	dev = sc->vtpci_dev;
987 	intr = &sc->vtpci_device_interrupt;
988 
989 	error = bus_setup_intr(dev, intr->vti_irq, type, NULL,
990 	    vtpci_config_intr, sc, &intr->vti_handler);
991 	if (error)
992 		return (error);
993 
994 	if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) {
995 		intr = sc->vtpci_msix_vq_interrupts;
996 		error = bus_setup_intr(dev, intr->vti_irq, type,
997 		    vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, sc,
998 		    &intr->vti_handler);
999 	} else
1000 		error = vtpci_setup_pervq_msix_interrupts(sc, type);
1001 
1002 	return (error ? error : vtpci_set_host_msix_vectors(sc));
1003 }
1004 
1005 static int
1006 vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type)
1007 {
1008 	int error;
1009 
1010 	type |= INTR_MPSAFE;
1011 	KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK,
1012 	    ("%s: no interrupt type selected %#x", __func__, sc->vtpci_flags));
1013 
1014 	error = vtpci_alloc_intr_resources(sc);
1015 	if (error)
1016 		return (error);
1017 
1018 	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
1019 		error = vtpci_setup_legacy_interrupt(sc, type);
1020 	else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
1021 		error = vtpci_setup_msi_interrupt(sc, type);
1022 	else
1023 		error = vtpci_setup_msix_interrupts(sc, type);
1024 
1025 	return (error);
1026 }
1027 
1028 static int
1029 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset,
1030     struct vtpci_interrupt *intr)
1031 {
1032 	device_t dev;
1033 	uint16_t vector;
1034 
1035 	dev = sc->vtpci_dev;
1036 
1037 	if (intr != NULL) {
1038 		/* Map from guest rid to host vector. */
1039 		vector = intr->vti_rid - 1;
1040 	} else
1041 		vector = VIRTIO_MSI_NO_VECTOR;
1042 
1043 	vtpci_write_config_2(sc, offset, vector);
1044 
1045 	/* Read vector to determine if the host had sufficient resources. */
1046 	if (vtpci_read_config_2(sc, offset) != vector) {
1047 		device_printf(dev,
1048 		    "insufficient host resources for MSIX interrupts\n");
1049 		return (ENODEV);
1050 	}
1051 
1052 	return (0);
1053 }
1054 
1055 static int
1056 vtpci_set_host_msix_vectors(struct vtpci_softc *sc)
1057 {
1058 	struct vtpci_interrupt *intr, *tintr;
1059 	int idx, offset, error;
1060 
1061 	intr = &sc->vtpci_device_interrupt;
1062 	offset = VIRTIO_MSI_CONFIG_VECTOR;
1063 
1064 	error = vtpci_register_msix_vector(sc, offset, intr);
1065 	if (error)
1066 		return (error);
1067 
1068 	intr = sc->vtpci_msix_vq_interrupts;
1069 	offset = VIRTIO_MSI_QUEUE_VECTOR;
1070 
1071 	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
1072 		vtpci_select_virtqueue(sc, idx);
1073 
1074 		if (sc->vtpci_vqs[idx].vtv_no_intr)
1075 			tintr = NULL;
1076 		else
1077 			tintr = intr;
1078 
1079 		error = vtpci_register_msix_vector(sc, offset, tintr);
1080 		if (error)
1081 			break;
1082 
1083 		/*
1084 		 * For shared MSIX, all the virtqueues share the first
1085 		 * interrupt.
1086 		 */
1087 		if ((sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0)
1088 			intr++;
1089 	}
1090 
1091 	return (error);
1092 }
1093 
1094 static int
1095 vtpci_reinit_virtqueue(struct vtpci_softc *sc, int idx)
1096 {
1097 	struct vtpci_virtqueue *vqx;
1098 	struct virtqueue *vq;
1099 	int error;
1100 	uint16_t size;
1101 
1102 	vqx = &sc->vtpci_vqs[idx];
1103 	vq = vqx->vtv_vq;
1104 
1105 	KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx));
1106 
1107 	vtpci_select_virtqueue(sc, idx);
1108 	size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
1109 
1110 	error = virtqueue_reinit(vq, size);
1111 	if (error)
1112 		return (error);
1113 
1114 	vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
1115 	    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
1116 
1117 	return (0);
1118 }
1119 
1120 static void
1121 vtpci_free_interrupt(struct vtpci_softc *sc, struct vtpci_interrupt *intr)
1122 {
1123 	device_t dev;
1124 
1125 	dev = sc->vtpci_dev;
1126 
1127 	if (intr->vti_handler != NULL) {
1128 		bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler);
1129 		intr->vti_handler = NULL;
1130 	}
1131 
1132 	if (intr->vti_irq != NULL) {
1133 		bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid,
1134 		    intr->vti_irq);
1135 		intr->vti_irq = NULL;
1136 		intr->vti_rid = -1;
1137 	}
1138 }
1139 
1140 static void
1141 vtpci_free_interrupts(struct vtpci_softc *sc)
1142 {
1143 	struct vtpci_interrupt *intr;
1144 	int i, nvq_intrs;
1145 
1146 	vtpci_free_interrupt(sc, &sc->vtpci_device_interrupt);
1147 
1148 	if (sc->vtpci_nmsix_resources != 0) {
1149 		nvq_intrs = sc->vtpci_nmsix_resources - 1;
1150 		sc->vtpci_nmsix_resources = 0;
1151 
1152 		intr = sc->vtpci_msix_vq_interrupts;
1153 		if (intr != NULL) {
1154 			for (i = 0; i < nvq_intrs; i++, intr++)
1155 				vtpci_free_interrupt(sc, intr);
1156 
1157 			free(sc->vtpci_msix_vq_interrupts, M_DEVBUF);
1158 			sc->vtpci_msix_vq_interrupts = NULL;
1159 		}
1160 	}
1161 
1162 	if (sc->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX))
1163 		pci_release_msi(sc->vtpci_dev);
1164 
1165 	sc->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK;
1166 }
1167 
1168 static void
1169 vtpci_free_virtqueues(struct vtpci_softc *sc)
1170 {
1171 	struct vtpci_virtqueue *vqx;
1172 	int idx;
1173 
1174 	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
1175 		vqx = &sc->vtpci_vqs[idx];
1176 
1177 		vtpci_select_virtqueue(sc, idx);
1178 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, 0);
1179 
1180 		virtqueue_free(vqx->vtv_vq);
1181 		vqx->vtv_vq = NULL;
1182 	}
1183 
1184 	free(sc->vtpci_vqs, M_DEVBUF);
1185 	sc->vtpci_vqs = NULL;
1186 	sc->vtpci_nvqs = 0;
1187 }
1188 
/*
 * Release the interrupt and virtqueue resources held by the softc.
 * Interrupts are freed before the virtqueues.
 */
static void
vtpci_release_child_resources(struct vtpci_softc *sc)
{

	/* Interrupts first, then the queues. */
	vtpci_free_interrupts(sc);
	vtpci_free_virtqueues(sc);
}
1196 
1197 static void
1198 vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *sc)
1199 {
1200 	int idx;
1201 
1202 	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
1203 		vtpci_write_config_2(sc, VIRTIO_MSI_CONFIG_VECTOR,
1204 		    VIRTIO_MSI_NO_VECTOR);
1205 
1206 		for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
1207 			vtpci_select_virtqueue(sc, idx);
1208 			vtpci_write_config_2(sc, VIRTIO_MSI_QUEUE_VECTOR,
1209 			    VIRTIO_MSI_NO_VECTOR);
1210 		}
1211 	}
1212 
1213 	vtpci_free_interrupts(sc);
1214 }
1215 
1216 static void
1217 vtpci_reset(struct vtpci_softc *sc)
1218 {
1219 
1220 	/*
1221 	 * Setting the status to RESET sets the host device to
1222 	 * the original, uninitialized state.
1223 	 */
1224 	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
1225 }
1226 
1227 static void
1228 vtpci_select_virtqueue(struct vtpci_softc *sc, int idx)
1229 {
1230 
1231 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, idx);
1232 }
1233 
1234 static void
1235 vtpci_legacy_intr(void *xsc)
1236 {
1237 	struct vtpci_softc *sc;
1238 	struct vtpci_virtqueue *vqx;
1239 	int i;
1240 	uint8_t isr;
1241 
1242 	sc = xsc;
1243 	vqx = &sc->vtpci_vqs[0];
1244 
1245 	/* Reading the ISR also clears it. */
1246 	isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR);
1247 
1248 	if (isr & VIRTIO_PCI_ISR_CONFIG)
1249 		vtpci_config_intr(sc);
1250 
1251 	if (isr & VIRTIO_PCI_ISR_INTR) {
1252 		for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) {
1253 			if (vqx->vtv_no_intr == 0)
1254 				virtqueue_intr(vqx->vtv_vq);
1255 		}
1256 	}
1257 }
1258 
1259 static int
1260 vtpci_vq_shared_intr_filter(void *xsc)
1261 {
1262 	struct vtpci_softc *sc;
1263 	struct vtpci_virtqueue *vqx;
1264 	int i, rc;
1265 
1266 	rc = 0;
1267 	sc = xsc;
1268 	vqx = &sc->vtpci_vqs[0];
1269 
1270 	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) {
1271 		if (vqx->vtv_no_intr == 0)
1272 			rc |= virtqueue_intr_filter(vqx->vtv_vq);
1273 	}
1274 
1275 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
1276 }
1277 
1278 static void
1279 vtpci_vq_shared_intr(void *xsc)
1280 {
1281 	struct vtpci_softc *sc;
1282 	struct vtpci_virtqueue *vqx;
1283 	int i;
1284 
1285 	sc = xsc;
1286 	vqx = &sc->vtpci_vqs[0];
1287 
1288 	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) {
1289 		if (vqx->vtv_no_intr == 0)
1290 			virtqueue_intr(vqx->vtv_vq);
1291 	}
1292 }
1293 
1294 static int
1295 vtpci_vq_intr_filter(void *xvq)
1296 {
1297 	struct virtqueue *vq;
1298 	int rc;
1299 
1300 	vq = xvq;
1301 	rc = virtqueue_intr_filter(vq);
1302 
1303 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
1304 }
1305 
/*
 * Interrupt handler for a single virtqueue; xvq is the struct virtqueue,
 * which is passed straight to virtqueue_intr().
 */
static void
vtpci_vq_intr(void *xvq)
{
	struct virtqueue *queue;

	queue = xvq;
	virtqueue_intr(queue);
}
1314 
1315 static void
1316 vtpci_config_intr(void *xsc)
1317 {
1318 	struct vtpci_softc *sc;
1319 	device_t child;
1320 
1321 	sc = xsc;
1322 	child = sc->vtpci_child_dev;
1323 
1324 	if (child != NULL)
1325 		VIRTIO_CONFIG_CHANGE(child);
1326 }
1327