xref: /freebsd/sys/dev/virtio/pci/virtio_pci.c (revision 955c8cbb4960e6cf3602de144b1b9154a5092968)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for the VirtIO PCI interface. */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/malloc.h>
38 
39 #include <machine/bus.h>
40 #include <machine/resource.h>
42 #include <sys/rman.h>
43 
44 #include <dev/pci/pcivar.h>
45 #include <dev/pci/pcireg.h>
46 
47 #include <dev/virtio/virtio.h>
48 #include <dev/virtio/virtqueue.h>
49 #include <dev/virtio/pci/virtio_pci.h>
50 
51 #include "virtio_bus_if.h"
52 #include "virtio_if.h"
53 
54 struct vtpci_softc {
55 	device_t			 vtpci_dev;
56 	struct resource			*vtpci_res;
57 	struct resource			*vtpci_msix_res;
58 	uint64_t			 vtpci_features;
59 	uint32_t			 vtpci_flags;
60 #define VTPCI_FLAG_NO_MSI		0x0001
61 #define VTPCI_FLAG_NO_MSIX		0x0002
62 #define VTPCI_FLAG_LEGACY		0x1000
63 #define VTPCI_FLAG_MSI			0x2000
64 #define VTPCI_FLAG_MSIX			0x4000
65 #define VTPCI_FLAG_SHARED_MSIX		0x8000
66 #define VTPCI_FLAG_ITYPE_MASK		0xF000
67 
68 	/* This "bus" will only ever have one child. */
69 	device_t			 vtpci_child_dev;
70 	struct virtio_feature_desc	*vtpci_child_feat_desc;
71 
72 	/*
73 	 * Ideally, each virtqueue that the driver provides a callback for
74 	 * will receive its own MSIX vector. If there are not sufficient
75 	 * vectors available, we will then attempt to have all the VQs
76 	 * share one vector. Note that when using MSIX, the configuration
77 	 * changed notifications must be on their own vector.
78 	 *
79 	 * If MSIX is not available, we will attempt to have the whole
80 	 * device share one MSI vector, and then, finally, one legacy
81 	 * interrupt.
82 	 */
83 	int				 vtpci_nvqs;
84 	struct vtpci_virtqueue {
85 		struct virtqueue *vq;
86 		/* Device did not provide a callback for this virtqueue. */
87 		int		  no_intr;
88 		/* Index into vtpci_intr_res[] below; -1 if unused. */
89 		int		  ires_idx;
90 	} vtpci_vqx[VIRTIO_MAX_VIRTQUEUES];
91 
92 	/*
93 	 * When using MSIX interrupts, the first element of vtpci_intr_res[]
94 	 * is always used for configuration changed notifications. The
95 	 * remaining element(s) are used for the virtqueues.
96 	 *
97 	 * With MSI and legacy interrupts, only the first element of
98 	 * vtpci_intr_res[] is used.
99 	 */
100 	int				 vtpci_nintr_res;
101 	struct vtpci_intr_resource {
102 		struct resource	*irq;
103 		int		 rid;
104 		void		*intrhand;
105 	} vtpci_intr_res[1 + VIRTIO_MAX_VIRTQUEUES];
106 };
107 
108 static int	vtpci_probe(device_t);
109 static int	vtpci_attach(device_t);
110 static int	vtpci_detach(device_t);
111 static int	vtpci_suspend(device_t);
112 static int	vtpci_resume(device_t);
113 static int	vtpci_shutdown(device_t);
114 static void	vtpci_driver_added(device_t, driver_t *);
115 static void	vtpci_child_detached(device_t, device_t);
116 static int	vtpci_read_ivar(device_t, device_t, int, uintptr_t *);
117 static int	vtpci_write_ivar(device_t, device_t, int, uintptr_t);
118 
119 static uint64_t	vtpci_negotiate_features(device_t, uint64_t);
120 static int	vtpci_with_feature(device_t, uint64_t);
121 static int	vtpci_alloc_virtqueues(device_t, int, int,
122 		    struct vq_alloc_info *);
123 static int	vtpci_setup_intr(device_t, enum intr_type);
124 static void	vtpci_stop(device_t);
125 static int	vtpci_reinit(device_t, uint64_t);
126 static void	vtpci_reinit_complete(device_t);
127 static void	vtpci_notify_virtqueue(device_t, uint16_t);
128 static uint8_t	vtpci_get_status(device_t);
129 static void	vtpci_set_status(device_t, uint8_t);
130 static void	vtpci_read_dev_config(device_t, bus_size_t, void *, int);
131 static void	vtpci_write_dev_config(device_t, bus_size_t, void *, int);
132 
133 static void	vtpci_describe_features(struct vtpci_softc *, const char *,
134 		    uint64_t);
135 static void	vtpci_probe_and_attach_child(struct vtpci_softc *);
136 
137 static int 	vtpci_alloc_msix(struct vtpci_softc *, int);
138 static int 	vtpci_alloc_msi(struct vtpci_softc *);
139 static int 	vtpci_alloc_intr_msix_pervq(struct vtpci_softc *);
140 static int 	vtpci_alloc_intr_msix_shared(struct vtpci_softc *);
141 static int 	vtpci_alloc_intr_msi(struct vtpci_softc *);
142 static int 	vtpci_alloc_intr_legacy(struct vtpci_softc *);
143 static int	vtpci_alloc_intr_resources(struct vtpci_softc *);
144 
145 static int 	vtpci_setup_legacy_interrupt(struct vtpci_softc *,
146 		    enum intr_type);
147 static int 	vtpci_setup_msix_interrupts(struct vtpci_softc *,
148 		    enum intr_type);
149 static int 	vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type);
150 
151 static int	vtpci_register_msix_vector(struct vtpci_softc *, int, int);
152 static int 	vtpci_set_host_msix_vectors(struct vtpci_softc *);
153 static int 	vtpci_reinit_virtqueue(struct vtpci_softc *, int);
154 
155 static void	vtpci_free_interrupts(struct vtpci_softc *);
156 static void	vtpci_free_virtqueues(struct vtpci_softc *);
157 static void 	vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *);
158 static void	vtpci_release_child_resources(struct vtpci_softc *);
159 static void	vtpci_reset(struct vtpci_softc *);
160 
161 static void	vtpci_select_virtqueue(struct vtpci_softc *, int);
162 
163 static int	vtpci_legacy_intr(void *);
164 static int	vtpci_vq_shared_intr(void *);
165 static int	vtpci_vq_intr(void *);
166 static int	vtpci_config_intr(void *);
167 
168 #define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt
169 
170 /*
171  * I/O port read/write wrappers.
172  */
173 #define vtpci_read_config_1(sc, o)	bus_read_1((sc)->vtpci_res, (o))
174 #define vtpci_read_config_2(sc, o)	bus_read_2((sc)->vtpci_res, (o))
175 #define vtpci_read_config_4(sc, o)	bus_read_4((sc)->vtpci_res, (o))
176 #define vtpci_write_config_1(sc, o, v)	bus_write_1((sc)->vtpci_res, (o), (v))
177 #define vtpci_write_config_2(sc, o, v)	bus_write_2((sc)->vtpci_res, (o), (v))
178 #define vtpci_write_config_4(sc, o, v)	bus_write_4((sc)->vtpci_res, (o), (v))
179 
180 /* Tunables. */
181 static int vtpci_disable_msix = 0;
182 TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
183 
184 static device_method_t vtpci_methods[] = {
185 	/* Device interface. */
186 	DEVMETHOD(device_probe,			  vtpci_probe),
187 	DEVMETHOD(device_attach,		  vtpci_attach),
188 	DEVMETHOD(device_detach,		  vtpci_detach),
189 	DEVMETHOD(device_suspend,		  vtpci_suspend),
190 	DEVMETHOD(device_resume,		  vtpci_resume),
191 	DEVMETHOD(device_shutdown,		  vtpci_shutdown),
192 
193 	/* Bus interface. */
194 	DEVMETHOD(bus_driver_added,		  vtpci_driver_added),
195 	DEVMETHOD(bus_child_detached,		  vtpci_child_detached),
196 	DEVMETHOD(bus_read_ivar,		  vtpci_read_ivar),
197 	DEVMETHOD(bus_write_ivar,		  vtpci_write_ivar),
198 
199 	/* VirtIO bus interface. */
200 	DEVMETHOD(virtio_bus_negotiate_features,  vtpci_negotiate_features),
201 	DEVMETHOD(virtio_bus_with_feature,	  vtpci_with_feature),
202 	DEVMETHOD(virtio_bus_alloc_virtqueues,	  vtpci_alloc_virtqueues),
203 	DEVMETHOD(virtio_bus_setup_intr,	  vtpci_setup_intr),
204 	DEVMETHOD(virtio_bus_stop,		  vtpci_stop),
205 	DEVMETHOD(virtio_bus_reinit,		  vtpci_reinit),
206 	DEVMETHOD(virtio_bus_reinit_complete,	  vtpci_reinit_complete),
207 	DEVMETHOD(virtio_bus_notify_vq,		  vtpci_notify_virtqueue),
208 	DEVMETHOD(virtio_bus_read_device_config,  vtpci_read_dev_config),
209 	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),
210 
211 	DEVMETHOD_END
212 };
213 
214 static driver_t vtpci_driver = {
215 	"virtio_pci",
216 	vtpci_methods,
217 	sizeof(struct vtpci_softc)
218 };
219 
220 devclass_t vtpci_devclass;
221 
222 DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, 0, 0);
223 MODULE_VERSION(virtio_pci, 1);
224 MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
225 MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
226 
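/*
 * Match any device in the VirtIO PCI vendor/device ID range that also
 * reports the expected legacy ABI revision; the PCI subdevice ID names
 * the VirtIO device type used in the probe description.
 */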
227 static int
228 vtpci_probe(device_t dev)
229 {
230 	char desc[36];
231 	const char *name;
232 
233 	if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID)
234 		return (ENXIO);
235 
236 	if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN ||
237 	    pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX)
238 		return (ENXIO);
239 
240 	if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION)
241 		return (ENXIO);
242 
243 	name = virtio_device_name(pci_get_subdevice(dev));
244 	if (name == NULL)
245 		name = "Unknown";
246 
247 	snprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name);
248 	device_set_desc_copy(dev, desc);
249 
250 	return (BUS_PROBE_DEFAULT);
251 }
252 
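/*
 * Map the legacy I/O space in BAR(0) and, when the MSIX capability is
 * present, the MSIX table memory in BAR(1); then reset the device,
 * acknowledge it, and add the single child device that the type-specific
 * VirtIO driver will attach to.
 */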
253 static int
254 vtpci_attach(device_t dev)
255 {
256 	struct vtpci_softc *sc;
257 	device_t child;
258 	int rid;
259 
260 	sc = device_get_softc(dev);
261 	sc->vtpci_dev = dev;
262 
263 	pci_enable_busmaster(dev);
264 
265 	rid = PCIR_BAR(0);
266 	sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
267 	    RF_ACTIVE);
268 	if (sc->vtpci_res == NULL) {
269 		device_printf(dev, "cannot map I/O space\n");
270 		return (ENXIO);
271 	}
272 
273 	if (pci_find_cap(dev, PCIY_MSI, NULL) != 0)
274 		sc->vtpci_flags |= VTPCI_FLAG_NO_MSI;
275 
276 	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
277 		rid = PCIR_BAR(1);
278 		sc->vtpci_msix_res = bus_alloc_resource_any(dev,
279 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
280 	}
281 
282 	if (sc->vtpci_msix_res == NULL)
283 		sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
284 
285 	vtpci_reset(sc);
286 
287 	/* Tell the host we've noticed this device. */
288 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
289 
290 	if ((child = device_add_child(dev, NULL, -1)) == NULL) {
291 		device_printf(dev, "cannot create child device\n");
292 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
293 		vtpci_detach(dev);
294 		return (ENOMEM);
295 	}
296 
297 	sc->vtpci_child_dev = child;
298 	vtpci_probe_and_attach_child(sc);
299 
300 	return (0);
301 }
302 
303 static int
304 vtpci_detach(device_t dev)
305 {
306 	struct vtpci_softc *sc;
307 	device_t child;
308 	int error;
309 
310 	sc = device_get_softc(dev);
311 
312 	if ((child = sc->vtpci_child_dev) != NULL) {
313 		error = device_delete_child(dev, child);
314 		if (error)
315 			return (error);
316 		sc->vtpci_child_dev = NULL;
317 	}
318 
319 	vtpci_reset(sc);
320 
321 	if (sc->vtpci_msix_res != NULL) {
322 		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1),
323 		    sc->vtpci_msix_res);
324 		sc->vtpci_msix_res = NULL;
325 	}
326 
327 	if (sc->vtpci_res != NULL) {
328 		bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0),
329 		    sc->vtpci_res);
330 		sc->vtpci_res = NULL;
331 	}
332 
333 	return (0);
334 }
335 
336 static int
337 vtpci_suspend(device_t dev)
338 {
339 
340 	return (bus_generic_suspend(dev));
341 }
342 
343 static int
344 vtpci_resume(device_t dev)
345 {
346 
347 	return (bus_generic_resume(dev));
348 }
349 
350 static int
351 vtpci_shutdown(device_t dev)
352 {
353 
354 	(void) bus_generic_shutdown(dev);
355 	/* Forcibly stop the host device. */
356 	vtpci_stop(dev);
357 
358 	return (0);
359 }
360 
361 static void
362 vtpci_driver_added(device_t dev, driver_t *driver)
363 {
364 	struct vtpci_softc *sc;
365 
366 	sc = device_get_softc(dev);
367 
368 	vtpci_probe_and_attach_child(sc);
369 }
370 
371 static void
372 vtpci_child_detached(device_t dev, device_t child)
373 {
374 	struct vtpci_softc *sc;
375 
376 	sc = device_get_softc(dev);
377 
378 	vtpci_reset(sc);
379 	vtpci_release_child_resources(sc);
380 }
381 
382 static int
383 vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
384 {
385 	struct vtpci_softc *sc;
386 
387 	sc = device_get_softc(dev);
388 
389 	if (sc->vtpci_child_dev != child)
390 		return (ENOENT);
391 
392 	switch (index) {
393 	case VIRTIO_IVAR_DEVTYPE:
394 	case VIRTIO_IVAR_SUBDEVICE:
395 		*result = pci_get_subdevice(dev);
396 		break;
397 	case VIRTIO_IVAR_VENDOR:
398 		*result = pci_get_vendor(dev);
399 		break;
400 	case VIRTIO_IVAR_DEVICE:
401 		*result = pci_get_device(dev);
402 		break;
403 	case VIRTIO_IVAR_SUBVENDOR:
404 		*result = pci_get_subvendor(dev);
405 		break;
406 	default:
407 		return (ENOENT);
408 	}
409 
410 	return (0);
411 }
412 
413 static int
414 vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
415 {
416 	struct vtpci_softc *sc;
417 
418 	sc = device_get_softc(dev);
419 
420 	if (sc->vtpci_child_dev != child)
421 		return (ENOENT);
422 
423 	switch (index) {
424 	case VIRTIO_IVAR_FEATURE_DESC:
425 		sc->vtpci_child_feat_desc = (void *) value;
426 		break;
427 	default:
428 		return (ENOENT);
429 	}
430 
431 	return (0);
432 }
433 
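/*
 * Legacy feature negotiation: read the host's 32-bit feature word, mask
 * it with what the child driver and the virtqueue code support, and
 * write the result back as the guest features.
 */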
434 static uint64_t
435 vtpci_negotiate_features(device_t dev, uint64_t child_features)
436 {
437 	struct vtpci_softc *sc;
438 	uint64_t host_features, features;
439 
440 	sc = device_get_softc(dev);
441 
442 	host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES);
443 	vtpci_describe_features(sc, "host", host_features);
444 
445 	/*
446 	 * Limit negotiated features to what the driver, virtqueue, and
447 	 * host all support.
448 	 */
449 	features = host_features & child_features;
450 	features = virtqueue_filter_features(features);
451 	sc->vtpci_features = features;
452 
453 	vtpci_describe_features(sc, "negotiated", features);
454 	vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features);
455 
456 	return (features);
457 }
458 
459 static int
460 vtpci_with_feature(device_t dev, uint64_t feature)
461 {
462 	struct vtpci_softc *sc;
463 
464 	sc = device_get_softc(dev);
465 
466 	return ((sc->vtpci_features & feature) != 0);
467 }
468 
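/*
 * For each requested virtqueue: select it, read the host-provided queue
 * size, allocate the ring, and hand its page frame number back to the
 * host through the QUEUE_PFN register.
 */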
469 static int
470 vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs,
471     struct vq_alloc_info *vq_info)
472 {
473 	struct vtpci_softc *sc;
474 	struct virtqueue *vq;
475 	struct vtpci_virtqueue *vqx;
476 	struct vq_alloc_info *info;
477 	int idx, error;
478 	uint16_t size;
479 
480 	sc = device_get_softc(dev);
481 	error = 0;
482 
483 	if (sc->vtpci_nvqs != 0)
484 		return (EALREADY);
485 	if (nvqs <= 0 || nvqs > VIRTIO_MAX_VIRTQUEUES)
486 		return (EINVAL);
487 
488 	if (flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX)
489 		sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
490 
491 	for (idx = 0; idx < nvqs; idx++) {
492 		vqx = &sc->vtpci_vqx[idx];
493 		info = &vq_info[idx];
494 
495 		vtpci_select_virtqueue(sc, idx);
496 		size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
497 
498 		error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN,
499 		    0xFFFFFFFFUL, info, &vq);
500 		if (error) {
501 			device_printf(dev,
502 			    "cannot allocate virtqueue %d: %d\n", idx, error);
503 			break;
504 		}
505 
506 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
507 		    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
508 
509 		vqx->vq = *info->vqai_vq = vq;
510 		vqx->no_intr = info->vqai_intr == NULL;
511 
512 		sc->vtpci_nvqs++;
513 	}
514 
515 	return (error);
516 }
517 
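/*
 * Try interrupt configurations from most to least desirable: MSIX with a
 * vector per virtqueue, shared MSIX, MSI, and finally a legacy interrupt.
 * Each failed attempt is torn down before the next one is tried.
 */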
518 static int
519 vtpci_setup_intr(device_t dev, enum intr_type type)
520 {
521 	struct vtpci_softc *sc;
522 	int attempt, error;
523 
524 	sc = device_get_softc(dev);
525 
526 	for (attempt = 0; attempt < 5; attempt++) {
527 		/*
528 		 * Start with the most desirable interrupt configuration and
529 		 * fall back to less desirable ones.
530 		 */
531 		switch (attempt) {
532 		case 0:
533 			error = vtpci_alloc_intr_msix_pervq(sc);
534 			break;
535 		case 1:
536 			error = vtpci_alloc_intr_msix_shared(sc);
537 			break;
538 		case 2:
539 			error = vtpci_alloc_intr_msi(sc);
540 			break;
541 		case 3:
542 			error = vtpci_alloc_intr_legacy(sc);
543 			break;
544 		default:
545 			device_printf(dev,
546 			    "exhausted all interrupt allocation attempts\n");
547 			return (ENXIO);
548 		}
549 
550 		if (error == 0 && vtpci_setup_interrupts(sc, type) == 0)
551 			break;
552 
553 		vtpci_cleanup_setup_intr_attempt(sc);
554 	}
555 
556 	if (bootverbose) {
557 		if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
558 			device_printf(dev, "using legacy interrupt\n");
559 		else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
560 			device_printf(dev, "using MSI interrupt\n");
561 		else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
562 			device_printf(dev, "using shared MSIX interrupts\n");
563 		else
564 			device_printf(dev, "using per VQ MSIX interrupts\n");
565 	}
566 
567 	return (0);
568 }
569 
570 static void
571 vtpci_stop(device_t dev)
572 {
573 
574 	vtpci_reset(device_get_softc(dev));
575 }
576 
577 static int
578 vtpci_reinit(device_t dev, uint64_t features)
579 {
580 	struct vtpci_softc *sc;
581 	int idx, error;
582 
583 	sc = device_get_softc(dev);
584 
585 	/*
586 	 * Redrive the device initialization. This is a bit of an abuse of
587 	 * the specification, but VirtualBox, QEMU/KVM, and bhyve seem to
588 	 * play nice.
589 	 *
590 	 * We rely on the host not changing the device from what was originally
591 	 * negotiated beyond what the guest driver changed: the MSIX state, the
592 	 * number of virtqueues, and their sizes must all remain the same.
593 	 * This will need to be rethought when we want to support migration.
594 	 */
595 
596 	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
597 		vtpci_stop(dev);
598 
599 	/*
600 	 * Quickly drive the status through ACK and DRIVER. The device
601 	 * does not become usable again until vtpci_reinit_complete().
602 	 */
603 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
604 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
605 
606 	vtpci_negotiate_features(dev, features);
607 
608 	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
609 		error = vtpci_reinit_virtqueue(sc, idx);
610 		if (error)
611 			return (error);
612 	}
613 
614 	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
615 		error = vtpci_set_host_msix_vectors(sc);
616 		if (error)
617 			return (error);
618 	}
619 
620 	return (0);
621 }
622 
623 static void
624 vtpci_reinit_complete(device_t dev)
625 {
626 
627 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
628 }
629 
630 static void
631 vtpci_notify_virtqueue(device_t dev, uint16_t queue)
632 {
633 	struct vtpci_softc *sc;
634 
635 	sc = device_get_softc(dev);
636 
637 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue);
638 }
639 
640 static uint8_t
641 vtpci_get_status(device_t dev)
642 {
643 	struct vtpci_softc *sc;
644 
645 	sc = device_get_softc(dev);
646 
647 	return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS));
648 }
649 
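/*
 * The status register accumulates state, so OR in the bits that are
 * already set unless the device is being reset.
 */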
650 static void
651 vtpci_set_status(device_t dev, uint8_t status)
652 {
653 	struct vtpci_softc *sc;
654 
655 	sc = device_get_softc(dev);
656 
657 	if (status != VIRTIO_CONFIG_STATUS_RESET)
658 		status |= vtpci_get_status(dev);
659 
660 	vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status);
661 }
662 
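/*
 * The device-specific configuration follows the general VirtIO header;
 * VIRTIO_PCI_CONFIG() gives its starting offset. Copy it out using the
 * widest accesses the remaining length allows.
 */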
663 static void
664 vtpci_read_dev_config(device_t dev, bus_size_t offset,
665     void *dst, int length)
666 {
667 	struct vtpci_softc *sc;
668 	bus_size_t off;
669 	uint8_t *d;
670 	int size;
671 
672 	sc = device_get_softc(dev);
673 	off = VIRTIO_PCI_CONFIG(sc) + offset;
674 
675 	for (d = dst; length > 0; d += size, off += size, length -= size) {
676 		if (length >= 4) {
677 			size = 4;
678 			*(uint32_t *)d = vtpci_read_config_4(sc, off);
679 		} else if (length >= 2) {
680 			size = 2;
681 			*(uint16_t *)d = vtpci_read_config_2(sc, off);
682 		} else {
683 			size = 1;
684 			*d = vtpci_read_config_1(sc, off);
685 		}
686 	}
687 }
688 
689 static void
690 vtpci_write_dev_config(device_t dev, bus_size_t offset,
691     void *src, int length)
692 {
693 	struct vtpci_softc *sc;
694 	bus_size_t off;
695 	uint8_t *s;
696 	int size;
697 
698 	sc = device_get_softc(dev);
699 	off = VIRTIO_PCI_CONFIG(sc) + offset;
700 
701 	for (s = src; length > 0; s += size, off += size, length -= size) {
702 		if (length >= 4) {
703 			size = 4;
704 			vtpci_write_config_4(sc, off, *(uint32_t *)s);
705 		} else if (length >= 2) {
706 			size = 2;
707 			vtpci_write_config_2(sc, off, *(uint16_t *)s);
708 		} else {
709 			size = 1;
710 			vtpci_write_config_1(sc, off, *s);
711 		}
712 	}
713 }
714 
715 static void
716 vtpci_describe_features(struct vtpci_softc *sc, const char *msg,
717     uint64_t features)
718 {
719 	device_t dev, child;
720 
721 	dev = sc->vtpci_dev;
722 	child = sc->vtpci_child_dev;
723 
724 	if (device_is_attached(child) && bootverbose == 0)
725 		return;
726 
727 	virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc);
728 }
729 
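/*
 * Attach the child driver once one is available, walking the device
 * status through DRIVER to DRIVER_OK, or marking it FAILED and releasing
 * the child's resources if the attach does not succeed.
 */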
730 static void
731 vtpci_probe_and_attach_child(struct vtpci_softc *sc)
732 {
733 	device_t dev, child;
734 
735 	dev = sc->vtpci_dev;
736 	child = sc->vtpci_child_dev;
737 
738 	if (child == NULL)
739 		return;
740 
741 	if (device_get_state(child) != DS_NOTPRESENT)
742 		return;
743 
744 	if (device_probe(child) != 0)
745 		return;
746 
747 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
748 	if (device_attach(child) != 0) {
749 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
750 		vtpci_reset(sc);
751 		vtpci_release_child_resources(sc);
752 		/* Reset status for future attempt. */
753 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
754 	} else
755 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
756 }
757 
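/*
 * Allocate nvectors MSIX messages plus one more for the configuration
 * changed interrupt, returning non-zero if the device cannot supply them
 * all.
 */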
758 static int
759 vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
760 {
761 	device_t dev;
762 	int nmsix, cnt, required;
763 
764 	dev = sc->vtpci_dev;
765 
766 	/* Allocate an additional vector for the config changes. */
767 	required = nvectors + 1;
768 
769 	nmsix = pci_msix_count(dev);
770 	if (nmsix < required)
771 		return (1);
772 
773 	cnt = required;
774 	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
775 		sc->vtpci_nintr_res = required;
776 		return (0);
777 	}
778 
779 	pci_release_msi(dev);
780 
781 	return (1);
782 }
783 
784 static int
785 vtpci_alloc_msi(struct vtpci_softc *sc)
786 {
787 	device_t dev;
788 	int nmsi, cnt, required;
789 
790 	dev = sc->vtpci_dev;
791 	required = 1;
792 
793 	nmsi = pci_msi_count(dev);
794 	if (nmsi < required)
795 		return (1);
796 
797 	cnt = required;
798 	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
799 		sc->vtpci_nintr_res = required;
800 		return (0);
801 	}
802 
803 	pci_release_msi(dev);
804 
805 	return (1);
806 }
807 
808 static int
809 vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc)
810 {
811 	int i, nvectors, error;
812 
813 	if (vtpci_disable_msix != 0 ||
814 	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
815 		return (ENOTSUP);
816 
817 	for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) {
818 		if (sc->vtpci_vqx[i].no_intr == 0)
819 			nvectors++;
820 	}
821 
822 	error = vtpci_alloc_msix(sc, nvectors);
823 	if (error)
824 		return (error);
825 
826 	sc->vtpci_flags |= VTPCI_FLAG_MSIX;
827 
828 	return (0);
829 }
830 
831 static int
832 vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc)
833 {
834 	int error;
835 
836 	if (vtpci_disable_msix != 0 ||
837 	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
838 		return (ENOTSUP);
839 
840 	error = vtpci_alloc_msix(sc, 1);
841 	if (error)
842 		return (error);
843 
844 	sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX;
845 
846 	return (0);
847 }
848 
849 static int
850 vtpci_alloc_intr_msi(struct vtpci_softc *sc)
851 {
852 	int error;
853 
854 	/* Only bhyve supports MSI. */
855 	if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI)
856 		return (ENOTSUP);
857 
858 	error = vtpci_alloc_msi(sc);
859 	if (error)
860 		return (error);
861 
862 	sc->vtpci_flags |= VTPCI_FLAG_MSI;
863 
864 	return (0);
865 }
866 
867 static int
868 vtpci_alloc_intr_legacy(struct vtpci_softc *sc)
869 {
870 
871 	sc->vtpci_flags |= VTPCI_FLAG_LEGACY;
872 	sc->vtpci_nintr_res = 1;
873 
874 	return (0);
875 }
876 
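/*
 * Allocate the IRQ resources backing the selected interrupt type: the
 * legacy interrupt uses the shareable rid 0, while MSI/MSIX messages use
 * rids starting at 1.
 */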
877 static int
878 vtpci_alloc_intr_resources(struct vtpci_softc *sc)
879 {
880 	device_t dev;
881 	struct resource *irq;
882 	struct vtpci_virtqueue *vqx;
883 	int i, rid, flags, res_idx;
884 
885 	dev = sc->vtpci_dev;
886 
887 	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) {
888 		rid = 0;
889 		flags = RF_ACTIVE | RF_SHAREABLE;
890 	} else {
891 		rid = 1;
892 		flags = RF_ACTIVE;
893 	}
894 
895 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
896 		irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, flags);
897 		if (irq == NULL)
898 			return (ENXIO);
899 
900 		sc->vtpci_intr_res[i].irq = irq;
901 		sc->vtpci_intr_res[i].rid = rid++;
902 	}
903 
904 	/*
905 	 * Map each virtqueue to the correct index in vtpci_intr_res[]. The
906 	 * first index is reserved for configuration changed notifications.
907 	 */
908 	for (i = 0, res_idx = 1; i < sc->vtpci_nvqs; i++) {
909 		vqx = &sc->vtpci_vqx[i];
910 
911 		if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
912 			if (vqx->no_intr != 0)
913 				vqx->ires_idx = -1;
914 			else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
915 				vqx->ires_idx = res_idx;
916 			else
917 				vqx->ires_idx = res_idx++;
918 		} else
919 			vqx->ires_idx = -1;
920 	}
921 
922 	return (0);
923 }
924 
925 static int
926 vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type)
927 {
928 	device_t dev;
929 	struct vtpci_intr_resource *ires;
930 	int error;
931 
932 	dev = sc->vtpci_dev;
933 
934 	ires = &sc->vtpci_intr_res[0];
935 	error = bus_setup_intr(dev, ires->irq, type, vtpci_legacy_intr, NULL,
936 	    sc, &ires->intrhand);
937 
938 	return (error);
939 }
940 
941 static int
942 vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type)
943 {
944 	device_t dev;
945 	struct vtpci_intr_resource *ires;
946 	struct vtpci_virtqueue *vqx;
947 	int i, error;
948 
949 	dev = sc->vtpci_dev;
950 
951 	/*
952 	 * The first resource is used for configuration changed interrupts.
953 	 */
954 	ires = &sc->vtpci_intr_res[0];
955 	error = bus_setup_intr(dev, ires->irq, type, vtpci_config_intr,
956 	    NULL, sc, &ires->intrhand);
957 	if (error)
958 		return (error);
959 
960 	if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) {
961 		ires = &sc->vtpci_intr_res[1];
962 
963 		error = bus_setup_intr(dev, ires->irq, type,
964 		    vtpci_vq_shared_intr, NULL, sc, &ires->intrhand);
965 		if (error)
966 			return (error);
967 	} else {
968 		/*
969 		 * Each remaining resource is assigned to a specific virtqueue.
970 		 */
971 		for (i = 0; i < sc->vtpci_nvqs; i++) {
972 			vqx = &sc->vtpci_vqx[i];
973 			if (vqx->ires_idx < 1)
974 				continue;
975 
976 			ires = &sc->vtpci_intr_res[vqx->ires_idx];
977 			error = bus_setup_intr(dev, ires->irq, type,
978 			    vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand);
979 			if (error)
980 				return (error);
981 		}
982 	}
983 
984 	error = vtpci_set_host_msix_vectors(sc);
985 	if (error)
986 		return (error);
987 
988 	return (0);
989 }
990 
991 static int
992 vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type)
993 {
994 	int error;
995 
996 	type |= INTR_MPSAFE;
997 	KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK,
998 	    ("no interrupt type selected: %#x", sc->vtpci_flags));
999 
1000 	error = vtpci_alloc_intr_resources(sc);
1001 	if (error)
1002 		return (error);
1003 
1004 	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
1005 		error = vtpci_setup_legacy_interrupt(sc, type);
1006 	else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
1007 		error = vtpci_setup_msi_interrupt(sc, type);
1008 	else
1009 		error = vtpci_setup_msix_interrupts(sc, type);
1010 
1011 	return (error);
1012 }
1013 
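/*
 * Bind the configuration changed interrupt, or the currently selected
 * queue's interrupt, to an MSIX vector. The value is read back to verify
 * the host accepted it; a mismatch means the host lacked the resources.
 */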
1014 static int
1015 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx)
1016 {
1017 	device_t dev;
1018 	uint16_t vector, rdvector;
1019 
1020 	dev = sc->vtpci_dev;
1021 
1022 	if (res_idx != -1) {
1023 		/* Map from guest rid to host vector. */
1024 		vector = sc->vtpci_intr_res[res_idx].rid - 1;
1025 	} else
1026 		vector = VIRTIO_MSI_NO_VECTOR;
1027 
1028 	/*
1029 	 * Assert the first resource is always used for the configuration
1030 	 * changed interrupts.
1031 	 */
1032 	if (res_idx == 0) {
1033 		KASSERT(vector == 0 && offset == VIRTIO_MSI_CONFIG_VECTOR,
1034 		    ("bad first res use vector:%d offset:%d", vector, offset));
1035 	} else
1036 		KASSERT(offset == VIRTIO_MSI_QUEUE_VECTOR, ("bad offset"));
1037 
1038 	vtpci_write_config_2(sc, offset, vector);
1039 
1040 	/* Read vector to determine if the host had sufficient resources. */
1041 	rdvector = vtpci_read_config_2(sc, offset);
1042 	if (rdvector != vector) {
1043 		device_printf(dev,
1044 		    "insufficient host resources for MSIX interrupts\n");
1045 		return (ENODEV);
1046 	}
1047 
1048 	return (0);
1049 }
1050 
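/*
 * Program the configuration changed vector, then each virtqueue's vector
 * in turn (VIRTIO_MSI_NO_VECTOR for queues without a callback).
 */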
1051 static int
1052 vtpci_set_host_msix_vectors(struct vtpci_softc *sc)
1053 {
1054 	struct vtpci_virtqueue *vqx;
1055 	int idx, error;
1056 
1057 	error = vtpci_register_msix_vector(sc, VIRTIO_MSI_CONFIG_VECTOR, 0);
1058 	if (error)
1059 		return (error);
1060 
1061 	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
1062 		vqx = &sc->vtpci_vqx[idx];
1063 
1064 		vtpci_select_virtqueue(sc, idx);
1065 		error = vtpci_register_msix_vector(sc, VIRTIO_MSI_QUEUE_VECTOR,
1066 		    vqx->ires_idx);
1067 		if (error)
1068 			return (error);
1069 	}
1070 
1071 	return (0);
1072 }
1073 
1074 static int
1075 vtpci_reinit_virtqueue(struct vtpci_softc *sc, int idx)
1076 {
1077 	struct vtpci_virtqueue *vqx;
1078 	struct virtqueue *vq;
1079 	int error;
1080 	uint16_t size;
1081 
1082 	vqx = &sc->vtpci_vqx[idx];
1083 	vq = vqx->vq;
1084 
1085 	KASSERT(vq != NULL, ("vq %d not allocated", idx));
1086 
1087 	vtpci_select_virtqueue(sc, idx);
1088 	size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
1089 
1090 	error = virtqueue_reinit(vq, size);
1091 	if (error)
1092 		return (error);
1093 
1094 	vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
1095 	    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
1096 
1097 	return (0);
1098 }
1099 
1100 static void
1101 vtpci_free_interrupts(struct vtpci_softc *sc)
1102 {
1103 	device_t dev;
1104 	struct vtpci_intr_resource *ires;
1105 	int i;
1106 
1107 	dev = sc->vtpci_dev;
1108 
1109 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
1110 		ires = &sc->vtpci_intr_res[i];
1111 
1112 		if (ires->intrhand != NULL) {
1113 			bus_teardown_intr(dev, ires->irq, ires->intrhand);
1114 			ires->intrhand = NULL;
1115 		}
1116 
1117 		if (ires->irq != NULL) {
1118 			bus_release_resource(dev, SYS_RES_IRQ, ires->rid,
1119 			    ires->irq);
1120 			ires->irq = NULL;
1121 		}
1122 
1123 		ires->rid = -1;
1124 	}
1125 
1126 	if (sc->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX))
1127 		pci_release_msi(dev);
1128 
1129 	sc->vtpci_nintr_res = 0;
1130 	sc->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK;
1131 }
1132 
1133 static void
1134 vtpci_free_virtqueues(struct vtpci_softc *sc)
1135 {
1136 	struct vtpci_virtqueue *vqx;
1137 	int i;
1138 
1139 	for (i = 0; i < sc->vtpci_nvqs; i++) {
1140 		vqx = &sc->vtpci_vqx[i];
1141 
1142 		virtqueue_free(vqx->vq);
1143 		vqx->vq = NULL;
1144 	}
1145 
1146 	sc->vtpci_nvqs = 0;
1147 }
1148 
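/*
 * Undo a failed interrupt setup attempt: unbind any MSIX vectors from the
 * configuration changed and queue vector registers, then tear down the
 * handlers and release the IRQ and MSI/MSIX resources.
 */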
1149 static void
1150 vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *sc)
1151 {
1152 	int idx;
1153 
1154 	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
1155 		vtpci_write_config_2(sc, VIRTIO_MSI_CONFIG_VECTOR,
1156 		    VIRTIO_MSI_NO_VECTOR);
1157 
1158 		for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
1159 			vtpci_select_virtqueue(sc, idx);
1160 			vtpci_write_config_2(sc, VIRTIO_MSI_QUEUE_VECTOR,
1161 			    VIRTIO_MSI_NO_VECTOR);
1162 		}
1163 	}
1164 
1165 	vtpci_free_interrupts(sc);
1166 }
1167 
1168 static void
1169 vtpci_release_child_resources(struct vtpci_softc *sc)
1170 {
1171 
1172 	vtpci_free_interrupts(sc);
1173 	vtpci_free_virtqueues(sc);
1174 }
1175 
1176 static void
1177 vtpci_reset(struct vtpci_softc *sc)
1178 {
1179 
1180 	/*
1181 	 * Setting the status to RESET sets the host device to
1182 	 * the original, uninitialized state.
1183 	 */
1184 	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
1185 }
1186 
1187 static void
1188 vtpci_select_virtqueue(struct vtpci_softc *sc, int idx)
1189 {
1190 
1191 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, idx);
1192 }
1193 
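/*
 * Interrupt filter used for the legacy and MSI configurations: a single
 * ISR read reports (and acknowledges) any pending interrupt, which is
 * then dispatched to the configuration change handler and/or to every
 * virtqueue.
 */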
1194 static int
1195 vtpci_legacy_intr(void *xsc)
1196 {
1197 	struct vtpci_softc *sc;
1198 	struct vtpci_virtqueue *vqx;
1199 	int i;
1200 	uint8_t isr;
1201 
1202 	sc = xsc;
1203 	vqx = &sc->vtpci_vqx[0];
1204 
1205 	/* Reading the ISR also clears it. */
1206 	isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR);
1207 
1208 	if (isr & VIRTIO_PCI_ISR_CONFIG)
1209 		vtpci_config_intr(sc);
1210 
1211 	if (isr & VIRTIO_PCI_ISR_INTR)
1212 		for (i = 0; i < sc->vtpci_nvqs; i++, vqx++)
1213 			virtqueue_intr(vqx->vq);
1214 
1215 	return (isr ? FILTER_HANDLED : FILTER_STRAY);
1216 }
1217 
1218 static int
1219 vtpci_vq_shared_intr(void *xsc)
1220 {
1221 	struct vtpci_softc *sc;
1222 	struct vtpci_virtqueue *vqx;
1223 	int i, rc;
1224 
1225 	rc = 0;
1226 	sc = xsc;
1227 	vqx = &sc->vtpci_vqx[0];
1228 
1229 	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++)
1230 		rc |= virtqueue_intr(vqx->vq);
1231 
1232 	return (rc ? FILTER_HANDLED : FILTER_STRAY);
1233 }
1234 
1235 static int
1236 vtpci_vq_intr(void *xvq)
1237 {
1238 	struct virtqueue *vq;
1239 	int rc;
1240 
1241 	vq = xvq;
1242 	rc = virtqueue_intr(vq);
1243 
1244 	return (rc ? FILTER_HANDLED : FILTER_STRAY);
1245 }
1246 
1247 static int
1248 vtpci_config_intr(void *xsc)
1249 {
1250 	struct vtpci_softc *sc;
1251 	device_t child;
1252 	int rc;
1253 
1254 	rc = 0;
1255 	sc = xsc;
1256 	child = sc->vtpci_child_dev;
1257 
1258 	if (child != NULL)
1259 		rc = VIRTIO_CONFIG_CHANGE(child);
1260 
1261 	return (rc ? FILTER_HANDLED : FILTER_STRAY);
1262 }
1263