xref: /freebsd/sys/dev/virtio/pci/virtio_pci.c (revision cc759c1995237364b02829feb9e5fdd1e6ed2c5b)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for the VirtIO PCI interface. */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/malloc.h>
38 
39 #include <machine/bus.h>
40 #include <machine/resource.h>
41 #include <sys/bus.h>
42 #include <sys/rman.h>
43 
44 #include <dev/pci/pcivar.h>
45 #include <dev/pci/pcireg.h>
46 
47 #include <dev/virtio/virtio.h>
48 #include <dev/virtio/virtqueue.h>
49 #include <dev/virtio/pci/virtio_pci.h>
50 
51 #include "virtio_bus_if.h"
52 #include "virtio_if.h"
53 
/* State for one allocated interrupt: the IRQ resource, its rid and handler. */
struct vtpci_interrupt {
	/* IRQ resource obtained with bus_alloc_resource_any(). */
	struct resource		*vti_irq;
	/* Resource ID of vti_irq; set to -1 once the resource is released. */
	int			 vti_rid;
	/* Cookie filled in by bus_setup_intr(); NULL when no handler is set. */
	void			*vti_handler;
};
59 
/* Per-virtqueue bookkeeping kept by the PCI transport. */
struct vtpci_virtqueue {
	/* Virtqueue returned by virtqueue_alloc(). */
	struct virtqueue	*vtv_vq;
	/* Nonzero when the child supplied no interrupt callback for this VQ. */
	int			 vtv_no_intr;
};
64 
/* Per-device software state of the VirtIO PCI transport. */
struct vtpci_softc {
	device_t			 vtpci_dev;
	/* BAR 0 I/O port resource holding the VirtIO configuration registers. */
	struct resource			*vtpci_res;
	/* BAR 1 memory resource, mapped only when the MSIX capability exists. */
	struct resource			*vtpci_msix_res;
	/* Feature bits stored by vtpci_negotiate_features(). */
	uint64_t			 vtpci_features;
	uint32_t			 vtpci_flags;
	/* Capability flags, set once in vtpci_attach(). */
#define VTPCI_FLAG_NO_MSI		0x0001
#define VTPCI_FLAG_NO_MSIX		0x0002
	/*
	 * Selected interrupt type, set by the vtpci_alloc_intr_*() routines
	 * and cleared (via the mask) in vtpci_free_interrupts().
	 */
#define VTPCI_FLAG_LEGACY		0x1000
#define VTPCI_FLAG_MSI			0x2000
#define VTPCI_FLAG_MSIX			0x4000
#define VTPCI_FLAG_SHARED_MSIX		0x8000
#define VTPCI_FLAG_ITYPE_MASK		0xF000

	/* This "bus" will only ever have one child. */
	device_t			 vtpci_child_dev;
	/* Feature description table supplied by the child through an ivar. */
	struct virtio_feature_desc	*vtpci_child_feat_desc;

	/* Number of entries in vtpci_vqs that hold an allocated virtqueue. */
	int				 vtpci_nvqs;
	struct vtpci_virtqueue		*vtpci_vqs;

	/*
	 * Ideally, each virtqueue that the driver provides a callback for will
	 * receive its own MSIX vector. If there are not sufficient vectors
	 * available, then attempt to have all the VQs share one vector. For
	 * MSIX, the configuration changed notifications must be on their own
	 * vector.
	 *
	 * If MSIX is not available, we will attempt to have the whole device
	 * share one MSI vector, and then, finally, one legacy interrupt.
	 */
	struct vtpci_interrupt		 vtpci_device_interrupt;
	struct vtpci_interrupt		*vtpci_msix_vq_interrupts;
	/* MSIX vectors allocated, including the one for config changes. */
	int				 vtpci_nmsix_resources;
};
100 
/* Device and bus interface methods. */
static int	vtpci_probe(device_t);
static int	vtpci_attach(device_t);
static int	vtpci_detach(device_t);
static int	vtpci_suspend(device_t);
static int	vtpci_resume(device_t);
static int	vtpci_shutdown(device_t);
static void	vtpci_driver_added(device_t, driver_t *);
static void	vtpci_child_detached(device_t, device_t);
static int	vtpci_read_ivar(device_t, device_t, int, uintptr_t *);
static int	vtpci_write_ivar(device_t, device_t, int, uintptr_t);

/* VirtIO bus interface methods and status/config accessors. */
static uint64_t	vtpci_negotiate_features(device_t, uint64_t);
static int	vtpci_with_feature(device_t, uint64_t);
static int	vtpci_alloc_virtqueues(device_t, int, int,
		    struct vq_alloc_info *);
static int	vtpci_setup_intr(device_t, enum intr_type);
static void	vtpci_stop(device_t);
static int	vtpci_reinit(device_t, uint64_t);
static void	vtpci_reinit_complete(device_t);
static void	vtpci_notify_virtqueue(device_t, uint16_t);
static uint8_t	vtpci_get_status(device_t);
static void	vtpci_set_status(device_t, uint8_t);
static void	vtpci_read_dev_config(device_t, bus_size_t, void *, int);
static void	vtpci_write_dev_config(device_t, bus_size_t, void *, int);

/* Child device helpers. */
static void	vtpci_describe_features(struct vtpci_softc *, const char *,
		    uint64_t);
static void	vtpci_probe_and_attach_child(struct vtpci_softc *);

/* Interrupt vector and resource allocation. */
static int	vtpci_alloc_msix(struct vtpci_softc *, int);
static int	vtpci_alloc_msi(struct vtpci_softc *);
static int	vtpci_alloc_intr_msix_pervq(struct vtpci_softc *);
static int	vtpci_alloc_intr_msix_shared(struct vtpci_softc *);
static int	vtpci_alloc_intr_msi(struct vtpci_softc *);
static int	vtpci_alloc_intr_legacy(struct vtpci_softc *);
static int	vtpci_alloc_interrupt(struct vtpci_softc *, int, int,
		    struct vtpci_interrupt *);
static int	vtpci_alloc_intr_resources(struct vtpci_softc *);

/* Interrupt handler installation. */
static int	vtpci_setup_legacy_interrupt(struct vtpci_softc *,
		    enum intr_type);
static int	vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *,
		    enum intr_type);
static int	vtpci_setup_msix_interrupts(struct vtpci_softc *,
		    enum intr_type);
static int	vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type);

/* Host-side MSIX vector programming and virtqueue reinitialization. */
static int	vtpci_register_msix_vector(struct vtpci_softc *, int,
		    struct vtpci_interrupt *);
static int	vtpci_set_host_msix_vectors(struct vtpci_softc *);
static int	vtpci_reinit_virtqueue(struct vtpci_softc *, int);

/* Teardown helpers. */
static void	vtpci_free_interrupt(struct vtpci_softc *,
		    struct vtpci_interrupt *);
static void	vtpci_free_interrupts(struct vtpci_softc *);
static void	vtpci_free_virtqueues(struct vtpci_softc *);
static void	vtpci_release_child_resources(struct vtpci_softc *);
static void	vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *);
static void	vtpci_reset(struct vtpci_softc *);

static void	vtpci_select_virtqueue(struct vtpci_softc *, int);

/* Interrupt filters and handlers. */
static void	vtpci_legacy_intr(void *);
static int	vtpci_vq_shared_intr_filter(void *);
static void	vtpci_vq_shared_intr(void *);
static int	vtpci_vq_intr_filter(void *);
static void	vtpci_vq_intr(void *);
static void	vtpci_config_intr(void *);

/* A single MSI interrupt is set up exactly like the legacy interrupt. */
#define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt
171 
/*
 * I/O port read/write wrappers.
 *
 * All accesses go through the BAR 0 resource (sc)->vtpci_res; "o" is the
 * register offset within that resource and "v" the value to write. The
 * numeric suffix is the access width in bytes.
 */
#define vtpci_read_config_1(sc, o)	bus_read_1((sc)->vtpci_res, (o))
#define vtpci_read_config_2(sc, o)	bus_read_2((sc)->vtpci_res, (o))
#define vtpci_read_config_4(sc, o)	bus_read_4((sc)->vtpci_res, (o))
#define vtpci_write_config_1(sc, o, v)	bus_write_1((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_2(sc, o, v)	bus_write_2((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_4(sc, o, v)	bus_write_4((sc)->vtpci_res, (o), (v))

/* Tunables. */
/* When nonzero, both MSIX allocation strategies are skipped. */
static int vtpci_disable_msix = 0;
TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
185 
/* Method dispatch table tying the kobj interfaces to this driver. */
static device_method_t vtpci_methods[] = {
	/* Device interface. */
	DEVMETHOD(device_probe,			  vtpci_probe),
	DEVMETHOD(device_attach,		  vtpci_attach),
	DEVMETHOD(device_detach,		  vtpci_detach),
	DEVMETHOD(device_suspend,		  vtpci_suspend),
	DEVMETHOD(device_resume,		  vtpci_resume),
	DEVMETHOD(device_shutdown,		  vtpci_shutdown),

	/* Bus interface. */
	DEVMETHOD(bus_driver_added,		  vtpci_driver_added),
	DEVMETHOD(bus_child_detached,		  vtpci_child_detached),
	DEVMETHOD(bus_read_ivar,		  vtpci_read_ivar),
	DEVMETHOD(bus_write_ivar,		  vtpci_write_ivar),

	/* VirtIO bus interface. */
	DEVMETHOD(virtio_bus_negotiate_features,  vtpci_negotiate_features),
	DEVMETHOD(virtio_bus_with_feature,	  vtpci_with_feature),
	DEVMETHOD(virtio_bus_alloc_virtqueues,	  vtpci_alloc_virtqueues),
	DEVMETHOD(virtio_bus_setup_intr,	  vtpci_setup_intr),
	DEVMETHOD(virtio_bus_stop,		  vtpci_stop),
	DEVMETHOD(virtio_bus_reinit,		  vtpci_reinit),
	DEVMETHOD(virtio_bus_reinit_complete,	  vtpci_reinit_complete),
	DEVMETHOD(virtio_bus_notify_vq,		  vtpci_notify_virtqueue),
	DEVMETHOD(virtio_bus_read_device_config,  vtpci_read_dev_config),
	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),

	DEVMETHOD_END
};

/* Driver description: name, method table and softc size. */
static driver_t vtpci_driver = {
	"virtio_pci",
	vtpci_methods,
	sizeof(struct vtpci_softc)
};

devclass_t vtpci_devclass;

/* Register on the pci bus and declare the pci and virtio dependencies. */
DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, 0, 0);
MODULE_VERSION(virtio_pci, 1);
MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
228 
229 static int
230 vtpci_probe(device_t dev)
231 {
232 	char desc[36];
233 	const char *name;
234 
235 	if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID)
236 		return (ENXIO);
237 
238 	if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN ||
239 	    pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX)
240 		return (ENXIO);
241 
242 	if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION)
243 		return (ENXIO);
244 
245 	name = virtio_device_name(pci_get_subdevice(dev));
246 	if (name == NULL)
247 		name = "Unknown";
248 
249 	snprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name);
250 	device_set_desc_copy(dev, desc);
251 
252 	return (BUS_PROBE_DEFAULT);
253 }
254 
255 static int
256 vtpci_attach(device_t dev)
257 {
258 	struct vtpci_softc *sc;
259 	device_t child;
260 	int rid;
261 
262 	sc = device_get_softc(dev);
263 	sc->vtpci_dev = dev;
264 
265 	pci_enable_busmaster(dev);
266 
267 	rid = PCIR_BAR(0);
268 	sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
269 	    RF_ACTIVE);
270 	if (sc->vtpci_res == NULL) {
271 		device_printf(dev, "cannot map I/O space\n");
272 		return (ENXIO);
273 	}
274 
275 	if (pci_find_cap(dev, PCIY_MSI, NULL) != 0)
276 		sc->vtpci_flags |= VTPCI_FLAG_NO_MSI;
277 
278 	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
279 		rid = PCIR_BAR(1);
280 		sc->vtpci_msix_res = bus_alloc_resource_any(dev,
281 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
282 	}
283 
284 	if (sc->vtpci_msix_res == NULL)
285 		sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
286 
287 	vtpci_reset(sc);
288 
289 	/* Tell the host we've noticed this device. */
290 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
291 
292 	if ((child = device_add_child(dev, NULL, -1)) == NULL) {
293 		device_printf(dev, "cannot create child device\n");
294 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
295 		vtpci_detach(dev);
296 		return (ENOMEM);
297 	}
298 
299 	sc->vtpci_child_dev = child;
300 	vtpci_probe_and_attach_child(sc);
301 
302 	return (0);
303 }
304 
305 static int
306 vtpci_detach(device_t dev)
307 {
308 	struct vtpci_softc *sc;
309 	device_t child;
310 	int error;
311 
312 	sc = device_get_softc(dev);
313 
314 	if ((child = sc->vtpci_child_dev) != NULL) {
315 		error = device_delete_child(dev, child);
316 		if (error)
317 			return (error);
318 		sc->vtpci_child_dev = NULL;
319 	}
320 
321 	vtpci_reset(sc);
322 
323 	if (sc->vtpci_msix_res != NULL) {
324 		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1),
325 		    sc->vtpci_msix_res);
326 		sc->vtpci_msix_res = NULL;
327 	}
328 
329 	if (sc->vtpci_res != NULL) {
330 		bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0),
331 		    sc->vtpci_res);
332 		sc->vtpci_res = NULL;
333 	}
334 
335 	return (0);
336 }
337 
338 static int
339 vtpci_suspend(device_t dev)
340 {
341 
342 	return (bus_generic_suspend(dev));
343 }
344 
345 static int
346 vtpci_resume(device_t dev)
347 {
348 
349 	return (bus_generic_resume(dev));
350 }
351 
352 static int
353 vtpci_shutdown(device_t dev)
354 {
355 
356 	(void) bus_generic_shutdown(dev);
357 	/* Forcibly stop the host device. */
358 	vtpci_stop(dev);
359 
360 	return (0);
361 }
362 
363 static void
364 vtpci_driver_added(device_t dev, driver_t *driver)
365 {
366 	struct vtpci_softc *sc;
367 
368 	sc = device_get_softc(dev);
369 
370 	vtpci_probe_and_attach_child(sc);
371 }
372 
373 static void
374 vtpci_child_detached(device_t dev, device_t child)
375 {
376 	struct vtpci_softc *sc;
377 
378 	sc = device_get_softc(dev);
379 
380 	vtpci_reset(sc);
381 	vtpci_release_child_resources(sc);
382 }
383 
384 static int
385 vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
386 {
387 	struct vtpci_softc *sc;
388 
389 	sc = device_get_softc(dev);
390 
391 	if (sc->vtpci_child_dev != child)
392 		return (ENOENT);
393 
394 	switch (index) {
395 	case VIRTIO_IVAR_DEVTYPE:
396 	case VIRTIO_IVAR_SUBDEVICE:
397 		*result = pci_get_subdevice(dev);
398 		break;
399 	case VIRTIO_IVAR_VENDOR:
400 		*result = pci_get_vendor(dev);
401 		break;
402 	case VIRTIO_IVAR_DEVICE:
403 		*result = pci_get_device(dev);
404 		break;
405 	case VIRTIO_IVAR_SUBVENDOR:
406 		*result = pci_get_subdevice(dev);
407 		break;
408 	default:
409 		return (ENOENT);
410 	}
411 
412 	return (0);
413 }
414 
415 static int
416 vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
417 {
418 	struct vtpci_softc *sc;
419 
420 	sc = device_get_softc(dev);
421 
422 	if (sc->vtpci_child_dev != child)
423 		return (ENOENT);
424 
425 	switch (index) {
426 	case VIRTIO_IVAR_FEATURE_DESC:
427 		sc->vtpci_child_feat_desc = (void *) value;
428 		break;
429 	default:
430 		return (ENOENT);
431 	}
432 
433 	return (0);
434 }
435 
436 static uint64_t
437 vtpci_negotiate_features(device_t dev, uint64_t child_features)
438 {
439 	struct vtpci_softc *sc;
440 	uint64_t host_features, features;
441 
442 	sc = device_get_softc(dev);
443 
444 	host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES);
445 	vtpci_describe_features(sc, "host", host_features);
446 
447 	/*
448 	 * Limit negotiated features to what the driver, virtqueue, and
449 	 * host all support.
450 	 */
451 	features = host_features & child_features;
452 	features = virtqueue_filter_features(features);
453 	sc->vtpci_features = features;
454 
455 	vtpci_describe_features(sc, "negotiated", features);
456 	vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features);
457 
458 	return (features);
459 }
460 
461 static int
462 vtpci_with_feature(device_t dev, uint64_t feature)
463 {
464 	struct vtpci_softc *sc;
465 
466 	sc = device_get_softc(dev);
467 
468 	return ((sc->vtpci_features & feature) != 0);
469 }
470 
471 static int
472 vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs,
473     struct vq_alloc_info *vq_info)
474 {
475 	struct vtpci_softc *sc;
476 	struct virtqueue *vq;
477 	struct vtpci_virtqueue *vqx;
478 	struct vq_alloc_info *info;
479 	int idx, error;
480 	uint16_t size;
481 
482 	sc = device_get_softc(dev);
483 
484 	if (sc->vtpci_nvqs != 0)
485 		return (EALREADY);
486 	if (nvqs <= 0)
487 		return (EINVAL);
488 
489 	sc->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue),
490 	    M_DEVBUF, M_NOWAIT | M_ZERO);
491 	if (sc->vtpci_vqs == NULL)
492 		return (ENOMEM);
493 
494 	for (idx = 0; idx < nvqs; idx++) {
495 		vqx = &sc->vtpci_vqs[idx];
496 		info = &vq_info[idx];
497 
498 		vtpci_select_virtqueue(sc, idx);
499 		size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
500 
501 		error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN,
502 		    0xFFFFFFFFUL, info, &vq);
503 		if (error) {
504 			device_printf(dev,
505 			    "cannot allocate virtqueue %d: %d\n", idx, error);
506 			break;
507 		}
508 
509 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
510 		    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
511 
512 		vqx->vtv_vq = *info->vqai_vq = vq;
513 		vqx->vtv_no_intr = info->vqai_intr == NULL;
514 
515 		sc->vtpci_nvqs++;
516 	}
517 
518 	if (error)
519 		vtpci_free_virtqueues(sc);
520 
521 	return (error);
522 }
523 
524 static int
525 vtpci_setup_intr(device_t dev, enum intr_type type)
526 {
527 	struct vtpci_softc *sc;
528 	int attempt, error;
529 
530 	sc = device_get_softc(dev);
531 
532 	for (attempt = 0; attempt < 5; attempt++) {
533 		/*
534 		 * Start with the most desirable interrupt configuration and
535 		 * fallback towards less desirable ones.
536 		 */
537 		switch (attempt) {
538 		case 0:
539 			error = vtpci_alloc_intr_msix_pervq(sc);
540 			break;
541 		case 1:
542 			error = vtpci_alloc_intr_msix_shared(sc);
543 			break;
544 		case 2:
545 			error = vtpci_alloc_intr_msi(sc);
546 			break;
547 		case 3:
548 			error = vtpci_alloc_intr_legacy(sc);
549 			break;
550 		default:
551 			device_printf(dev,
552 			    "exhausted all interrupt allocation attempts\n");
553 			return (ENXIO);
554 		}
555 
556 		if (error == 0 && vtpci_setup_interrupts(sc, type) == 0)
557 			break;
558 
559 		vtpci_cleanup_setup_intr_attempt(sc);
560 	}
561 
562 	if (bootverbose) {
563 		if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
564 			device_printf(dev, "using legacy interrupt\n");
565 		else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
566 			device_printf(dev, "using MSI interrupt\n");
567 		else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
568 			device_printf(dev, "using shared MSIX interrupts\n");
569 		else
570 			device_printf(dev, "using per VQ MSIX interrupts\n");
571 	}
572 
573 	return (0);
574 }
575 
576 static void
577 vtpci_stop(device_t dev)
578 {
579 
580 	vtpci_reset(device_get_softc(dev));
581 }
582 
583 static int
584 vtpci_reinit(device_t dev, uint64_t features)
585 {
586 	struct vtpci_softc *sc;
587 	int idx, error;
588 
589 	sc = device_get_softc(dev);
590 
591 	/*
592 	 * Redrive the device initialization. This is a bit of an abuse of
593 	 * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to
594 	 * play nice.
595 	 *
596 	 * We do not allow the host device to change from what was originally
597 	 * negotiated beyond what the guest driver changed. MSIX state should
598 	 * not change, number of virtqueues and their size remain the same, etc.
599 	 * This will need to be rethought when we want to support migration.
600 	 */
601 
602 	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
603 		vtpci_stop(dev);
604 
605 	/*
606 	 * Quickly drive the status through ACK and DRIVER. The device
607 	 * does not become usable again until vtpci_reinit_complete().
608 	 */
609 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
610 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
611 
612 	vtpci_negotiate_features(dev, features);
613 
614 	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
615 		error = vtpci_reinit_virtqueue(sc, idx);
616 		if (error)
617 			return (error);
618 	}
619 
620 	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
621 		error = vtpci_set_host_msix_vectors(sc);
622 		if (error)
623 			return (error);
624 	}
625 
626 	return (0);
627 }
628 
629 static void
630 vtpci_reinit_complete(device_t dev)
631 {
632 
633 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
634 }
635 
636 static void
637 vtpci_notify_virtqueue(device_t dev, uint16_t queue)
638 {
639 	struct vtpci_softc *sc;
640 
641 	sc = device_get_softc(dev);
642 
643 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue);
644 }
645 
646 static uint8_t
647 vtpci_get_status(device_t dev)
648 {
649 	struct vtpci_softc *sc;
650 
651 	sc = device_get_softc(dev);
652 
653 	return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS));
654 }
655 
656 static void
657 vtpci_set_status(device_t dev, uint8_t status)
658 {
659 	struct vtpci_softc *sc;
660 
661 	sc = device_get_softc(dev);
662 
663 	if (status != VIRTIO_CONFIG_STATUS_RESET)
664 		status |= vtpci_get_status(dev);
665 
666 	vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status);
667 }
668 
669 static void
670 vtpci_read_dev_config(device_t dev, bus_size_t offset,
671     void *dst, int length)
672 {
673 	struct vtpci_softc *sc;
674 	bus_size_t off;
675 	uint8_t *d;
676 	int size;
677 
678 	sc = device_get_softc(dev);
679 	off = VIRTIO_PCI_CONFIG(sc) + offset;
680 
681 	for (d = dst; length > 0; d += size, off += size, length -= size) {
682 		if (length >= 4) {
683 			size = 4;
684 			*(uint32_t *)d = vtpci_read_config_4(sc, off);
685 		} else if (length >= 2) {
686 			size = 2;
687 			*(uint16_t *)d = vtpci_read_config_2(sc, off);
688 		} else {
689 			size = 1;
690 			*d = vtpci_read_config_1(sc, off);
691 		}
692 	}
693 }
694 
695 static void
696 vtpci_write_dev_config(device_t dev, bus_size_t offset,
697     void *src, int length)
698 {
699 	struct vtpci_softc *sc;
700 	bus_size_t off;
701 	uint8_t *s;
702 	int size;
703 
704 	sc = device_get_softc(dev);
705 	off = VIRTIO_PCI_CONFIG(sc) + offset;
706 
707 	for (s = src; length > 0; s += size, off += size, length -= size) {
708 		if (length >= 4) {
709 			size = 4;
710 			vtpci_write_config_4(sc, off, *(uint32_t *)s);
711 		} else if (length >= 2) {
712 			size = 2;
713 			vtpci_write_config_2(sc, off, *(uint16_t *)s);
714 		} else {
715 			size = 1;
716 			vtpci_write_config_1(sc, off, *s);
717 		}
718 	}
719 }
720 
721 static void
722 vtpci_describe_features(struct vtpci_softc *sc, const char *msg,
723     uint64_t features)
724 {
725 	device_t dev, child;
726 
727 	dev = sc->vtpci_dev;
728 	child = sc->vtpci_child_dev;
729 
730 	if (device_is_attached(child) && bootverbose == 0)
731 		return;
732 
733 	virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc);
734 }
735 
736 static void
737 vtpci_probe_and_attach_child(struct vtpci_softc *sc)
738 {
739 	device_t dev, child;
740 
741 	dev = sc->vtpci_dev;
742 	child = sc->vtpci_child_dev;
743 
744 	if (child == NULL)
745 		return;
746 
747 	if (device_get_state(child) != DS_NOTPRESENT)
748 		return;
749 
750 	if (device_probe(child) != 0)
751 		return;
752 
753 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
754 	if (device_attach(child) != 0) {
755 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
756 		vtpci_reset(sc);
757 		vtpci_release_child_resources(sc);
758 		/* Reset status for future attempt. */
759 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
760 	} else
761 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
762 }
763 
764 static int
765 vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
766 {
767 	device_t dev;
768 	int nmsix, cnt, required;
769 
770 	dev = sc->vtpci_dev;
771 
772 	/* Allocate an additional vector for the config changes. */
773 	required = nvectors + 1;
774 
775 	nmsix = pci_msix_count(dev);
776 	if (nmsix < required)
777 		return (1);
778 
779 	cnt = required;
780 	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
781 		sc->vtpci_nmsix_resources = required;
782 		return (0);
783 	}
784 
785 	pci_release_msi(dev);
786 
787 	return (1);
788 }
789 
790 static int
791 vtpci_alloc_msi(struct vtpci_softc *sc)
792 {
793 	device_t dev;
794 	int nmsi, cnt, required;
795 
796 	dev = sc->vtpci_dev;
797 	required = 1;
798 
799 	nmsi = pci_msi_count(dev);
800 	if (nmsi < required)
801 		return (1);
802 
803 	cnt = required;
804 	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required)
805 		return (0);
806 
807 	pci_release_msi(dev);
808 
809 	return (1);
810 }
811 
812 static int
813 vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc)
814 {
815 	int i, nvectors, error;
816 
817 	if (vtpci_disable_msix != 0 ||
818 	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
819 		return (ENOTSUP);
820 
821 	for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) {
822 		if (sc->vtpci_vqs[i].vtv_no_intr == 0)
823 			nvectors++;
824 	}
825 
826 	error = vtpci_alloc_msix(sc, nvectors);
827 	if (error)
828 		return (error);
829 
830 	sc->vtpci_flags |= VTPCI_FLAG_MSIX;
831 
832 	return (0);
833 }
834 
835 static int
836 vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc)
837 {
838 	int error;
839 
840 	if (vtpci_disable_msix != 0 ||
841 	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
842 		return (ENOTSUP);
843 
844 	error = vtpci_alloc_msix(sc, 1);
845 	if (error)
846 		return (error);
847 
848 	sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX;
849 
850 	return (0);
851 }
852 
853 static int
854 vtpci_alloc_intr_msi(struct vtpci_softc *sc)
855 {
856 	int error;
857 
858 	/* Only BHyVe supports MSI. */
859 	if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI)
860 		return (ENOTSUP);
861 
862 	error = vtpci_alloc_msi(sc);
863 	if (error)
864 		return (error);
865 
866 	sc->vtpci_flags |= VTPCI_FLAG_MSI;
867 
868 	return (0);
869 }
870 
871 static int
872 vtpci_alloc_intr_legacy(struct vtpci_softc *sc)
873 {
874 
875 	sc->vtpci_flags |= VTPCI_FLAG_LEGACY;
876 
877 	return (0);
878 }
879 
880 static int
881 vtpci_alloc_interrupt(struct vtpci_softc *sc, int rid, int flags,
882     struct vtpci_interrupt *intr)
883 {
884 	struct resource *irq;
885 
886 	irq = bus_alloc_resource_any(sc->vtpci_dev, SYS_RES_IRQ, &rid, flags);
887 	if (irq == NULL)
888 		return (ENXIO);
889 
890 	intr->vti_irq = irq;
891 	intr->vti_rid = rid;
892 
893 	return (0);
894 }
895 
896 static int
897 vtpci_alloc_intr_resources(struct vtpci_softc *sc)
898 {
899 	struct vtpci_interrupt *intr;
900 	int i, rid, flags, nvq_intrs, error;
901 
902 	rid = 0;
903 	flags = RF_ACTIVE;
904 
905 	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
906 		flags |= RF_SHAREABLE;
907 	else
908 		rid = 1;
909 
910 	/*
911 	 * For legacy and MSI interrupts, this single resource handles all
912 	 * interrupts. For MSIX, this resource is used for the configuration
913 	 * changed interrupt.
914 	 */
915 	intr = &sc->vtpci_device_interrupt;
916 	error = vtpci_alloc_interrupt(sc, rid, flags, intr);
917 	if (error || sc->vtpci_flags & (VTPCI_FLAG_LEGACY | VTPCI_FLAG_MSI))
918 		return (error);
919 
920 	/* Subtract one for the configuration changed interrupt. */
921 	nvq_intrs = sc->vtpci_nmsix_resources - 1;
922 
923 	intr = sc->vtpci_msix_vq_interrupts = malloc(nvq_intrs *
924 	    sizeof(struct vtpci_interrupt), M_DEVBUF, M_NOWAIT | M_ZERO);
925 	if (sc->vtpci_msix_vq_interrupts == NULL)
926 		return (ENOMEM);
927 
928 	for (i = 0, rid++; i < nvq_intrs; i++, rid++, intr++) {
929 		error = vtpci_alloc_interrupt(sc, rid, flags, intr);
930 		if (error)
931 			return (error);
932 	}
933 
934 	return (0);
935 }
936 
937 static int
938 vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type)
939 {
940 	struct vtpci_interrupt *intr;
941 	int error;
942 
943 	intr = &sc->vtpci_device_interrupt;
944 	error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type, NULL,
945 	    vtpci_legacy_intr, sc, &intr->vti_handler);
946 
947 	return (error);
948 }
949 
950 static int
951 vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *sc, enum intr_type type)
952 {
953 	struct vtpci_virtqueue *vqx;
954 	struct vtpci_interrupt *intr;
955 	int i, error;
956 
957 	intr = sc->vtpci_msix_vq_interrupts;
958 
959 	for (i = 0; i < sc->vtpci_nvqs; i++) {
960 		vqx = &sc->vtpci_vqs[i];
961 
962 		if (vqx->vtv_no_intr)
963 			continue;
964 
965 		error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type,
966 		    vtpci_vq_intr_filter, vtpci_vq_intr, vqx->vtv_vq,
967 		    &intr->vti_handler);
968 		if (error)
969 			return (error);
970 
971 		intr++;
972 	}
973 
974 	return (0);
975 }
976 
977 static int
978 vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type)
979 {
980 	device_t dev;
981 	struct vtpci_interrupt *intr;
982 	int error;
983 
984 	dev = sc->vtpci_dev;
985 	intr = &sc->vtpci_device_interrupt;
986 
987 	error = bus_setup_intr(dev, intr->vti_irq, type, NULL,
988 	    vtpci_config_intr, sc, &intr->vti_handler);
989 	if (error)
990 		return (error);
991 
992 	if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) {
993 		intr = sc->vtpci_msix_vq_interrupts;
994 		error = bus_setup_intr(dev, intr->vti_irq, type,
995 		    vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, sc,
996 		    &intr->vti_handler);
997 	} else
998 		error = vtpci_setup_pervq_msix_interrupts(sc, type);
999 
1000 	return (error ? error : vtpci_set_host_msix_vectors(sc));
1001 }
1002 
1003 static int
1004 vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type)
1005 {
1006 	int error;
1007 
1008 	type |= INTR_MPSAFE;
1009 	KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK,
1010 	    ("%s: no interrupt type selected %#x", __func__, sc->vtpci_flags));
1011 
1012 	error = vtpci_alloc_intr_resources(sc);
1013 	if (error)
1014 		return (error);
1015 
1016 	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
1017 		error = vtpci_setup_legacy_interrupt(sc, type);
1018 	else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
1019 		error = vtpci_setup_msi_interrupt(sc, type);
1020 	else
1021 		error = vtpci_setup_msix_interrupts(sc, type);
1022 
1023 	return (error);
1024 }
1025 
1026 static int
1027 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset,
1028     struct vtpci_interrupt *intr)
1029 {
1030 	device_t dev;
1031 	uint16_t vector;
1032 
1033 	dev = sc->vtpci_dev;
1034 
1035 	if (intr != NULL) {
1036 		/* Map from guest rid to host vector. */
1037 		vector = intr->vti_rid - 1;
1038 	} else
1039 		vector = VIRTIO_MSI_NO_VECTOR;
1040 
1041 	vtpci_write_config_2(sc, offset, vector);
1042 
1043 	/* Read vector to determine if the host had sufficient resources. */
1044 	if (vtpci_read_config_2(sc, offset) != vector) {
1045 		device_printf(dev,
1046 		    "insufficient host resources for MSIX interrupts\n");
1047 		return (ENODEV);
1048 	}
1049 
1050 	return (0);
1051 }
1052 
1053 static int
1054 vtpci_set_host_msix_vectors(struct vtpci_softc *sc)
1055 {
1056 	struct vtpci_interrupt *intr, *tintr;
1057 	int idx, offset, error;
1058 
1059 	intr = &sc->vtpci_device_interrupt;
1060 	offset = VIRTIO_MSI_CONFIG_VECTOR;
1061 
1062 	error = vtpci_register_msix_vector(sc, offset, intr);
1063 	if (error)
1064 		return (error);
1065 
1066 	intr = sc->vtpci_msix_vq_interrupts;
1067 	offset = VIRTIO_MSI_QUEUE_VECTOR;
1068 
1069 	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
1070 		vtpci_select_virtqueue(sc, idx);
1071 
1072 		if (sc->vtpci_vqs[idx].vtv_no_intr)
1073 			tintr = NULL;
1074 		else
1075 			tintr = intr;
1076 
1077 		error = vtpci_register_msix_vector(sc, offset, tintr);
1078 		if (error)
1079 			break;
1080 
1081 		/*
1082 		 * For shared MSIX, all the virtqueues share the first
1083 		 * interrupt.
1084 		 */
1085 		if ((sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0)
1086 			intr++;
1087 	}
1088 
1089 	return (error);
1090 }
1091 
1092 static int
1093 vtpci_reinit_virtqueue(struct vtpci_softc *sc, int idx)
1094 {
1095 	struct vtpci_virtqueue *vqx;
1096 	struct virtqueue *vq;
1097 	int error;
1098 	uint16_t size;
1099 
1100 	vqx = &sc->vtpci_vqs[idx];
1101 	vq = vqx->vtv_vq;
1102 
1103 	KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx));
1104 
1105 	vtpci_select_virtqueue(sc, idx);
1106 	size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
1107 
1108 	error = virtqueue_reinit(vq, size);
1109 	if (error)
1110 		return (error);
1111 
1112 	vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
1113 	    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
1114 
1115 	return (0);
1116 }
1117 
1118 static void
1119 vtpci_free_interrupt(struct vtpci_softc *sc, struct vtpci_interrupt *intr)
1120 {
1121 	device_t dev;
1122 
1123 	dev = sc->vtpci_dev;
1124 
1125 	if (intr->vti_handler != NULL) {
1126 		bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler);
1127 		intr->vti_handler = NULL;
1128 	}
1129 
1130 	if (intr->vti_irq != NULL) {
1131 		bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid,
1132 		    intr->vti_irq);
1133 		intr->vti_irq = NULL;
1134 		intr->vti_rid = -1;
1135 	}
1136 }
1137 
1138 static void
1139 vtpci_free_interrupts(struct vtpci_softc *sc)
1140 {
1141 	struct vtpci_interrupt *intr;
1142 	int i, nvq_intrs;
1143 
1144 	vtpci_free_interrupt(sc, &sc->vtpci_device_interrupt);
1145 
1146 	if (sc->vtpci_nmsix_resources != 0) {
1147 		nvq_intrs = sc->vtpci_nmsix_resources - 1;
1148 		sc->vtpci_nmsix_resources = 0;
1149 
1150 		intr = sc->vtpci_msix_vq_interrupts;
1151 		if (intr != NULL) {
1152 			for (i = 0; i < nvq_intrs; i++, intr++)
1153 				vtpci_free_interrupt(sc, intr);
1154 
1155 			free(sc->vtpci_msix_vq_interrupts, M_DEVBUF);
1156 			sc->vtpci_msix_vq_interrupts = NULL;
1157 		}
1158 	}
1159 
1160 	if (sc->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX))
1161 		pci_release_msi(sc->vtpci_dev);
1162 
1163 	sc->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK;
1164 }
1165 
1166 static void
1167 vtpci_free_virtqueues(struct vtpci_softc *sc)
1168 {
1169 	struct vtpci_virtqueue *vqx;
1170 	int idx;
1171 
1172 	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
1173 		vqx = &sc->vtpci_vqs[idx];
1174 
1175 		vtpci_select_virtqueue(sc, idx);
1176 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, 0);
1177 
1178 		virtqueue_free(vqx->vtv_vq);
1179 		vqx->vtv_vq = NULL;
1180 	}
1181 
1182 	free(sc->vtpci_vqs, M_DEVBUF);
1183 	sc->vtpci_vqs = NULL;
1184 	sc->vtpci_nvqs = 0;
1185 }
1186 
/*
 * Release the interrupts and then the virtqueues held by the device.
 */
static void
vtpci_release_child_resources(struct vtpci_softc *sc)
{

	/* Interrupts are released first, virtqueues second. */
	vtpci_free_interrupts(sc);
	vtpci_free_virtqueues(sc);
}
1194 
1195 static void
1196 vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *sc)
1197 {
1198 	int idx;
1199 
1200 	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
1201 		vtpci_write_config_2(sc, VIRTIO_MSI_CONFIG_VECTOR,
1202 		    VIRTIO_MSI_NO_VECTOR);
1203 
1204 		for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
1205 			vtpci_select_virtqueue(sc, idx);
1206 			vtpci_write_config_2(sc, VIRTIO_MSI_QUEUE_VECTOR,
1207 			    VIRTIO_MSI_NO_VECTOR);
1208 		}
1209 	}
1210 
1211 	vtpci_free_interrupts(sc);
1212 }
1213 
1214 static void
1215 vtpci_reset(struct vtpci_softc *sc)
1216 {
1217 
1218 	/*
1219 	 * Setting the status to RESET sets the host device to
1220 	 * the original, uninitialized state.
1221 	 */
1222 	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
1223 }
1224 
1225 static void
1226 vtpci_select_virtqueue(struct vtpci_softc *sc, int idx)
1227 {
1228 
1229 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, idx);
1230 }
1231 
1232 static void
1233 vtpci_legacy_intr(void *xsc)
1234 {
1235 	struct vtpci_softc *sc;
1236 	struct vtpci_virtqueue *vqx;
1237 	int i;
1238 	uint8_t isr;
1239 
1240 	sc = xsc;
1241 	vqx = &sc->vtpci_vqs[0];
1242 
1243 	/* Reading the ISR also clears it. */
1244 	isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR);
1245 
1246 	if (isr & VIRTIO_PCI_ISR_CONFIG)
1247 		vtpci_config_intr(sc);
1248 
1249 	if (isr & VIRTIO_PCI_ISR_INTR) {
1250 		for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) {
1251 			if (vqx->vtv_no_intr == 0)
1252 				virtqueue_intr(vqx->vtv_vq);
1253 		}
1254 	}
1255 }
1256 
1257 static int
1258 vtpci_vq_shared_intr_filter(void *xsc)
1259 {
1260 	struct vtpci_softc *sc;
1261 	struct vtpci_virtqueue *vqx;
1262 	int i, rc;
1263 
1264 	rc = 0;
1265 	sc = xsc;
1266 	vqx = &sc->vtpci_vqs[0];
1267 
1268 	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) {
1269 		if (vqx->vtv_no_intr == 0)
1270 			rc |= virtqueue_intr_filter(vqx->vtv_vq);
1271 	}
1272 
1273 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
1274 }
1275 
1276 static void
1277 vtpci_vq_shared_intr(void *xsc)
1278 {
1279 	struct vtpci_softc *sc;
1280 	struct vtpci_virtqueue *vqx;
1281 	int i;
1282 
1283 	sc = xsc;
1284 	vqx = &sc->vtpci_vqs[0];
1285 
1286 	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) {
1287 		if (vqx->vtv_no_intr == 0)
1288 			virtqueue_intr(vqx->vtv_vq);
1289 	}
1290 }
1291 
1292 static int
1293 vtpci_vq_intr_filter(void *xvq)
1294 {
1295 	struct virtqueue *vq;
1296 	int rc;
1297 
1298 	vq = xvq;
1299 	rc = virtqueue_intr_filter(vq);
1300 
1301 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
1302 }
1303 
/*
 * Interrupt handler for a single virtqueue; xvq is the virtqueue,
 * which is passed straight to virtqueue_intr().
 */
static void
vtpci_vq_intr(void *xvq)
{
	struct virtqueue *queue = xvq;

	virtqueue_intr(queue);
}
1312 
1313 static void
1314 vtpci_config_intr(void *xsc)
1315 {
1316 	struct vtpci_softc *sc;
1317 	device_t child;
1318 
1319 	sc = xsc;
1320 	child = sc->vtpci_child_dev;
1321 
1322 	if (child != NULL)
1323 		VIRTIO_CONFIG_CHANGE(child);
1324 }
1325