xref: /freebsd/sys/dev/virtio/pci/virtio_pci.c (revision 9a14aa017b21c292740c00ee098195cd46642730)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for the VirtIO PCI interface. */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/malloc.h>
38 
39 #include <machine/bus.h>
40 #include <machine/resource.h>
41 #include <sys/bus.h>
42 #include <sys/rman.h>
43 
44 #include <dev/pci/pcivar.h>
45 #include <dev/pci/pcireg.h>
46 
47 #include <dev/virtio/virtio.h>
48 #include <dev/virtio/virtqueue.h>
49 #include <dev/virtio/pci/virtio_pci.h>
50 
51 #include "virtio_bus_if.h"
52 #include "virtio_if.h"
53 
54 struct vtpci_softc {
55 	device_t			 vtpci_dev;
56 	struct resource			*vtpci_res;
57 	struct resource			*vtpci_msix_res;
58 	uint64_t			 vtpci_features;
59 	uint32_t			 vtpci_flags;
60 #define VIRTIO_PCI_FLAG_NO_MSI		 0x0001
61 #define VIRTIO_PCI_FLAG_MSI		 0x0002
62 #define VIRTIO_PCI_FLAG_NO_MSIX		 0x0010
63 #define VIRTIO_PCI_FLAG_MSIX		 0x0020
64 #define VIRTIO_PCI_FLAG_SHARED_MSIX	 0x0040
65 
66 	device_t			 vtpci_child_dev;
67 	struct virtio_feature_desc	*vtpci_child_feat_desc;
68 
69 	/*
70 	 * Ideally, each virtqueue that the driver provides a callback for
71 	 * will receive its own MSIX vector. If there are not sufficient
72 	 * vectors available, we will then attempt to have all the VQs
73 	 * share one vector. Note that when using MSIX, the configuration
74 	 * changed notifications must be on their own vector.
75 	 *
76 	 * If MSIX is not available, we will attempt to have the whole
77 	 * device share one MSI vector, and then, finally, one legacy
78 	 * interrupt.
79 	 */
80 	int				 vtpci_nvqs;
81 	struct vtpci_virtqueue {
82 		struct virtqueue *vq;
83 
84 		/* Index into vtpci_intr_res[] below. Unused, then -1. */
85 		int		  ires_idx;
86 	} vtpci_vqx[VIRTIO_MAX_VIRTQUEUES];
87 
88 	/*
89 	 * When using MSIX interrupts, the first element of vtpci_intr_res[]
90 	 * is always the configuration changed notifications. The remaining
91 	 * element(s) are used for the virtqueues.
92 	 *
93 	 * With MSI and legacy interrupts, only the first element of
94 	 * vtpci_intr_res[] is used.
95 	 */
96 	int				 vtpci_nintr_res;
97 	struct vtpci_intr_resource {
98 		struct resource	*irq;
99 		int		 rid;
100 		void		*intrhand;
101 	} vtpci_intr_res[1 + VIRTIO_MAX_VIRTQUEUES];
102 };
103 
104 static int	vtpci_probe(device_t);
105 static int	vtpci_attach(device_t);
106 static int	vtpci_detach(device_t);
107 static int	vtpci_suspend(device_t);
108 static int	vtpci_resume(device_t);
109 static int	vtpci_shutdown(device_t);
110 static void	vtpci_driver_added(device_t, driver_t *);
111 static void	vtpci_child_detached(device_t, device_t);
112 static int	vtpci_read_ivar(device_t, device_t, int, uintptr_t *);
113 static int	vtpci_write_ivar(device_t, device_t, int, uintptr_t);
114 
115 static uint64_t	vtpci_negotiate_features(device_t, uint64_t);
116 static int	vtpci_with_feature(device_t, uint64_t);
117 static int	vtpci_alloc_virtqueues(device_t, int, int,
118 		    struct vq_alloc_info *);
119 static int	vtpci_setup_intr(device_t, enum intr_type);
120 static void	vtpci_stop(device_t);
121 static int	vtpci_reinit(device_t, uint64_t);
122 static void	vtpci_reinit_complete(device_t);
123 static void	vtpci_notify_virtqueue(device_t, uint16_t);
124 static uint8_t	vtpci_get_status(device_t);
125 static void	vtpci_set_status(device_t, uint8_t);
126 static void	vtpci_read_dev_config(device_t, bus_size_t, void *, int);
127 static void	vtpci_write_dev_config(device_t, bus_size_t, void *, int);
128 
129 static void	vtpci_describe_features(struct vtpci_softc *, const char *,
130 		    uint64_t);
131 static void	vtpci_probe_and_attach_child(struct vtpci_softc *);
132 
133 static int	vtpci_alloc_interrupts(struct vtpci_softc *, int, int,
134 		    struct vq_alloc_info *);
135 static int	vtpci_alloc_intr_resources(struct vtpci_softc *, int,
136 		    struct vq_alloc_info *);
137 static int	vtpci_alloc_msi(struct vtpci_softc *);
138 static int	vtpci_alloc_msix(struct vtpci_softc *, int);
139 static int	vtpci_register_msix_vector(struct vtpci_softc *, int, int);
140 
141 static void	vtpci_free_interrupts(struct vtpci_softc *);
142 static void	vtpci_free_virtqueues(struct vtpci_softc *);
143 static void	vtpci_release_child_resources(struct vtpci_softc *);
144 static void	vtpci_reset(struct vtpci_softc *);
145 
146 static int	vtpci_legacy_intr(void *);
147 static int	vtpci_vq_shared_intr(void *);
148 static int	vtpci_vq_intr(void *);
149 static int	vtpci_config_intr(void *);
150 
/*
 * Accessors for the device registers mapped by sc->vtpci_res. The
 * numeric suffix is the access width in bytes.
 */
#define vtpci_read_config_1(softc, off) \
	bus_read_1((softc)->vtpci_res, (off))
#define vtpci_read_config_2(softc, off) \
	bus_read_2((softc)->vtpci_res, (off))
#define vtpci_read_config_4(softc, off) \
	bus_read_4((softc)->vtpci_res, (off))
#define vtpci_write_config_1(softc, off, val) \
	bus_write_1((softc)->vtpci_res, (off), (val))
#define vtpci_write_config_2(softc, off, val) \
	bus_write_2((softc)->vtpci_res, (off), (val))
#define vtpci_write_config_4(softc, off, val) \
	bus_write_4((softc)->vtpci_res, (off), (val))
160 
161 /* Tunables. */
162 static int vtpci_disable_msix = 0;
163 TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
164 
165 static device_method_t vtpci_methods[] = {
166 	/* Device interface. */
167 	DEVMETHOD(device_probe,			  vtpci_probe),
168 	DEVMETHOD(device_attach,		  vtpci_attach),
169 	DEVMETHOD(device_detach,		  vtpci_detach),
170 	DEVMETHOD(device_suspend,		  vtpci_suspend),
171 	DEVMETHOD(device_resume,		  vtpci_resume),
172 	DEVMETHOD(device_shutdown,		  vtpci_shutdown),
173 
174 	/* Bus interface. */
175 	DEVMETHOD(bus_driver_added,		  vtpci_driver_added),
176 	DEVMETHOD(bus_child_detached,		  vtpci_child_detached),
177 	DEVMETHOD(bus_read_ivar,		  vtpci_read_ivar),
178 	DEVMETHOD(bus_write_ivar,		  vtpci_write_ivar),
179 
180 	/* VirtIO bus interface. */
181 	DEVMETHOD(virtio_bus_negotiate_features,  vtpci_negotiate_features),
182 	DEVMETHOD(virtio_bus_with_feature,	  vtpci_with_feature),
183 	DEVMETHOD(virtio_bus_alloc_virtqueues,	  vtpci_alloc_virtqueues),
184 	DEVMETHOD(virtio_bus_setup_intr,	  vtpci_setup_intr),
185 	DEVMETHOD(virtio_bus_stop,		  vtpci_stop),
186 	DEVMETHOD(virtio_bus_reinit,		  vtpci_reinit),
187 	DEVMETHOD(virtio_bus_reinit_complete,	  vtpci_reinit_complete),
188 	DEVMETHOD(virtio_bus_notify_vq,		  vtpci_notify_virtqueue),
189 	DEVMETHOD(virtio_bus_read_device_config,  vtpci_read_dev_config),
190 	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),
191 
192 	{ 0, 0 }
193 };
194 
195 static driver_t vtpci_driver = {
196 	"virtio_pci",
197 	vtpci_methods,
198 	sizeof(struct vtpci_softc)
199 };
200 
201 devclass_t vtpci_devclass;
202 
203 DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, 0, 0);
204 MODULE_VERSION(virtio_pci, 1);
205 MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
206 MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
207 
208 static int
209 vtpci_probe(device_t dev)
210 {
211 	char desc[36];
212 	const char *name;
213 
214 	if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID)
215 		return (ENXIO);
216 
217 	if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN ||
218 	    pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX)
219 		return (ENXIO);
220 
221 	if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION)
222 		return (ENXIO);
223 
224 	name = virtio_device_name(pci_get_subdevice(dev));
225 	if (name == NULL)
226 		name = "Unknown";
227 
228 	snprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name);
229 	device_set_desc_copy(dev, desc);
230 
231 	return (BUS_PROBE_DEFAULT);
232 }
233 
234 static int
235 vtpci_attach(device_t dev)
236 {
237 	struct vtpci_softc *sc;
238 	device_t child;
239 	int rid;
240 
241 	sc = device_get_softc(dev);
242 	sc->vtpci_dev = dev;
243 
244 	pci_enable_busmaster(dev);
245 
246 	rid = PCIR_BAR(0);
247 	sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
248 	    RF_ACTIVE);
249 	if (sc->vtpci_res == NULL) {
250 		device_printf(dev, "cannot map I/O space\n");
251 		return (ENXIO);
252 	}
253 
254 	if (pci_find_extcap(dev, PCIY_MSI, NULL) != 0)
255 		sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSI;
256 
257 	if (pci_find_extcap(dev, PCIY_MSIX, NULL) == 0) {
258 		rid = PCIR_BAR(1);
259 		sc->vtpci_msix_res = bus_alloc_resource_any(dev,
260 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
261 	}
262 
263 	if (sc->vtpci_msix_res == NULL)
264 		sc->vtpci_flags |= VIRTIO_PCI_FLAG_NO_MSIX;
265 
266 	vtpci_reset(sc);
267 
268 	/* Tell the host we've noticed this device. */
269 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
270 
271 	if ((child = device_add_child(dev, NULL, -1)) == NULL) {
272 		device_printf(dev, "cannot create child device\n");
273 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
274 		vtpci_detach(dev);
275 		return (ENOMEM);
276 	}
277 
278 	sc->vtpci_child_dev = child;
279 	vtpci_probe_and_attach_child(sc);
280 
281 	return (0);
282 }
283 
284 static int
285 vtpci_detach(device_t dev)
286 {
287 	struct vtpci_softc *sc;
288 	device_t child;
289 	int error;
290 
291 	sc = device_get_softc(dev);
292 
293 	if ((child = sc->vtpci_child_dev) != NULL) {
294 		error = device_delete_child(dev, child);
295 		if (error)
296 			return (error);
297 		sc->vtpci_child_dev = NULL;
298 	}
299 
300 	vtpci_reset(sc);
301 
302 	if (sc->vtpci_msix_res != NULL) {
303 		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1),
304 		    sc->vtpci_msix_res);
305 		sc->vtpci_msix_res = NULL;
306 	}
307 
308 	if (sc->vtpci_res != NULL) {
309 		bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0),
310 		    sc->vtpci_res);
311 		sc->vtpci_res = NULL;
312 	}
313 
314 	return (0);
315 }
316 
317 static int
318 vtpci_suspend(device_t dev)
319 {
320 
321 	return (bus_generic_suspend(dev));
322 }
323 
324 static int
325 vtpci_resume(device_t dev)
326 {
327 
328 	return (bus_generic_resume(dev));
329 }
330 
331 static int
332 vtpci_shutdown(device_t dev)
333 {
334 
335 	(void) bus_generic_shutdown(dev);
336 	/* Forcibly stop the host device. */
337 	vtpci_stop(dev);
338 
339 	return (0);
340 }
341 
342 static void
343 vtpci_driver_added(device_t dev, driver_t *driver)
344 {
345 	struct vtpci_softc *sc;
346 
347 	sc = device_get_softc(dev);
348 
349 	vtpci_probe_and_attach_child(sc);
350 }
351 
352 static void
353 vtpci_child_detached(device_t dev, device_t child)
354 {
355 	struct vtpci_softc *sc;
356 
357 	sc = device_get_softc(dev);
358 
359 	vtpci_reset(sc);
360 	vtpci_release_child_resources(sc);
361 }
362 
363 static int
364 vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
365 {
366 	struct vtpci_softc *sc;
367 
368 	sc = device_get_softc(dev);
369 
370 	if (sc->vtpci_child_dev != child)
371 		return (ENOENT);
372 
373 	switch (index) {
374 	case VIRTIO_IVAR_DEVTYPE:
375 		*result = pci_get_subdevice(dev);
376 		break;
377 	default:
378 		return (ENOENT);
379 	}
380 
381 	return (0);
382 }
383 
384 static int
385 vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
386 {
387 	struct vtpci_softc *sc;
388 
389 	sc = device_get_softc(dev);
390 
391 	if (sc->vtpci_child_dev != child)
392 		return (ENOENT);
393 
394 	switch (index) {
395 	case VIRTIO_IVAR_FEATURE_DESC:
396 		sc->vtpci_child_feat_desc = (void *) value;
397 		break;
398 	default:
399 		return (ENOENT);
400 	}
401 
402 	return (0);
403 }
404 
405 static uint64_t
406 vtpci_negotiate_features(device_t dev, uint64_t child_features)
407 {
408 	struct vtpci_softc *sc;
409 	uint64_t host_features, features;
410 
411 	sc = device_get_softc(dev);
412 
413 	host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES);
414 	vtpci_describe_features(sc, "host", host_features);
415 
416 	/*
417 	 * Limit negotiated features to what the driver, virtqueue, and
418 	 * host all support.
419 	 */
420 	features = host_features & child_features;
421 	features = virtqueue_filter_features(features);
422 	sc->vtpci_features = features;
423 
424 	vtpci_describe_features(sc, "negotiated", features);
425 	vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features);
426 
427 	return (features);
428 }
429 
430 static int
431 vtpci_with_feature(device_t dev, uint64_t feature)
432 {
433 	struct vtpci_softc *sc;
434 
435 	sc = device_get_softc(dev);
436 
437 	return ((sc->vtpci_features & feature) != 0);
438 }
439 
440 static int
441 vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs,
442     struct vq_alloc_info *vq_info)
443 {
444 	struct vtpci_softc *sc;
445 	struct vtpci_virtqueue *vqx;
446 	struct vq_alloc_info *info;
447 	int queue, error;
448 	uint16_t vq_size;
449 
450 	sc = device_get_softc(dev);
451 
452 	if (sc->vtpci_nvqs != 0 || nvqs <= 0 ||
453 	    nvqs > VIRTIO_MAX_VIRTQUEUES)
454 		return (EINVAL);
455 
456 	error = vtpci_alloc_interrupts(sc, flags, nvqs, vq_info);
457 	if (error) {
458 		device_printf(dev, "cannot allocate interrupts\n");
459 		return (error);
460 	}
461 
462 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
463 		error = vtpci_register_msix_vector(sc,
464 		    VIRTIO_MSI_CONFIG_VECTOR, 0);
465 		if (error)
466 			return (error);
467 	}
468 
469 	for (queue = 0; queue < nvqs; queue++) {
470 		vqx = &sc->vtpci_vqx[queue];
471 		info = &vq_info[queue];
472 
473 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
474 
475 		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
476 		error = virtqueue_alloc(dev, queue, vq_size,
477 		    VIRTIO_PCI_VRING_ALIGN, 0xFFFFFFFFUL, info, &vqx->vq);
478 		if (error)
479 			return (error);
480 
481 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
482 			error = vtpci_register_msix_vector(sc,
483 			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
484 			if (error)
485 				return (error);
486 		}
487 
488 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
489 		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
490 
491 		*info->vqai_vq = vqx->vq;
492 		sc->vtpci_nvqs++;
493 	}
494 
495 	return (0);
496 }
497 
498 static int
499 vtpci_setup_intr(device_t dev, enum intr_type type)
500 {
501 	struct vtpci_softc *sc;
502 	struct vtpci_intr_resource *ires;
503 	struct vtpci_virtqueue *vqx;
504 	int i, flags, error;
505 
506 	sc = device_get_softc(dev);
507 	flags = type | INTR_MPSAFE;
508 	ires = &sc->vtpci_intr_res[0];
509 
510 	if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) == 0) {
511 		error = bus_setup_intr(dev, ires->irq, flags,
512 		    vtpci_legacy_intr, NULL, sc, &ires->intrhand);
513 
514 		return (error);
515 	}
516 
517 	error = bus_setup_intr(dev, ires->irq, flags, vtpci_config_intr,
518 	    NULL, sc, &ires->intrhand);
519 	if (error)
520 		return (error);
521 
522 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX) {
523 		ires = &sc->vtpci_intr_res[1];
524 		error = bus_setup_intr(dev, ires->irq, flags,
525 		    vtpci_vq_shared_intr, NULL, sc, &ires->intrhand);
526 
527 		return (error);
528 	}
529 
530 	/* Setup an interrupt handler for each virtqueue. */
531 	for (i = 0; i < sc->vtpci_nvqs; i++) {
532 		vqx = &sc->vtpci_vqx[i];
533 		if (vqx->ires_idx < 1)
534 			continue;
535 
536 		ires = &sc->vtpci_intr_res[vqx->ires_idx];
537 		error = bus_setup_intr(dev, ires->irq, flags,
538 		    vtpci_vq_intr, NULL, vqx->vq, &ires->intrhand);
539 		if (error)
540 			return (error);
541 	}
542 
543 	return (0);
544 }
545 
546 static void
547 vtpci_stop(device_t dev)
548 {
549 
550 	vtpci_reset(device_get_softc(dev));
551 }
552 
553 static int
554 vtpci_reinit(device_t dev, uint64_t features)
555 {
556 	struct vtpci_softc *sc;
557 	struct vtpci_virtqueue *vqx;
558 	struct virtqueue *vq;
559 	int queue, error;
560 	uint16_t vq_size;
561 
562 	sc = device_get_softc(dev);
563 
564 	/*
565 	 * Redrive the device initialization. This is a bit of an abuse
566 	 * of the specification, but both VirtualBox and QEMU/KVM seem
567 	 * to play nice. We do not allow the host device to change from
568 	 * what was originally negotiated beyond what the guest driver
569 	 * changed (MSIX state should not change, number of virtqueues
570 	 * and their size remain the same, etc).
571 	 */
572 
573 	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
574 		vtpci_stop(dev);
575 
576 	/*
577 	 * Quickly drive the status through ACK and DRIVER. The device
578 	 * does not become usable again until vtpci_reinit_complete().
579 	 */
580 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
581 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
582 
583 	vtpci_negotiate_features(dev, features);
584 
585 	if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
586 		error = vtpci_register_msix_vector(sc,
587 		    VIRTIO_MSI_CONFIG_VECTOR, 0);
588 		if (error)
589 			return (error);
590 	}
591 
592 	for (queue = 0; queue < sc->vtpci_nvqs; queue++) {
593 		vqx = &sc->vtpci_vqx[queue];
594 		vq = vqx->vq;
595 
596 		KASSERT(vq != NULL, ("vq %d not allocated", queue));
597 		vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, queue);
598 
599 		vq_size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM);
600 		error = virtqueue_reinit(vq, vq_size);
601 		if (error)
602 			return (error);
603 
604 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
605 			error = vtpci_register_msix_vector(sc,
606 			    VIRTIO_MSI_QUEUE_VECTOR, vqx->ires_idx);
607 			if (error)
608 				return (error);
609 		}
610 
611 		vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN,
612 		    virtqueue_paddr(vqx->vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
613 	}
614 
615 	return (0);
616 }
617 
618 static void
619 vtpci_reinit_complete(device_t dev)
620 {
621 
622 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
623 }
624 
625 static void
626 vtpci_notify_virtqueue(device_t dev, uint16_t queue)
627 {
628 	struct vtpci_softc *sc;
629 
630 	sc = device_get_softc(dev);
631 
632 	vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue);
633 }
634 
635 static uint8_t
636 vtpci_get_status(device_t dev)
637 {
638 	struct vtpci_softc *sc;
639 
640 	sc = device_get_softc(dev);
641 
642 	return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS));
643 }
644 
645 static void
646 vtpci_set_status(device_t dev, uint8_t status)
647 {
648 	struct vtpci_softc *sc;
649 
650 	sc = device_get_softc(dev);
651 
652 	if (status != VIRTIO_CONFIG_STATUS_RESET)
653 		status |= vtpci_get_status(dev);
654 
655 	vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status);
656 }
657 
658 static void
659 vtpci_read_dev_config(device_t dev, bus_size_t offset,
660     void *dst, int length)
661 {
662 	struct vtpci_softc *sc;
663 	bus_size_t off;
664 	uint8_t *d;
665 	int size;
666 
667 	sc = device_get_softc(dev);
668 	off = VIRTIO_PCI_CONFIG(sc) + offset;
669 
670 	for (d = dst; length > 0; d += size, off += size, length -= size) {
671 		if (length >= 4) {
672 			size = 4;
673 			*(uint32_t *)d = vtpci_read_config_4(sc, off);
674 		} else if (length >= 2) {
675 			size = 2;
676 			*(uint16_t *)d = vtpci_read_config_2(sc, off);
677 		} else {
678 			size = 1;
679 			*d = vtpci_read_config_1(sc, off);
680 		}
681 	}
682 }
683 
684 static void
685 vtpci_write_dev_config(device_t dev, bus_size_t offset,
686     void *src, int length)
687 {
688 	struct vtpci_softc *sc;
689 	bus_size_t off;
690 	uint8_t *s;
691 	int size;
692 
693 	sc = device_get_softc(dev);
694 	off = VIRTIO_PCI_CONFIG(sc) + offset;
695 
696 	for (s = src; length > 0; s += size, off += size, length -= size) {
697 		if (length >= 4) {
698 			size = 4;
699 			vtpci_write_config_4(sc, off, *(uint32_t *)s);
700 		} else if (length >= 2) {
701 			size = 2;
702 			vtpci_write_config_2(sc, off, *(uint16_t *)s);
703 		} else {
704 			size = 1;
705 			vtpci_write_config_1(sc, off, *s);
706 		}
707 	}
708 }
709 
710 static void
711 vtpci_describe_features(struct vtpci_softc *sc, const char *msg,
712     uint64_t features)
713 {
714 	device_t dev, child;
715 
716 	dev = sc->vtpci_dev;
717 	child = sc->vtpci_child_dev;
718 
719 	if (device_is_attached(child) && bootverbose == 0)
720 		return;
721 
722 	virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc);
723 }
724 
725 static void
726 vtpci_probe_and_attach_child(struct vtpci_softc *sc)
727 {
728 	device_t dev, child;
729 
730 	dev = sc->vtpci_dev;
731 	child = sc->vtpci_child_dev;
732 
733 	if (child == NULL)
734 		return;
735 
736 	if (device_get_state(child) != DS_NOTPRESENT)
737 		return;
738 
739 	if (device_probe(child) != 0)
740 		return;
741 
742 	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
743 	if (device_attach(child) != 0) {
744 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
745 		vtpci_reset(sc);
746 		vtpci_release_child_resources(sc);
747 
748 		/* Reset status for future attempt. */
749 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
750 	} else
751 		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
752 }
753 
754 static int
755 vtpci_alloc_interrupts(struct vtpci_softc *sc, int flags, int nvqs,
756     struct vq_alloc_info *vq_info)
757 {
758 	int i, nvectors, error;
759 
760 	/*
761 	 * Only allocate a vector for virtqueues that are actually
762 	 * expecting an interrupt.
763 	 */
764 	for (nvectors = 0, i = 0; i < nvqs; i++)
765 		if (vq_info[i].vqai_intr != NULL)
766 			nvectors++;
767 
768 	if (vtpci_disable_msix != 0 ||
769 	    sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSIX ||
770 	    flags & VIRTIO_ALLOC_VQS_DISABLE_MSIX ||
771 	    vtpci_alloc_msix(sc, nvectors) != 0) {
772 		/*
773 		 * Use MSI interrupts if available. Otherwise, we fallback
774 		 * to legacy interrupts.
775 		 */
776 		if ((sc->vtpci_flags & VIRTIO_PCI_FLAG_NO_MSI) == 0 &&
777 		    vtpci_alloc_msi(sc) == 0)
778 			sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSI;
779 
780 		sc->vtpci_nintr_res = 1;
781 	}
782 
783 	error = vtpci_alloc_intr_resources(sc, nvqs, vq_info);
784 
785 	return (error);
786 }
787 
788 static int
789 vtpci_alloc_intr_resources(struct vtpci_softc *sc, int nvqs,
790     struct vq_alloc_info *vq_info)
791 {
792 	device_t dev;
793 	struct resource *irq;
794 	struct vtpci_virtqueue *vqx;
795 	int i, rid, flags, res_idx;
796 
797 	dev = sc->vtpci_dev;
798 	flags = RF_ACTIVE;
799 
800 	if ((sc->vtpci_flags &
801 	    (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) == 0) {
802 		rid = 0;
803 		flags |= RF_SHAREABLE;
804 	} else
805 		rid = 1;
806 
807 	for (i = 0; i < sc->vtpci_nintr_res; i++) {
808 		irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, flags);
809 		if (irq == NULL)
810 			return (ENXIO);
811 
812 		sc->vtpci_intr_res[i].irq = irq;
813 		sc->vtpci_intr_res[i].rid = rid++;
814 	}
815 
816 	/*
817 	 * Map the virtqueue into the correct index in vq_intr_res[]. Note the
818 	 * first index is reserved for configuration changes notifications.
819 	 */
820 	for (i = 0, res_idx = 1; i < nvqs; i++) {
821 		vqx = &sc->vtpci_vqx[i];
822 
823 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_MSIX) {
824 			if (vq_info[i].vqai_intr == NULL)
825 				vqx->ires_idx = -1;
826 			else if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
827 				vqx->ires_idx = res_idx;
828 			else
829 				vqx->ires_idx = res_idx++;
830 		} else
831 			vqx->ires_idx = -1;
832 	}
833 
834 	return (0);
835 }
836 
837 static int
838 vtpci_alloc_msi(struct vtpci_softc *sc)
839 {
840 	device_t dev;
841 	int nmsi, cnt;
842 
843 	dev = sc->vtpci_dev;
844 	nmsi = pci_msi_count(dev);
845 
846 	if (nmsi < 1)
847 		return (1);
848 
849 	cnt = 1;
850 	if (pci_alloc_msi(dev, &cnt) == 0 && cnt == 1)
851 		return (0);
852 
853 	return (1);
854 }
855 
856 static int
857 vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
858 {
859 	device_t dev;
860 	int nmsix, cnt, required;
861 
862 	dev = sc->vtpci_dev;
863 
864 	nmsix = pci_msix_count(dev);
865 	if (nmsix < 1)
866 		return (1);
867 
868 	/* An additional vector is needed for the config changes. */
869 	required = nvectors + 1;
870 	if (nmsix >= required) {
871 		cnt = required;
872 		if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required)
873 			goto out;
874 
875 		pci_release_msi(dev);
876 	}
877 
878 	/* Attempt shared MSIX configuration. */
879 	required = 2;
880 	if (nmsix >= required) {
881 		cnt = required;
882 		if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
883 			sc->vtpci_flags |= VIRTIO_PCI_FLAG_SHARED_MSIX;
884 			goto out;
885 		}
886 
887 		pci_release_msi(dev);
888 	}
889 
890 	return (1);
891 
892 out:
893 	sc->vtpci_nintr_res = required;
894 	sc->vtpci_flags |= VIRTIO_PCI_FLAG_MSIX;
895 
896 	if (bootverbose) {
897 		if (sc->vtpci_flags & VIRTIO_PCI_FLAG_SHARED_MSIX)
898 			device_printf(dev, "using shared virtqueue MSIX\n");
899 		else
900 			device_printf(dev, "using per virtqueue MSIX\n");
901 	}
902 
903 	return (0);
904 }
905 
906 static int
907 vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, int res_idx)
908 {
909 	device_t dev;
910 	uint16_t vector;
911 
912 	dev = sc->vtpci_dev;
913 
914 	if (offset != VIRTIO_MSI_CONFIG_VECTOR &&
915 	    offset != VIRTIO_MSI_QUEUE_VECTOR)
916 		return (EINVAL);
917 
918 	if (res_idx != -1) {
919 		/* Map from rid to host vector. */
920 		vector = sc->vtpci_intr_res[res_idx].rid - 1;
921 	} else
922 		vector = VIRTIO_MSI_NO_VECTOR;
923 
924 	/* The first resource is special; make sure it is used correctly. */
925 	if (res_idx == 0) {
926 		KASSERT(vector == 0, ("unexpected config vector"));
927 		KASSERT(offset == VIRTIO_MSI_CONFIG_VECTOR,
928 		    ("unexpected config offset"));
929 	}
930 
931 	vtpci_write_config_2(sc, offset, vector);
932 
933 	if (vtpci_read_config_2(sc, offset) != vector) {
934 		device_printf(dev, "insufficient host resources for "
935 		    "MSIX interrupts\n");
936 		return (ENODEV);
937 	}
938 
939 	return (0);
940 }
941 
942 static void
943 vtpci_free_interrupts(struct vtpci_softc *sc)
944 {
945 	device_t dev;
946 	struct vtpci_intr_resource *ires;
947 	int i;
948 
949 	dev = sc->vtpci_dev;
950 	sc->vtpci_nintr_res = 0;
951 
952 	if (sc->vtpci_flags & (VIRTIO_PCI_FLAG_MSI | VIRTIO_PCI_FLAG_MSIX)) {
953 		pci_release_msi(dev);
954 		sc->vtpci_flags &= ~(VIRTIO_PCI_FLAG_MSI |
955 		    VIRTIO_PCI_FLAG_MSIX | VIRTIO_PCI_FLAG_SHARED_MSIX);
956 	}
957 
958 	for (i = 0; i < 1 + VIRTIO_MAX_VIRTQUEUES; i++) {
959 		ires = &sc->vtpci_intr_res[i];
960 
961 		if (ires->intrhand != NULL) {
962 			bus_teardown_intr(dev, ires->irq, ires->intrhand);
963 			ires->intrhand = NULL;
964 		}
965 
966 		if (ires->irq != NULL) {
967 			bus_release_resource(dev, SYS_RES_IRQ, ires->rid,
968 			    ires->irq);
969 			ires->irq = NULL;
970 		}
971 
972 		ires->rid = -1;
973 	}
974 }
975 
976 static void
977 vtpci_free_virtqueues(struct vtpci_softc *sc)
978 {
979 	struct vtpci_virtqueue *vqx;
980 	int i;
981 
982 	sc->vtpci_nvqs = 0;
983 
984 	for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; i++) {
985 		vqx = &sc->vtpci_vqx[i];
986 
987 		if (vqx->vq != NULL) {
988 			virtqueue_free(vqx->vq);
989 			vqx->vq = NULL;
990 		}
991 	}
992 }
993 
/*
 * Release what was allocated on behalf of the child: the interrupts
 * first, then the virtqueues.
 */
static void
vtpci_release_child_resources(struct vtpci_softc *sc)
{

	/* Interrupts go first, then the virtqueues. */
	vtpci_free_interrupts(sc);
	vtpci_free_virtqueues(sc);
}
1001 
1002 static void
1003 vtpci_reset(struct vtpci_softc *sc)
1004 {
1005 
1006 	/*
1007 	 * Setting the status to RESET sets the host device to
1008 	 * the original, uninitialized state.
1009 	 */
1010 	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
1011 }
1012 
1013 static int
1014 vtpci_legacy_intr(void *xsc)
1015 {
1016 	struct vtpci_softc *sc;
1017 	struct vtpci_virtqueue *vqx;
1018 	int i;
1019 	uint8_t isr;
1020 
1021 	sc = xsc;
1022 	vqx = &sc->vtpci_vqx[0];
1023 
1024 	/* Reading the ISR also clears it. */
1025 	isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR);
1026 
1027 	if (isr & VIRTIO_PCI_ISR_CONFIG)
1028 		vtpci_config_intr(sc);
1029 
1030 	if (isr & VIRTIO_PCI_ISR_INTR)
1031 		for (i = 0; i < sc->vtpci_nvqs; i++, vqx++)
1032 			virtqueue_intr(vqx->vq);
1033 
1034 	return (isr ? FILTER_HANDLED : FILTER_STRAY);
1035 }
1036 
1037 static int
1038 vtpci_vq_shared_intr(void *xsc)
1039 {
1040 	struct vtpci_softc *sc;
1041 	struct vtpci_virtqueue *vqx;
1042 	int i, rc;
1043 
1044 	rc = 0;
1045 	sc = xsc;
1046 	vqx = &sc->vtpci_vqx[0];
1047 
1048 	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++)
1049 		rc |= virtqueue_intr(vqx->vq);
1050 
1051 	return (rc ? FILTER_HANDLED : FILTER_STRAY);
1052 }
1053 
1054 static int
1055 vtpci_vq_intr(void *xvq)
1056 {
1057 	struct virtqueue *vq;
1058 	int rc;
1059 
1060 	vq = xvq;
1061 	rc = virtqueue_intr(vq);
1062 
1063 	return (rc ? FILTER_HANDLED : FILTER_STRAY);
1064 }
1065 
1066 static int
1067 vtpci_config_intr(void *xsc)
1068 {
1069 	struct vtpci_softc *sc;
1070 	device_t child;
1071 	int rc;
1072 
1073 	rc = 0;
1074 	sc = xsc;
1075 	child = sc->vtpci_child_dev;
1076 
1077 	if (child != NULL)
1078 		rc = VIRTIO_CONFIG_CHANGE(child);
1079 
1080 	return (rc ? FILTER_HANDLED : FILTER_STRAY);
1081 }
1082