xref: /freebsd/sys/dev/virtio/pci/virtio_pci.c (revision 96190b4fef3b4a0cc3ca0606b0c4e3e69a5e6717)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/bus.h>
32 #include <sys/kernel.h>
33 #include <sys/sbuf.h>
34 #include <sys/sysctl.h>
35 #include <sys/module.h>
36 #include <sys/malloc.h>
37 
38 #include <machine/bus.h>
39 #include <machine/resource.h>
40 #include <sys/bus.h>
41 #include <sys/rman.h>
42 
43 #include <dev/pci/pcivar.h>
44 #include <dev/pci/pcireg.h>
45 
46 #include <dev/virtio/virtio.h>
47 #include <dev/virtio/virtqueue.h>
48 #include <dev/virtio/pci/virtio_pci.h>
49 #include <dev/virtio/pci/virtio_pci_var.h>
50 
51 #include "virtio_pci_if.h"
52 #include "virtio_if.h"
53 
/*
 * Forward declarations for the file-local helpers defined below:
 * feature description, interrupt allocation/setup/teardown, virtqueue
 * teardown and reinitialization, the interrupt handlers, and sysctl setup.
 */
static void	vtpci_describe_features(struct vtpci_common *, const char *,
		    uint64_t);
static int	vtpci_alloc_msix(struct vtpci_common *, int);
static int	vtpci_alloc_msi(struct vtpci_common *);
static int	vtpci_alloc_intr_msix_pervq(struct vtpci_common *);
static int	vtpci_alloc_intr_msix_shared(struct vtpci_common *);
static int	vtpci_alloc_intr_msi(struct vtpci_common *);
static int	vtpci_alloc_intr_intx(struct vtpci_common *);
static int	vtpci_alloc_interrupt(struct vtpci_common *, int, int,
		    struct vtpci_interrupt *);
static void	vtpci_free_interrupt(struct vtpci_common *,
		    struct vtpci_interrupt *);

static void	vtpci_free_interrupts(struct vtpci_common *);
static void	vtpci_free_virtqueues(struct vtpci_common *);
static void	vtpci_cleanup_setup_intr_attempt(struct vtpci_common *);
static int	vtpci_alloc_intr_resources(struct vtpci_common *);
static int	vtpci_setup_intx_interrupt(struct vtpci_common *,
		    enum intr_type);
static int	vtpci_setup_pervq_msix_interrupts(struct vtpci_common *,
		    enum intr_type);
static int	vtpci_set_host_msix_vectors(struct vtpci_common *);
static int	vtpci_setup_msix_interrupts(struct vtpci_common *,
		    enum intr_type);
static int	vtpci_setup_intrs(struct vtpci_common *, enum intr_type);
static int	vtpci_reinit_virtqueue(struct vtpci_common *, int);
static void	vtpci_intx_intr(void *);
static int	vtpci_vq_shared_intr_filter(void *);
static void	vtpci_vq_shared_intr(void *);
static int	vtpci_vq_intr_filter(void *);
static void	vtpci_vq_intr(void *);
static void	vtpci_config_intr(void *);

static void	vtpci_setup_sysctl(struct vtpci_common *);

/*
 * MSI uses the same single-handler setup as INTx, so the MSI setup name
 * is simply an alias for the INTx routine.
 */
#define vtpci_setup_msi_interrupt vtpci_setup_intx_interrupt
90 
/*
 * This module contains two drivers:
 *   - virtio_pci_legacy for pre-V1 support
 *   - virtio_pci_modern for V1 support
 */
MODULE_VERSION(virtio_pci, 1);
MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);

/*
 * Bound to the "hw.virtio.pci.disable_msix" tunable.  When non-zero, both
 * MSIX allocation routines in this file return ENOTSUP, so interrupt setup
 * falls through to the MSI and then INTx attempts.
 */
int vtpci_disable_msix = 0;
TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
102 
103 static uint8_t
104 vtpci_read_isr(struct vtpci_common *cn)
105 {
106 	return (VIRTIO_PCI_READ_ISR(cn->vtpci_dev));
107 }
108 
109 static uint16_t
110 vtpci_get_vq_size(struct vtpci_common *cn, int idx)
111 {
112 	return (VIRTIO_PCI_GET_VQ_SIZE(cn->vtpci_dev, idx));
113 }
114 
115 static bus_size_t
116 vtpci_get_vq_notify_off(struct vtpci_common *cn, int idx)
117 {
118 	return (VIRTIO_PCI_GET_VQ_NOTIFY_OFF(cn->vtpci_dev, idx));
119 }
120 
121 static void
122 vtpci_set_vq(struct vtpci_common *cn, struct virtqueue *vq)
123 {
124 	VIRTIO_PCI_SET_VQ(cn->vtpci_dev, vq);
125 }
126 
127 static void
128 vtpci_disable_vq(struct vtpci_common *cn, int idx)
129 {
130 	VIRTIO_PCI_DISABLE_VQ(cn->vtpci_dev, idx);
131 }
132 
133 static int
134 vtpci_register_cfg_msix(struct vtpci_common *cn, struct vtpci_interrupt *intr)
135 {
136 	return (VIRTIO_PCI_REGISTER_CFG_MSIX(cn->vtpci_dev, intr));
137 }
138 
139 static int
140 vtpci_register_vq_msix(struct vtpci_common *cn, int idx,
141     struct vtpci_interrupt *intr)
142 {
143 	return (VIRTIO_PCI_REGISTER_VQ_MSIX(cn->vtpci_dev, idx, intr));
144 }
145 
146 void
147 vtpci_init(struct vtpci_common *cn, device_t dev, bool modern)
148 {
149 
150 	cn->vtpci_dev = dev;
151 
152 	pci_enable_busmaster(dev);
153 
154 	if (modern)
155 		cn->vtpci_flags |= VTPCI_FLAG_MODERN;
156 	if (pci_find_cap(dev, PCIY_MSI, NULL) != 0)
157 		cn->vtpci_flags |= VTPCI_FLAG_NO_MSI;
158 	if (pci_find_cap(dev, PCIY_MSIX, NULL) != 0)
159 		cn->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
160 
161 	vtpci_setup_sysctl(cn);
162 }
163 
164 int
165 vtpci_add_child(struct vtpci_common *cn)
166 {
167 	device_t dev, child;
168 
169 	dev = cn->vtpci_dev;
170 
171 	child = device_add_child(dev, NULL, DEVICE_UNIT_ANY);
172 	if (child == NULL) {
173 		device_printf(dev, "cannot create child device\n");
174 		return (ENOMEM);
175 	}
176 
177 	cn->vtpci_child_dev = child;
178 
179 	return (0);
180 }
181 
182 int
183 vtpci_delete_child(struct vtpci_common *cn)
184 {
185 	device_t dev, child;
186 	int error;
187 
188 	dev = cn->vtpci_dev;
189 
190 	child = cn->vtpci_child_dev;
191 	if (child != NULL) {
192 		error = device_delete_child(dev, child);
193 		if (error)
194 			return (error);
195 		cn->vtpci_child_dev = NULL;
196 	}
197 
198 	return (0);
199 }
200 
201 void
202 vtpci_child_detached(struct vtpci_common *cn)
203 {
204 
205 	vtpci_release_child_resources(cn);
206 
207 	cn->vtpci_child_feat_desc = NULL;
208 	cn->vtpci_host_features = 0;
209 	cn->vtpci_features = 0;
210 }
211 
212 int
213 vtpci_reinit(struct vtpci_common *cn)
214 {
215 	int idx, error;
216 
217 	for (idx = 0; idx < cn->vtpci_nvqs; idx++) {
218 		error = vtpci_reinit_virtqueue(cn, idx);
219 		if (error)
220 			return (error);
221 	}
222 
223 	if (vtpci_is_msix_enabled(cn)) {
224 		error = vtpci_set_host_msix_vectors(cn);
225 		if (error)
226 			return (error);
227 	}
228 
229 	return (0);
230 }
231 
232 static void
233 vtpci_describe_features(struct vtpci_common *cn, const char *msg,
234     uint64_t features)
235 {
236 	device_t dev, child;
237 
238 	dev = cn->vtpci_dev;
239 	child = cn->vtpci_child_dev;
240 
241 	if (device_is_attached(child) || bootverbose == 0)
242 		return;
243 
244 	virtio_describe(dev, msg, features, cn->vtpci_child_feat_desc);
245 }
246 
247 uint64_t
248 vtpci_negotiate_features(struct vtpci_common *cn,
249     uint64_t child_features, uint64_t host_features)
250 {
251 	uint64_t features;
252 
253 	cn->vtpci_host_features = host_features;
254 	vtpci_describe_features(cn, "host", host_features);
255 
256 	/*
257 	 * Limit negotiated features to what the driver, virtqueue, and
258 	 * host all support.
259 	 */
260 	features = host_features & child_features;
261 	features = virtio_filter_transport_features(features);
262 
263 	cn->vtpci_features = features;
264 	vtpci_describe_features(cn, "negotiated", features);
265 
266 	return (features);
267 }
268 
269 bool
270 vtpci_with_feature(struct vtpci_common *cn, uint64_t feature)
271 {
272 	return ((cn->vtpci_features & feature) != 0);
273 }
274 
275 int
276 vtpci_read_ivar(struct vtpci_common *cn, int index, uintptr_t *result)
277 {
278 	device_t dev;
279 	int error;
280 
281 	dev = cn->vtpci_dev;
282 	error = 0;
283 
284 	switch (index) {
285 	case VIRTIO_IVAR_SUBDEVICE:
286 		*result = pci_get_subdevice(dev);
287 		break;
288 	case VIRTIO_IVAR_VENDOR:
289 		*result = pci_get_vendor(dev);
290 		break;
291 	case VIRTIO_IVAR_DEVICE:
292 		*result = pci_get_device(dev);
293 		break;
294 	case VIRTIO_IVAR_SUBVENDOR:
295 		*result = pci_get_subvendor(dev);
296 		break;
297 	case VIRTIO_IVAR_MODERN:
298 		*result = vtpci_is_modern(cn);
299 		break;
300 	default:
301 		error = ENOENT;
302 	}
303 
304 	return (error);
305 }
306 
307 int
308 vtpci_write_ivar(struct vtpci_common *cn, int index, uintptr_t value)
309 {
310 	int error;
311 
312 	error = 0;
313 
314 	switch (index) {
315 	case VIRTIO_IVAR_FEATURE_DESC:
316 		cn->vtpci_child_feat_desc = (void *) value;
317 		break;
318 	default:
319 		error = ENOENT;
320 	}
321 
322 	return (error);
323 }
324 
325 int
326 vtpci_alloc_virtqueues(struct vtpci_common *cn, int nvqs,
327     struct vq_alloc_info *vq_info)
328 {
329 	device_t dev;
330 	int idx, align, error;
331 
332 	dev = cn->vtpci_dev;
333 
334 	/*
335 	 * This is VIRTIO_PCI_VRING_ALIGN from legacy VirtIO. In modern VirtIO,
336 	 * the tables do not have to be allocated contiguously, but we do so
337 	 * anyways.
338 	 */
339 	align = 4096;
340 
341 	if (cn->vtpci_nvqs != 0)
342 		return (EALREADY);
343 	if (nvqs <= 0)
344 		return (EINVAL);
345 
346 	cn->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue),
347 	    M_DEVBUF, M_NOWAIT | M_ZERO);
348 	if (cn->vtpci_vqs == NULL)
349 		return (ENOMEM);
350 
351 	for (idx = 0; idx < nvqs; idx++) {
352 		struct vtpci_virtqueue *vqx;
353 		struct vq_alloc_info *info;
354 		struct virtqueue *vq;
355 		bus_size_t notify_offset;
356 		uint16_t size;
357 
358 		vqx = &cn->vtpci_vqs[idx];
359 		info = &vq_info[idx];
360 
361 		size = vtpci_get_vq_size(cn, idx);
362 		notify_offset = vtpci_get_vq_notify_off(cn, idx);
363 
364 		error = virtqueue_alloc(dev, idx, size, notify_offset, align,
365 		    ~(vm_paddr_t)0, info, &vq);
366 		if (error) {
367 			device_printf(dev,
368 			    "cannot allocate virtqueue %d: %d\n", idx, error);
369 			break;
370 		}
371 
372 		vtpci_set_vq(cn, vq);
373 
374 		vqx->vtv_vq = *info->vqai_vq = vq;
375 		vqx->vtv_no_intr = info->vqai_intr == NULL;
376 
377 		cn->vtpci_nvqs++;
378 	}
379 
380 	if (error)
381 		vtpci_free_virtqueues(cn);
382 
383 	return (error);
384 }
385 
386 static int
387 vtpci_alloc_msix(struct vtpci_common *cn, int nvectors)
388 {
389 	device_t dev;
390 	int nmsix, cnt, required;
391 
392 	dev = cn->vtpci_dev;
393 
394 	/* Allocate an additional vector for the config changes. */
395 	required = nvectors + 1;
396 
397 	nmsix = pci_msix_count(dev);
398 	if (nmsix < required)
399 		return (1);
400 
401 	cnt = required;
402 	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
403 		cn->vtpci_nmsix_resources = required;
404 		return (0);
405 	}
406 
407 	pci_release_msi(dev);
408 
409 	return (1);
410 }
411 
412 static int
413 vtpci_alloc_msi(struct vtpci_common *cn)
414 {
415 	device_t dev;
416 	int nmsi, cnt, required;
417 
418 	dev = cn->vtpci_dev;
419 	required = 1;
420 
421 	nmsi = pci_msi_count(dev);
422 	if (nmsi < required)
423 		return (1);
424 
425 	cnt = required;
426 	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required)
427 		return (0);
428 
429 	pci_release_msi(dev);
430 
431 	return (1);
432 }
433 
434 static int
435 vtpci_alloc_intr_msix_pervq(struct vtpci_common *cn)
436 {
437 	int i, nvectors, error;
438 
439 	if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX)
440 		return (ENOTSUP);
441 
442 	for (nvectors = 0, i = 0; i < cn->vtpci_nvqs; i++) {
443 		if (cn->vtpci_vqs[i].vtv_no_intr == 0)
444 			nvectors++;
445 	}
446 
447 	error = vtpci_alloc_msix(cn, nvectors);
448 	if (error)
449 		return (error);
450 
451 	cn->vtpci_flags |= VTPCI_FLAG_MSIX;
452 
453 	return (0);
454 }
455 
456 static int
457 vtpci_alloc_intr_msix_shared(struct vtpci_common *cn)
458 {
459 	int error;
460 
461 	if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX)
462 		return (ENOTSUP);
463 
464 	error = vtpci_alloc_msix(cn, 1);
465 	if (error)
466 		return (error);
467 
468 	cn->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX;
469 
470 	return (0);
471 }
472 
473 static int
474 vtpci_alloc_intr_msi(struct vtpci_common *cn)
475 {
476 	int error;
477 
478 	/* Only BHyVe supports MSI. */
479 	if (cn->vtpci_flags & VTPCI_FLAG_NO_MSI)
480 		return (ENOTSUP);
481 
482 	error = vtpci_alloc_msi(cn);
483 	if (error)
484 		return (error);
485 
486 	cn->vtpci_flags |= VTPCI_FLAG_MSI;
487 
488 	return (0);
489 }
490 
491 static int
492 vtpci_alloc_intr_intx(struct vtpci_common *cn)
493 {
494 
495 	cn->vtpci_flags |= VTPCI_FLAG_INTX;
496 
497 	return (0);
498 }
499 
500 static int
501 vtpci_alloc_interrupt(struct vtpci_common *cn, int rid, int flags,
502     struct vtpci_interrupt *intr)
503 {
504 	struct resource *irq;
505 
506 	irq = bus_alloc_resource_any(cn->vtpci_dev, SYS_RES_IRQ, &rid, flags);
507 	if (irq == NULL)
508 		return (ENXIO);
509 
510 	intr->vti_irq = irq;
511 	intr->vti_rid = rid;
512 
513 	return (0);
514 }
515 
516 static void
517 vtpci_free_interrupt(struct vtpci_common *cn, struct vtpci_interrupt *intr)
518 {
519 	device_t dev;
520 
521 	dev = cn->vtpci_dev;
522 
523 	if (intr->vti_handler != NULL) {
524 		bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler);
525 		intr->vti_handler = NULL;
526 	}
527 
528 	if (intr->vti_irq != NULL) {
529 		bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid,
530 		    intr->vti_irq);
531 		intr->vti_irq = NULL;
532 		intr->vti_rid = -1;
533 	}
534 }
535 
536 static void
537 vtpci_free_interrupts(struct vtpci_common *cn)
538 {
539 	struct vtpci_interrupt *intr;
540 	int i, nvq_intrs;
541 
542 	vtpci_free_interrupt(cn, &cn->vtpci_device_interrupt);
543 
544 	if (cn->vtpci_nmsix_resources != 0) {
545 		nvq_intrs = cn->vtpci_nmsix_resources - 1;
546 		cn->vtpci_nmsix_resources = 0;
547 
548 		if ((intr = cn->vtpci_msix_vq_interrupts) != NULL) {
549 			for (i = 0; i < nvq_intrs; i++, intr++)
550 				vtpci_free_interrupt(cn, intr);
551 
552 			free(cn->vtpci_msix_vq_interrupts, M_DEVBUF);
553 			cn->vtpci_msix_vq_interrupts = NULL;
554 		}
555 	}
556 
557 	if (cn->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX))
558 		pci_release_msi(cn->vtpci_dev);
559 
560 	cn->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK;
561 }
562 
563 static void
564 vtpci_free_virtqueues(struct vtpci_common *cn)
565 {
566 	struct vtpci_virtqueue *vqx;
567 	int idx;
568 
569 	for (idx = 0; idx < cn->vtpci_nvqs; idx++) {
570 		vtpci_disable_vq(cn, idx);
571 
572 		vqx = &cn->vtpci_vqs[idx];
573 		virtqueue_free(vqx->vtv_vq);
574 		vqx->vtv_vq = NULL;
575 	}
576 
577 	free(cn->vtpci_vqs, M_DEVBUF);
578 	cn->vtpci_vqs = NULL;
579 	cn->vtpci_nvqs = 0;
580 }
581 
/*
 * Release everything allocated on behalf of the child: the interrupts
 * first, then the virtqueues.
 */
void
vtpci_release_child_resources(struct vtpci_common *cn)
{
	vtpci_free_interrupts(cn);
	vtpci_free_virtqueues(cn);
}
589 
590 static void
591 vtpci_cleanup_setup_intr_attempt(struct vtpci_common *cn)
592 {
593 	int idx;
594 
595 	if (cn->vtpci_flags & VTPCI_FLAG_MSIX) {
596 		vtpci_register_cfg_msix(cn, NULL);
597 
598 		for (idx = 0; idx < cn->vtpci_nvqs; idx++)
599 			vtpci_register_vq_msix(cn, idx, NULL);
600 	}
601 
602 	vtpci_free_interrupts(cn);
603 }
604 
605 static int
606 vtpci_alloc_intr_resources(struct vtpci_common *cn)
607 {
608 	struct vtpci_interrupt *intr;
609 	int i, rid, flags, nvq_intrs, error;
610 
611 	flags = RF_ACTIVE;
612 
613 	if (cn->vtpci_flags & VTPCI_FLAG_INTX) {
614 		rid = 0;
615 		flags |= RF_SHAREABLE;
616 	} else
617 		rid = 1;
618 
619 	/*
620 	 * When using INTX or MSI interrupts, this resource handles all
621 	 * interrupts. When using MSIX, this resource handles just the
622 	 * configuration changed interrupt.
623 	 */
624 	intr = &cn->vtpci_device_interrupt;
625 
626 	error = vtpci_alloc_interrupt(cn, rid, flags, intr);
627 	if (error || cn->vtpci_flags & (VTPCI_FLAG_INTX | VTPCI_FLAG_MSI))
628 		return (error);
629 
630 	/*
631 	 * Now allocate the interrupts for the virtqueues. This may be one
632 	 * for all the virtqueues, or one for each virtqueue. Subtract one
633 	 * below for because of the configuration changed interrupt.
634 	 */
635 	nvq_intrs = cn->vtpci_nmsix_resources - 1;
636 
637 	cn->vtpci_msix_vq_interrupts = malloc(nvq_intrs *
638 	    sizeof(struct vtpci_interrupt), M_DEVBUF, M_NOWAIT | M_ZERO);
639 	if (cn->vtpci_msix_vq_interrupts == NULL)
640 		return (ENOMEM);
641 
642 	intr = cn->vtpci_msix_vq_interrupts;
643 
644 	for (i = 0, rid++; i < nvq_intrs; i++, rid++, intr++) {
645 		error = vtpci_alloc_interrupt(cn, rid, flags, intr);
646 		if (error)
647 			return (error);
648 	}
649 
650 	return (0);
651 }
652 
653 static int
654 vtpci_setup_intx_interrupt(struct vtpci_common *cn, enum intr_type type)
655 {
656 	struct vtpci_interrupt *intr;
657 	int error;
658 
659 	intr = &cn->vtpci_device_interrupt;
660 
661 	error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL,
662 	    vtpci_intx_intr, cn, &intr->vti_handler);
663 
664 	return (error);
665 }
666 
667 static int
668 vtpci_setup_pervq_msix_interrupts(struct vtpci_common *cn, enum intr_type type)
669 {
670 	struct vtpci_virtqueue *vqx;
671 	struct vtpci_interrupt *intr;
672 	int i, error;
673 
674 	intr = cn->vtpci_msix_vq_interrupts;
675 
676 	for (i = 0; i < cn->vtpci_nvqs; i++) {
677 		vqx = &cn->vtpci_vqs[i];
678 
679 		if (vqx->vtv_no_intr)
680 			continue;
681 
682 		error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type,
683 		    vtpci_vq_intr_filter, vtpci_vq_intr, vqx->vtv_vq,
684 		    &intr->vti_handler);
685 		if (error)
686 			return (error);
687 
688 		intr++;
689 	}
690 
691 	return (0);
692 }
693 
694 static int
695 vtpci_set_host_msix_vectors(struct vtpci_common *cn)
696 {
697 	struct vtpci_interrupt *intr, *tintr;
698 	int idx, error;
699 
700 	intr = &cn->vtpci_device_interrupt;
701 	error = vtpci_register_cfg_msix(cn, intr);
702 	if (error)
703 		return (error);
704 
705 	intr = cn->vtpci_msix_vq_interrupts;
706 	for (idx = 0; idx < cn->vtpci_nvqs; idx++) {
707 		if (cn->vtpci_vqs[idx].vtv_no_intr)
708 			tintr = NULL;
709 		else
710 			tintr = intr;
711 
712 		error = vtpci_register_vq_msix(cn, idx, tintr);
713 		if (error)
714 			break;
715 
716 		/*
717 		 * For shared MSIX, all the virtqueues share the first
718 		 * interrupt.
719 		 */
720 		if (!cn->vtpci_vqs[idx].vtv_no_intr &&
721 		    (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0)
722 			intr++;
723 	}
724 
725 	return (error);
726 }
727 
728 static int
729 vtpci_setup_msix_interrupts(struct vtpci_common *cn, enum intr_type type)
730 {
731 	struct vtpci_interrupt *intr;
732 	int error;
733 
734 	intr = &cn->vtpci_device_interrupt;
735 
736 	error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL,
737 	    vtpci_config_intr, cn, &intr->vti_handler);
738 	if (error)
739 		return (error);
740 
741 	if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) {
742 		intr = &cn->vtpci_msix_vq_interrupts[0];
743 
744 		error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type,
745 		    vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, cn,
746 		    &intr->vti_handler);
747 	} else
748 		error = vtpci_setup_pervq_msix_interrupts(cn, type);
749 
750 	return (error ? error : vtpci_set_host_msix_vectors(cn));
751 }
752 
753 static int
754 vtpci_setup_intrs(struct vtpci_common *cn, enum intr_type type)
755 {
756 	int error;
757 
758 	type |= INTR_MPSAFE;
759 	KASSERT(cn->vtpci_flags & VTPCI_FLAG_ITYPE_MASK,
760 	    ("%s: no interrupt type selected %#x", __func__, cn->vtpci_flags));
761 
762 	error = vtpci_alloc_intr_resources(cn);
763 	if (error)
764 		return (error);
765 
766 	if (cn->vtpci_flags & VTPCI_FLAG_INTX)
767 		error = vtpci_setup_intx_interrupt(cn, type);
768 	else if (cn->vtpci_flags & VTPCI_FLAG_MSI)
769 		error = vtpci_setup_msi_interrupt(cn, type);
770 	else
771 		error = vtpci_setup_msix_interrupts(cn, type);
772 
773 	return (error);
774 }
775 
776 int
777 vtpci_setup_interrupts(struct vtpci_common *cn, enum intr_type type)
778 {
779 	device_t dev;
780 	int attempt, error;
781 
782 	dev = cn->vtpci_dev;
783 
784 	for (attempt = 0; attempt < 5; attempt++) {
785 		/*
786 		 * Start with the most desirable interrupt configuration and
787 		 * fallback towards less desirable ones.
788 		 */
789 		switch (attempt) {
790 		case 0:
791 			error = vtpci_alloc_intr_msix_pervq(cn);
792 			break;
793 		case 1:
794 			error = vtpci_alloc_intr_msix_shared(cn);
795 			break;
796 		case 2:
797 			error = vtpci_alloc_intr_msi(cn);
798 			break;
799 		case 3:
800 			error = vtpci_alloc_intr_intx(cn);
801 			break;
802 		default:
803 			device_printf(dev,
804 			    "exhausted all interrupt allocation attempts\n");
805 			return (ENXIO);
806 		}
807 
808 		if (error == 0 && vtpci_setup_intrs(cn, type) == 0)
809 			break;
810 
811 		vtpci_cleanup_setup_intr_attempt(cn);
812 	}
813 
814 	if (bootverbose) {
815 		if (cn->vtpci_flags & VTPCI_FLAG_INTX)
816 			device_printf(dev, "using legacy interrupt\n");
817 		else if (cn->vtpci_flags & VTPCI_FLAG_MSI)
818 			device_printf(dev, "using MSI interrupt\n");
819 		else if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
820 			device_printf(dev, "using shared MSIX interrupts\n");
821 		else
822 			device_printf(dev, "using per VQ MSIX interrupts\n");
823 	}
824 
825 	return (0);
826 }
827 
828 static int
829 vtpci_reinit_virtqueue(struct vtpci_common *cn, int idx)
830 {
831 	struct vtpci_virtqueue *vqx;
832 	struct virtqueue *vq;
833 	int error;
834 
835 	vqx = &cn->vtpci_vqs[idx];
836 	vq = vqx->vtv_vq;
837 
838 	KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx));
839 
840 	error = virtqueue_reinit(vq, vtpci_get_vq_size(cn, idx));
841 	if (error == 0)
842 		vtpci_set_vq(cn, vq);
843 
844 	return (error);
845 }
846 
847 static void
848 vtpci_intx_intr(void *xcn)
849 {
850 	struct vtpci_common *cn;
851 	struct vtpci_virtqueue *vqx;
852 	int i;
853 	uint8_t isr;
854 
855 	cn = xcn;
856 	isr = vtpci_read_isr(cn);
857 
858 	if (isr & VIRTIO_PCI_ISR_CONFIG)
859 		vtpci_config_intr(cn);
860 
861 	if (isr & VIRTIO_PCI_ISR_INTR) {
862 		vqx = &cn->vtpci_vqs[0];
863 		for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) {
864 			if (vqx->vtv_no_intr == 0)
865 				virtqueue_intr(vqx->vtv_vq);
866 		}
867 	}
868 }
869 
870 static int
871 vtpci_vq_shared_intr_filter(void *xcn)
872 {
873 	struct vtpci_common *cn;
874 	struct vtpci_virtqueue *vqx;
875 	int i, rc;
876 
877 	cn = xcn;
878 	vqx = &cn->vtpci_vqs[0];
879 	rc = 0;
880 
881 	for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) {
882 		if (vqx->vtv_no_intr == 0)
883 			rc |= virtqueue_intr_filter(vqx->vtv_vq);
884 	}
885 
886 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
887 }
888 
889 static void
890 vtpci_vq_shared_intr(void *xcn)
891 {
892 	struct vtpci_common *cn;
893 	struct vtpci_virtqueue *vqx;
894 	int i;
895 
896 	cn = xcn;
897 	vqx = &cn->vtpci_vqs[0];
898 
899 	for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) {
900 		if (vqx->vtv_no_intr == 0)
901 			virtqueue_intr(vqx->vtv_vq);
902 	}
903 }
904 
905 static int
906 vtpci_vq_intr_filter(void *xvq)
907 {
908 	struct virtqueue *vq;
909 	int rc;
910 
911 	vq = xvq;
912 	rc = virtqueue_intr_filter(vq);
913 
914 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
915 }
916 
/*
 * Interrupt handler for a per-virtqueue MSIX vector; 'xvq' is the
 * virtqueue, which is passed on to virtqueue_intr().
 */
static void
vtpci_vq_intr(void *xvq)
{
	struct virtqueue *vq = xvq;

	virtqueue_intr(vq);
}
925 
926 static void
927 vtpci_config_intr(void *xcn)
928 {
929 	struct vtpci_common *cn;
930 	device_t child;
931 
932 	cn = xcn;
933 	child = cn->vtpci_child_dev;
934 
935 	if (child != NULL)
936 		VIRTIO_CONFIG_CHANGE(child);
937 }
938 
939 static int
940 vtpci_feature_sysctl(struct sysctl_req *req, struct vtpci_common *cn,
941     uint64_t features)
942 {
943 	struct sbuf *sb;
944 	int error;
945 
946 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
947 	if (sb == NULL)
948 		return (ENOMEM);
949 
950 	error = virtio_describe_sbuf(sb, features, cn->vtpci_child_feat_desc);
951 	sbuf_delete(sb);
952 
953 	return (error);
954 }
955 
956 static int
957 vtpci_host_features_sysctl(SYSCTL_HANDLER_ARGS)
958 {
959 	struct vtpci_common *cn;
960 
961 	cn = arg1;
962 
963 	return (vtpci_feature_sysctl(req, cn, cn->vtpci_host_features));
964 }
965 
966 static int
967 vtpci_negotiated_features_sysctl(SYSCTL_HANDLER_ARGS)
968 {
969 	struct vtpci_common *cn;
970 
971 	cn = arg1;
972 
973 	return (vtpci_feature_sysctl(req, cn, cn->vtpci_features));
974 }
975 
976 static void
977 vtpci_setup_sysctl(struct vtpci_common *cn)
978 {
979 	device_t dev;
980 	struct sysctl_ctx_list *ctx;
981 	struct sysctl_oid *tree;
982 	struct sysctl_oid_list *child;
983 
984 	dev = cn->vtpci_dev;
985 	ctx = device_get_sysctl_ctx(dev);
986 	tree = device_get_sysctl_tree(dev);
987 	child = SYSCTL_CHILDREN(tree);
988 
989 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nvqs",
990 	    CTLFLAG_RD, &cn->vtpci_nvqs, 0, "Number of virtqueues");
991 
992 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "host_features",
993 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0,
994 	    vtpci_host_features_sysctl, "A", "Features supported by the host");
995 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "negotiated_features",
996 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0,
997 	    vtpci_negotiated_features_sysctl, "A", "Features negotiated");
998 }
999