xref: /freebsd/sys/dev/virtio/pci/virtio_pci.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/bus.h>
33 #include <sys/kernel.h>
34 #include <sys/sbuf.h>
35 #include <sys/sysctl.h>
36 #include <sys/module.h>
37 #include <sys/malloc.h>
38 
39 #include <machine/bus.h>
40 #include <machine/resource.h>
41 #include <sys/bus.h>
42 #include <sys/rman.h>
43 
44 #include <dev/pci/pcivar.h>
45 #include <dev/pci/pcireg.h>
46 
47 #include <dev/virtio/virtio.h>
48 #include <dev/virtio/virtqueue.h>
49 #include <dev/virtio/pci/virtio_pci.h>
50 #include <dev/virtio/pci/virtio_pci_var.h>
51 
52 #include "virtio_pci_if.h"
53 #include "virtio_if.h"
54 
55 static void	vtpci_describe_features(struct vtpci_common *, const char *,
56 		    uint64_t);
57 static int	vtpci_alloc_msix(struct vtpci_common *, int);
58 static int	vtpci_alloc_msi(struct vtpci_common *);
59 static int	vtpci_alloc_intr_msix_pervq(struct vtpci_common *);
60 static int	vtpci_alloc_intr_msix_shared(struct vtpci_common *);
61 static int	vtpci_alloc_intr_msi(struct vtpci_common *);
62 static int	vtpci_alloc_intr_intx(struct vtpci_common *);
63 static int	vtpci_alloc_interrupt(struct vtpci_common *, int, int,
64 		    struct vtpci_interrupt *);
65 static void	vtpci_free_interrupt(struct vtpci_common *,
66 		    struct vtpci_interrupt *);
67 
68 static void	vtpci_free_interrupts(struct vtpci_common *);
69 static void	vtpci_free_virtqueues(struct vtpci_common *);
70 static void	vtpci_cleanup_setup_intr_attempt(struct vtpci_common *);
71 static int	vtpci_alloc_intr_resources(struct vtpci_common *);
72 static int	vtpci_setup_intx_interrupt(struct vtpci_common *,
73 		    enum intr_type);
74 static int	vtpci_setup_pervq_msix_interrupts(struct vtpci_common *,
75 		    enum intr_type);
76 static int	vtpci_set_host_msix_vectors(struct vtpci_common *);
77 static int	vtpci_setup_msix_interrupts(struct vtpci_common *,
78 		    enum intr_type);
79 static int	vtpci_setup_intrs(struct vtpci_common *, enum intr_type);
80 static int	vtpci_reinit_virtqueue(struct vtpci_common *, int);
81 static void	vtpci_intx_intr(void *);
82 static int	vtpci_vq_shared_intr_filter(void *);
83 static void	vtpci_vq_shared_intr(void *);
84 static int	vtpci_vq_intr_filter(void *);
85 static void	vtpci_vq_intr(void *);
86 static void	vtpci_config_intr(void *);
87 
88 static void	vtpci_setup_sysctl(struct vtpci_common *);
89 
90 #define vtpci_setup_msi_interrupt vtpci_setup_intx_interrupt
91 
92 /*
93  * This module contains two drivers:
94  *   - virtio_pci_legacy for pre-V1 support
95  *   - virtio_pci_modern for V1 support
96  */
97 MODULE_VERSION(virtio_pci, 1);
98 MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
99 MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
100 
101 int vtpci_disable_msix = 0;
102 TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
103 
104 static uint8_t
105 vtpci_read_isr(struct vtpci_common *cn)
106 {
107 	return (VIRTIO_PCI_READ_ISR(cn->vtpci_dev));
108 }
109 
110 static uint16_t
111 vtpci_get_vq_size(struct vtpci_common *cn, int idx)
112 {
113 	return (VIRTIO_PCI_GET_VQ_SIZE(cn->vtpci_dev, idx));
114 }
115 
116 static bus_size_t
117 vtpci_get_vq_notify_off(struct vtpci_common *cn, int idx)
118 {
119 	return (VIRTIO_PCI_GET_VQ_NOTIFY_OFF(cn->vtpci_dev, idx));
120 }
121 
122 static void
123 vtpci_set_vq(struct vtpci_common *cn, struct virtqueue *vq)
124 {
125 	VIRTIO_PCI_SET_VQ(cn->vtpci_dev, vq);
126 }
127 
128 static void
129 vtpci_disable_vq(struct vtpci_common *cn, int idx)
130 {
131 	VIRTIO_PCI_DISABLE_VQ(cn->vtpci_dev, idx);
132 }
133 
134 static int
135 vtpci_register_cfg_msix(struct vtpci_common *cn, struct vtpci_interrupt *intr)
136 {
137 	return (VIRTIO_PCI_REGISTER_CFG_MSIX(cn->vtpci_dev, intr));
138 }
139 
140 static int
141 vtpci_register_vq_msix(struct vtpci_common *cn, int idx,
142     struct vtpci_interrupt *intr)
143 {
144 	return (VIRTIO_PCI_REGISTER_VQ_MSIX(cn->vtpci_dev, idx, intr));
145 }
146 
147 void
148 vtpci_init(struct vtpci_common *cn, device_t dev, bool modern)
149 {
150 
151 	cn->vtpci_dev = dev;
152 
153 	pci_enable_busmaster(dev);
154 
155 	if (modern)
156 		cn->vtpci_flags |= VTPCI_FLAG_MODERN;
157 	if (pci_find_cap(dev, PCIY_MSI, NULL) != 0)
158 		cn->vtpci_flags |= VTPCI_FLAG_NO_MSI;
159 	if (pci_find_cap(dev, PCIY_MSIX, NULL) != 0)
160 		cn->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
161 
162 	vtpci_setup_sysctl(cn);
163 }
164 
165 int
166 vtpci_add_child(struct vtpci_common *cn)
167 {
168 	device_t dev, child;
169 
170 	dev = cn->vtpci_dev;
171 
172 	child = device_add_child(dev, NULL, -1);
173 	if (child == NULL) {
174 		device_printf(dev, "cannot create child device\n");
175 		return (ENOMEM);
176 	}
177 
178 	cn->vtpci_child_dev = child;
179 
180 	return (0);
181 }
182 
183 int
184 vtpci_delete_child(struct vtpci_common *cn)
185 {
186 	device_t dev, child;
187 	int error;
188 
189 	dev = cn->vtpci_dev;
190 
191 	child = cn->vtpci_child_dev;
192 	if (child != NULL) {
193 		error = device_delete_child(dev, child);
194 		if (error)
195 			return (error);
196 		cn->vtpci_child_dev = NULL;
197 	}
198 
199 	return (0);
200 }
201 
202 void
203 vtpci_child_detached(struct vtpci_common *cn)
204 {
205 
206 	vtpci_release_child_resources(cn);
207 
208 	cn->vtpci_child_feat_desc = NULL;
209 	cn->vtpci_host_features = 0;
210 	cn->vtpci_features = 0;
211 }
212 
213 int
214 vtpci_reinit(struct vtpci_common *cn)
215 {
216 	int idx, error;
217 
218 	for (idx = 0; idx < cn->vtpci_nvqs; idx++) {
219 		error = vtpci_reinit_virtqueue(cn, idx);
220 		if (error)
221 			return (error);
222 	}
223 
224 	if (vtpci_is_msix_enabled(cn)) {
225 		error = vtpci_set_host_msix_vectors(cn);
226 		if (error)
227 			return (error);
228 	}
229 
230 	return (0);
231 }
232 
233 static void
234 vtpci_describe_features(struct vtpci_common *cn, const char *msg,
235     uint64_t features)
236 {
237 	device_t dev, child;
238 
239 	dev = cn->vtpci_dev;
240 	child = cn->vtpci_child_dev;
241 
242 	if (device_is_attached(child) || bootverbose == 0)
243 		return;
244 
245 	virtio_describe(dev, msg, features, cn->vtpci_child_feat_desc);
246 }
247 
248 uint64_t
249 vtpci_negotiate_features(struct vtpci_common *cn,
250     uint64_t child_features, uint64_t host_features)
251 {
252 	uint64_t features;
253 
254 	cn->vtpci_host_features = host_features;
255 	vtpci_describe_features(cn, "host", host_features);
256 
257 	/*
258 	 * Limit negotiated features to what the driver, virtqueue, and
259 	 * host all support.
260 	 */
261 	features = host_features & child_features;
262 	features = virtio_filter_transport_features(features);
263 
264 	cn->vtpci_features = features;
265 	vtpci_describe_features(cn, "negotiated", features);
266 
267 	return (features);
268 }
269 
270 int
271 vtpci_with_feature(struct vtpci_common *cn, uint64_t feature)
272 {
273 	return ((cn->vtpci_features & feature) != 0);
274 }
275 
276 int
277 vtpci_read_ivar(struct vtpci_common *cn, int index, uintptr_t *result)
278 {
279 	device_t dev;
280 	int error;
281 
282 	dev = cn->vtpci_dev;
283 	error = 0;
284 
285 	switch (index) {
286 	case VIRTIO_IVAR_SUBDEVICE:
287 		*result = pci_get_subdevice(dev);
288 		break;
289 	case VIRTIO_IVAR_VENDOR:
290 		*result = pci_get_vendor(dev);
291 		break;
292 	case VIRTIO_IVAR_DEVICE:
293 		*result = pci_get_device(dev);
294 		break;
295 	case VIRTIO_IVAR_SUBVENDOR:
296 		*result = pci_get_subvendor(dev);
297 		break;
298 	case VIRTIO_IVAR_MODERN:
299 		*result = vtpci_is_modern(cn);
300 		break;
301 	default:
302 		error = ENOENT;
303 	}
304 
305 	return (error);
306 }
307 
308 int
309 vtpci_write_ivar(struct vtpci_common *cn, int index, uintptr_t value)
310 {
311 	int error;
312 
313 	error = 0;
314 
315 	switch (index) {
316 	case VIRTIO_IVAR_FEATURE_DESC:
317 		cn->vtpci_child_feat_desc = (void *) value;
318 		break;
319 	default:
320 		error = ENOENT;
321 	}
322 
323 	return (error);
324 }
325 
326 int
327 vtpci_alloc_virtqueues(struct vtpci_common *cn, int flags, int nvqs,
328     struct vq_alloc_info *vq_info)
329 {
330 	device_t dev;
331 	int idx, align, error;
332 
333 	dev = cn->vtpci_dev;
334 
335 	/*
336 	 * This is VIRTIO_PCI_VRING_ALIGN from legacy VirtIO. In modern VirtIO,
337 	 * the tables do not have to be allocated contiguously, but we do so
338 	 * anyways.
339 	 */
340 	align = 4096;
341 
342 	if (cn->vtpci_nvqs != 0)
343 		return (EALREADY);
344 	if (nvqs <= 0)
345 		return (EINVAL);
346 
347 	cn->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue),
348 	    M_DEVBUF, M_NOWAIT | M_ZERO);
349 	if (cn->vtpci_vqs == NULL)
350 		return (ENOMEM);
351 
352 	for (idx = 0; idx < nvqs; idx++) {
353 		struct vtpci_virtqueue *vqx;
354 		struct vq_alloc_info *info;
355 		struct virtqueue *vq;
356 		bus_size_t notify_offset;
357 		uint16_t size;
358 
359 		vqx = &cn->vtpci_vqs[idx];
360 		info = &vq_info[idx];
361 
362 		size = vtpci_get_vq_size(cn, idx);
363 		notify_offset = vtpci_get_vq_notify_off(cn, idx);
364 
365 		error = virtqueue_alloc(dev, idx, size, notify_offset, align,
366 		    ~(vm_paddr_t)0, info, &vq);
367 		if (error) {
368 			device_printf(dev,
369 			    "cannot allocate virtqueue %d: %d\n", idx, error);
370 			break;
371 		}
372 
373 		vtpci_set_vq(cn, vq);
374 
375 		vqx->vtv_vq = *info->vqai_vq = vq;
376 		vqx->vtv_no_intr = info->vqai_intr == NULL;
377 
378 		cn->vtpci_nvqs++;
379 	}
380 
381 	if (error)
382 		vtpci_free_virtqueues(cn);
383 
384 	return (error);
385 }
386 
387 static int
388 vtpci_alloc_msix(struct vtpci_common *cn, int nvectors)
389 {
390 	device_t dev;
391 	int nmsix, cnt, required;
392 
393 	dev = cn->vtpci_dev;
394 
395 	/* Allocate an additional vector for the config changes. */
396 	required = nvectors + 1;
397 
398 	nmsix = pci_msix_count(dev);
399 	if (nmsix < required)
400 		return (1);
401 
402 	cnt = required;
403 	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
404 		cn->vtpci_nmsix_resources = required;
405 		return (0);
406 	}
407 
408 	pci_release_msi(dev);
409 
410 	return (1);
411 }
412 
413 static int
414 vtpci_alloc_msi(struct vtpci_common *cn)
415 {
416 	device_t dev;
417 	int nmsi, cnt, required;
418 
419 	dev = cn->vtpci_dev;
420 	required = 1;
421 
422 	nmsi = pci_msi_count(dev);
423 	if (nmsi < required)
424 		return (1);
425 
426 	cnt = required;
427 	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required)
428 		return (0);
429 
430 	pci_release_msi(dev);
431 
432 	return (1);
433 }
434 
435 static int
436 vtpci_alloc_intr_msix_pervq(struct vtpci_common *cn)
437 {
438 	int i, nvectors, error;
439 
440 	if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX)
441 		return (ENOTSUP);
442 
443 	for (nvectors = 0, i = 0; i < cn->vtpci_nvqs; i++) {
444 		if (cn->vtpci_vqs[i].vtv_no_intr == 0)
445 			nvectors++;
446 	}
447 
448 	error = vtpci_alloc_msix(cn, nvectors);
449 	if (error)
450 		return (error);
451 
452 	cn->vtpci_flags |= VTPCI_FLAG_MSIX;
453 
454 	return (0);
455 }
456 
457 static int
458 vtpci_alloc_intr_msix_shared(struct vtpci_common *cn)
459 {
460 	int error;
461 
462 	if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX)
463 		return (ENOTSUP);
464 
465 	error = vtpci_alloc_msix(cn, 1);
466 	if (error)
467 		return (error);
468 
469 	cn->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX;
470 
471 	return (0);
472 }
473 
474 static int
475 vtpci_alloc_intr_msi(struct vtpci_common *cn)
476 {
477 	int error;
478 
479 	/* Only BHyVe supports MSI. */
480 	if (cn->vtpci_flags & VTPCI_FLAG_NO_MSI)
481 		return (ENOTSUP);
482 
483 	error = vtpci_alloc_msi(cn);
484 	if (error)
485 		return (error);
486 
487 	cn->vtpci_flags |= VTPCI_FLAG_MSI;
488 
489 	return (0);
490 }
491 
492 static int
493 vtpci_alloc_intr_intx(struct vtpci_common *cn)
494 {
495 
496 	cn->vtpci_flags |= VTPCI_FLAG_INTX;
497 
498 	return (0);
499 }
500 
501 static int
502 vtpci_alloc_interrupt(struct vtpci_common *cn, int rid, int flags,
503     struct vtpci_interrupt *intr)
504 {
505 	struct resource *irq;
506 
507 	irq = bus_alloc_resource_any(cn->vtpci_dev, SYS_RES_IRQ, &rid, flags);
508 	if (irq == NULL)
509 		return (ENXIO);
510 
511 	intr->vti_irq = irq;
512 	intr->vti_rid = rid;
513 
514 	return (0);
515 }
516 
517 static void
518 vtpci_free_interrupt(struct vtpci_common *cn, struct vtpci_interrupt *intr)
519 {
520 	device_t dev;
521 
522 	dev = cn->vtpci_dev;
523 
524 	if (intr->vti_handler != NULL) {
525 		bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler);
526 		intr->vti_handler = NULL;
527 	}
528 
529 	if (intr->vti_irq != NULL) {
530 		bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid,
531 		    intr->vti_irq);
532 		intr->vti_irq = NULL;
533 		intr->vti_rid = -1;
534 	}
535 }
536 
537 static void
538 vtpci_free_interrupts(struct vtpci_common *cn)
539 {
540 	struct vtpci_interrupt *intr;
541 	int i, nvq_intrs;
542 
543 	vtpci_free_interrupt(cn, &cn->vtpci_device_interrupt);
544 
545 	if (cn->vtpci_nmsix_resources != 0) {
546 		nvq_intrs = cn->vtpci_nmsix_resources - 1;
547 		cn->vtpci_nmsix_resources = 0;
548 
549 		if ((intr = cn->vtpci_msix_vq_interrupts) != NULL) {
550 			for (i = 0; i < nvq_intrs; i++, intr++)
551 				vtpci_free_interrupt(cn, intr);
552 
553 			free(cn->vtpci_msix_vq_interrupts, M_DEVBUF);
554 			cn->vtpci_msix_vq_interrupts = NULL;
555 		}
556 	}
557 
558 	if (cn->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX))
559 		pci_release_msi(cn->vtpci_dev);
560 
561 	cn->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK;
562 }
563 
564 static void
565 vtpci_free_virtqueues(struct vtpci_common *cn)
566 {
567 	struct vtpci_virtqueue *vqx;
568 	int idx;
569 
570 	for (idx = 0; idx < cn->vtpci_nvqs; idx++) {
571 		vtpci_disable_vq(cn, idx);
572 
573 		vqx = &cn->vtpci_vqs[idx];
574 		virtqueue_free(vqx->vtv_vq);
575 		vqx->vtv_vq = NULL;
576 	}
577 
578 	free(cn->vtpci_vqs, M_DEVBUF);
579 	cn->vtpci_vqs = NULL;
580 	cn->vtpci_nvqs = 0;
581 }
582 
/*
 * Release everything held on behalf of the child: the interrupts first,
 * then the virtqueues.
 */
void
vtpci_release_child_resources(struct vtpci_common *cn)
{

	/* Interrupts go before the virtqueues they service. */
	vtpci_free_interrupts(cn);
	vtpci_free_virtqueues(cn);
}
590 
591 static void
592 vtpci_cleanup_setup_intr_attempt(struct vtpci_common *cn)
593 {
594 	int idx;
595 
596 	if (cn->vtpci_flags & VTPCI_FLAG_MSIX) {
597 		vtpci_register_cfg_msix(cn, NULL);
598 
599 		for (idx = 0; idx < cn->vtpci_nvqs; idx++)
600 			vtpci_register_vq_msix(cn, idx, NULL);
601 	}
602 
603 	vtpci_free_interrupts(cn);
604 }
605 
606 static int
607 vtpci_alloc_intr_resources(struct vtpci_common *cn)
608 {
609 	struct vtpci_interrupt *intr;
610 	int i, rid, flags, nvq_intrs, error;
611 
612 	flags = RF_ACTIVE;
613 
614 	if (cn->vtpci_flags & VTPCI_FLAG_INTX) {
615 		rid = 0;
616 		flags |= RF_SHAREABLE;
617 	} else
618 		rid = 1;
619 
620 	/*
621 	 * When using INTX or MSI interrupts, this resource handles all
622 	 * interrupts. When using MSIX, this resource handles just the
623 	 * configuration changed interrupt.
624 	 */
625 	intr = &cn->vtpci_device_interrupt;
626 
627 	error = vtpci_alloc_interrupt(cn, rid, flags, intr);
628 	if (error || cn->vtpci_flags & (VTPCI_FLAG_INTX | VTPCI_FLAG_MSI))
629 		return (error);
630 
631 	/*
632 	 * Now allocate the interrupts for the virtqueues. This may be one
633 	 * for all the virtqueues, or one for each virtqueue. Subtract one
634 	 * below for because of the configuration changed interrupt.
635 	 */
636 	nvq_intrs = cn->vtpci_nmsix_resources - 1;
637 
638 	cn->vtpci_msix_vq_interrupts = malloc(nvq_intrs *
639 	    sizeof(struct vtpci_interrupt), M_DEVBUF, M_NOWAIT | M_ZERO);
640 	if (cn->vtpci_msix_vq_interrupts == NULL)
641 		return (ENOMEM);
642 
643 	intr = cn->vtpci_msix_vq_interrupts;
644 
645 	for (i = 0, rid++; i < nvq_intrs; i++, rid++, intr++) {
646 		error = vtpci_alloc_interrupt(cn, rid, flags, intr);
647 		if (error)
648 			return (error);
649 	}
650 
651 	return (0);
652 }
653 
654 static int
655 vtpci_setup_intx_interrupt(struct vtpci_common *cn, enum intr_type type)
656 {
657 	struct vtpci_interrupt *intr;
658 	int error;
659 
660 	intr = &cn->vtpci_device_interrupt;
661 
662 	error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL,
663 	    vtpci_intx_intr, cn, &intr->vti_handler);
664 
665 	return (error);
666 }
667 
668 static int
669 vtpci_setup_pervq_msix_interrupts(struct vtpci_common *cn, enum intr_type type)
670 {
671 	struct vtpci_virtqueue *vqx;
672 	struct vtpci_interrupt *intr;
673 	int i, error;
674 
675 	intr = cn->vtpci_msix_vq_interrupts;
676 
677 	for (i = 0; i < cn->vtpci_nvqs; i++) {
678 		vqx = &cn->vtpci_vqs[i];
679 
680 		if (vqx->vtv_no_intr)
681 			continue;
682 
683 		error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type,
684 		    vtpci_vq_intr_filter, vtpci_vq_intr, vqx->vtv_vq,
685 		    &intr->vti_handler);
686 		if (error)
687 			return (error);
688 
689 		intr++;
690 	}
691 
692 	return (0);
693 }
694 
695 static int
696 vtpci_set_host_msix_vectors(struct vtpci_common *cn)
697 {
698 	struct vtpci_interrupt *intr, *tintr;
699 	int idx, error;
700 
701 	intr = &cn->vtpci_device_interrupt;
702 	error = vtpci_register_cfg_msix(cn, intr);
703 	if (error)
704 		return (error);
705 
706 	intr = cn->vtpci_msix_vq_interrupts;
707 	for (idx = 0; idx < cn->vtpci_nvqs; idx++) {
708 		if (cn->vtpci_vqs[idx].vtv_no_intr)
709 			tintr = NULL;
710 		else
711 			tintr = intr;
712 
713 		error = vtpci_register_vq_msix(cn, idx, tintr);
714 		if (error)
715 			break;
716 
717 		/*
718 		 * For shared MSIX, all the virtqueues share the first
719 		 * interrupt.
720 		 */
721 		if (!cn->vtpci_vqs[idx].vtv_no_intr &&
722 		    (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0)
723 			intr++;
724 	}
725 
726 	return (error);
727 }
728 
729 static int
730 vtpci_setup_msix_interrupts(struct vtpci_common *cn, enum intr_type type)
731 {
732 	struct vtpci_interrupt *intr;
733 	int error;
734 
735 	intr = &cn->vtpci_device_interrupt;
736 
737 	error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL,
738 	    vtpci_config_intr, cn, &intr->vti_handler);
739 	if (error)
740 		return (error);
741 
742 	if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) {
743 		intr = &cn->vtpci_msix_vq_interrupts[0];
744 
745 		error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type,
746 		    vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, cn,
747 		    &intr->vti_handler);
748 	} else
749 		error = vtpci_setup_pervq_msix_interrupts(cn, type);
750 
751 	return (error ? error : vtpci_set_host_msix_vectors(cn));
752 }
753 
754 static int
755 vtpci_setup_intrs(struct vtpci_common *cn, enum intr_type type)
756 {
757 	int error;
758 
759 	type |= INTR_MPSAFE;
760 	KASSERT(cn->vtpci_flags & VTPCI_FLAG_ITYPE_MASK,
761 	    ("%s: no interrupt type selected %#x", __func__, cn->vtpci_flags));
762 
763 	error = vtpci_alloc_intr_resources(cn);
764 	if (error)
765 		return (error);
766 
767 	if (cn->vtpci_flags & VTPCI_FLAG_INTX)
768 		error = vtpci_setup_intx_interrupt(cn, type);
769 	else if (cn->vtpci_flags & VTPCI_FLAG_MSI)
770 		error = vtpci_setup_msi_interrupt(cn, type);
771 	else
772 		error = vtpci_setup_msix_interrupts(cn, type);
773 
774 	return (error);
775 }
776 
777 int
778 vtpci_setup_interrupts(struct vtpci_common *cn, enum intr_type type)
779 {
780 	device_t dev;
781 	int attempt, error;
782 
783 	dev = cn->vtpci_dev;
784 
785 	for (attempt = 0; attempt < 5; attempt++) {
786 		/*
787 		 * Start with the most desirable interrupt configuration and
788 		 * fallback towards less desirable ones.
789 		 */
790 		switch (attempt) {
791 		case 0:
792 			error = vtpci_alloc_intr_msix_pervq(cn);
793 			break;
794 		case 1:
795 			error = vtpci_alloc_intr_msix_shared(cn);
796 			break;
797 		case 2:
798 			error = vtpci_alloc_intr_msi(cn);
799 			break;
800 		case 3:
801 			error = vtpci_alloc_intr_intx(cn);
802 			break;
803 		default:
804 			device_printf(dev,
805 			    "exhausted all interrupt allocation attempts\n");
806 			return (ENXIO);
807 		}
808 
809 		if (error == 0 && vtpci_setup_intrs(cn, type) == 0)
810 			break;
811 
812 		vtpci_cleanup_setup_intr_attempt(cn);
813 	}
814 
815 	if (bootverbose) {
816 		if (cn->vtpci_flags & VTPCI_FLAG_INTX)
817 			device_printf(dev, "using legacy interrupt\n");
818 		else if (cn->vtpci_flags & VTPCI_FLAG_MSI)
819 			device_printf(dev, "using MSI interrupt\n");
820 		else if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
821 			device_printf(dev, "using shared MSIX interrupts\n");
822 		else
823 			device_printf(dev, "using per VQ MSIX interrupts\n");
824 	}
825 
826 	return (0);
827 }
828 
829 static int
830 vtpci_reinit_virtqueue(struct vtpci_common *cn, int idx)
831 {
832 	struct vtpci_virtqueue *vqx;
833 	struct virtqueue *vq;
834 	int error;
835 
836 	vqx = &cn->vtpci_vqs[idx];
837 	vq = vqx->vtv_vq;
838 
839 	KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx));
840 
841 	error = virtqueue_reinit(vq, vtpci_get_vq_size(cn, idx));
842 	if (error == 0)
843 		vtpci_set_vq(cn, vq);
844 
845 	return (error);
846 }
847 
848 static void
849 vtpci_intx_intr(void *xcn)
850 {
851 	struct vtpci_common *cn;
852 	struct vtpci_virtqueue *vqx;
853 	int i;
854 	uint8_t isr;
855 
856 	cn = xcn;
857 	isr = vtpci_read_isr(cn);
858 
859 	if (isr & VIRTIO_PCI_ISR_CONFIG)
860 		vtpci_config_intr(cn);
861 
862 	if (isr & VIRTIO_PCI_ISR_INTR) {
863 		vqx = &cn->vtpci_vqs[0];
864 		for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) {
865 			if (vqx->vtv_no_intr == 0)
866 				virtqueue_intr(vqx->vtv_vq);
867 		}
868 	}
869 }
870 
871 static int
872 vtpci_vq_shared_intr_filter(void *xcn)
873 {
874 	struct vtpci_common *cn;
875 	struct vtpci_virtqueue *vqx;
876 	int i, rc;
877 
878 	cn = xcn;
879 	vqx = &cn->vtpci_vqs[0];
880 	rc = 0;
881 
882 	for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) {
883 		if (vqx->vtv_no_intr == 0)
884 			rc |= virtqueue_intr_filter(vqx->vtv_vq);
885 	}
886 
887 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
888 }
889 
890 static void
891 vtpci_vq_shared_intr(void *xcn)
892 {
893 	struct vtpci_common *cn;
894 	struct vtpci_virtqueue *vqx;
895 	int i;
896 
897 	cn = xcn;
898 	vqx = &cn->vtpci_vqs[0];
899 
900 	for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) {
901 		if (vqx->vtv_no_intr == 0)
902 			virtqueue_intr(vqx->vtv_vq);
903 	}
904 }
905 
906 static int
907 vtpci_vq_intr_filter(void *xvq)
908 {
909 	struct virtqueue *vq;
910 	int rc;
911 
912 	vq = xvq;
913 	rc = virtqueue_intr_filter(vq);
914 
915 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
916 }
917 
/*
 * Handler for a per-virtqueue MSI-X interrupt: service the virtqueue
 * passed as the handler argument.
 */
static void
vtpci_vq_intr(void *xvq)
{
	struct virtqueue *vq = xvq;

	virtqueue_intr(vq);
}
926 
927 static void
928 vtpci_config_intr(void *xcn)
929 {
930 	struct vtpci_common *cn;
931 	device_t child;
932 
933 	cn = xcn;
934 	child = cn->vtpci_child_dev;
935 
936 	if (child != NULL)
937 		VIRTIO_CONFIG_CHANGE(child);
938 }
939 
940 static int
941 vtpci_feature_sysctl(struct sysctl_req *req, struct vtpci_common *cn,
942     uint64_t features)
943 {
944 	struct sbuf *sb;
945 	int error;
946 
947 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
948 	if (sb == NULL)
949 		return (ENOMEM);
950 
951 	error = virtio_describe_sbuf(sb, features, cn->vtpci_child_feat_desc);
952 	sbuf_delete(sb);
953 
954 	return (error);
955 }
956 
957 static int
958 vtpci_host_features_sysctl(SYSCTL_HANDLER_ARGS)
959 {
960 	struct vtpci_common *cn;
961 
962 	cn = arg1;
963 
964 	return (vtpci_feature_sysctl(req, cn, cn->vtpci_host_features));
965 }
966 
967 static int
968 vtpci_negotiated_features_sysctl(SYSCTL_HANDLER_ARGS)
969 {
970 	struct vtpci_common *cn;
971 
972 	cn = arg1;
973 
974 	return (vtpci_feature_sysctl(req, cn, cn->vtpci_features));
975 }
976 
977 static void
978 vtpci_setup_sysctl(struct vtpci_common *cn)
979 {
980 	device_t dev;
981 	struct sysctl_ctx_list *ctx;
982 	struct sysctl_oid *tree;
983 	struct sysctl_oid_list *child;
984 
985 	dev = cn->vtpci_dev;
986 	ctx = device_get_sysctl_ctx(dev);
987 	tree = device_get_sysctl_tree(dev);
988 	child = SYSCTL_CHILDREN(tree);
989 
990 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nvqs",
991 	    CTLFLAG_RD, &cn->vtpci_nvqs, 0, "Number of virtqueues");
992 
993 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "host_features",
994 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0,
995 	    vtpci_host_features_sysctl, "A", "Features supported by the host");
996 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "negotiated_features",
997 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0,
998 	    vtpci_negotiated_features_sysctl, "A", "Features negotiated");
999 }
1000