xref: /freebsd/sys/dev/virtio/pci/virtio_pci.c (revision bc5304a006238115291e7568583632889dffbab9)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/kernel.h>
36 #include <sys/sbuf.h>
37 #include <sys/sysctl.h>
38 #include <sys/module.h>
39 #include <sys/malloc.h>
40 
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus.h>
44 #include <sys/rman.h>
45 
46 #include <dev/pci/pcivar.h>
47 #include <dev/pci/pcireg.h>
48 
49 #include <dev/virtio/virtio.h>
50 #include <dev/virtio/virtqueue.h>
51 #include <dev/virtio/pci/virtio_pci.h>
52 #include <dev/virtio/pci/virtio_pci_var.h>
53 
54 #include "virtio_pci_if.h"
55 #include "virtio_if.h"
56 
57 static void	vtpci_describe_features(struct vtpci_common *, const char *,
58 		    uint64_t);
59 static int	vtpci_alloc_msix(struct vtpci_common *, int);
60 static int	vtpci_alloc_msi(struct vtpci_common *);
61 static int	vtpci_alloc_intr_msix_pervq(struct vtpci_common *);
62 static int	vtpci_alloc_intr_msix_shared(struct vtpci_common *);
63 static int	vtpci_alloc_intr_msi(struct vtpci_common *);
64 static int	vtpci_alloc_intr_intx(struct vtpci_common *);
65 static int	vtpci_alloc_interrupt(struct vtpci_common *, int, int,
66 		    struct vtpci_interrupt *);
67 static void	vtpci_free_interrupt(struct vtpci_common *,
68 		    struct vtpci_interrupt *);
69 
70 static void	vtpci_free_interrupts(struct vtpci_common *);
71 static void	vtpci_free_virtqueues(struct vtpci_common *);
72 static void	vtpci_cleanup_setup_intr_attempt(struct vtpci_common *);
73 static int	vtpci_alloc_intr_resources(struct vtpci_common *);
74 static int	vtpci_setup_intx_interrupt(struct vtpci_common *,
75 		    enum intr_type);
76 static int	vtpci_setup_pervq_msix_interrupts(struct vtpci_common *,
77 		    enum intr_type);
78 static int	vtpci_set_host_msix_vectors(struct vtpci_common *);
79 static int	vtpci_setup_msix_interrupts(struct vtpci_common *,
80 		    enum intr_type);
81 static int	vtpci_setup_intrs(struct vtpci_common *, enum intr_type);
82 static int	vtpci_reinit_virtqueue(struct vtpci_common *, int);
83 static void	vtpci_intx_intr(void *);
84 static int	vtpci_vq_shared_intr_filter(void *);
85 static void	vtpci_vq_shared_intr(void *);
86 static int	vtpci_vq_intr_filter(void *);
87 static void	vtpci_vq_intr(void *);
88 static void	vtpci_config_intr(void *);
89 
90 static void	vtpci_setup_sysctl(struct vtpci_common *);
91 
92 #define vtpci_setup_msi_interrupt vtpci_setup_intx_interrupt
93 
94 /*
95  * This module contains two drivers:
96  *   - virtio_pci_legacy for pre-V1 support
97  *   - virtio_pci_modern for V1 support
98  */
99 MODULE_VERSION(virtio_pci, 1);
100 MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
101 MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
102 
103 int vtpci_disable_msix = 0;
104 TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
105 
106 static uint8_t
107 vtpci_read_isr(struct vtpci_common *cn)
108 {
109 	return (VIRTIO_PCI_READ_ISR(cn->vtpci_dev));
110 }
111 
112 static uint16_t
113 vtpci_get_vq_size(struct vtpci_common *cn, int idx)
114 {
115 	return (VIRTIO_PCI_GET_VQ_SIZE(cn->vtpci_dev, idx));
116 }
117 
118 static bus_size_t
119 vtpci_get_vq_notify_off(struct vtpci_common *cn, int idx)
120 {
121 	return (VIRTIO_PCI_GET_VQ_NOTIFY_OFF(cn->vtpci_dev, idx));
122 }
123 
124 static void
125 vtpci_set_vq(struct vtpci_common *cn, struct virtqueue *vq)
126 {
127 	VIRTIO_PCI_SET_VQ(cn->vtpci_dev, vq);
128 }
129 
130 static void
131 vtpci_disable_vq(struct vtpci_common *cn, int idx)
132 {
133 	VIRTIO_PCI_DISABLE_VQ(cn->vtpci_dev, idx);
134 }
135 
136 static int
137 vtpci_register_cfg_msix(struct vtpci_common *cn, struct vtpci_interrupt *intr)
138 {
139 	return (VIRTIO_PCI_REGISTER_CFG_MSIX(cn->vtpci_dev, intr));
140 }
141 
142 static int
143 vtpci_register_vq_msix(struct vtpci_common *cn, int idx,
144     struct vtpci_interrupt *intr)
145 {
146 	return (VIRTIO_PCI_REGISTER_VQ_MSIX(cn->vtpci_dev, idx, intr));
147 }
148 
149 void
150 vtpci_init(struct vtpci_common *cn, device_t dev, bool modern)
151 {
152 
153 	cn->vtpci_dev = dev;
154 
155 	pci_enable_busmaster(dev);
156 
157 	if (modern)
158 		cn->vtpci_flags |= VTPCI_FLAG_MODERN;
159 	if (pci_find_cap(dev, PCIY_MSI, NULL) != 0)
160 		cn->vtpci_flags |= VTPCI_FLAG_NO_MSI;
161 	if (pci_find_cap(dev, PCIY_MSIX, NULL) != 0)
162 		cn->vtpci_flags |= VTPCI_FLAG_NO_MSIX;
163 
164 	vtpci_setup_sysctl(cn);
165 }
166 
167 int
168 vtpci_add_child(struct vtpci_common *cn)
169 {
170 	device_t dev, child;
171 
172 	dev = cn->vtpci_dev;
173 
174 	child = device_add_child(dev, NULL, -1);
175 	if (child == NULL) {
176 		device_printf(dev, "cannot create child device\n");
177 		return (ENOMEM);
178 	}
179 
180 	cn->vtpci_child_dev = child;
181 
182 	return (0);
183 }
184 
185 int
186 vtpci_delete_child(struct vtpci_common *cn)
187 {
188 	device_t dev, child;
189 	int error;
190 
191 	dev = cn->vtpci_dev;
192 
193 	child = cn->vtpci_child_dev;
194 	if (child != NULL) {
195 		error = device_delete_child(dev, child);
196 		if (error)
197 			return (error);
198 		cn->vtpci_child_dev = NULL;
199 	}
200 
201 	return (0);
202 }
203 
204 void
205 vtpci_child_detached(struct vtpci_common *cn)
206 {
207 
208 	vtpci_release_child_resources(cn);
209 
210 	cn->vtpci_child_feat_desc = NULL;
211 	cn->vtpci_host_features = 0;
212 	cn->vtpci_features = 0;
213 }
214 
215 int
216 vtpci_reinit(struct vtpci_common *cn)
217 {
218 	int idx, error;
219 
220 	for (idx = 0; idx < cn->vtpci_nvqs; idx++) {
221 		error = vtpci_reinit_virtqueue(cn, idx);
222 		if (error)
223 			return (error);
224 	}
225 
226 	if (vtpci_is_msix_enabled(cn)) {
227 		error = vtpci_set_host_msix_vectors(cn);
228 		if (error)
229 			return (error);
230 	}
231 
232 	return (0);
233 }
234 
235 static void
236 vtpci_describe_features(struct vtpci_common *cn, const char *msg,
237     uint64_t features)
238 {
239 	device_t dev, child;
240 
241 	dev = cn->vtpci_dev;
242 	child = cn->vtpci_child_dev;
243 
244 	if (device_is_attached(child) || bootverbose == 0)
245 		return;
246 
247 	virtio_describe(dev, msg, features, cn->vtpci_child_feat_desc);
248 }
249 
250 uint64_t
251 vtpci_negotiate_features(struct vtpci_common *cn,
252     uint64_t child_features, uint64_t host_features)
253 {
254 	uint64_t features;
255 
256 	cn->vtpci_host_features = host_features;
257 	vtpci_describe_features(cn, "host", host_features);
258 
259 	/*
260 	 * Limit negotiated features to what the driver, virtqueue, and
261 	 * host all support.
262 	 */
263 	features = host_features & child_features;
264 	features = virtio_filter_transport_features(features);
265 
266 	cn->vtpci_features = features;
267 	vtpci_describe_features(cn, "negotiated", features);
268 
269 	return (features);
270 }
271 
272 int
273 vtpci_with_feature(struct vtpci_common *cn, uint64_t feature)
274 {
275 	return ((cn->vtpci_features & feature) != 0);
276 }
277 
278 int
279 vtpci_read_ivar(struct vtpci_common *cn, int index, uintptr_t *result)
280 {
281 	device_t dev;
282 	int error;
283 
284 	dev = cn->vtpci_dev;
285 	error = 0;
286 
287 	switch (index) {
288 	case VIRTIO_IVAR_SUBDEVICE:
289 		*result = pci_get_subdevice(dev);
290 		break;
291 	case VIRTIO_IVAR_VENDOR:
292 		*result = pci_get_vendor(dev);
293 		break;
294 	case VIRTIO_IVAR_DEVICE:
295 		*result = pci_get_device(dev);
296 		break;
297 	case VIRTIO_IVAR_SUBVENDOR:
298 		*result = pci_get_subvendor(dev);
299 		break;
300 	case VIRTIO_IVAR_MODERN:
301 		*result = vtpci_is_modern(cn);
302 		break;
303 	default:
304 		error = ENOENT;
305 	}
306 
307 	return (error);
308 }
309 
310 int
311 vtpci_write_ivar(struct vtpci_common *cn, int index, uintptr_t value)
312 {
313 	int error;
314 
315 	error = 0;
316 
317 	switch (index) {
318 	case VIRTIO_IVAR_FEATURE_DESC:
319 		cn->vtpci_child_feat_desc = (void *) value;
320 		break;
321 	default:
322 		error = ENOENT;
323 	}
324 
325 	return (error);
326 }
327 
328 int
329 vtpci_alloc_virtqueues(struct vtpci_common *cn, int flags, int nvqs,
330     struct vq_alloc_info *vq_info)
331 {
332 	device_t dev;
333 	int idx, align, error;
334 
335 	dev = cn->vtpci_dev;
336 
337 	/*
338 	 * This is VIRTIO_PCI_VRING_ALIGN from legacy VirtIO. In modern VirtIO,
339 	 * the tables do not have to be allocated contiguously, but we do so
340 	 * anyways.
341 	 */
342 	align = 4096;
343 
344 	if (cn->vtpci_nvqs != 0)
345 		return (EALREADY);
346 	if (nvqs <= 0)
347 		return (EINVAL);
348 
349 	cn->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue),
350 	    M_DEVBUF, M_NOWAIT | M_ZERO);
351 	if (cn->vtpci_vqs == NULL)
352 		return (ENOMEM);
353 
354 	for (idx = 0; idx < nvqs; idx++) {
355 		struct vtpci_virtqueue *vqx;
356 		struct vq_alloc_info *info;
357 		struct virtqueue *vq;
358 		bus_size_t notify_offset;
359 		uint16_t size;
360 
361 		vqx = &cn->vtpci_vqs[idx];
362 		info = &vq_info[idx];
363 
364 		size = vtpci_get_vq_size(cn, idx);
365 		notify_offset = vtpci_get_vq_notify_off(cn, idx);
366 
367 		error = virtqueue_alloc(dev, idx, size, notify_offset, align,
368 		    ~(vm_paddr_t)0, info, &vq);
369 		if (error) {
370 			device_printf(dev,
371 			    "cannot allocate virtqueue %d: %d\n", idx, error);
372 			break;
373 		}
374 
375 		vtpci_set_vq(cn, vq);
376 
377 		vqx->vtv_vq = *info->vqai_vq = vq;
378 		vqx->vtv_no_intr = info->vqai_intr == NULL;
379 
380 		cn->vtpci_nvqs++;
381 	}
382 
383 	if (error)
384 		vtpci_free_virtqueues(cn);
385 
386 	return (error);
387 }
388 
389 static int
390 vtpci_alloc_msix(struct vtpci_common *cn, int nvectors)
391 {
392 	device_t dev;
393 	int nmsix, cnt, required;
394 
395 	dev = cn->vtpci_dev;
396 
397 	/* Allocate an additional vector for the config changes. */
398 	required = nvectors + 1;
399 
400 	nmsix = pci_msix_count(dev);
401 	if (nmsix < required)
402 		return (1);
403 
404 	cnt = required;
405 	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
406 		cn->vtpci_nmsix_resources = required;
407 		return (0);
408 	}
409 
410 	pci_release_msi(dev);
411 
412 	return (1);
413 }
414 
415 static int
416 vtpci_alloc_msi(struct vtpci_common *cn)
417 {
418 	device_t dev;
419 	int nmsi, cnt, required;
420 
421 	dev = cn->vtpci_dev;
422 	required = 1;
423 
424 	nmsi = pci_msi_count(dev);
425 	if (nmsi < required)
426 		return (1);
427 
428 	cnt = required;
429 	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required)
430 		return (0);
431 
432 	pci_release_msi(dev);
433 
434 	return (1);
435 }
436 
437 static int
438 vtpci_alloc_intr_msix_pervq(struct vtpci_common *cn)
439 {
440 	int i, nvectors, error;
441 
442 	if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX)
443 		return (ENOTSUP);
444 
445 	for (nvectors = 0, i = 0; i < cn->vtpci_nvqs; i++) {
446 		if (cn->vtpci_vqs[i].vtv_no_intr == 0)
447 			nvectors++;
448 	}
449 
450 	error = vtpci_alloc_msix(cn, nvectors);
451 	if (error)
452 		return (error);
453 
454 	cn->vtpci_flags |= VTPCI_FLAG_MSIX;
455 
456 	return (0);
457 }
458 
459 static int
460 vtpci_alloc_intr_msix_shared(struct vtpci_common *cn)
461 {
462 	int error;
463 
464 	if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX)
465 		return (ENOTSUP);
466 
467 	error = vtpci_alloc_msix(cn, 1);
468 	if (error)
469 		return (error);
470 
471 	cn->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX;
472 
473 	return (0);
474 }
475 
476 static int
477 vtpci_alloc_intr_msi(struct vtpci_common *cn)
478 {
479 	int error;
480 
481 	/* Only BHyVe supports MSI. */
482 	if (cn->vtpci_flags & VTPCI_FLAG_NO_MSI)
483 		return (ENOTSUP);
484 
485 	error = vtpci_alloc_msi(cn);
486 	if (error)
487 		return (error);
488 
489 	cn->vtpci_flags |= VTPCI_FLAG_MSI;
490 
491 	return (0);
492 }
493 
494 static int
495 vtpci_alloc_intr_intx(struct vtpci_common *cn)
496 {
497 
498 	cn->vtpci_flags |= VTPCI_FLAG_INTX;
499 
500 	return (0);
501 }
502 
503 static int
504 vtpci_alloc_interrupt(struct vtpci_common *cn, int rid, int flags,
505     struct vtpci_interrupt *intr)
506 {
507 	struct resource *irq;
508 
509 	irq = bus_alloc_resource_any(cn->vtpci_dev, SYS_RES_IRQ, &rid, flags);
510 	if (irq == NULL)
511 		return (ENXIO);
512 
513 	intr->vti_irq = irq;
514 	intr->vti_rid = rid;
515 
516 	return (0);
517 }
518 
519 static void
520 vtpci_free_interrupt(struct vtpci_common *cn, struct vtpci_interrupt *intr)
521 {
522 	device_t dev;
523 
524 	dev = cn->vtpci_dev;
525 
526 	if (intr->vti_handler != NULL) {
527 		bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler);
528 		intr->vti_handler = NULL;
529 	}
530 
531 	if (intr->vti_irq != NULL) {
532 		bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid,
533 		    intr->vti_irq);
534 		intr->vti_irq = NULL;
535 		intr->vti_rid = -1;
536 	}
537 }
538 
539 static void
540 vtpci_free_interrupts(struct vtpci_common *cn)
541 {
542 	struct vtpci_interrupt *intr;
543 	int i, nvq_intrs;
544 
545 	vtpci_free_interrupt(cn, &cn->vtpci_device_interrupt);
546 
547 	if (cn->vtpci_nmsix_resources != 0) {
548 		nvq_intrs = cn->vtpci_nmsix_resources - 1;
549 		cn->vtpci_nmsix_resources = 0;
550 
551 		if ((intr = cn->vtpci_msix_vq_interrupts) != NULL) {
552 			for (i = 0; i < nvq_intrs; i++, intr++)
553 				vtpci_free_interrupt(cn, intr);
554 
555 			free(cn->vtpci_msix_vq_interrupts, M_DEVBUF);
556 			cn->vtpci_msix_vq_interrupts = NULL;
557 		}
558 	}
559 
560 	if (cn->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX))
561 		pci_release_msi(cn->vtpci_dev);
562 
563 	cn->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK;
564 }
565 
566 static void
567 vtpci_free_virtqueues(struct vtpci_common *cn)
568 {
569 	struct vtpci_virtqueue *vqx;
570 	int idx;
571 
572 	for (idx = 0; idx < cn->vtpci_nvqs; idx++) {
573 		vtpci_disable_vq(cn, idx);
574 
575 		vqx = &cn->vtpci_vqs[idx];
576 		virtqueue_free(vqx->vtv_vq);
577 		vqx->vtv_vq = NULL;
578 	}
579 
580 	free(cn->vtpci_vqs, M_DEVBUF);
581 	cn->vtpci_vqs = NULL;
582 	cn->vtpci_nvqs = 0;
583 }
584 
/*
 * Release the resources held on behalf of the child: first the
 * interrupts, then the virtqueues.
 */
void
vtpci_release_child_resources(struct vtpci_common *cn)
{
	vtpci_free_interrupts(cn);
	vtpci_free_virtqueues(cn);
}
592 
593 static void
594 vtpci_cleanup_setup_intr_attempt(struct vtpci_common *cn)
595 {
596 	int idx;
597 
598 	if (cn->vtpci_flags & VTPCI_FLAG_MSIX) {
599 		vtpci_register_cfg_msix(cn, NULL);
600 
601 		for (idx = 0; idx < cn->vtpci_nvqs; idx++)
602 			vtpci_register_vq_msix(cn, idx, NULL);
603 	}
604 
605 	vtpci_free_interrupts(cn);
606 }
607 
608 static int
609 vtpci_alloc_intr_resources(struct vtpci_common *cn)
610 {
611 	struct vtpci_interrupt *intr;
612 	int i, rid, flags, nvq_intrs, error;
613 
614 	flags = RF_ACTIVE;
615 
616 	if (cn->vtpci_flags & VTPCI_FLAG_INTX) {
617 		rid = 0;
618 		flags |= RF_SHAREABLE;
619 	} else
620 		rid = 1;
621 
622 	/*
623 	 * When using INTX or MSI interrupts, this resource handles all
624 	 * interrupts. When using MSIX, this resource handles just the
625 	 * configuration changed interrupt.
626 	 */
627 	intr = &cn->vtpci_device_interrupt;
628 
629 	error = vtpci_alloc_interrupt(cn, rid, flags, intr);
630 	if (error || cn->vtpci_flags & (VTPCI_FLAG_INTX | VTPCI_FLAG_MSI))
631 		return (error);
632 
633 	/*
634 	 * Now allocate the interrupts for the virtqueues. This may be one
635 	 * for all the virtqueues, or one for each virtqueue. Subtract one
636 	 * below for because of the configuration changed interrupt.
637 	 */
638 	nvq_intrs = cn->vtpci_nmsix_resources - 1;
639 
640 	cn->vtpci_msix_vq_interrupts = malloc(nvq_intrs *
641 	    sizeof(struct vtpci_interrupt), M_DEVBUF, M_NOWAIT | M_ZERO);
642 	if (cn->vtpci_msix_vq_interrupts == NULL)
643 		return (ENOMEM);
644 
645 	intr = cn->vtpci_msix_vq_interrupts;
646 
647 	for (i = 0, rid++; i < nvq_intrs; i++, rid++, intr++) {
648 		error = vtpci_alloc_interrupt(cn, rid, flags, intr);
649 		if (error)
650 			return (error);
651 	}
652 
653 	return (0);
654 }
655 
656 static int
657 vtpci_setup_intx_interrupt(struct vtpci_common *cn, enum intr_type type)
658 {
659 	struct vtpci_interrupt *intr;
660 	int error;
661 
662 	intr = &cn->vtpci_device_interrupt;
663 
664 	error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL,
665 	    vtpci_intx_intr, cn, &intr->vti_handler);
666 
667 	return (error);
668 }
669 
670 static int
671 vtpci_setup_pervq_msix_interrupts(struct vtpci_common *cn, enum intr_type type)
672 {
673 	struct vtpci_virtqueue *vqx;
674 	struct vtpci_interrupt *intr;
675 	int i, error;
676 
677 	intr = cn->vtpci_msix_vq_interrupts;
678 
679 	for (i = 0; i < cn->vtpci_nvqs; i++) {
680 		vqx = &cn->vtpci_vqs[i];
681 
682 		if (vqx->vtv_no_intr)
683 			continue;
684 
685 		error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type,
686 		    vtpci_vq_intr_filter, vtpci_vq_intr, vqx->vtv_vq,
687 		    &intr->vti_handler);
688 		if (error)
689 			return (error);
690 
691 		intr++;
692 	}
693 
694 	return (0);
695 }
696 
697 static int
698 vtpci_set_host_msix_vectors(struct vtpci_common *cn)
699 {
700 	struct vtpci_interrupt *intr, *tintr;
701 	int idx, error;
702 
703 	intr = &cn->vtpci_device_interrupt;
704 	error = vtpci_register_cfg_msix(cn, intr);
705 	if (error)
706 		return (error);
707 
708 	intr = cn->vtpci_msix_vq_interrupts;
709 	for (idx = 0; idx < cn->vtpci_nvqs; idx++) {
710 		if (cn->vtpci_vqs[idx].vtv_no_intr)
711 			tintr = NULL;
712 		else
713 			tintr = intr;
714 
715 		error = vtpci_register_vq_msix(cn, idx, tintr);
716 		if (error)
717 			break;
718 
719 		/*
720 		 * For shared MSIX, all the virtqueues share the first
721 		 * interrupt.
722 		 */
723 		if (!cn->vtpci_vqs[idx].vtv_no_intr &&
724 		    (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0)
725 			intr++;
726 	}
727 
728 	return (error);
729 }
730 
731 static int
732 vtpci_setup_msix_interrupts(struct vtpci_common *cn, enum intr_type type)
733 {
734 	struct vtpci_interrupt *intr;
735 	int error;
736 
737 	intr = &cn->vtpci_device_interrupt;
738 
739 	error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL,
740 	    vtpci_config_intr, cn, &intr->vti_handler);
741 	if (error)
742 		return (error);
743 
744 	if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) {
745 		intr = &cn->vtpci_msix_vq_interrupts[0];
746 
747 		error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type,
748 		    vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, cn,
749 		    &intr->vti_handler);
750 	} else
751 		error = vtpci_setup_pervq_msix_interrupts(cn, type);
752 
753 	return (error ? error : vtpci_set_host_msix_vectors(cn));
754 }
755 
756 static int
757 vtpci_setup_intrs(struct vtpci_common *cn, enum intr_type type)
758 {
759 	int error;
760 
761 	type |= INTR_MPSAFE;
762 	KASSERT(cn->vtpci_flags & VTPCI_FLAG_ITYPE_MASK,
763 	    ("%s: no interrupt type selected %#x", __func__, cn->vtpci_flags));
764 
765 	error = vtpci_alloc_intr_resources(cn);
766 	if (error)
767 		return (error);
768 
769 	if (cn->vtpci_flags & VTPCI_FLAG_INTX)
770 		error = vtpci_setup_intx_interrupt(cn, type);
771 	else if (cn->vtpci_flags & VTPCI_FLAG_MSI)
772 		error = vtpci_setup_msi_interrupt(cn, type);
773 	else
774 		error = vtpci_setup_msix_interrupts(cn, type);
775 
776 	return (error);
777 }
778 
779 int
780 vtpci_setup_interrupts(struct vtpci_common *cn, enum intr_type type)
781 {
782 	device_t dev;
783 	int attempt, error;
784 
785 	dev = cn->vtpci_dev;
786 
787 	for (attempt = 0; attempt < 5; attempt++) {
788 		/*
789 		 * Start with the most desirable interrupt configuration and
790 		 * fallback towards less desirable ones.
791 		 */
792 		switch (attempt) {
793 		case 0:
794 			error = vtpci_alloc_intr_msix_pervq(cn);
795 			break;
796 		case 1:
797 			error = vtpci_alloc_intr_msix_shared(cn);
798 			break;
799 		case 2:
800 			error = vtpci_alloc_intr_msi(cn);
801 			break;
802 		case 3:
803 			error = vtpci_alloc_intr_intx(cn);
804 			break;
805 		default:
806 			device_printf(dev,
807 			    "exhausted all interrupt allocation attempts\n");
808 			return (ENXIO);
809 		}
810 
811 		if (error == 0 && vtpci_setup_intrs(cn, type) == 0)
812 			break;
813 
814 		vtpci_cleanup_setup_intr_attempt(cn);
815 	}
816 
817 	if (bootverbose) {
818 		if (cn->vtpci_flags & VTPCI_FLAG_INTX)
819 			device_printf(dev, "using legacy interrupt\n");
820 		else if (cn->vtpci_flags & VTPCI_FLAG_MSI)
821 			device_printf(dev, "using MSI interrupt\n");
822 		else if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
823 			device_printf(dev, "using shared MSIX interrupts\n");
824 		else
825 			device_printf(dev, "using per VQ MSIX interrupts\n");
826 	}
827 
828 	return (0);
829 }
830 
831 static int
832 vtpci_reinit_virtqueue(struct vtpci_common *cn, int idx)
833 {
834 	struct vtpci_virtqueue *vqx;
835 	struct virtqueue *vq;
836 	int error;
837 
838 	vqx = &cn->vtpci_vqs[idx];
839 	vq = vqx->vtv_vq;
840 
841 	KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx));
842 
843 	error = virtqueue_reinit(vq, vtpci_get_vq_size(cn, idx));
844 	if (error == 0)
845 		vtpci_set_vq(cn, vq);
846 
847 	return (error);
848 }
849 
850 static void
851 vtpci_intx_intr(void *xcn)
852 {
853 	struct vtpci_common *cn;
854 	struct vtpci_virtqueue *vqx;
855 	int i;
856 	uint8_t isr;
857 
858 	cn = xcn;
859 	isr = vtpci_read_isr(cn);
860 
861 	if (isr & VIRTIO_PCI_ISR_CONFIG)
862 		vtpci_config_intr(cn);
863 
864 	if (isr & VIRTIO_PCI_ISR_INTR) {
865 		vqx = &cn->vtpci_vqs[0];
866 		for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) {
867 			if (vqx->vtv_no_intr == 0)
868 				virtqueue_intr(vqx->vtv_vq);
869 		}
870 	}
871 }
872 
873 static int
874 vtpci_vq_shared_intr_filter(void *xcn)
875 {
876 	struct vtpci_common *cn;
877 	struct vtpci_virtqueue *vqx;
878 	int i, rc;
879 
880 	cn = xcn;
881 	vqx = &cn->vtpci_vqs[0];
882 	rc = 0;
883 
884 	for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) {
885 		if (vqx->vtv_no_intr == 0)
886 			rc |= virtqueue_intr_filter(vqx->vtv_vq);
887 	}
888 
889 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
890 }
891 
892 static void
893 vtpci_vq_shared_intr(void *xcn)
894 {
895 	struct vtpci_common *cn;
896 	struct vtpci_virtqueue *vqx;
897 	int i;
898 
899 	cn = xcn;
900 	vqx = &cn->vtpci_vqs[0];
901 
902 	for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) {
903 		if (vqx->vtv_no_intr == 0)
904 			virtqueue_intr(vqx->vtv_vq);
905 	}
906 }
907 
908 static int
909 vtpci_vq_intr_filter(void *xvq)
910 {
911 	struct virtqueue *vq;
912 	int rc;
913 
914 	vq = xvq;
915 	rc = virtqueue_intr_filter(vq);
916 
917 	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
918 }
919 
/*
 * Handler for a per-virtqueue MSI-X interrupt; the argument is the
 * virtqueue, which is passed on to virtqueue_intr().
 */
static void
vtpci_vq_intr(void *xvq)
{
	struct virtqueue *vq = xvq;

	virtqueue_intr(vq);
}
928 
929 static void
930 vtpci_config_intr(void *xcn)
931 {
932 	struct vtpci_common *cn;
933 	device_t child;
934 
935 	cn = xcn;
936 	child = cn->vtpci_child_dev;
937 
938 	if (child != NULL)
939 		VIRTIO_CONFIG_CHANGE(child);
940 }
941 
942 static int
943 vtpci_feature_sysctl(struct sysctl_req *req, struct vtpci_common *cn,
944     uint64_t features)
945 {
946 	struct sbuf *sb;
947 	int error;
948 
949 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
950 	if (sb == NULL)
951 		return (ENOMEM);
952 
953 	error = virtio_describe_sbuf(sb, features, cn->vtpci_child_feat_desc);
954 	sbuf_delete(sb);
955 
956 	return (error);
957 }
958 
959 static int
960 vtpci_host_features_sysctl(SYSCTL_HANDLER_ARGS)
961 {
962 	struct vtpci_common *cn;
963 
964 	cn = arg1;
965 
966 	return (vtpci_feature_sysctl(req, cn, cn->vtpci_host_features));
967 }
968 
969 static int
970 vtpci_negotiated_features_sysctl(SYSCTL_HANDLER_ARGS)
971 {
972 	struct vtpci_common *cn;
973 
974 	cn = arg1;
975 
976 	return (vtpci_feature_sysctl(req, cn, cn->vtpci_features));
977 }
978 
979 static void
980 vtpci_setup_sysctl(struct vtpci_common *cn)
981 {
982 	device_t dev;
983 	struct sysctl_ctx_list *ctx;
984 	struct sysctl_oid *tree;
985 	struct sysctl_oid_list *child;
986 
987 	dev = cn->vtpci_dev;
988 	ctx = device_get_sysctl_ctx(dev);
989 	tree = device_get_sysctl_tree(dev);
990 	child = SYSCTL_CHILDREN(tree);
991 
992 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nvqs",
993 	    CTLFLAG_RD, &cn->vtpci_nvqs, 0, "Number of virtqueues");
994 
995 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "host_features",
996 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0,
997 	    vtpci_host_features_sysctl, "A", "Features supported by the host");
998 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "negotiated_features",
999 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0,
1000 	    vtpci_negotiated_features_sysctl, "A", "Features negotiated");
1001 }
1002