xref: /illumos-gate/usr/src/cmd/bhyve/common/virtio.c (revision 4e021076192ab3215cb81f7344c5c64e2fb4149c)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013  Chris Torek <torek @ torek net>
5  * All rights reserved.
6  * Copyright (c) 2019 Joyent, Inc.
7  * Copyright (c) 2021 The FreeBSD Foundation
8  *
9  * Portions of this software were developed by Ka Ho Ng
10  * under sponsorship of the FreeBSD Foundation.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 /*
34  * This file and its contents are supplied under the terms of the
35  * Common Development and Distribution License ("CDDL"), version 1.0.
36  * You may only use this file in accordance with the terms of version
37  * 1.0 of the CDDL.
38  *
39  * A full copy of the text of the CDDL should have accompanied this
40  * source.  A copy of the CDDL is also available via the Internet at
41  * http://www.illumos.org/license/CDDL.
42  */
43 /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
44 
45 /*
46  * Copyright 2026 Oxide Computer Company
47  */
48 
49 #include <sys/stdbool.h>
50 #include <sys/param.h>
51 #include <sys/uio.h>
52 #include <sys/sysmacros.h>
53 #include <sys/debug.h>
54 
55 #include <machine/atomic.h>
56 
57 #include <inttypes.h>
58 #include <stddef.h>
59 #include <stdio.h>
60 #include <stdint.h>
61 #include <stdarg.h>
62 #include <string.h>
63 #include <pthread.h>
64 #include <pthread_np.h>
65 
66 #include "bhyverun.h"
67 #include "config.h"
68 #include "debug.h"
69 #include "pci_emul.h"
70 #include "virtio.h"
71 
72 /*
73  * Functions for dealing with generalized "virtual devices" as
74  * defined by <https://www.google.com/#output=search&q=virtio+spec>
75  *
76  * The reference for the implementation of virtio modern is on
77  * <https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/>
78  */
79 
/*
 * Debug print helper: when VIRTIO_DEBUG is set in the softc's vs_flags,
 * emit one line to stdout through FPRINTLN() and flush it immediately.
 * Otherwise this expands to nothing observable.
 */
#define DPRINTF(vs, fmt, arg...) \
	do { \
		if ((((vs)->vs_flags) & VIRTIO_DEBUG) != 0) { \
			FPRINTLN(stdout, fmt, ##arg); \
			fflush(stdout); \
		} \
	} while (0)

/*
 * Value advertised in the NOTIFY_CFG capability's notify_off_multiplier
 * field; it is also the per-queue stride used to size the notify area of
 * the BAR.
 */
#define VQ_NOTIFY_OFF_MULTIPLIER sizeof (uint32_t)

/*
 * In case we decide to relax the "virtio softc comes at the
 * front of virtio-based device softc" constraint, let's use
 * this to convert.
 */
#define	DEV_SOFTC(vs) ((void *)(vs))

/*
 * Mask covering the low `nbytes` bytes of a 32-bit value; any size of four
 * bytes or more yields all 32 bits.  `nbytes` must be non-zero: a value of
 * zero would shift a 32-bit quantity by 32, which C leaves undefined.
 */
#define VI_MASK(nbytes) \
        (((nbytes) >= 4) ? 0xFFFFFFFFu : (~0u >> (32 - 8 * (nbytes))))

/* Forward declarations for the modern (virtio 1.x) register accessors. */
static uint64_t vi_modern_pci_read(struct virtio_softc *, int, uint64_t, int);
static void vi_modern_pci_write(struct virtio_softc *, int, uint64_t, int,
    uint64_t);
103 
104 void
vi_queue_linkup(struct virtio_softc * vs,struct vqueue_info * queues)105 vi_queue_linkup(struct virtio_softc *vs, struct vqueue_info *queues)
106 {
107 	struct virtio_consts *vc = vs->vs_vc;
108 
109 	vs->vs_queues = queues;
110 
111 	for (int i = 0; i < vc->vc_nvq; i++) {
112 		vs->vs_queues[i].vq_vs = vs;
113 		vs->vs_queues[i].vq_num = i;
114 	}
115 }
116 
117 /*
118  * Link a virtio_softc to its constants, the device softc, and
119  * the PCI emulation.
120  */
121 void
vi_softc_linkup(struct virtio_softc * vs,struct virtio_consts * vc,void * dev_softc,struct pci_devinst * pi,struct vqueue_info * queues)122 vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
123     void *dev_softc, struct pci_devinst *pi, struct vqueue_info *queues)
124 {
125 	/* vs and dev_softc addresses must match */
126 	assert((void *)vs == dev_softc);
127 	vs->vs_vc = vc;
128 	vs->vs_pi = pi;
129 	pi->pi_arg = vs;
130 
131 	vi_queue_linkup(vs, queues);
132 }
133 
134 /*
135  * Reset device (device-wide).  This erases all queues, i.e.,
136  * all the queues become invalid (though we don't wipe out the
137  * internal pointers, we just clear the VQ_ALLOC flag).
138  *
139  * It resets negotiated features to "none".
140  *
141  * If MSI-X is enabled, this also resets all the vectors to NO_VECTOR.
142  */
143 void
vi_reset_dev(struct virtio_softc * vs)144 vi_reset_dev(struct virtio_softc *vs)
145 {
146 	struct vqueue_info *vq;
147 	int i, nvq;
148 
149 	if (vs->vs_mtx)
150 		assert(pthread_mutex_isowned_np(vs->vs_mtx));
151 
152 	nvq = vs->vs_vc->vc_nvq;
153 	for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
154 		vq->vq_flags = 0;
155 		vq->vq_last_avail = 0;
156 		vq->vq_next_used = 0;
157 		vq->vq_save_used = 0;
158 		vq->vq_pfn = 0;
159 		vq->vq_desc_gpa = vq->vq_avail_gpa = vq->vq_used_gpa = 0;
160 		vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
161 	}
162 	vs->vs_negotiated_caps = 0;
163 	vs->vs_curq = 0;
164 	if (vs->vs_isr != 0)
165 		pci_lintr_deassert(vs->vs_pi);
166 	vs->vs_isr = 0;
167 	vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR;
168 }
169 
170 /*
171  * These are the capability bits common to all virtio devices.
172  */
173 static const virtio_capstr_t virtio_caps[] = {
174 	{ VIRTIO_F_NOTIFY_ON_EMPTY,	"VIRTIO_F_NOTIFY_ON_EMPTY" },
175 	{ VIRTIO_F_ANY_LAYOUT,		"VIRTIO_F_ANY_LAYOUT" },
176 	{ VIRTIO_RING_F_INDIRECT_DESC,	"VIRTIO_RING_F_INDIRECT_DESC" },
177 	{ VIRTIO_RING_F_EVENT_IDX,	"VIRTIO_RING_F_EVENT_IDX" },
178 	{ VIRTIO_F_BAD_FEATURE,		"VIRTIO_F_BAD_FEATURE" },
179 	{ VIRTIO_F_VERSION_1,		"VIRTIO_F_VERSION_1" },
180 };
181 
182 static void
vi_print_caps(struct virtio_softc * vs,uint64_t caps)183 vi_print_caps(struct virtio_softc *vs, uint64_t caps)
184 {
185 	struct virtio_consts *vc = vs->vs_vc;
186 
187 	if ((vs->vs_flags & VIRTIO_DEBUG) == 0)
188 		return;
189 
190 	for (size_t i = 0; i < vc->vc_ncapstr; i++) {
191 		if ((caps & vc->vc_capstr[i].vp_flag) != 0)
192 			FPRINTLN(stdout, "    -> %s", vc->vc_capstr[i].vp_name);
193 	}
194 	for (size_t i = 0; i < ARRAY_SIZE(virtio_caps); i++) {
195 		if ((caps & virtio_caps[i].vp_flag) != 0)
196 			FPRINTLN(stdout, "    -> %s", virtio_caps[i].vp_name);
197 	}
198 	fflush(stdout);
199 }
200 
201 void
vi_set_debug(struct virtio_softc * vs,bool debug)202 vi_set_debug(struct virtio_softc *vs, bool debug)
203 {
204 	if (debug)
205 		vs->vs_flags |= VIRTIO_DEBUG;
206 	else
207 		vs->vs_flags &= ~VIRTIO_DEBUG;
208 }
209 
210 bool
vi_is_modern(struct virtio_softc * vs)211 vi_is_modern(struct virtio_softc *vs)
212 {
213 	return (vs->vs_negotiated_caps & VIRTIO_F_VERSION_1) != 0;
214 }
215 
216 void __PRINTFLIKE(2)
vi_error(struct virtio_softc * vs,const char * fmt,...)217 vi_error(struct virtio_softc *vs, const char *fmt, ...)
218 {
219 	va_list ap;
220 
221 	va_start(ap, fmt);
222 	vfprintf(stderr, fmt, ap);
223 	fprintf(stderr, "%s", raw_stdio ? "\r\n" : "\n");
224 	va_end(ap);
225 
226 	if (vi_is_modern(vs)) {
227 		vs->vs_status |= VTCFG_STATUS_NEEDS_RST;
228 		vq_devcfg_changed(vs);
229 	}
230 
231 	vs->vs_flags |= VIRTIO_BROKEN;
232 }
233 
234 /*
235  * Set I/O BAR (usually 0) to map legacy PCI config registers.
236  */
237 static bool
vi_legacy_iobar_setup(struct virtio_softc * vs,int barnum)238 vi_legacy_iobar_setup(struct virtio_softc *vs, int barnum)
239 {
240 	size_t size;
241 
242 	/*
243 	 * We set the size to that which will accommodate the configuration
244 	 * space with MSI-X enabled, plus the configuration size.
245 	 */
246 	size = VIRTIO_PCI_CONFIG_OFF(1) + vs->vs_vc->vc_cfgsize;
247 	if (pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size) != 0)
248 		return (false);
249 
250 	return (true);
251 }
252 
253 virtio_pci_capcfg_t *
vi_pci_cfg_bytype(struct virtio_softc * vs,uint8_t cfgtype)254 vi_pci_cfg_bytype(struct virtio_softc *vs, uint8_t cfgtype)
255 {
256 	for (uint_t i = 0; i < vs->vs_ncaps; i++) {
257 		if (vs->vs_caps[i].c_captype == cfgtype)
258 			return (&vs->vs_caps[i]);
259 	}
260 	return (NULL);
261 }
262 
263 virtio_pci_capcfg_t *
vi_pci_cfg_bycapaddr(struct virtio_softc * vs,uint32_t start,uint32_t size)264 vi_pci_cfg_bycapaddr(struct virtio_softc *vs, uint32_t start, uint32_t size)
265 {
266 	if (size == 0 || start > UINT32_MAX - size)
267 		return (NULL);
268 
269 	const uint32_t end = start + size;
270 
271 	for (uint_t i = 0; i < vs->vs_ncaps; i++) {
272 		virtio_pci_capcfg_t *cfg = &vs->vs_caps[i];
273 		const uint32_t cap_start = cfg->c_capoff;
274 		const uint32_t cap_end = cap_start + cfg->c_caplen;
275 
276 		if (cap_start <= start && end <= cap_end)
277 			return (cfg);
278 	}
279 
280 	return (NULL);
281 }
282 
283 virtio_pci_capcfg_t *
vi_pci_cfg_bybaraddr(struct virtio_softc * vs,uint8_t bar,uint64_t offset,uint32_t size)284 vi_pci_cfg_bybaraddr(struct virtio_softc *vs, uint8_t bar, uint64_t offset,
285     uint32_t size)
286 {
287 	/*
288 	 * We currently don't use the larger capabilities introduced in VirtIO
289 	 * 1.2 that allow for 64-bit offsets and sizes.
290 	 */
291 	if (size == 0 || offset > UINT32_MAX - size)
292 		return (NULL);
293 
294 	const uint32_t end = offset + size;
295 
296 	for (uint_t i = 0; i < vs->vs_ncaps; i++) {
297 		virtio_pci_capcfg_t *cfg = &vs->vs_caps[i];
298 
299 		if (cfg->c_baridx != bar)
300 			continue;
301 
302 		const uint32_t bar_start = cfg->c_baroff;
303 		const uint32_t bar_end = bar_start + cfg->c_barlen;
304 
305 		if (bar_start <= offset && end <= bar_end)
306 			return (cfg);
307 	}
308 
309 	return (NULL);
310 }
311 
312 /*
313  * Add a modern configuration structure capability.
314  */
315 static bool
vi_modern_add_cfg(struct virtio_softc * vs,struct virtio_pci_cap * cap,int barnum,uint32_t baroff,uint32_t barlen,uint8_t caplen,uint8_t cfgtype)316 vi_modern_add_cfg(struct virtio_softc *vs, struct virtio_pci_cap *cap,
317     int barnum, uint32_t baroff, uint32_t barlen, uint8_t caplen,
318     uint8_t cfgtype)
319 {
320 	int capoff;
321 
322 	cap->cap_vndr = PCIY_VENDOR;
323 	cap->cap_len = caplen;
324 	cap->cfg_type = cfgtype;
325 	cap->bar = barnum;
326 	cap->id = 0;
327 	cap->offset = baroff;
328 	cap->length = barlen;
329 	if (pci_emul_add_capability(vs->vs_pi, (u_char *)cap, caplen,
330 	    &capoff) != 0) {
331 		return (false);
332 	}
333 
334 	vs->vs_caps[vs->vs_ncaps].c_captype = cfgtype;
335 	vs->vs_caps[vs->vs_ncaps].c_baridx = cap->bar;
336 	vs->vs_caps[vs->vs_ncaps].c_baroff = cap->offset;
337 	vs->vs_caps[vs->vs_ncaps].c_barlen = cap->length;
338 	vs->vs_caps[vs->vs_ncaps].c_capoff = capoff;
339 	vs->vs_caps[vs->vs_ncaps].c_caplen = caplen;
340 	vs->vs_ncaps++;
341 	VERIFY3U(vs->vs_ncaps, <=, sizeof (vs->vs_caps));
342 
343 	return (true);
344 }
345 
346 /*
347  * Add COMMON_CFG configuration structure capability.
348  */
349 static bool
vi_modern_add_common_cfg(struct virtio_softc * vs,int barnum,uint32_t * offp)350 vi_modern_add_common_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp)
351 {
352 	struct virtio_pci_cap cap;
353 	uint32_t bardatalen;
354 
355 	*offp = roundup2(*offp, VIRTIO_PCI_CAP_COMMON_CFG_ALIGN);
356 	/*
357 	 * We choose to round this BAR area up to a page size in common with
358 	 * other hypervisors.
359 	 */
360 	bardatalen = roundup2(sizeof (struct virtio_pci_common_cfg), PAGE_SIZE);
361 
362 	memset(&cap, 0, sizeof (cap));
363 	if (vi_modern_add_cfg(vs, &cap, barnum, *offp, bardatalen, sizeof (cap),
364 	    VIRTIO_PCI_CAP_COMMON_CFG)) {
365 		*offp += bardatalen;
366 		return (true);
367 	}
368 	return (false);
369 }
370 
371 /*
372  * Add NOTIFY_CFG configuration structure capability.
373  */
374 static bool
vi_modern_add_notify_cfg(struct virtio_softc * vs,int barnum,uint32_t * offp)375 vi_modern_add_notify_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp)
376 {
377 	struct virtio_pci_notify_cap cap;
378 	struct virtio_consts *vc = vs->vs_vc;
379 	int numq = MAX(vc->vc_max_nvq, vc->vc_nvq);
380 	uint32_t bardatalen;
381 
382 	VERIFY3S(numq, >, 0);
383 	VERIFY3S(numq, <=, UINT16_MAX);
384 
385 	*offp = roundup2(*offp, VIRTIO_PCI_CAP_NOTIFY_CFG_ALIGN);
386 	/*
387 	 * We choose to round this BAR area up to a page size in common with
388 	 * other hypervisors.
389 	 */
390 	uint64_t datalen = (uint64_t)numq * VQ_NOTIFY_OFF_MULTIPLIER;
391 	VERIFY3U(datalen, <=, UINT32_MAX - (PAGE_SIZE - 1));
392 	bardatalen = roundup2((uint32_t)datalen, PAGE_SIZE);
393 
394 	memset(&cap, 0, sizeof (cap));
395 	cap.notify_off_multiplier = VQ_NOTIFY_OFF_MULTIPLIER;
396 	if (vi_modern_add_cfg(vs, &cap.cap, barnum, *offp, bardatalen,
397 	    sizeof (cap), VIRTIO_PCI_CAP_NOTIFY_CFG)) {
398 		*offp += bardatalen;
399 		return (true);
400 	}
401 	return (false);
402 }
403 
404 /*
405  * Add ISR_CFG configuration structure capability.
406  */
407 static bool
vi_modern_add_isr_cfg(struct virtio_softc * vs,int barnum,uint32_t * offp)408 vi_modern_add_isr_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp)
409 {
410 	struct virtio_pci_cap cap;
411 	uint32_t bardatalen;
412 
413 	*offp = roundup2(*offp, VIRTIO_PCI_CAP_ISR_CFG_ALIGN);
414 	/*
415 	 * While this capability could point to a single byte in the BAR, we
416 	 * choose to round up to a page in common with other hypervisors.
417 	 */
418 	bardatalen = PAGE_SIZE;
419 
420 	memset(&cap, 0, sizeof (cap));
421 	if (vi_modern_add_cfg(vs, &cap, barnum, *offp, bardatalen, sizeof (cap),
422 	    VIRTIO_PCI_CAP_ISR_CFG)) {
423 		*offp += bardatalen;
424 		return (true);
425 	}
426 	return (false);
427 }
428 
429 /*
430  * Add DEV_CFG configuration structure capability.
431  */
432 static bool
vi_modern_add_dev_cfg(struct virtio_softc * vs,int barnum,uint32_t * offp)433 vi_modern_add_dev_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp)
434 {
435 	struct virtio_pci_cap cap;
436 	uint32_t bardatalen;
437 
438 	*offp = roundup2(*offp, VIRTIO_PCI_CAP_DEVICE_CFG_ALIGN);
439 	/*
440 	 * We choose to round this BAR area up to a page size in common with
441 	 * other hypervisors.
442 	 */
443 	bardatalen = PAGE_SIZE;
444 
445 	memset(&cap, 0, sizeof (cap));
446 	if (vi_modern_add_cfg(vs, &cap, barnum, *offp, bardatalen, sizeof (cap),
447 	    VIRTIO_PCI_CAP_DEVICE_CFG)) {
448 		*offp += bardatalen;
449 		return (true);
450 	}
451 	return (false);
452 }
453 
454 /*
455  * Add PCI_CFG configuration structure capability.
456  */
457 static bool
vi_modern_add_pci_cfg(struct virtio_softc * vs)458 vi_modern_add_pci_cfg(struct virtio_softc *vs)
459 {
460 	struct virtio_pci_cfg_cap cap;
461 
462 	memset(&cap, 0, sizeof (cap));
463 	memset(cap.pci_cfg_data, 0xff, sizeof (cap.pci_cfg_data));
464 	if (vi_modern_add_cfg(vs, &cap.cap, 0, 0, 0, sizeof (cap),
465 	    VIRTIO_PCI_CAP_PCI_CFG)) {
466 		vs->vs_pcicap = &vs->vs_caps[vs->vs_ncaps - 1];
467 		return (true);
468 	}
469 	return (false);
470 }
471 
472 /*
473  * Set up Virtio modern device pci configuration space
474  */
475 static bool
vi_modern_membar_setup(struct virtio_softc * vs,int barnum)476 vi_modern_membar_setup(struct virtio_softc *vs, int barnum)
477 {
478 	uint32_t baroff = 0;
479 	bool ret = false;
480 
481 	ret |= vi_modern_add_common_cfg(vs, barnum, &baroff);
482 	ret |= vi_modern_add_notify_cfg(vs, barnum, &baroff);
483 	ret |= vi_modern_add_dev_cfg(vs, barnum, &baroff);
484 	ret |= vi_modern_add_isr_cfg(vs, barnum, &baroff);
485 	ret |= vi_modern_add_pci_cfg(vs);
486 	if (!ret)
487 		return (false);
488 	if (pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_MEM64, baroff) != 0)
489 		return (false);
490 	return (true);
491 }
492 
493 void
vi_pci_init(struct pci_devinst * pi,virtio_mode_t mode,uint16_t legacy,uint16_t device_id,uint8_t class)494 vi_pci_init(struct pci_devinst *pi, virtio_mode_t mode,
495     uint16_t legacy, uint16_t device_id, uint8_t class)
496 {
497 	struct virtio_softc *vs = pi->pi_arg;
498 
499 	DPRINTF(vs, "VIRTIO %s PCI init mode=%x, legacy=0x%x devid=0x%x",
500 	    vs->vs_vc->vc_name, mode, legacy, device_id);
501 
502 	/*
503 	 * We provide global options to force transitional devices to present
504 	 * as pure legacy or modern. This is mostly to support testing guest
505 	 * drivers or bhyve itself.
506          *
507          *   TRANSITIONAL mode usually exposes both interfaces
508          *   - virtio.legacy=false forces a modern-only device
509          *   - virtio.modern=false forces a legacy-only device
510 	 */
511 	if (mode == VIRTIO_MODE_TRANSITIONAL) {
512 		if (!get_config_bool_default("virtio.legacy", true))
513 			mode = VIRTIO_MODE_MODERN;
514 		else if (!get_config_bool_default("virtio.modern", true))
515 			mode = VIRTIO_MODE_LEGACY;
516 	}
517 
518 	vs->vs_mode = mode;
519 
520 	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
521 	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
522 	pci_set_cfgdata8(pi, PCIR_CLASS, class);
523 
524 	if (mode == VIRTIO_MODE_MODERN) {
525 		/*
526 		 * Pure modern / non-transitional device.
527 		 *
528 		 * Virtio 1.2, 4.1.2.1:
529 		 *   - PCI Device ID		= 0x1040 + virtio device ID
530 		 *   - PCI Revision ID>		>= 1
531 		 *   - PCI Subsystem Device ID	>= 0x40
532 		 *
533 		 * `device_id` here is the virtio Device ID from section 5
534 		 * [0x0-0x3f].
535 		 */
536 		VERIFY3U(device_id, <=, 0x3f);
537 		pci_set_cfgdata16(pi, PCIR_DEVICE,
538 		    VIRTIO_PCI_DEVICEID_MODERN_MIN + device_id);
539 		/*
540 		 * For modern devices the spec only recommends that the
541 		 * Subsystem Device ID be >= 0x40 to avoid legacy binding.
542 		 * We choose to mirror the main device ID here so that the
543 		 * (vendor,device) and (subvendor,subdevice) pairs line up.
544 		 */
545 		pci_set_cfgdata16(pi, PCIR_SUBDEV_0,
546 		    VIRTIO_PCI_DEVICEID_MODERN_MIN + device_id);
547 		pci_set_cfgdata16(pi, PCIR_REVID, 1);
548 	} else {
549 		/*
550 		 * Legacy-only or transitional device.
551 		 *
552 		 * For *transitional* devices, virtio 1.2, 4.1.2.3 requires:
553 		 *   - PCI Device ID in [0x1000, 0x103f]
554 		 *   - PCI Revision ID == 0
555 		 *   - PCI Subsystem Device ID == virtio Device ID
556 		 *
557 		 * We rely on the caller to pass:
558 		 *   - `legacy`		the 0x1000-0x103f PCI Device ID
559 		 *   - `device_id`	the virtio Device ID from section 5
560 		 *
561 		 * For a true legacy-only device this layout is also compatible
562 		 * with old drivers.
563 		 */
564 		VERIFY(legacy >= 0x1000 && legacy <= 0x103f);
565 		pci_set_cfgdata16(pi, PCIR_DEVICE, legacy);
566 		pci_set_cfgdata16(pi, PCIR_SUBDEV_0, device_id);
567 		pci_set_cfgdata16(pi, PCIR_REVID, 0);
568 	}
569 }
570 
571 /*
572  * Set up Virtio device pci configuration space.
573  */
574 bool
vi_pcibar_setup(struct virtio_softc * vs)575 vi_pcibar_setup(struct virtio_softc *vs)
576 {
577 	DPRINTF(vs, "VIRTIO %s set up PCI BARs", vs->vs_vc->vc_name);
578 
579 	assert(vs->vs_mode != VIRTIO_MODE_UNSET);
580 
581 	if (vs->vs_mode == VIRTIO_MODE_LEGACY ||
582 	    vs->vs_mode == VIRTIO_MODE_TRANSITIONAL) {
583 		if (!vi_legacy_iobar_setup(vs, VIRTIO_LEGACY_BAR))
584 			return (false);
585 	}
586 	if (vs->vs_mode == VIRTIO_MODE_MODERN ||
587 	    vs->vs_mode == VIRTIO_MODE_TRANSITIONAL) {
588 		if (!vi_modern_membar_setup(vs, VIRTIO_MODERN_BAR))
589 			return (false);
590 	}
591 
592 	return (true);
593 }
594 
595 /*
596  * Configure interrupt delivery for this VirtIO device.
597  *
598  * If requested, enable MSI-X and allocate one vector per queue plus
599  * a configuration vector. Regardless, always establish the mandatory
600  * legacy (INTx) interrupt as VirtIO devices do not support MSI and
601  * require a fixed interrupt line for compatibility.
602  */
603 bool
vi_intr_init(struct virtio_softc * vs,bool use_msix)604 vi_intr_init(struct virtio_softc *vs, bool use_msix)
605 {
606 	if (use_msix) {
607 		struct virtio_consts *vc = vs->vs_vc;
608 		int nvec = MIN(MAX(vc->vc_max_nvq, vc->vc_nvq) + 1,
609 		    MAX_MSIX_TABLE_ENTRIES);
610 
611 		vs->vs_flags |= VIRTIO_USE_MSIX;
612 		VS_LOCK(vs);
613 		vi_reset_dev(vs); /* set all vectors to NO_VECTOR */
614 		VS_UNLOCK(vs);
615 		if (pci_emul_add_msixcap(vs->vs_pi, nvec, VIRTIO_MSIX_BAR) != 0)
616 			return (false);
617 	} else {
618 		vs->vs_flags &= ~VIRTIO_USE_MSIX;
619 	}
620 
621 	/* Legacy interrupts are mandatory for virtio devices */
622 	pci_lintr_request(vs->vs_pi);
623 
624 	return (true);
625 }
626 
627 /*
628  * Initialize the currently-selected virtio queue (vs->vs_curq)
629  */
630 void
vi_vq_init(struct virtio_softc * vs)631 vi_vq_init(struct virtio_softc *vs)
632 {
633 	struct vqueue_info *vq;
634 	uint64_t phys;
635 	size_t size;
636 	char *base;
637 
638 	vq = &vs->vs_queues[vs->vs_curq];
639 
640 	phys = vq->vq_desc_gpa;
641 	size = vq->vq_qsize * sizeof (struct vring_desc);
642 	base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size);
643 	if (base == NULL) {
644 		vi_error(vs, "Could not map queue 0x%x phys 0x%" PRIx64,
645 		    vq->vq_num, phys);
646 		return;
647 	}
648 	vq->vq_desc = (struct vring_desc *)base;
649 
650 	phys = vq->vq_avail_gpa;
651 	size = sizeof (struct vring_avail) + sizeof (uint16_t) +
652 	    vq->vq_qsize * sizeof (uint16_t);
653 	base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size);
654 	if (base == NULL) {
655 		vi_error(vs, "Could not map queue 0x%x phys 0x%" PRIx64,
656 		    vq->vq_num, phys);
657 		return;
658 	}
659 	vq->vq_avail = (struct vring_avail *)base;
660 
661 	phys = vq->vq_used_gpa;
662 	size = sizeof (struct vring_used) + sizeof (uint16_t) +
663 	    vq->vq_qsize * sizeof (struct vring_used_elem);
664 	base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size);
665 	if (base == NULL) {
666 		vi_error(vs, "Could not map queue 0x%x phys 0x%" PRIx64,
667 		    vq->vq_num, phys);
668 		return;
669 	}
670 	vq->vq_used = (struct vring_used *)base;
671 
672 	/* Mark queue as allocated, and start at 0 when we use it. */
673 	vq->vq_flags = VQ_ALLOC;
674 	vq->vq_last_avail = 0;
675 	vq->vq_next_used = 0;
676 	vq->vq_save_used = 0;
677 }
678 
679 /*
680  * Initialize the currently-selected virtio queue (vs->vs_curq).
681  * The guest just gave us a page frame number, from which we can
682  * calculate the addresses of the queue components.
683  */
684 void
vi_legacy_vq_init(struct virtio_softc * vs,uint32_t pfn)685 vi_legacy_vq_init(struct virtio_softc *vs, uint32_t pfn)
686 {
687 	struct vqueue_info *vq;
688 	uint64_t phys;
689 
690 	vq = &vs->vs_queues[vs->vs_curq];
691 	vq->vq_pfn = pfn;
692 	phys = (uint64_t)pfn << LEGACY_VRING_PFN;
693 
694 	/* First page(s) are descriptors... */
695 	vq->vq_desc_gpa = phys;
696 	phys += vq->vq_qsize * sizeof (struct vring_desc);
697 	/* ... immediately followed by "avail" ring (entirely uint16_t's) */
698 	vq->vq_avail_gpa = phys;
699 	phys += sizeof (struct vring_avail) + sizeof (uint16_t) +
700 	    vq->vq_qsize * sizeof (uint16_t);
701 	/* Then it's rounded up to the next page... */
702 	phys = roundup2(phys, LEGACY_VRING_ALIGN);
703 	/* ... and the last page(s) are the used ring. */
704 	vq->vq_used_gpa = phys;
705 
706 	vi_vq_init(vs);
707 }
708 
709 /*
710  * Helper inline for vq_getchain(): record the i'th "real"
711  * descriptor.
712  */
713 static inline void
_vq_record(struct virtio_softc * vs,int i,struct vring_desc * vd,struct iovec * iov,int n_iov,struct vi_req * reqp)714 _vq_record(struct virtio_softc *vs, int i, struct vring_desc *vd,
715     struct iovec *iov, int n_iov, struct vi_req *reqp)
716 {
717 	struct vmctx *ctx;
718 	uint32_t len;
719 	uint64_t addr;
720 
721 	ctx = vs->vs_pi->pi_vmctx;
722 
723 	if (i >= n_iov)
724 		return;
725 	len = atomic_load_32(&vd->len);
726 	addr = atomic_load_64(&vd->addr);
727 	iov[i].iov_len = len;
728 	iov[i].iov_base = paddr_guest2host(ctx, addr, len);
729 	if ((vd->flags & VRING_DESC_F_WRITE) == 0)
730 		reqp->readable++;
731 	else
732 		reqp->writable++;
733 }
734 #define	VQ_MAX_DESCRIPTORS	512	/* see below */
735 
736 /*
737  * Examine the chain of descriptors starting at the "next one" to
738  * make sure that they describe a sensible request.  If so, return
739  * the number of "real" descriptors that would be needed/used in
740  * acting on this request.  This may be smaller than the number of
741  * available descriptors, e.g., if there are two available but
742  * they are two separate requests, this just returns 1.  Or, it
743  * may be larger: if there are indirect descriptors involved,
744  * there may only be one descriptor available but it may be an
745  * indirect pointing to eight more.  We return 8 in this case,
746  * i.e., we do not count the indirect descriptors, only the "real"
747  * ones.
748  *
749  * Basically, this vets the "flags" and "next" field of each
750  * descriptor and tells you how many are involved.  Since some may
751  * be indirect, this also needs the vmctx (in the pci_devinst
752  * at vs->vs_pi) so that it can find indirect descriptors.
753  *
754  * As we process each descriptor, we copy and adjust it (guest to
755  * host address wise, also using the vmtctx) into the given iov[]
756  * array (of the given size).  If the array overflows, we stop
757  * placing values into the array but keep processing descriptors,
758  * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
759  * So you, the caller, must not assume that iov[] is as big as the
760  * return value (you can process the same thing twice to allocate
761  * a larger iov array if needed, or supply a zero length to find
762  * out how much space is needed).
763  *
764  * If some descriptor(s) are invalid, this prints a diagnostic message
765  * and returns -1.  If no descriptors are ready now it simply returns 0.
766  *
767  * You are assumed to have done a vq_ring_ready() if needed (note
768  * that vq_has_descs() does one).
769  */
770 int
vq_getchain(struct vqueue_info * vq,struct iovec * iov,int niov,struct vi_req * reqp)771 vq_getchain(struct vqueue_info *vq, struct iovec *iov, int niov,
772     struct vi_req *reqp)
773 {
774 	int i;
775 	u_int ndesc, n_indir;
776 	u_int idx, next;
777 	struct vi_req req;
778 	struct vring_desc *vdir, *vindir, *vp;
779 	struct vmctx *ctx;
780 	struct virtio_softc *vs;
781 	const char *name;
782 
783 	vs = vq->vq_vs;
784 	name = vs->vs_vc->vc_name;
785 	memset(&req, 0, sizeof (req));
786 
787 	/*
788 	 * Note: it's the responsibility of the guest not to
789 	 * update vq->vq_avail->idx until all of the descriptors
790          * the guest has written are valid (including all their
791          * "next" fields and "flags").
792 	 *
793 	 * Compute (vq_avail->idx - last_avail) in integers mod 2**16.  This is
794 	 * the number of descriptors the device has made available
795 	 * since the last time we updated vq->vq_last_avail.
796 	 *
797 	 * We just need to do the subtraction as an unsigned int,
798 	 * then trim off excess bits.
799 	 */
800 	idx = vq->vq_last_avail;
801 	ndesc = (uint16_t)((u_int)vq->vq_avail->idx - idx);
802 	if (ndesc == 0)
803 		return (0);
804 	if (ndesc > vq->vq_qsize) {
805 		vi_error(vs,
806 		    "%s: ndesc (%u) out of range, driver confused?",
807 		    name, (u_int)ndesc);
808 		return (-1);
809 	}
810 
811 	/*
812 	 * Now count/parse "involved" descriptors starting from
813 	 * the head of the chain.
814 	 *
815 	 * To prevent loops, we could be more complicated and
816 	 * check whether we're re-visiting a previously visited
817 	 * index, but we just abort if the count gets excessive.
818 	 */
819 	ctx = vs->vs_pi->pi_vmctx;
820 	req.idx = next = vq->vq_avail->ring[idx & (vq->vq_qsize - 1)];
821 	vq->vq_last_avail++;
822 	for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->next) {
823 		if (next >= vq->vq_qsize) {
824 			vi_error(vs,
825 			    "%s: descriptor index %u out of range, "
826 			    "driver confused?",
827 			    name, next);
828 			return (-1);
829 		}
830 		vdir = &vq->vq_desc[next];
831 		if ((vdir->flags & VRING_DESC_F_INDIRECT) == 0) {
832 			_vq_record(vs, i, vdir, iov, niov, &req);
833 			i++;
834 		} else if ((vs->vs_negotiated_caps &
835 		    VIRTIO_RING_F_INDIRECT_DESC) == 0) {
836 			vi_error(vs,
837 			    "%s: descriptor has forbidden INDIRECT flag, "
838 			    "driver confused?",
839 			    name);
840 			return (-1);
841 		} else {
842 			n_indir = vdir->len / 16;
843 			if ((vdir->len & 0xf) || n_indir == 0) {
844 				vi_error(vs,
845 				    "%s: invalid indir len 0x%x, "
846 				    "driver confused?",
847 				    name, (u_int)vdir->len);
848 				return (-1);
849 			}
850 			vindir = paddr_guest2host(ctx,
851 			    vdir->addr, vdir->len);
852 			/*
853 			 * Indirects start at the 0th, then follow
854 			 * their own embedded "next"s until those run
855 			 * out.  Each one's indirect flag must be off
856 			 * (we don't really have to check, could just
857 			 * ignore errors...).
858 			 */
859 			next = 0;
860 			for (;;) {
861 				vp = &vindir[next];
862 				if (vp->flags & VRING_DESC_F_INDIRECT) {
863 					vi_error(vs,
864 					    "%s: indirect desc has INDIR flag,"
865 					    " driver confused?",
866 					    name);
867 					return (-1);
868 				}
869 				_vq_record(vs, i, vp, iov, niov, &req);
870 				if (++i > VQ_MAX_DESCRIPTORS)
871 					goto loopy;
872 				if ((vp->flags & VRING_DESC_F_NEXT) == 0)
873 					break;
874 				next = vp->next;
875 				if (next >= n_indir) {
876 					vi_error(vs,
877 					    "%s: invalid next %u > %u, "
878 					    "driver confused?",
879 					    name, (u_int)next, n_indir);
880 					return (-1);
881 				}
882 			}
883 		}
884 		if ((vdir->flags & VRING_DESC_F_NEXT) == 0)
885 			goto done;
886 	}
887 
888 loopy:
889 	vi_error(vs, "%s: descriptor loop? count > %d - driver confused?",
890 	    name, i);
891 	return (-1);
892 
893 done:
894 	*reqp = req;
895 	return (i);
896 }
897 
898 /*
899  * Return the first n_chain request chains back to the available queue.
900  *
901  * (These chains are the ones you handled when you called vq_getchain()
902  * and used its positive return value.)
903  */
904 void
vq_retchains(struct vqueue_info * vq,uint16_t n_chains)905 vq_retchains(struct vqueue_info *vq, uint16_t n_chains)
906 {
907 
908 	vq->vq_last_avail -= n_chains;
909 }
910 
911 void
vq_relchain_prepare(struct vqueue_info * vq,uint16_t idx,uint32_t iolen)912 vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
913 {
914 	struct vring_used *vuh;
915 	struct vring_used_elem *vue;
916 	uint16_t mask;
917 
918 	/*
919 	 * Notes:
920 	 *  - mask is N-1 where N is a power of 2 so computes x % N
921 	 *  - vuh points to the "used" data shared with guest
922 	 *  - vue points to the "used" ring entry we want to update
923 	 */
924 	mask = vq->vq_qsize - 1;
925 	vuh = vq->vq_used;
926 
927 	vue = &vuh->ring[vq->vq_next_used++ & mask];
928 	vue->id = idx;
929 	vue->len = iolen;
930 }
931 
/*
 * Make previously prepared used-ring entries visible to the guest by
 * storing the queue's private "next used" counter into the shared used
 * index.  The release fence must come before that store.
 */
void
vq_relchain_publish(struct vqueue_info *vq)
{
	/*
	 * Ensure the used descriptor is visible before updating the index.
	 * This is necessary on ISAs with memory ordering less strict than x86
	 * (and even on x86 to act as a compiler barrier).
	 */
	atomic_thread_fence_rel();
	vq->vq_used->idx = vq->vq_next_used;
}
943 
/*
 * Give one request chain back to the guest, recording `iolen` as the
 * amount of I/O done on it: the used-ring entry is filled in and then
 * published in a single call.
 *
 * (`idx` identifies a chain obtained from a vq_getchain() call that
 * returned a positive value.)
 */
void
vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
{
	/* Write the entry first, then expose it to the guest. */
	vq_relchain_prepare(vq, idx, iolen);
	vq_relchain_publish(vq);
}
957 
958 /*
959  * Driver has finished processing "available" chains and calling
960  * vq_relchain on each one.  If driver used all the available
961  * chains, used_all should be set.
962  *
963  * If the "used" index moved we may need to inform the guest, i.e.,
964  * deliver an interrupt.  Even if the used index did NOT move we
965  * may need to deliver an interrupt, if the avail ring is empty and
966  * we are supposed to interrupt on empty.
967  *
968  * Note that used_all_avail is provided by the caller because it's
969  * a snapshot of the ring state when he decided to finish interrupt
970  * processing -- it's possible that descriptors became available after
971  * that point.  (It's also typically a constant 1/True as well.)
972  */
973 void
vq_endchains(struct vqueue_info * vq,int used_all_avail)974 vq_endchains(struct vqueue_info *vq, int used_all_avail)
975 {
976 	struct virtio_softc *vs;
977 	uint16_t event_idx, new_idx, old_idx;
978 	int intr;
979 
980 	/*
981 	 * Interrupt generation: if we're using EVENT_IDX,
982 	 * interrupt if we've crossed the event threshold.
983 	 * Otherwise interrupt is generated if we added "used" entries,
984 	 * but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
985 	 *
986 	 * In any case, though, if NOTIFY_ON_EMPTY is set and the
987 	 * entire avail was processed, we need to interrupt always.
988 	 */
989 	vs = vq->vq_vs;
990 	old_idx = vq->vq_save_used;
991 	vq->vq_save_used = new_idx = vq->vq_used->idx;
992 
993 	/*
994 	 * Use full memory barrier between "idx" store from preceding
995 	 * vq_relchain() call and the loads from VQ_USED_EVENT_IDX() or
996 	 * "flags" field below.
997 	 */
998 	atomic_thread_fence_seq_cst();
999 	if (used_all_avail &&
1000 	    (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY)) {
1001 		intr = 1;
1002 	} else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) {
1003 		event_idx = VQ_USED_EVENT_IDX(vq);
1004 		/*
1005 		 * This calculation is per docs and the kernel
1006 		 * (see src/sys/dev/virtio/virtio_ring.h).
1007 		 */
1008 		intr = (uint16_t)(new_idx - event_idx - 1) <
1009 			(uint16_t)(new_idx - old_idx);
1010 	} else {
1011 		intr = new_idx != old_idx &&
1012 		    !(vq->vq_avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
1013 	}
1014 	if (intr)
1015 		vq_interrupt(vs, vq);
1016 }
1017 
1018 /* Note: these are in sorted order to make for a fast search */
1019 static struct config_reg {
1020 	uint16_t	cr_offset;	/* register offset */
1021 	uint8_t		cr_size;	/* size (bytes) */
1022 	uint8_t		cr_ro;		/* true => reg is read only */
1023 	const char	*cr_name;	/* name of reg */
1024 } legacy_cfg_regs[] = {
1025 	{ VIRTIO_PCI_HOST_FEATURES,		4, 1, "HOST_FEATURES" },
1026 	{ VIRTIO_PCI_GUEST_FEATURES,		4, 0, "GUEST_FEATURES" },
1027 	{ VIRTIO_PCI_QUEUE_PFN,			4, 0, "QUEUE_PFN" },
1028 	{ VIRTIO_PCI_QUEUE_NUM,			2, 1, "QUEUE_NUM" },
1029 	{ VIRTIO_PCI_QUEUE_SEL,			2, 0, "QUEUE_SEL" },
1030 	{ VIRTIO_PCI_QUEUE_NOTIFY,		2, 0, "QUEUE_NOTIFY" },
1031 	{ VIRTIO_PCI_STATUS,			1, 0, "STATUS" },
1032 	{ VIRTIO_PCI_ISR,			1, 0, "ISR" },
1033 	{ VIRTIO_MSI_CONFIG_VECTOR,		2, 0, "CONFIG_VECTOR" },
1034 	{ VIRTIO_MSI_QUEUE_VECTOR,		2, 0, "QUEUE_VECTOR" },
1035 }, common_cfg_regs[] = {
1036 	{ VIRTIO_PCI_COMMON_DFSELECT,		4, 0, "DFSELECT" },
1037 	{ VIRTIO_PCI_COMMON_DF,			4, 1, "DF" },
1038 	{ VIRTIO_PCI_COMMON_GFSELECT,		4, 0, "GFSELECT" },
1039 	{ VIRTIO_PCI_COMMON_GF,			4, 0, "GF" },
1040 	{ VIRTIO_PCI_COMMON_MSIX,		2, 0, "MSIX" },
1041 	{ VIRTIO_PCI_COMMON_NUMQ,		2, 1, "NUMQ" },
1042 	{ VIRTIO_PCI_COMMON_STATUS,		1, 0, "STATUS" },
1043 	{ VIRTIO_PCI_COMMON_CFGGENERATION,	1, 1, "CFGGENERATION" },
1044 	{ VIRTIO_PCI_COMMON_Q_SELECT,		2, 0, "Q_SELECT" },
1045 	{ VIRTIO_PCI_COMMON_Q_SIZE,		2, 0, "Q_SIZE" },
1046 	{ VIRTIO_PCI_COMMON_Q_MSIX,		2, 0, "Q_MSIX" },
1047 	{ VIRTIO_PCI_COMMON_Q_ENABLE,		2, 0, "Q_ENABLE" },
1048 	{ VIRTIO_PCI_COMMON_Q_NOFF,		2, 1, "Q_NOFF" },
1049 	{ VIRTIO_PCI_COMMON_Q_DESCLO,		4, 0, "Q_DESCLO" },
1050 	{ VIRTIO_PCI_COMMON_Q_DESCHI,		4, 0, "Q_DESCHI" },
1051 	{ VIRTIO_PCI_COMMON_Q_AVAILLO,		4, 0, "Q_AVAILLO" },
1052 	{ VIRTIO_PCI_COMMON_Q_AVAILHI,		4, 0, "Q_AVAILHI" },
1053 	{ VIRTIO_PCI_COMMON_Q_USEDLO,		4, 0, "Q_USEDLO" },
1054 	{ VIRTIO_PCI_COMMON_Q_USEDHI,		4, 0, "Q_USEDHI" },
1055 };
1056 
1057 static inline struct config_reg *
vi_find_cr(struct config_reg * regstbl,size_t n,int offset)1058 vi_find_cr(struct config_reg *regstbl, size_t n, int offset) {
1059 	u_int hi, lo, mid;
1060 	struct config_reg *cr;
1061 
1062 	lo = 0;
1063 	hi = n - 1;
1064 	while (hi >= lo) {
1065 		mid = (hi + lo) >> 1;
1066 		cr = &regstbl[mid];
1067 		if (cr->cr_offset == offset)
1068 			return (cr);
1069 		if (cr->cr_offset < offset)
1070 			lo = mid + 1;
1071 		else
1072 			hi = mid - 1;
1073 	}
1074 	return (NULL);
1075 }
1076 
1077 static uint64_t
vi_hv_features(struct virtio_softc * vs,bool modern)1078 vi_hv_features(struct virtio_softc *vs, bool modern)
1079 {
1080 	return (modern ? vs->vs_vc->vc_hv_caps_modern | VIRTIO_F_VERSION_1 :
1081 	    vs->vs_vc->vc_hv_caps_legacy);
1082 }
1083 
1084 /*
1085  * Handle legacy pci config space reads.
1086  *
1087  * If it's part of the legacy virtio config structure, do that.
1088  * Otherwise dispatch to the actual device backend's config read
1089  * callback.
1090  */
1091 static uint64_t
vi_legacy_pci_read(struct virtio_softc * vs,uint64_t offset,int size)1092 vi_legacy_pci_read(struct virtio_softc *vs, uint64_t offset, int size)
1093 {
1094 	struct virtio_consts *vc;
1095 	struct config_reg *cr;
1096 	uint64_t virtio_config_size;
1097 	const char *name;
1098 	uint32_t newoff;
1099 	uint32_t value;
1100 	int error;
1101 
1102 	/* Checked by caller */
1103 	assert(size == 1 || size == 2 || size == 4);
1104 
1105 	vc = vs->vs_vc;
1106 	name = vc->vc_name;
1107 	value = VI_MASK(size);
1108 	virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(vs->vs_pi));
1109 
1110 	if (offset >= virtio_config_size) {
1111 		/*
1112 		 * Subtract off the standard size (including MSI-X
1113 		 * registers if enabled) and dispatch to underlying driver.
1114 		 * If that fails, fall into general code.
1115 		 */
1116 		newoff = offset - virtio_config_size;
1117 		if (newoff + size > vc->vc_cfgsize)
1118 			goto bad;
1119 		if (vc->vc_cfgread != NULL) {
1120 			error = (*vc->vc_cfgread)(DEV_SOFTC(vs),
1121 			    newoff, size, &value);
1122 		} else {
1123 			error = 0;
1124 		}
1125 		if (error == 0) {
1126 			DPRINTF(vs, "VIRTIO %s LEGACY PCI devcfg read[0x%"
1127 			    PRIx64 "] = 0x%x", name, newoff, value);
1128 			goto done;
1129 		}
1130 	}
1131 
1132 bad:
1133 	cr = vi_find_cr(legacy_cfg_regs, nitems(legacy_cfg_regs), offset);
1134 	if (cr == NULL || cr->cr_size != size) {
1135 		if (cr != NULL) {
1136 			/* offset must be OK, so size must be bad */
1137 			EPRINTLN(
1138 			    "%s: read from %s: bad size %d",
1139 			    name, cr->cr_name, size);
1140 		} else {
1141 			EPRINTLN(
1142 			    "%s: read from bad offset/size %jd/%d",
1143 			    name, (uintmax_t)offset, size);
1144 		}
1145 		goto done;
1146 	}
1147 
1148 	switch (offset) {
1149 	case VIRTIO_PCI_HOST_FEATURES:
1150 		/* Caps for legacy PCI configuration layout is only 32bit */
1151 		if (vc->vc_hv_features != NULL)
1152 			value = vc->vc_hv_features(DEV_SOFTC(vs), false);
1153 		else
1154 			value = vi_hv_features(vs, false);
1155 		break;
1156 	case VIRTIO_PCI_GUEST_FEATURES:
1157 		value = vs->vs_negotiated_caps;
1158 		break;
1159 	case VIRTIO_PCI_QUEUE_PFN:
1160 		if (!vi_is_modern(vs) && vs->vs_curq < vc->vc_nvq)
1161 			value = vs->vs_queues[vs->vs_curq].vq_pfn;
1162 		break;
1163 	case VIRTIO_PCI_QUEUE_NUM:
1164 		value = vs->vs_curq < vc->vc_nvq ?
1165 		    vs->vs_queues[vs->vs_curq].vq_qsize : 0;
1166 		break;
1167 	case VIRTIO_PCI_QUEUE_SEL:
1168 		value = vs->vs_curq;
1169 		break;
1170 	case VIRTIO_PCI_QUEUE_NOTIFY:
1171 		value = 0;	/* XXX */
1172 		break;
1173 	case VIRTIO_PCI_STATUS:
1174 		value = vs->vs_status;
1175 		break;
1176 	case VIRTIO_PCI_ISR:
1177 		value = vs->vs_isr;
1178 		vs->vs_isr = 0;		/* a read clears this flag */
1179 		if (value != 0)
1180 			pci_lintr_deassert(vs->vs_pi);
1181 		break;
1182 	case VIRTIO_MSI_CONFIG_VECTOR:
1183 		value = vs->vs_msix_cfg_idx;
1184 		break;
1185 	case VIRTIO_MSI_QUEUE_VECTOR:
1186 		value = vs->vs_curq < vc->vc_nvq ?
1187 		    vs->vs_queues[vs->vs_curq].vq_msix_idx :
1188 		    VIRTIO_MSI_NO_VECTOR;
1189 		break;
1190 	}
1191 
1192 	DPRINTF(vs, "VIRTIO %s LEGACY READ %s = 0x%x",
1193 	    name, cr->cr_name, value);
1194 
1195 	switch (offset) {
1196 	case VIRTIO_PCI_GUEST_FEATURES:
1197 	case VIRTIO_PCI_HOST_FEATURES:
1198 		vi_print_caps(vs, value);
1199 		break;
1200 	}
1201 
1202 done:
1203 	return (value);
1204 }
1205 
1206 /*
1207  * Handle legacy pci config space writes.
1208  *
1209  * If it's part of the legacy virtio config structure, do that.
1210  * Otherwise dispatch to the actual device backend's config write
1211  * callback.
1212  */
1213 static void
vi_legacy_pci_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1214 vi_legacy_pci_write(struct virtio_softc *vs, uint64_t offset, int size,
1215     uint64_t value)
1216 {
1217 	struct vqueue_info *vq;
1218 	struct virtio_consts *vc;
1219 	struct config_reg *cr;
1220 	uint64_t virtio_config_size;
1221 	const char *name;
1222 	uint32_t newoff;
1223 	int error;
1224 
1225 	/* Checked by caller */
1226 	assert(size == 1 || size == 2 || size == 4);
1227 
1228 	vc = vs->vs_vc;
1229 	name = vc->vc_name;
1230 	virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(vs->vs_pi));
1231 
1232 	if (offset >= virtio_config_size) {
1233 		/*
1234 		 * Subtract off the standard size (including MSI-X
1235 		 * registers if enabled) and dispatch to underlying driver.
1236 		 */
1237 		newoff = offset - virtio_config_size;
1238 		if (newoff + size > vc->vc_cfgsize)
1239 			goto bad;
1240 		if (vc->vc_cfgwrite != NULL) {
1241 			error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs),
1242 			    newoff, size, value);
1243 		} else {
1244 			error = 0;
1245 		}
1246 		if (error == 0) {
1247 			DPRINTF(vs,
1248 			    "VIRTIO %s LEGACY PCI devcfg write[0x%"
1249 			    PRIx64 "] = 0x%x", name, newoff, value);
1250 			return;
1251 		}
1252 	}
1253 
1254 bad:
1255 	cr = vi_find_cr(legacy_cfg_regs, nitems(legacy_cfg_regs), offset);
1256 	if (cr == NULL || cr->cr_size != size || cr->cr_ro) {
1257 		if (cr != NULL) {
1258 			/* offset must be OK, wrong size and/or reg is R/O */
1259 			if (cr->cr_size != size)
1260 				EPRINTLN(
1261 				    "%s: write to %s: bad size %d",
1262 				    name, cr->cr_name, size);
1263 			if (cr->cr_ro)
1264 				EPRINTLN(
1265 				    "%s: write to read-only reg %s",
1266 				    name, cr->cr_name);
1267 		} else {
1268 			EPRINTLN(
1269 			    "%s: write to bad offset/size %jd/%d",
1270 			    name, (uintmax_t)offset, size);
1271 		}
1272 		return;
1273 	}
1274 
1275 	DPRINTF(vs, "VIRTIO %s LEGACY WRITE %s = 0x%x",
1276 	    name, cr->cr_name, value);
1277 
1278 	switch (offset) {
1279 	case VIRTIO_PCI_GUEST_FEATURES:
1280 		if (vc->vc_hv_features != NULL)
1281 			value &= vc->vc_hv_features(DEV_SOFTC(vs), false);
1282 		else
1283 			value &= vi_hv_features(vs, false);
1284 		vs->vs_negotiated_caps = value;
1285 		if (vc->vc_apply_features != NULL) {
1286 			(*vc->vc_apply_features)(DEV_SOFTC(vs),
1287 			    &vs->vs_negotiated_caps);
1288 		}
1289 		DPRINTF(vs, "NEGOTIATED FEATURES 0x%" PRIx64 " (%s)",
1290 		    vs->vs_negotiated_caps,
1291 		    vi_is_modern(vs) ? "modern" : "legacy");
1292 		vi_print_caps(vs, vs->vs_negotiated_caps);
1293 		break;
1294 	case VIRTIO_PCI_QUEUE_PFN:
1295 		if (vs->vs_curq >= vc->vc_nvq)
1296 			goto bad_qindex;
1297 		if (vc->vc_qinit != NULL)
1298 			vc->vc_qinit(DEV_SOFTC(vs), value, false);
1299 		else
1300 			vi_legacy_vq_init(vs, value);
1301 		break;
1302 	case VIRTIO_PCI_QUEUE_SEL:
1303 		/*
1304 		 * Note that the guest is allowed to select an
1305 		 * invalid queue; we just need to return a QNUM
1306 		 * of 0 while the bad queue is selected.
1307 		 */
1308 		vs->vs_curq = value;
1309 		break;
1310 	case VIRTIO_PCI_QUEUE_NOTIFY:
1311 		if (value >= (unsigned int)vc->vc_nvq) {
1312 			EPRINTLN("%s: queue %d notify out of range",
1313 			    name, (int)value);
1314 			break;
1315 		}
1316 		if ((vs->vs_flags & VIRTIO_BROKEN) != 0)
1317 			break;
1318 		vq = &vs->vs_queues[value];
1319 		if (vq->vq_notify != NULL) {
1320 			(*vq->vq_notify)(DEV_SOFTC(vs), vq);
1321 		} else if (vc->vc_qnotify != NULL) {
1322 			(*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
1323 		} else {
1324 			EPRINTLN("%s: qnotify queue %d: missing vq/vc notify",
1325 			    name, (int)value);
1326 		}
1327 		break;
1328 	case VIRTIO_PCI_STATUS:
1329 		vs->vs_status = value;
1330 		if (value == 0) {
1331 			DPRINTF(vs, "VIRTIO %s RESET", name);
1332 			DPRINTF(vs, "**************************************");
1333 			vc->vc_reset(DEV_SOFTC(vs));
1334 		}
1335 		break;
1336 	case VIRTIO_MSI_CONFIG_VECTOR:
1337 		vs->vs_msix_cfg_idx = value;
1338 		break;
1339 	case VIRTIO_MSI_QUEUE_VECTOR:
1340 		if (vs->vs_curq >= vc->vc_nvq)
1341 			goto bad_qindex;
1342 		vq = &vs->vs_queues[vs->vs_curq];
1343 		vq->vq_msix_idx = value;
1344 		if (vc->vc_set_msix != NULL)
1345 			vc->vc_set_msix(DEV_SOFTC(vs), vs->vs_curq);
1346 		break;
1347 	}
1348 	return;
1349 
1350 bad_qindex:
1351 	EPRINTLN(
1352 	    "%s: write config reg %s: curq %d >= max %d",
1353 	    name, cr->cr_name, vs->vs_curq, vc->vc_nvq);
1354 }
1355 
/* Low 32 bits of x. */
#define VI_LOW(x) ((x) & 0xffffffff)
/* Bits 32..63 of x, moved down to bits 0..31. */
#define VI_HIGH(x) VI_LOW((x) >> 32)
1358 
1359 /*
1360  * Virtio modern:
1361  * Handle pci config space reads to common config structure.
1362  */
1363 static uint64_t
vi_pci_common_cfg_read(struct virtio_softc * vs,uint64_t offset,int size)1364 vi_pci_common_cfg_read(struct virtio_softc *vs, uint64_t offset, int size)
1365 {
1366 	uint64_t value = -1;
1367 	struct virtio_consts *vc;
1368 	struct vqueue_info *vq;
1369 	struct config_reg *cr;
1370 	const char *name;
1371 	uint64_t capval = 0;
1372 
1373 	/* Checked by caller */
1374 	assert(size == 1 || size == 2 || size == 4);
1375 
1376 	vc = vs->vs_vc;
1377 	name = vc->vc_name;
1378 	cr = vi_find_cr(common_cfg_regs, nitems(common_cfg_regs), offset);
1379 	if (cr == NULL) {
1380 		EPRINTLN("%s: read from bad offset/size 0x%jx/0x%x",
1381 		    name, (uintmax_t)offset, size);
1382 		goto done;
1383 	}
1384 	/*
1385 	 * We check that the requested size matches the register at this
1386 	 * offset, and refuse to process it if there is a mismatch.
1387 	 */
1388 	if (cr->cr_size != size) {
1389 		EPRINTLN("%s: read from %s: bad size 0x%x",
1390 		    name, cr->cr_name, size);
1391 		goto done;
1392 	}
1393 
1394 	vq = (vs->vs_curq < vc->vc_nvq ? &vs->vs_queues[vs->vs_curq] : NULL);
1395 
1396 	switch (offset) {
1397 	case VIRTIO_PCI_COMMON_DFSELECT:
1398 		value = vs->vs_dfselect;
1399 		break;
1400 	case VIRTIO_PCI_COMMON_DF:
1401 		if (vc->vc_hv_features != NULL)
1402 			value = vc->vc_hv_features(DEV_SOFTC(vs), true);
1403 		else
1404 			value = vi_hv_features(vs, true);
1405 		switch (vs->vs_dfselect) {
1406 		case 0:
1407 			capval = value = VI_LOW(value);
1408 			break;
1409 		case 1:
1410 			value = VI_HIGH(value);
1411 			capval = value << 32;
1412 			break;
1413 		default:
1414 			value = capval = 0;
1415 			break;
1416 		}
1417 		/* capval is debug printed below */
1418 		break;
1419 	case VIRTIO_PCI_COMMON_GFSELECT:
1420 		value = vs->vs_gfselect;
1421 		break;
1422 	case VIRTIO_PCI_COMMON_GF:
1423 		value = vs->vs_negotiated_caps;
1424 		switch (vs->vs_gfselect) {
1425 		case 0:
1426 			capval = value = VI_LOW(value);
1427 			break;
1428 		case 1:
1429 			value = VI_HIGH(value);
1430 			capval = value << 32;
1431 			break;
1432 		default:
1433 			value = capval = 0;
1434 			break;
1435 		}
1436 		/* capval is debug printed below */
1437 		break;
1438 	case VIRTIO_PCI_COMMON_MSIX:
1439 		value = vs->vs_msix_cfg_idx;
1440 		break;
1441 	case VIRTIO_PCI_COMMON_NUMQ:
1442 		value = vc->vc_nvq;
1443 		break;
1444 	case VIRTIO_PCI_COMMON_STATUS:
1445 		value = vs->vs_status;
1446 		break;
1447 	case VIRTIO_PCI_COMMON_CFGGENERATION:
1448 		if ((vs->vs_flags & VIRTIO_DEVCFG_CHG) != 0) {
1449 			vs->vs_devcfg_gen++;
1450 			vs->vs_flags &= ~VIRTIO_DEVCFG_CHG;
1451 		}
1452 		value = vs->vs_devcfg_gen;
1453 		break;
1454 	case VIRTIO_PCI_COMMON_Q_SELECT:
1455 		value = vs->vs_curq;
1456 		break;
1457 	case VIRTIO_PCI_COMMON_Q_SIZE:
1458 		value = vq != NULL ? vq->vq_qsize : 0;
1459 		break;
1460 	case VIRTIO_PCI_COMMON_Q_MSIX:
1461 		if (vq != NULL)
1462 			value = vq->vq_msix_idx;
1463 		break;
1464 	case VIRTIO_PCI_COMMON_Q_ENABLE:
1465 		value = vq != NULL ? !!(vq->vq_flags & VQ_ENABLED) : 0;
1466 		break;
1467 	case VIRTIO_PCI_COMMON_Q_NOFF:
1468 		/* queue_notify_off is equal to qid for now */
1469 		value = vs->vs_curq;
1470 		break;
1471 	case VIRTIO_PCI_COMMON_Q_DESCLO:
1472 		if (vq != NULL)
1473 			value = VI_LOW(vq->vq_desc_gpa);
1474 		break;
1475 	case VIRTIO_PCI_COMMON_Q_DESCHI:
1476 		if (vq != NULL)
1477 			value = VI_HIGH(vq->vq_desc_gpa);
1478 		break;
1479 	case VIRTIO_PCI_COMMON_Q_AVAILLO:
1480 		if (vq != NULL)
1481 			value = VI_LOW(vq->vq_avail_gpa);
1482 		break;
1483 	case VIRTIO_PCI_COMMON_Q_AVAILHI:
1484 		if (vq != NULL)
1485 			value = VI_HIGH(vq->vq_avail_gpa);
1486 		break;
1487 	case VIRTIO_PCI_COMMON_Q_USEDLO:
1488 		if (vq != NULL)
1489 			value = VI_LOW(vq->vq_used_gpa);
1490 		break;
1491 	case VIRTIO_PCI_COMMON_Q_USEDHI:
1492 		if (vq != NULL)
1493 			value = VI_HIGH(vq->vq_used_gpa);
1494 		break;
1495 	}
1496 
1497 done:
1498 	value &= VI_MASK(size);
1499 	DPRINTF(vs, "VIRTIO %s COMMON %s read = 0x%x",
1500 	    name, cr->cr_name, value);
1501 
1502 	switch (offset) {
1503 	case VIRTIO_PCI_COMMON_DF:
1504 	case VIRTIO_PCI_COMMON_GF:
1505 		vi_print_caps(vs, capval);
1506 		break;
1507 	}
1508 	return (value);
1509 }
1510 
1511 /*
1512  * Virtio modern:
1513  * Handle pci config space writes to common config structure.
1514  */
1515 static void
vi_pci_common_cfg_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1516 vi_pci_common_cfg_write(struct virtio_softc *vs, uint64_t offset, int size,
1517     uint64_t value)
1518 {
1519 	uint64_t capval = 0;
1520 	struct virtio_consts *vc;
1521 	struct vqueue_info *vq;
1522 	struct config_reg *cr;
1523 	const char *name;
1524 
1525 	/* Checked by caller */
1526 	assert(size == 1 || size == 2 || size == 4);
1527 
1528 	vc = vs->vs_vc;
1529 	name = vc->vc_name;
1530 	value &= VI_MASK(size);
1531 
1532 	cr = vi_find_cr(common_cfg_regs, nitems(common_cfg_regs), offset);
1533 	if (cr == NULL) {
1534 		EPRINTLN( "%s: write to %s: bad size 0x%x",
1535 		    name, cr->cr_name, size);
1536 		return;
1537 	}
1538 	/*
1539 	 * We check that the requested size matches the register at this
1540 	 * offset, and refuse to process it if there is a mismatch.
1541 	 */
1542 	if (cr->cr_size != size) {
1543 		EPRINTLN("%s: write to bad offset/size 0x%jx/0x%x",
1544 		    name, (uintmax_t)offset, size);
1545 		return;
1546 	}
1547 
1548 	DPRINTF(vs, "VIRTIO %s COMMON %s write 0x%x", name, cr->cr_name, value);
1549 
1550 	vq = NULL;
1551 	switch (offset) {
1552 	case VIRTIO_PCI_COMMON_Q_SIZE:
1553 	case VIRTIO_PCI_COMMON_Q_MSIX:
1554 	case VIRTIO_PCI_COMMON_Q_ENABLE:
1555 	case VIRTIO_PCI_COMMON_Q_DESCLO:
1556 	case VIRTIO_PCI_COMMON_Q_DESCHI:
1557 	case VIRTIO_PCI_COMMON_Q_AVAILLO:
1558 	case VIRTIO_PCI_COMMON_Q_AVAILHI:
1559 	case VIRTIO_PCI_COMMON_Q_USEDLO:
1560 	case VIRTIO_PCI_COMMON_Q_USEDHI:
1561 		if (vs->vs_curq >= vc->vc_nvq) {
1562 			EPRINTLN("%s: write queue %d out of range",
1563 			    name, vs->vs_curq);
1564 			goto bad_write;
1565 		}
1566 		vq = &vs->vs_queues[vs->vs_curq];
1567 		break;
1568 	default:
1569 		break;
1570 	}
1571 
1572 	switch (offset) {
1573 	case VIRTIO_PCI_COMMON_DFSELECT:
1574 		vs->vs_dfselect = value;
1575 		break;
1576 	case VIRTIO_PCI_COMMON_GFSELECT:
1577 		vs->vs_gfselect = value;
1578 		break;
1579 	case VIRTIO_PCI_COMMON_GF:
1580 		switch (vs->vs_gfselect) {
1581 		case 0:
1582 			capval = value;
1583 			vs->vs_negotiated_caps =
1584 			    (VI_HIGH(vs->vs_negotiated_caps) << 32) | value;
1585 			break;
1586 		case 1:
1587 			capval = value << 32;
1588 			vs->vs_negotiated_caps =
1589 			    capval | VI_LOW(vs->vs_negotiated_caps);
1590 			break;
1591 		default:
1592 			capval = 0;
1593 			break;
1594 		}
1595 		vi_print_caps(vs, capval);
1596 
1597 		uint64_t hvfeat;
1598 		if (vc->vc_hv_features != NULL)
1599 			hvfeat = vc->vc_hv_features(DEV_SOFTC(vs), true);
1600 		else
1601 			hvfeat = vi_hv_features(vs, true);
1602 		vs->vs_negotiated_caps &= hvfeat;
1603 		break;
1604 	case VIRTIO_PCI_COMMON_MSIX:
1605 		vs->vs_msix_cfg_idx = value;
1606 		break;
1607 	case VIRTIO_PCI_COMMON_STATUS:
1608 		if (value == 0) {
1609 			DPRINTF(vs, "VIRTIO %s RESET", name);
1610 			(*vc->vc_reset)(DEV_SOFTC(vs));
1611 			vs->vs_status = value;
1612 			break;
1613 		}
1614 		if ((vs->vs_status & VIRTIO_CONFIG_S_FEATURES_OK) == 0 &&
1615 		    (value & VIRTIO_CONFIG_S_FEATURES_OK) != 0) {
1616 			if (vc->vc_apply_features != NULL) {
1617 				(*vc->vc_apply_features)(DEV_SOFTC(vs),
1618 				    &vs->vs_negotiated_caps);
1619 			}
1620 			DPRINTF(vs, "NEGOTIATED FEATURES 0x%" PRIx64 " (%s)",
1621 			    vs->vs_negotiated_caps,
1622 			    vi_is_modern(vs) ? "modern" : "legacy");
1623 			vi_print_caps(vs, vs->vs_negotiated_caps);
1624 		}
1625 		vs->vs_status = value;
1626 		break;
1627 	case VIRTIO_PCI_COMMON_Q_SELECT:
1628 		if (value >= vc->vc_nvq) {
1629 			EPRINTLN("%s: queue select %d out of range",
1630 			    name, (int)value);
1631 			goto bad_write;
1632 		}
1633 		vs->vs_curq = value;
1634 		break;
1635 	case VIRTIO_PCI_COMMON_Q_SIZE:
1636 		/*
1637 		 * If the guest has passed us a queue size that is not a power
1638 		 * of two, something is very wrong.
1639 		 */
1640 		if (!ISP2(value)) {
1641 			vi_error(vs, "Bad queue size 0x%" PRIx64
1642 			    " for qid 0x%x, not power of 2",
1643 			    value, vq->vq_num);
1644 		} else {
1645 			vq->vq_qsize = value;
1646 		}
1647 		break;
1648 	case VIRTIO_PCI_COMMON_Q_MSIX:
1649 		vq->vq_msix_idx = value;
1650 		if (vc->vc_set_msix != NULL)
1651 			vc->vc_set_msix(DEV_SOFTC(vs), vs->vs_curq);
1652 		break;
1653 	case VIRTIO_PCI_COMMON_Q_ENABLE:
1654 		if ((vq->vq_flags & VQ_ENABLED) == 0 && value == 1) {
1655 			if (vc->vc_qinit != NULL)
1656 				vc->vc_qinit(DEV_SOFTC(vs), 0, true);
1657 			else
1658 				vi_vq_init(vs);
1659 			vq->vq_flags |= VQ_ENABLED;
1660 		} else if (value == 0) {
1661 			/*
1662 			 * The driver is not permitted to write a 0 to this
1663 			 * register. We choose to ignore it rather than fault
1664 			 * the device.
1665 			 */
1666 		}
1667 		break;
1668 	case VIRTIO_PCI_COMMON_Q_DESCLO:
1669 		vq->vq_desc_gpa = (VI_HIGH(vq->vq_desc_gpa) << 32) | value;
1670 		break;
1671 	case VIRTIO_PCI_COMMON_Q_DESCHI:
1672 		vq->vq_desc_gpa = (value << 32) | VI_LOW(vq->vq_desc_gpa);
1673 		break;
1674 	case VIRTIO_PCI_COMMON_Q_AVAILLO:
1675 		vq->vq_avail_gpa = (VI_HIGH(vq->vq_avail_gpa) << 32) | value;
1676 		break;
1677 	case VIRTIO_PCI_COMMON_Q_AVAILHI:
1678 		vq->vq_avail_gpa = (value << 32) | VI_LOW(vq->vq_avail_gpa);
1679 		break;
1680 	case VIRTIO_PCI_COMMON_Q_USEDLO:
1681 		vq->vq_used_gpa = (VI_HIGH(vq->vq_used_gpa) << 32) | value;
1682 		break;
1683 	case VIRTIO_PCI_COMMON_Q_USEDHI:
1684 		vq->vq_used_gpa = (value << 32) | VI_LOW(vq->vq_used_gpa);
1685 		break;
1686 	default:
1687 		EPRINTLN("%s: write to bad offset/size %jd/%d", name,
1688 		    (uintmax_t)offset, size);
1689 		goto bad_write;
1690 	}
1691 
1692 	return;
1693 
1694 bad_write:
1695 	return;
1696 }
1697 
/*
 * Virtio modern:
 * Handle pci MMIO reads to the notification structure.
 *
 * Every read of this structure yields zero; none of the arguments
 * influence the result.
 */
static uint64_t
vi_pci_notify_cfg_read(struct virtio_softc *vs, uint64_t offset, int size)
{
	(void)vs;
	(void)offset;
	(void)size;

	return (0);
}
1709 
1710 /*
1711  * Virtio modern:
1712  * Handle pci MMIO writes to the notification structure.
1713  *
1714  * VIRTIO_F_NOTIFICATION_DATA is not a feature that this device advertises
1715  * so we only need to consider the simple case where the vq index is written
1716  * into the registers.
1717  */
1718 static void
vi_pci_notify_cfg_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1719 vi_pci_notify_cfg_write(struct virtio_softc *vs, uint64_t offset, int size,
1720     uint64_t value)
1721 {
1722 	struct virtio_consts *vc = vs->vs_vc;
1723 	const char *name = vc->vc_name;
1724 	unsigned int qid = value;
1725 	struct vqueue_info *vq;
1726 
1727 	DPRINTF(vs, "VIRTIO %s notify VQ 0x%x offset 0x%x",
1728 	    name, qid, offset);
1729 
1730 	if (size != 2) {
1731 		EPRINTLN("%s: bad size 0x%x access at offset 0x%" PRIx64,
1732 		    name, size, offset);
1733 		return;
1734 	}
1735 
1736 	if ((vs->vs_status & VIRTIO_CONFIG_STATUS_DRIVER_OK) == 0) {
1737 		EPRINTLN("%s: attempt to use VQ 0x%x before DRIVER_OK, "
1738 		    "driver confused?", name, qid);
1739 		return;
1740 	}
1741 
1742 	if ((vs->vs_flags & VIRTIO_BROKEN) != 0) {
1743 		EPRINTLN("%s: attempt to use VQ 0x%x while VIRTIO device is "
1744 		    "flagged as broken", name, qid);
1745 		return;
1746 	}
1747 
1748 	if (offset != qid * VQ_NOTIFY_OFF_MULTIPLIER) {
1749 		EPRINTLN(
1750 		    "%s: VQ 0x%x notify does not have matching offset at 0x%"
1751 		    PRIx64, name, qid, offset);
1752 		return;
1753 	}
1754 
1755 	if (qid >= vc->vc_nvq) {
1756 		EPRINTLN("%s: VQ 0x%x notify out of range", name, qid);
1757 		return;
1758 	}
1759 
1760 	vq = &vs->vs_queues[qid];
1761 	if ((vq->vq_flags & VQ_ENABLED) == 0)
1762 		return;
1763 	if (vq->vq_notify != NULL)
1764 		(*vq->vq_notify)(DEV_SOFTC(vs), vq);
1765 	else if (vc->vc_qnotify != NULL)
1766 		(*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
1767 	else
1768 		EPRINTLN("%s: qnotify VQ 0x%x: no vq/vc notify", name, qid);
1769 }
1770 
1771 /*
1772  * Virtio modern:
1773  * Handle pci MMIO reads to ISR structure.
1774  *
1775  * The ISR structure has a relaxed requirement on alignment.
1776  */
1777 static uint64_t
vi_pci_isr_cfg_read(struct virtio_softc * vs,uint64_t offset,int size)1778 vi_pci_isr_cfg_read(struct virtio_softc *vs, uint64_t offset, int size)
1779 {
1780 	uint64_t value;
1781 
1782 	if (offset != 0)
1783 		return (0);
1784 
1785 	value = vs->vs_isr;
1786 	vs->vs_isr = 0;
1787 	if (value != 0) {
1788 		DPRINTF(vs, "VIRTIO ISR read[0x%" PRIx64 "] = 0x%x",
1789 		    offset, value);
1790 		pci_lintr_deassert(vs->vs_pi);
1791 	}
1792 	return (value);
1793 }
1794 
1795 /*
1796  * Virtio modern:
1797  * pci MMIO writes to ISR structure are disallowed.
1798  */
1799 static void
vi_pci_isr_cfg_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1800 vi_pci_isr_cfg_write(struct virtio_softc *vs, uint64_t offset, int size,
1801     uint64_t value)
1802 {
1803 	const char *name = vs->vs_vc->vc_name;
1804 
1805 	EPRINTLN("%s: invalid write into isr cfg", name);
1806 }
1807 
1808 /*
1809  * Virtio modern:
1810  * Handle pci MMIO reads to device-specific config structure.
1811  */
1812 static uint64_t
vi_pci_dev_cfg_read(struct virtio_softc * vs,uint64_t offset,int size)1813 vi_pci_dev_cfg_read(struct virtio_softc *vs, uint64_t offset, int size)
1814 {
1815 	struct virtio_consts *vc = vs->vs_vc;
1816 	uint32_t value = VI_MASK(size);
1817 
1818 	if (offset + size > vc->vc_cfgsize)
1819 		return (value);
1820 
1821 	vc->vc_cfgread(DEV_SOFTC(vs), offset, size, &value);
1822 	DPRINTF(vs, "VIRTIO %s PCI devcfg read[0x%" PRIx64 "] = 0x%x",
1823 	    vs->vs_vc->vc_name, offset, value);
1824 	return (value);
1825 }
1826 
1827 /*
1828  * Virtio modern:
1829  * Handle pci MMIO writes to device-specific config structure.
1830  */
1831 static void
vi_pci_dev_cfg_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1832 vi_pci_dev_cfg_write(struct virtio_softc *vs, uint64_t offset, int size,
1833     uint64_t value)
1834 {
1835 	struct virtio_consts *vc = vs->vs_vc;
1836 
1837 	value &= VI_MASK(size);
1838 
1839 	if (offset + size > vc->vc_cfgsize)
1840 		return;
1841 	if (vc->vc_cfgwrite != NULL)
1842 		vc->vc_cfgwrite(DEV_SOFTC(vs), offset, size, value);
1843 	DPRINTF(vs, "VIRTIO %s PCI devcfg write[0x%" PRIx64 "] = 0x%x",
1844 	    vs->vs_vc->vc_name, offset, value);
1845 }
1846 
1847 /*
1848  * Handle configuration space reads.
1849  */
1850 int
vi_pci_cfgread(struct pci_devinst * pi,int offset,int bytes,uint32_t * retval)1851 vi_pci_cfgread(struct pci_devinst *pi, int offset, int bytes, uint32_t *retval)
1852 {
1853 	struct virtio_softc *vs = pi->pi_arg;
1854 	virtio_pci_capcfg_t *cfg;
1855 	uint32_t baroff, barlen;
1856 	int baridx;
1857 
1858 	cfg = vi_pci_cfg_bycapaddr(vs, offset, bytes);
1859 
1860 	/* If this is not a VirtIO cap, use the default cfgspace handler */
1861 	if (cfg == NULL)
1862 		return (PE_CFGRW_DEFAULT);
1863 
1864 	/* Only the PCI cap has special handling */
1865 	if (cfg->c_captype != VIRTIO_PCI_CAP_PCI_CFG)
1866 		return (PE_CFGRW_DEFAULT);
1867 
1868 	/* and then only the data field */
1869 	if (offset != vs->vs_pcicap->c_capoff +
1870 	    offsetof(struct virtio_pci_cfg_cap, pci_cfg_data)) {
1871 		return (PE_CFGRW_DEFAULT);
1872 	}
1873 
1874 	if (bytes != 1 && bytes != 2 && bytes != 4)
1875 		return (PE_CFGRW_DROP);
1876 
1877 	if (vs->vs_mtx)
1878 		pthread_mutex_lock(vs->vs_mtx);
1879 
1880 	baridx = pci_get_cfgdata8(pi,
1881 	    offset + offsetof(struct virtio_pci_cap, bar));
1882 	baroff = pci_get_cfgdata32(pi,
1883 	    offset + offsetof(struct virtio_pci_cap, offset));
1884 	barlen = pci_get_cfgdata32(pi,
1885 	    offset + offsetof(struct virtio_pci_cap, length));
1886 	if (baridx > PCIR_MAX_BAR_0) {
1887 		*retval = VI_MASK(bytes);
1888 		goto done;
1889 	}
1890 	*retval = vi_modern_pci_read(vs, baridx, baroff, barlen);
1891 
1892 done:
1893 	if (vs->vs_mtx)
1894 		pthread_mutex_unlock(vs->vs_mtx);
1895 
1896 	DPRINTF(vs, "VIRTIO %s PCI READ BAR%u[0x%x+%x] = 0x%x",
1897 	    vs->vs_vc->vc_name, baridx, baroff, barlen, *retval);
1898 
1899 	return (PE_CFGRW_DROP);
1900 }
1901 
1902 /*
1903  * Handle configuration space writes.
1904  */
1905 int
vi_pci_cfgwrite(struct pci_devinst * pi,int offset,int bytes,uint32_t val)1906 vi_pci_cfgwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
1907 {
1908 	struct virtio_softc *vs = pi->pi_arg;
1909 	virtio_pci_capcfg_t *cfg;
1910 	uint32_t baroff, barlen;
1911 	int baridx;
1912 
1913 	cfg = vi_pci_cfg_bycapaddr(vs, offset, bytes);
1914 
1915 	/* If this is not a VirtIO cap, use the default cfgspace handler */
1916 	if (cfg == NULL)
1917 		return (PE_CFGRW_DEFAULT);
1918 
1919 	/* Only the PCI VirtIO cap can be written to */
1920 	if (cfg->c_captype != VIRTIO_PCI_CAP_PCI_CFG)
1921 		return (PE_CFGRW_DROP);
1922 
1923 	/* and then only the data field needs special handling */
1924 	if (offset != vs->vs_pcicap->c_capoff +
1925 	    offsetof(struct virtio_pci_cfg_cap, pci_cfg_data)) {
1926 		return (PE_CFGRW_DEFAULT);
1927 	}
1928 
1929 	if (bytes != 1 && bytes != 2 && bytes != 4)
1930 		return (PE_CFGRW_DROP);
1931 
1932 	if (vs->vs_mtx)
1933 		pthread_mutex_lock(vs->vs_mtx);
1934 
1935 	baridx = pci_get_cfgdata8(pi,
1936 	    offset + offsetof(struct virtio_pci_cap, bar));
1937 	baroff = pci_get_cfgdata32(pi,
1938 	    offset + offsetof(struct virtio_pci_cap, offset));
1939 	barlen = pci_get_cfgdata32(pi,
1940 	    offset + offsetof(struct virtio_pci_cap, length));
1941 	if (baridx > PCIR_MAX_BAR_0)
1942 		goto done;
1943 	vi_modern_pci_write(vs, baridx, baroff, barlen, val);
1944 
1945 done:
1946 	if (vs->vs_mtx)
1947 		pthread_mutex_unlock(vs->vs_mtx);
1948 
1949 	DPRINTF(vs, "VIRTIO %s PCI WRITE BAR%x[0x%x+%x] = 0x%x",
1950 	    vs->vs_vc->vc_name, baridx, baroff, barlen, val);
1951 
1952 	return (PE_CFGRW_DROP);
1953 }
1954 
1955 /*
1956  * Handle pci config space reads to virtio-related structures
1957  */
1958 static uint64_t
vi_modern_pci_read(struct virtio_softc * vs,int baridx,uint64_t offset,int size)1959 vi_modern_pci_read(struct virtio_softc *vs, int baridx, uint64_t offset,
1960     int size)
1961 {
1962 	virtio_pci_capcfg_t *cfg;
1963 	uint64_t value = VI_MASK(size);
1964 
1965 	cfg = vi_pci_cfg_bybaraddr(vs, baridx, offset, size);
1966 	if (cfg == NULL)
1967 		return (value);
1968 
1969 	offset -= cfg->c_baroff;
1970 
1971 	switch (cfg->c_captype) {
1972 	case VIRTIO_PCI_CAP_COMMON_CFG:
1973 		value = vi_pci_common_cfg_read(vs, offset, size);
1974 		break;
1975 	case VIRTIO_PCI_CAP_NOTIFY_CFG:
1976 		value = vi_pci_notify_cfg_read(vs, offset, size);
1977 		break;
1978 	case VIRTIO_PCI_CAP_ISR_CFG:
1979 		value = vi_pci_isr_cfg_read(vs, offset, size);
1980 		break;
1981 	case VIRTIO_PCI_CAP_DEVICE_CFG:
1982 		value = vi_pci_dev_cfg_read(vs, offset, size);
1983 		break;
1984 	default:
1985 		break;
1986 	}
1987 
1988 	return (value);
1989 }
1990 
1991 /*
1992  * Handle pci config space reads to virtio-related structures
1993  */
1994 static void
vi_modern_pci_write(struct virtio_softc * vs,int baridx,uint64_t offset,int size,uint64_t value)1995 vi_modern_pci_write(struct virtio_softc *vs, int baridx, uint64_t offset,
1996     int size, uint64_t value)
1997 {
1998 	virtio_pci_capcfg_t *cfg;
1999 
2000 	cfg = vi_pci_cfg_bybaraddr(vs, baridx, offset, size);
2001 	if (cfg == NULL)
2002 		return;
2003 
2004 	offset -= cfg->c_baroff;
2005 
2006 	switch (cfg->c_captype) {
2007 	case VIRTIO_PCI_CAP_COMMON_CFG:
2008 		vi_pci_common_cfg_write(vs, offset, size, value);
2009 		break;
2010 	case VIRTIO_PCI_CAP_NOTIFY_CFG:
2011 		vi_pci_notify_cfg_write(vs, offset, size, value);
2012 		break;
2013 	case VIRTIO_PCI_CAP_ISR_CFG:
2014 		vi_pci_isr_cfg_write(vs, offset, size, value);
2015 		break;
2016 	case VIRTIO_PCI_CAP_DEVICE_CFG:
2017 		vi_pci_dev_cfg_write(vs, offset, size, value);
2018 		break;
2019 	}
2020 }
2021 
2022 /*
2023  * Handle virtio bar reads.
2024  *
2025  * If it's to the MSI-X info, dispatch the reads to the msix handling code.
2026  * Otherwise, dispatch the reads to virtio device code.
2027  */
2028 uint64_t
vi_pci_read(struct pci_devinst * pi,int baridx,uint64_t offset,int size)2029 vi_pci_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
2030 {
2031 	struct virtio_softc *vs = pi->pi_arg;
2032 	uint64_t value;
2033 
2034 	if ((vs->vs_flags & VIRTIO_USE_MSIX) != 0 &&
2035 	    (baridx == pci_msix_table_bar(pi) ||
2036 	    baridx == pci_msix_pba_bar(pi))) {
2037 		return (pci_emul_msix_tread(pi, offset, size));
2038 	}
2039 
2040 	if (vs->vs_mtx)
2041 		pthread_mutex_lock(vs->vs_mtx);
2042 
2043 	value = VI_MASK(size);
2044 
2045 	if (size != 1 && size != 2 && size != 4)
2046 		goto done;
2047 
2048 	switch (baridx) {
2049 	case VIRTIO_LEGACY_BAR:
2050 		value = vi_legacy_pci_read(vs, offset, size);
2051 		break;
2052 	case VIRTIO_MODERN_BAR:
2053 		value = vi_modern_pci_read(vs, baridx, offset, size);
2054 		break;
2055 	default:
2056 		break;
2057 	}
2058 
2059 done:
2060 	if (vs->vs_mtx)
2061 		pthread_mutex_unlock(vs->vs_mtx);
2062 	return (value);
2063 }
2064 
2065 /*
2066  * Handle virtio bar writes.
2067  *
2068  * If it's to the MSI-X info, dispatch the writes to the msix handling code.
2069  * Otherwise, dispatch the writes to virtio device code.
2070  */
2071 void
vi_pci_write(struct pci_devinst * pi,int baridx,uint64_t offset,int size,uint64_t value)2072 vi_pci_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2073     uint64_t value)
2074 {
2075 	struct virtio_softc *vs = pi->pi_arg;
2076 	struct virtio_consts *vc = vs->vs_vc;
2077 
2078 	if ((vs->vs_flags & VIRTIO_USE_MSIX) != 0 &&
2079 	    (baridx == pci_msix_table_bar(pi) ||
2080 	    baridx == pci_msix_pba_bar(pi))) {
2081 		if (pci_emul_msix_twrite(pi, offset, size, value) == 0 &&
2082 		    vc->vc_update_msix != NULL) {
2083 			vc->vc_update_msix(DEV_SOFTC(vs), offset);
2084 		}
2085 		return;
2086 	}
2087 
2088 	if (vs->vs_mtx)
2089 		pthread_mutex_lock(vs->vs_mtx);
2090 
2091 	if (size != 1 && size != 2 && size != 4)
2092 		goto done;
2093 
2094 	switch (baridx) {
2095 	case VIRTIO_LEGACY_BAR:
2096 		vi_legacy_pci_write(vs, offset, size, value);
2097 		break;
2098 	case VIRTIO_MODERN_BAR:
2099 		vi_modern_pci_write(vs, baridx, offset, size, value);
2100 		break;
2101 	default:
2102 		break;
2103 	}
2104 
2105 done:
2106 	if (vs->vs_mtx)
2107 		pthread_mutex_unlock(vs->vs_mtx);
2108 }
2109