1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2013 Chris Torek <torek @ torek net>
5 * All rights reserved.
6 * Copyright (c) 2019 Joyent, Inc.
7 * Copyright (c) 2021 The FreeBSD Foundation
8 *
9 * Portions of this software were developed by Ka Ho Ng
10 * under sponsorship of the FreeBSD Foundation.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33 /*
34 * This file and its contents are supplied under the terms of the
35 * Common Development and Distribution License ("CDDL"), version 1.0.
36 * You may only use this file in accordance with the terms of version
37 * 1.0 of the CDDL.
38 *
39 * A full copy of the text of the CDDL should have accompanied this
40 * source. A copy of the CDDL is also available via the Internet at
41 * http://www.illumos.org/license/CDDL.
42 */
43 /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
44
45 /*
46 * Copyright 2026 Oxide Computer Company
47 */
48
49 #include <sys/stdbool.h>
50 #include <sys/param.h>
51 #include <sys/uio.h>
52 #include <sys/sysmacros.h>
53 #include <sys/debug.h>
54
55 #include <machine/atomic.h>
56
57 #include <inttypes.h>
58 #include <stddef.h>
59 #include <stdio.h>
60 #include <stdint.h>
61 #include <stdarg.h>
62 #include <string.h>
63 #include <pthread.h>
64 #include <pthread_np.h>
65
66 #include "bhyverun.h"
67 #include "config.h"
68 #include "debug.h"
69 #include "pci_emul.h"
70 #include "virtio.h"
71
72 /*
73 * Functions for dealing with generalized "virtual devices" as
74 * defined by <https://www.google.com/#output=search&q=virtio+spec>
75 *
76 * The reference for the implementation of virtio modern is on
77 * <https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/>
78 */
79
/*
 * Emit a debug line on stdout, followed by a flush, but only when the
 * VIRTIO_DEBUG flag is set in the given softc's vs_flags.
 */
#define	DPRINTF(vs, fmt, ...)						\
	do {								\
		if (((vs)->vs_flags & VIRTIO_DEBUG) != 0) {		\
			FPRINTLN(stdout, fmt, ##__VA_ARGS__);		\
			fflush(stdout);					\
		}							\
	} while (0)
87
/*
 * Value placed in the notify capability's notify_off_multiplier field and
 * used as the per-queue stride when sizing the notification BAR area.
 */
#define	VQ_NOTIFY_OFF_MULTIPLIER	(sizeof (uint32_t))
89
/*
 * Convert a virtio softc pointer into the owning device's softc pointer.
 * Today the virtio softc is required to sit at the front of the device
 * softc, so this is just a cast; going through the macro keeps callers
 * working should that constraint ever be relaxed.
 */
#define	DEV_SOFTC(vs)	((void *)(vs))
96
/*
 * Mask covering the low `nbytes` bytes of a 32-bit value.  Widths of four
 * bytes or more yield an all-ones 32-bit mask and a width of zero yields
 * an empty mask.  The mask is built by shifting left by at most 24 bits,
 * so a zero width never results in an (undefined) shift by 32.
 */
#define	VI_MASK(nbytes)							\
	(((nbytes) >= 4) ? 0xFFFFFFFFu : ((1u << (8 * (nbytes))) - 1u))
99
/*
 * Forward declarations.
 * NOTE(review): the definitions are not visible in this part of the file;
 * confirm parameter meanings against them.
 */
static uint64_t vi_modern_pci_read(struct virtio_softc *, int, uint64_t, int);
static void vi_modern_pci_write(struct virtio_softc *, int, uint64_t, int,
    uint64_t);
103
104 void
vi_queue_linkup(struct virtio_softc * vs,struct vqueue_info * queues)105 vi_queue_linkup(struct virtio_softc *vs, struct vqueue_info *queues)
106 {
107 struct virtio_consts *vc = vs->vs_vc;
108
109 vs->vs_queues = queues;
110
111 for (int i = 0; i < vc->vc_nvq; i++) {
112 vs->vs_queues[i].vq_vs = vs;
113 vs->vs_queues[i].vq_num = i;
114 }
115 }
116
117 /*
118 * Link a virtio_softc to its constants, the device softc, and
119 * the PCI emulation.
120 */
121 void
vi_softc_linkup(struct virtio_softc * vs,struct virtio_consts * vc,void * dev_softc,struct pci_devinst * pi,struct vqueue_info * queues)122 vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
123 void *dev_softc, struct pci_devinst *pi, struct vqueue_info *queues)
124 {
125 /* vs and dev_softc addresses must match */
126 assert((void *)vs == dev_softc);
127 vs->vs_vc = vc;
128 vs->vs_pi = pi;
129 pi->pi_arg = vs;
130
131 vi_queue_linkup(vs, queues);
132 }
133
134 /*
135 * Reset device (device-wide). This erases all queues, i.e.,
136 * all the queues become invalid (though we don't wipe out the
137 * internal pointers, we just clear the VQ_ALLOC flag).
138 *
139 * It resets negotiated features to "none".
140 *
141 * If MSI-X is enabled, this also resets all the vectors to NO_VECTOR.
142 */
143 void
vi_reset_dev(struct virtio_softc * vs)144 vi_reset_dev(struct virtio_softc *vs)
145 {
146 struct vqueue_info *vq;
147 int i, nvq;
148
149 if (vs->vs_mtx)
150 assert(pthread_mutex_isowned_np(vs->vs_mtx));
151
152 nvq = vs->vs_vc->vc_nvq;
153 for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
154 vq->vq_flags = 0;
155 vq->vq_last_avail = 0;
156 vq->vq_next_used = 0;
157 vq->vq_save_used = 0;
158 vq->vq_pfn = 0;
159 vq->vq_desc_gpa = vq->vq_avail_gpa = vq->vq_used_gpa = 0;
160 vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
161 }
162 vs->vs_negotiated_caps = 0;
163 vs->vs_curq = 0;
164 if (vs->vs_isr != 0)
165 pci_lintr_deassert(vs->vs_pi);
166 vs->vs_isr = 0;
167 vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR;
168 }
169
170 /*
171 * These are the capability bits common to all virtio devices.
172 */
173 static const virtio_capstr_t virtio_caps[] = {
174 { VIRTIO_F_NOTIFY_ON_EMPTY, "VIRTIO_F_NOTIFY_ON_EMPTY" },
175 { VIRTIO_F_ANY_LAYOUT, "VIRTIO_F_ANY_LAYOUT" },
176 { VIRTIO_RING_F_INDIRECT_DESC, "VIRTIO_RING_F_INDIRECT_DESC" },
177 { VIRTIO_RING_F_EVENT_IDX, "VIRTIO_RING_F_EVENT_IDX" },
178 { VIRTIO_F_BAD_FEATURE, "VIRTIO_F_BAD_FEATURE" },
179 { VIRTIO_F_VERSION_1, "VIRTIO_F_VERSION_1" },
180 };
181
182 static void
vi_print_caps(struct virtio_softc * vs,uint64_t caps)183 vi_print_caps(struct virtio_softc *vs, uint64_t caps)
184 {
185 struct virtio_consts *vc = vs->vs_vc;
186
187 if ((vs->vs_flags & VIRTIO_DEBUG) == 0)
188 return;
189
190 for (size_t i = 0; i < vc->vc_ncapstr; i++) {
191 if ((caps & vc->vc_capstr[i].vp_flag) != 0)
192 FPRINTLN(stdout, " -> %s", vc->vc_capstr[i].vp_name);
193 }
194 for (size_t i = 0; i < ARRAY_SIZE(virtio_caps); i++) {
195 if ((caps & virtio_caps[i].vp_flag) != 0)
196 FPRINTLN(stdout, " -> %s", virtio_caps[i].vp_name);
197 }
198 fflush(stdout);
199 }
200
201 void
vi_set_debug(struct virtio_softc * vs,bool debug)202 vi_set_debug(struct virtio_softc *vs, bool debug)
203 {
204 if (debug)
205 vs->vs_flags |= VIRTIO_DEBUG;
206 else
207 vs->vs_flags &= ~VIRTIO_DEBUG;
208 }
209
210 bool
vi_is_modern(struct virtio_softc * vs)211 vi_is_modern(struct virtio_softc *vs)
212 {
213 return (vs->vs_negotiated_caps & VIRTIO_F_VERSION_1) != 0;
214 }
215
216 void __PRINTFLIKE(2)
vi_error(struct virtio_softc * vs,const char * fmt,...)217 vi_error(struct virtio_softc *vs, const char *fmt, ...)
218 {
219 va_list ap;
220
221 va_start(ap, fmt);
222 vfprintf(stderr, fmt, ap);
223 fprintf(stderr, "%s", raw_stdio ? "\r\n" : "\n");
224 va_end(ap);
225
226 if (vi_is_modern(vs)) {
227 vs->vs_status |= VTCFG_STATUS_NEEDS_RST;
228 vq_devcfg_changed(vs);
229 }
230
231 vs->vs_flags |= VIRTIO_BROKEN;
232 }
233
234 /*
235 * Set I/O BAR (usually 0) to map legacy PCI config registers.
236 */
237 static bool
vi_legacy_iobar_setup(struct virtio_softc * vs,int barnum)238 vi_legacy_iobar_setup(struct virtio_softc *vs, int barnum)
239 {
240 size_t size;
241
242 /*
243 * We set the size to that which will accommodate the configuration
244 * space with MSI-X enabled, plus the configuration size.
245 */
246 size = VIRTIO_PCI_CONFIG_OFF(1) + vs->vs_vc->vc_cfgsize;
247 if (pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size) != 0)
248 return (false);
249
250 return (true);
251 }
252
253 virtio_pci_capcfg_t *
vi_pci_cfg_bytype(struct virtio_softc * vs,uint8_t cfgtype)254 vi_pci_cfg_bytype(struct virtio_softc *vs, uint8_t cfgtype)
255 {
256 for (uint_t i = 0; i < vs->vs_ncaps; i++) {
257 if (vs->vs_caps[i].c_captype == cfgtype)
258 return (&vs->vs_caps[i]);
259 }
260 return (NULL);
261 }
262
263 virtio_pci_capcfg_t *
vi_pci_cfg_bycapaddr(struct virtio_softc * vs,uint32_t start,uint32_t size)264 vi_pci_cfg_bycapaddr(struct virtio_softc *vs, uint32_t start, uint32_t size)
265 {
266 if (size == 0 || start > UINT32_MAX - size)
267 return (NULL);
268
269 const uint32_t end = start + size;
270
271 for (uint_t i = 0; i < vs->vs_ncaps; i++) {
272 virtio_pci_capcfg_t *cfg = &vs->vs_caps[i];
273 const uint32_t cap_start = cfg->c_capoff;
274 const uint32_t cap_end = cap_start + cfg->c_caplen;
275
276 if (cap_start <= start && end <= cap_end)
277 return (cfg);
278 }
279
280 return (NULL);
281 }
282
283 virtio_pci_capcfg_t *
vi_pci_cfg_bybaraddr(struct virtio_softc * vs,uint8_t bar,uint64_t offset,uint32_t size)284 vi_pci_cfg_bybaraddr(struct virtio_softc *vs, uint8_t bar, uint64_t offset,
285 uint32_t size)
286 {
287 /*
288 * We currently don't use the larger capabilities introduced in VirtIO
289 * 1.2 that allow for 64-bit offsets and sizes.
290 */
291 if (size == 0 || offset > UINT32_MAX - size)
292 return (NULL);
293
294 const uint32_t end = offset + size;
295
296 for (uint_t i = 0; i < vs->vs_ncaps; i++) {
297 virtio_pci_capcfg_t *cfg = &vs->vs_caps[i];
298
299 if (cfg->c_baridx != bar)
300 continue;
301
302 const uint32_t bar_start = cfg->c_baroff;
303 const uint32_t bar_end = bar_start + cfg->c_barlen;
304
305 if (bar_start <= offset && end <= bar_end)
306 return (cfg);
307 }
308
309 return (NULL);
310 }
311
312 /*
313 * Add a modern configuration structure capability.
314 */
315 static bool
vi_modern_add_cfg(struct virtio_softc * vs,struct virtio_pci_cap * cap,int barnum,uint32_t baroff,uint32_t barlen,uint8_t caplen,uint8_t cfgtype)316 vi_modern_add_cfg(struct virtio_softc *vs, struct virtio_pci_cap *cap,
317 int barnum, uint32_t baroff, uint32_t barlen, uint8_t caplen,
318 uint8_t cfgtype)
319 {
320 int capoff;
321
322 cap->cap_vndr = PCIY_VENDOR;
323 cap->cap_len = caplen;
324 cap->cfg_type = cfgtype;
325 cap->bar = barnum;
326 cap->id = 0;
327 cap->offset = baroff;
328 cap->length = barlen;
329 if (pci_emul_add_capability(vs->vs_pi, (u_char *)cap, caplen,
330 &capoff) != 0) {
331 return (false);
332 }
333
334 vs->vs_caps[vs->vs_ncaps].c_captype = cfgtype;
335 vs->vs_caps[vs->vs_ncaps].c_baridx = cap->bar;
336 vs->vs_caps[vs->vs_ncaps].c_baroff = cap->offset;
337 vs->vs_caps[vs->vs_ncaps].c_barlen = cap->length;
338 vs->vs_caps[vs->vs_ncaps].c_capoff = capoff;
339 vs->vs_caps[vs->vs_ncaps].c_caplen = caplen;
340 vs->vs_ncaps++;
341 VERIFY3U(vs->vs_ncaps, <=, sizeof (vs->vs_caps));
342
343 return (true);
344 }
345
346 /*
347 * Add COMMON_CFG configuration structure capability.
348 */
349 static bool
vi_modern_add_common_cfg(struct virtio_softc * vs,int barnum,uint32_t * offp)350 vi_modern_add_common_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp)
351 {
352 struct virtio_pci_cap cap;
353 uint32_t bardatalen;
354
355 *offp = roundup2(*offp, VIRTIO_PCI_CAP_COMMON_CFG_ALIGN);
356 /*
357 * We choose to round this BAR area up to a page size in common with
358 * other hypervisors.
359 */
360 bardatalen = roundup2(sizeof (struct virtio_pci_common_cfg), PAGE_SIZE);
361
362 memset(&cap, 0, sizeof (cap));
363 if (vi_modern_add_cfg(vs, &cap, barnum, *offp, bardatalen, sizeof (cap),
364 VIRTIO_PCI_CAP_COMMON_CFG)) {
365 *offp += bardatalen;
366 return (true);
367 }
368 return (false);
369 }
370
371 /*
372 * Add NOTIFY_CFG configuration structure capability.
373 */
374 static bool
vi_modern_add_notify_cfg(struct virtio_softc * vs,int barnum,uint32_t * offp)375 vi_modern_add_notify_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp)
376 {
377 struct virtio_pci_notify_cap cap;
378 struct virtio_consts *vc = vs->vs_vc;
379 int numq = MAX(vc->vc_max_nvq, vc->vc_nvq);
380 uint32_t bardatalen;
381
382 VERIFY3S(numq, >, 0);
383 VERIFY3S(numq, <=, UINT16_MAX);
384
385 *offp = roundup2(*offp, VIRTIO_PCI_CAP_NOTIFY_CFG_ALIGN);
386 /*
387 * We choose to round this BAR area up to a page size in common with
388 * other hypervisors.
389 */
390 uint64_t datalen = (uint64_t)numq * VQ_NOTIFY_OFF_MULTIPLIER;
391 VERIFY3U(datalen, <=, UINT32_MAX - (PAGE_SIZE - 1));
392 bardatalen = roundup2((uint32_t)datalen, PAGE_SIZE);
393
394 memset(&cap, 0, sizeof (cap));
395 cap.notify_off_multiplier = VQ_NOTIFY_OFF_MULTIPLIER;
396 if (vi_modern_add_cfg(vs, &cap.cap, barnum, *offp, bardatalen,
397 sizeof (cap), VIRTIO_PCI_CAP_NOTIFY_CFG)) {
398 *offp += bardatalen;
399 return (true);
400 }
401 return (false);
402 }
403
404 /*
405 * Add ISR_CFG configuration structure capability.
406 */
407 static bool
vi_modern_add_isr_cfg(struct virtio_softc * vs,int barnum,uint32_t * offp)408 vi_modern_add_isr_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp)
409 {
410 struct virtio_pci_cap cap;
411 uint32_t bardatalen;
412
413 *offp = roundup2(*offp, VIRTIO_PCI_CAP_ISR_CFG_ALIGN);
414 /*
415 * While this capability could point to a single byte in the BAR, we
416 * choose to round up to a page in common with other hypervisors.
417 */
418 bardatalen = PAGE_SIZE;
419
420 memset(&cap, 0, sizeof (cap));
421 if (vi_modern_add_cfg(vs, &cap, barnum, *offp, bardatalen, sizeof (cap),
422 VIRTIO_PCI_CAP_ISR_CFG)) {
423 *offp += bardatalen;
424 return (true);
425 }
426 return (false);
427 }
428
429 /*
430 * Add DEV_CFG configuration structure capability.
431 */
432 static bool
vi_modern_add_dev_cfg(struct virtio_softc * vs,int barnum,uint32_t * offp)433 vi_modern_add_dev_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp)
434 {
435 struct virtio_pci_cap cap;
436 uint32_t bardatalen;
437
438 *offp = roundup2(*offp, VIRTIO_PCI_CAP_DEVICE_CFG_ALIGN);
439 /*
440 * We choose to round this BAR area up to a page size in common with
441 * other hypervisors.
442 */
443 bardatalen = PAGE_SIZE;
444
445 memset(&cap, 0, sizeof (cap));
446 if (vi_modern_add_cfg(vs, &cap, barnum, *offp, bardatalen, sizeof (cap),
447 VIRTIO_PCI_CAP_DEVICE_CFG)) {
448 *offp += bardatalen;
449 return (true);
450 }
451 return (false);
452 }
453
454 /*
455 * Add PCI_CFG configuration structure capability.
456 */
457 static bool
vi_modern_add_pci_cfg(struct virtio_softc * vs)458 vi_modern_add_pci_cfg(struct virtio_softc *vs)
459 {
460 struct virtio_pci_cfg_cap cap;
461
462 memset(&cap, 0, sizeof (cap));
463 memset(cap.pci_cfg_data, 0xff, sizeof (cap.pci_cfg_data));
464 if (vi_modern_add_cfg(vs, &cap.cap, 0, 0, 0, sizeof (cap),
465 VIRTIO_PCI_CAP_PCI_CFG)) {
466 vs->vs_pcicap = &vs->vs_caps[vs->vs_ncaps - 1];
467 return (true);
468 }
469 return (false);
470 }
471
472 /*
473 * Set up Virtio modern device pci configuration space
474 */
475 static bool
vi_modern_membar_setup(struct virtio_softc * vs,int barnum)476 vi_modern_membar_setup(struct virtio_softc *vs, int barnum)
477 {
478 uint32_t baroff = 0;
479 bool ret = false;
480
481 ret |= vi_modern_add_common_cfg(vs, barnum, &baroff);
482 ret |= vi_modern_add_notify_cfg(vs, barnum, &baroff);
483 ret |= vi_modern_add_dev_cfg(vs, barnum, &baroff);
484 ret |= vi_modern_add_isr_cfg(vs, barnum, &baroff);
485 ret |= vi_modern_add_pci_cfg(vs);
486 if (!ret)
487 return (false);
488 if (pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_MEM64, baroff) != 0)
489 return (false);
490 return (true);
491 }
492
493 void
vi_pci_init(struct pci_devinst * pi,virtio_mode_t mode,uint16_t legacy,uint16_t device_id,uint8_t class)494 vi_pci_init(struct pci_devinst *pi, virtio_mode_t mode,
495 uint16_t legacy, uint16_t device_id, uint8_t class)
496 {
497 struct virtio_softc *vs = pi->pi_arg;
498
499 DPRINTF(vs, "VIRTIO %s PCI init mode=%x, legacy=0x%x devid=0x%x",
500 vs->vs_vc->vc_name, mode, legacy, device_id);
501
502 /*
503 * We provide global options to force transitional devices to present
504 * as pure legacy or modern. This is mostly to support testing guest
505 * drivers or bhyve itself.
506 *
507 * TRANSITIONAL mode usually exposes both interfaces
508 * - virtio.legacy=false forces a modern-only device
509 * - virtio.modern=false forces a legacy-only device
510 */
511 if (mode == VIRTIO_MODE_TRANSITIONAL) {
512 if (!get_config_bool_default("virtio.legacy", true))
513 mode = VIRTIO_MODE_MODERN;
514 else if (!get_config_bool_default("virtio.modern", true))
515 mode = VIRTIO_MODE_LEGACY;
516 }
517
518 vs->vs_mode = mode;
519
520 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
521 pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
522 pci_set_cfgdata8(pi, PCIR_CLASS, class);
523
524 if (mode == VIRTIO_MODE_MODERN) {
525 /*
526 * Pure modern / non-transitional device.
527 *
528 * Virtio 1.2, 4.1.2.1:
529 * - PCI Device ID = 0x1040 + virtio device ID
530 * - PCI Revision ID> >= 1
531 * - PCI Subsystem Device ID >= 0x40
532 *
533 * `device_id` here is the virtio Device ID from section 5
534 * [0x0-0x3f].
535 */
536 VERIFY3U(device_id, <=, 0x3f);
537 pci_set_cfgdata16(pi, PCIR_DEVICE,
538 VIRTIO_PCI_DEVICEID_MODERN_MIN + device_id);
539 /*
540 * For modern devices the spec only recommends that the
541 * Subsystem Device ID be >= 0x40 to avoid legacy binding.
542 * We choose to mirror the main device ID here so that the
543 * (vendor,device) and (subvendor,subdevice) pairs line up.
544 */
545 pci_set_cfgdata16(pi, PCIR_SUBDEV_0,
546 VIRTIO_PCI_DEVICEID_MODERN_MIN + device_id);
547 pci_set_cfgdata16(pi, PCIR_REVID, 1);
548 } else {
549 /*
550 * Legacy-only or transitional device.
551 *
552 * For *transitional* devices, virtio 1.2, 4.1.2.3 requires:
553 * - PCI Device ID in [0x1000, 0x103f]
554 * - PCI Revision ID == 0
555 * - PCI Subsystem Device ID == virtio Device ID
556 *
557 * We rely on the caller to pass:
558 * - `legacy` the 0x1000-0x103f PCI Device ID
559 * - `device_id` the virtio Device ID from section 5
560 *
561 * For a true legacy-only device this layout is also compatible
562 * with old drivers.
563 */
564 VERIFY(legacy >= 0x1000 && legacy <= 0x103f);
565 pci_set_cfgdata16(pi, PCIR_DEVICE, legacy);
566 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, device_id);
567 pci_set_cfgdata16(pi, PCIR_REVID, 0);
568 }
569 }
570
571 /*
572 * Set up Virtio device pci configuration space.
573 */
574 bool
vi_pcibar_setup(struct virtio_softc * vs)575 vi_pcibar_setup(struct virtio_softc *vs)
576 {
577 DPRINTF(vs, "VIRTIO %s set up PCI BARs", vs->vs_vc->vc_name);
578
579 assert(vs->vs_mode != VIRTIO_MODE_UNSET);
580
581 if (vs->vs_mode == VIRTIO_MODE_LEGACY ||
582 vs->vs_mode == VIRTIO_MODE_TRANSITIONAL) {
583 if (!vi_legacy_iobar_setup(vs, VIRTIO_LEGACY_BAR))
584 return (false);
585 }
586 if (vs->vs_mode == VIRTIO_MODE_MODERN ||
587 vs->vs_mode == VIRTIO_MODE_TRANSITIONAL) {
588 if (!vi_modern_membar_setup(vs, VIRTIO_MODERN_BAR))
589 return (false);
590 }
591
592 return (true);
593 }
594
595 /*
596 * Configure interrupt delivery for this VirtIO device.
597 *
598 * If requested, enable MSI-X and allocate one vector per queue plus
599 * a configuration vector. Regardless, always establish the mandatory
600 * legacy (INTx) interrupt as VirtIO devices do not support MSI and
601 * require a fixed interrupt line for compatibility.
602 */
603 bool
vi_intr_init(struct virtio_softc * vs,bool use_msix)604 vi_intr_init(struct virtio_softc *vs, bool use_msix)
605 {
606 if (use_msix) {
607 struct virtio_consts *vc = vs->vs_vc;
608 int nvec = MIN(MAX(vc->vc_max_nvq, vc->vc_nvq) + 1,
609 MAX_MSIX_TABLE_ENTRIES);
610
611 vs->vs_flags |= VIRTIO_USE_MSIX;
612 VS_LOCK(vs);
613 vi_reset_dev(vs); /* set all vectors to NO_VECTOR */
614 VS_UNLOCK(vs);
615 if (pci_emul_add_msixcap(vs->vs_pi, nvec, VIRTIO_MSIX_BAR) != 0)
616 return (false);
617 } else {
618 vs->vs_flags &= ~VIRTIO_USE_MSIX;
619 }
620
621 /* Legacy interrupts are mandatory for virtio devices */
622 pci_lintr_request(vs->vs_pi);
623
624 return (true);
625 }
626
627 /*
628 * Initialize the currently-selected virtio queue (vs->vs_curq)
629 */
630 void
vi_vq_init(struct virtio_softc * vs)631 vi_vq_init(struct virtio_softc *vs)
632 {
633 struct vqueue_info *vq;
634 uint64_t phys;
635 size_t size;
636 char *base;
637
638 vq = &vs->vs_queues[vs->vs_curq];
639
640 phys = vq->vq_desc_gpa;
641 size = vq->vq_qsize * sizeof (struct vring_desc);
642 base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size);
643 if (base == NULL) {
644 vi_error(vs, "Could not map queue 0x%x phys 0x%" PRIx64,
645 vq->vq_num, phys);
646 return;
647 }
648 vq->vq_desc = (struct vring_desc *)base;
649
650 phys = vq->vq_avail_gpa;
651 size = sizeof (struct vring_avail) + sizeof (uint16_t) +
652 vq->vq_qsize * sizeof (uint16_t);
653 base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size);
654 if (base == NULL) {
655 vi_error(vs, "Could not map queue 0x%x phys 0x%" PRIx64,
656 vq->vq_num, phys);
657 return;
658 }
659 vq->vq_avail = (struct vring_avail *)base;
660
661 phys = vq->vq_used_gpa;
662 size = sizeof (struct vring_used) + sizeof (uint16_t) +
663 vq->vq_qsize * sizeof (struct vring_used_elem);
664 base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size);
665 if (base == NULL) {
666 vi_error(vs, "Could not map queue 0x%x phys 0x%" PRIx64,
667 vq->vq_num, phys);
668 return;
669 }
670 vq->vq_used = (struct vring_used *)base;
671
672 /* Mark queue as allocated, and start at 0 when we use it. */
673 vq->vq_flags = VQ_ALLOC;
674 vq->vq_last_avail = 0;
675 vq->vq_next_used = 0;
676 vq->vq_save_used = 0;
677 }
678
679 /*
680 * Initialize the currently-selected virtio queue (vs->vs_curq).
681 * The guest just gave us a page frame number, from which we can
682 * calculate the addresses of the queue components.
683 */
684 void
vi_legacy_vq_init(struct virtio_softc * vs,uint32_t pfn)685 vi_legacy_vq_init(struct virtio_softc *vs, uint32_t pfn)
686 {
687 struct vqueue_info *vq;
688 uint64_t phys;
689
690 vq = &vs->vs_queues[vs->vs_curq];
691 vq->vq_pfn = pfn;
692 phys = (uint64_t)pfn << LEGACY_VRING_PFN;
693
694 /* First page(s) are descriptors... */
695 vq->vq_desc_gpa = phys;
696 phys += vq->vq_qsize * sizeof (struct vring_desc);
697 /* ... immediately followed by "avail" ring (entirely uint16_t's) */
698 vq->vq_avail_gpa = phys;
699 phys += sizeof (struct vring_avail) + sizeof (uint16_t) +
700 vq->vq_qsize * sizeof (uint16_t);
701 /* Then it's rounded up to the next page... */
702 phys = roundup2(phys, LEGACY_VRING_ALIGN);
703 /* ... and the last page(s) are the used ring. */
704 vq->vq_used_gpa = phys;
705
706 vi_vq_init(vs);
707 }
708
709 /*
710 * Helper inline for vq_getchain(): record the i'th "real"
711 * descriptor.
712 */
713 static inline void
_vq_record(struct virtio_softc * vs,int i,struct vring_desc * vd,struct iovec * iov,int n_iov,struct vi_req * reqp)714 _vq_record(struct virtio_softc *vs, int i, struct vring_desc *vd,
715 struct iovec *iov, int n_iov, struct vi_req *reqp)
716 {
717 struct vmctx *ctx;
718 uint32_t len;
719 uint64_t addr;
720
721 ctx = vs->vs_pi->pi_vmctx;
722
723 if (i >= n_iov)
724 return;
725 len = atomic_load_32(&vd->len);
726 addr = atomic_load_64(&vd->addr);
727 iov[i].iov_len = len;
728 iov[i].iov_base = paddr_guest2host(ctx, addr, len);
729 if ((vd->flags & VRING_DESC_F_WRITE) == 0)
730 reqp->readable++;
731 else
732 reqp->writable++;
733 }

/*
 * Limit on the number of "real" descriptors vq_getchain() will walk in a
 * single chain before it concludes that the chain loops.
 */
#define	VQ_MAX_DESCRIPTORS	512
735
736 /*
737 * Examine the chain of descriptors starting at the "next one" to
738 * make sure that they describe a sensible request. If so, return
739 * the number of "real" descriptors that would be needed/used in
740 * acting on this request. This may be smaller than the number of
741 * available descriptors, e.g., if there are two available but
742 * they are two separate requests, this just returns 1. Or, it
743 * may be larger: if there are indirect descriptors involved,
744 * there may only be one descriptor available but it may be an
745 * indirect pointing to eight more. We return 8 in this case,
746 * i.e., we do not count the indirect descriptors, only the "real"
747 * ones.
748 *
749 * Basically, this vets the "flags" and "next" field of each
750 * descriptor and tells you how many are involved. Since some may
751 * be indirect, this also needs the vmctx (in the pci_devinst
752 * at vs->vs_pi) so that it can find indirect descriptors.
753 *
 * As we process each descriptor, we copy and adjust it (guest to
 * host address wise, also using the vmctx) into the given iov[]
756 * array (of the given size). If the array overflows, we stop
757 * placing values into the array but keep processing descriptors,
758 * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
759 * So you, the caller, must not assume that iov[] is as big as the
760 * return value (you can process the same thing twice to allocate
761 * a larger iov array if needed, or supply a zero length to find
762 * out how much space is needed).
763 *
764 * If some descriptor(s) are invalid, this prints a diagnostic message
765 * and returns -1. If no descriptors are ready now it simply returns 0.
766 *
767 * You are assumed to have done a vq_ring_ready() if needed (note
768 * that vq_has_descs() does one).
769 */
770 int
vq_getchain(struct vqueue_info * vq,struct iovec * iov,int niov,struct vi_req * reqp)771 vq_getchain(struct vqueue_info *vq, struct iovec *iov, int niov,
772 struct vi_req *reqp)
773 {
774 int i;
775 u_int ndesc, n_indir;
776 u_int idx, next;
777 struct vi_req req;
778 struct vring_desc *vdir, *vindir, *vp;
779 struct vmctx *ctx;
780 struct virtio_softc *vs;
781 const char *name;
782
783 vs = vq->vq_vs;
784 name = vs->vs_vc->vc_name;
785 memset(&req, 0, sizeof (req));
786
787 /*
788 * Note: it's the responsibility of the guest not to
789 * update vq->vq_avail->idx until all of the descriptors
790 * the guest has written are valid (including all their
791 * "next" fields and "flags").
792 *
793 * Compute (vq_avail->idx - last_avail) in integers mod 2**16. This is
794 * the number of descriptors the device has made available
795 * since the last time we updated vq->vq_last_avail.
796 *
797 * We just need to do the subtraction as an unsigned int,
798 * then trim off excess bits.
799 */
800 idx = vq->vq_last_avail;
801 ndesc = (uint16_t)((u_int)vq->vq_avail->idx - idx);
802 if (ndesc == 0)
803 return (0);
804 if (ndesc > vq->vq_qsize) {
805 vi_error(vs,
806 "%s: ndesc (%u) out of range, driver confused?",
807 name, (u_int)ndesc);
808 return (-1);
809 }
810
811 /*
812 * Now count/parse "involved" descriptors starting from
813 * the head of the chain.
814 *
815 * To prevent loops, we could be more complicated and
816 * check whether we're re-visiting a previously visited
817 * index, but we just abort if the count gets excessive.
818 */
819 ctx = vs->vs_pi->pi_vmctx;
820 req.idx = next = vq->vq_avail->ring[idx & (vq->vq_qsize - 1)];
821 vq->vq_last_avail++;
822 for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->next) {
823 if (next >= vq->vq_qsize) {
824 vi_error(vs,
825 "%s: descriptor index %u out of range, "
826 "driver confused?",
827 name, next);
828 return (-1);
829 }
830 vdir = &vq->vq_desc[next];
831 if ((vdir->flags & VRING_DESC_F_INDIRECT) == 0) {
832 _vq_record(vs, i, vdir, iov, niov, &req);
833 i++;
834 } else if ((vs->vs_negotiated_caps &
835 VIRTIO_RING_F_INDIRECT_DESC) == 0) {
836 vi_error(vs,
837 "%s: descriptor has forbidden INDIRECT flag, "
838 "driver confused?",
839 name);
840 return (-1);
841 } else {
842 n_indir = vdir->len / 16;
843 if ((vdir->len & 0xf) || n_indir == 0) {
844 vi_error(vs,
845 "%s: invalid indir len 0x%x, "
846 "driver confused?",
847 name, (u_int)vdir->len);
848 return (-1);
849 }
850 vindir = paddr_guest2host(ctx,
851 vdir->addr, vdir->len);
852 /*
853 * Indirects start at the 0th, then follow
854 * their own embedded "next"s until those run
855 * out. Each one's indirect flag must be off
856 * (we don't really have to check, could just
857 * ignore errors...).
858 */
859 next = 0;
860 for (;;) {
861 vp = &vindir[next];
862 if (vp->flags & VRING_DESC_F_INDIRECT) {
863 vi_error(vs,
864 "%s: indirect desc has INDIR flag,"
865 " driver confused?",
866 name);
867 return (-1);
868 }
869 _vq_record(vs, i, vp, iov, niov, &req);
870 if (++i > VQ_MAX_DESCRIPTORS)
871 goto loopy;
872 if ((vp->flags & VRING_DESC_F_NEXT) == 0)
873 break;
874 next = vp->next;
875 if (next >= n_indir) {
876 vi_error(vs,
877 "%s: invalid next %u > %u, "
878 "driver confused?",
879 name, (u_int)next, n_indir);
880 return (-1);
881 }
882 }
883 }
884 if ((vdir->flags & VRING_DESC_F_NEXT) == 0)
885 goto done;
886 }
887
888 loopy:
889 vi_error(vs, "%s: descriptor loop? count > %d - driver confused?",
890 name, i);
891 return (-1);
892
893 done:
894 *reqp = req;
895 return (i);
896 }
897
898 /*
899 * Return the first n_chain request chains back to the available queue.
900 *
901 * (These chains are the ones you handled when you called vq_getchain()
902 * and used its positive return value.)
903 */
904 void
vq_retchains(struct vqueue_info * vq,uint16_t n_chains)905 vq_retchains(struct vqueue_info *vq, uint16_t n_chains)
906 {
907
908 vq->vq_last_avail -= n_chains;
909 }
910
911 void
vq_relchain_prepare(struct vqueue_info * vq,uint16_t idx,uint32_t iolen)912 vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
913 {
914 struct vring_used *vuh;
915 struct vring_used_elem *vue;
916 uint16_t mask;
917
918 /*
919 * Notes:
920 * - mask is N-1 where N is a power of 2 so computes x % N
921 * - vuh points to the "used" data shared with guest
922 * - vue points to the "used" ring entry we want to update
923 */
924 mask = vq->vq_qsize - 1;
925 vuh = vq->vq_used;
926
927 vue = &vuh->ring[vq->vq_next_used++ & mask];
928 vue->id = idx;
929 vue->len = iolen;
930 }
931
932 void
vq_relchain_publish(struct vqueue_info * vq)933 vq_relchain_publish(struct vqueue_info *vq)
934 {
935 /*
936 * Ensure the used descriptor is visible before updating the index.
937 * This is necessary on ISAs with memory ordering less strict than x86
938 * (and even on x86 to act as a compiler barrier).
939 */
940 atomic_thread_fence_rel();
941 vq->vq_used->idx = vq->vq_next_used;
942 }
943
/*
 * Return one request chain to the guest with the given I/O length: the
 * used-ring entry is prepared and then immediately published.
 *
 * (The chain is one previously obtained through a positive return from
 * vq_getchain().)
 */
void
vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
{
	/* Fill in the used entry first... */
	vq_relchain_prepare(vq, idx, iolen);
	/* ... then expose it through the shared index. */
	vq_relchain_publish(vq);
}
957
958 /*
959 * Driver has finished processing "available" chains and calling
960 * vq_relchain on each one. If driver used all the available
961 * chains, used_all should be set.
962 *
963 * If the "used" index moved we may need to inform the guest, i.e.,
964 * deliver an interrupt. Even if the used index did NOT move we
965 * may need to deliver an interrupt, if the avail ring is empty and
966 * we are supposed to interrupt on empty.
967 *
968 * Note that used_all_avail is provided by the caller because it's
969 * a snapshot of the ring state when he decided to finish interrupt
970 * processing -- it's possible that descriptors became available after
971 * that point. (It's also typically a constant 1/True as well.)
972 */
973 void
vq_endchains(struct vqueue_info * vq,int used_all_avail)974 vq_endchains(struct vqueue_info *vq, int used_all_avail)
975 {
976 struct virtio_softc *vs;
977 uint16_t event_idx, new_idx, old_idx;
978 int intr;
979
980 /*
981 * Interrupt generation: if we're using EVENT_IDX,
982 * interrupt if we've crossed the event threshold.
983 * Otherwise interrupt is generated if we added "used" entries,
984 * but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
985 *
986 * In any case, though, if NOTIFY_ON_EMPTY is set and the
987 * entire avail was processed, we need to interrupt always.
988 */
989 vs = vq->vq_vs;
990 old_idx = vq->vq_save_used;
991 vq->vq_save_used = new_idx = vq->vq_used->idx;
992
993 /*
994 * Use full memory barrier between "idx" store from preceding
995 * vq_relchain() call and the loads from VQ_USED_EVENT_IDX() or
996 * "flags" field below.
997 */
998 atomic_thread_fence_seq_cst();
999 if (used_all_avail &&
1000 (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY)) {
1001 intr = 1;
1002 } else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) {
1003 event_idx = VQ_USED_EVENT_IDX(vq);
1004 /*
1005 * This calculation is per docs and the kernel
1006 * (see src/sys/dev/virtio/virtio_ring.h).
1007 */
1008 intr = (uint16_t)(new_idx - event_idx - 1) <
1009 (uint16_t)(new_idx - old_idx);
1010 } else {
1011 intr = new_idx != old_idx &&
1012 !(vq->vq_avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
1013 }
1014 if (intr)
1015 vq_interrupt(vs, vq);
1016 }
1017
1018 /* Note: these are in sorted order to make for a fast search */
1019 static struct config_reg {
1020 uint16_t cr_offset; /* register offset */
1021 uint8_t cr_size; /* size (bytes) */
1022 uint8_t cr_ro; /* true => reg is read only */
1023 const char *cr_name; /* name of reg */
1024 } legacy_cfg_regs[] = {
1025 { VIRTIO_PCI_HOST_FEATURES, 4, 1, "HOST_FEATURES" },
1026 { VIRTIO_PCI_GUEST_FEATURES, 4, 0, "GUEST_FEATURES" },
1027 { VIRTIO_PCI_QUEUE_PFN, 4, 0, "QUEUE_PFN" },
1028 { VIRTIO_PCI_QUEUE_NUM, 2, 1, "QUEUE_NUM" },
1029 { VIRTIO_PCI_QUEUE_SEL, 2, 0, "QUEUE_SEL" },
1030 { VIRTIO_PCI_QUEUE_NOTIFY, 2, 0, "QUEUE_NOTIFY" },
1031 { VIRTIO_PCI_STATUS, 1, 0, "STATUS" },
1032 { VIRTIO_PCI_ISR, 1, 0, "ISR" },
1033 { VIRTIO_MSI_CONFIG_VECTOR, 2, 0, "CONFIG_VECTOR" },
1034 { VIRTIO_MSI_QUEUE_VECTOR, 2, 0, "QUEUE_VECTOR" },
1035 }, common_cfg_regs[] = {
1036 { VIRTIO_PCI_COMMON_DFSELECT, 4, 0, "DFSELECT" },
1037 { VIRTIO_PCI_COMMON_DF, 4, 1, "DF" },
1038 { VIRTIO_PCI_COMMON_GFSELECT, 4, 0, "GFSELECT" },
1039 { VIRTIO_PCI_COMMON_GF, 4, 0, "GF" },
1040 { VIRTIO_PCI_COMMON_MSIX, 2, 0, "MSIX" },
1041 { VIRTIO_PCI_COMMON_NUMQ, 2, 1, "NUMQ" },
1042 { VIRTIO_PCI_COMMON_STATUS, 1, 0, "STATUS" },
1043 { VIRTIO_PCI_COMMON_CFGGENERATION, 1, 1, "CFGGENERATION" },
1044 { VIRTIO_PCI_COMMON_Q_SELECT, 2, 0, "Q_SELECT" },
1045 { VIRTIO_PCI_COMMON_Q_SIZE, 2, 0, "Q_SIZE" },
1046 { VIRTIO_PCI_COMMON_Q_MSIX, 2, 0, "Q_MSIX" },
1047 { VIRTIO_PCI_COMMON_Q_ENABLE, 2, 0, "Q_ENABLE" },
1048 { VIRTIO_PCI_COMMON_Q_NOFF, 2, 1, "Q_NOFF" },
1049 { VIRTIO_PCI_COMMON_Q_DESCLO, 4, 0, "Q_DESCLO" },
1050 { VIRTIO_PCI_COMMON_Q_DESCHI, 4, 0, "Q_DESCHI" },
1051 { VIRTIO_PCI_COMMON_Q_AVAILLO, 4, 0, "Q_AVAILLO" },
1052 { VIRTIO_PCI_COMMON_Q_AVAILHI, 4, 0, "Q_AVAILHI" },
1053 { VIRTIO_PCI_COMMON_Q_USEDLO, 4, 0, "Q_USEDLO" },
1054 { VIRTIO_PCI_COMMON_Q_USEDHI, 4, 0, "Q_USEDHI" },
1055 };
1056
1057 static inline struct config_reg *
vi_find_cr(struct config_reg * regstbl,size_t n,int offset)1058 vi_find_cr(struct config_reg *regstbl, size_t n, int offset) {
1059 u_int hi, lo, mid;
1060 struct config_reg *cr;
1061
1062 lo = 0;
1063 hi = n - 1;
1064 while (hi >= lo) {
1065 mid = (hi + lo) >> 1;
1066 cr = ®stbl[mid];
1067 if (cr->cr_offset == offset)
1068 return (cr);
1069 if (cr->cr_offset < offset)
1070 lo = mid + 1;
1071 else
1072 hi = mid - 1;
1073 }
1074 return (NULL);
1075 }
1076
1077 static uint64_t
vi_hv_features(struct virtio_softc * vs,bool modern)1078 vi_hv_features(struct virtio_softc *vs, bool modern)
1079 {
1080 return (modern ? vs->vs_vc->vc_hv_caps_modern | VIRTIO_F_VERSION_1 :
1081 vs->vs_vc->vc_hv_caps_legacy);
1082 }
1083
1084 /*
1085 * Handle legacy pci config space reads.
1086 *
1087 * If it's part of the legacy virtio config structure, do that.
1088 * Otherwise dispatch to the actual device backend's config read
1089 * callback.
1090 */
1091 static uint64_t
vi_legacy_pci_read(struct virtio_softc * vs,uint64_t offset,int size)1092 vi_legacy_pci_read(struct virtio_softc *vs, uint64_t offset, int size)
1093 {
1094 struct virtio_consts *vc;
1095 struct config_reg *cr;
1096 uint64_t virtio_config_size;
1097 const char *name;
1098 uint32_t newoff;
1099 uint32_t value;
1100 int error;
1101
1102 /* Checked by caller */
1103 assert(size == 1 || size == 2 || size == 4);
1104
1105 vc = vs->vs_vc;
1106 name = vc->vc_name;
1107 value = VI_MASK(size);
1108 virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(vs->vs_pi));
1109
1110 if (offset >= virtio_config_size) {
1111 /*
1112 * Subtract off the standard size (including MSI-X
1113 * registers if enabled) and dispatch to underlying driver.
1114 * If that fails, fall into general code.
1115 */
1116 newoff = offset - virtio_config_size;
1117 if (newoff + size > vc->vc_cfgsize)
1118 goto bad;
1119 if (vc->vc_cfgread != NULL) {
1120 error = (*vc->vc_cfgread)(DEV_SOFTC(vs),
1121 newoff, size, &value);
1122 } else {
1123 error = 0;
1124 }
1125 if (error == 0) {
1126 DPRINTF(vs, "VIRTIO %s LEGACY PCI devcfg read[0x%"
1127 PRIx64 "] = 0x%x", name, newoff, value);
1128 goto done;
1129 }
1130 }
1131
1132 bad:
1133 cr = vi_find_cr(legacy_cfg_regs, nitems(legacy_cfg_regs), offset);
1134 if (cr == NULL || cr->cr_size != size) {
1135 if (cr != NULL) {
1136 /* offset must be OK, so size must be bad */
1137 EPRINTLN(
1138 "%s: read from %s: bad size %d",
1139 name, cr->cr_name, size);
1140 } else {
1141 EPRINTLN(
1142 "%s: read from bad offset/size %jd/%d",
1143 name, (uintmax_t)offset, size);
1144 }
1145 goto done;
1146 }
1147
1148 switch (offset) {
1149 case VIRTIO_PCI_HOST_FEATURES:
1150 /* Caps for legacy PCI configuration layout is only 32bit */
1151 if (vc->vc_hv_features != NULL)
1152 value = vc->vc_hv_features(DEV_SOFTC(vs), false);
1153 else
1154 value = vi_hv_features(vs, false);
1155 break;
1156 case VIRTIO_PCI_GUEST_FEATURES:
1157 value = vs->vs_negotiated_caps;
1158 break;
1159 case VIRTIO_PCI_QUEUE_PFN:
1160 if (!vi_is_modern(vs) && vs->vs_curq < vc->vc_nvq)
1161 value = vs->vs_queues[vs->vs_curq].vq_pfn;
1162 break;
1163 case VIRTIO_PCI_QUEUE_NUM:
1164 value = vs->vs_curq < vc->vc_nvq ?
1165 vs->vs_queues[vs->vs_curq].vq_qsize : 0;
1166 break;
1167 case VIRTIO_PCI_QUEUE_SEL:
1168 value = vs->vs_curq;
1169 break;
1170 case VIRTIO_PCI_QUEUE_NOTIFY:
1171 value = 0; /* XXX */
1172 break;
1173 case VIRTIO_PCI_STATUS:
1174 value = vs->vs_status;
1175 break;
1176 case VIRTIO_PCI_ISR:
1177 value = vs->vs_isr;
1178 vs->vs_isr = 0; /* a read clears this flag */
1179 if (value != 0)
1180 pci_lintr_deassert(vs->vs_pi);
1181 break;
1182 case VIRTIO_MSI_CONFIG_VECTOR:
1183 value = vs->vs_msix_cfg_idx;
1184 break;
1185 case VIRTIO_MSI_QUEUE_VECTOR:
1186 value = vs->vs_curq < vc->vc_nvq ?
1187 vs->vs_queues[vs->vs_curq].vq_msix_idx :
1188 VIRTIO_MSI_NO_VECTOR;
1189 break;
1190 }
1191
1192 DPRINTF(vs, "VIRTIO %s LEGACY READ %s = 0x%x",
1193 name, cr->cr_name, value);
1194
1195 switch (offset) {
1196 case VIRTIO_PCI_GUEST_FEATURES:
1197 case VIRTIO_PCI_HOST_FEATURES:
1198 vi_print_caps(vs, value);
1199 break;
1200 }
1201
1202 done:
1203 return (value);
1204 }
1205
1206 /*
1207 * Handle legacy pci config space writes.
1208 *
1209 * If it's part of the legacy virtio config structure, do that.
1210 * Otherwise dispatch to the actual device backend's config write
1211 * callback.
1212 */
1213 static void
vi_legacy_pci_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1214 vi_legacy_pci_write(struct virtio_softc *vs, uint64_t offset, int size,
1215 uint64_t value)
1216 {
1217 struct vqueue_info *vq;
1218 struct virtio_consts *vc;
1219 struct config_reg *cr;
1220 uint64_t virtio_config_size;
1221 const char *name;
1222 uint32_t newoff;
1223 int error;
1224
1225 /* Checked by caller */
1226 assert(size == 1 || size == 2 || size == 4);
1227
1228 vc = vs->vs_vc;
1229 name = vc->vc_name;
1230 virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(vs->vs_pi));
1231
1232 if (offset >= virtio_config_size) {
1233 /*
1234 * Subtract off the standard size (including MSI-X
1235 * registers if enabled) and dispatch to underlying driver.
1236 */
1237 newoff = offset - virtio_config_size;
1238 if (newoff + size > vc->vc_cfgsize)
1239 goto bad;
1240 if (vc->vc_cfgwrite != NULL) {
1241 error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs),
1242 newoff, size, value);
1243 } else {
1244 error = 0;
1245 }
1246 if (error == 0) {
1247 DPRINTF(vs,
1248 "VIRTIO %s LEGACY PCI devcfg write[0x%"
1249 PRIx64 "] = 0x%x", name, newoff, value);
1250 return;
1251 }
1252 }
1253
1254 bad:
1255 cr = vi_find_cr(legacy_cfg_regs, nitems(legacy_cfg_regs), offset);
1256 if (cr == NULL || cr->cr_size != size || cr->cr_ro) {
1257 if (cr != NULL) {
1258 /* offset must be OK, wrong size and/or reg is R/O */
1259 if (cr->cr_size != size)
1260 EPRINTLN(
1261 "%s: write to %s: bad size %d",
1262 name, cr->cr_name, size);
1263 if (cr->cr_ro)
1264 EPRINTLN(
1265 "%s: write to read-only reg %s",
1266 name, cr->cr_name);
1267 } else {
1268 EPRINTLN(
1269 "%s: write to bad offset/size %jd/%d",
1270 name, (uintmax_t)offset, size);
1271 }
1272 return;
1273 }
1274
1275 DPRINTF(vs, "VIRTIO %s LEGACY WRITE %s = 0x%x",
1276 name, cr->cr_name, value);
1277
1278 switch (offset) {
1279 case VIRTIO_PCI_GUEST_FEATURES:
1280 if (vc->vc_hv_features != NULL)
1281 value &= vc->vc_hv_features(DEV_SOFTC(vs), false);
1282 else
1283 value &= vi_hv_features(vs, false);
1284 vs->vs_negotiated_caps = value;
1285 if (vc->vc_apply_features != NULL) {
1286 (*vc->vc_apply_features)(DEV_SOFTC(vs),
1287 &vs->vs_negotiated_caps);
1288 }
1289 DPRINTF(vs, "NEGOTIATED FEATURES 0x%" PRIx64 " (%s)",
1290 vs->vs_negotiated_caps,
1291 vi_is_modern(vs) ? "modern" : "legacy");
1292 vi_print_caps(vs, vs->vs_negotiated_caps);
1293 break;
1294 case VIRTIO_PCI_QUEUE_PFN:
1295 if (vs->vs_curq >= vc->vc_nvq)
1296 goto bad_qindex;
1297 if (vc->vc_qinit != NULL)
1298 vc->vc_qinit(DEV_SOFTC(vs), value, false);
1299 else
1300 vi_legacy_vq_init(vs, value);
1301 break;
1302 case VIRTIO_PCI_QUEUE_SEL:
1303 /*
1304 * Note that the guest is allowed to select an
1305 * invalid queue; we just need to return a QNUM
1306 * of 0 while the bad queue is selected.
1307 */
1308 vs->vs_curq = value;
1309 break;
1310 case VIRTIO_PCI_QUEUE_NOTIFY:
1311 if (value >= (unsigned int)vc->vc_nvq) {
1312 EPRINTLN("%s: queue %d notify out of range",
1313 name, (int)value);
1314 break;
1315 }
1316 if ((vs->vs_flags & VIRTIO_BROKEN) != 0)
1317 break;
1318 vq = &vs->vs_queues[value];
1319 if (vq->vq_notify != NULL) {
1320 (*vq->vq_notify)(DEV_SOFTC(vs), vq);
1321 } else if (vc->vc_qnotify != NULL) {
1322 (*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
1323 } else {
1324 EPRINTLN("%s: qnotify queue %d: missing vq/vc notify",
1325 name, (int)value);
1326 }
1327 break;
1328 case VIRTIO_PCI_STATUS:
1329 vs->vs_status = value;
1330 if (value == 0) {
1331 DPRINTF(vs, "VIRTIO %s RESET", name);
1332 DPRINTF(vs, "**************************************");
1333 vc->vc_reset(DEV_SOFTC(vs));
1334 }
1335 break;
1336 case VIRTIO_MSI_CONFIG_VECTOR:
1337 vs->vs_msix_cfg_idx = value;
1338 break;
1339 case VIRTIO_MSI_QUEUE_VECTOR:
1340 if (vs->vs_curq >= vc->vc_nvq)
1341 goto bad_qindex;
1342 vq = &vs->vs_queues[vs->vs_curq];
1343 vq->vq_msix_idx = value;
1344 if (vc->vc_set_msix != NULL)
1345 vc->vc_set_msix(DEV_SOFTC(vs), vs->vs_curq);
1346 break;
1347 }
1348 return;
1349
1350 bad_qindex:
1351 EPRINTLN(
1352 "%s: write config reg %s: curq %d >= max %d",
1353 name, cr->cr_name, vs->vs_curq, vc->vc_nvq);
1354 }
1355
/*
 * Extract the lower (VI_LOW) or upper (VI_HIGH) 32 bits of a 64-bit
 * quantity.
 */
#define	VI_LOW(x)	((x) & 0xffffffffU)
#define	VI_HIGH(x)	(((x) >> 32) & 0xffffffffU)
1358
1359 /*
1360 * Virtio modern:
1361 * Handle pci config space reads to common config structure.
1362 */
1363 static uint64_t
vi_pci_common_cfg_read(struct virtio_softc * vs,uint64_t offset,int size)1364 vi_pci_common_cfg_read(struct virtio_softc *vs, uint64_t offset, int size)
1365 {
1366 uint64_t value = -1;
1367 struct virtio_consts *vc;
1368 struct vqueue_info *vq;
1369 struct config_reg *cr;
1370 const char *name;
1371 uint64_t capval = 0;
1372
1373 /* Checked by caller */
1374 assert(size == 1 || size == 2 || size == 4);
1375
1376 vc = vs->vs_vc;
1377 name = vc->vc_name;
1378 cr = vi_find_cr(common_cfg_regs, nitems(common_cfg_regs), offset);
1379 if (cr == NULL) {
1380 EPRINTLN("%s: read from bad offset/size 0x%jx/0x%x",
1381 name, (uintmax_t)offset, size);
1382 goto done;
1383 }
1384 /*
1385 * We check that the requested size matches the register at this
1386 * offset, and refuse to process it if there is a mismatch.
1387 */
1388 if (cr->cr_size != size) {
1389 EPRINTLN("%s: read from %s: bad size 0x%x",
1390 name, cr->cr_name, size);
1391 goto done;
1392 }
1393
1394 vq = (vs->vs_curq < vc->vc_nvq ? &vs->vs_queues[vs->vs_curq] : NULL);
1395
1396 switch (offset) {
1397 case VIRTIO_PCI_COMMON_DFSELECT:
1398 value = vs->vs_dfselect;
1399 break;
1400 case VIRTIO_PCI_COMMON_DF:
1401 if (vc->vc_hv_features != NULL)
1402 value = vc->vc_hv_features(DEV_SOFTC(vs), true);
1403 else
1404 value = vi_hv_features(vs, true);
1405 switch (vs->vs_dfselect) {
1406 case 0:
1407 capval = value = VI_LOW(value);
1408 break;
1409 case 1:
1410 value = VI_HIGH(value);
1411 capval = value << 32;
1412 break;
1413 default:
1414 value = capval = 0;
1415 break;
1416 }
1417 /* capval is debug printed below */
1418 break;
1419 case VIRTIO_PCI_COMMON_GFSELECT:
1420 value = vs->vs_gfselect;
1421 break;
1422 case VIRTIO_PCI_COMMON_GF:
1423 value = vs->vs_negotiated_caps;
1424 switch (vs->vs_gfselect) {
1425 case 0:
1426 capval = value = VI_LOW(value);
1427 break;
1428 case 1:
1429 value = VI_HIGH(value);
1430 capval = value << 32;
1431 break;
1432 default:
1433 value = capval = 0;
1434 break;
1435 }
1436 /* capval is debug printed below */
1437 break;
1438 case VIRTIO_PCI_COMMON_MSIX:
1439 value = vs->vs_msix_cfg_idx;
1440 break;
1441 case VIRTIO_PCI_COMMON_NUMQ:
1442 value = vc->vc_nvq;
1443 break;
1444 case VIRTIO_PCI_COMMON_STATUS:
1445 value = vs->vs_status;
1446 break;
1447 case VIRTIO_PCI_COMMON_CFGGENERATION:
1448 if ((vs->vs_flags & VIRTIO_DEVCFG_CHG) != 0) {
1449 vs->vs_devcfg_gen++;
1450 vs->vs_flags &= ~VIRTIO_DEVCFG_CHG;
1451 }
1452 value = vs->vs_devcfg_gen;
1453 break;
1454 case VIRTIO_PCI_COMMON_Q_SELECT:
1455 value = vs->vs_curq;
1456 break;
1457 case VIRTIO_PCI_COMMON_Q_SIZE:
1458 value = vq != NULL ? vq->vq_qsize : 0;
1459 break;
1460 case VIRTIO_PCI_COMMON_Q_MSIX:
1461 if (vq != NULL)
1462 value = vq->vq_msix_idx;
1463 break;
1464 case VIRTIO_PCI_COMMON_Q_ENABLE:
1465 value = vq != NULL ? !!(vq->vq_flags & VQ_ENABLED) : 0;
1466 break;
1467 case VIRTIO_PCI_COMMON_Q_NOFF:
1468 /* queue_notify_off is equal to qid for now */
1469 value = vs->vs_curq;
1470 break;
1471 case VIRTIO_PCI_COMMON_Q_DESCLO:
1472 if (vq != NULL)
1473 value = VI_LOW(vq->vq_desc_gpa);
1474 break;
1475 case VIRTIO_PCI_COMMON_Q_DESCHI:
1476 if (vq != NULL)
1477 value = VI_HIGH(vq->vq_desc_gpa);
1478 break;
1479 case VIRTIO_PCI_COMMON_Q_AVAILLO:
1480 if (vq != NULL)
1481 value = VI_LOW(vq->vq_avail_gpa);
1482 break;
1483 case VIRTIO_PCI_COMMON_Q_AVAILHI:
1484 if (vq != NULL)
1485 value = VI_HIGH(vq->vq_avail_gpa);
1486 break;
1487 case VIRTIO_PCI_COMMON_Q_USEDLO:
1488 if (vq != NULL)
1489 value = VI_LOW(vq->vq_used_gpa);
1490 break;
1491 case VIRTIO_PCI_COMMON_Q_USEDHI:
1492 if (vq != NULL)
1493 value = VI_HIGH(vq->vq_used_gpa);
1494 break;
1495 }
1496
1497 done:
1498 value &= VI_MASK(size);
1499 DPRINTF(vs, "VIRTIO %s COMMON %s read = 0x%x",
1500 name, cr->cr_name, value);
1501
1502 switch (offset) {
1503 case VIRTIO_PCI_COMMON_DF:
1504 case VIRTIO_PCI_COMMON_GF:
1505 vi_print_caps(vs, capval);
1506 break;
1507 }
1508 return (value);
1509 }
1510
1511 /*
1512 * Virtio modern:
1513 * Handle pci config space writes to common config structure.
1514 */
1515 static void
vi_pci_common_cfg_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1516 vi_pci_common_cfg_write(struct virtio_softc *vs, uint64_t offset, int size,
1517 uint64_t value)
1518 {
1519 uint64_t capval = 0;
1520 struct virtio_consts *vc;
1521 struct vqueue_info *vq;
1522 struct config_reg *cr;
1523 const char *name;
1524
1525 /* Checked by caller */
1526 assert(size == 1 || size == 2 || size == 4);
1527
1528 vc = vs->vs_vc;
1529 name = vc->vc_name;
1530 value &= VI_MASK(size);
1531
1532 cr = vi_find_cr(common_cfg_regs, nitems(common_cfg_regs), offset);
1533 if (cr == NULL) {
1534 EPRINTLN( "%s: write to %s: bad size 0x%x",
1535 name, cr->cr_name, size);
1536 return;
1537 }
1538 /*
1539 * We check that the requested size matches the register at this
1540 * offset, and refuse to process it if there is a mismatch.
1541 */
1542 if (cr->cr_size != size) {
1543 EPRINTLN("%s: write to bad offset/size 0x%jx/0x%x",
1544 name, (uintmax_t)offset, size);
1545 return;
1546 }
1547
1548 DPRINTF(vs, "VIRTIO %s COMMON %s write 0x%x", name, cr->cr_name, value);
1549
1550 vq = NULL;
1551 switch (offset) {
1552 case VIRTIO_PCI_COMMON_Q_SIZE:
1553 case VIRTIO_PCI_COMMON_Q_MSIX:
1554 case VIRTIO_PCI_COMMON_Q_ENABLE:
1555 case VIRTIO_PCI_COMMON_Q_DESCLO:
1556 case VIRTIO_PCI_COMMON_Q_DESCHI:
1557 case VIRTIO_PCI_COMMON_Q_AVAILLO:
1558 case VIRTIO_PCI_COMMON_Q_AVAILHI:
1559 case VIRTIO_PCI_COMMON_Q_USEDLO:
1560 case VIRTIO_PCI_COMMON_Q_USEDHI:
1561 if (vs->vs_curq >= vc->vc_nvq) {
1562 EPRINTLN("%s: write queue %d out of range",
1563 name, vs->vs_curq);
1564 goto bad_write;
1565 }
1566 vq = &vs->vs_queues[vs->vs_curq];
1567 break;
1568 default:
1569 break;
1570 }
1571
1572 switch (offset) {
1573 case VIRTIO_PCI_COMMON_DFSELECT:
1574 vs->vs_dfselect = value;
1575 break;
1576 case VIRTIO_PCI_COMMON_GFSELECT:
1577 vs->vs_gfselect = value;
1578 break;
1579 case VIRTIO_PCI_COMMON_GF:
1580 switch (vs->vs_gfselect) {
1581 case 0:
1582 capval = value;
1583 vs->vs_negotiated_caps =
1584 (VI_HIGH(vs->vs_negotiated_caps) << 32) | value;
1585 break;
1586 case 1:
1587 capval = value << 32;
1588 vs->vs_negotiated_caps =
1589 capval | VI_LOW(vs->vs_negotiated_caps);
1590 break;
1591 default:
1592 capval = 0;
1593 break;
1594 }
1595 vi_print_caps(vs, capval);
1596
1597 uint64_t hvfeat;
1598 if (vc->vc_hv_features != NULL)
1599 hvfeat = vc->vc_hv_features(DEV_SOFTC(vs), true);
1600 else
1601 hvfeat = vi_hv_features(vs, true);
1602 vs->vs_negotiated_caps &= hvfeat;
1603 break;
1604 case VIRTIO_PCI_COMMON_MSIX:
1605 vs->vs_msix_cfg_idx = value;
1606 break;
1607 case VIRTIO_PCI_COMMON_STATUS:
1608 if (value == 0) {
1609 DPRINTF(vs, "VIRTIO %s RESET", name);
1610 (*vc->vc_reset)(DEV_SOFTC(vs));
1611 vs->vs_status = value;
1612 break;
1613 }
1614 if ((vs->vs_status & VIRTIO_CONFIG_S_FEATURES_OK) == 0 &&
1615 (value & VIRTIO_CONFIG_S_FEATURES_OK) != 0) {
1616 if (vc->vc_apply_features != NULL) {
1617 (*vc->vc_apply_features)(DEV_SOFTC(vs),
1618 &vs->vs_negotiated_caps);
1619 }
1620 DPRINTF(vs, "NEGOTIATED FEATURES 0x%" PRIx64 " (%s)",
1621 vs->vs_negotiated_caps,
1622 vi_is_modern(vs) ? "modern" : "legacy");
1623 vi_print_caps(vs, vs->vs_negotiated_caps);
1624 }
1625 vs->vs_status = value;
1626 break;
1627 case VIRTIO_PCI_COMMON_Q_SELECT:
1628 if (value >= vc->vc_nvq) {
1629 EPRINTLN("%s: queue select %d out of range",
1630 name, (int)value);
1631 goto bad_write;
1632 }
1633 vs->vs_curq = value;
1634 break;
1635 case VIRTIO_PCI_COMMON_Q_SIZE:
1636 /*
1637 * If the guest has passed us a queue size that is not a power
1638 * of two, something is very wrong.
1639 */
1640 if (!ISP2(value)) {
1641 vi_error(vs, "Bad queue size 0x%" PRIx64
1642 " for qid 0x%x, not power of 2",
1643 value, vq->vq_num);
1644 } else {
1645 vq->vq_qsize = value;
1646 }
1647 break;
1648 case VIRTIO_PCI_COMMON_Q_MSIX:
1649 vq->vq_msix_idx = value;
1650 if (vc->vc_set_msix != NULL)
1651 vc->vc_set_msix(DEV_SOFTC(vs), vs->vs_curq);
1652 break;
1653 case VIRTIO_PCI_COMMON_Q_ENABLE:
1654 if ((vq->vq_flags & VQ_ENABLED) == 0 && value == 1) {
1655 if (vc->vc_qinit != NULL)
1656 vc->vc_qinit(DEV_SOFTC(vs), 0, true);
1657 else
1658 vi_vq_init(vs);
1659 vq->vq_flags |= VQ_ENABLED;
1660 } else if (value == 0) {
1661 /*
1662 * The driver is not permitted to write a 0 to this
1663 * register. We choose to ignore it rather than fault
1664 * the device.
1665 */
1666 }
1667 break;
1668 case VIRTIO_PCI_COMMON_Q_DESCLO:
1669 vq->vq_desc_gpa = (VI_HIGH(vq->vq_desc_gpa) << 32) | value;
1670 break;
1671 case VIRTIO_PCI_COMMON_Q_DESCHI:
1672 vq->vq_desc_gpa = (value << 32) | VI_LOW(vq->vq_desc_gpa);
1673 break;
1674 case VIRTIO_PCI_COMMON_Q_AVAILLO:
1675 vq->vq_avail_gpa = (VI_HIGH(vq->vq_avail_gpa) << 32) | value;
1676 break;
1677 case VIRTIO_PCI_COMMON_Q_AVAILHI:
1678 vq->vq_avail_gpa = (value << 32) | VI_LOW(vq->vq_avail_gpa);
1679 break;
1680 case VIRTIO_PCI_COMMON_Q_USEDLO:
1681 vq->vq_used_gpa = (VI_HIGH(vq->vq_used_gpa) << 32) | value;
1682 break;
1683 case VIRTIO_PCI_COMMON_Q_USEDHI:
1684 vq->vq_used_gpa = (value << 32) | VI_LOW(vq->vq_used_gpa);
1685 break;
1686 default:
1687 EPRINTLN("%s: write to bad offset/size %jd/%d", name,
1688 (uintmax_t)offset, size);
1689 goto bad_write;
1690 }
1691
1692 return;
1693
1694 bad_write:
1695 return;
1696 }
1697
/*
 * Virtio modern:
 * Handle pci MMIO reads to the notification structure.
 *
 * The structure has no readable contents: whatever offset and size are
 * requested, the result is zero.
 */
static uint64_t
vi_pci_notify_cfg_read(struct virtio_softc *vs, uint64_t offset, int size)
{
	const uint64_t nothing = 0;

	/* None of the arguments influence the result. */
	(void)vs;
	(void)offset;
	(void)size;

	return (nothing);
}
1709
1710 /*
1711 * Virtio modern:
1712 * Handle pci MMIO writes to the notification structure.
1713 *
1714 * VIRTIO_F_NOTIFICATION_DATA is not a feature that this device advertises
1715 * so we only need to consider the simple case where the vq index is written
1716 * into the registers.
1717 */
1718 static void
vi_pci_notify_cfg_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1719 vi_pci_notify_cfg_write(struct virtio_softc *vs, uint64_t offset, int size,
1720 uint64_t value)
1721 {
1722 struct virtio_consts *vc = vs->vs_vc;
1723 const char *name = vc->vc_name;
1724 unsigned int qid = value;
1725 struct vqueue_info *vq;
1726
1727 DPRINTF(vs, "VIRTIO %s notify VQ 0x%x offset 0x%x",
1728 name, qid, offset);
1729
1730 if (size != 2) {
1731 EPRINTLN("%s: bad size 0x%x access at offset 0x%" PRIx64,
1732 name, size, offset);
1733 return;
1734 }
1735
1736 if ((vs->vs_status & VIRTIO_CONFIG_STATUS_DRIVER_OK) == 0) {
1737 EPRINTLN("%s: attempt to use VQ 0x%x before DRIVER_OK, "
1738 "driver confused?", name, qid);
1739 return;
1740 }
1741
1742 if ((vs->vs_flags & VIRTIO_BROKEN) != 0) {
1743 EPRINTLN("%s: attempt to use VQ 0x%x while VIRTIO device is "
1744 "flagged as broken", name, qid);
1745 return;
1746 }
1747
1748 if (offset != qid * VQ_NOTIFY_OFF_MULTIPLIER) {
1749 EPRINTLN(
1750 "%s: VQ 0x%x notify does not have matching offset at 0x%"
1751 PRIx64, name, qid, offset);
1752 return;
1753 }
1754
1755 if (qid >= vc->vc_nvq) {
1756 EPRINTLN("%s: VQ 0x%x notify out of range", name, qid);
1757 return;
1758 }
1759
1760 vq = &vs->vs_queues[qid];
1761 if ((vq->vq_flags & VQ_ENABLED) == 0)
1762 return;
1763 if (vq->vq_notify != NULL)
1764 (*vq->vq_notify)(DEV_SOFTC(vs), vq);
1765 else if (vc->vc_qnotify != NULL)
1766 (*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
1767 else
1768 EPRINTLN("%s: qnotify VQ 0x%x: no vq/vc notify", name, qid);
1769 }
1770
1771 /*
1772 * Virtio modern:
1773 * Handle pci MMIO reads to ISR structure.
1774 *
1775 * The ISR structure has a relaxed requirement on alignment.
1776 */
1777 static uint64_t
vi_pci_isr_cfg_read(struct virtio_softc * vs,uint64_t offset,int size)1778 vi_pci_isr_cfg_read(struct virtio_softc *vs, uint64_t offset, int size)
1779 {
1780 uint64_t value;
1781
1782 if (offset != 0)
1783 return (0);
1784
1785 value = vs->vs_isr;
1786 vs->vs_isr = 0;
1787 if (value != 0) {
1788 DPRINTF(vs, "VIRTIO ISR read[0x%" PRIx64 "] = 0x%x",
1789 offset, value);
1790 pci_lintr_deassert(vs->vs_pi);
1791 }
1792 return (value);
1793 }
1794
1795 /*
1796 * Virtio modern:
1797 * pci MMIO writes to ISR structure are disallowed.
1798 */
1799 static void
vi_pci_isr_cfg_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1800 vi_pci_isr_cfg_write(struct virtio_softc *vs, uint64_t offset, int size,
1801 uint64_t value)
1802 {
1803 const char *name = vs->vs_vc->vc_name;
1804
1805 EPRINTLN("%s: invalid write into isr cfg", name);
1806 }
1807
1808 /*
1809 * Virtio modern:
1810 * Handle pci MMIO reads to device-specific config structure.
1811 */
1812 static uint64_t
vi_pci_dev_cfg_read(struct virtio_softc * vs,uint64_t offset,int size)1813 vi_pci_dev_cfg_read(struct virtio_softc *vs, uint64_t offset, int size)
1814 {
1815 struct virtio_consts *vc = vs->vs_vc;
1816 uint32_t value = VI_MASK(size);
1817
1818 if (offset + size > vc->vc_cfgsize)
1819 return (value);
1820
1821 vc->vc_cfgread(DEV_SOFTC(vs), offset, size, &value);
1822 DPRINTF(vs, "VIRTIO %s PCI devcfg read[0x%" PRIx64 "] = 0x%x",
1823 vs->vs_vc->vc_name, offset, value);
1824 return (value);
1825 }
1826
1827 /*
1828 * Virtio modern:
1829 * Handle pci MMIO writes to device-specific config structure.
1830 */
1831 static void
vi_pci_dev_cfg_write(struct virtio_softc * vs,uint64_t offset,int size,uint64_t value)1832 vi_pci_dev_cfg_write(struct virtio_softc *vs, uint64_t offset, int size,
1833 uint64_t value)
1834 {
1835 struct virtio_consts *vc = vs->vs_vc;
1836
1837 value &= VI_MASK(size);
1838
1839 if (offset + size > vc->vc_cfgsize)
1840 return;
1841 if (vc->vc_cfgwrite != NULL)
1842 vc->vc_cfgwrite(DEV_SOFTC(vs), offset, size, value);
1843 DPRINTF(vs, "VIRTIO %s PCI devcfg write[0x%" PRIx64 "] = 0x%x",
1844 vs->vs_vc->vc_name, offset, value);
1845 }
1846
1847 /*
1848 * Handle configuration space reads.
1849 */
1850 int
vi_pci_cfgread(struct pci_devinst * pi,int offset,int bytes,uint32_t * retval)1851 vi_pci_cfgread(struct pci_devinst *pi, int offset, int bytes, uint32_t *retval)
1852 {
1853 struct virtio_softc *vs = pi->pi_arg;
1854 virtio_pci_capcfg_t *cfg;
1855 uint32_t baroff, barlen;
1856 int baridx;
1857
1858 cfg = vi_pci_cfg_bycapaddr(vs, offset, bytes);
1859
1860 /* If this is not a VirtIO cap, use the default cfgspace handler */
1861 if (cfg == NULL)
1862 return (PE_CFGRW_DEFAULT);
1863
1864 /* Only the PCI cap has special handling */
1865 if (cfg->c_captype != VIRTIO_PCI_CAP_PCI_CFG)
1866 return (PE_CFGRW_DEFAULT);
1867
1868 /* and then only the data field */
1869 if (offset != vs->vs_pcicap->c_capoff +
1870 offsetof(struct virtio_pci_cfg_cap, pci_cfg_data)) {
1871 return (PE_CFGRW_DEFAULT);
1872 }
1873
1874 if (bytes != 1 && bytes != 2 && bytes != 4)
1875 return (PE_CFGRW_DROP);
1876
1877 if (vs->vs_mtx)
1878 pthread_mutex_lock(vs->vs_mtx);
1879
1880 baridx = pci_get_cfgdata8(pi,
1881 offset + offsetof(struct virtio_pci_cap, bar));
1882 baroff = pci_get_cfgdata32(pi,
1883 offset + offsetof(struct virtio_pci_cap, offset));
1884 barlen = pci_get_cfgdata32(pi,
1885 offset + offsetof(struct virtio_pci_cap, length));
1886 if (baridx > PCIR_MAX_BAR_0) {
1887 *retval = VI_MASK(bytes);
1888 goto done;
1889 }
1890 *retval = vi_modern_pci_read(vs, baridx, baroff, barlen);
1891
1892 done:
1893 if (vs->vs_mtx)
1894 pthread_mutex_unlock(vs->vs_mtx);
1895
1896 DPRINTF(vs, "VIRTIO %s PCI READ BAR%u[0x%x+%x] = 0x%x",
1897 vs->vs_vc->vc_name, baridx, baroff, barlen, *retval);
1898
1899 return (PE_CFGRW_DROP);
1900 }
1901
1902 /*
1903 * Handle configuration space writes.
1904 */
1905 int
vi_pci_cfgwrite(struct pci_devinst * pi,int offset,int bytes,uint32_t val)1906 vi_pci_cfgwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
1907 {
1908 struct virtio_softc *vs = pi->pi_arg;
1909 virtio_pci_capcfg_t *cfg;
1910 uint32_t baroff, barlen;
1911 int baridx;
1912
1913 cfg = vi_pci_cfg_bycapaddr(vs, offset, bytes);
1914
1915 /* If this is not a VirtIO cap, use the default cfgspace handler */
1916 if (cfg == NULL)
1917 return (PE_CFGRW_DEFAULT);
1918
1919 /* Only the PCI VirtIO cap can be written to */
1920 if (cfg->c_captype != VIRTIO_PCI_CAP_PCI_CFG)
1921 return (PE_CFGRW_DROP);
1922
1923 /* and then only the data field needs special handling */
1924 if (offset != vs->vs_pcicap->c_capoff +
1925 offsetof(struct virtio_pci_cfg_cap, pci_cfg_data)) {
1926 return (PE_CFGRW_DEFAULT);
1927 }
1928
1929 if (bytes != 1 && bytes != 2 && bytes != 4)
1930 return (PE_CFGRW_DROP);
1931
1932 if (vs->vs_mtx)
1933 pthread_mutex_lock(vs->vs_mtx);
1934
1935 baridx = pci_get_cfgdata8(pi,
1936 offset + offsetof(struct virtio_pci_cap, bar));
1937 baroff = pci_get_cfgdata32(pi,
1938 offset + offsetof(struct virtio_pci_cap, offset));
1939 barlen = pci_get_cfgdata32(pi,
1940 offset + offsetof(struct virtio_pci_cap, length));
1941 if (baridx > PCIR_MAX_BAR_0)
1942 goto done;
1943 vi_modern_pci_write(vs, baridx, baroff, barlen, val);
1944
1945 done:
1946 if (vs->vs_mtx)
1947 pthread_mutex_unlock(vs->vs_mtx);
1948
1949 DPRINTF(vs, "VIRTIO %s PCI WRITE BAR%x[0x%x+%x] = 0x%x",
1950 vs->vs_vc->vc_name, baridx, baroff, barlen, val);
1951
1952 return (PE_CFGRW_DROP);
1953 }
1954
1955 /*
1956 * Handle pci config space reads to virtio-related structures
1957 */
1958 static uint64_t
vi_modern_pci_read(struct virtio_softc * vs,int baridx,uint64_t offset,int size)1959 vi_modern_pci_read(struct virtio_softc *vs, int baridx, uint64_t offset,
1960 int size)
1961 {
1962 virtio_pci_capcfg_t *cfg;
1963 uint64_t value = VI_MASK(size);
1964
1965 cfg = vi_pci_cfg_bybaraddr(vs, baridx, offset, size);
1966 if (cfg == NULL)
1967 return (value);
1968
1969 offset -= cfg->c_baroff;
1970
1971 switch (cfg->c_captype) {
1972 case VIRTIO_PCI_CAP_COMMON_CFG:
1973 value = vi_pci_common_cfg_read(vs, offset, size);
1974 break;
1975 case VIRTIO_PCI_CAP_NOTIFY_CFG:
1976 value = vi_pci_notify_cfg_read(vs, offset, size);
1977 break;
1978 case VIRTIO_PCI_CAP_ISR_CFG:
1979 value = vi_pci_isr_cfg_read(vs, offset, size);
1980 break;
1981 case VIRTIO_PCI_CAP_DEVICE_CFG:
1982 value = vi_pci_dev_cfg_read(vs, offset, size);
1983 break;
1984 default:
1985 break;
1986 }
1987
1988 return (value);
1989 }
1990
1991 /*
1992 * Handle pci config space reads to virtio-related structures
1993 */
1994 static void
vi_modern_pci_write(struct virtio_softc * vs,int baridx,uint64_t offset,int size,uint64_t value)1995 vi_modern_pci_write(struct virtio_softc *vs, int baridx, uint64_t offset,
1996 int size, uint64_t value)
1997 {
1998 virtio_pci_capcfg_t *cfg;
1999
2000 cfg = vi_pci_cfg_bybaraddr(vs, baridx, offset, size);
2001 if (cfg == NULL)
2002 return;
2003
2004 offset -= cfg->c_baroff;
2005
2006 switch (cfg->c_captype) {
2007 case VIRTIO_PCI_CAP_COMMON_CFG:
2008 vi_pci_common_cfg_write(vs, offset, size, value);
2009 break;
2010 case VIRTIO_PCI_CAP_NOTIFY_CFG:
2011 vi_pci_notify_cfg_write(vs, offset, size, value);
2012 break;
2013 case VIRTIO_PCI_CAP_ISR_CFG:
2014 vi_pci_isr_cfg_write(vs, offset, size, value);
2015 break;
2016 case VIRTIO_PCI_CAP_DEVICE_CFG:
2017 vi_pci_dev_cfg_write(vs, offset, size, value);
2018 break;
2019 }
2020 }
2021
2022 /*
2023 * Handle virtio bar reads.
2024 *
2025 * If it's to the MSI-X info, dispatch the reads to the msix handling code.
2026 * Otherwise, dispatch the reads to virtio device code.
2027 */
2028 uint64_t
vi_pci_read(struct pci_devinst * pi,int baridx,uint64_t offset,int size)2029 vi_pci_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
2030 {
2031 struct virtio_softc *vs = pi->pi_arg;
2032 uint64_t value;
2033
2034 if ((vs->vs_flags & VIRTIO_USE_MSIX) != 0 &&
2035 (baridx == pci_msix_table_bar(pi) ||
2036 baridx == pci_msix_pba_bar(pi))) {
2037 return (pci_emul_msix_tread(pi, offset, size));
2038 }
2039
2040 if (vs->vs_mtx)
2041 pthread_mutex_lock(vs->vs_mtx);
2042
2043 value = VI_MASK(size);
2044
2045 if (size != 1 && size != 2 && size != 4)
2046 goto done;
2047
2048 switch (baridx) {
2049 case VIRTIO_LEGACY_BAR:
2050 value = vi_legacy_pci_read(vs, offset, size);
2051 break;
2052 case VIRTIO_MODERN_BAR:
2053 value = vi_modern_pci_read(vs, baridx, offset, size);
2054 break;
2055 default:
2056 break;
2057 }
2058
2059 done:
2060 if (vs->vs_mtx)
2061 pthread_mutex_unlock(vs->vs_mtx);
2062 return (value);
2063 }
2064
2065 /*
2066 * Handle virtio bar writes.
2067 *
2068 * If it's to the MSI-X info, dispatch the writes to the msix handling code.
2069 * Otherwise, dispatch the writes to virtio device code.
2070 */
2071 void
vi_pci_write(struct pci_devinst * pi,int baridx,uint64_t offset,int size,uint64_t value)2072 vi_pci_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
2073 uint64_t value)
2074 {
2075 struct virtio_softc *vs = pi->pi_arg;
2076 struct virtio_consts *vc = vs->vs_vc;
2077
2078 if ((vs->vs_flags & VIRTIO_USE_MSIX) != 0 &&
2079 (baridx == pci_msix_table_bar(pi) ||
2080 baridx == pci_msix_pba_bar(pi))) {
2081 if (pci_emul_msix_twrite(pi, offset, size, value) == 0 &&
2082 vc->vc_update_msix != NULL) {
2083 vc->vc_update_msix(DEV_SOFTC(vs), offset);
2084 }
2085 return;
2086 }
2087
2088 if (vs->vs_mtx)
2089 pthread_mutex_lock(vs->vs_mtx);
2090
2091 if (size != 1 && size != 2 && size != 4)
2092 goto done;
2093
2094 switch (baridx) {
2095 case VIRTIO_LEGACY_BAR:
2096 vi_legacy_pci_write(vs, offset, size, value);
2097 break;
2098 case VIRTIO_MODERN_BAR:
2099 vi_modern_pci_write(vs, baridx, offset, size, value);
2100 break;
2101 default:
2102 break;
2103 }
2104
2105 done:
2106 if (vs->vs_mtx)
2107 pthread_mutex_unlock(vs->vs_mtx);
2108 }
2109