1 /*
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26 /*
27 * This file and its contents are supplied under the terms of the
28 * Common Development and Distribution License ("CDDL"), version 1.0.
29 * You may only use this file in accordance with the terms of version
30 * 1.0 of the CDDL.
31 *
32 * A full copy of the text of the CDDL should have accompanied this
33 * source. A copy of the CDDL is also available via the Internet at
34 * http://www.illumos.org/license/CDDL.
35 *
36 * Copyright 2015 Pluribus Networks Inc.
37 * Copyright 2019 Joyent, Inc.
38 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
39 */
40
41 #include <sys/cdefs.h>
42
43 #include <sys/param.h>
44 #include <sys/linker_set.h>
45 #include <sys/ioctl.h>
46 #include <sys/uio.h>
47 #include <sys/viona_io.h>
48
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <stdint.h>
54 #include <string.h>
55 #include <strings.h>
56 #include <unistd.h>
57 #include <assert.h>
58 #include <pthread.h>
59 #include <signal.h>
60 #include <stdbool.h>
61 #include <poll.h>
62 #include <libdladm.h>
63 #include <libdllink.h>
64 #include <libdlvnic.h>
65
66 #include <machine/vmm.h>
67 #include <vmmapi.h>
68
69 #include "bhyverun.h"
70 #include "config.h"
71 #include "debug.h"
72 #include "pci_emul.h"
73 #include "virtio.h"
74 #include "iov.h"
75 #include "virtio_net.h"
76
77 #define VIONA_RINGSZ 1024
78 #define VIONA_CTLQ_SIZE 64
79 #define VIONA_CTLQ_MAXSEGS 32
80
81 /*
82 * PCI config-space register offsets
83 */
84 #define VIONA_R_CFG0 24
85 #define VIONA_R_CFG1 25
86 #define VIONA_R_CFG2 26
87 #define VIONA_R_CFG3 27
88 #define VIONA_R_CFG4 28
89 #define VIONA_R_CFG5 29
90 #define VIONA_R_CFG6 30
91 #define VIONA_R_CFG7 31
92 #define VIONA_R_MAX 31
93
94 #define VIONA_REGSZ (VIONA_R_MAX + 1)
95
96 /*
97 * Queue definitions.
98 */
99 #define VIONA_RXQ 0
100 #define VIONA_TXQ 1
101 #define VIONA_CTLQ 2
102
103 #define VIONA_MAXQ 3
104
105 /*
106 * Supplementary host capabilities provided in the userspace component.
107 */
108 #define VIONA_S_HOSTCAPS_USERSPACE ( \
109 VIRTIO_NET_F_CTRL_VQ | \
110 VIRTIO_NET_F_CTRL_RX)
111
112 /*
113 * Debug printf
114 */
115 static volatile int pci_viona_debug;
116 #define DPRINTF(fmt, arg...) \
117 do { \
118 if (pci_viona_debug) { \
119 FPRINTLN(stdout, fmt, ##arg); \
120 fflush(stdout); \
121 } \
122 } while (0)
123 #define WPRINTF(fmt, arg...) FPRINTLN(stderr, fmt, ##arg)
124
125 /*
126 * Per-device softc
127 */
/*
 * Per-device softc
 */
struct pci_viona_softc {
	struct virtio_softc vsc_vs;	/* common virtio state; must be first */
	struct virtio_consts vsc_consts; /* per-instance copy of viona_vi_consts */
	struct vqueue_info vsc_queues[VIONA_MAXQ]; /* RX, TX and control queues */
	pthread_mutex_t vsc_mtx;	/* serializes register/MSI-X state access */

	datalink_id_t vsc_linkid;	/* id of the datalink named by vsc_linkname */
	int vsc_vnafd;			/* fd on /dev/viona for this instance */

	/* Configurable parameters */
	char vsc_linkname[MAXLINKNAMELEN]; /* "vnic" config value */
	uint32_t vsc_feature_mask;	/* features stripped during negotiation */
	uint16_t vsc_vq_size;		/* RX/TX ring size ("vqsize" config) */

	uint8_t vsc_macaddr[6];		/* current MAC; guest-writable via CFG0-5 */

	bool vsc_resetting;
	bool vsc_msix_active;		/* MSI-X enabled and not function-masked */

	viona_promisc_t vsc_promisc;	/* Current promisc mode */
	bool vsc_promisc_promisc;	/* PROMISC enabled */
	bool vsc_promisc_allmulti;	/* ALLMULTI enabled */
	bool vsc_promisc_umac;		/* unicast MACs sent */
	bool vsc_promisc_mmac;		/* multicast MACs sent */
};
153
static struct virtio_consts viona_vi_consts = {
	.vc_name = "viona",
	.vc_nvq = VIONA_MAXQ,	/* RX, TX and control queues */
	/*
	 * We use the common bhyve virtio framework so that we can call
	 * the utility functions to work with the queues handled in userspace.
	 * The framework PCI read/write functions are not used so these
	 * callbacks will not be invoked.
	 */
	.vc_cfgsize = 0,
	.vc_reset = NULL,
	.vc_qnotify = NULL,
	.vc_cfgread = NULL,
	.vc_cfgwrite = NULL,
	.vc_apply_features = NULL,
	/*
	 * The following field is populated using the response from the
	 * viona driver during initialisation, augmented with the additional
	 * capabilities emulated in userspace.
	 */
	.vc_hv_caps = 0,
};
176
177 /*
178 * Return the size of IO BAR that maps virtio header and device specific
179 * region. The size would vary depending on whether MSI-X is enabled or
180 * not.
181 */
182 static uint64_t
pci_viona_iosize(struct pci_devinst * pi)183 pci_viona_iosize(struct pci_devinst *pi)
184 {
185 if (pci_msix_enabled(pi)) {
186 return (VIONA_REGSZ);
187 } else {
188 return (VIONA_REGSZ -
189 (VIRTIO_PCI_CONFIG_OFF(1) - VIRTIO_PCI_CONFIG_OFF(0)));
190 }
191 }
192
193 static uint16_t
pci_viona_qsize(struct pci_viona_softc * sc,int qnum)194 pci_viona_qsize(struct pci_viona_softc *sc, int qnum)
195 {
196 if (qnum == VIONA_CTLQ)
197 return (VIONA_CTLQ_SIZE);
198
199 return (sc->vsc_vq_size);
200 }
201
202 static void
pci_viona_ring_reset(struct pci_viona_softc * sc,int ring)203 pci_viona_ring_reset(struct pci_viona_softc *sc, int ring)
204 {
205 assert(ring < VIONA_MAXQ);
206
207 switch (ring) {
208 case VIONA_RXQ:
209 case VIONA_TXQ:
210 break;
211 case VIONA_CTLQ:
212 default:
213 return;
214 }
215
216 for (;;) {
217 int res;
218
219 res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_RESET, ring);
220 if (res == 0) {
221 break;
222 } else if (errno != EINTR) {
223 WPRINTF("ioctl viona ring %d reset failed %d",
224 ring, errno);
225 return;
226 }
227 }
228 }
229
230 static void
pci_viona_update_status(struct pci_viona_softc * sc,uint32_t value)231 pci_viona_update_status(struct pci_viona_softc *sc, uint32_t value)
232 {
233
234 if (value == 0) {
235 DPRINTF("viona: device reset requested !");
236
237 vi_reset_dev(&sc->vsc_vs);
238 pci_viona_ring_reset(sc, VIONA_RXQ);
239 pci_viona_ring_reset(sc, VIONA_TXQ);
240 }
241
242 sc->vsc_vs.vs_status = value;
243 }
244
245 static const char *
pci_viona_promisc_descr(viona_promisc_t mode)246 pci_viona_promisc_descr(viona_promisc_t mode)
247 {
248 switch (mode) {
249 case VIONA_PROMISC_NONE:
250 return ("none");
251 case VIONA_PROMISC_MULTI:
252 return ("multicast");
253 case VIONA_PROMISC_ALL:
254 return ("all");
255 default:
256 abort();
257 }
258 }
259
260 static int
pci_viona_eval_promisc(struct pci_viona_softc * sc)261 pci_viona_eval_promisc(struct pci_viona_softc *sc)
262 {
263 viona_promisc_t mode = VIONA_PROMISC_NONE;
264 int err = 0;
265
266 /*
267 * If the guest has explicitly requested promiscuous mode or has sent a
268 * non-empty unicast MAC address table, then set viona to promiscuous
269 * mode. Otherwise, if the guest has explicitly requested multicast
270 * promiscuity or has sent a non-empty multicast MAC address table,
271 * then set viona to multicast promiscuous mode.
272 */
273 if (sc->vsc_promisc_promisc || sc->vsc_promisc_umac)
274 mode = VIONA_PROMISC_ALL;
275 else if (sc->vsc_promisc_allmulti || sc->vsc_promisc_mmac)
276 mode = VIONA_PROMISC_MULTI;
277
278 if (mode != sc->vsc_promisc) {
279 DPRINTF("viona: setting promiscuous mode to %d (%s)",
280 mode, pci_viona_promisc_descr(mode));
281 DPRINTF(" promisc=%u, umac=%u, allmulti=%u, mmac=%u",
282 sc->vsc_promisc_promisc, sc->vsc_promisc_umac,
283 sc->vsc_promisc_allmulti, sc->vsc_promisc_mmac);
284
285 err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_PROMISC, mode);
286 if (err == 0)
287 sc->vsc_promisc = mode;
288 else
289 WPRINTF("ioctl viona set promisc failed %d", errno);
290 }
291
292 return (err);
293 }
294
295 static uint8_t
pci_viona_control_rx(struct vqueue_info * vq,const virtio_net_ctrl_hdr_t * hdr,struct iovec * iov,size_t niov)296 pci_viona_control_rx(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr,
297 struct iovec *iov, size_t niov)
298 {
299 struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs;
300 uint8_t v;
301
302 if (iov[0].iov_len != sizeof (uint8_t) || niov != 1) {
303 EPRINTLN("viona: bad control RX data");
304 return (VIRTIO_NET_CQ_ERR);
305 }
306
307 v = *(uint8_t *)iov[0].iov_base;
308
309 switch (hdr->vnch_command) {
310 case VIRTIO_NET_CTRL_RX_PROMISC:
311 DPRINTF("viona: ctrl RX promisc %d", v);
312 sc->vsc_promisc_promisc = (v != 0);
313 break;
314 case VIRTIO_NET_CTRL_RX_ALLMULTI:
315 DPRINTF("viona: ctrl RX allmulti %d", v);
316 sc->vsc_promisc_allmulti = (v != 0);
317 break;
318 default:
319 /*
320 * VIRTIO_NET_F_CTRL_RX_EXTRA was not offered so no other
321 * commands are expected.
322 */
323 EPRINTLN("viona: unrecognised RX control cmd %u",
324 hdr->vnch_command);
325 return (VIRTIO_NET_CQ_ERR);
326 }
327
328 if (pci_viona_eval_promisc(sc) == 0)
329 return (VIRTIO_NET_CQ_OK);
330 return (VIRTIO_NET_CQ_ERR);
331 }
332
333 static void
pci_viona_control_mac_dump(const char * tag,const struct iovec * iov)334 pci_viona_control_mac_dump(const char *tag, const struct iovec *iov)
335 {
336 virtio_net_ctrl_mac_t *table = (virtio_net_ctrl_mac_t *)iov->iov_base;
337 ether_addr_t *mac = &table->vncm_mac;
338
339 DPRINTF("-- %s MAC TABLE (entries: %u)", tag, table->vncm_entries);
340
341 if (table->vncm_entries * ETHERADDRL !=
342 iov->iov_len - sizeof (table->vncm_entries)) {
343 DPRINTF(" Bad table size %u", iov->iov_len);
344 return;
345 }
346
347 for (uint32_t i = 0; i < table->vncm_entries; i++) {
348 DPRINTF(" [%2d] %s", i, ether_ntoa((struct ether_addr *)mac));
349 mac++;
350 }
351 }
352
353 static uint8_t
pci_viona_control_mac(struct vqueue_info * vq,const virtio_net_ctrl_hdr_t * hdr,struct iovec * iov,size_t niov)354 pci_viona_control_mac(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr,
355 struct iovec *iov, size_t niov)
356 {
357 struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs;
358
359 switch (hdr->vnch_command) {
360 case VIRTIO_NET_CTRL_MAC_TABLE_SET: {
361 virtio_net_ctrl_mac_t *table;
362
363 DPRINTF("viona: ctrl MAC table set");
364
365 if (niov != 2) {
366 EPRINTLN("viona: bad control MAC data");
367 return (VIRTIO_NET_CQ_ERR);
368 }
369
370 /*
371 * We advertise VIRTIO_NET_F_CTRL_RX and therefore need to
372 * accept VIRTIO_NET_CTRL_MAC, but we don't support passing
373 * changes in the MAC address lists down to viona.
374 * Instead, we set flags to indicate if the guest has sent
375 * any MAC addresses for each table, and use these to determine
376 * the resulting promiscuous mode, see pci_viona_eval_promisc()
377 * above.
378 */
379
380 /* Unicast MAC table */
381 table = (virtio_net_ctrl_mac_t *)iov[0].iov_base;
382 sc->vsc_promisc_umac = (table->vncm_entries != 0);
383 if (pci_viona_debug)
384 pci_viona_control_mac_dump("UNICAST", &iov[0]);
385
386 /* Multicast MAC table */
387 table = (virtio_net_ctrl_mac_t *)iov[1].iov_base;
388 sc->vsc_promisc_mmac = (table->vncm_entries != 0);
389 if (pci_viona_debug)
390 pci_viona_control_mac_dump("MULTICAST", &iov[1]);
391
392 break;
393 }
394 case VIRTIO_NET_CTRL_MAC_ADDR_SET:
395 /* disallow setting the primary filter MAC address */
396 DPRINTF("viona: ctrl MAC addr set %d", niov);
397 return (VIRTIO_NET_CQ_ERR);
398 default:
399 EPRINTLN("viona: unrecognised MAC control cmd %u",
400 hdr->vnch_command);
401 return (VIRTIO_NET_CQ_ERR);
402 }
403
404 if (pci_viona_eval_promisc(sc) == 0)
405 return (VIRTIO_NET_CQ_OK);
406 return (VIRTIO_NET_CQ_ERR);
407 }
408
409 static void
pci_viona_control(struct vqueue_info * vq)410 pci_viona_control(struct vqueue_info *vq)
411 {
412 struct iovec iov[VIONA_CTLQ_MAXSEGS + 1];
413 const virtio_net_ctrl_hdr_t *hdr;
414 struct iovec *siov = iov;
415 struct vi_req req = { 0 };
416 uint8_t *ackp;
417 size_t nsiov;
418 uint32_t len;
419 int n;
420
421 n = vq_getchain(vq, iov, VIONA_CTLQ_MAXSEGS, &req);
422
423 assert(n >= 1 && n <= VIONA_CTLQ_MAXSEGS);
424
425 /*
426 * Since we have not negotiated VIRTIO_F_ANY_LAYOUT, we expect the
427 * control message to be laid out in at least three descriptors as
428 * follows:
429 * header - sizeof (virtio_net_ctrl_hdr_t)
430 * data[] - at least one descriptor, varying size
431 * ack - uint8_t, flagged as writable
432 * Check the incoming message to make sure it matches this layout and
433 * drop the entire chain if not.
434 */
435 if (n < 3 || req.writable != 1 || req.readable + 1 != n ||
436 iov[req.readable].iov_len != sizeof (uint8_t)) {
437 EPRINTLN("viona: bad control chain, len=%d, w=%d, r=%d",
438 n, req.writable, req.readable);
439 goto drop;
440 }
441
442 hdr = (const virtio_net_ctrl_hdr_t *)iov[0].iov_base;
443 if (iov[0].iov_len < sizeof (virtio_net_ctrl_hdr_t)) {
444 EPRINTLN("viona: control header too short: %u", iov[0].iov_len);
445 goto drop;
446 }
447
448 /*
449 * Writable iovecs start at iov[req.readable], and we've already
450 * checked that there is only one writable, it's at the end, and the
451 * right size; it's the acknowledgement byte.
452 */
453 ackp = (uint8_t *)iov[req.readable].iov_base;
454
455 siov = &iov[1];
456 nsiov = n - 2;
457
458 switch (hdr->vnch_class) {
459 case VIRTIO_NET_CTRL_RX:
460 *ackp = pci_viona_control_rx(vq, hdr, siov, nsiov);
461 break;
462 case VIRTIO_NET_CTRL_MAC:
463 *ackp = pci_viona_control_mac(vq, hdr, siov, nsiov);
464 break;
465 default:
466 EPRINTLN("viona: unrecognised control class %u, cmd %u",
467 hdr->vnch_class, hdr->vnch_command);
468 *ackp = VIRTIO_NET_CQ_ERR;
469 break;
470 }
471
472 drop:
473 len = 0;
474 for (uint_t i = 0; i < n; i++)
475 len += iov[i].iov_len;
476
477 vq_relchain(vq, req.idx, len);
478 }
479
static void
pci_viona_process_ctrlq(struct vqueue_info *vq)
{
	/*
	 * Drain the control queue, suppressing guest kicks while we work.
	 */
	do {
		vq_kick_disable(vq);

		while (vq_has_descs(vq))
			pci_viona_control(vq);

		vq_kick_enable(vq);

		/*
		 * One more check in case a late addition raced with
		 * re-enabling kicks. Note that vq_kick_enable() includes a
		 * memory barrier.
		 */
	} while (vq_has_descs(vq));

	vq_endchains(vq, /* used_all_avail= */1);
}
503
/*
 * Interrupt-delivery thread: blocks in poll(2) on the viona control fd and,
 * when the driver flags ring interrupts (POLLRDBAND), forwards each pending
 * ring interrupt to the guest via MSI-X or, failing that, the legacy INTx
 * line, clearing the in-kernel interrupt state as it goes.
 */
static void *
pci_viona_poll_thread(void *param)
{
	struct pci_viona_softc *sc = param;
	pollfd_t pollset;
	const int fd = sc->vsc_vnafd;

	pollset.fd = fd;
	pollset.events = POLLRDBAND;

	for (;;) {
		if (poll(&pollset, 1, -1) < 0) {
			if (errno == EINTR || errno == EAGAIN) {
				/* Transient; just poll again. */
				continue;
			} else {
				WPRINTF("pci_viona_poll_thread poll() error %d",
				    errno);
				break;
			}
		}
		if (pollset.revents & POLLRDBAND) {
			vioc_intr_poll_t vip;
			uint_t i;
			int res;
			bool assert_lintr = false;
			const bool do_msix = pci_msix_enabled(sc->vsc_vs.vs_pi);

			/* Fetch per-ring interrupt status from the driver. */
			res = ioctl(fd, VNA_IOC_INTR_POLL, &vip);
			for (i = 0; res > 0 && i < VIONA_VQ_MAX; i++) {
				if (vip.vip_status[i] == 0) {
					continue;
				}
				if (do_msix) {
					pci_generate_msix(sc->vsc_vs.vs_pi,
					    sc->vsc_queues[i].vq_msix_idx);
				} else {
					/* Defer INTx until all rings seen. */
					assert_lintr = true;
				}
				/* Ack the ring so it can interrupt again. */
				res = ioctl(fd, VNA_IOC_RING_INTR_CLR, i);
				if (res != 0) {
					WPRINTF("ioctl viona vq %d intr "
					    "clear failed %d", i, errno);
				}
			}
			if (assert_lintr) {
				/* ISR update and INTx assert under the lock. */
				pthread_mutex_lock(&sc->vsc_mtx);
				sc->vsc_vs.vs_isr |= VIRTIO_PCI_ISR_INTR;
				pci_lintr_assert(sc->vsc_vs.vs_pi);
				pthread_mutex_unlock(&sc->vsc_mtx);
			}
		}
	}

	pthread_exit(NULL);
}
559
560 static void
pci_viona_ring_init(struct pci_viona_softc * sc,uint64_t pfn)561 pci_viona_ring_init(struct pci_viona_softc *sc, uint64_t pfn)
562 {
563 int qnum = sc->vsc_vs.vs_curq;
564 vioc_ring_init_t vna_ri;
565 int error;
566
567 assert(qnum < VIONA_MAXQ);
568
569 if (qnum == VIONA_CTLQ) {
570 vi_vq_init(&sc->vsc_vs, pfn);
571 return;
572 }
573
574 sc->vsc_queues[qnum].vq_pfn = (pfn << VRING_PFN);
575 vna_ri.ri_index = qnum;
576 vna_ri.ri_qsize = pci_viona_qsize(sc, qnum);
577 vna_ri.ri_qaddr = (pfn << VRING_PFN);
578 error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_INIT, &vna_ri);
579
580 if (error != 0) {
581 WPRINTF("ioctl viona ring %u init failed %d", qnum, errno);
582 }
583 }
584
585 static int
pci_viona_viona_init(struct vmctx * ctx,struct pci_viona_softc * sc)586 pci_viona_viona_init(struct vmctx *ctx, struct pci_viona_softc *sc)
587 {
588 vioc_create_t vna_create;
589 int error;
590
591 sc->vsc_vnafd = open("/dev/viona", O_RDWR | O_EXCL);
592 if (sc->vsc_vnafd == -1) {
593 WPRINTF("open viona ctl failed: %d", errno);
594 return (-1);
595 }
596
597 vna_create.c_linkid = sc->vsc_linkid;
598 vna_create.c_vmfd = vm_get_device_fd(ctx);
599 error = ioctl(sc->vsc_vnafd, VNA_IOC_CREATE, &vna_create);
600 if (error != 0) {
601 (void) close(sc->vsc_vnafd);
602 WPRINTF("ioctl viona create failed %d", errno);
603 return (-1);
604 }
605
606 return (0);
607 }
608
609 static int
pci_viona_legacy_config(nvlist_t * nvl,const char * opt)610 pci_viona_legacy_config(nvlist_t *nvl, const char *opt)
611 {
612 char *config, *name, *tofree, *value;
613
614 if (opt == NULL)
615 return (0);
616
617 config = tofree = strdup(opt);
618 while ((name = strsep(&config, ",")) != NULL) {
619 value = strchr(name, '=');
620 if (value != NULL) {
621 *value++ = '\0';
622 set_config_value_node(nvl, name, value);
623 } else {
624 set_config_value_node(nvl, "vnic", name);
625 }
626 }
627 free(tofree);
628 return (0);
629 }
630
631 static int
pci_viona_parse_opts(struct pci_viona_softc * sc,nvlist_t * nvl)632 pci_viona_parse_opts(struct pci_viona_softc *sc, nvlist_t *nvl)
633 {
634 const char *value;
635 int err = 0;
636
637 sc->vsc_vq_size = VIONA_RINGSZ;
638 sc->vsc_feature_mask = 0;
639 sc->vsc_linkname[0] = '\0';
640
641 value = get_config_value_node(nvl, "feature_mask");
642 if (value != NULL) {
643 long num;
644
645 errno = 0;
646 num = strtol(value, NULL, 0);
647 if (errno != 0 || num < 0) {
648 fprintf(stderr,
649 "viona: invalid mask '%s'", value);
650 } else {
651 sc->vsc_feature_mask = num;
652 }
653 }
654
655 value = get_config_value_node(nvl, "vqsize");
656 if (value != NULL) {
657 long num;
658
659 errno = 0;
660 num = strtol(value, NULL, 0);
661 if (errno != 0) {
662 fprintf(stderr,
663 "viona: invalid vsqize '%s'", value);
664 err = -1;
665 } else if (num <= 2 || num > 32768) {
666 fprintf(stderr,
667 "viona: vqsize out of range", num);
668 err = -1;
669 } else if ((1 << (ffs(num) - 1)) != num) {
670 fprintf(stderr,
671 "viona: vqsize must be power of 2", num);
672 err = -1;
673 } else {
674 sc->vsc_vq_size = num;
675 }
676 }
677
678 value = get_config_value_node(nvl, "vnic");
679 if (value == NULL) {
680 fprintf(stderr, "viona: vnic name required");
681 err = -1;
682 } else {
683 (void) strlcpy(sc->vsc_linkname, value, MAXLINKNAMELEN);
684 }
685
686 DPRINTF("viona=%p dev=%s vqsize=%x feature_mask=%x", sc,
687 sc->vsc_linkname, sc->vsc_vq_size, sc->vsc_feature_mask);
688 return (err);
689 }
690
691 static int
pci_viona_init(struct pci_devinst * pi,nvlist_t * nvl)692 pci_viona_init(struct pci_devinst *pi, nvlist_t *nvl)
693 {
694 dladm_handle_t handle;
695 dladm_status_t status;
696 dladm_vnic_attr_t attr;
697 char errmsg[DLADM_STRSIZE];
698 char tname[MAXCOMLEN + 1];
699 int error, i;
700 struct pci_viona_softc *sc;
701 const char *vnic;
702 pthread_t tid;
703
704 if (get_config_bool_default("viona.debug", false))
705 pci_viona_debug = 1;
706
707 vnic = get_config_value_node(nvl, "vnic");
708 if (vnic == NULL) {
709 WPRINTF("virtio-viona: vnic required");
710 return (1);
711 }
712
713 sc = malloc(sizeof (struct pci_viona_softc));
714 memset(sc, 0, sizeof (struct pci_viona_softc));
715
716 if (pci_viona_parse_opts(sc, nvl) != 0) {
717 free(sc);
718 return (1);
719 }
720
721 if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) {
722 WPRINTF("could not open /dev/dld");
723 free(sc);
724 return (1);
725 }
726
727 if ((status = dladm_name2info(handle, sc->vsc_linkname, &sc->vsc_linkid,
728 NULL, NULL, NULL)) != DLADM_STATUS_OK) {
729 WPRINTF("dladm_name2info() for %s failed: %s", vnic,
730 dladm_status2str(status, errmsg));
731 dladm_close(handle);
732 free(sc);
733 return (1);
734 }
735
736 if ((status = dladm_vnic_info(handle, sc->vsc_linkid, &attr,
737 DLADM_OPT_ACTIVE)) != DLADM_STATUS_OK) {
738 WPRINTF("dladm_vnic_info() for %s failed: %s", vnic,
739 dladm_status2str(status, errmsg));
740 dladm_close(handle);
741 free(sc);
742 return (1);
743 }
744
745 memcpy(sc->vsc_macaddr, attr.va_mac_addr, ETHERADDRL);
746
747 dladm_close(handle);
748
749 error = pci_viona_viona_init(pi->pi_vmctx, sc);
750 if (error != 0) {
751 free(sc);
752 return (1);
753 }
754
755 error = pthread_create(&tid, NULL, pci_viona_poll_thread, sc);
756 assert(error == 0);
757 snprintf(tname, sizeof (tname), "vionapoll:%s", vnic);
758 pthread_set_name_np(tid, tname);
759
760 /* initialize config space */
761 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
762 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
763 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
764 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK);
765 pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
766
767 sc->vsc_consts = viona_vi_consts;
768 pthread_mutex_init(&sc->vsc_mtx, NULL);
769
770 /*
771 * The RX and TX queues are handled in the kernel component of
772 * viona; however The control queue is emulated in userspace.
773 */
774 sc->vsc_queues[VIONA_CTLQ].vq_qsize = pci_viona_qsize(sc, VIONA_CTLQ);
775
776 vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
777 sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
778
779 /*
780 * Guests that do not support CTRL_RX_MAC still generally need to
781 * receive multicast packets. Guests that do support this feature will
782 * end up setting this flag indirectly via messages on the control
783 * queue but it does not hurt to default to multicast promiscuity here
784 * and it is what older version of viona did.
785 */
786 sc->vsc_promisc_mmac = true;
787 pci_viona_eval_promisc(sc);
788
789 /* MSI-X support */
790 for (i = 0; i < VIONA_MAXQ; i++)
791 sc->vsc_queues[i].vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
792
793 /* BAR 1 used to map MSI-X table and PBA */
794 if (pci_emul_add_msixcap(pi, VIONA_MAXQ, 1)) {
795 free(sc);
796 return (1);
797 }
798
799 /* BAR 0 for legacy-style virtio register access. */
800 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VIONA_REGSZ);
801 if (error != 0) {
802 WPRINTF("could not allocate virtio BAR");
803 free(sc);
804 return (1);
805 }
806
807 /*
808 * Need a legacy interrupt for virtio compliance, even though MSI-X
809 * operation is _strongly_ suggested for adequate performance.
810 */
811 pci_lintr_request(pi);
812
813 return (0);
814 }
815
static uint64_t
viona_adjust_offset(struct pci_devinst *pi, uint64_t offset)
{
	/*
	 * Device specific offsets used by guest would change based on
	 * whether MSI-X capability is enabled or not
	 */
	if (pci_msix_enabled(pi))
		return (offset);

	if (offset >= VIRTIO_PCI_CONFIG_OFF(0)) {
		offset += VIRTIO_PCI_CONFIG_OFF(1) - VIRTIO_PCI_CONFIG_OFF(0);
	}

	return (offset);
}
832
/*
 * Push the MSI-X address/data pair for a RX/TX ring down to the viona
 * driver.  If the ring's vector is unset, masked, or MSI-X is inactive,
 * a zeroed address/message is sent, disabling MSI for that ring.
 * Caller is expected to hold vsc_mtx (all callers in this file do).
 */
static void
pci_viona_ring_set_msix(struct pci_devinst *pi, uint_t ring)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	struct msix_table_entry mte;
	uint16_t tab_index;
	vioc_ring_msi_t vrm;
	int res;

	/* The control queue is emulated in userspace; nothing to configure. */
	if (ring == VIONA_CTLQ)
		return;

	assert(ring <= VIONA_VQ_TX);

	/* Default to "no MSI": zero address and message. */
	vrm.rm_index = ring;
	vrm.rm_addr = 0;
	vrm.rm_msg = 0;
	tab_index = sc->vsc_queues[ring].vq_msix_idx;

	if (tab_index != VIRTIO_MSI_NO_VECTOR && sc->vsc_msix_active) {
		mte = pi->pi_msix.table[tab_index];
		if ((mte.vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
			/* Entry is unmasked: forward its address/data. */
			vrm.rm_addr = mte.addr;
			vrm.rm_msg = mte.msg_data;
		}
	}

	res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_SET_MSI, &vrm);
	if (res != 0) {
		WPRINTF("ioctl viona set_msi %d failed %d", ring, errno);
	}
}
865
866 static void
pci_viona_lintrupdate(struct pci_devinst * pi)867 pci_viona_lintrupdate(struct pci_devinst *pi)
868 {
869 struct pci_viona_softc *sc = pi->pi_arg;
870 bool msix_on = false;
871
872 pthread_mutex_lock(&sc->vsc_mtx);
873 msix_on = pci_msix_enabled(pi) && (pi->pi_msix.function_mask == 0);
874 if ((sc->vsc_msix_active && !msix_on) ||
875 (msix_on && !sc->vsc_msix_active)) {
876 uint_t i;
877
878 sc->vsc_msix_active = msix_on;
879 /* Update in-kernel ring configs */
880 for (i = 0; i <= VIONA_VQ_TX; i++) {
881 pci_viona_ring_set_msix(pi, i);
882 }
883 }
884 pthread_mutex_unlock(&sc->vsc_mtx);
885 }
886
887 static void
pci_viona_msix_update(struct pci_devinst * pi,uint64_t offset)888 pci_viona_msix_update(struct pci_devinst *pi, uint64_t offset)
889 {
890 struct pci_viona_softc *sc = pi->pi_arg;
891 uint_t tab_index, i;
892
893 pthread_mutex_lock(&sc->vsc_mtx);
894 if (!sc->vsc_msix_active) {
895 pthread_mutex_unlock(&sc->vsc_mtx);
896 return;
897 }
898
899 /*
900 * Rather than update every possible MSI-X vector, cheat and use the
901 * offset to calculate the entry within the table. Since this should
902 * only be called when a write to the table succeeds, the index should
903 * be valid.
904 */
905 tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
906
907 for (i = 0; i <= VIONA_VQ_TX; i++) {
908 if (sc->vsc_queues[i].vq_msix_idx != tab_index) {
909 continue;
910 }
911 pci_viona_ring_set_msix(pi, i);
912 }
913
914 pthread_mutex_unlock(&sc->vsc_mtx);
915 }
916
917 static void
pci_viona_qnotify(struct pci_viona_softc * sc,int ring)918 pci_viona_qnotify(struct pci_viona_softc *sc, int ring)
919 {
920 int error;
921
922 switch (ring) {
923 case VIONA_TXQ:
924 case VIONA_RXQ:
925 error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_KICK, ring);
926 if (error != 0) {
927 WPRINTF("ioctl viona ring %d kick failed %d",
928 ring, errno);
929 }
930 break;
931 case VIONA_CTLQ: {
932 struct vqueue_info *vq = &sc->vsc_queues[VIONA_CTLQ];
933
934 if (vq_has_descs(vq))
935 pci_viona_process_ctrlq(vq);
936 break;
937 }
938 }
939 }
940
941 static void
pci_viona_baraddr(struct pci_devinst * pi,int baridx,int enabled,uint64_t address)942 pci_viona_baraddr(struct pci_devinst *pi, int baridx, int enabled,
943 uint64_t address)
944 {
945 struct pci_viona_softc *sc = pi->pi_arg;
946 uint64_t ioport;
947 int error;
948
949 if (baridx != 0)
950 return;
951
952 if (enabled == 0) {
953 error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, 0);
954 if (error != 0)
955 WPRINTF("uninstall ioport hook failed %d", errno);
956 return;
957 }
958
959 /*
960 * Install ioport hook for virtqueue notification.
961 * This is part of the virtio common configuration area so the
962 * address does not change with MSI-X status.
963 */
964 ioport = address + VIRTIO_PCI_QUEUE_NOTIFY;
965 error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, ioport);
966 if (error != 0) {
967 WPRINTF("install ioport hook at %x failed %d",
968 ioport, errno);
969 }
970 }
971
972 static void
pci_viona_write(struct pci_devinst * pi,int baridx,uint64_t offset,int size,uint64_t value)973 pci_viona_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
974 uint64_t value)
975 {
976 struct pci_viona_softc *sc = pi->pi_arg;
977 void *ptr;
978 int err = 0;
979
980 if (baridx == pci_msix_table_bar(pi) ||
981 baridx == pci_msix_pba_bar(pi)) {
982 if (pci_emul_msix_twrite(pi, offset, size, value) == 0) {
983 pci_viona_msix_update(pi, offset);
984 }
985 return;
986 }
987
988 assert(baridx == 0);
989
990 if (offset + size > pci_viona_iosize(pi)) {
991 DPRINTF("viona_write: 2big, offset %ld size %d",
992 offset, size);
993 return;
994 }
995
996 pthread_mutex_lock(&sc->vsc_mtx);
997
998 offset = viona_adjust_offset(pi, offset);
999
1000 switch (offset) {
1001 case VIRTIO_PCI_GUEST_FEATURES:
1002 assert(size == 4);
1003 value &= ~(sc->vsc_feature_mask);
1004 err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_FEATURES, &value);
1005 if (err != 0) {
1006 WPRINTF("ioctl feature negotiation returned err = %d",
1007 errno);
1008 } else {
1009 sc->vsc_vs.vs_negotiated_caps = value;
1010 }
1011 break;
1012 case VIRTIO_PCI_QUEUE_PFN:
1013 assert(size == 4);
1014 pci_viona_ring_init(sc, value);
1015 break;
1016 case VIRTIO_PCI_QUEUE_SEL:
1017 assert(size == 2);
1018 assert(value < VIONA_MAXQ);
1019 sc->vsc_vs.vs_curq = value;
1020 break;
1021 case VIRTIO_PCI_QUEUE_NOTIFY:
1022 assert(size == 2);
1023 assert(value < VIONA_MAXQ);
1024 pci_viona_qnotify(sc, value);
1025 break;
1026 case VIRTIO_PCI_STATUS:
1027 assert(size == 1);
1028 pci_viona_update_status(sc, value);
1029 break;
1030 case VIRTIO_MSI_CONFIG_VECTOR:
1031 assert(size == 2);
1032 sc->vsc_vs.vs_msix_cfg_idx = value;
1033 break;
1034 case VIRTIO_MSI_QUEUE_VECTOR:
1035 assert(size == 2);
1036 assert(sc->vsc_vs.vs_curq < VIONA_MAXQ);
1037 sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx = value;
1038 pci_viona_ring_set_msix(pi, sc->vsc_vs.vs_curq);
1039 break;
1040 case VIONA_R_CFG0:
1041 case VIONA_R_CFG1:
1042 case VIONA_R_CFG2:
1043 case VIONA_R_CFG3:
1044 case VIONA_R_CFG4:
1045 case VIONA_R_CFG5:
1046 assert((size + offset) <= (VIONA_R_CFG5 + 1));
1047 ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0];
1048 /*
1049 * The driver is allowed to change the MAC address
1050 */
1051 sc->vsc_macaddr[offset - VIONA_R_CFG0] = value;
1052 if (size == 1) {
1053 *(uint8_t *)ptr = value;
1054 } else if (size == 2) {
1055 *(uint16_t *)ptr = value;
1056 } else {
1057 *(uint32_t *)ptr = value;
1058 }
1059 break;
1060 case VIRTIO_PCI_HOST_FEATURES:
1061 case VIRTIO_PCI_QUEUE_NUM:
1062 case VIRTIO_PCI_ISR:
1063 case VIONA_R_CFG6:
1064 case VIONA_R_CFG7:
1065 DPRINTF("viona: write to readonly reg %ld", offset);
1066 break;
1067 default:
1068 DPRINTF("viona: unknown i/o write offset %ld", offset);
1069 value = 0;
1070 break;
1071 }
1072
1073 pthread_mutex_unlock(&sc->vsc_mtx);
1074 }
1075
1076 static uint64_t
pci_viona_read(struct pci_devinst * pi,int baridx,uint64_t offset,int size)1077 pci_viona_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
1078 {
1079 struct pci_viona_softc *sc = pi->pi_arg;
1080 void *ptr;
1081 uint64_t value;
1082 int err = 0;
1083
1084 if (baridx == pci_msix_table_bar(pi) ||
1085 baridx == pci_msix_pba_bar(pi)) {
1086 return (pci_emul_msix_tread(pi, offset, size));
1087 }
1088
1089 assert(baridx == 0);
1090
1091 if (offset + size > pci_viona_iosize(pi)) {
1092 DPRINTF("viona_read: 2big, offset %ld size %d",
1093 offset, size);
1094 return (0);
1095 }
1096
1097 pthread_mutex_lock(&sc->vsc_mtx);
1098
1099 offset = viona_adjust_offset(pi, offset);
1100
1101 switch (offset) {
1102 case VIRTIO_PCI_HOST_FEATURES:
1103 assert(size == 4);
1104 err = ioctl(sc->vsc_vnafd, VNA_IOC_GET_FEATURES, &value);
1105 if (err != 0) {
1106 WPRINTF("ioctl get host features returned err = %d",
1107 errno);
1108 }
1109 value |= VIONA_S_HOSTCAPS_USERSPACE;
1110 value &= ~sc->vsc_feature_mask;
1111 sc->vsc_consts.vc_hv_caps = value;
1112 break;
1113 case VIRTIO_PCI_GUEST_FEATURES:
1114 assert(size == 4);
1115 value = sc->vsc_vs.vs_negotiated_caps; /* XXX never read ? */
1116 break;
1117 case VIRTIO_PCI_QUEUE_PFN:
1118 assert(size == 4);
1119 value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_pfn >> VRING_PFN;
1120 break;
1121 case VIRTIO_PCI_QUEUE_NUM:
1122 assert(size == 2);
1123 value = pci_viona_qsize(sc, sc->vsc_vs.vs_curq);
1124 break;
1125 case VIRTIO_PCI_QUEUE_SEL:
1126 assert(size == 2);
1127 value = sc->vsc_vs.vs_curq; /* XXX never read ? */
1128 break;
1129 case VIRTIO_PCI_QUEUE_NOTIFY:
1130 assert(size == 2);
1131 value = sc->vsc_vs.vs_curq; /* XXX never read ? */
1132 break;
1133 case VIRTIO_PCI_STATUS:
1134 assert(size == 1);
1135 value = sc->vsc_vs.vs_status;
1136 break;
1137 case VIRTIO_PCI_ISR:
1138 assert(size == 1);
1139 value = sc->vsc_vs.vs_isr;
1140 sc->vsc_vs.vs_isr = 0; /* a read clears this flag */
1141 if (value != 0) {
1142 pci_lintr_deassert(pi);
1143 }
1144 break;
1145 case VIRTIO_MSI_CONFIG_VECTOR:
1146 assert(size == 2);
1147 value = sc->vsc_vs.vs_msix_cfg_idx;
1148 break;
1149 case VIRTIO_MSI_QUEUE_VECTOR:
1150 assert(size == 2);
1151 assert(sc->vsc_vs.vs_curq < VIONA_MAXQ);
1152 value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx;
1153 break;
1154 case VIONA_R_CFG0:
1155 case VIONA_R_CFG1:
1156 case VIONA_R_CFG2:
1157 case VIONA_R_CFG3:
1158 case VIONA_R_CFG4:
1159 case VIONA_R_CFG5:
1160 assert((size + offset) <= (VIONA_R_CFG5 + 1));
1161 ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0];
1162 if (size == 1) {
1163 value = *(uint8_t *)ptr;
1164 } else if (size == 2) {
1165 value = *(uint16_t *)ptr;
1166 } else {
1167 value = *(uint32_t *)ptr;
1168 }
1169 break;
1170 case VIONA_R_CFG6:
1171 assert(size != 4);
1172 value = 0x01; /* XXX link always up */
1173 break;
1174 case VIONA_R_CFG7:
1175 assert(size == 1);
1176 value = 0; /* XXX link status in LSB */
1177 break;
1178 default:
1179 DPRINTF("viona: unknown i/o read offset %ld", offset);
1180 value = 0;
1181 break;
1182 }
1183
1184 pthread_mutex_unlock(&sc->vsc_mtx);
1185
1186 return (value);
1187 }
1188
/*
 * Device-emulation registration for "virtio-net-viona".  BAR reads and
 * writes are handled directly here rather than through the common virtio
 * framework (see the callbacks left NULL in viona_vi_consts).
 */
struct pci_devemu pci_de_viona = {
	.pe_emu = "virtio-net-viona",
	.pe_init = pci_viona_init,
	.pe_legacy_config = pci_viona_legacy_config,
	.pe_barwrite = pci_viona_write,
	.pe_barread = pci_viona_read,
	.pe_baraddr = pci_viona_baraddr,
	.pe_lintrupdate = pci_viona_lintrupdate
};
PCI_EMUL_SET(pci_de_viona);
1199