/*
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <sys/viona_io.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <poll.h>
#include <libdladm.h>
#include <libdllink.h>
#include <libdlvnic.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "pci_emul.h"
#include "virtio.h"
#include "iov.h"
#include "virtio_net.h"

#define	VIONA_RINGSZ		1024
#define	VIONA_CTLQ_SIZE		64
#define	VIONA_CTLQ_MAXSEGS	32

/*
 * PCI config-space register offsets
 */
#define	VIONA_R_CFG0	24
#define	VIONA_R_CFG1	25
#define	VIONA_R_CFG2	26
#define	VIONA_R_CFG3	27
#define	VIONA_R_CFG4	28
#define	VIONA_R_CFG5	29
#define	VIONA_R_CFG6	30
#define	VIONA_R_CFG7	31
#define	VIONA_R_MAX	31

#define	VIONA_REGSZ	(VIONA_R_MAX + 1)

/*
 * Queue definitions.
 */
#define	VIONA_RXQ	0
#define	VIONA_TXQ	1
#define	VIONA_CTLQ	2

#define	VIONA_MAXQ	3

/*
 * Supplementary host capabilities provided in the userspace component.
 */
#define	VIONA_S_HOSTCAPS_USERSPACE	( \
	VIRTIO_NET_F_CTRL_VQ | \
	VIRTIO_NET_F_CTRL_RX)

/*
 * Debug printf
 */
static volatile int pci_viona_debug;
#define	DPRINTF(fmt, arg...) \
	do { \
		if (pci_viona_debug) { \
			FPRINTLN(stdout, fmt, ##arg); \
			fflush(stdout); \
		} \
	} while (0)
#define	WPRINTF(fmt, arg...) FPRINTLN(stderr, fmt, ##arg)

/*
 * Per-device softc
 */
struct pci_viona_softc {
	struct virtio_softc	vsc_vs;
	struct virtio_consts	vsc_consts;
	struct vqueue_info	vsc_queues[VIONA_MAXQ];
	pthread_mutex_t		vsc_mtx;

	datalink_id_t		vsc_linkid;
	int			vsc_vnafd;

	/* Configurable parameters */
	char			vsc_linkname[MAXLINKNAMELEN];
	uint32_t		vsc_feature_mask;
	uint16_t		vsc_vq_size;

	uint8_t			vsc_macaddr[6];

	bool			vsc_resetting;
	bool			vsc_msix_active;

	viona_promisc_t		vsc_promisc;		/* Current promisc mode */
	bool			vsc_promisc_promisc;	/* PROMISC enabled */
	bool			vsc_promisc_allmulti;	/* ALLMULTI enabled */
	bool			vsc_promisc_umac;	/* unicast MACs sent */
	bool			vsc_promisc_mmac;	/* multicast MACs sent */
};

static struct virtio_consts viona_vi_consts = {
	.vc_name = "viona",
	.vc_nvq = VIONA_MAXQ,
	/*
	 * We use the common bhyve virtio framework so that we can call
	 * the utility functions to work with the queues handled in userspace.
	 * The framework PCI read/write functions are not used so these
	 * callbacks will not be invoked.
	 */
	.vc_cfgsize = 0,
	.vc_reset = NULL,
	.vc_qnotify = NULL,
	.vc_cfgread = NULL,
	.vc_cfgwrite = NULL,
	.vc_apply_features = NULL,
	/*
	 * The following field is populated using the response from the
	 * viona driver during initialisation, augmented with the additional
	 * capabilities emulated in userspace.
	 */
	.vc_hv_caps = 0,
};

/*
 * Return the size of the I/O BAR that maps the virtio header and the
 * device-specific region. The size varies depending on whether MSI-X is
 * enabled.
 */
static uint64_t
pci_viona_iosize(struct pci_devinst *pi)
{
	if (pci_msix_enabled(pi)) {
		return (VIONA_REGSZ);
	} else {
		return (VIONA_REGSZ -
		    (VIRTIO_PCI_CONFIG_OFF(1) - VIRTIO_PCI_CONFIG_OFF(0)));
	}
}

static uint16_t
pci_viona_qsize(struct pci_viona_softc *sc, int qnum)
{
	if (qnum == VIONA_CTLQ)
		return (VIONA_CTLQ_SIZE);

	return (sc->vsc_vq_size);
}

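/*
 * Ask the in-kernel viona driver to reset one of the rings it manages
 * (RX or TX). The control queue lives entirely in userspace and is not
 * reset here. The ioctl is retried if interrupted by a signal.
 */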
static void
pci_viona_ring_reset(struct pci_viona_softc *sc, int ring)
{
	assert(ring < VIONA_MAXQ);

	switch (ring) {
	case VIONA_RXQ:
	case VIONA_TXQ:
		break;
	case VIONA_CTLQ:
	default:
		return;
	}

	for (;;) {
		int res;

		res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_RESET, ring);
		if (res == 0) {
			break;
		} else if (errno != EINTR) {
			WPRINTF("ioctl viona ring %d reset failed %d",
			    ring, errno);
			return;
		}
	}
}

static void
pci_viona_update_status(struct pci_viona_softc *sc, uint32_t value)
{
	if (value == 0) {
		DPRINTF("viona: device reset requested!");

		vi_reset_dev(&sc->vsc_vs);
		pci_viona_ring_reset(sc, VIONA_RXQ);
		pci_viona_ring_reset(sc, VIONA_TXQ);
	}

	sc->vsc_vs.vs_status = value;
}

static const char *
pci_viona_promisc_descr(viona_promisc_t mode)
{
	switch (mode) {
	case VIONA_PROMISC_NONE:
		return ("none");
	case VIONA_PROMISC_MULTI:
		return ("multicast");
	case VIONA_PROMISC_ALL:
		return ("all");
	default:
		abort();
	}
}

static int
pci_viona_eval_promisc(struct pci_viona_softc *sc)
{
	viona_promisc_t mode = VIONA_PROMISC_NONE;
	int err = 0;

	/*
	 * If the guest has explicitly requested promiscuous mode or has sent a
	 * non-empty unicast MAC address table, then set viona to promiscuous
	 * mode. Otherwise, if the guest has explicitly requested multicast
	 * promiscuity or has sent a non-empty multicast MAC address table,
	 * then set viona to multicast promiscuous mode.
	 */
	if (sc->vsc_promisc_promisc || sc->vsc_promisc_umac)
		mode = VIONA_PROMISC_ALL;
	else if (sc->vsc_promisc_allmulti || sc->vsc_promisc_mmac)
		mode = VIONA_PROMISC_MULTI;

	if (mode != sc->vsc_promisc) {
		DPRINTF("viona: setting promiscuous mode to %d (%s)",
		    mode, pci_viona_promisc_descr(mode));
		DPRINTF("       promisc=%u, umac=%u, allmulti=%u, mmac=%u",
		    sc->vsc_promisc_promisc, sc->vsc_promisc_umac,
		    sc->vsc_promisc_allmulti, sc->vsc_promisc_mmac);

		err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_PROMISC, mode);
		if (err == 0)
			sc->vsc_promisc = mode;
		else
			WPRINTF("ioctl viona set promisc failed %d", errno);
	}

	return (err);
}

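/*
 * Handle a VIRTIO_NET_CTRL_RX class control message. These carry a
 * single byte that toggles promiscuous or all-multicast reception.
 */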
static uint8_t
pci_viona_control_rx(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr,
    struct iovec *iov, size_t niov)
{
	struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs;
	uint8_t v;

	if (iov[0].iov_len != sizeof (uint8_t) || niov != 1) {
		EPRINTLN("viona: bad control RX data");
		return (VIRTIO_NET_CQ_ERR);
	}

	v = *(uint8_t *)iov[0].iov_base;

	switch (hdr->vnch_command) {
	case VIRTIO_NET_CTRL_RX_PROMISC:
		DPRINTF("viona: ctrl RX promisc %d", v);
		sc->vsc_promisc_promisc = (v != 0);
		break;
	case VIRTIO_NET_CTRL_RX_ALLMULTI:
		DPRINTF("viona: ctrl RX allmulti %d", v);
		sc->vsc_promisc_allmulti = (v != 0);
		break;
	default:
		/*
		 * VIRTIO_NET_F_CTRL_RX_EXTRA was not offered so no other
		 * commands are expected.
		 */
		EPRINTLN("viona: unrecognised RX control cmd %u",
		    hdr->vnch_command);
		return (VIRTIO_NET_CQ_ERR);
	}

	if (pci_viona_eval_promisc(sc) == 0)
		return (VIRTIO_NET_CQ_OK);
	return (VIRTIO_NET_CQ_ERR);
}

static void
pci_viona_control_mac_dump(const char *tag, const struct iovec *iov)
{
	virtio_net_ctrl_mac_t *table = (virtio_net_ctrl_mac_t *)iov->iov_base;
	ether_addr_t *mac = &table->vncm_mac;

	DPRINTF("-- %s MAC TABLE (entries: %u)", tag, table->vncm_entries);

	if (table->vncm_entries * ETHERADDRL !=
	    iov->iov_len - sizeof (table->vncm_entries)) {
		DPRINTF(" Bad table size %zu", iov->iov_len);
		return;
	}

	for (uint32_t i = 0; i < table->vncm_entries; i++) {
		DPRINTF(" [%2u] %s", i, ether_ntoa((struct ether_addr *)mac));
		mac++;
	}
}

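/*
 * Handle a VIRTIO_NET_CTRL_MAC class control message. The MAC filter
 * tables are not pushed down to viona; they are only used to decide
 * whether promiscuous or multicast promiscuous mode is required.
 */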
static uint8_t
pci_viona_control_mac(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr,
    struct iovec *iov, size_t niov)
{
	struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs;

	switch (hdr->vnch_command) {
	case VIRTIO_NET_CTRL_MAC_TABLE_SET: {
		virtio_net_ctrl_mac_t *table;

		DPRINTF("viona: ctrl MAC table set");

		if (niov != 2) {
			EPRINTLN("viona: bad control MAC data");
			return (VIRTIO_NET_CQ_ERR);
		}

		/*
		 * We advertise VIRTIO_NET_F_CTRL_RX and therefore need to
		 * accept VIRTIO_NET_CTRL_MAC, but we don't support passing
		 * changes in the MAC address lists down to viona.
		 * Instead, we set flags to indicate if the guest has sent
		 * any MAC addresses for each table, and use these to determine
		 * the resulting promiscuous mode, see pci_viona_eval_promisc()
		 * above.
		 */

		/* Unicast MAC table */
		table = (virtio_net_ctrl_mac_t *)iov[0].iov_base;
		sc->vsc_promisc_umac = (table->vncm_entries != 0);
		if (pci_viona_debug)
			pci_viona_control_mac_dump("UNICAST", &iov[0]);

		/* Multicast MAC table */
		table = (virtio_net_ctrl_mac_t *)iov[1].iov_base;
		sc->vsc_promisc_mmac = (table->vncm_entries != 0);
		if (pci_viona_debug)
			pci_viona_control_mac_dump("MULTICAST", &iov[1]);

		break;
	}
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		/* disallow setting the primary filter MAC address */
		DPRINTF("viona: ctrl MAC addr set %zu", niov);
		return (VIRTIO_NET_CQ_ERR);
	default:
		EPRINTLN("viona: unrecognised MAC control cmd %u",
		    hdr->vnch_command);
		return (VIRTIO_NET_CQ_ERR);
	}

	if (pci_viona_eval_promisc(sc) == 0)
		return (VIRTIO_NET_CQ_OK);
	return (VIRTIO_NET_CQ_ERR);
}

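/*
 * Pull the next control message off the control queue, validate its
 * layout (header, data descriptors, writable ack byte), dispatch it to
 * the class-specific handler and return the chain to the guest.
 */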
static void
pci_viona_control(struct vqueue_info *vq)
{
	struct iovec iov[VIONA_CTLQ_MAXSEGS + 1];
	const virtio_net_ctrl_hdr_t *hdr;
	struct iovec *siov = iov;
	struct vi_req req = { 0 };
	uint8_t *ackp;
	size_t nsiov;
	uint32_t len;
	int n;

	n = vq_getchain(vq, iov, VIONA_CTLQ_MAXSEGS, &req);

	assert(n >= 1 && n <= VIONA_CTLQ_MAXSEGS);

	/*
	 * Since we have not negotiated VIRTIO_F_ANY_LAYOUT, we expect the
	 * control message to be laid out in at least three descriptors as
	 * follows:
	 *	header	- sizeof (virtio_net_ctrl_hdr_t)
	 *	data[]	- at least one descriptor, varying size
	 *	ack	- uint8_t, flagged as writable
	 * Check the incoming message to make sure it matches this layout and
	 * drop the entire chain if not.
	 */
	if (n < 3 || req.writable != 1 || req.readable + 1 != n ||
	    iov[req.readable].iov_len != sizeof (uint8_t)) {
		EPRINTLN("viona: bad control chain, len=%d, w=%d, r=%d",
		    n, req.writable, req.readable);
		goto drop;
	}

	hdr = (const virtio_net_ctrl_hdr_t *)iov[0].iov_base;
	if (iov[0].iov_len < sizeof (virtio_net_ctrl_hdr_t)) {
		EPRINTLN("viona: control header too short: %zu",
		    iov[0].iov_len);
		goto drop;
	}

	/*
	 * Writable iovecs start at iov[req.readable], and we've already
	 * checked that there is only one writable, it's at the end, and the
	 * right size; it's the acknowledgement byte.
	 */
	ackp = (uint8_t *)iov[req.readable].iov_base;

	siov = &iov[1];
	nsiov = n - 2;

	switch (hdr->vnch_class) {
	case VIRTIO_NET_CTRL_RX:
		*ackp = pci_viona_control_rx(vq, hdr, siov, nsiov);
		break;
	case VIRTIO_NET_CTRL_MAC:
		*ackp = pci_viona_control_mac(vq, hdr, siov, nsiov);
		break;
	default:
		EPRINTLN("viona: unrecognised control class %u, cmd %u",
		    hdr->vnch_class, hdr->vnch_command);
		*ackp = VIRTIO_NET_CQ_ERR;
		break;
	}

drop:
	len = 0;
	for (uint_t i = 0; i < n; i++)
		len += iov[i].iov_len;

	vq_relchain(vq, req.idx, len);
}

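/*
 * Drain the control queue, re-checking for late descriptor additions
 * after kicks are re-enabled, then notify the guest of the used chains.
 */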
static void
pci_viona_process_ctrlq(struct vqueue_info *vq)
{
	for (;;) {
		vq_kick_disable(vq);

		while (vq_has_descs(vq))
			pci_viona_control(vq);

		vq_kick_enable(vq);

		/*
		 * One more check in case a late addition raced with
		 * re-enabling kicks. Note that vq_kick_enable() includes a
		 * memory barrier.
		 */
		if (!vq_has_descs(vq))
			break;
	}

	vq_endchains(vq, /* used_all_avail= */ 1);
}

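/*
 * Worker thread which waits for interrupt-poll events from the viona
 * driver and converts them into MSI-X or legacy INTx interrupts for the
 * guest.
 */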
static void *
pci_viona_poll_thread(void *param)
{
	struct pci_viona_softc *sc = param;
	pollfd_t pollset;
	const int fd = sc->vsc_vnafd;

	pollset.fd = fd;
	pollset.events = POLLRDBAND;

	for (;;) {
		if (poll(&pollset, 1, -1) < 0) {
			if (errno == EINTR || errno == EAGAIN) {
				continue;
			} else {
				WPRINTF("pci_viona_poll_thread poll() error %d",
				    errno);
				break;
			}
		}
		if (pollset.revents & POLLRDBAND) {
			vioc_intr_poll_t vip;
			uint_t i;
			int res;
			bool assert_lintr = false;
			const bool do_msix = pci_msix_enabled(sc->vsc_vs.vs_pi);

			res = ioctl(fd, VNA_IOC_INTR_POLL, &vip);
			for (i = 0; res > 0 && i < VIONA_VQ_MAX; i++) {
				if (vip.vip_status[i] == 0) {
					continue;
				}
				if (do_msix) {
					pci_generate_msix(sc->vsc_vs.vs_pi,
					    sc->vsc_queues[i].vq_msix_idx);
				} else {
					assert_lintr = true;
				}
				res = ioctl(fd, VNA_IOC_RING_INTR_CLR, i);
				if (res != 0) {
					WPRINTF("ioctl viona vq %d intr "
					    "clear failed %d", i, errno);
				}
			}
			if (assert_lintr) {
				pthread_mutex_lock(&sc->vsc_mtx);
				sc->vsc_vs.vs_isr |= VIRTIO_PCI_ISR_INTR;
				pci_lintr_assert(sc->vsc_vs.vs_pi);
				pthread_mutex_unlock(&sc->vsc_mtx);
			}
		}
	}

	pthread_exit(NULL);
}

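/*
 * Program the guest ring address and size into the viona driver when the
 * guest writes a queue PFN. The control queue is instead set up in
 * userspace via the common virtio framework.
 */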
static void
pci_viona_ring_init(struct pci_viona_softc *sc, uint64_t pfn)
{
	int qnum = sc->vsc_vs.vs_curq;
	vioc_ring_init_t vna_ri;
	int error;

	assert(qnum < VIONA_MAXQ);

	if (qnum == VIONA_CTLQ) {
		vi_vq_init(&sc->vsc_vs, pfn);
		return;
	}

	sc->vsc_queues[qnum].vq_pfn = (pfn << VRING_PFN);
	vna_ri.ri_index = qnum;
	vna_ri.ri_qsize = pci_viona_qsize(sc, qnum);
	vna_ri.ri_qaddr = (pfn << VRING_PFN);
	error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_INIT, &vna_ri);

	if (error != 0) {
		WPRINTF("ioctl viona ring %d init failed %d", qnum, errno);
	}
}

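/*
 * Open a handle to the viona driver and create an instance bound to the
 * configured datalink and to this VM.
 */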
static int
pci_viona_viona_init(struct vmctx *ctx, struct pci_viona_softc *sc)
{
	vioc_create_t vna_create;
	int error;

	sc->vsc_vnafd = open("/dev/viona", O_RDWR | O_EXCL);
	if (sc->vsc_vnafd == -1) {
		WPRINTF("open viona ctl failed: %d", errno);
		return (-1);
	}

	vna_create.c_linkid = sc->vsc_linkid;
	vna_create.c_vmfd = vm_get_device_fd(ctx);
	error = ioctl(sc->vsc_vnafd, VNA_IOC_CREATE, &vna_create);
	if (error != 0) {
		(void) close(sc->vsc_vnafd);
		WPRINTF("ioctl viona create failed %d", errno);
		return (-1);
	}

	return (0);
}

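/*
 * Convert a legacy comma-separated option string into config nodes; a
 * bare value is treated as the VNIC name.
 */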
static int
pci_viona_legacy_config(nvlist_t *nvl, const char *opt)
{
	char *config, *name, *tofree, *value;

	if (opt == NULL)
		return (0);

	config = tofree = strdup(opt);
	while ((name = strsep(&config, ",")) != NULL) {
		value = strchr(name, '=');
		if (value != NULL) {
			*value++ = '\0';
			set_config_value_node(nvl, name, value);
		} else {
			set_config_value_node(nvl, "vnic", name);
		}
	}
	free(tofree);
	return (0);
}

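/*
 * Parse device options from the config nvlist: the backing VNIC name, an
 * optional feature mask and an optional virtqueue size (a power of two
 * between 4 and 32768).
 */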
static int
pci_viona_parse_opts(struct pci_viona_softc *sc, nvlist_t *nvl)
{
	const char *value;
	int err = 0;

	sc->vsc_vq_size = VIONA_RINGSZ;
	sc->vsc_feature_mask = 0;
	sc->vsc_linkname[0] = '\0';

	value = get_config_value_node(nvl, "feature_mask");
	if (value != NULL) {
		long num;

		errno = 0;
		num = strtol(value, NULL, 0);
		if (errno != 0 || num < 0) {
			fprintf(stderr,
			    "viona: invalid mask '%s'", value);
		} else {
			sc->vsc_feature_mask = num;
		}
	}

	value = get_config_value_node(nvl, "vqsize");
	if (value != NULL) {
		long num;

		errno = 0;
		num = strtol(value, NULL, 0);
		if (errno != 0) {
			fprintf(stderr,
			    "viona: invalid vqsize '%s'", value);
			err = -1;
		} else if (num <= 2 || num > 32768) {
			fprintf(stderr,
			    "viona: vqsize %ld out of range", num);
			err = -1;
		} else if ((1 << (ffs(num) - 1)) != num) {
			fprintf(stderr,
			    "viona: vqsize %ld must be a power of 2", num);
			err = -1;
		} else {
			sc->vsc_vq_size = num;
		}
	}

	value = get_config_value_node(nvl, "vnic");
	if (value == NULL) {
		fprintf(stderr, "viona: vnic name required");
		err = -1;
	} else {
		(void) strlcpy(sc->vsc_linkname, value, MAXLINKNAMELEN);
	}

	DPRINTF("viona=%p dev=%s vqsize=%x feature_mask=%x", sc,
	    sc->vsc_linkname, sc->vsc_vq_size, sc->vsc_feature_mask);
	return (err);
}

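/*
 * Device initialisation: look up the backing VNIC, create the in-kernel
 * viona instance, start the interrupt poll thread and set up PCI config
 * space, BARs and interrupts.
 */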
static int
pci_viona_init(struct pci_devinst *pi, nvlist_t *nvl)
{
	dladm_handle_t handle;
	dladm_status_t status;
	dladm_vnic_attr_t attr;
	char errmsg[DLADM_STRSIZE];
	char tname[MAXCOMLEN + 1];
	int error, i;
	struct pci_viona_softc *sc;
	const char *vnic;
	pthread_t tid;

	if (get_config_bool_default("viona.debug", false))
		pci_viona_debug = 1;

	vnic = get_config_value_node(nvl, "vnic");
	if (vnic == NULL) {
		WPRINTF("virtio-viona: vnic required");
		return (1);
	}

	sc = malloc(sizeof (struct pci_viona_softc));
	memset(sc, 0, sizeof (struct pci_viona_softc));

	if (pci_viona_parse_opts(sc, nvl) != 0) {
		free(sc);
		return (1);
	}

	if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) {
		WPRINTF("could not open /dev/dld");
		free(sc);
		return (1);
	}

	if ((status = dladm_name2info(handle, sc->vsc_linkname, &sc->vsc_linkid,
	    NULL, NULL, NULL)) != DLADM_STATUS_OK) {
		WPRINTF("dladm_name2info() for %s failed: %s", vnic,
		    dladm_status2str(status, errmsg));
		dladm_close(handle);
		free(sc);
		return (1);
	}

	if ((status = dladm_vnic_info(handle, sc->vsc_linkid, &attr,
	    DLADM_OPT_ACTIVE)) != DLADM_STATUS_OK) {
		WPRINTF("dladm_vnic_info() for %s failed: %s", vnic,
		    dladm_status2str(status, errmsg));
		dladm_close(handle);
		free(sc);
		return (1);
	}

	memcpy(sc->vsc_macaddr, attr.va_mac_addr, ETHERADDRL);

	dladm_close(handle);

	error = pci_viona_viona_init(pi->pi_vmctx, sc);
	if (error != 0) {
		free(sc);
		return (1);
	}

	error = pthread_create(&tid, NULL, pci_viona_poll_thread, sc);
	assert(error == 0);
	snprintf(tname, sizeof (tname), "vionapoll:%s", vnic);
	pthread_set_name_np(tid, tname);

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

	sc->vsc_consts = viona_vi_consts;
	pthread_mutex_init(&sc->vsc_mtx, NULL);

	/*
	 * The RX and TX queues are handled in the kernel component of
	 * viona; however, the control queue is emulated in userspace.
	 */
	sc->vsc_queues[VIONA_CTLQ].vq_qsize = pci_viona_qsize(sc, VIONA_CTLQ);

	vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;

	/*
	 * Guests that do not support CTRL_RX_MAC still generally need to
	 * receive multicast packets. Guests that do support this feature
	 * will end up setting this flag indirectly via messages on the
	 * control queue, but it does not hurt to default to multicast
	 * promiscuity here, and it is what older versions of viona did.
	 */
	sc->vsc_promisc_mmac = true;
	pci_viona_eval_promisc(sc);

	/* MSI-X support */
	for (i = 0; i < VIONA_MAXQ; i++)
		sc->vsc_queues[i].vq_msix_idx = VIRTIO_MSI_NO_VECTOR;

	/* BAR 1 used to map MSI-X table and PBA */
	if (pci_emul_add_msixcap(pi, VIONA_MAXQ, 1)) {
		free(sc);
		return (1);
	}

	/* BAR 0 for legacy-style virtio register access. */
	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VIONA_REGSZ);
	if (error != 0) {
		WPRINTF("could not allocate virtio BAR");
		free(sc);
		return (1);
	}

	/*
	 * Need a legacy interrupt for virtio compliance, even though MSI-X
	 * operation is _strongly_ suggested for adequate performance.
	 */
	pci_lintr_request(pi);

	return (0);
}

static uint64_t
viona_adjust_offset(struct pci_devinst *pi, uint64_t offset)
{
	/*
	 * Device-specific offsets used by the guest change depending on
	 * whether the MSI-X capability is enabled or not.
	 */
	if (!pci_msix_enabled(pi)) {
		if (offset >= VIRTIO_PCI_CONFIG_OFF(0)) {
			return (offset + (VIRTIO_PCI_CONFIG_OFF(1) -
			    VIRTIO_PCI_CONFIG_OFF(0)));
		}
	}

	return (offset);
}

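/*
 * Push the current MSI-X address/data pair for a ring down to the viona
 * driver, or clear it if the vector is unset, masked, or MSI-X is
 * inactive.
 */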
static void
pci_viona_ring_set_msix(struct pci_devinst *pi, uint_t ring)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	struct msix_table_entry mte;
	uint16_t tab_index;
	vioc_ring_msi_t vrm;
	int res;

	if (ring == VIONA_CTLQ)
		return;

	assert(ring <= VIONA_VQ_TX);

	vrm.rm_index = ring;
	vrm.rm_addr = 0;
	vrm.rm_msg = 0;
	tab_index = sc->vsc_queues[ring].vq_msix_idx;

	if (tab_index != VIRTIO_MSI_NO_VECTOR && sc->vsc_msix_active) {
		mte = pi->pi_msix.table[tab_index];
		if ((mte.vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
			vrm.rm_addr = mte.addr;
			vrm.rm_msg = mte.msg_data;
		}
	}

	res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_SET_MSI, &vrm);
	if (res != 0) {
		WPRINTF("ioctl viona set_msi %u failed %d", ring, errno);
	}
}

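/*
 * Track transitions between MSI-X and legacy INTx operation and refresh
 * the in-kernel ring interrupt configuration when the mode changes.
 */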
static void
pci_viona_lintrupdate(struct pci_devinst *pi)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	bool msix_on = false;

	pthread_mutex_lock(&sc->vsc_mtx);
	msix_on = pci_msix_enabled(pi) && (pi->pi_msix.function_mask == 0);
	if (sc->vsc_msix_active != msix_on) {
		uint_t i;

		sc->vsc_msix_active = msix_on;
		/* Update in-kernel ring configs */
		for (i = 0; i <= VIONA_VQ_TX; i++) {
			pci_viona_ring_set_msix(pi, i);
		}
	}
	pthread_mutex_unlock(&sc->vsc_mtx);
}

static void
pci_viona_msix_update(struct pci_devinst *pi, uint64_t offset)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	uint_t tab_index, i;

	pthread_mutex_lock(&sc->vsc_mtx);
	if (!sc->vsc_msix_active) {
		pthread_mutex_unlock(&sc->vsc_mtx);
		return;
	}

	/*
	 * Rather than update every possible MSI-X vector, cheat and use the
	 * offset to calculate the entry within the table. Since this should
	 * only be called when a write to the table succeeds, the index should
	 * be valid.
	 */
	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;

	for (i = 0; i <= VIONA_VQ_TX; i++) {
		if (sc->vsc_queues[i].vq_msix_idx != tab_index) {
			continue;
		}
		pci_viona_ring_set_msix(pi, i);
	}

	pthread_mutex_unlock(&sc->vsc_mtx);
}

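/*
 * Queue notification from the guest: kick the kernel-managed RX/TX rings
 * via ioctl, or process the userspace control queue directly.
 */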
static void
pci_viona_qnotify(struct pci_viona_softc *sc, int ring)
{
	int error;

	switch (ring) {
	case VIONA_TXQ:
	case VIONA_RXQ:
		error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_KICK, ring);
		if (error != 0) {
			WPRINTF("ioctl viona ring %d kick failed %d",
			    ring, errno);
		}
		break;
	case VIONA_CTLQ: {
		struct vqueue_info *vq = &sc->vsc_queues[VIONA_CTLQ];

		if (vq_has_descs(vq))
			pci_viona_process_ctrlq(vq);
		break;
	}
	}
}

static void
pci_viona_baraddr(struct pci_devinst *pi, int baridx, int enabled,
    uint64_t address)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	uint64_t ioport;
	int error;

	if (baridx != 0)
		return;

	if (enabled == 0) {
		error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, 0);
		if (error != 0)
			WPRINTF("uninstall ioport hook failed %d", errno);
		return;
	}

	/*
	 * Install ioport hook for virtqueue notification.
	 * This is part of the virtio common configuration area so the
	 * address does not change with MSI-X status.
	 */
	ioport = address + VIRTIO_PCI_QUEUE_NOTIFY;
	error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, ioport);
	if (error != 0) {
		WPRINTF("install ioport hook at %lx failed %d",
		    ioport, errno);
	}
}

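/*
 * Handle guest writes to BAR 0 (legacy virtio registers) and to the
 * MSI-X table/PBA BAR.
 */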
static void
pci_viona_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
    uint64_t value)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	void *ptr;
	int err = 0;

	if (baridx == pci_msix_table_bar(pi) ||
	    baridx == pci_msix_pba_bar(pi)) {
		if (pci_emul_msix_twrite(pi, offset, size, value) == 0) {
			pci_viona_msix_update(pi, offset);
		}
		return;
	}

	assert(baridx == 0);

	if (offset + size > pci_viona_iosize(pi)) {
		DPRINTF("viona_write: 2big, offset %ld size %d",
		    offset, size);
		return;
	}

	pthread_mutex_lock(&sc->vsc_mtx);

	offset = viona_adjust_offset(pi, offset);

	switch (offset) {
	case VIRTIO_PCI_GUEST_FEATURES:
		assert(size == 4);
		value &= ~(sc->vsc_feature_mask);
		err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_FEATURES, &value);
		if (err != 0) {
			WPRINTF("ioctl feature negotiation returned err = %d",
			    errno);
		} else {
			sc->vsc_vs.vs_negotiated_caps = value;
		}
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		assert(size == 4);
		pci_viona_ring_init(sc, value);
		break;
	case VIRTIO_PCI_QUEUE_SEL:
		assert(size == 2);
		assert(value < VIONA_MAXQ);
		sc->vsc_vs.vs_curq = value;
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY:
		assert(size == 2);
		assert(value < VIONA_MAXQ);
		pci_viona_qnotify(sc, value);
		break;
	case VIRTIO_PCI_STATUS:
		assert(size == 1);
		pci_viona_update_status(sc, value);
		break;
	case VIRTIO_MSI_CONFIG_VECTOR:
		assert(size == 2);
		sc->vsc_vs.vs_msix_cfg_idx = value;
		break;
	case VIRTIO_MSI_QUEUE_VECTOR:
		assert(size == 2);
		assert(sc->vsc_vs.vs_curq < VIONA_MAXQ);
		sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx = value;
		pci_viona_ring_set_msix(pi, sc->vsc_vs.vs_curq);
		break;
	case VIONA_R_CFG0:
	case VIONA_R_CFG1:
	case VIONA_R_CFG2:
	case VIONA_R_CFG3:
	case VIONA_R_CFG4:
	case VIONA_R_CFG5:
		assert((size + offset) <= (VIONA_R_CFG5 + 1));
		ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0];
		/*
		 * The driver is allowed to change the MAC address.
		 */
		if (size == 1) {
			*(uint8_t *)ptr = value;
		} else if (size == 2) {
			*(uint16_t *)ptr = value;
		} else {
			*(uint32_t *)ptr = value;
		}
		break;
	case VIRTIO_PCI_HOST_FEATURES:
	case VIRTIO_PCI_QUEUE_NUM:
	case VIRTIO_PCI_ISR:
	case VIONA_R_CFG6:
	case VIONA_R_CFG7:
		DPRINTF("viona: write to readonly reg %ld", offset);
		break;
	default:
		DPRINTF("viona: unknown i/o write offset %ld", offset);
		break;
	}

	pthread_mutex_unlock(&sc->vsc_mtx);
}

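/*
 * Handle guest reads from BAR 0 (legacy virtio registers) and from the
 * MSI-X table/PBA BAR.
 */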
static uint64_t
pci_viona_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	void *ptr;
	uint64_t value = 0;	/* avoid returning stale data if an ioctl fails */
	int err = 0;

	if (baridx == pci_msix_table_bar(pi) ||
	    baridx == pci_msix_pba_bar(pi)) {
		return (pci_emul_msix_tread(pi, offset, size));
	}

	assert(baridx == 0);

	if (offset + size > pci_viona_iosize(pi)) {
		DPRINTF("viona_read: 2big, offset %ld size %d",
		    offset, size);
		return (0);
	}

	pthread_mutex_lock(&sc->vsc_mtx);

	offset = viona_adjust_offset(pi, offset);

	switch (offset) {
	case VIRTIO_PCI_HOST_FEATURES:
		assert(size == 4);
		err = ioctl(sc->vsc_vnafd, VNA_IOC_GET_FEATURES, &value);
		if (err != 0) {
			WPRINTF("ioctl get host features returned err = %d",
			    errno);
		}
		value |= VIONA_S_HOSTCAPS_USERSPACE;
		value &= ~sc->vsc_feature_mask;
		sc->vsc_consts.vc_hv_caps = value;
		break;
	case VIRTIO_PCI_GUEST_FEATURES:
		assert(size == 4);
		value = sc->vsc_vs.vs_negotiated_caps; /* XXX never read ? */
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		assert(size == 4);
		value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_pfn >> VRING_PFN;
		break;
	case VIRTIO_PCI_QUEUE_NUM:
		assert(size == 2);
		value = pci_viona_qsize(sc, sc->vsc_vs.vs_curq);
		break;
	case VIRTIO_PCI_QUEUE_SEL:
		assert(size == 2);
		value = sc->vsc_vs.vs_curq; /* XXX never read ? */
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY:
		assert(size == 2);
		value = sc->vsc_vs.vs_curq; /* XXX never read ? */
		break;
	case VIRTIO_PCI_STATUS:
		assert(size == 1);
		value = sc->vsc_vs.vs_status;
		break;
	case VIRTIO_PCI_ISR:
		assert(size == 1);
		value = sc->vsc_vs.vs_isr;
		sc->vsc_vs.vs_isr = 0; /* a read clears this flag */
		if (value != 0) {
			pci_lintr_deassert(pi);
		}
		break;
	case VIRTIO_MSI_CONFIG_VECTOR:
		assert(size == 2);
		value = sc->vsc_vs.vs_msix_cfg_idx;
		break;
	case VIRTIO_MSI_QUEUE_VECTOR:
		assert(size == 2);
		assert(sc->vsc_vs.vs_curq < VIONA_MAXQ);
		value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx;
		break;
	case VIONA_R_CFG0:
	case VIONA_R_CFG1:
	case VIONA_R_CFG2:
	case VIONA_R_CFG3:
	case VIONA_R_CFG4:
	case VIONA_R_CFG5:
		assert((size + offset) <= (VIONA_R_CFG5 + 1));
		ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0];
		if (size == 1) {
			value = *(uint8_t *)ptr;
		} else if (size == 2) {
			value = *(uint16_t *)ptr;
		} else {
			value = *(uint32_t *)ptr;
		}
		break;
	case VIONA_R_CFG6:
		assert(size != 4);
		value = 0x01; /* XXX link always up */
		break;
	case VIONA_R_CFG7:
		assert(size == 1);
		value = 0; /* XXX link status in LSB */
		break;
	default:
		DPRINTF("viona: unknown i/o read offset %ld", offset);
		value = 0;
		break;
	}

	pthread_mutex_unlock(&sc->vsc_mtx);

	return (value);
}

struct pci_devemu pci_de_viona = {
	.pe_emu = "virtio-net-viona",
	.pe_init = pci_viona_init,
	.pe_legacy_config = pci_viona_legacy_config,
	.pe_barwrite = pci_viona_write,
	.pe_barread = pci_viona_read,
	.pe_baraddr = pci_viona_baraddr,
	.pe_lintrupdate = pci_viona_lintrupdate
};
PCI_EMUL_SET(pci_de_viona);