1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * This file implements multiple network backends (tap, netmap, ...),
30 * to be used by network frontends such as virtio-net and e1000.
31 * The API to access the backend (e.g. send/receive packets, negotiate
32 * features) is exported by net_backends.h.
33 */
34
35 #include <sys/cdefs.h>
36
37 #include <sys/types.h> /* u_short etc */
38 #ifndef WITHOUT_CAPSICUM
39 #include <sys/capsicum.h>
40 #endif
41 #include <sys/ioctl.h>
42 #include <sys/mman.h>
43 #include <sys/uio.h>
44
45 #include <net/if.h>
46 #ifdef __FreeBSD__
47 #if defined(INET6) || defined(INET)
48 #include <net/if_tap.h>
49 #endif
50 #include <net/netmap.h>
51 #include <net/netmap_virt.h>
52 #define NETMAP_WITH_LIBS
53 #include <net/netmap_user.h>
54 #endif /* __FreeBSD__ */
55
56 #ifndef WITHOUT_CAPSICUM
57 #include <capsicum_helpers.h>
58 #endif
59 #include <err.h>
60 #include <errno.h>
61 #include <fcntl.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <stdint.h>
65 #include <string.h>
66 #include <unistd.h>
67 #include <sysexits.h>
68 #include <assert.h>
69 #include <pthread.h>
70 #include <pthread_np.h>
71 #include <poll.h>
72 #include <assert.h>
73
74 #ifdef NETGRAPH
75 #include <sys/param.h>
76 #include <sys/sysctl.h>
77 #include <netgraph.h>
78 #endif
79
80 #ifndef __FreeBSD__
81 #include <libdlpi.h>
82 #include <net/ethernet.h>
83 #endif
84
85 #include "config.h"
86 #include "debug.h"
87 #include "iov.h"
88 #include "mevent.h"
89 #include "net_backends.h"
90 #include "pci_emul.h"
91
92 #include <sys/linker_set.h>
93
94 /*
95 * Each network backend registers a set of function pointers that are
96 * used to implement the net backends API.
97 * This might need to be exposed if we implement backends in separate files.
98 */
struct net_backend {
	const char *prefix;	/* prefix matching this backend */

	/*
	 * Routines used to initialize and cleanup the resources needed
	 * by a backend. The cleanup function is used internally,
	 * and should not be called by the frontend.
	 */
	int (*init)(struct net_backend *be, const char *devname,
	    nvlist_t *nvl, net_be_rxeof_t cb, void *param);
	void (*cleanup)(struct net_backend *be);

	/*
	 * Called to serve a guest transmit request. The scatter-gather
	 * vector provided by the caller has 'iovcnt' elements and contains
	 * the packet to send.
	 */
	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
	    int iovcnt);

	/*
	 * Get the length of the next packet that can be received from
	 * the backend. If no packets are currently available, this
	 * function returns 0.
	 */
	ssize_t (*peek_recvlen)(struct net_backend *be);

	/*
	 * Called to receive a packet from the backend. When the function
	 * returns a positive value 'len', the scatter-gather vector
	 * provided by the caller contains a packet with such length.
	 * The function returns 0 if the backend doesn't have a new packet to
	 * receive.
	 */
	ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
	    int iovcnt);

	/*
	 * Ask the backend to enable or disable receive operation in the
	 * backend. On return from a disable operation, it is guaranteed
	 * that the receive callback won't be called until receive is
	 * enabled again. Note however that it is up to the caller to make
	 * sure that netbe_recv() is not currently being executed by another
	 * thread.
	 */
	void (*recv_enable)(struct net_backend *be);
	void (*recv_disable)(struct net_backend *be);

	/*
	 * Ask the backend for the virtio-net features it is able to
	 * support. Possible features are TSO, UFO and checksum offloading
	 * in both rx and tx direction and for both IPv4 and IPv6.
	 */
	uint64_t (*get_cap)(struct net_backend *be);

	/*
	 * Tell the backend to enable/disable the specified virtio-net
	 * features (capabilities).
	 */
	int (*set_cap)(struct net_backend *be, uint64_t features,
	    unsigned int vnet_hdr_len);

#ifndef __FreeBSD__
	/* Copy the interface MAC address into the caller's buffer. */
	int (*get_mac)(struct net_backend *be, void *, size_t *);
#endif

	/* Opaque pointer to the frontend's softc (set by the frontend;
	 * not referenced by the code in this file). */
	struct pci_vtnet_softc *sc;
	/* File descriptor used for I/O and event registration; -1 when
	 * the backend is not open. */
	int fd;

	/*
	 * Length of the virtio-net header used by the backend and the
	 * frontend, respectively. A zero value means that the header
	 * is not used.
	 */
	unsigned int be_vnet_hdr_len;
	unsigned int fe_vnet_hdr_len;

	/* Size of backend-specific private data. */
	size_t priv_size;

	/* Backend-specific private data follows. */
};
181
/* Pointer to the backend-specific private data appended to the struct. */
#define NET_BE_PRIV(be) ((void *)((be) + 1))
/* Total allocation size for a backend: header plus private data. */
#define NET_BE_SIZE(be) (sizeof(*be) + (be)->priv_size)

/* Linker set collecting every registered backend (see DATA_SET below). */
SET_DECLARE(net_backend_set, struct net_backend);

#define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr)

#define WPRINTF(params) PRINTLN params
190
191 #ifdef __FreeBSD__
192
193 /*
194 * The tap backend
195 */
196
#if defined(INET6) || defined(INET)
/*
 * Protocol families to try, in order, when a generic datagram socket
 * is needed for the interface ioctls in tap_init().
 */
static const int pf_list[] = {
#if defined(INET6)
	PF_INET6,
#endif
#if defined(INET)
	PF_INET,
#endif
};
#endif
207
struct tap_priv {
	struct mevent *mevp;	/* read event registered with the event loop */
	/*
	 * A bounce buffer that allows us to implement the peek_recvlen
	 * callback. In the future we may get the same information from
	 * the kevent data.
	 */
	char bbuf[1 << 16];
	ssize_t bbuflen;	/* bytes currently buffered; 0 means empty */
};
218
219 static void
tap_cleanup(struct net_backend * be)220 tap_cleanup(struct net_backend *be)
221 {
222 struct tap_priv *priv = NET_BE_PRIV(be);
223
224 if (priv->mevp) {
225 mevent_delete(priv->mevp);
226 }
227 if (be->fd != -1) {
228 close(be->fd);
229 be->fd = -1;
230 }
231 }
232
/*
 * Open and configure a tap(4)/vmnet(4) device and register a (disabled)
 * read event with the event loop.  'cb' is invoked when packets become
 * available and is mandatory for this backend.  Returns 0 on success,
 * -1 on failure; partially-acquired resources are released through
 * tap_cleanup().
 */
static int
tap_init(struct net_backend *be, const char *devname,
    nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
{
	struct tap_priv *priv = NET_BE_PRIV(be);
	char tbuf[80];
	int opt = 1;
#if defined(INET6) || defined(INET)
	struct ifreq ifrq;
	int s;
#endif
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif

	if (cb == NULL) {
		WPRINTF(("TAP backend requires non-NULL callback"));
		return (-1);
	}

	/* Build "/dev/<devname>"; strlcat() bounds the concatenation. */
	strcpy(tbuf, "/dev/");
	strlcat(tbuf, devname, sizeof(tbuf));

	be->fd = open(tbuf, O_RDWR);
	if (be->fd == -1) {
		WPRINTF(("open of tap device %s failed", tbuf));
		goto error;
	}

	/*
	 * Set non-blocking and register for read
	 * notifications with the event loop
	 */
	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
		WPRINTF(("tap device O_NONBLOCK failed"));
		goto error;
	}

#if defined(INET6) || defined(INET)
	/*
	 * Try to UP the interface rather than relying on
	 * net.link.tap.up_on_open.
	 */
	bzero(&ifrq, sizeof(ifrq));
	if (ioctl(be->fd, TAPGIFNAME, &ifrq) < 0) {
		WPRINTF(("Could not get interface name"));
		goto error;
	}

	/* Any available protocol family will do for SIOC[GS]IFFLAGS. */
	s = -1;
	for (size_t i = 0; s == -1 && i < nitems(pf_list); i++)
		s = socket(pf_list[i], SOCK_DGRAM, 0);
	if (s == -1) {
		WPRINTF(("Could open socket"));
		goto error;
	}

	if (ioctl(s, SIOCGIFFLAGS, &ifrq) < 0) {
		(void)close(s);
		WPRINTF(("Could not get interface flags"));
		goto error;
	}
	ifrq.ifr_flags |= IFF_UP;
	if (ioctl(s, SIOCSIFFLAGS, &ifrq) < 0) {
		(void)close(s);
		WPRINTF(("Could not set interface flags"));
		goto error;
	}
	(void)close(s);
#endif

#ifndef WITHOUT_CAPSICUM
	/* Limit the tap fd to event/read/write inside the sandbox. */
	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
	if (caph_rights_limit(be->fd, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	memset(priv->bbuf, 0, sizeof(priv->bbuf));
	priv->bbuflen = 0;

	/* Event starts disabled; the frontend turns it on via recv_enable. */
	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (priv->mevp == NULL) {
		WPRINTF(("Could not register event"));
		goto error;
	}

	return (0);

error:
	tap_cleanup(be);
	return (-1);
}
325
326 /*
327 * Called to send a buffer chain out to the tap device
328 */
329 static ssize_t
tap_send(struct net_backend * be,const struct iovec * iov,int iovcnt)330 tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
331 {
332 return (writev(be->fd, iov, iovcnt));
333 }
334
335 static ssize_t
tap_peek_recvlen(struct net_backend * be)336 tap_peek_recvlen(struct net_backend *be)
337 {
338 struct tap_priv *priv = NET_BE_PRIV(be);
339 ssize_t ret;
340
341 if (priv->bbuflen > 0) {
342 /*
343 * We already have a packet in the bounce buffer.
344 * Just return its length.
345 */
346 return priv->bbuflen;
347 }
348
349 /*
350 * Read the next packet (if any) into the bounce buffer, so
351 * that we get to know its length and we can return that
352 * to the caller.
353 */
354 ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
355 if (ret < 0 && errno == EWOULDBLOCK) {
356 return (0);
357 }
358
359 if (ret > 0)
360 priv->bbuflen = ret;
361
362 return (ret);
363 }
364
365 static ssize_t
tap_recv(struct net_backend * be,const struct iovec * iov,int iovcnt)366 tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
367 {
368 struct tap_priv *priv = NET_BE_PRIV(be);
369 ssize_t ret;
370
371 if (priv->bbuflen > 0) {
372 /*
373 * A packet is available in the bounce buffer, so
374 * we read it from there.
375 */
376 ret = buf_to_iov(priv->bbuf, priv->bbuflen,
377 iov, iovcnt, 0);
378
379 /* Mark the bounce buffer as empty. */
380 priv->bbuflen = 0;
381
382 return (ret);
383 }
384
385 ret = readv(be->fd, iov, iovcnt);
386 if (ret < 0 && errno == EWOULDBLOCK) {
387 return (0);
388 }
389
390 return (ret);
391 }
392
393 static void
tap_recv_enable(struct net_backend * be)394 tap_recv_enable(struct net_backend *be)
395 {
396 struct tap_priv *priv = NET_BE_PRIV(be);
397
398 mevent_enable(priv->mevp);
399 }
400
401 static void
tap_recv_disable(struct net_backend * be)402 tap_recv_disable(struct net_backend *be)
403 {
404 struct tap_priv *priv = NET_BE_PRIV(be);
405
406 mevent_disable(priv->mevp);
407 }
408
409 static uint64_t
tap_get_cap(struct net_backend * be __unused)410 tap_get_cap(struct net_backend *be __unused)
411 {
412
413 return (0); /* no capabilities for now */
414 }
415
416 static int
tap_set_cap(struct net_backend * be __unused,uint64_t features,unsigned vnet_hdr_len)417 tap_set_cap(struct net_backend *be __unused, uint64_t features,
418 unsigned vnet_hdr_len)
419 {
420
421 return ((features || vnet_hdr_len) ? -1 : 0);
422 }
423
/* Callback table for the tap backend. */
static struct net_backend tap_backend = {
	.prefix = "tap",
	.priv_size = sizeof(struct tap_priv),
	.init = tap_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.peek_recvlen = tap_peek_recvlen,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};
437
/* A clone of the tap backend, with a different prefix. */
static struct net_backend vmnet_backend = {
	.prefix = "vmnet",
	.priv_size = sizeof(struct tap_priv),
	.init = tap_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.peek_recvlen = tap_peek_recvlen,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};
452
453 DATA_SET(net_backend_set, tap_backend);
454 DATA_SET(net_backend_set, vmnet_backend);
455
456 #ifdef NETGRAPH
457
458 /*
459 * Netgraph backend
460 */
461
462 #define NG_SBUF_MAX_SIZE (4 * 1024 * 1024)
463
/*
 * Attach to a netgraph(4) node: create an ng_socket pair, connect our
 * hook to the configured peer hook, switch the data socket to
 * non-blocking mode, enlarge its buffers, and register a (disabled)
 * read event.  Reuses struct tap_priv and the tap send/recv callbacks,
 * since the data socket is used just like a tap file descriptor.
 *
 * Config values (from 'nvl'): "path" (required), "hook" (default
 * "vmlink"), "peerhook" (required), "socket" (optional node name).
 * Returns 0 on success, -1 on failure (resources are released through
 * tap_cleanup()).
 */
static int
ng_init(struct net_backend *be, const char *devname __unused,
    nvlist_t *nvl, net_be_rxeof_t cb, void *param)
{
	struct tap_priv *p = NET_BE_PRIV(be);
	struct ngm_connect ngc;
	const char *value, *nodename;
	int sbsz;
	int ctrl_sock;
	int flags;
	unsigned long maxsbsz;
	size_t msbsz;
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif

	if (cb == NULL) {
		WPRINTF(("Netgraph backend requires non-NULL callback"));
		return (-1);
	}

	be->fd = -1;

	memset(&ngc, 0, sizeof(ngc));

	value = get_config_value_node(nvl, "path");
	if (value == NULL) {
		WPRINTF(("path must be provided"));
		return (-1);
	}
	/* ngc was zeroed above, so strncpy leaves NUL-terminated strings. */
	strncpy(ngc.path, value, NG_PATHSIZ - 1);

	value = get_config_value_node(nvl, "hook");
	if (value == NULL)
		value = "vmlink";
	strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1);

	value = get_config_value_node(nvl, "peerhook");
	if (value == NULL) {
		WPRINTF(("peer hook must be provided"));
		return (-1);
	}
	strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1);

	nodename = get_config_value_node(nvl, "socket");
	if (NgMkSockNode(nodename,
	    &ctrl_sock, &be->fd) < 0) {
		WPRINTF(("can't get Netgraph sockets"));
		return (-1);
	}

	if (NgSendMsg(ctrl_sock, ".",
	    NGM_GENERIC_COOKIE,
	    NGM_CONNECT, &ngc, sizeof(ngc)) < 0) {
		WPRINTF(("can't connect to node"));
		close(ctrl_sock);
		goto error;
	}

	/* The control socket was only needed for the connect message. */
	close(ctrl_sock);

	flags = fcntl(be->fd, F_GETFL);

	if (flags < 0) {
		WPRINTF(("can't get socket flags"));
		goto error;
	}

	if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) {
		WPRINTF(("can't set O_NONBLOCK flag"));
		goto error;
	}

	/*
	 * The default ng_socket(4) buffer's size is too low.
	 * Calculate the minimum value between NG_SBUF_MAX_SIZE
	 * and kern.ipc.maxsockbuf.
	 */
	msbsz = sizeof(maxsbsz);
	if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz,
	    NULL, 0) < 0) {
		WPRINTF(("can't get 'kern.ipc.maxsockbuf' value"));
		goto error;
	}

	/*
	 * We can't set the socket buffer size to kern.ipc.maxsockbuf value,
	 * as it takes into account the mbuf(9) overhead.
	 */
	maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES);

	sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz);

	if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz,
	    sizeof(sbsz)) < 0) {
		WPRINTF(("can't set TX buffer size"));
		goto error;
	}

	if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz,
	    sizeof(sbsz)) < 0) {
		WPRINTF(("can't set RX buffer size"));
		goto error;
	}

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
	if (caph_rights_limit(be->fd, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	memset(p->bbuf, 0, sizeof(p->bbuf));
	p->bbuflen = 0;

	p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (p->mevp == NULL) {
		WPRINTF(("Could not register event"));
		goto error;
	}

	return (0);

error:
	/* tap_cleanup() closes be->fd and removes the event, if present. */
	tap_cleanup(be);
	return (-1);
}
590
/* Netgraph shares the tap I/O callbacks; only initialization differs. */
static struct net_backend ng_backend = {
	.prefix = "netgraph",
	.priv_size = sizeof(struct tap_priv),
	.init = ng_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.peek_recvlen = tap_peek_recvlen,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};
604
605 DATA_SET(net_backend_set, ng_backend);
606
607 #endif /* NETGRAPH */
608
609 /*
610 * The netmap backend
611 */
612
613 /* The virtio-net features supported by netmap. */
614 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
615 VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
616 VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
617 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
618
619 struct netmap_priv {
620 char ifname[IFNAMSIZ];
621 struct nm_desc *nmd;
622 uint16_t memid;
623 struct netmap_ring *rx;
624 struct netmap_ring *tx;
625 struct mevent *mevp;
626 net_be_rxeof_t cb;
627 void *cb_param;
628 };
629
630 static void
nmreq_init(struct nmreq * req,char * ifname)631 nmreq_init(struct nmreq *req, char *ifname)
632 {
633
634 memset(req, 0, sizeof(*req));
635 strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
636 req->nr_version = NETMAP_API;
637 }
638
639 static int
netmap_set_vnet_hdr_len(struct net_backend * be,int vnet_hdr_len)640 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
641 {
642 int err;
643 struct nmreq req;
644 struct netmap_priv *priv = NET_BE_PRIV(be);
645
646 nmreq_init(&req, priv->ifname);
647 req.nr_cmd = NETMAP_BDG_VNET_HDR;
648 req.nr_arg1 = vnet_hdr_len;
649 err = ioctl(be->fd, NIOCREGIF, &req);
650 if (err) {
651 WPRINTF(("Unable to set vnet header length %d",
652 vnet_hdr_len));
653 return (err);
654 }
655
656 be->be_vnet_hdr_len = vnet_hdr_len;
657
658 return (0);
659 }
660
661 static int
netmap_has_vnet_hdr_len(struct net_backend * be,unsigned vnet_hdr_len)662 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
663 {
664 unsigned prev_hdr_len = be->be_vnet_hdr_len;
665 int ret;
666
667 if (vnet_hdr_len == prev_hdr_len) {
668 return (1);
669 }
670
671 ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
672 if (ret) {
673 return (0);
674 }
675
676 netmap_set_vnet_hdr_len(be, prev_hdr_len);
677
678 return (1);
679 }
680
681 static uint64_t
netmap_get_cap(struct net_backend * be)682 netmap_get_cap(struct net_backend *be)
683 {
684
685 return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
686 NETMAP_FEATURES : 0);
687 }
688
689 static int
netmap_set_cap(struct net_backend * be,uint64_t features __unused,unsigned vnet_hdr_len)690 netmap_set_cap(struct net_backend *be, uint64_t features __unused,
691 unsigned vnet_hdr_len)
692 {
693
694 return (netmap_set_vnet_hdr_len(be, vnet_hdr_len));
695 }
696
697 static int
netmap_init(struct net_backend * be,const char * devname,nvlist_t * nvl __unused,net_be_rxeof_t cb,void * param)698 netmap_init(struct net_backend *be, const char *devname,
699 nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
700 {
701 struct netmap_priv *priv = NET_BE_PRIV(be);
702
703 strlcpy(priv->ifname, devname, sizeof(priv->ifname));
704 priv->ifname[sizeof(priv->ifname) - 1] = '\0';
705
706 priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
707 if (priv->nmd == NULL) {
708 WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)",
709 devname, strerror(errno)));
710 return (-1);
711 }
712
713 priv->memid = priv->nmd->req.nr_arg2;
714 priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
715 priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
716 priv->cb = cb;
717 priv->cb_param = param;
718 be->fd = priv->nmd->fd;
719
720 priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
721 if (priv->mevp == NULL) {
722 WPRINTF(("Could not register event"));
723 return (-1);
724 }
725
726 return (0);
727 }
728
729 static void
netmap_cleanup(struct net_backend * be)730 netmap_cleanup(struct net_backend *be)
731 {
732 struct netmap_priv *priv = NET_BE_PRIV(be);
733
734 if (priv->mevp) {
735 mevent_delete(priv->mevp);
736 }
737 if (priv->nmd) {
738 nm_close(priv->nmd);
739 }
740 be->fd = -1;
741 }
742
/*
 * Transmit the guest packet described by 'iov' on the netmap port.
 * The payload is copied into one or more TX slots (slots of the same
 * packet are chained with NS_MOREFRAG), then a NIOCTXSYNC is issued.
 * If the ring runs out of slots the packet is dropped (the ring head
 * is not advanced), but the accumulated length is still returned.
 */
static ssize_t
netmap_send(struct net_backend *be, const struct iovec *iov,
    int iovcnt)
{
	struct netmap_priv *priv = NET_BE_PRIV(be);
	struct netmap_ring *ring;
	ssize_t totlen = 0;
	int nm_buf_size;	/* free bytes left in the current slot */
	int nm_buf_len;		/* bytes already written to the slot */
	uint32_t head;
	uint8_t *nm_buf;
	int j;

	ring = priv->tx;
	head = ring->head;
	if (head == ring->tail) {
		/* TX ring completely full. */
		WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt)));
		goto txsync;
	}
	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
	nm_buf_size = ring->nr_buf_size;
	nm_buf_len = 0;

	for (j = 0; j < iovcnt; j++) {
		uint8_t *iov_frag_buf = iov[j].iov_base;
		int iov_frag_size = iov[j].iov_len;

		totlen += iov_frag_size;

		/*
		 * Split each iovec fragment over more netmap slots, if
		 * necessary.
		 */
		for (;;) {
			int copylen;

			copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
			memcpy(nm_buf, iov_frag_buf, copylen);

			iov_frag_buf += copylen;
			iov_frag_size -= copylen;
			nm_buf += copylen;
			nm_buf_size -= copylen;
			nm_buf_len += copylen;

			if (iov_frag_size == 0) {
				/* This iovec fragment is done. */
				break;
			}

			/* Current slot is full: seal it and take the next. */
			ring->slot[head].len = nm_buf_len;
			ring->slot[head].flags = NS_MOREFRAG;
			head = nm_ring_next(ring, head);
			if (head == ring->tail) {
				/*
				 * We ran out of netmap slots while
				 * splitting the iovec fragments.
				 */
				WPRINTF(("No space, drop %zu bytes",
				    count_iov(iov, iovcnt)));
				goto txsync;
			}
			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
			nm_buf_size = ring->nr_buf_size;
			nm_buf_len = 0;
		}
	}

	/* Complete the last slot, which must not have NS_MOREFRAG set. */
	ring->slot[head].len = nm_buf_len;
	ring->slot[head].flags = 0;
	head = nm_ring_next(ring, head);

	/* Now update ring->head and ring->cur. */
	ring->head = ring->cur = head;
txsync:
	ioctl(be->fd, NIOCTXSYNC, NULL);

	return (totlen);
}
822
823 static ssize_t
netmap_peek_recvlen(struct net_backend * be)824 netmap_peek_recvlen(struct net_backend *be)
825 {
826 struct netmap_priv *priv = NET_BE_PRIV(be);
827 struct netmap_ring *ring = priv->rx;
828 uint32_t head = ring->head;
829 ssize_t totlen = 0;
830
831 while (head != ring->tail) {
832 struct netmap_slot *slot = ring->slot + head;
833
834 totlen += slot->len;
835 if ((slot->flags & NS_MOREFRAG) == 0)
836 break;
837 head = nm_ring_next(ring, head);
838 }
839
840 return (totlen);
841 }
842
/*
 * Copy the next available RX packet (possibly spanning multiple
 * NS_MOREFRAG-chained slots) into the caller's scatter-gather list.
 * Returns the packet length, 0 if the ring is empty, or -ENOSPC if
 * the iovec is too short; in the -ENOSPC case the ring head is not
 * advanced, so the packet remains pending.
 */
static ssize_t
netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
	struct netmap_priv *priv = NET_BE_PRIV(be);
	struct netmap_slot *slot = NULL;
	struct netmap_ring *ring;
	uint8_t *iov_frag_buf;
	int iov_frag_size;
	ssize_t totlen = 0;
	uint32_t head;

	assert(iovcnt);

	ring = priv->rx;
	head = ring->head;
	iov_frag_buf = iov->iov_base;
	iov_frag_size = iov->iov_len;

	do {
		uint8_t *nm_buf;
		int nm_buf_len;

		if (head == ring->tail) {
			/* Nothing (more) to read from the ring. */
			return (0);
		}

		slot = ring->slot + head;
		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
		nm_buf_len = slot->len;

		/* Scatter this slot's payload across the iovec. */
		for (;;) {
			int copylen = nm_buf_len < iov_frag_size ?
			    nm_buf_len : iov_frag_size;

			memcpy(iov_frag_buf, nm_buf, copylen);
			nm_buf += copylen;
			nm_buf_len -= copylen;
			iov_frag_buf += copylen;
			iov_frag_size -= copylen;
			totlen += copylen;

			if (nm_buf_len == 0) {
				break;
			}

			iov++;
			iovcnt--;
			if (iovcnt == 0) {
				/* No space to receive. */
				WPRINTF(("Short iov, drop %zd bytes",
				    totlen));
				return (-ENOSPC);
			}
			iov_frag_buf = iov->iov_base;
			iov_frag_size = iov->iov_len;
		}

		head = nm_ring_next(ring, head);

	} while (slot->flags & NS_MOREFRAG);

	/* Release slots to netmap. */
	ring->head = ring->cur = head;

	return (totlen);
}
909
910 static void
netmap_recv_enable(struct net_backend * be)911 netmap_recv_enable(struct net_backend *be)
912 {
913 struct netmap_priv *priv = NET_BE_PRIV(be);
914
915 mevent_enable(priv->mevp);
916 }
917
918 static void
netmap_recv_disable(struct net_backend * be)919 netmap_recv_disable(struct net_backend *be)
920 {
921 struct netmap_priv *priv = NET_BE_PRIV(be);
922
923 mevent_disable(priv->mevp);
924 }
925
/* Callback table for the netmap backend. */
static struct net_backend netmap_backend = {
	.prefix = "netmap",
	.priv_size = sizeof(struct netmap_priv),
	.init = netmap_init,
	.cleanup = netmap_cleanup,
	.send = netmap_send,
	.peek_recvlen = netmap_peek_recvlen,
	.recv = netmap_recv,
	.recv_enable = netmap_recv_enable,
	.recv_disable = netmap_recv_disable,
	.get_cap = netmap_get_cap,
	.set_cap = netmap_set_cap,
};
939
/* A clone of the netmap backend, with a different prefix. */
static struct net_backend vale_backend = {
	.prefix = "vale",
	.priv_size = sizeof(struct netmap_priv),
	.init = netmap_init,
	.cleanup = netmap_cleanup,
	.send = netmap_send,
	.peek_recvlen = netmap_peek_recvlen,
	.recv = netmap_recv,
	.recv_enable = netmap_recv_enable,
	.recv_disable = netmap_recv_disable,
	.get_cap = netmap_get_cap,
	.set_cap = netmap_set_cap,
};
954
955 DATA_SET(net_backend_set, netmap_backend);
956 DATA_SET(net_backend_set, vale_backend);
957
958 #else /* __FreeBSD__ */
959
960 /*
961 * The illumos dlpi backend
962 */
963
/*
 * The size of the bounce buffer used to implement the peek callback.
 * This value should be big enough to accommodate the largest of all possible
 * frontend packet lengths. The value here matches the definition of
 * VTNET_MAX_PKT_LEN in pci_virtio_net.c
 */
#define DLPI_BBUF_SIZE (65536 + 64)

typedef struct be_dlpi_priv {
	dlpi_handle_t bdp_dhp;		/* libdlpi handle for the VNIC */
	struct mevent *bdp_mevp;	/* read event registered with mevent */
	/*
	 * A bounce buffer that allows us to implement the peek_recvlen
	 * callback. Each structure is only used by a single thread so
	 * one is enough.
	 */
	uint8_t bdp_bbuf[DLPI_BBUF_SIZE];
	ssize_t bdp_bbuflen;		/* bytes buffered; 0 means empty */
} be_dlpi_priv_t;
983
984 static void
be_dlpi_cleanup(net_backend_t * be)985 be_dlpi_cleanup(net_backend_t *be)
986 {
987 be_dlpi_priv_t *priv = NET_BE_PRIV(be);
988
989 if (priv->bdp_dhp != NULL)
990 dlpi_close(priv->bdp_dhp);
991 priv->bdp_dhp = NULL;
992
993 if (priv->bdp_mevp != NULL)
994 mevent_delete(priv->bdp_mevp);
995 priv->bdp_mevp = NULL;
996
997 priv->bdp_bbuflen = 0;
998 be->fd = -1;
999 }
1000
/*
 * Log a DLPI error for device 'dev', decoding 'ret' via
 * dlpi_strerror().  'msg' is const-qualified since every caller
 * passes a string literal.
 */
static void
be_dlpi_err(int ret, const char *dev, const char *msg)
{
	WPRINTF(("%s: %s (%s)", dev, msg, dlpi_strerror(ret)));
}
1006
/*
 * Open the configured VNIC through libdlpi, bind to all SAPs, enable
 * the requested promiscuous modes, make the fd non-blocking and
 * register a (disabled) read event.  Config values (from 'nvl'):
 * "vnic" (required) plus the booleans "promiscrxonly" (default true),
 * "promiscphys" (default false), "promiscsap" (default true) and
 * "promiscmulti" (default true).  Returns 0 on success, -1 on failure
 * (resources are released through be_dlpi_cleanup()).
 */
static int
be_dlpi_init(net_backend_t *be, const char *devname __unused,
    nvlist_t *nvl, net_be_rxeof_t cb, void *param)
{
	be_dlpi_priv_t *priv = NET_BE_PRIV(be);
	const char *vnic;
	int ret;

	if (cb == NULL) {
		WPRINTF(("dlpi backend requires non-NULL callback"));
		return (-1);
	}

	vnic = get_config_value_node(nvl, "vnic");
	if (vnic == NULL) {
		WPRINTF(("dlpi backend requires a VNIC"));
		return (-1);
	}

	priv->bdp_bbuflen = 0;

	ret = dlpi_open(vnic, &priv->bdp_dhp, DLPI_RAW);

	if (ret != DLPI_SUCCESS) {
		be_dlpi_err(ret, vnic, "open failed");
		goto error;
	}

	if ((ret = dlpi_bind(priv->bdp_dhp, DLPI_ANY_SAP, NULL)) !=
	    DLPI_SUCCESS) {
		be_dlpi_err(ret, vnic, "bind failed");
		goto error;
	}

	if (get_config_bool_node_default(nvl, "promiscrxonly", true)) {
		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_RX_ONLY)) !=
		    DLPI_SUCCESS) {
			be_dlpi_err(ret, vnic,
			    "enable promiscuous mode(rxonly) failed");
			goto error;
		}
	}
	if (get_config_bool_node_default(nvl, "promiscphys", false)) {
		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_PHYS)) !=
		    DLPI_SUCCESS) {
			be_dlpi_err(ret, vnic,
			    "enable promiscuous mode(physical) failed");
			goto error;
		}
	}
	if (get_config_bool_node_default(nvl, "promiscsap", true)) {
		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_SAP)) !=
		    DLPI_SUCCESS) {
			be_dlpi_err(ret, vnic,
			    "enable promiscuous mode(SAP) failed");
			goto error;
		}
	}
	if (get_config_bool_node_default(nvl, "promiscmulti", true)) {
		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_MULTI)) !=
		    DLPI_SUCCESS) {
			be_dlpi_err(ret, vnic,
			    "enable promiscuous mode(muticast) failed");
			goto error;
		}
	}

	/* The dlpi handle owns this fd; dlpi_close() releases it. */
	be->fd = dlpi_fd(priv->bdp_dhp);

	if (fcntl(be->fd, F_SETFL, O_NONBLOCK) < 0) {
		WPRINTF(("%s: enable O_NONBLOCK failed", vnic));
		goto error;
	}

	priv->bdp_mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (priv->bdp_mevp == NULL) {
		WPRINTF(("Could not register event"));
		goto error;
	}

	return (0);

error:
	be_dlpi_cleanup(be);
	return (-1);
}
1093
1094 /*
1095 * Called to send a buffer chain out to the dlpi device
1096 */
1097 static ssize_t
be_dlpi_send(net_backend_t * be,const struct iovec * iov,int iovcnt)1098 be_dlpi_send(net_backend_t *be, const struct iovec *iov, int iovcnt)
1099 {
1100 be_dlpi_priv_t *priv = NET_BE_PRIV(be);
1101 ssize_t len = 0;
1102 int ret;
1103
1104 if (iovcnt == 1) {
1105 len = iov[0].iov_len;
1106 ret = dlpi_send(priv->bdp_dhp, NULL, 0, iov[0].iov_base, len,
1107 NULL);
1108 } else {
1109 void *buf = NULL;
1110
1111 len = iov_to_buf(iov, iovcnt, &buf);
1112
1113 if (len <= 0 || buf == NULL)
1114 return (-1);
1115
1116 ret = dlpi_send(priv->bdp_dhp, NULL, 0, buf, len, NULL);
1117 free(buf);
1118 }
1119
1120 if (ret != DLPI_SUCCESS)
1121 return (-1);
1122
1123 return (len);
1124 }
1125
1126 static ssize_t
be_dlpi_peek_recvlen(net_backend_t * be)1127 be_dlpi_peek_recvlen(net_backend_t *be)
1128 {
1129 be_dlpi_priv_t *priv = NET_BE_PRIV(be);
1130 dlpi_recvinfo_t recv;
1131 size_t len;
1132 int ret;
1133
1134 /*
1135 * We already have a packet in the bounce buffer.
1136 * Just return its length.
1137 */
1138 if (priv->bdp_bbuflen > 0)
1139 return (priv->bdp_bbuflen);
1140
1141 /*
1142 * Read the next packet (if any) into the bounce buffer, so
1143 * that we get to know its length and we can return that
1144 * to the caller.
1145 */
1146 len = sizeof (priv->bdp_bbuf);
1147 ret = dlpi_recv(priv->bdp_dhp, NULL, NULL, priv->bdp_bbuf, &len,
1148 0, &recv);
1149 if (ret == DL_SYSERR) {
1150 if (errno == EWOULDBLOCK)
1151 return (0);
1152 return (-1);
1153 } else if (ret == DLPI_ETIMEDOUT) {
1154 return (0);
1155 } else if (ret != DLPI_SUCCESS) {
1156 return (-1);
1157 }
1158
1159 if (recv.dri_totmsglen > sizeof (priv->bdp_bbuf)) {
1160 EPRINTLN("DLPI bounce buffer was too small! - needed %x bytes",
1161 recv.dri_totmsglen);
1162 }
1163
1164 priv->bdp_bbuflen = len;
1165
1166 return (len);
1167 }
1168
1169 static ssize_t
be_dlpi_recv(net_backend_t * be,const struct iovec * iov,int iovcnt)1170 be_dlpi_recv(net_backend_t *be, const struct iovec *iov, int iovcnt)
1171 {
1172 be_dlpi_priv_t *priv = NET_BE_PRIV(be);
1173 size_t len;
1174 int ret;
1175
1176 if (priv->bdp_bbuflen > 0) {
1177 /*
1178 * A packet is available in the bounce buffer, so
1179 * we read it from there.
1180 */
1181 len = buf_to_iov(priv->bdp_bbuf, priv->bdp_bbuflen,
1182 iov, iovcnt, 0);
1183
1184 /* Mark the bounce buffer as empty. */
1185 priv->bdp_bbuflen = 0;
1186
1187 return (len);
1188 }
1189
1190 len = iov[0].iov_len;
1191 ret = dlpi_recv(priv->bdp_dhp, NULL, NULL,
1192 (uint8_t *)iov[0].iov_base, &len, 0, NULL);
1193 if (ret == DL_SYSERR) {
1194 if (errno == EWOULDBLOCK)
1195 return (0);
1196 return (-1);
1197 } else if (ret == DLPI_ETIMEDOUT) {
1198 return (0);
1199 } else if (ret != DLPI_SUCCESS) {
1200 return (-1);
1201 }
1202
1203 return (len);
1204 }
1205
1206 static void
be_dlpi_recv_enable(net_backend_t * be)1207 be_dlpi_recv_enable(net_backend_t *be)
1208 {
1209 be_dlpi_priv_t *priv = NET_BE_PRIV(be);
1210
1211 mevent_enable(priv->bdp_mevp);
1212 }
1213
1214 static void
be_dlpi_recv_disable(net_backend_t * be)1215 be_dlpi_recv_disable(net_backend_t *be)
1216 {
1217 be_dlpi_priv_t *priv = NET_BE_PRIV(be);
1218
1219 mevent_disable(priv->bdp_mevp);
1220 }
1221
1222 static uint64_t
be_dlpi_get_cap(net_backend_t * be)1223 be_dlpi_get_cap(net_backend_t *be)
1224 {
1225 return (0); /* no capabilities for now */
1226 }
1227
1228 static int
be_dlpi_set_cap(net_backend_t * be,uint64_t features,unsigned vnet_hdr_len)1229 be_dlpi_set_cap(net_backend_t *be, uint64_t features,
1230 unsigned vnet_hdr_len)
1231 {
1232 return ((features || vnet_hdr_len) ? -1 : 0);
1233 }
1234
1235 static int
be_dlpi_get_mac(net_backend_t * be,void * buf,size_t * buflen)1236 be_dlpi_get_mac(net_backend_t *be, void *buf, size_t *buflen)
1237 {
1238 be_dlpi_priv_t *priv = NET_BE_PRIV(be);
1239 uchar_t physaddr[DLPI_PHYSADDR_MAX];
1240 size_t physaddrlen = DLPI_PHYSADDR_MAX;
1241 int ret;
1242
1243 if ((ret = dlpi_get_physaddr(priv->bdp_dhp, DL_CURR_PHYS_ADDR,
1244 physaddr, &physaddrlen)) != DLPI_SUCCESS) {
1245 be_dlpi_err(ret, dlpi_linkname(priv->bdp_dhp),
1246 "read MAC address failed");
1247 return (EINVAL);
1248 }
1249
1250 if (physaddrlen != ETHERADDRL) {
1251 WPRINTF(("%s: bad MAC address len %d",
1252 dlpi_linkname(priv->bdp_dhp), physaddrlen));
1253 return (EINVAL);
1254 }
1255
1256 if (physaddrlen > *buflen) {
1257 WPRINTF(("%s: MAC address too long (%d bytes required)",
1258 dlpi_linkname(priv->bdp_dhp), physaddrlen));
1259 return (ENOMEM);
1260 }
1261
1262 *buflen = physaddrlen;
1263 memcpy(buf, physaddr, *buflen);
1264
1265 return (0);
1266 }
1267
/*
 * Backend descriptor for the illumos DLPI datapath. Registered in the
 * net_backend_set linker set below so netbe_init() can locate it by
 * its "dlpi" prefix.
 */
static struct net_backend dlpi_backend = {
	.prefix = "dlpi",
	.priv_size = sizeof(struct be_dlpi_priv),
	.init = be_dlpi_init,
	.cleanup = be_dlpi_cleanup,
	.send = be_dlpi_send,
	.peek_recvlen = be_dlpi_peek_recvlen,
	.recv = be_dlpi_recv,
	.recv_enable = be_dlpi_recv_enable,
	.recv_disable = be_dlpi_recv_disable,
	.get_cap = be_dlpi_get_cap,
	.set_cap = be_dlpi_set_cap,
	.get_mac = be_dlpi_get_mac,
};

DATA_SET(net_backend_set, dlpi_backend);
1284
1285 #endif /* __FreeBSD__ */
1286
1287 #ifdef __FreeBSD__
1288 int
netbe_legacy_config(nvlist_t * nvl,const char * opts)1289 netbe_legacy_config(nvlist_t *nvl, const char *opts)
1290 {
1291 char *backend, *cp;
1292
1293 if (opts == NULL)
1294 return (0);
1295
1296 cp = strchr(opts, ',');
1297 if (cp == NULL) {
1298 set_config_value_node(nvl, "backend", opts);
1299 return (0);
1300 }
1301 backend = strndup(opts, cp - opts);
1302 set_config_value_node(nvl, "backend", backend);
1303 free(backend);
1304 return (pci_parse_legacy_config(nvl, cp + 1));
1305 }
1306 #else
1307 int
netbe_legacy_config(nvlist_t * nvl,const char * opts)1308 netbe_legacy_config(nvlist_t *nvl, const char *opts)
1309 {
1310 char *config, *name, *tofree, *value;
1311
1312 if (opts == NULL)
1313 return (0);
1314
1315 /* Default to the 'dlpi' backend - can still be overridden by opts */
1316 set_config_value_node(nvl, "backend", "dlpi");
1317 set_config_value_node(nvl, "type", "dlpi");
1318
1319 config = tofree = strdup(opts);
1320 if (config == NULL)
1321 err(4, "netbe_legacy_config strdup()");
1322 while ((name = strsep(&config, ",")) != NULL) {
1323 value = strchr(name, '=');
1324 if (value != NULL) {
1325 *value++ = '\0';
1326 set_config_value_node(nvl, name, value);
1327 } else {
1328 set_config_value_node(nvl, "vnic", name);
1329 }
1330 }
1331 free(tofree);
1332
1333 return (0);
1334 }
1335 #endif
1336
1337 /*
1338 * Initialize a backend and attach to the frontend.
1339 * This is called during frontend initialization.
1340 * @ret is a pointer to the backend to be initialized
1341 * @devname is the backend-name as supplied on the command line,
1342 * e.g. -s 2:0,frontend-name,backend-name[,other-args]
1343 * @cb is the receive callback supplied by the frontend,
1344 * and it is invoked in the event loop when a receive
1345 * event is generated in the hypervisor,
1346 * @param is a pointer to the frontend, and normally used as
1347 * the argument for the callback.
1348 */
1349 int
netbe_init(struct net_backend ** ret,nvlist_t * nvl,net_be_rxeof_t cb,void * param)1350 netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb,
1351 void *param)
1352 {
1353 struct net_backend **pbe, *nbe, *tbe = NULL;
1354 const char *value, *type;
1355 char *devname;
1356 int err;
1357
1358 value = get_config_value_node(nvl, "backend");
1359 if (value == NULL) {
1360 return (-1);
1361 }
1362 devname = strdup(value);
1363
1364 /*
1365 * Use the type given by configuration if exists; otherwise
1366 * use the prefix of the backend as the type.
1367 */
1368 type = get_config_value_node(nvl, "type");
1369 if (type == NULL)
1370 type = devname;
1371
1372 /*
1373 * Find the network backend that matches the user-provided
1374 * device name. net_backend_set is built using a linker set.
1375 */
1376 SET_FOREACH(pbe, net_backend_set) {
1377 if (strncmp(type, (*pbe)->prefix,
1378 strlen((*pbe)->prefix)) == 0) {
1379 tbe = *pbe;
1380 assert(tbe->init != NULL);
1381 assert(tbe->cleanup != NULL);
1382 assert(tbe->send != NULL);
1383 assert(tbe->recv != NULL);
1384 assert(tbe->get_cap != NULL);
1385 assert(tbe->set_cap != NULL);
1386 break;
1387 }
1388 }
1389
1390 *ret = NULL;
1391 if (tbe == NULL) {
1392 free(devname);
1393 return (EINVAL);
1394 }
1395
1396 nbe = calloc(1, NET_BE_SIZE(tbe));
1397 *nbe = *tbe; /* copy the template */
1398 nbe->fd = -1;
1399 nbe->sc = param;
1400 nbe->be_vnet_hdr_len = 0;
1401 nbe->fe_vnet_hdr_len = 0;
1402
1403 /* Initialize the backend. */
1404 err = nbe->init(nbe, devname, nvl, cb, param);
1405 if (err) {
1406 free(devname);
1407 free(nbe);
1408 return (err);
1409 }
1410
1411 *ret = nbe;
1412 free(devname);
1413
1414 return (0);
1415 }
1416
1417 void
netbe_cleanup(struct net_backend * be)1418 netbe_cleanup(struct net_backend *be)
1419 {
1420
1421 if (be != NULL) {
1422 be->cleanup(be);
1423 free(be);
1424 }
1425 }
1426
1427 uint64_t
netbe_get_cap(struct net_backend * be)1428 netbe_get_cap(struct net_backend *be)
1429 {
1430
1431 assert(be != NULL);
1432 return (be->get_cap(be));
1433 }
1434
1435 int
netbe_set_cap(struct net_backend * be,uint64_t features,unsigned vnet_hdr_len)1436 netbe_set_cap(struct net_backend *be, uint64_t features,
1437 unsigned vnet_hdr_len)
1438 {
1439 int ret;
1440
1441 assert(be != NULL);
1442
1443 /* There are only three valid lengths, i.e., 0, 10 and 12. */
1444 if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
1445 && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
1446 return (-1);
1447
1448 be->fe_vnet_hdr_len = vnet_hdr_len;
1449
1450 ret = be->set_cap(be, features, vnet_hdr_len);
1451 assert(be->be_vnet_hdr_len == 0 ||
1452 be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
1453
1454 return (ret);
1455 }
1456
1457 ssize_t
netbe_send(struct net_backend * be,const struct iovec * iov,int iovcnt)1458 netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
1459 {
1460
1461 return (be->send(be, iov, iovcnt));
1462 }
1463
1464 ssize_t
netbe_peek_recvlen(struct net_backend * be)1465 netbe_peek_recvlen(struct net_backend *be)
1466 {
1467
1468 return (be->peek_recvlen(be));
1469 }
1470
1471 /*
1472 * Try to read a packet from the backend, without blocking.
1473 * If no packets are available, return 0. In case of success, return
1474 * the length of the packet just read. Return -1 in case of errors.
1475 */
1476 ssize_t
netbe_recv(struct net_backend * be,const struct iovec * iov,int iovcnt)1477 netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
1478 {
1479
1480 return (be->recv(be, iov, iovcnt));
1481 }
1482
1483 /*
1484 * Read a packet from the backend and discard it.
1485 * Returns the size of the discarded packet or zero if no packet was available.
1486 * A negative error code is returned in case of read error.
1487 */
1488 ssize_t
netbe_rx_discard(struct net_backend * be)1489 netbe_rx_discard(struct net_backend *be)
1490 {
1491 /*
1492 * MP note: the dummybuf is only used to discard frames,
1493 * so there is no need for it to be per-vtnet or locked.
1494 * We only make it large enough for TSO-sized segment.
1495 */
1496 static uint8_t dummybuf[65536 + 64];
1497 struct iovec iov;
1498
1499 #ifdef __FreeBSD__
1500 iov.iov_base = dummybuf;
1501 #else
1502 iov.iov_base = (caddr_t)dummybuf;
1503 #endif
1504 iov.iov_len = sizeof(dummybuf);
1505
1506 return netbe_recv(be, &iov, 1);
1507 }
1508
1509 void
netbe_rx_disable(struct net_backend * be)1510 netbe_rx_disable(struct net_backend *be)
1511 {
1512
1513 return be->recv_disable(be);
1514 }
1515
1516 void
netbe_rx_enable(struct net_backend * be)1517 netbe_rx_enable(struct net_backend *be)
1518 {
1519
1520 return be->recv_enable(be);
1521 }
1522
1523 size_t
netbe_get_vnet_hdr_len(struct net_backend * be)1524 netbe_get_vnet_hdr_len(struct net_backend *be)
1525 {
1526
1527 return (be->be_vnet_hdr_len);
1528 }
1529
1530 #ifndef __FreeBSD__
1531 int
netbe_get_mac(net_backend_t * be,void * buf,size_t * buflen)1532 netbe_get_mac(net_backend_t *be, void *buf, size_t *buflen)
1533 {
1534 if (be->get_mac == NULL)
1535 return (ENOTSUP);
1536 return (be->get_mac(be, buf, buflen));
1537 }
1538 #endif
1539