xref: /illumos-gate/usr/src/cmd/bhyve/net_backends.c (revision 069b2ef0d51cd626922df94af789ca0dc322222d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
19  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * $FreeBSD$
28  */
29 
30 /*
31  * This file implements multiple network backends (tap, netmap, ...),
32  * to be used by network frontends such as virtio-net and e1000.
33  * The API to access the backend (e.g. send/receive packets, negotiate
34  * features) is exported by net_backends.h.
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/types.h>		/* u_short etc */
41 #ifndef WITHOUT_CAPSICUM
42 #include <sys/capsicum.h>
43 #endif
44 #include <sys/ioctl.h>
45 #include <sys/mman.h>
46 #include <sys/uio.h>
47 
48 #include <net/if.h>
49 #ifdef __FreeBSD__
50 #include <net/netmap.h>
51 #include <net/netmap_virt.h>
52 #define NETMAP_WITH_LIBS
53 #include <net/netmap_user.h>
54 #endif
55 
56 #ifndef WITHOUT_CAPSICUM
57 #include <capsicum_helpers.h>
58 #endif
59 #include <err.h>
60 #include <errno.h>
61 #include <fcntl.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <stdint.h>
65 #include <string.h>
66 #include <unistd.h>
67 #include <sysexits.h>
68 #include <assert.h>
69 #include <pthread.h>
70 #include <pthread_np.h>
71 #include <poll.h>
73 
74 #ifdef NETGRAPH
75 #include <sys/param.h>
76 #include <sys/sysctl.h>
77 #include <netgraph.h>
78 #endif
79 
80 #ifndef __FreeBSD__
81 #include <libdlpi.h>
82 #include <net/ethernet.h>
83 #endif
84 
85 #include "config.h"
86 #include "debug.h"
87 #include "iov.h"
88 #include "mevent.h"
89 #include "net_backends.h"
90 #include "pci_emul.h"
91 
92 #include <sys/linker_set.h>
93 
94 /*
95  * Each network backend registers a set of function pointers that are
96  * used to implement the net backends API.
97  * This might need to be exposed if we implement backends in separate files.
98  */
99 struct net_backend {
100 	const char *prefix;	/* prefix matching this backend */
101 
102 	/*
103 	 * Routines used to initialize and cleanup the resources needed
104 	 * by a backend. The cleanup function is used internally,
105 	 * and should not be called by the frontend.
106 	 */
107 	int (*init)(struct net_backend *be, const char *devname,
108 	    nvlist_t *nvl, net_be_rxeof_t cb, void *param);
109 	void (*cleanup)(struct net_backend *be);
110 
111 	/*
112 	 * Called to serve a guest transmit request. The scatter-gather
113 	 * vector provided by the caller has 'iovcnt' elements and contains
114 	 * the packet to send.
115 	 */
116 	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
117 	    int iovcnt);
118 
119 	/*
120 	 * Get the length of the next packet that can be received from
121 	 * the backend. If no packets are currently available, this
122 	 * function returns 0.
123 	 */
124 	ssize_t (*peek_recvlen)(struct net_backend *be);
125 
126 	/*
127 	 * Called to receive a packet from the backend. When the function
128 	 * returns a positive value 'len', the scatter-gather vector
129 	 * provided by the caller contains a packet of that length.
130 	 * The function returns 0 if the backend doesn't have a new packet to
131 	 * receive.
132 	 */
133 	ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
134 	    int iovcnt);
135 
136 	/*
137 	 * Ask the backend to enable or disable receive operation in the
138 	 * backend. On return from a disable operation, it is guaranteed
139 	 * that the receive callback won't be called until receive is
140 	 * enabled again. Note however that it is up to the caller to make
141 	 * sure that netbe_recv() is not currently being executed by another
142 	 * thread.
143 	 */
144 	void (*recv_enable)(struct net_backend *be);
145 	void (*recv_disable)(struct net_backend *be);
146 
147 	/*
148 	 * Ask the backend for the virtio-net features it is able to
149 	 * support. Possible features are TSO, UFO and checksum offloading
150 	 * in both the rx and tx directions, for both IPv4 and IPv6.
151 	 */
152 	uint64_t (*get_cap)(struct net_backend *be);
153 
154 	/*
155 	 * Tell the backend to enable/disable the specified virtio-net
156 	 * features (capabilities).
157 	 */
158 	int (*set_cap)(struct net_backend *be, uint64_t features,
159 	    unsigned int vnet_hdr_len);
160 
161 #ifndef __FreeBSD__
162 	int (*get_mac)(struct net_backend *be, void *, size_t *);
163 #endif
164 
165 	struct pci_vtnet_softc *sc;
166 	int fd;
167 
168 	/*
169 	 * Length of the virtio-net header used by the backend and the
170 	 * frontend, respectively. A zero value means that the header
171 	 * is not used.
172 	 */
173 	unsigned int be_vnet_hdr_len;
174 	unsigned int fe_vnet_hdr_len;
175 
176 	/* Size of backend-specific private data. */
177 	size_t priv_size;
178 
179 	/* Room for backend-specific data. */
180 	char opaque[];
181 };
182 
183 SET_DECLARE(net_backend_set, struct net_backend);
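
/*
 * Illustrative sketch (hypothetical, not a backend in this tree): a new
 * backend is added by filling in a struct net_backend template and
 * registering it in the linker set, after which netbe_init() below can
 * match it by prefix. netbe_init() asserts that init, cleanup, send,
 * recv, get_cap and set_cap are non-NULL; the remaining callbacks must
 * be supplied before the netbe_* wrappers that use them are called.
 */
#if 0
static int
null_init(struct net_backend *be, const char *devname __unused,
    nvlist_t *nvl __unused, net_be_rxeof_t cb __unused, void *param __unused)
{
	be->fd = -1;	/* nothing to poll */
	return (0);
}

static struct net_backend null_backend = {
	.prefix = "null",
	.init = null_init,
	/* .cleanup, .send, .recv, .get_cap, .set_cap, ... */
};

DATA_SET(net_backend_set, null_backend);
#endif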
184 
185 #define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)
186 
187 #define WPRINTF(params) PRINTLN params
188 
189 #ifdef __FreeBSD__
190 
191 /*
192  * The tap backend
193  */
194 
195 struct tap_priv {
196 	struct mevent *mevp;
197 	/*
198 	 * A bounce buffer that allows us to implement the peek_recvlen
199 	 * callback. In the future we may get the same information from
200 	 * the kevent data.
201 	 */
202 	char bbuf[1 << 16];
203 	ssize_t bbuflen;
204 };
205 
206 static void
207 tap_cleanup(struct net_backend *be)
208 {
209 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
210 
211 	if (priv->mevp) {
212 		mevent_delete(priv->mevp);
213 	}
214 	if (be->fd != -1) {
215 		close(be->fd);
216 		be->fd = -1;
217 	}
218 }
219 
220 static int
221 tap_init(struct net_backend *be, const char *devname,
222 	 nvlist_t *nvl, net_be_rxeof_t cb, void *param)
223 {
224 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
225 	char tbuf[80];
226 	int opt = 1;
227 #ifndef WITHOUT_CAPSICUM
228 	cap_rights_t rights;
229 #endif
230 
231 	if (cb == NULL) {
232 		WPRINTF(("TAP backend requires non-NULL callback"));
233 		return (-1);
234 	}
235 
236 	strcpy(tbuf, "/dev/");
237 	strlcat(tbuf, devname, sizeof(tbuf));
238 
239 	be->fd = open(tbuf, O_RDWR);
240 	if (be->fd == -1) {
241 		WPRINTF(("open of tap device %s failed", tbuf));
242 		goto error;
243 	}
244 
245 	/*
246 	 * Set non-blocking and register for read
247 	 * notifications with the event loop
248 	 */
249 	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
250 		WPRINTF(("tap device O_NONBLOCK failed"));
251 		goto error;
252 	}
253 
254 #ifndef WITHOUT_CAPSICUM
255 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
256 	if (caph_rights_limit(be->fd, &rights) == -1)
257 		errx(EX_OSERR, "Unable to apply rights for sandbox");
258 #endif
259 
260 	memset(priv->bbuf, 0, sizeof(priv->bbuf));
261 	priv->bbuflen = 0;
262 
263 	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
264 	if (priv->mevp == NULL) {
265 		WPRINTF(("Could not register event"));
266 		goto error;
267 	}
268 
269 	return (0);
270 
271 error:
272 	tap_cleanup(be);
273 	return (-1);
274 }
275 
276 /*
277  * Called to send a buffer chain out to the tap device
278  */
279 static ssize_t
280 tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
281 {
282 	return (writev(be->fd, iov, iovcnt));
283 }
284 
285 static ssize_t
286 tap_peek_recvlen(struct net_backend *be)
287 {
288 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
289 	ssize_t ret;
290 
291 	if (priv->bbuflen > 0) {
292 		/*
293 		 * We already have a packet in the bounce buffer.
294 		 * Just return its length.
295 		 */
296 		return priv->bbuflen;
297 	}
298 
299 	/*
300 	 * Read the next packet (if any) into the bounce buffer, so
301 	 * that we get to know its length and we can return that
302 	 * to the caller.
303 	 */
304 	ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
305 	if (ret < 0 && errno == EWOULDBLOCK) {
306 		return (0);
307 	}
308 
309 	if (ret > 0)
310 		priv->bbuflen = ret;
311 
312 	return (ret);
313 }
314 
315 static ssize_t
316 tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
317 {
318 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
319 	ssize_t ret;
320 
321 	if (priv->bbuflen > 0) {
322 		/*
323 		 * A packet is available in the bounce buffer, so
324 		 * we read it from there.
325 		 */
326 		ret = buf_to_iov(priv->bbuf, priv->bbuflen,
327 		    iov, iovcnt, 0);
328 
329 		/* Mark the bounce buffer as empty. */
330 		priv->bbuflen = 0;
331 
332 		return (ret);
333 	}
334 
335 	ret = readv(be->fd, iov, iovcnt);
336 	if (ret < 0 && errno == EWOULDBLOCK) {
337 		return (0);
338 	}
339 
340 	return (ret);
341 }
342 
343 static void
344 tap_recv_enable(struct net_backend *be)
345 {
346 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
347 
348 	mevent_enable(priv->mevp);
349 }
350 
351 static void
352 tap_recv_disable(struct net_backend *be)
353 {
354 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
355 
356 	mevent_disable(priv->mevp);
357 }
358 
359 static uint64_t
360 tap_get_cap(struct net_backend *be)
361 {
362 
363 	return (0); /* no capabilities for now */
364 }
365 
366 static int
367 tap_set_cap(struct net_backend *be, uint64_t features,
368 		unsigned vnet_hdr_len)
369 {
370 
371 	return ((features || vnet_hdr_len) ? -1 : 0);
372 }
373 
374 static struct net_backend tap_backend = {
375 	.prefix = "tap",
376 	.priv_size = sizeof(struct tap_priv),
377 	.init = tap_init,
378 	.cleanup = tap_cleanup,
379 	.send = tap_send,
380 	.peek_recvlen = tap_peek_recvlen,
381 	.recv = tap_recv,
382 	.recv_enable = tap_recv_enable,
383 	.recv_disable = tap_recv_disable,
384 	.get_cap = tap_get_cap,
385 	.set_cap = tap_set_cap,
386 };
387 
388 /* A clone of the tap backend, with a different prefix. */
389 static struct net_backend vmnet_backend = {
390 	.prefix = "vmnet",
391 	.priv_size = sizeof(struct tap_priv),
392 	.init = tap_init,
393 	.cleanup = tap_cleanup,
394 	.send = tap_send,
395 	.peek_recvlen = tap_peek_recvlen,
396 	.recv = tap_recv,
397 	.recv_enable = tap_recv_enable,
398 	.recv_disable = tap_recv_disable,
399 	.get_cap = tap_get_cap,
400 	.set_cap = tap_set_cap,
401 };
402 
403 DATA_SET(net_backend_set, tap_backend);
404 DATA_SET(net_backend_set, vmnet_backend);
405 
406 #ifdef NETGRAPH
407 
408 /*
409  * Netgraph backend
410  */
411 
412 #define NG_SBUF_MAX_SIZE (4 * 1024 * 1024)
413 
414 static int
415 ng_init(struct net_backend *be, const char *devname,
416 	 nvlist_t *nvl, net_be_rxeof_t cb, void *param)
417 {
418 	struct tap_priv *p = (struct tap_priv *)be->opaque;
419 	struct ngm_connect ngc;
420 	const char *value, *nodename;
421 	int sbsz;
422 	int ctrl_sock;
423 	int flags;
424 	unsigned long maxsbsz;
425 	size_t msbsz;
426 #ifndef WITHOUT_CAPSICUM
427 	cap_rights_t rights;
428 #endif
429 
430 	if (cb == NULL) {
431 		WPRINTF(("Netgraph backend requires non-NULL callback"));
432 		return (-1);
433 	}
434 
435 	be->fd = -1;
436 
437 	memset(&ngc, 0, sizeof(ngc));
438 
439 	value = get_config_value_node(nvl, "path");
440 	if (value == NULL) {
441 		WPRINTF(("path must be provided"));
442 		return (-1);
443 	}
444 	strncpy(ngc.path, value, NG_PATHSIZ - 1);
445 
446 	value = get_config_value_node(nvl, "hook");
447 	if (value == NULL)
448 		value = "vmlink";
449 	strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1);
450 
451 	value = get_config_value_node(nvl, "peerhook");
452 	if (value == NULL) {
453 		WPRINTF(("peer hook must be provided"));
454 		return (-1);
455 	}
456 	strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1);
457 
458 	nodename = get_config_value_node(nvl, "socket");
459 	if (NgMkSockNode(nodename,
460 		&ctrl_sock, &be->fd) < 0) {
461 		WPRINTF(("can't get Netgraph sockets"));
462 		return (-1);
463 	}
464 
465 	if (NgSendMsg(ctrl_sock, ".",
466 		NGM_GENERIC_COOKIE,
467 		NGM_CONNECT, &ngc, sizeof(ngc)) < 0) {
468 		WPRINTF(("can't connect to node"));
469 		close(ctrl_sock);
470 		goto error;
471 	}
472 
473 	close(ctrl_sock);
474 
475 	flags = fcntl(be->fd, F_GETFL);
476 
477 	if (flags < 0) {
478 		WPRINTF(("can't get socket flags"));
479 		goto error;
480 	}
481 
482 	if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) {
483 		WPRINTF(("can't set O_NONBLOCK flag"));
484 		goto error;
485 	}
486 
487 	/*
488 	 * The default ng_socket(4) buffer's size is too low.
489 	 * Calculate the minimum value between NG_SBUF_MAX_SIZE
490 	 * and kern.ipc.maxsockbuf.
491 	 */
492 	msbsz = sizeof(maxsbsz);
493 	if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz,
494 		NULL, 0) < 0) {
495 		WPRINTF(("can't get 'kern.ipc.maxsockbuf' value"));
496 		goto error;
497 	}
498 
499 	/*
500 	 * We can't set the socket buffer size to kern.ipc.maxsockbuf value,
501 	 * as it takes into account the mbuf(9) overhead.
502 	 */
503 	maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES);
504 
505 	sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz);
506 
507 	if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz,
508 		sizeof(sbsz)) < 0) {
509 		WPRINTF(("can't set TX buffer size"));
510 		goto error;
511 	}
512 
513 	if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz,
514 		sizeof(sbsz)) < 0) {
515 		WPRINTF(("can't set RX buffer size"));
516 		goto error;
517 	}
518 
519 #ifndef WITHOUT_CAPSICUM
520 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
521 	if (caph_rights_limit(be->fd, &rights) == -1)
522 		errx(EX_OSERR, "Unable to apply rights for sandbox");
523 #endif
524 
525 	memset(p->bbuf, 0, sizeof(p->bbuf));
526 	p->bbuflen = 0;
527 
528 	p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
529 	if (p->mevp == NULL) {
530 		WPRINTF(("Could not register event"));
531 		goto error;
532 	}
533 
534 	return (0);
535 
536 error:
537 	tap_cleanup(be);
538 	return (-1);
539 }
540 
541 static struct net_backend ng_backend = {
542 	.prefix = "netgraph",
543 	.priv_size = sizeof(struct tap_priv),
544 	.init = ng_init,
545 	.cleanup = tap_cleanup,
546 	.send = tap_send,
547 	.peek_recvlen = tap_peek_recvlen,
548 	.recv = tap_recv,
549 	.recv_enable = tap_recv_enable,
550 	.recv_disable = tap_recv_disable,
551 	.get_cap = tap_get_cap,
552 	.set_cap = tap_set_cap,
553 };
554 
555 DATA_SET(net_backend_set, ng_backend);
556 
557 #endif /* NETGRAPH */
558 
559 /*
560  * The netmap backend
561  */
562 
563 /* The virtio-net features supported by netmap. */
564 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
565 		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
566 		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
567 		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
568 
569 struct netmap_priv {
570 	char ifname[IFNAMSIZ];
571 	struct nm_desc *nmd;
572 	uint16_t memid;
573 	struct netmap_ring *rx;
574 	struct netmap_ring *tx;
575 	struct mevent *mevp;
576 	net_be_rxeof_t cb;
577 	void *cb_param;
578 };
579 
580 static void
581 nmreq_init(struct nmreq *req, char *ifname)
582 {
583 
584 	memset(req, 0, sizeof(*req));
585 	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
586 	req->nr_version = NETMAP_API;
587 }
588 
589 static int
590 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
591 {
592 	int err;
593 	struct nmreq req;
594 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
595 
596 	nmreq_init(&req, priv->ifname);
597 	req.nr_cmd = NETMAP_BDG_VNET_HDR;
598 	req.nr_arg1 = vnet_hdr_len;
599 	err = ioctl(be->fd, NIOCREGIF, &req);
600 	if (err) {
601 		WPRINTF(("Unable to set vnet header length %d",
602 				vnet_hdr_len));
603 		return (err);
604 	}
605 
606 	be->be_vnet_hdr_len = vnet_hdr_len;
607 
608 	return (0);
609 }
610 
611 static int
612 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
613 {
614 	int prev_hdr_len = be->be_vnet_hdr_len;
615 	int ret;
616 
617 	if (vnet_hdr_len == prev_hdr_len) {
618 		return (1);
619 	}
620 
621 	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
622 	if (ret) {
623 		return (0);
624 	}
625 
626 	netmap_set_vnet_hdr_len(be, prev_hdr_len);
627 
628 	return (1);
629 }
630 
631 static uint64_t
632 netmap_get_cap(struct net_backend *be)
633 {
634 
635 	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
636 	    NETMAP_FEATURES : 0);
637 }
638 
639 static int
640 netmap_set_cap(struct net_backend *be, uint64_t features,
641 	       unsigned vnet_hdr_len)
642 {
643 
644 	return (netmap_set_vnet_hdr_len(be, vnet_hdr_len));
645 }
646 
647 static int
648 netmap_init(struct net_backend *be, const char *devname,
649 	    nvlist_t *nvl, net_be_rxeof_t cb, void *param)
650 {
651 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
652 
653 	strlcpy(priv->ifname, devname, sizeof(priv->ifname));
654 	priv->ifname[sizeof(priv->ifname) - 1] = '\0';
655 
656 	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
657 	if (priv->nmd == NULL) {
658 		WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)",
659 			devname, strerror(errno)));
660 		/* be->opaque lives inside the backend allocation; do not free it. */
661 		return (-1);
662 	}
663 
664 	priv->memid = priv->nmd->req.nr_arg2;
665 	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
666 	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
667 	priv->cb = cb;
668 	priv->cb_param = param;
669 	be->fd = priv->nmd->fd;
670 
671 	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
672 	if (priv->mevp == NULL) {
673 		WPRINTF(("Could not register event"));
674 		return (-1);
675 	}
676 
677 	return (0);
678 }
679 
680 static void
681 netmap_cleanup(struct net_backend *be)
682 {
683 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
684 
685 	if (priv->mevp) {
686 		mevent_delete(priv->mevp);
687 	}
688 	if (priv->nmd) {
689 		nm_close(priv->nmd);
690 	}
691 	be->fd = -1;
692 }
693 
694 static ssize_t
695 netmap_send(struct net_backend *be, const struct iovec *iov,
696 	    int iovcnt)
697 {
698 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
699 	struct netmap_ring *ring;
700 	ssize_t totlen = 0;
701 	int nm_buf_size;
702 	int nm_buf_len;
703 	uint32_t head;
704 	void *nm_buf;
705 	int j;
706 
707 	ring = priv->tx;
708 	head = ring->head;
709 	if (head == ring->tail) {
710 		WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt)));
711 		goto txsync;
712 	}
713 	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
714 	nm_buf_size = ring->nr_buf_size;
715 	nm_buf_len = 0;
716 
717 	for (j = 0; j < iovcnt; j++) {
718 		int iov_frag_size = iov[j].iov_len;
719 		void *iov_frag_buf = iov[j].iov_base;
720 
721 		totlen += iov_frag_size;
722 
723 		/*
724 		 * Split each iovec fragment over more netmap slots, if
725 		 * necessary.
726 		 */
727 		for (;;) {
728 			int copylen;
729 
730 			copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
731 			memcpy(nm_buf, iov_frag_buf, copylen);
732 
733 			iov_frag_buf += copylen;
734 			iov_frag_size -= copylen;
735 			nm_buf += copylen;
736 			nm_buf_size -= copylen;
737 			nm_buf_len += copylen;
738 
739 			if (iov_frag_size == 0) {
740 				break;
741 			}
742 
743 			ring->slot[head].len = nm_buf_len;
744 			ring->slot[head].flags = NS_MOREFRAG;
745 			head = nm_ring_next(ring, head);
746 			if (head == ring->tail) {
747 				/*
748 				 * We ran out of netmap slots while
749 				 * splitting the iovec fragments.
750 				 */
751 				WPRINTF(("No space, drop %zu bytes",
752 				   count_iov(iov, iovcnt)));
753 				goto txsync;
754 			}
755 			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
756 			nm_buf_size = ring->nr_buf_size;
757 			nm_buf_len = 0;
758 		}
759 	}
760 
761 	/* Complete the last slot, which must not have NS_MOREFRAG set. */
762 	ring->slot[head].len = nm_buf_len;
763 	ring->slot[head].flags = 0;
764 	head = nm_ring_next(ring, head);
765 
766 	/* Now update ring->head and ring->cur. */
767 	ring->head = ring->cur = head;
768 txsync:
769 	ioctl(be->fd, NIOCTXSYNC, NULL);
770 
771 	return (totlen);
772 }
773 
774 static ssize_t
775 netmap_peek_recvlen(struct net_backend *be)
776 {
777 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
778 	struct netmap_ring *ring = priv->rx;
779 	uint32_t head = ring->head;
780 	ssize_t totlen = 0;
781 
782 	while (head != ring->tail) {
783 		struct netmap_slot *slot = ring->slot + head;
784 
785 		totlen += slot->len;
786 		if ((slot->flags & NS_MOREFRAG) == 0)
787 			break;
788 		head = nm_ring_next(ring, head);
789 	}
790 
791 	return (totlen);
792 }
793 
794 static ssize_t
795 netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
796 {
797 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
798 	struct netmap_slot *slot = NULL;
799 	struct netmap_ring *ring;
800 	void *iov_frag_buf;
801 	int iov_frag_size;
802 	ssize_t totlen = 0;
803 	uint32_t head;
804 
805 	assert(iovcnt);
806 
807 	ring = priv->rx;
808 	head = ring->head;
809 	iov_frag_buf = iov->iov_base;
810 	iov_frag_size = iov->iov_len;
811 
812 	do {
813 		int nm_buf_len;
814 		void *nm_buf;
815 
816 		if (head == ring->tail) {
817 			return (0);
818 		}
819 
820 		slot = ring->slot + head;
821 		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
822 		nm_buf_len = slot->len;
823 
824 		for (;;) {
825 			int copylen = nm_buf_len < iov_frag_size ?
826 			    nm_buf_len : iov_frag_size;
827 
828 			memcpy(iov_frag_buf, nm_buf, copylen);
829 			nm_buf += copylen;
830 			nm_buf_len -= copylen;
831 			iov_frag_buf += copylen;
832 			iov_frag_size -= copylen;
833 			totlen += copylen;
834 
835 			if (nm_buf_len == 0) {
836 				break;
837 			}
838 
839 			iov++;
840 			iovcnt--;
841 			if (iovcnt == 0) {
842 				/* No space to receive. */
843 				WPRINTF(("Short iov, drop %zd bytes",
844 				    totlen));
845 				return (-ENOSPC);
846 			}
847 			iov_frag_buf = iov->iov_base;
848 			iov_frag_size = iov->iov_len;
849 		}
850 
851 		head = nm_ring_next(ring, head);
852 
853 	} while (slot->flags & NS_MOREFRAG);
854 
855 	/* Release slots to netmap. */
856 	ring->head = ring->cur = head;
857 
858 	return (totlen);
859 }
860 
861 static void
862 netmap_recv_enable(struct net_backend *be)
863 {
864 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
865 
866 	mevent_enable(priv->mevp);
867 }
868 
869 static void
870 netmap_recv_disable(struct net_backend *be)
871 {
872 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
873 
874 	mevent_disable(priv->mevp);
875 }
876 
877 static struct net_backend netmap_backend = {
878 	.prefix = "netmap",
879 	.priv_size = sizeof(struct netmap_priv),
880 	.init = netmap_init,
881 	.cleanup = netmap_cleanup,
882 	.send = netmap_send,
883 	.peek_recvlen = netmap_peek_recvlen,
884 	.recv = netmap_recv,
885 	.recv_enable = netmap_recv_enable,
886 	.recv_disable = netmap_recv_disable,
887 	.get_cap = netmap_get_cap,
888 	.set_cap = netmap_set_cap,
889 };
890 
891 /* A clone of the netmap backend, with a different prefix. */
892 static struct net_backend vale_backend = {
893 	.prefix = "vale",
894 	.priv_size = sizeof(struct netmap_priv),
895 	.init = netmap_init,
896 	.cleanup = netmap_cleanup,
897 	.send = netmap_send,
898 	.peek_recvlen = netmap_peek_recvlen,
899 	.recv = netmap_recv,
900 	.recv_enable = netmap_recv_enable,
901 	.recv_disable = netmap_recv_disable,
902 	.get_cap = netmap_get_cap,
903 	.set_cap = netmap_set_cap,
904 };
905 
906 DATA_SET(net_backend_set, netmap_backend);
907 DATA_SET(net_backend_set, vale_backend);
908 
909 #else /* __FreeBSD__ */
910 
911 /*
912  * The illumos dlpi backend
913  */
914 
915 /*
916  * The size of the bounce buffer used to implement the peek callback.
917  * This value should be big enough to accommodate the largest of all possible
918  * frontend packet lengths. The value here matches the definition of
919  * VTNET_MAX_PKT_LEN in pci_virtio_net.c.
920  */
921 #define	DLPI_BBUF_SIZE (65536 + 64)
922 
923 typedef struct be_dlpi_priv {
924 	dlpi_handle_t bdp_dhp;
925 	struct mevent *bdp_mevp;
926 	/*
927 	 * A bounce buffer that allows us to implement the peek_recvlen
928 	 * callback. Each structure is only used by a single thread so
929 	 * one is enough.
930 	 */
931 	uint8_t bdp_bbuf[DLPI_BBUF_SIZE];
932 	ssize_t bdp_bbuflen;
933 } be_dlpi_priv_t;
934 
935 static void
936 be_dlpi_cleanup(net_backend_t *be)
937 {
938 	be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque;
939 
940 	if (priv->bdp_dhp != NULL)
941 		dlpi_close(priv->bdp_dhp);
942 	priv->bdp_dhp = NULL;
943 
944 	if (priv->bdp_mevp != NULL)
945 		mevent_delete(priv->bdp_mevp);
946 	priv->bdp_mevp = NULL;
947 
948 	priv->bdp_bbuflen = 0;
949 	be->fd = -1;
950 }
951 
952 static void
953 be_dlpi_err(int ret, const char *dev, char *msg)
954 {
955 	WPRINTF(("%s: %s (%s)", dev, msg, dlpi_strerror(ret)));
956 }
957 
958 static int
959 be_dlpi_init(net_backend_t *be, const char *devname __unused,
960      nvlist_t *nvl, net_be_rxeof_t cb, void *param)
961 {
962 	be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque;
963 	const char *vnic;
964 	int ret;
965 
966 	if (cb == NULL) {
967 		WPRINTF(("dlpi backend requires non-NULL callback"));
968 		return (-1);
969 	}
970 
971 	vnic = get_config_value_node(nvl, "vnic");
972 	if (vnic == NULL) {
973 		WPRINTF(("dlpi backend requires a VNIC"));
974 		return (-1);
975 	}
976 
977 	priv->bdp_bbuflen = 0;
978 
979 	ret = dlpi_open(vnic, &priv->bdp_dhp, DLPI_RAW);
980 
981 	if (ret != DLPI_SUCCESS) {
982 		be_dlpi_err(ret, vnic, "open failed");
983 		goto error;
984 	}
985 
986 	if ((ret = dlpi_bind(priv->bdp_dhp, DLPI_ANY_SAP, NULL)) !=
987 	    DLPI_SUCCESS) {
988 		be_dlpi_err(ret, vnic, "bind failed");
989 		goto error;
990 	}
991 
992 	if (get_config_bool_node_default(nvl, "promiscrxonly", true)) {
993 		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_RX_ONLY)) !=
994 		    DLPI_SUCCESS) {
995 			be_dlpi_err(ret, vnic,
996 			    "enable promiscuous mode(rxonly) failed");
997 			goto error;
998 		}
999 	}
1000 	if (get_config_bool_node_default(nvl, "promiscphys", false)) {
1001 		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_PHYS)) !=
1002 		    DLPI_SUCCESS) {
1003 			be_dlpi_err(ret, vnic,
1004 			    "enable promiscuous mode(physical) failed");
1005 			goto error;
1006 		}
1007 	}
1008 	if (get_config_bool_node_default(nvl, "promiscsap", true)) {
1009 		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_SAP)) !=
1010 		    DLPI_SUCCESS) {
1011 			be_dlpi_err(ret, vnic,
1012 			    "enable promiscuous mode(SAP) failed");
1013 			goto error;
1014 		}
1015 	}
1016 	if (get_config_bool_node_default(nvl, "promiscmulti", true)) {
1017 		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_MULTI)) !=
1018 		    DLPI_SUCCESS) {
1019 			be_dlpi_err(ret, vnic,
1020 			    "enable promiscuous mode(muticast) failed");
1021 			goto error;
1022 		}
1023 	}
1024 
1025 	be->fd = dlpi_fd(priv->bdp_dhp);
1026 
1027 	if (fcntl(be->fd, F_SETFL, O_NONBLOCK) < 0) {
1028 		WPRINTF(("%s: enable O_NONBLOCK failed", vnic));
1029 		goto error;
1030 	}
1031 
1032 	priv->bdp_mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
1033 	if (priv->bdp_mevp == NULL) {
1034 		WPRINTF(("Could not register event"));
1035 		goto error;
1036 	}
1037 
1038 	return (0);
1039 
1040 error:
1041 	be_dlpi_cleanup(be);
1042 	return (-1);
1043 }
1044 
1045 /*
1046  * Called to send a buffer chain out to the dlpi device
1047  */
1048 static ssize_t
1049 be_dlpi_send(net_backend_t *be, const struct iovec *iov, int iovcnt)
1050 {
1051 	be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque;
1052 	ssize_t len = 0;
1053 	int ret;
1054 
1055 	if (iovcnt == 1) {
1056 		len = iov[0].iov_len;
1057 		ret = dlpi_send(priv->bdp_dhp, NULL, 0, iov[0].iov_base, len,
1058 		    NULL);
1059 	} else {
1060 		void *buf = NULL;
1061 
1062 		len = iov_to_buf(iov, iovcnt, &buf);
1063 
1064 		if (len <= 0 || buf == NULL)
1065 			return (-1);
1066 
1067 		ret = dlpi_send(priv->bdp_dhp, NULL, 0, buf, len, NULL);
1068 		free(buf);
1069 	}
1070 
1071 	if (ret != DLPI_SUCCESS)
1072 		return (-1);
1073 
1074 	return (len);
1075 }
1076 
1077 static ssize_t
1078 be_dlpi_peek_recvlen(net_backend_t *be)
1079 {
1080 	be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque;
1081 	dlpi_recvinfo_t recv;
1082 	size_t len;
1083 	int ret;
1084 
1085 	/*
1086 	 * We already have a packet in the bounce buffer.
1087 	 * Just return its length.
1088 	 */
1089 	if (priv->bdp_bbuflen > 0)
1090 		return (priv->bdp_bbuflen);
1091 
1092 	/*
1093 	 * Read the next packet (if any) into the bounce buffer, so
1094 	 * that we get to know its length and we can return that
1095 	 * to the caller.
1096 	 */
1097 	len = sizeof (priv->bdp_bbuf);
1098 	ret = dlpi_recv(priv->bdp_dhp, NULL, NULL, priv->bdp_bbuf, &len,
1099 	    0, &recv);
1100 	if (ret == DL_SYSERR) {
1101 		if (errno == EWOULDBLOCK)
1102 			return (0);
1103 		return (-1);
1104 	} else if (ret == DLPI_ETIMEDOUT) {
1105 		return (0);
1106 	} else if (ret != DLPI_SUCCESS) {
1107 		return (-1);
1108 	}
1109 
1110 	if (recv.dri_totmsglen > sizeof (priv->bdp_bbuf)) {
1111 		EPRINTLN("DLPI bounce buffer was too small! - needed %zu bytes",
1112 		    recv.dri_totmsglen);
1113 	}
1114 
1115 	priv->bdp_bbuflen = len;
1116 
1117 	return (len);
1118 }
1119 
1120 static ssize_t
1121 be_dlpi_recv(net_backend_t *be, const struct iovec *iov, int iovcnt)
1122 {
1123 	be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque;
1124 	size_t len;
1125 	int ret;
1126 
1127 	if (priv->bdp_bbuflen > 0) {
1128 		/*
1129 		 * A packet is available in the bounce buffer, so
1130 		 * we read it from there.
1131 		 */
1132 		len = buf_to_iov(priv->bdp_bbuf, priv->bdp_bbuflen,
1133 		    iov, iovcnt, 0);
1134 
1135 		/* Mark the bounce buffer as empty. */
1136 		priv->bdp_bbuflen = 0;
1137 
1138 		return (len);
1139 	}
1140 
1141 	len = iov[0].iov_len;
1142 	ret = dlpi_recv(priv->bdp_dhp, NULL, NULL,
1143 	    (uint8_t *)iov[0].iov_base, &len, 0, NULL);
1144 	if (ret == DL_SYSERR) {
1145 		if (errno == EWOULDBLOCK)
1146 			return (0);
1147 		return (-1);
1148 	} else if (ret == DLPI_ETIMEDOUT) {
1149 		return (0);
1150 	} else if (ret != DLPI_SUCCESS) {
1151 		return (-1);
1152 	}
1153 
1154 	return (len);
1155 }
1156 
1157 static void
1158 be_dlpi_recv_enable(net_backend_t *be)
1159 {
1160 	be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque;
1161 
1162 	mevent_enable(priv->bdp_mevp);
1163 }
1164 
1165 static void
1166 be_dlpi_recv_disable(net_backend_t *be)
1167 {
1168 	be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque;
1169 
1170 	mevent_disable(priv->bdp_mevp);
1171 }
1172 
1173 static uint64_t
1174 be_dlpi_get_cap(net_backend_t *be)
1175 {
1176 	return (0); /* no capabilities for now */
1177 }
1178 
1179 static int
1180 be_dlpi_set_cap(net_backend_t *be, uint64_t features,
1181     unsigned vnet_hdr_len)
1182 {
1183 	return ((features || vnet_hdr_len) ? -1 : 0);
1184 }
1185 
1186 static int
1187 be_dlpi_get_mac(net_backend_t *be, void *buf, size_t *buflen)
1188 {
1189 	be_dlpi_priv_t *priv = (be_dlpi_priv_t *)be->opaque;
1190 	uchar_t physaddr[DLPI_PHYSADDR_MAX];
1191 	size_t physaddrlen = DLPI_PHYSADDR_MAX;
1192 	int ret;
1193 
1194 	if ((ret = dlpi_get_physaddr(priv->bdp_dhp, DL_CURR_PHYS_ADDR,
1195 	    physaddr, &physaddrlen)) != DLPI_SUCCESS) {
1196 		be_dlpi_err(ret, dlpi_linkname(priv->bdp_dhp),
1197 		    "read MAC address failed");
1198 		return (EINVAL);
1199 	}
1200 
1201 	if (physaddrlen != ETHERADDRL) {
1202 		WPRINTF(("%s: bad MAC address len %zu",
1203 		    dlpi_linkname(priv->bdp_dhp), physaddrlen));
1204 		return (EINVAL);
1205 	}
1206 
1207 	if (physaddrlen > *buflen) {
1208 		WPRINTF(("%s: MAC address too long (%zu bytes required)",
1209 		    dlpi_linkname(priv->bdp_dhp), physaddrlen));
1210 		return (ENOMEM);
1211 	}
1212 
1213 	*buflen = physaddrlen;
1214 	memcpy(buf, physaddr, *buflen);
1215 
1216 	return (0);
1217 }
1218 
1219 static struct net_backend dlpi_backend = {
1220 	.prefix = "dlpi",
1221 	.priv_size = sizeof(struct be_dlpi_priv),
1222 	.init = be_dlpi_init,
1223 	.cleanup = be_dlpi_cleanup,
1224 	.send = be_dlpi_send,
1225 	.peek_recvlen = be_dlpi_peek_recvlen,
1226 	.recv = be_dlpi_recv,
1227 	.recv_enable = be_dlpi_recv_enable,
1228 	.recv_disable = be_dlpi_recv_disable,
1229 	.get_cap = be_dlpi_get_cap,
1230 	.set_cap = be_dlpi_set_cap,
1231 	.get_mac = be_dlpi_get_mac,
1232 };
1233 
1234 DATA_SET(net_backend_set, dlpi_backend);
1235 
1236 #endif /* __FreeBSD__ */
1237 
1238 #ifdef __FreeBSD__
1239 int
1240 netbe_legacy_config(nvlist_t *nvl, const char *opts)
1241 {
1242 	char *backend, *cp;
1243 
1244 	if (opts == NULL)
1245 		return (0);
1246 
1247 	cp = strchr(opts, ',');
1248 	if (cp == NULL) {
1249 		set_config_value_node(nvl, "backend", opts);
1250 		return (0);
1251 	}
1252 	backend = strndup(opts, cp - opts);
1253 	set_config_value_node(nvl, "backend", backend);
1254 	free(backend);
1255 	return (pci_parse_legacy_config(nvl, cp + 1));
1256 }
1257 #else
1258 int
1259 netbe_legacy_config(nvlist_t *nvl, const char *opts)
1260 {
1261 	char *config, *name, *tofree, *value;
1262 
1263 	if (opts == NULL)
1264 		return (0);
1265 
1266 	/* Default to the 'dlpi' backend - can still be overridden by opts */
1267 	set_config_value_node(nvl, "backend", "dlpi");
1268 
1269 	config = tofree = strdup(opts);
1270 	if (config == NULL)
1271 		err(4, "netbe_legacy_config strdup()");
1272 	while ((name = strsep(&config, ",")) != NULL) {
1273 		value = strchr(name, '=');
1274 		if (value != NULL) {
1275 			*value++ = '\0';
1276 			set_config_value_node(nvl, name, value);
1277 		} else {
1278 			set_config_value_node(nvl, "vnic", name);
1279 		}
1280 	}
1281 	free(tofree);
1282 
1283 	return (0);
1284 }
1285 #endif
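
/*
 * For illustration (hypothetical values): on illumos a legacy option
 * string such as "vnic0,promiscphys=true" is parsed by the function
 * above into the pairs backend=dlpi, vnic=vnic0 and promiscphys=true,
 * which be_dlpi_init() later consumes via get_config_value_node() and
 * get_config_bool_node_default().
 */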
1286 
1287 /*
1288  * Initialize a backend and attach to the frontend.
1289  * This is called during frontend initialization.
1290  *  @ret is a pointer to the backend to be initialized
1291  *  @nvl holds the configuration; its "backend" value is the name
1292  * 	from the command line, e.g. -s 2:0,frontend-name,backend-name[,other-args]
1293  *  @cb is the receive callback supplied by the frontend,
1294  *	and it is invoked in the event loop when a receive
1295  *	event is generated in the hypervisor,
1296  *  @param is a pointer to the frontend, and normally used as
1297  *	the argument for the callback.
1298  */
1299 int
1300 netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb,
1301     void *param)
1302 {
1303 	struct net_backend **pbe, *nbe, *tbe = NULL;
1304 	const char *value;
1305 	char *devname;
1306 	int err;
1307 
1308 	value = get_config_value_node(nvl, "backend");
1309 	if (value == NULL) {
1310 		return (-1);
1311 	}
1312 	devname = strdup(value);
1313 
1314 	/*
1315 	 * Find the network backend that matches the user-provided
1316 	 * device name. net_backend_set is built using a linker set.
1317 	 */
1318 	SET_FOREACH(pbe, net_backend_set) {
1319 		if (strncmp(devname, (*pbe)->prefix,
1320 		    strlen((*pbe)->prefix)) == 0) {
1321 			tbe = *pbe;
1322 			assert(tbe->init != NULL);
1323 			assert(tbe->cleanup != NULL);
1324 			assert(tbe->send != NULL);
1325 			assert(tbe->recv != NULL);
1326 			assert(tbe->get_cap != NULL);
1327 			assert(tbe->set_cap != NULL);
1328 			break;
1329 		}
1330 	}
1331 
1332 	*ret = NULL;
1333 	if (tbe == NULL) {
1334 		free(devname);
1335 		return (EINVAL);
1336 	}
1337 
1338 	nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
1339 	*nbe = *tbe;	/* copy the template */
1340 	nbe->fd = -1;
1341 	nbe->sc = param;
1342 	nbe->be_vnet_hdr_len = 0;
1343 	nbe->fe_vnet_hdr_len = 0;
1344 
1345 	/* Initialize the backend. */
1346 	err = nbe->init(nbe, devname, nvl, cb, param);
1347 	if (err) {
1348 		free(devname);
1349 		free(nbe);
1350 		return (err);
1351 	}
1352 
1353 	*ret = nbe;
1354 	free(devname);
1355 
1356 	return (0);
1357 }
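
/*
 * Usage sketch (hypothetical frontend code; error handling trimmed):
 * a frontend typically initializes the backend, negotiates features,
 * and only then enables receive so the callback can start running.
 */
#if 0
static void
example_rx_callback(int fd __unused, enum ev_type type __unused,
    void *param __unused)
{
	/* Drain the backend with netbe_recv() until it returns 0. */
}

static int
example_attach(nvlist_t *nvl, void *softc)
{
	struct net_backend *be;

	if (netbe_init(&be, nvl, example_rx_callback, softc) != 0)
		return (-1);
	if (netbe_set_cap(be, netbe_get_cap(be), 0) != 0) {
		netbe_cleanup(be);
		return (-1);
	}
	netbe_rx_enable(be);
	return (0);
}
#endif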
1358 
1359 void
1360 netbe_cleanup(struct net_backend *be)
1361 {
1362 
1363 	if (be != NULL) {
1364 		be->cleanup(be);
1365 		free(be);
1366 	}
1367 }
1368 
1369 uint64_t
1370 netbe_get_cap(struct net_backend *be)
1371 {
1372 
1373 	assert(be != NULL);
1374 	return (be->get_cap(be));
1375 }
1376 
1377 int
1378 netbe_set_cap(struct net_backend *be, uint64_t features,
1379 	      unsigned vnet_hdr_len)
1380 {
1381 	int ret;
1382 
1383 	assert(be != NULL);
1384 
1385 	/* There are only three valid lengths, i.e., 0, 10 and 12. */
1386 	if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
1387 		&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
1388 		return (-1);
1389 
1390 	be->fe_vnet_hdr_len = vnet_hdr_len;
1391 
1392 	ret = be->set_cap(be, features, vnet_hdr_len);
1393 	assert(be->be_vnet_hdr_len == 0 ||
1394 	       be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
1395 
1396 	return (ret);
1397 }
1398 
1399 ssize_t
1400 netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
1401 {
1402 
1403 	return (be->send(be, iov, iovcnt));
1404 }
1405 
1406 ssize_t
1407 netbe_peek_recvlen(struct net_backend *be)
1408 {
1409 
1410 	return (be->peek_recvlen(be));
1411 }
1412 
1413 /*
1414  * Try to read a packet from the backend, without blocking.
1415  * If no packets are available, return 0. In case of success, return
1416  * the length of the packet just read. Return -1 in case of errors.
1417  */
1418 ssize_t
1419 netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
1420 {
1421 
1422 	return (be->recv(be, iov, iovcnt));
1423 }
1424 
1425 /*
1426  * Read a packet from the backend and discard it.
1427  * Returns the size of the discarded packet or zero if no packet was available.
1428  * A negative error code is returned in case of read error.
1429  */
1430 ssize_t
1431 netbe_rx_discard(struct net_backend *be)
1432 {
1433 	/*
1434 	 * MP note: the dummybuf is only used to discard frames,
1435 	 * so there is no need for it to be per-vtnet or locked.
1436 	 * We only make it large enough for a TSO-sized segment.
1437 	 */
1438 	static uint8_t dummybuf[65536 + 64];
1439 	struct iovec iov;
1440 
1441 #ifdef __FreeBSD__
1442 	iov.iov_base = dummybuf;
1443 #else
1444 	iov.iov_base = (caddr_t)dummybuf;
1445 #endif
1446 	iov.iov_len = sizeof(dummybuf);
1447 
1448 	return (netbe_recv(be, &iov, 1));
1449 }
1450 
1451 void
1452 netbe_rx_disable(struct net_backend *be)
1453 {
1454 
1455 	be->recv_disable(be);
1456 }
1457 
1458 void
1459 netbe_rx_enable(struct net_backend *be)
1460 {
1461 
1462 	be->recv_enable(be);
1463 }
1464 
1465 size_t
1466 netbe_get_vnet_hdr_len(struct net_backend *be)
1467 {
1468 
1469 	return (be->be_vnet_hdr_len);
1470 }
1471 
1472 #ifndef __FreeBSD__
1473 int
1474 netbe_get_mac(net_backend_t *be, void *buf, size_t *buflen)
1475 {
1476 	if (be->get_mac == NULL)
1477 		return (ENOTSUP);
1478 	return (be->get_mac(be, buf, buflen));
1479 }
1480 #endif
1481