/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * This file implements multiple network backends (tap, netmap, ...),
 * to be used by network frontends such as virtio-net and e1000.
 * The API to access the backend (e.g. send/receive packets, negotiate
 * features) is exported by net_backends.h.
 */
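
/*
 * A minimal sketch of how a frontend is expected to drive this API.
 * The callback name and softc below are hypothetical; see netbe_init()
 * and the netbe_*() wrappers at the end of this file for the actual
 * contracts.
 *
 *	static void
 *	example_rx_callback(int fd, enum ev_type type, void *param)
 *	{
 *		// Drain the backend with netbe_recv() until it
 *		// returns 0, handing packets to the guest rx ring.
 *	}
 *
 *	struct net_backend *be;
 *
 *	if (netbe_init(&be, "tap0", example_rx_callback, sc) != 0)
 *		errx(EX_OSERR, "backend init failed");
 *	netbe_rx_enable(be);
 *	...
 *	netbe_send(be, iov, iovcnt);	// guest transmit path
 *	...
 *	netbe_cleanup(be);
 */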

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>		/* u_short etc */
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <net/if.h>
#include <net/netmap.h>
#include <net/netmap_virt.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sysexits.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>
#include <poll.h>

#include "debug.h"
#include "iov.h"
#include "mevent.h"
#include "net_backends.h"

#include <sys/linker_set.h>

/*
 * Each network backend registers a set of function pointers that are
 * used to implement the net backends API.
 * This might need to be exposed if we implement backends in separate files.
 */
struct net_backend {
	const char *prefix;	/* prefix matching this backend */

	/*
	 * Routines used to initialize and cleanup the resources needed
	 * by a backend. The cleanup function is used internally,
	 * and should not be called by the frontend.
	 */
	int (*init)(struct net_backend *be, const char *devname,
	    net_be_rxeof_t cb, void *param);
	void (*cleanup)(struct net_backend *be);

	/*
	 * Called to serve a guest transmit request. The scatter-gather
	 * vector provided by the caller has 'iovcnt' elements and contains
	 * the packet to send.
	 */
	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
	    int iovcnt);

	/*
	 * Called to receive a packet from the backend. When the function
	 * returns a positive value 'len', the scatter-gather vector
	 * provided by the caller contains a packet of that length.
	 * The function returns 0 if the backend doesn't have a new packet
	 * to receive.
	 */
	ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
	    int iovcnt);

	/*
	 * Ask the backend to enable or disable receive operation in the
	 * backend. On return from a disable operation, it is guaranteed
	 * that the receive callback won't be called until receive is
	 * enabled again. Note however that it is up to the caller to make
	 * sure that netbe_recv() is not currently being executed by another
	 * thread.
	 */
	void (*recv_enable)(struct net_backend *be);
	void (*recv_disable)(struct net_backend *be);

	/*
	 * Ask the backend for the virtio-net features it is able to
	 * support. Possible features are TSO, UFO and checksum offloading
	 * in both rx and tx direction and for both IPv4 and IPv6.
	 */
	uint64_t (*get_cap)(struct net_backend *be);

	/*
	 * Tell the backend to enable/disable the specified virtio-net
	 * features (capabilities).
	 */
	int (*set_cap)(struct net_backend *be, uint64_t features,
	    unsigned int vnet_hdr_len);

	struct pci_vtnet_softc *sc;
	int fd;

	/*
	 * Length of the virtio-net header used by the backend and the
	 * frontend, respectively. A zero value means that the header
	 * is not used.
	 */
	unsigned int be_vnet_hdr_len;
	unsigned int fe_vnet_hdr_len;

	/* Size of backend-specific private data. */
	size_t priv_size;

	/* Room for backend-specific data. */
	char opaque[];
};

SET_DECLARE(net_backend_set, struct net_backend);

#define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)

#define WPRINTF(params) PRINTLN params

/*
 * The tap backend
 */

struct tap_priv {
	struct mevent *mevp;
};

static void
tap_cleanup(struct net_backend *be)
{
	struct tap_priv *priv = (struct tap_priv *)be->opaque;

	if (priv->mevp) {
		mevent_delete(priv->mevp);
	}
	if (be->fd != -1) {
		close(be->fd);
		be->fd = -1;
	}
}

static int
tap_init(struct net_backend *be, const char *devname,
	 net_be_rxeof_t cb, void *param)
{
	struct tap_priv *priv = (struct tap_priv *)be->opaque;
	char tbuf[80];
	int opt = 1;
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif

	if (cb == NULL) {
		WPRINTF(("TAP backend requires non-NULL callback"));
		return (-1);
	}

	strlcpy(tbuf, "/dev/", sizeof(tbuf));
	strlcat(tbuf, devname, sizeof(tbuf));

	be->fd = open(tbuf, O_RDWR);
	if (be->fd == -1) {
		WPRINTF(("open of tap device %s failed", tbuf));
		goto error;
	}

	/*
	 * Set non-blocking and register for read
	 * notifications with the event loop.
	 */
	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
		WPRINTF(("tap device FIONBIO failed"));
		goto error;
	}

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
	if (caph_rights_limit(be->fd, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (priv->mevp == NULL) {
		WPRINTF(("Could not register event"));
		goto error;
	}

	return (0);

error:
	tap_cleanup(be);
	return (-1);
}

/*
 * Called to send a buffer chain out to the tap device.
 */
static ssize_t
tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
	return (writev(be->fd, iov, iovcnt));
}

static ssize_t
tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
	ssize_t ret;

	/* Should never be called without a valid tap fd. */
	assert(be->fd != -1);

	ret = readv(be->fd, iov, iovcnt);

	if (ret < 0 && errno == EWOULDBLOCK) {
		return (0);
	}

	return (ret);
}

static void
tap_recv_enable(struct net_backend *be)
{
	struct tap_priv *priv = (struct tap_priv *)be->opaque;

	mevent_enable(priv->mevp);
}

static void
tap_recv_disable(struct net_backend *be)
{
	struct tap_priv *priv = (struct tap_priv *)be->opaque;

	mevent_disable(priv->mevp);
}

static uint64_t
tap_get_cap(struct net_backend *be)
{

	return (0); /* no capabilities for now */
}

static int
tap_set_cap(struct net_backend *be, uint64_t features,
		unsigned vnet_hdr_len)
{

	return ((features || vnet_hdr_len) ? -1 : 0);
}

static struct net_backend tap_backend = {
	.prefix = "tap",
	.priv_size = sizeof(struct tap_priv),
	.init = tap_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};

/* A clone of the tap backend, with a different prefix. */
static struct net_backend vmnet_backend = {
	.prefix = "vmnet",
	.priv_size = sizeof(struct tap_priv),
	.init = tap_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};

DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);
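
/*
 * For illustration (a sketch, not taken from this file): with the
 * backends registered above, a bhyve command line such as
 *
 *	bhyve -s 2:0,virtio-net,tap0 ... vmname
 *
 * matches the "tap" prefix and reaches tap_init() with devname
 * "tap0" (i.e. /dev/tap0); a devname of "vmnet0" would select the
 * vmnet clone instead.
 */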

/*
 * The netmap backend
 */

/* The virtio-net features supported by netmap. */
#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO | \
		VIRTIO_NET_F_MRG_RXBUF)

struct netmap_priv {
	char ifname[IFNAMSIZ];
	struct nm_desc *nmd;
	uint16_t memid;
	struct netmap_ring *rx;
	struct netmap_ring *tx;
	struct mevent *mevp;
	net_be_rxeof_t cb;
	void *cb_param;
};

static void
nmreq_init(struct nmreq *req, char *ifname)
{

	memset(req, 0, sizeof(*req));
	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
	req->nr_version = NETMAP_API;
}

static int
netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
{
	int err;
	struct nmreq req;
	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;

	nmreq_init(&req, priv->ifname);
	req.nr_cmd = NETMAP_BDG_VNET_HDR;
	req.nr_arg1 = vnet_hdr_len;
	err = ioctl(be->fd, NIOCREGIF, &req);
	if (err) {
		WPRINTF(("Unable to set vnet header length %d",
				vnet_hdr_len));
		return (err);
	}

	be->be_vnet_hdr_len = vnet_hdr_len;

	return (0);
}

static int
netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
{
	int prev_hdr_len = be->be_vnet_hdr_len;
	int ret;

	if (vnet_hdr_len == prev_hdr_len) {
		return (1);
	}

	/*
	 * Probe by temporarily setting the requested length,
	 * then restore the previous one.
	 */
	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
	if (ret) {
		return (0);
	}

	netmap_set_vnet_hdr_len(be, prev_hdr_len);

	return (1);
}

static uint64_t
netmap_get_cap(struct net_backend *be)
{

	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
	    NETMAP_FEATURES : 0);
}

static int
netmap_set_cap(struct net_backend *be, uint64_t features,
	       unsigned vnet_hdr_len)
{

	return (netmap_set_vnet_hdr_len(be, vnet_hdr_len));
}

static int
netmap_init(struct net_backend *be, const char *devname,
	    net_be_rxeof_t cb, void *param)
{
	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;

	strlcpy(priv->ifname, devname, sizeof(priv->ifname));

	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
	if (priv->nmd == NULL) {
		WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)",
			devname, strerror(errno)));
		return (-1);
	}

	priv->memid = priv->nmd->req.nr_arg2;
	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
	priv->cb = cb;
	priv->cb_param = param;
	be->fd = priv->nmd->fd;

	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (priv->mevp == NULL) {
		WPRINTF(("Could not register event"));
		nm_close(priv->nmd);
		be->fd = -1;
		return (-1);
	}

	return (0);
}

static void
netmap_cleanup(struct net_backend *be)
{
	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;

	if (priv->mevp) {
		mevent_delete(priv->mevp);
	}
	if (priv->nmd) {
		nm_close(priv->nmd);
	}
	be->fd = -1;
}
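
/*
 * Worked example of the transmit path below, assuming 2048-byte
 * netmap buffers (ring->nr_buf_size): a single 5000-byte iovec
 * fragment is copied into three consecutive slots of lengths 2048,
 * 2048 and 904; the first two slots carry NS_MOREFRAG, the last
 * one clears it so the adapter knows where the packet ends.
 */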

static ssize_t
netmap_send(struct net_backend *be, const struct iovec *iov,
	    int iovcnt)
{
	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
	struct netmap_ring *ring;
	ssize_t totlen = 0;
	int nm_buf_size;
	int nm_buf_len;
	uint32_t head;
	void *nm_buf;
	int j;

	ring = priv->tx;
	head = ring->head;
	if (head == ring->tail) {
		WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt)));
		goto txsync;
	}
	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
	nm_buf_size = ring->nr_buf_size;
	nm_buf_len = 0;

	for (j = 0; j < iovcnt; j++) {
		int iov_frag_size = iov[j].iov_len;
		void *iov_frag_buf = iov[j].iov_base;

		totlen += iov_frag_size;

		/*
		 * Split each iovec fragment over more netmap slots, if
		 * necessary.
		 */
		for (;;) {
			int copylen;

			copylen = iov_frag_size < nm_buf_size ?
			    iov_frag_size : nm_buf_size;
			memcpy(nm_buf, iov_frag_buf, copylen);

			iov_frag_buf += copylen;
			iov_frag_size -= copylen;
			nm_buf += copylen;
			nm_buf_size -= copylen;
			nm_buf_len += copylen;

			if (iov_frag_size == 0) {
				break;
			}

			ring->slot[head].len = nm_buf_len;
			ring->slot[head].flags = NS_MOREFRAG;
			head = nm_ring_next(ring, head);
			if (head == ring->tail) {
				/*
				 * We ran out of netmap slots while
				 * splitting the iovec fragments.
				 */
				WPRINTF(("No space, drop %zu bytes",
				   count_iov(iov, iovcnt)));
				goto txsync;
			}
			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
			nm_buf_size = ring->nr_buf_size;
			nm_buf_len = 0;
		}
	}

	/* Complete the last slot, which must not have NS_MOREFRAG set. */
	ring->slot[head].len = nm_buf_len;
	ring->slot[head].flags = 0;
	head = nm_ring_next(ring, head);

	/* Now update ring->head and ring->cur. */
	ring->head = ring->cur = head;
txsync:
	ioctl(be->fd, NIOCTXSYNC, NULL);

	return (totlen);
}

static ssize_t
netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
	struct netmap_slot *slot = NULL;
	struct netmap_ring *ring;
	void *iov_frag_buf;
	int iov_frag_size;
	ssize_t totlen = 0;
	uint32_t head;

	assert(iovcnt);

	ring = priv->rx;
	head = ring->head;
	iov_frag_buf = iov->iov_base;
	iov_frag_size = iov->iov_len;

	do {
		int nm_buf_len;
		void *nm_buf;

		if (head == ring->tail) {
			return (0);
		}

		slot = ring->slot + head;
		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
		nm_buf_len = slot->len;

		for (;;) {
			int copylen = nm_buf_len < iov_frag_size ?
			    nm_buf_len : iov_frag_size;

			memcpy(iov_frag_buf, nm_buf, copylen);
			nm_buf += copylen;
			nm_buf_len -= copylen;
			iov_frag_buf += copylen;
			iov_frag_size -= copylen;
			totlen += copylen;

			if (nm_buf_len == 0) {
				break;
			}

			iov++;
			iovcnt--;
			if (iovcnt == 0) {
				/* No space to receive. */
				WPRINTF(("Short iov, drop %zd bytes",
				    totlen));
				return (-ENOSPC);
			}
			iov_frag_buf = iov->iov_base;
			iov_frag_size = iov->iov_len;
		}

		head = nm_ring_next(ring, head);

	} while (slot->flags & NS_MOREFRAG);

	/* Release slots to netmap. */
	ring->head = ring->cur = head;

	return (totlen);
}

static void
netmap_recv_enable(struct net_backend *be)
{
	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;

	mevent_enable(priv->mevp);
}

static void
netmap_recv_disable(struct net_backend *be)
{
	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;

	mevent_disable(priv->mevp);
}

static struct net_backend netmap_backend = {
	.prefix = "netmap",
	.priv_size = sizeof(struct netmap_priv),
	.init = netmap_init,
	.cleanup = netmap_cleanup,
	.send = netmap_send,
	.recv = netmap_recv,
	.recv_enable = netmap_recv_enable,
	.recv_disable = netmap_recv_disable,
	.get_cap = netmap_get_cap,
	.set_cap = netmap_set_cap,
};

/* A clone of the netmap backend, with a different prefix. */
static struct net_backend vale_backend = {
	.prefix = "vale",
	.priv_size = sizeof(struct netmap_priv),
	.init = netmap_init,
	.cleanup = netmap_cleanup,
	.send = netmap_send,
	.recv = netmap_recv,
	.recv_enable = netmap_recv_enable,
	.recv_disable = netmap_recv_disable,
	.get_cap = netmap_get_cap,
	.set_cap = netmap_set_cap,
};

DATA_SET(net_backend_set, netmap_backend);
DATA_SET(net_backend_set, vale_backend);
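
/*
 * For illustration (a sketch, not taken from this file): the "netmap"
 * and "vale" prefixes are passed through to nm_open() unmodified, so
 * backend names follow the netmap naming convention, e.g.
 *
 *	bhyve -s 2:0,virtio-net,netmap:em0 ... vmname
 *	bhyve -s 2:0,virtio-net,vale0:vm1 ... vmname
 *
 * The first attaches to a host NIC in netmap mode, the second to a
 * port of the VALE software switch named vale0.
 */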

/*
 * Initialize a backend and attach to the frontend.
 * This is called during frontend initialization.
 *  @ret is a pointer through which the initialized backend is returned
 *  @devname is the backend-name as supplied on the command line,
 * 	e.g. -s 2:0,frontend-name,backend-name[,other-args]
 *  @cb is the receive callback supplied by the frontend,
 *	and it is invoked in the event loop when a receive
 *	event is generated in the hypervisor
 *  @param is a pointer to the frontend, and normally used as
 *	the argument for the callback.
 */
int
netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb,
    void *param)
{
	struct net_backend **pbe, *nbe, *tbe = NULL;
	int err;

	/*
	 * Find the network backend that matches the user-provided
	 * device name. net_backend_set is built using a linker set.
	 */
	SET_FOREACH(pbe, net_backend_set) {
		if (strncmp(devname, (*pbe)->prefix,
		    strlen((*pbe)->prefix)) == 0) {
			tbe = *pbe;
			assert(tbe->init != NULL);
			assert(tbe->cleanup != NULL);
			assert(tbe->send != NULL);
			assert(tbe->recv != NULL);
			assert(tbe->get_cap != NULL);
			assert(tbe->set_cap != NULL);
			break;
		}
	}

	*ret = NULL;
	if (tbe == NULL)
		return (EINVAL);
	nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
	if (nbe == NULL)
		return (ENOMEM);
	*nbe = *tbe;	/* copy the template */
	nbe->fd = -1;
	nbe->sc = param;
	nbe->be_vnet_hdr_len = 0;
	nbe->fe_vnet_hdr_len = 0;

	/* Initialize the backend. */
	err = nbe->init(nbe, devname, cb, param);
	if (err) {
		free(nbe);
		return (err);
	}

	*ret = nbe;

	return (0);
}

void
netbe_cleanup(struct net_backend *be)
{

	if (be != NULL) {
		be->cleanup(be);
		free(be);
	}
}

uint64_t
netbe_get_cap(struct net_backend *be)
{

	assert(be != NULL);
	return (be->get_cap(be));
}

int
netbe_set_cap(struct net_backend *be, uint64_t features,
	      unsigned vnet_hdr_len)
{
	int ret;

	assert(be != NULL);

	/*
	 * There are only three valid lengths: 0 (no header), 10
	 * (struct virtio_net_hdr) and 12 (the same header followed by
	 * the 16-bit num_buffers field used with VIRTIO_NET_F_MRG_RXBUF).
	 */
	if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
		&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
		return (-1);

	be->fe_vnet_hdr_len = vnet_hdr_len;

	ret = be->set_cap(be, features, vnet_hdr_len);
	assert(be->be_vnet_hdr_len == 0 ||
	       be->be_vnet_hdr_len == be->fe_vnet_hdr_len);

	return (ret);
}
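
/*
 * A minimal sketch of the negotiation sequence a frontend might
 * follow (everything other than the netbe_*() calls is hypothetical):
 *
 *	uint64_t cap = netbe_get_cap(be);
 *	// Offer the guest only what the backend supports ...
 *	uint64_t features = guest_features & cap;
 *	// ... then commit the result, with a 12-byte header if
 *	// VIRTIO_NET_F_MRG_RXBUF was negotiated, 10 bytes otherwise.
 *	if (netbe_set_cap(be, features,
 *	    (features & VIRTIO_NET_F_MRG_RXBUF) ? 12 : 10) != 0)
 *		// handle the error
 */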

ssize_t
netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{

	return (be->send(be, iov, iovcnt));
}

/*
 * Try to read a packet from the backend, without blocking.
 * If no packets are available, return 0. In case of success, return
 * the length of the packet just read. Return -1 in case of errors.
 */
ssize_t
netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{

	return (be->recv(be, iov, iovcnt));
}

/*
 * Read a packet from the backend and discard it.
 * Returns the size of the discarded packet or zero if no packet was
 * available. A negative error code is returned in case of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the dummybuf is only used to discard frames,
	 * so there is no need for it to be per-vtnet or locked.
	 * We only make it large enough for a TSO-sized segment.
	 */
	static uint8_t dummybuf[65536 + 64];
	struct iovec iov;

	iov.iov_base = dummybuf;
	iov.iov_len = sizeof(dummybuf);

	return (netbe_recv(be, &iov, 1));
}
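
/*
 * For illustration (hypothetical frontend code): netbe_rx_discard()
 * lets an rx handler drain the backend when the guest has no free
 * receive buffers, so the event loop does not spin on a readable fd:
 *
 *	if (no_avail_guest_rx_buffers) {
 *		netbe_rx_discard(be);
 *		return;
 *	}
 */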

void
netbe_rx_disable(struct net_backend *be)
{

	be->recv_disable(be);
}

void
netbe_rx_enable(struct net_backend *be)
{

	be->recv_enable(be);
}

size_t
netbe_get_vnet_hdr_len(struct net_backend *be)
{

	return (be->be_vnet_hdr_len);
}