xref: /freebsd/usr.sbin/bhyve/net_backends.c (revision b9c3e544c48e76dd01aa32e9b1f5cd4c1ad8532c)
10ff7076bSVincenzo Maffione /*-
290db4ba9SVincenzo Maffione  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
390db4ba9SVincenzo Maffione  *
40ff7076bSVincenzo Maffione  * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
50ff7076bSVincenzo Maffione  *
60ff7076bSVincenzo Maffione  * Redistribution and use in source and binary forms, with or without
70ff7076bSVincenzo Maffione  * modification, are permitted provided that the following conditions
80ff7076bSVincenzo Maffione  * are met:
90ff7076bSVincenzo Maffione  * 1. Redistributions of source code must retain the above copyright
100ff7076bSVincenzo Maffione  *    notice, this list of conditions and the following disclaimer.
110ff7076bSVincenzo Maffione  * 2. Redistributions in binary form must reproduce the above copyright
120ff7076bSVincenzo Maffione  *    notice, this list of conditions and the following disclaimer in the
130ff7076bSVincenzo Maffione  *    documentation and/or other materials provided with the distribution.
140ff7076bSVincenzo Maffione  *
150ff7076bSVincenzo Maffione  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
160ff7076bSVincenzo Maffione  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
170ff7076bSVincenzo Maffione  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
180ff7076bSVincenzo Maffione  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
190ff7076bSVincenzo Maffione  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
200ff7076bSVincenzo Maffione  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
210ff7076bSVincenzo Maffione  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
220ff7076bSVincenzo Maffione  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
230ff7076bSVincenzo Maffione  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
240ff7076bSVincenzo Maffione  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
250ff7076bSVincenzo Maffione  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
260ff7076bSVincenzo Maffione  *
270ff7076bSVincenzo Maffione  * $FreeBSD$
280ff7076bSVincenzo Maffione  */
290ff7076bSVincenzo Maffione 
300ff7076bSVincenzo Maffione /*
310ff7076bSVincenzo Maffione  * This file implements multiple network backends (tap, netmap, ...),
320ff7076bSVincenzo Maffione  * to be used by network frontends such as virtio-net and e1000.
330ff7076bSVincenzo Maffione  * The API to access the backend (e.g. send/receive packets, negotiate
340ff7076bSVincenzo Maffione  * features) is exported by net_backends.h.
350ff7076bSVincenzo Maffione  */
360ff7076bSVincenzo Maffione 
378cd0c1acSVincenzo Maffione #include <sys/cdefs.h>
388cd0c1acSVincenzo Maffione __FBSDID("$FreeBSD$");
398cd0c1acSVincenzo Maffione 
400ff7076bSVincenzo Maffione #include <sys/types.h>		/* u_short etc */
410ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
420ff7076bSVincenzo Maffione #include <sys/capsicum.h>
430ff7076bSVincenzo Maffione #endif
440ff7076bSVincenzo Maffione #include <sys/ioctl.h>
450ff7076bSVincenzo Maffione #include <sys/mman.h>
460ff7076bSVincenzo Maffione #include <sys/uio.h>
470ff7076bSVincenzo Maffione 
480ff7076bSVincenzo Maffione #include <net/if.h>
4956be282bSBjoern A. Zeeb #if defined(INET6) || defined(INET)
5056be282bSBjoern A. Zeeb #include <net/if_tap.h>
5156be282bSBjoern A. Zeeb #endif
520ff7076bSVincenzo Maffione #include <net/netmap.h>
530ff7076bSVincenzo Maffione #include <net/netmap_virt.h>
540ff7076bSVincenzo Maffione #define NETMAP_WITH_LIBS
550ff7076bSVincenzo Maffione #include <net/netmap_user.h>
560ff7076bSVincenzo Maffione 
570ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
580ff7076bSVincenzo Maffione #include <capsicum_helpers.h>
590ff7076bSVincenzo Maffione #endif
600ff7076bSVincenzo Maffione #include <err.h>
610ff7076bSVincenzo Maffione #include <errno.h>
620ff7076bSVincenzo Maffione #include <fcntl.h>
630ff7076bSVincenzo Maffione #include <stdio.h>
640ff7076bSVincenzo Maffione #include <stdlib.h>
650ff7076bSVincenzo Maffione #include <stdint.h>
660ff7076bSVincenzo Maffione #include <string.h>
670ff7076bSVincenzo Maffione #include <unistd.h>
680ff7076bSVincenzo Maffione #include <sysexits.h>
690ff7076bSVincenzo Maffione #include <assert.h>
700ff7076bSVincenzo Maffione #include <pthread.h>
710ff7076bSVincenzo Maffione #include <pthread_np.h>
720ff7076bSVincenzo Maffione #include <poll.h>
730ff7076bSVincenzo Maffione #include <assert.h>
740ff7076bSVincenzo Maffione 
752cd7735dSAleksandr Fedorov #ifdef NETGRAPH
762cd7735dSAleksandr Fedorov #include <sys/param.h>
772cd7735dSAleksandr Fedorov #include <sys/sysctl.h>
782cd7735dSAleksandr Fedorov #include <netgraph.h>
792cd7735dSAleksandr Fedorov #endif
800ff7076bSVincenzo Maffione 
81621b5090SJohn Baldwin #include "config.h"
82332eff95SVincenzo Maffione #include "debug.h"
830ff7076bSVincenzo Maffione #include "iov.h"
840ff7076bSVincenzo Maffione #include "mevent.h"
850ff7076bSVincenzo Maffione #include "net_backends.h"
86621b5090SJohn Baldwin #include "pci_emul.h"
870ff7076bSVincenzo Maffione 
880ff7076bSVincenzo Maffione #include <sys/linker_set.h>
890ff7076bSVincenzo Maffione 
900ff7076bSVincenzo Maffione /*
910ff7076bSVincenzo Maffione  * Each network backend registers a set of function pointers that are
920ff7076bSVincenzo Maffione  * used to implement the net backends API.
930ff7076bSVincenzo Maffione  * This might need to be exposed if we implement backends in separate files.
940ff7076bSVincenzo Maffione  */
950ff7076bSVincenzo Maffione struct net_backend {
960ff7076bSVincenzo Maffione 	const char *prefix;	/* prefix matching this backend */
970ff7076bSVincenzo Maffione 
980ff7076bSVincenzo Maffione 	/*
990ff7076bSVincenzo Maffione 	 * Routines used to initialize and cleanup the resources needed
1000ff7076bSVincenzo Maffione 	 * by a backend. The cleanup function is used internally,
1010ff7076bSVincenzo Maffione 	 * and should not be called by the frontend.
1020ff7076bSVincenzo Maffione 	 */
1030ff7076bSVincenzo Maffione 	int (*init)(struct net_backend *be, const char *devname,
104621b5090SJohn Baldwin 	    nvlist_t *nvl, net_be_rxeof_t cb, void *param);
1050ff7076bSVincenzo Maffione 	void (*cleanup)(struct net_backend *be);
1060ff7076bSVincenzo Maffione 
1070ff7076bSVincenzo Maffione 	/*
1080ff7076bSVincenzo Maffione 	 * Called to serve a guest transmit request. The scatter-gather
1090ff7076bSVincenzo Maffione 	 * vector provided by the caller has 'iovcnt' elements and contains
1100ff7076bSVincenzo Maffione 	 * the packet to send.
1110ff7076bSVincenzo Maffione 	 */
11266c662b0SVincenzo Maffione 	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
11366c662b0SVincenzo Maffione 	    int iovcnt);
1140ff7076bSVincenzo Maffione 
1150ff7076bSVincenzo Maffione 	/*
116f92bb8c1SVincenzo Maffione 	 * Get the length of the next packet that can be received from
117f92bb8c1SVincenzo Maffione 	 * the backend. If no packets are currently available, this
118f92bb8c1SVincenzo Maffione 	 * function returns 0.
119f92bb8c1SVincenzo Maffione 	 */
120f92bb8c1SVincenzo Maffione 	ssize_t (*peek_recvlen)(struct net_backend *be);
121f92bb8c1SVincenzo Maffione 
122f92bb8c1SVincenzo Maffione 	/*
1230ff7076bSVincenzo Maffione 	 * Called to receive a packet from the backend. When the function
1240ff7076bSVincenzo Maffione 	 * returns a positive value 'len', the scatter-gather vector
1250ff7076bSVincenzo Maffione 	 * provided by the caller contains a packet with such length.
1260ff7076bSVincenzo Maffione 	 * The function returns 0 if the backend doesn't have a new packet to
1270ff7076bSVincenzo Maffione 	 * receive.
1280ff7076bSVincenzo Maffione 	 */
12966c662b0SVincenzo Maffione 	ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
13066c662b0SVincenzo Maffione 	    int iovcnt);
1310ff7076bSVincenzo Maffione 
1320ff7076bSVincenzo Maffione 	/*
133d12c5ef6SVincenzo Maffione 	 * Ask the backend to enable or disable receive operation in the
134d12c5ef6SVincenzo Maffione 	 * backend. On return from a disable operation, it is guaranteed
135d12c5ef6SVincenzo Maffione 	 * that the receive callback won't be called until receive is
136d12c5ef6SVincenzo Maffione 	 * enabled again. Note however that it is up to the caller to make
137d12c5ef6SVincenzo Maffione 	 * sure that netbe_recv() is not currently being executed by another
138d12c5ef6SVincenzo Maffione 	 * thread.
139d12c5ef6SVincenzo Maffione 	 */
140d12c5ef6SVincenzo Maffione 	void (*recv_enable)(struct net_backend *be);
141d12c5ef6SVincenzo Maffione 	void (*recv_disable)(struct net_backend *be);
142d12c5ef6SVincenzo Maffione 
143d12c5ef6SVincenzo Maffione 	/*
1440ff7076bSVincenzo Maffione 	 * Ask the backend for the virtio-net features it is able to
1450ff7076bSVincenzo Maffione 	 * support. Possible features are TSO, UFO and checksum offloading
1460ff7076bSVincenzo Maffione 	 * in both rx and tx direction and for both IPv4 and IPv6.
1470ff7076bSVincenzo Maffione 	 */
1480ff7076bSVincenzo Maffione 	uint64_t (*get_cap)(struct net_backend *be);
1490ff7076bSVincenzo Maffione 
1500ff7076bSVincenzo Maffione 	/*
1510ff7076bSVincenzo Maffione 	 * Tell the backend to enable/disable the specified virtio-net
1520ff7076bSVincenzo Maffione 	 * features (capabilities).
1530ff7076bSVincenzo Maffione 	 */
1540ff7076bSVincenzo Maffione 	int (*set_cap)(struct net_backend *be, uint64_t features,
1550ff7076bSVincenzo Maffione 	    unsigned int vnet_hdr_len);
1560ff7076bSVincenzo Maffione 
1570ff7076bSVincenzo Maffione 	struct pci_vtnet_softc *sc;
1580ff7076bSVincenzo Maffione 	int fd;
1590ff7076bSVincenzo Maffione 
1600ff7076bSVincenzo Maffione 	/*
1610ff7076bSVincenzo Maffione 	 * Length of the virtio-net header used by the backend and the
1620ff7076bSVincenzo Maffione 	 * frontend, respectively. A zero value means that the header
1630ff7076bSVincenzo Maffione 	 * is not used.
1640ff7076bSVincenzo Maffione 	 */
1650ff7076bSVincenzo Maffione 	unsigned int be_vnet_hdr_len;
1660ff7076bSVincenzo Maffione 	unsigned int fe_vnet_hdr_len;
1670ff7076bSVincenzo Maffione 
1680ff7076bSVincenzo Maffione 	/* Size of backend-specific private data. */
1690ff7076bSVincenzo Maffione 	size_t priv_size;
1700ff7076bSVincenzo Maffione 
1710ff7076bSVincenzo Maffione 	/* Room for backend-specific data. */
1720ff7076bSVincenzo Maffione 	char opaque[0];
1730ff7076bSVincenzo Maffione };
1740ff7076bSVincenzo Maffione 
/* Linker set collecting a pointer to every registered struct net_backend. */
SET_DECLARE(net_backend_set, struct net_backend);

/* Size of the virtio-net receive header structure. */
#define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)

/*
 * Warning printout. Call sites pass the whole argument list wrapped in an
 * extra pair of parentheses, e.g. WPRINTF(("fmt %s", arg)), which is handed
 * to PRINTLN unchanged.
 */
#define WPRINTF(params) PRINTLN params
1800ff7076bSVincenzo Maffione 
1810ff7076bSVincenzo Maffione /*
1820ff7076bSVincenzo Maffione  * The tap backend
1830ff7076bSVincenzo Maffione  */
1840ff7076bSVincenzo Maffione 
#if defined(INET6) || defined(INET)
/*
 * Protocol families that tap_init() tries, in this order, when it needs
 * a socket for interface ioctls. Only the families selected at build
 * time (INET6 and/or INET) are listed.
 */
const int pf_list[] = {
#if defined(INET6)
	PF_INET6,
#endif
#if defined(INET)
	PF_INET,
#endif
};
#endif
19556be282bSBjoern A. Zeeb 
/* Private state of the tap-style backends, stored in net_backend.opaque. */
struct tap_priv {
	/* Read event registered on the backend descriptor by the init routine. */
	struct mevent *mevp;
	/*
	 * A bounce buffer that allows us to implement the peek_recvlen
	 * callback. In the future we may get the same information from
	 * the kevent data.
	 */
	char bbuf[1 << 16];
	/* Length of the packet held in bbuf; 0 means the buffer is empty. */
	ssize_t bbuflen;
};
2060ff7076bSVincenzo Maffione 
2070ff7076bSVincenzo Maffione static void
2080ff7076bSVincenzo Maffione tap_cleanup(struct net_backend *be)
2090ff7076bSVincenzo Maffione {
2100ff7076bSVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
2110ff7076bSVincenzo Maffione 
2120ff7076bSVincenzo Maffione 	if (priv->mevp) {
2130ff7076bSVincenzo Maffione 		mevent_delete(priv->mevp);
2140ff7076bSVincenzo Maffione 	}
2150ff7076bSVincenzo Maffione 	if (be->fd != -1) {
2160ff7076bSVincenzo Maffione 		close(be->fd);
2170ff7076bSVincenzo Maffione 		be->fd = -1;
2180ff7076bSVincenzo Maffione 	}
2190ff7076bSVincenzo Maffione }
2200ff7076bSVincenzo Maffione 
2210ff7076bSVincenzo Maffione static int
2220ff7076bSVincenzo Maffione tap_init(struct net_backend *be, const char *devname,
223621b5090SJohn Baldwin 	 nvlist_t *nvl, net_be_rxeof_t cb, void *param)
2240ff7076bSVincenzo Maffione {
2250ff7076bSVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
2260ff7076bSVincenzo Maffione 	char tbuf[80];
2270ff7076bSVincenzo Maffione 	int opt = 1;
22856be282bSBjoern A. Zeeb #if defined(INET6) || defined(INET)
22956be282bSBjoern A. Zeeb 	struct ifreq ifrq;
23056be282bSBjoern A. Zeeb 	int i, s;
23156be282bSBjoern A. Zeeb #endif
2320ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
2330ff7076bSVincenzo Maffione 	cap_rights_t rights;
2340ff7076bSVincenzo Maffione #endif
2350ff7076bSVincenzo Maffione 
2360ff7076bSVincenzo Maffione 	if (cb == NULL) {
237332eff95SVincenzo Maffione 		WPRINTF(("TAP backend requires non-NULL callback"));
2380ff7076bSVincenzo Maffione 		return (-1);
2390ff7076bSVincenzo Maffione 	}
2400ff7076bSVincenzo Maffione 
2410ff7076bSVincenzo Maffione 	strcpy(tbuf, "/dev/");
2420ff7076bSVincenzo Maffione 	strlcat(tbuf, devname, sizeof(tbuf));
2430ff7076bSVincenzo Maffione 
2442d5fe369SSean Chittenden 	be->fd = open(tbuf, O_RDWR);
2452d5fe369SSean Chittenden 	if (be->fd == -1) {
246332eff95SVincenzo Maffione 		WPRINTF(("open of tap device %s failed", tbuf));
2470ff7076bSVincenzo Maffione 		goto error;
2480ff7076bSVincenzo Maffione 	}
2490ff7076bSVincenzo Maffione 
2500ff7076bSVincenzo Maffione 	/*
2510ff7076bSVincenzo Maffione 	 * Set non-blocking and register for read
2520ff7076bSVincenzo Maffione 	 * notifications with the event loop
2530ff7076bSVincenzo Maffione 	 */
2542d5fe369SSean Chittenden 	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
255332eff95SVincenzo Maffione 		WPRINTF(("tap device O_NONBLOCK failed"));
2560ff7076bSVincenzo Maffione 		goto error;
2570ff7076bSVincenzo Maffione 	}
2580ff7076bSVincenzo Maffione 
25956be282bSBjoern A. Zeeb #if defined(INET6) || defined(INET)
26056be282bSBjoern A. Zeeb 	/*
26156be282bSBjoern A. Zeeb 	 * Try to UP the interface rather than relying on
26256be282bSBjoern A. Zeeb 	 * net.link.tap.up_on_open.
26356be282bSBjoern A. Zeeb 	  */
26456be282bSBjoern A. Zeeb 	bzero(&ifrq, sizeof(ifrq));
26556be282bSBjoern A. Zeeb 	if (ioctl(be->fd, TAPGIFNAME, &ifrq) < 0) {
26656be282bSBjoern A. Zeeb 		WPRINTF(("Could not get interface name"));
26756be282bSBjoern A. Zeeb 		goto error;
26856be282bSBjoern A. Zeeb 	}
26956be282bSBjoern A. Zeeb 
27056be282bSBjoern A. Zeeb 	s = -1;
27156be282bSBjoern A. Zeeb 	for (i = 0; s == -1 && i < nitems(pf_list); i++)
27256be282bSBjoern A. Zeeb 		s = socket(pf_list[i], SOCK_DGRAM, 0);
27356be282bSBjoern A. Zeeb 	if (s == -1) {
27456be282bSBjoern A. Zeeb 		WPRINTF(("Could open socket"));
27556be282bSBjoern A. Zeeb 		goto error;
27656be282bSBjoern A. Zeeb 	}
27756be282bSBjoern A. Zeeb 
27856be282bSBjoern A. Zeeb 	if (ioctl(s, SIOCGIFFLAGS, &ifrq) < 0) {
27956be282bSBjoern A. Zeeb 		(void)close(s);
28056be282bSBjoern A. Zeeb 		WPRINTF(("Could not get interface flags"));
28156be282bSBjoern A. Zeeb 		goto error;
28256be282bSBjoern A. Zeeb 	}
28356be282bSBjoern A. Zeeb 	ifrq.ifr_flags |= IFF_UP;
28456be282bSBjoern A. Zeeb 	if (ioctl(s, SIOCSIFFLAGS, &ifrq) < 0) {
28556be282bSBjoern A. Zeeb 		(void)close(s);
28656be282bSBjoern A. Zeeb 		WPRINTF(("Could not set interface flags"));
28756be282bSBjoern A. Zeeb 		goto error;
28856be282bSBjoern A. Zeeb 	}
28956be282bSBjoern A. Zeeb 	(void)close(s);
29056be282bSBjoern A. Zeeb #endif
29156be282bSBjoern A. Zeeb 
2920ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
2930ff7076bSVincenzo Maffione 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
2942d5fe369SSean Chittenden 	if (caph_rights_limit(be->fd, &rights) == -1)
2950ff7076bSVincenzo Maffione 		errx(EX_OSERR, "Unable to apply rights for sandbox");
2960ff7076bSVincenzo Maffione #endif
2970ff7076bSVincenzo Maffione 
298f92bb8c1SVincenzo Maffione 	memset(priv->bbuf, 0, sizeof(priv->bbuf));
299f92bb8c1SVincenzo Maffione 	priv->bbuflen = 0;
300f92bb8c1SVincenzo Maffione 
3013e11768eSVincenzo Maffione 	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
3020ff7076bSVincenzo Maffione 	if (priv->mevp == NULL) {
303332eff95SVincenzo Maffione 		WPRINTF(("Could not register event"));
3040ff7076bSVincenzo Maffione 		goto error;
3050ff7076bSVincenzo Maffione 	}
3060ff7076bSVincenzo Maffione 
3070ff7076bSVincenzo Maffione 	return (0);
3080ff7076bSVincenzo Maffione 
3090ff7076bSVincenzo Maffione error:
3100ff7076bSVincenzo Maffione 	tap_cleanup(be);
3110ff7076bSVincenzo Maffione 	return (-1);
3120ff7076bSVincenzo Maffione }
3130ff7076bSVincenzo Maffione 
3140ff7076bSVincenzo Maffione /*
3150ff7076bSVincenzo Maffione  * Called to send a buffer chain out to the tap device
3160ff7076bSVincenzo Maffione  */
3170ff7076bSVincenzo Maffione static ssize_t
31866c662b0SVincenzo Maffione tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
3190ff7076bSVincenzo Maffione {
3200ff7076bSVincenzo Maffione 	return (writev(be->fd, iov, iovcnt));
3210ff7076bSVincenzo Maffione }
3220ff7076bSVincenzo Maffione 
3230ff7076bSVincenzo Maffione static ssize_t
324f92bb8c1SVincenzo Maffione tap_peek_recvlen(struct net_backend *be)
3250ff7076bSVincenzo Maffione {
326f92bb8c1SVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
3270ff7076bSVincenzo Maffione 	ssize_t ret;
3280ff7076bSVincenzo Maffione 
329f92bb8c1SVincenzo Maffione 	if (priv->bbuflen > 0) {
330f92bb8c1SVincenzo Maffione 		/*
331f92bb8c1SVincenzo Maffione 		 * We already have a packet in the bounce buffer.
332f92bb8c1SVincenzo Maffione 		 * Just return its length.
333f92bb8c1SVincenzo Maffione 		 */
334f92bb8c1SVincenzo Maffione 		return priv->bbuflen;
335f92bb8c1SVincenzo Maffione 	}
336f92bb8c1SVincenzo Maffione 
337f92bb8c1SVincenzo Maffione 	/*
338f92bb8c1SVincenzo Maffione 	 * Read the next packet (if any) into the bounce buffer, so
339f92bb8c1SVincenzo Maffione 	 * that we get to know its length and we can return that
340f92bb8c1SVincenzo Maffione 	 * to the caller.
341f92bb8c1SVincenzo Maffione 	 */
342f92bb8c1SVincenzo Maffione 	ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
343f92bb8c1SVincenzo Maffione 	if (ret < 0 && errno == EWOULDBLOCK) {
344f92bb8c1SVincenzo Maffione 		return (0);
345f92bb8c1SVincenzo Maffione 	}
346f92bb8c1SVincenzo Maffione 
347f92bb8c1SVincenzo Maffione 	if (ret > 0)
348f92bb8c1SVincenzo Maffione 		priv->bbuflen = ret;
349f92bb8c1SVincenzo Maffione 
350f92bb8c1SVincenzo Maffione 	return (ret);
351f92bb8c1SVincenzo Maffione }
352f92bb8c1SVincenzo Maffione 
353f92bb8c1SVincenzo Maffione static ssize_t
354f92bb8c1SVincenzo Maffione tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
355f92bb8c1SVincenzo Maffione {
356f92bb8c1SVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
357f92bb8c1SVincenzo Maffione 	ssize_t ret;
358f92bb8c1SVincenzo Maffione 
359f92bb8c1SVincenzo Maffione 	if (priv->bbuflen > 0) {
360f92bb8c1SVincenzo Maffione 		/*
361f92bb8c1SVincenzo Maffione 		 * A packet is available in the bounce buffer, so
362f92bb8c1SVincenzo Maffione 		 * we read it from there.
363f92bb8c1SVincenzo Maffione 		 */
364f92bb8c1SVincenzo Maffione 		ret = buf_to_iov(priv->bbuf, priv->bbuflen,
365f92bb8c1SVincenzo Maffione 		    iov, iovcnt, 0);
366f92bb8c1SVincenzo Maffione 
367f92bb8c1SVincenzo Maffione 		/* Mark the bounce buffer as empty. */
368f92bb8c1SVincenzo Maffione 		priv->bbuflen = 0;
369f92bb8c1SVincenzo Maffione 
370f92bb8c1SVincenzo Maffione 		return (ret);
371f92bb8c1SVincenzo Maffione 	}
3720ff7076bSVincenzo Maffione 
3730ff7076bSVincenzo Maffione 	ret = readv(be->fd, iov, iovcnt);
3740ff7076bSVincenzo Maffione 	if (ret < 0 && errno == EWOULDBLOCK) {
3750ff7076bSVincenzo Maffione 		return (0);
3760ff7076bSVincenzo Maffione 	}
3770ff7076bSVincenzo Maffione 
3780ff7076bSVincenzo Maffione 	return (ret);
3790ff7076bSVincenzo Maffione }
3800ff7076bSVincenzo Maffione 
381d12c5ef6SVincenzo Maffione static void
382d12c5ef6SVincenzo Maffione tap_recv_enable(struct net_backend *be)
383d12c5ef6SVincenzo Maffione {
384d12c5ef6SVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
385d12c5ef6SVincenzo Maffione 
386d12c5ef6SVincenzo Maffione 	mevent_enable(priv->mevp);
387d12c5ef6SVincenzo Maffione }
388d12c5ef6SVincenzo Maffione 
389d12c5ef6SVincenzo Maffione static void
390d12c5ef6SVincenzo Maffione tap_recv_disable(struct net_backend *be)
391d12c5ef6SVincenzo Maffione {
392d12c5ef6SVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
393d12c5ef6SVincenzo Maffione 
394d12c5ef6SVincenzo Maffione 	mevent_disable(priv->mevp);
395d12c5ef6SVincenzo Maffione }
396d12c5ef6SVincenzo Maffione 
/* Report the supported virtio-net features: none for now. */
static uint64_t
tap_get_cap(struct net_backend *be)
{
	const uint64_t no_features = 0;

	return (no_features);
}
4030ff7076bSVincenzo Maffione 
/*
 * Accept only the "nothing enabled" configuration: returns 0 when both
 * 'features' and 'vnet_hdr_len' are zero, and -1 for any other request.
 */
static int
tap_set_cap(struct net_backend *be, uint64_t features,
		unsigned vnet_hdr_len)
{

	if (features == 0 && vnet_hdr_len == 0)
		return (0);

	return (-1);
}
4110ff7076bSVincenzo Maffione 
4120ff7076bSVincenzo Maffione static struct net_backend tap_backend = {
4130ff7076bSVincenzo Maffione 	.prefix = "tap",
4140ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct tap_priv),
4150ff7076bSVincenzo Maffione 	.init = tap_init,
4160ff7076bSVincenzo Maffione 	.cleanup = tap_cleanup,
4170ff7076bSVincenzo Maffione 	.send = tap_send,
418f92bb8c1SVincenzo Maffione 	.peek_recvlen = tap_peek_recvlen,
4190ff7076bSVincenzo Maffione 	.recv = tap_recv,
420d12c5ef6SVincenzo Maffione 	.recv_enable = tap_recv_enable,
421d12c5ef6SVincenzo Maffione 	.recv_disable = tap_recv_disable,
4220ff7076bSVincenzo Maffione 	.get_cap = tap_get_cap,
4230ff7076bSVincenzo Maffione 	.set_cap = tap_set_cap,
4240ff7076bSVincenzo Maffione };
4250ff7076bSVincenzo Maffione 
4260ff7076bSVincenzo Maffione /* A clone of the tap backend, with a different prefix. */
4270ff7076bSVincenzo Maffione static struct net_backend vmnet_backend = {
4280ff7076bSVincenzo Maffione 	.prefix = "vmnet",
4290ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct tap_priv),
4300ff7076bSVincenzo Maffione 	.init = tap_init,
4310ff7076bSVincenzo Maffione 	.cleanup = tap_cleanup,
4320ff7076bSVincenzo Maffione 	.send = tap_send,
433f92bb8c1SVincenzo Maffione 	.peek_recvlen = tap_peek_recvlen,
4340ff7076bSVincenzo Maffione 	.recv = tap_recv,
435d12c5ef6SVincenzo Maffione 	.recv_enable = tap_recv_enable,
436d12c5ef6SVincenzo Maffione 	.recv_disable = tap_recv_disable,
4370ff7076bSVincenzo Maffione 	.get_cap = tap_get_cap,
4380ff7076bSVincenzo Maffione 	.set_cap = tap_set_cap,
4390ff7076bSVincenzo Maffione };
4400ff7076bSVincenzo Maffione 
/* Add the tap and vmnet descriptors to the net_backend_set linker set. */
DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);
4430ff7076bSVincenzo Maffione 
4442cd7735dSAleksandr Fedorov #ifdef NETGRAPH
4452cd7735dSAleksandr Fedorov 
4462cd7735dSAleksandr Fedorov /*
4472cd7735dSAleksandr Fedorov  * Netgraph backend
4482cd7735dSAleksandr Fedorov  */
4492cd7735dSAleksandr Fedorov 
4502cd7735dSAleksandr Fedorov #define NG_SBUF_MAX_SIZE (4 * 1024 * 1024)
4512cd7735dSAleksandr Fedorov 
4522cd7735dSAleksandr Fedorov static int
4532cd7735dSAleksandr Fedorov ng_init(struct net_backend *be, const char *devname,
454621b5090SJohn Baldwin 	 nvlist_t *nvl, net_be_rxeof_t cb, void *param)
4552cd7735dSAleksandr Fedorov {
4562cd7735dSAleksandr Fedorov 	struct tap_priv *p = (struct tap_priv *)be->opaque;
4572cd7735dSAleksandr Fedorov 	struct ngm_connect ngc;
458621b5090SJohn Baldwin 	const char *value, *nodename;
4592cd7735dSAleksandr Fedorov 	int sbsz;
4602cd7735dSAleksandr Fedorov 	int ctrl_sock;
4612cd7735dSAleksandr Fedorov 	int flags;
4622cd7735dSAleksandr Fedorov 	unsigned long maxsbsz;
4632cd7735dSAleksandr Fedorov 	size_t msbsz;
4642cd7735dSAleksandr Fedorov #ifndef WITHOUT_CAPSICUM
4652cd7735dSAleksandr Fedorov 	cap_rights_t rights;
4662cd7735dSAleksandr Fedorov #endif
4672cd7735dSAleksandr Fedorov 
4682cd7735dSAleksandr Fedorov 	if (cb == NULL) {
4692cd7735dSAleksandr Fedorov 		WPRINTF(("Netgraph backend requires non-NULL callback"));
4702cd7735dSAleksandr Fedorov 		return (-1);
4712cd7735dSAleksandr Fedorov 	}
4722cd7735dSAleksandr Fedorov 
4732cd7735dSAleksandr Fedorov 	be->fd = -1;
4742cd7735dSAleksandr Fedorov 
4752cd7735dSAleksandr Fedorov 	memset(&ngc, 0, sizeof(ngc));
4762cd7735dSAleksandr Fedorov 
477621b5090SJohn Baldwin 	value = get_config_value_node(nvl, "path");
478621b5090SJohn Baldwin 	if (value == NULL) {
4792cd7735dSAleksandr Fedorov 		WPRINTF(("path must be provided"));
4802cd7735dSAleksandr Fedorov 		return (-1);
4812cd7735dSAleksandr Fedorov 	}
482621b5090SJohn Baldwin 	strncpy(ngc.path, value, NG_PATHSIZ - 1);
4832cd7735dSAleksandr Fedorov 
484621b5090SJohn Baldwin 	value = get_config_value_node(nvl, "hook");
485621b5090SJohn Baldwin 	if (value == NULL)
486621b5090SJohn Baldwin 		value = "vmlink";
487621b5090SJohn Baldwin 	strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1);
488621b5090SJohn Baldwin 
489621b5090SJohn Baldwin 	value = get_config_value_node(nvl, "peerhook");
490621b5090SJohn Baldwin 	if (value == NULL) {
4912cd7735dSAleksandr Fedorov 		WPRINTF(("peer hook must be provided"));
4922cd7735dSAleksandr Fedorov 		return (-1);
4932cd7735dSAleksandr Fedorov 	}
494621b5090SJohn Baldwin 	strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1);
4952cd7735dSAleksandr Fedorov 
496621b5090SJohn Baldwin 	nodename = get_config_value_node(nvl, "socket");
497621b5090SJohn Baldwin 	if (NgMkSockNode(nodename,
4982cd7735dSAleksandr Fedorov 		&ctrl_sock, &be->fd) < 0) {
4992cd7735dSAleksandr Fedorov 		WPRINTF(("can't get Netgraph sockets"));
5002cd7735dSAleksandr Fedorov 		return (-1);
5012cd7735dSAleksandr Fedorov 	}
5022cd7735dSAleksandr Fedorov 
5032cd7735dSAleksandr Fedorov 	if (NgSendMsg(ctrl_sock, ".",
5042cd7735dSAleksandr Fedorov 		NGM_GENERIC_COOKIE,
5052cd7735dSAleksandr Fedorov 		NGM_CONNECT, &ngc, sizeof(ngc)) < 0) {
5062cd7735dSAleksandr Fedorov 		WPRINTF(("can't connect to node"));
5072cd7735dSAleksandr Fedorov 		close(ctrl_sock);
5082cd7735dSAleksandr Fedorov 		goto error;
5092cd7735dSAleksandr Fedorov 	}
5102cd7735dSAleksandr Fedorov 
5112cd7735dSAleksandr Fedorov 	close(ctrl_sock);
5122cd7735dSAleksandr Fedorov 
5132cd7735dSAleksandr Fedorov 	flags = fcntl(be->fd, F_GETFL);
5142cd7735dSAleksandr Fedorov 
5152cd7735dSAleksandr Fedorov 	if (flags < 0) {
5162cd7735dSAleksandr Fedorov 		WPRINTF(("can't get socket flags"));
5172cd7735dSAleksandr Fedorov 		goto error;
5182cd7735dSAleksandr Fedorov 	}
5192cd7735dSAleksandr Fedorov 
5202cd7735dSAleksandr Fedorov 	if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) {
5212cd7735dSAleksandr Fedorov 		WPRINTF(("can't set O_NONBLOCK flag"));
5222cd7735dSAleksandr Fedorov 		goto error;
5232cd7735dSAleksandr Fedorov 	}
5242cd7735dSAleksandr Fedorov 
5252cd7735dSAleksandr Fedorov 	/*
5262cd7735dSAleksandr Fedorov 	 * The default ng_socket(4) buffer's size is too low.
5272cd7735dSAleksandr Fedorov 	 * Calculate the minimum value between NG_SBUF_MAX_SIZE
5282cd7735dSAleksandr Fedorov 	 * and kern.ipc.maxsockbuf.
5292cd7735dSAleksandr Fedorov 	 */
5302cd7735dSAleksandr Fedorov 	msbsz = sizeof(maxsbsz);
5312cd7735dSAleksandr Fedorov 	if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz,
5322cd7735dSAleksandr Fedorov 		NULL, 0) < 0) {
5332cd7735dSAleksandr Fedorov 		WPRINTF(("can't get 'kern.ipc.maxsockbuf' value"));
5342cd7735dSAleksandr Fedorov 		goto error;
5352cd7735dSAleksandr Fedorov 	}
5362cd7735dSAleksandr Fedorov 
5372cd7735dSAleksandr Fedorov 	/*
5382cd7735dSAleksandr Fedorov 	 * We can't set the socket buffer size to kern.ipc.maxsockbuf value,
5392cd7735dSAleksandr Fedorov 	 * as it takes into account the mbuf(9) overhead.
5402cd7735dSAleksandr Fedorov 	 */
5412cd7735dSAleksandr Fedorov 	maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES);
5422cd7735dSAleksandr Fedorov 
5432cd7735dSAleksandr Fedorov 	sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz);
5442cd7735dSAleksandr Fedorov 
5452cd7735dSAleksandr Fedorov 	if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz,
5462cd7735dSAleksandr Fedorov 		sizeof(sbsz)) < 0) {
5472cd7735dSAleksandr Fedorov 		WPRINTF(("can't set TX buffer size"));
5482cd7735dSAleksandr Fedorov 		goto error;
5492cd7735dSAleksandr Fedorov 	}
5502cd7735dSAleksandr Fedorov 
5512cd7735dSAleksandr Fedorov 	if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz,
5522cd7735dSAleksandr Fedorov 		sizeof(sbsz)) < 0) {
5532cd7735dSAleksandr Fedorov 		WPRINTF(("can't set RX buffer size"));
5542cd7735dSAleksandr Fedorov 		goto error;
5552cd7735dSAleksandr Fedorov 	}
5562cd7735dSAleksandr Fedorov 
5572cd7735dSAleksandr Fedorov #ifndef WITHOUT_CAPSICUM
5582cd7735dSAleksandr Fedorov 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
5592cd7735dSAleksandr Fedorov 	if (caph_rights_limit(be->fd, &rights) == -1)
5602cd7735dSAleksandr Fedorov 		errx(EX_OSERR, "Unable to apply rights for sandbox");
5612cd7735dSAleksandr Fedorov #endif
5622cd7735dSAleksandr Fedorov 
5632cd7735dSAleksandr Fedorov 	memset(p->bbuf, 0, sizeof(p->bbuf));
5642cd7735dSAleksandr Fedorov 	p->bbuflen = 0;
5652cd7735dSAleksandr Fedorov 
5662cd7735dSAleksandr Fedorov 	p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
5672cd7735dSAleksandr Fedorov 	if (p->mevp == NULL) {
5682cd7735dSAleksandr Fedorov 		WPRINTF(("Could not register event"));
5692cd7735dSAleksandr Fedorov 		goto error;
5702cd7735dSAleksandr Fedorov 	}
5712cd7735dSAleksandr Fedorov 
5722cd7735dSAleksandr Fedorov 	return (0);
5732cd7735dSAleksandr Fedorov 
5742cd7735dSAleksandr Fedorov error:
5752cd7735dSAleksandr Fedorov 	tap_cleanup(be);
5762cd7735dSAleksandr Fedorov 	return (-1);
5772cd7735dSAleksandr Fedorov }
5782cd7735dSAleksandr Fedorov 
5792cd7735dSAleksandr Fedorov static struct net_backend ng_backend = {
5802cd7735dSAleksandr Fedorov 	.prefix = "netgraph",
5812cd7735dSAleksandr Fedorov 	.priv_size = sizeof(struct tap_priv),
5822cd7735dSAleksandr Fedorov 	.init = ng_init,
5832cd7735dSAleksandr Fedorov 	.cleanup = tap_cleanup,
5842cd7735dSAleksandr Fedorov 	.send = tap_send,
5852cd7735dSAleksandr Fedorov 	.peek_recvlen = tap_peek_recvlen,
5862cd7735dSAleksandr Fedorov 	.recv = tap_recv,
5872cd7735dSAleksandr Fedorov 	.recv_enable = tap_recv_enable,
5882cd7735dSAleksandr Fedorov 	.recv_disable = tap_recv_disable,
5892cd7735dSAleksandr Fedorov 	.get_cap = tap_get_cap,
5902cd7735dSAleksandr Fedorov 	.set_cap = tap_set_cap,
5912cd7735dSAleksandr Fedorov };
5922cd7735dSAleksandr Fedorov 
5932cd7735dSAleksandr Fedorov DATA_SET(net_backend_set, ng_backend);
5942cd7735dSAleksandr Fedorov 
5952cd7735dSAleksandr Fedorov #endif /* NETGRAPH */
5962cd7735dSAleksandr Fedorov 
5970ff7076bSVincenzo Maffione /*
5980ff7076bSVincenzo Maffione  * The netmap backend
5990ff7076bSVincenzo Maffione  */
6000ff7076bSVincenzo Maffione 
/*
 * The virtio-net features supported by netmap: checksum, TSO (IPv4 and
 * IPv6) and UFO, in both their HOST_* and GUEST_* variants.
 */
#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
6060ff7076bSVincenzo Maffione 
6070ff7076bSVincenzo Maffione struct netmap_priv {
6080ff7076bSVincenzo Maffione 	char ifname[IFNAMSIZ];
6090ff7076bSVincenzo Maffione 	struct nm_desc *nmd;
6100ff7076bSVincenzo Maffione 	uint16_t memid;
6110ff7076bSVincenzo Maffione 	struct netmap_ring *rx;
6120ff7076bSVincenzo Maffione 	struct netmap_ring *tx;
6130ff7076bSVincenzo Maffione 	struct mevent *mevp;
6140ff7076bSVincenzo Maffione 	net_be_rxeof_t cb;
6150ff7076bSVincenzo Maffione 	void *cb_param;
6160ff7076bSVincenzo Maffione };
6170ff7076bSVincenzo Maffione 
6180ff7076bSVincenzo Maffione static void
6190ff7076bSVincenzo Maffione nmreq_init(struct nmreq *req, char *ifname)
6200ff7076bSVincenzo Maffione {
6210ff7076bSVincenzo Maffione 
6220ff7076bSVincenzo Maffione 	memset(req, 0, sizeof(*req));
6230ff7076bSVincenzo Maffione 	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
6240ff7076bSVincenzo Maffione 	req->nr_version = NETMAP_API;
6250ff7076bSVincenzo Maffione }
6260ff7076bSVincenzo Maffione 
6270ff7076bSVincenzo Maffione static int
6280ff7076bSVincenzo Maffione netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
6290ff7076bSVincenzo Maffione {
6300ff7076bSVincenzo Maffione 	int err;
6310ff7076bSVincenzo Maffione 	struct nmreq req;
6320ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
6330ff7076bSVincenzo Maffione 
6340ff7076bSVincenzo Maffione 	nmreq_init(&req, priv->ifname);
6350ff7076bSVincenzo Maffione 	req.nr_cmd = NETMAP_BDG_VNET_HDR;
6360ff7076bSVincenzo Maffione 	req.nr_arg1 = vnet_hdr_len;
6370ff7076bSVincenzo Maffione 	err = ioctl(be->fd, NIOCREGIF, &req);
6380ff7076bSVincenzo Maffione 	if (err) {
639332eff95SVincenzo Maffione 		WPRINTF(("Unable to set vnet header length %d",
6400ff7076bSVincenzo Maffione 				vnet_hdr_len));
6410ff7076bSVincenzo Maffione 		return (err);
6420ff7076bSVincenzo Maffione 	}
6430ff7076bSVincenzo Maffione 
6440ff7076bSVincenzo Maffione 	be->be_vnet_hdr_len = vnet_hdr_len;
6450ff7076bSVincenzo Maffione 
6460ff7076bSVincenzo Maffione 	return (0);
6470ff7076bSVincenzo Maffione }
6480ff7076bSVincenzo Maffione 
6490ff7076bSVincenzo Maffione static int
6500ff7076bSVincenzo Maffione netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
6510ff7076bSVincenzo Maffione {
6520ff7076bSVincenzo Maffione 	int prev_hdr_len = be->be_vnet_hdr_len;
6530ff7076bSVincenzo Maffione 	int ret;
6540ff7076bSVincenzo Maffione 
6550ff7076bSVincenzo Maffione 	if (vnet_hdr_len == prev_hdr_len) {
6560ff7076bSVincenzo Maffione 		return (1);
6570ff7076bSVincenzo Maffione 	}
6580ff7076bSVincenzo Maffione 
6590ff7076bSVincenzo Maffione 	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
6600ff7076bSVincenzo Maffione 	if (ret) {
6610ff7076bSVincenzo Maffione 		return (0);
6620ff7076bSVincenzo Maffione 	}
6630ff7076bSVincenzo Maffione 
6640ff7076bSVincenzo Maffione 	netmap_set_vnet_hdr_len(be, prev_hdr_len);
6650ff7076bSVincenzo Maffione 
6660ff7076bSVincenzo Maffione 	return (1);
6670ff7076bSVincenzo Maffione }
6680ff7076bSVincenzo Maffione 
6690ff7076bSVincenzo Maffione static uint64_t
6700ff7076bSVincenzo Maffione netmap_get_cap(struct net_backend *be)
6710ff7076bSVincenzo Maffione {
6720ff7076bSVincenzo Maffione 
6730ff7076bSVincenzo Maffione 	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
6740ff7076bSVincenzo Maffione 	    NETMAP_FEATURES : 0);
6750ff7076bSVincenzo Maffione }
6760ff7076bSVincenzo Maffione 
/*
 * Apply the negotiated capabilities.  The 'features' mask is not consulted
 * here: the only action taken is programming the virtio-net header length.
 * Returns the result of netmap_set_vnet_hdr_len().
 */
static int
netmap_set_cap(struct net_backend *be, uint64_t features,
    unsigned vnet_hdr_len)
{
	int error;

	error = netmap_set_vnet_hdr_len(be, vnet_hdr_len);

	return (error);
}
6840ff7076bSVincenzo Maffione 
6850ff7076bSVincenzo Maffione static int
6860ff7076bSVincenzo Maffione netmap_init(struct net_backend *be, const char *devname,
687621b5090SJohn Baldwin 	    nvlist_t *nvl, net_be_rxeof_t cb, void *param)
6880ff7076bSVincenzo Maffione {
6890ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
6900ff7076bSVincenzo Maffione 
6910ff7076bSVincenzo Maffione 	strlcpy(priv->ifname, devname, sizeof(priv->ifname));
6920ff7076bSVincenzo Maffione 	priv->ifname[sizeof(priv->ifname) - 1] = '\0';
6930ff7076bSVincenzo Maffione 
6940ff7076bSVincenzo Maffione 	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
6950ff7076bSVincenzo Maffione 	if (priv->nmd == NULL) {
696332eff95SVincenzo Maffione 		WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)",
6970ff7076bSVincenzo Maffione 			devname, strerror(errno)));
6980ff7076bSVincenzo Maffione 		free(priv);
6990ff7076bSVincenzo Maffione 		return (-1);
7000ff7076bSVincenzo Maffione 	}
7010ff7076bSVincenzo Maffione 
7020ff7076bSVincenzo Maffione 	priv->memid = priv->nmd->req.nr_arg2;
7030ff7076bSVincenzo Maffione 	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
7040ff7076bSVincenzo Maffione 	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
7050ff7076bSVincenzo Maffione 	priv->cb = cb;
7060ff7076bSVincenzo Maffione 	priv->cb_param = param;
7070ff7076bSVincenzo Maffione 	be->fd = priv->nmd->fd;
7080ff7076bSVincenzo Maffione 
7093e11768eSVincenzo Maffione 	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
7100ff7076bSVincenzo Maffione 	if (priv->mevp == NULL) {
711332eff95SVincenzo Maffione 		WPRINTF(("Could not register event"));
7120ff7076bSVincenzo Maffione 		return (-1);
7130ff7076bSVincenzo Maffione 	}
7140ff7076bSVincenzo Maffione 
7150ff7076bSVincenzo Maffione 	return (0);
7160ff7076bSVincenzo Maffione }
7170ff7076bSVincenzo Maffione 
7180ff7076bSVincenzo Maffione static void
7190ff7076bSVincenzo Maffione netmap_cleanup(struct net_backend *be)
7200ff7076bSVincenzo Maffione {
7210ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
7220ff7076bSVincenzo Maffione 
7230ff7076bSVincenzo Maffione 	if (priv->mevp) {
7240ff7076bSVincenzo Maffione 		mevent_delete(priv->mevp);
7250ff7076bSVincenzo Maffione 	}
7260ff7076bSVincenzo Maffione 	if (priv->nmd) {
7270ff7076bSVincenzo Maffione 		nm_close(priv->nmd);
7280ff7076bSVincenzo Maffione 	}
7290ff7076bSVincenzo Maffione 	be->fd = -1;
7300ff7076bSVincenzo Maffione }
7310ff7076bSVincenzo Maffione 
7320ff7076bSVincenzo Maffione static ssize_t
73366c662b0SVincenzo Maffione netmap_send(struct net_backend *be, const struct iovec *iov,
7340ff7076bSVincenzo Maffione 	    int iovcnt)
7350ff7076bSVincenzo Maffione {
7360ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
7370ff7076bSVincenzo Maffione 	struct netmap_ring *ring;
7380ff7076bSVincenzo Maffione 	ssize_t totlen = 0;
7390ff7076bSVincenzo Maffione 	int nm_buf_size;
7400ff7076bSVincenzo Maffione 	int nm_buf_len;
7410ff7076bSVincenzo Maffione 	uint32_t head;
7420ff7076bSVincenzo Maffione 	void *nm_buf;
7430ff7076bSVincenzo Maffione 	int j;
7440ff7076bSVincenzo Maffione 
7450ff7076bSVincenzo Maffione 	ring = priv->tx;
7460ff7076bSVincenzo Maffione 	head = ring->head;
7470ff7076bSVincenzo Maffione 	if (head == ring->tail) {
748332eff95SVincenzo Maffione 		WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt)));
7490ff7076bSVincenzo Maffione 		goto txsync;
7500ff7076bSVincenzo Maffione 	}
7510ff7076bSVincenzo Maffione 	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
7520ff7076bSVincenzo Maffione 	nm_buf_size = ring->nr_buf_size;
7530ff7076bSVincenzo Maffione 	nm_buf_len = 0;
7540ff7076bSVincenzo Maffione 
7550ff7076bSVincenzo Maffione 	for (j = 0; j < iovcnt; j++) {
7560ff7076bSVincenzo Maffione 		int iov_frag_size = iov[j].iov_len;
7570ff7076bSVincenzo Maffione 		void *iov_frag_buf = iov[j].iov_base;
7580ff7076bSVincenzo Maffione 
7590ff7076bSVincenzo Maffione 		totlen += iov_frag_size;
7600ff7076bSVincenzo Maffione 
7610ff7076bSVincenzo Maffione 		/*
7620ff7076bSVincenzo Maffione 		 * Split each iovec fragment over more netmap slots, if
7630ff7076bSVincenzo Maffione 		 * necessary.
7640ff7076bSVincenzo Maffione 		 */
7650ff7076bSVincenzo Maffione 		for (;;) {
7660ff7076bSVincenzo Maffione 			int copylen;
7670ff7076bSVincenzo Maffione 
7680ff7076bSVincenzo Maffione 			copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
7690ff7076bSVincenzo Maffione 			memcpy(nm_buf, iov_frag_buf, copylen);
7700ff7076bSVincenzo Maffione 
7710ff7076bSVincenzo Maffione 			iov_frag_buf += copylen;
7720ff7076bSVincenzo Maffione 			iov_frag_size -= copylen;
7730ff7076bSVincenzo Maffione 			nm_buf += copylen;
7740ff7076bSVincenzo Maffione 			nm_buf_size -= copylen;
7750ff7076bSVincenzo Maffione 			nm_buf_len += copylen;
7760ff7076bSVincenzo Maffione 
7770ff7076bSVincenzo Maffione 			if (iov_frag_size == 0) {
7780ff7076bSVincenzo Maffione 				break;
7790ff7076bSVincenzo Maffione 			}
7800ff7076bSVincenzo Maffione 
7810ff7076bSVincenzo Maffione 			ring->slot[head].len = nm_buf_len;
7820ff7076bSVincenzo Maffione 			ring->slot[head].flags = NS_MOREFRAG;
7830ff7076bSVincenzo Maffione 			head = nm_ring_next(ring, head);
7840ff7076bSVincenzo Maffione 			if (head == ring->tail) {
7850ff7076bSVincenzo Maffione 				/*
7860ff7076bSVincenzo Maffione 				 * We ran out of netmap slots while
7870ff7076bSVincenzo Maffione 				 * splitting the iovec fragments.
7880ff7076bSVincenzo Maffione 				 */
789332eff95SVincenzo Maffione 				WPRINTF(("No space, drop %zu bytes",
7900ff7076bSVincenzo Maffione 				   count_iov(iov, iovcnt)));
7910ff7076bSVincenzo Maffione 				goto txsync;
7920ff7076bSVincenzo Maffione 			}
7930ff7076bSVincenzo Maffione 			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
7940ff7076bSVincenzo Maffione 			nm_buf_size = ring->nr_buf_size;
7950ff7076bSVincenzo Maffione 			nm_buf_len = 0;
7960ff7076bSVincenzo Maffione 		}
7970ff7076bSVincenzo Maffione 	}
7980ff7076bSVincenzo Maffione 
7990ff7076bSVincenzo Maffione 	/* Complete the last slot, which must not have NS_MOREFRAG set. */
8000ff7076bSVincenzo Maffione 	ring->slot[head].len = nm_buf_len;
8010ff7076bSVincenzo Maffione 	ring->slot[head].flags = 0;
8020ff7076bSVincenzo Maffione 	head = nm_ring_next(ring, head);
8030ff7076bSVincenzo Maffione 
8040ff7076bSVincenzo Maffione 	/* Now update ring->head and ring->cur. */
8050ff7076bSVincenzo Maffione 	ring->head = ring->cur = head;
8060ff7076bSVincenzo Maffione txsync:
8070ff7076bSVincenzo Maffione 	ioctl(be->fd, NIOCTXSYNC, NULL);
8080ff7076bSVincenzo Maffione 
8090ff7076bSVincenzo Maffione 	return (totlen);
8100ff7076bSVincenzo Maffione }
8110ff7076bSVincenzo Maffione 
8120ff7076bSVincenzo Maffione static ssize_t
813f92bb8c1SVincenzo Maffione netmap_peek_recvlen(struct net_backend *be)
814f92bb8c1SVincenzo Maffione {
815f92bb8c1SVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
816f92bb8c1SVincenzo Maffione 	struct netmap_ring *ring = priv->rx;
817f92bb8c1SVincenzo Maffione 	uint32_t head = ring->head;
818f92bb8c1SVincenzo Maffione 	ssize_t totlen = 0;
819f92bb8c1SVincenzo Maffione 
820f92bb8c1SVincenzo Maffione 	while (head != ring->tail) {
821f92bb8c1SVincenzo Maffione 		struct netmap_slot *slot = ring->slot + head;
822f92bb8c1SVincenzo Maffione 
823f92bb8c1SVincenzo Maffione 		totlen += slot->len;
824f92bb8c1SVincenzo Maffione 		if ((slot->flags & NS_MOREFRAG) == 0)
825f92bb8c1SVincenzo Maffione 			break;
826f92bb8c1SVincenzo Maffione 		head = nm_ring_next(ring, head);
827f92bb8c1SVincenzo Maffione 	}
828f92bb8c1SVincenzo Maffione 
829f92bb8c1SVincenzo Maffione 	return (totlen);
830f92bb8c1SVincenzo Maffione }
831f92bb8c1SVincenzo Maffione 
832f92bb8c1SVincenzo Maffione static ssize_t
83366c662b0SVincenzo Maffione netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
8340ff7076bSVincenzo Maffione {
8350ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
8360ff7076bSVincenzo Maffione 	struct netmap_slot *slot = NULL;
8370ff7076bSVincenzo Maffione 	struct netmap_ring *ring;
8380ff7076bSVincenzo Maffione 	void *iov_frag_buf;
8390ff7076bSVincenzo Maffione 	int iov_frag_size;
8400ff7076bSVincenzo Maffione 	ssize_t totlen = 0;
8410ff7076bSVincenzo Maffione 	uint32_t head;
8420ff7076bSVincenzo Maffione 
8430ff7076bSVincenzo Maffione 	assert(iovcnt);
8440ff7076bSVincenzo Maffione 
8450ff7076bSVincenzo Maffione 	ring = priv->rx;
8460ff7076bSVincenzo Maffione 	head = ring->head;
8470ff7076bSVincenzo Maffione 	iov_frag_buf = iov->iov_base;
8480ff7076bSVincenzo Maffione 	iov_frag_size = iov->iov_len;
8490ff7076bSVincenzo Maffione 
8500ff7076bSVincenzo Maffione 	do {
8510ff7076bSVincenzo Maffione 		int nm_buf_len;
8520ff7076bSVincenzo Maffione 		void *nm_buf;
8530ff7076bSVincenzo Maffione 
8540ff7076bSVincenzo Maffione 		if (head == ring->tail) {
8550ff7076bSVincenzo Maffione 			return (0);
8560ff7076bSVincenzo Maffione 		}
8570ff7076bSVincenzo Maffione 
8580ff7076bSVincenzo Maffione 		slot = ring->slot + head;
8590ff7076bSVincenzo Maffione 		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
8600ff7076bSVincenzo Maffione 		nm_buf_len = slot->len;
8610ff7076bSVincenzo Maffione 
8620ff7076bSVincenzo Maffione 		for (;;) {
8630ff7076bSVincenzo Maffione 			int copylen = nm_buf_len < iov_frag_size ?
8640ff7076bSVincenzo Maffione 			    nm_buf_len : iov_frag_size;
8650ff7076bSVincenzo Maffione 
8660ff7076bSVincenzo Maffione 			memcpy(iov_frag_buf, nm_buf, copylen);
8670ff7076bSVincenzo Maffione 			nm_buf += copylen;
8680ff7076bSVincenzo Maffione 			nm_buf_len -= copylen;
8690ff7076bSVincenzo Maffione 			iov_frag_buf += copylen;
8700ff7076bSVincenzo Maffione 			iov_frag_size -= copylen;
8710ff7076bSVincenzo Maffione 			totlen += copylen;
8720ff7076bSVincenzo Maffione 
8730ff7076bSVincenzo Maffione 			if (nm_buf_len == 0) {
8740ff7076bSVincenzo Maffione 				break;
8750ff7076bSVincenzo Maffione 			}
8760ff7076bSVincenzo Maffione 
8770ff7076bSVincenzo Maffione 			iov++;
8780ff7076bSVincenzo Maffione 			iovcnt--;
8790ff7076bSVincenzo Maffione 			if (iovcnt == 0) {
8800ff7076bSVincenzo Maffione 				/* No space to receive. */
881332eff95SVincenzo Maffione 				WPRINTF(("Short iov, drop %zd bytes",
8820ff7076bSVincenzo Maffione 				    totlen));
8830ff7076bSVincenzo Maffione 				return (-ENOSPC);
8840ff7076bSVincenzo Maffione 			}
8850ff7076bSVincenzo Maffione 			iov_frag_buf = iov->iov_base;
8860ff7076bSVincenzo Maffione 			iov_frag_size = iov->iov_len;
8870ff7076bSVincenzo Maffione 		}
8880ff7076bSVincenzo Maffione 
8890ff7076bSVincenzo Maffione 		head = nm_ring_next(ring, head);
8900ff7076bSVincenzo Maffione 
8910ff7076bSVincenzo Maffione 	} while (slot->flags & NS_MOREFRAG);
8920ff7076bSVincenzo Maffione 
8930ff7076bSVincenzo Maffione 	/* Release slots to netmap. */
8940ff7076bSVincenzo Maffione 	ring->head = ring->cur = head;
8950ff7076bSVincenzo Maffione 
8960ff7076bSVincenzo Maffione 	return (totlen);
8970ff7076bSVincenzo Maffione }
8980ff7076bSVincenzo Maffione 
899d12c5ef6SVincenzo Maffione static void
900d12c5ef6SVincenzo Maffione netmap_recv_enable(struct net_backend *be)
901d12c5ef6SVincenzo Maffione {
902d12c5ef6SVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
903d12c5ef6SVincenzo Maffione 
904d12c5ef6SVincenzo Maffione 	mevent_enable(priv->mevp);
905d12c5ef6SVincenzo Maffione }
906d12c5ef6SVincenzo Maffione 
907d12c5ef6SVincenzo Maffione static void
908d12c5ef6SVincenzo Maffione netmap_recv_disable(struct net_backend *be)
909d12c5ef6SVincenzo Maffione {
910d12c5ef6SVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
911d12c5ef6SVincenzo Maffione 
912d12c5ef6SVincenzo Maffione 	mevent_disable(priv->mevp);
913d12c5ef6SVincenzo Maffione }
914d12c5ef6SVincenzo Maffione 
9150ff7076bSVincenzo Maffione static struct net_backend netmap_backend = {
9160ff7076bSVincenzo Maffione 	.prefix = "netmap",
9170ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct netmap_priv),
9180ff7076bSVincenzo Maffione 	.init = netmap_init,
9190ff7076bSVincenzo Maffione 	.cleanup = netmap_cleanup,
9200ff7076bSVincenzo Maffione 	.send = netmap_send,
921f92bb8c1SVincenzo Maffione 	.peek_recvlen = netmap_peek_recvlen,
9220ff7076bSVincenzo Maffione 	.recv = netmap_recv,
923d12c5ef6SVincenzo Maffione 	.recv_enable = netmap_recv_enable,
924d12c5ef6SVincenzo Maffione 	.recv_disable = netmap_recv_disable,
9250ff7076bSVincenzo Maffione 	.get_cap = netmap_get_cap,
9260ff7076bSVincenzo Maffione 	.set_cap = netmap_set_cap,
9270ff7076bSVincenzo Maffione };
9280ff7076bSVincenzo Maffione 
9290ff7076bSVincenzo Maffione /* A clone of the netmap backend, with a different prefix. */
9300ff7076bSVincenzo Maffione static struct net_backend vale_backend = {
9310ff7076bSVincenzo Maffione 	.prefix = "vale",
9320ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct netmap_priv),
9330ff7076bSVincenzo Maffione 	.init = netmap_init,
9340ff7076bSVincenzo Maffione 	.cleanup = netmap_cleanup,
9350ff7076bSVincenzo Maffione 	.send = netmap_send,
936f92bb8c1SVincenzo Maffione 	.peek_recvlen = netmap_peek_recvlen,
9370ff7076bSVincenzo Maffione 	.recv = netmap_recv,
938d12c5ef6SVincenzo Maffione 	.recv_enable = netmap_recv_enable,
939d12c5ef6SVincenzo Maffione 	.recv_disable = netmap_recv_disable,
9400ff7076bSVincenzo Maffione 	.get_cap = netmap_get_cap,
9410ff7076bSVincenzo Maffione 	.set_cap = netmap_set_cap,
9420ff7076bSVincenzo Maffione };
9430ff7076bSVincenzo Maffione 
9440ff7076bSVincenzo Maffione DATA_SET(net_backend_set, netmap_backend);
9450ff7076bSVincenzo Maffione DATA_SET(net_backend_set, vale_backend);
9460ff7076bSVincenzo Maffione 
947621b5090SJohn Baldwin int
948621b5090SJohn Baldwin netbe_legacy_config(nvlist_t *nvl, const char *opts)
949621b5090SJohn Baldwin {
950621b5090SJohn Baldwin 	char *backend, *cp;
951621b5090SJohn Baldwin 
952621b5090SJohn Baldwin 	if (opts == NULL)
953621b5090SJohn Baldwin 		return (0);
954621b5090SJohn Baldwin 
955621b5090SJohn Baldwin 	cp = strchr(opts, ',');
956621b5090SJohn Baldwin 	if (cp == NULL) {
957621b5090SJohn Baldwin 		set_config_value_node(nvl, "backend", opts);
958621b5090SJohn Baldwin 		return (0);
959621b5090SJohn Baldwin 	}
960621b5090SJohn Baldwin 	backend = strndup(opts, cp - opts);
961621b5090SJohn Baldwin 	set_config_value_node(nvl, "backend", backend);
962621b5090SJohn Baldwin 	free(backend);
963621b5090SJohn Baldwin 	return (pci_parse_legacy_config(nvl, cp + 1));
964621b5090SJohn Baldwin }
965621b5090SJohn Baldwin 
9660ff7076bSVincenzo Maffione /*
9670ff7076bSVincenzo Maffione  * Initialize a backend and attach to the frontend.
9680ff7076bSVincenzo Maffione  * This is called during frontend initialization.
969621b5090SJohn Baldwin  *  @ret is a pointer to the backend to be initialized
9700ff7076bSVincenzo Maffione  *  @devname is the backend-name as supplied on the command line,
9710ff7076bSVincenzo Maffione  * 	e.g. -s 2:0,frontend-name,backend-name[,other-args]
9720ff7076bSVincenzo Maffione  *  @cb is the receive callback supplied by the frontend,
9730ff7076bSVincenzo Maffione  *	and it is invoked in the event loop when a receive
9740ff7076bSVincenzo Maffione  *	event is generated in the hypervisor,
9750ff7076bSVincenzo Maffione  *  @param is a pointer to the frontend, and normally used as
9760ff7076bSVincenzo Maffione  *	the argument for the callback.
9770ff7076bSVincenzo Maffione  */
9780ff7076bSVincenzo Maffione int
979621b5090SJohn Baldwin netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb,
9800ff7076bSVincenzo Maffione     void *param)
9810ff7076bSVincenzo Maffione {
9820ff7076bSVincenzo Maffione 	struct net_backend **pbe, *nbe, *tbe = NULL;
983*b9c3e544SYan Ka Chiu 	const char *value, *type;
9845bebe923SAleksandr Fedorov 	char *devname;
9850ff7076bSVincenzo Maffione 	int err;
9860ff7076bSVincenzo Maffione 
987621b5090SJohn Baldwin 	value = get_config_value_node(nvl, "backend");
988621b5090SJohn Baldwin 	if (value == NULL) {
9895bebe923SAleksandr Fedorov 		return (-1);
9905bebe923SAleksandr Fedorov 	}
991621b5090SJohn Baldwin 	devname = strdup(value);
9925bebe923SAleksandr Fedorov 
9930ff7076bSVincenzo Maffione 	/*
994*b9c3e544SYan Ka Chiu 	 * Use the type given by configuration if exists; otherwise
995*b9c3e544SYan Ka Chiu 	 * use the prefix of the backend as the type.
996*b9c3e544SYan Ka Chiu 	 */
997*b9c3e544SYan Ka Chiu 	type = get_config_value_node(nvl, "type");
998*b9c3e544SYan Ka Chiu 	if (type == NULL)
999*b9c3e544SYan Ka Chiu 		type = devname;
1000*b9c3e544SYan Ka Chiu 
1001*b9c3e544SYan Ka Chiu 	/*
10020ff7076bSVincenzo Maffione 	 * Find the network backend that matches the user-provided
10030ff7076bSVincenzo Maffione 	 * device name. net_backend_set is built using a linker set.
10040ff7076bSVincenzo Maffione 	 */
10050ff7076bSVincenzo Maffione 	SET_FOREACH(pbe, net_backend_set) {
1006*b9c3e544SYan Ka Chiu 		if (strncmp(type, (*pbe)->prefix,
10070ff7076bSVincenzo Maffione 		    strlen((*pbe)->prefix)) == 0) {
10080ff7076bSVincenzo Maffione 			tbe = *pbe;
10090ff7076bSVincenzo Maffione 			assert(tbe->init != NULL);
10100ff7076bSVincenzo Maffione 			assert(tbe->cleanup != NULL);
10110ff7076bSVincenzo Maffione 			assert(tbe->send != NULL);
10120ff7076bSVincenzo Maffione 			assert(tbe->recv != NULL);
10130ff7076bSVincenzo Maffione 			assert(tbe->get_cap != NULL);
10140ff7076bSVincenzo Maffione 			assert(tbe->set_cap != NULL);
10150ff7076bSVincenzo Maffione 			break;
10160ff7076bSVincenzo Maffione 		}
10170ff7076bSVincenzo Maffione 	}
10180ff7076bSVincenzo Maffione 
10190ff7076bSVincenzo Maffione 	*ret = NULL;
10205bebe923SAleksandr Fedorov 	if (tbe == NULL) {
10215bebe923SAleksandr Fedorov 		free(devname);
10220ff7076bSVincenzo Maffione 		return (EINVAL);
10235bebe923SAleksandr Fedorov 	}
10245bebe923SAleksandr Fedorov 
10250ff7076bSVincenzo Maffione 	nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
10260ff7076bSVincenzo Maffione 	*nbe = *tbe;	/* copy the template */
10270ff7076bSVincenzo Maffione 	nbe->fd = -1;
10280ff7076bSVincenzo Maffione 	nbe->sc = param;
10290ff7076bSVincenzo Maffione 	nbe->be_vnet_hdr_len = 0;
10300ff7076bSVincenzo Maffione 	nbe->fe_vnet_hdr_len = 0;
10310ff7076bSVincenzo Maffione 
10320ff7076bSVincenzo Maffione 	/* Initialize the backend. */
1033621b5090SJohn Baldwin 	err = nbe->init(nbe, devname, nvl, cb, param);
10340ff7076bSVincenzo Maffione 	if (err) {
10355bebe923SAleksandr Fedorov 		free(devname);
10360ff7076bSVincenzo Maffione 		free(nbe);
10370ff7076bSVincenzo Maffione 		return (err);
10380ff7076bSVincenzo Maffione 	}
10390ff7076bSVincenzo Maffione 
10400ff7076bSVincenzo Maffione 	*ret = nbe;
10415bebe923SAleksandr Fedorov 	free(devname);
10420ff7076bSVincenzo Maffione 
10430ff7076bSVincenzo Maffione 	return (0);
10440ff7076bSVincenzo Maffione }
10450ff7076bSVincenzo Maffione 
10460ff7076bSVincenzo Maffione void
10470ff7076bSVincenzo Maffione netbe_cleanup(struct net_backend *be)
10480ff7076bSVincenzo Maffione {
10490ff7076bSVincenzo Maffione 
10500ff7076bSVincenzo Maffione 	if (be != NULL) {
10510ff7076bSVincenzo Maffione 		be->cleanup(be);
10520ff7076bSVincenzo Maffione 		free(be);
10530ff7076bSVincenzo Maffione 	}
10540ff7076bSVincenzo Maffione }
10550ff7076bSVincenzo Maffione 
10560ff7076bSVincenzo Maffione uint64_t
10570ff7076bSVincenzo Maffione netbe_get_cap(struct net_backend *be)
10580ff7076bSVincenzo Maffione {
10590ff7076bSVincenzo Maffione 
10600ff7076bSVincenzo Maffione 	assert(be != NULL);
10610ff7076bSVincenzo Maffione 	return (be->get_cap(be));
10620ff7076bSVincenzo Maffione }
10630ff7076bSVincenzo Maffione 
10640ff7076bSVincenzo Maffione int
10650ff7076bSVincenzo Maffione netbe_set_cap(struct net_backend *be, uint64_t features,
10660ff7076bSVincenzo Maffione 	      unsigned vnet_hdr_len)
10670ff7076bSVincenzo Maffione {
10680ff7076bSVincenzo Maffione 	int ret;
10690ff7076bSVincenzo Maffione 
10700ff7076bSVincenzo Maffione 	assert(be != NULL);
10710ff7076bSVincenzo Maffione 
10720ff7076bSVincenzo Maffione 	/* There are only three valid lengths, i.e., 0, 10 and 12. */
10730ff7076bSVincenzo Maffione 	if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
10740ff7076bSVincenzo Maffione 		&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
10750ff7076bSVincenzo Maffione 		return (-1);
10760ff7076bSVincenzo Maffione 
10770ff7076bSVincenzo Maffione 	be->fe_vnet_hdr_len = vnet_hdr_len;
10780ff7076bSVincenzo Maffione 
10790ff7076bSVincenzo Maffione 	ret = be->set_cap(be, features, vnet_hdr_len);
10800ff7076bSVincenzo Maffione 	assert(be->be_vnet_hdr_len == 0 ||
10810ff7076bSVincenzo Maffione 	       be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
10820ff7076bSVincenzo Maffione 
10830ff7076bSVincenzo Maffione 	return (ret);
10840ff7076bSVincenzo Maffione }
10850ff7076bSVincenzo Maffione 
10860ff7076bSVincenzo Maffione ssize_t
108766c662b0SVincenzo Maffione netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
10880ff7076bSVincenzo Maffione {
10890ff7076bSVincenzo Maffione 
10900ff7076bSVincenzo Maffione 	return (be->send(be, iov, iovcnt));
10910ff7076bSVincenzo Maffione }
10920ff7076bSVincenzo Maffione 
1093f92bb8c1SVincenzo Maffione ssize_t
1094f92bb8c1SVincenzo Maffione netbe_peek_recvlen(struct net_backend *be)
1095f92bb8c1SVincenzo Maffione {
1096f92bb8c1SVincenzo Maffione 
1097f92bb8c1SVincenzo Maffione 	return (be->peek_recvlen(be));
1098f92bb8c1SVincenzo Maffione }
1099f92bb8c1SVincenzo Maffione 
11000ff7076bSVincenzo Maffione /*
11010ff7076bSVincenzo Maffione  * Try to read a packet from the backend, without blocking.
11020ff7076bSVincenzo Maffione  * If no packets are available, return 0. In case of success, return
11030ff7076bSVincenzo Maffione  * the length of the packet just read. Return -1 in case of errors.
11040ff7076bSVincenzo Maffione  */
11050ff7076bSVincenzo Maffione ssize_t
110666c662b0SVincenzo Maffione netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
11070ff7076bSVincenzo Maffione {
11080ff7076bSVincenzo Maffione 
110966c662b0SVincenzo Maffione 	return (be->recv(be, iov, iovcnt));
11100ff7076bSVincenzo Maffione }
11110ff7076bSVincenzo Maffione 
/*
 * Read a packet from the backend and discard it.
 * The return value is that of netbe_recv(): the size of the discarded
 * packet, zero if no packet was available, or a negative error code in
 * case of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the contents of this scratch buffer are never looked
	 * at, since it only serves to throw frames away; hence it does not
	 * need to be per-vtnet or protected by a lock.
	 * It is sized to hold a TSO-sized segment and no more.
	 */
	static uint8_t dummybuf[65536 + 64];
	struct iovec iov = {
		.iov_base = dummybuf,
		.iov_len = sizeof(dummybuf),
	};

	return (netbe_recv(be, &iov, 1));
}
11330ff7076bSVincenzo Maffione 
1134d12c5ef6SVincenzo Maffione void
1135d12c5ef6SVincenzo Maffione netbe_rx_disable(struct net_backend *be)
1136d12c5ef6SVincenzo Maffione {
1137d12c5ef6SVincenzo Maffione 
113814d72637SVincenzo Maffione 	return be->recv_disable(be);
1139d12c5ef6SVincenzo Maffione }
1140d12c5ef6SVincenzo Maffione 
1141d12c5ef6SVincenzo Maffione void
1142d12c5ef6SVincenzo Maffione netbe_rx_enable(struct net_backend *be)
1143d12c5ef6SVincenzo Maffione {
1144d12c5ef6SVincenzo Maffione 
114514d72637SVincenzo Maffione 	return be->recv_enable(be);
1146d12c5ef6SVincenzo Maffione }
114766c662b0SVincenzo Maffione 
114866c662b0SVincenzo Maffione size_t
114966c662b0SVincenzo Maffione netbe_get_vnet_hdr_len(struct net_backend *be)
115066c662b0SVincenzo Maffione {
115166c662b0SVincenzo Maffione 
115266c662b0SVincenzo Maffione 	return (be->be_vnet_hdr_len);
115366c662b0SVincenzo Maffione }
1154