xref: /freebsd/usr.sbin/bhyve/net_backends.c (revision d12c5ef6409b6c0fd41646341c1e86d1a4bb0f0d)
10ff7076bSVincenzo Maffione /*-
290db4ba9SVincenzo Maffione  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
390db4ba9SVincenzo Maffione  *
40ff7076bSVincenzo Maffione  * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
50ff7076bSVincenzo Maffione  *
60ff7076bSVincenzo Maffione  * Redistribution and use in source and binary forms, with or without
70ff7076bSVincenzo Maffione  * modification, are permitted provided that the following conditions
80ff7076bSVincenzo Maffione  * are met:
90ff7076bSVincenzo Maffione  * 1. Redistributions of source code must retain the above copyright
100ff7076bSVincenzo Maffione  *    notice, this list of conditions and the following disclaimer.
110ff7076bSVincenzo Maffione  * 2. Redistributions in binary form must reproduce the above copyright
120ff7076bSVincenzo Maffione  *    notice, this list of conditions and the following disclaimer in the
130ff7076bSVincenzo Maffione  *    documentation and/or other materials provided with the distribution.
140ff7076bSVincenzo Maffione  *
150ff7076bSVincenzo Maffione  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
160ff7076bSVincenzo Maffione  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
170ff7076bSVincenzo Maffione  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
180ff7076bSVincenzo Maffione  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
190ff7076bSVincenzo Maffione  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
200ff7076bSVincenzo Maffione  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
210ff7076bSVincenzo Maffione  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
220ff7076bSVincenzo Maffione  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
230ff7076bSVincenzo Maffione  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
240ff7076bSVincenzo Maffione  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
250ff7076bSVincenzo Maffione  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
260ff7076bSVincenzo Maffione  *
270ff7076bSVincenzo Maffione  * $FreeBSD$
280ff7076bSVincenzo Maffione  */
290ff7076bSVincenzo Maffione 
300ff7076bSVincenzo Maffione /*
310ff7076bSVincenzo Maffione  * This file implements multiple network backends (tap, netmap, ...),
320ff7076bSVincenzo Maffione  * to be used by network frontends such as virtio-net and e1000.
330ff7076bSVincenzo Maffione  * The API to access the backend (e.g. send/receive packets, negotiate
340ff7076bSVincenzo Maffione  * features) is exported by net_backends.h.
350ff7076bSVincenzo Maffione  */
360ff7076bSVincenzo Maffione 
378cd0c1acSVincenzo Maffione #include <sys/cdefs.h>
388cd0c1acSVincenzo Maffione __FBSDID("$FreeBSD$");
398cd0c1acSVincenzo Maffione 
400ff7076bSVincenzo Maffione #include <sys/types.h>		/* u_short etc */
410ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
420ff7076bSVincenzo Maffione #include <sys/capsicum.h>
430ff7076bSVincenzo Maffione #endif
440ff7076bSVincenzo Maffione #include <sys/ioctl.h>
450ff7076bSVincenzo Maffione #include <sys/mman.h>
460ff7076bSVincenzo Maffione #include <sys/uio.h>
470ff7076bSVincenzo Maffione 
480ff7076bSVincenzo Maffione #include <net/if.h>
490ff7076bSVincenzo Maffione #include <net/netmap.h>
500ff7076bSVincenzo Maffione #include <net/netmap_virt.h>
510ff7076bSVincenzo Maffione #define NETMAP_WITH_LIBS
520ff7076bSVincenzo Maffione #include <net/netmap_user.h>
530ff7076bSVincenzo Maffione 
540ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
550ff7076bSVincenzo Maffione #include <capsicum_helpers.h>
560ff7076bSVincenzo Maffione #endif
570ff7076bSVincenzo Maffione #include <err.h>
580ff7076bSVincenzo Maffione #include <errno.h>
590ff7076bSVincenzo Maffione #include <fcntl.h>
600ff7076bSVincenzo Maffione #include <stdio.h>
610ff7076bSVincenzo Maffione #include <stdlib.h>
620ff7076bSVincenzo Maffione #include <stdint.h>
630ff7076bSVincenzo Maffione #include <string.h>
640ff7076bSVincenzo Maffione #include <unistd.h>
650ff7076bSVincenzo Maffione #include <sysexits.h>
660ff7076bSVincenzo Maffione #include <assert.h>
670ff7076bSVincenzo Maffione #include <pthread.h>
680ff7076bSVincenzo Maffione #include <pthread_np.h>
690ff7076bSVincenzo Maffione #include <poll.h>
700ff7076bSVincenzo Maffione #include <assert.h>
710ff7076bSVincenzo Maffione 
720ff7076bSVincenzo Maffione 
730ff7076bSVincenzo Maffione #include "iov.h"
740ff7076bSVincenzo Maffione #include "mevent.h"
750ff7076bSVincenzo Maffione #include "net_backends.h"
760ff7076bSVincenzo Maffione 
770ff7076bSVincenzo Maffione #include <sys/linker_set.h>
780ff7076bSVincenzo Maffione 
790ff7076bSVincenzo Maffione /*
800ff7076bSVincenzo Maffione  * Each network backend registers a set of function pointers that are
810ff7076bSVincenzo Maffione  * used to implement the net backends API.
820ff7076bSVincenzo Maffione  * This might need to be exposed if we implement backends in separate files.
830ff7076bSVincenzo Maffione  */
840ff7076bSVincenzo Maffione struct net_backend {
850ff7076bSVincenzo Maffione 	const char *prefix;	/* prefix matching this backend */
860ff7076bSVincenzo Maffione 
870ff7076bSVincenzo Maffione 	/*
880ff7076bSVincenzo Maffione 	 * Routines used to initialize and cleanup the resources needed
890ff7076bSVincenzo Maffione 	 * by a backend. The cleanup function is used internally,
900ff7076bSVincenzo Maffione 	 * and should not be called by the frontend.
910ff7076bSVincenzo Maffione 	 */
920ff7076bSVincenzo Maffione 	int (*init)(struct net_backend *be, const char *devname,
930ff7076bSVincenzo Maffione 	    net_be_rxeof_t cb, void *param);
940ff7076bSVincenzo Maffione 	void (*cleanup)(struct net_backend *be);
950ff7076bSVincenzo Maffione 
960ff7076bSVincenzo Maffione 	/*
970ff7076bSVincenzo Maffione 	 * Called to serve a guest transmit request. The scatter-gather
980ff7076bSVincenzo Maffione 	 * vector provided by the caller has 'iovcnt' elements and contains
990ff7076bSVincenzo Maffione 	 * the packet to send.
1000ff7076bSVincenzo Maffione 	 */
1010ff7076bSVincenzo Maffione 	ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt);
1020ff7076bSVincenzo Maffione 
1030ff7076bSVincenzo Maffione 	/*
1040ff7076bSVincenzo Maffione 	 * Called to receive a packet from the backend. When the function
1050ff7076bSVincenzo Maffione 	 * returns a positive value 'len', the scatter-gather vector
1060ff7076bSVincenzo Maffione 	 * provided by the caller contains a packet with such length.
1070ff7076bSVincenzo Maffione 	 * The function returns 0 if the backend doesn't have a new packet to
1080ff7076bSVincenzo Maffione 	 * receive.
1090ff7076bSVincenzo Maffione 	 */
1100ff7076bSVincenzo Maffione 	ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
1110ff7076bSVincenzo Maffione 
1120ff7076bSVincenzo Maffione 	/*
113*d12c5ef6SVincenzo Maffione 	 * Ask the backend to enable or disable receive operation in the
114*d12c5ef6SVincenzo Maffione 	 * backend. On return from a disable operation, it is guaranteed
115*d12c5ef6SVincenzo Maffione 	 * that the receive callback won't be called until receive is
116*d12c5ef6SVincenzo Maffione 	 * enabled again. Note however that it is up to the caller to make
117*d12c5ef6SVincenzo Maffione 	 * sure that netbe_recv() is not currently being executed by another
118*d12c5ef6SVincenzo Maffione 	 * thread.
119*d12c5ef6SVincenzo Maffione 	 */
120*d12c5ef6SVincenzo Maffione 	void (*recv_enable)(struct net_backend *be);
121*d12c5ef6SVincenzo Maffione 	void (*recv_disable)(struct net_backend *be);
122*d12c5ef6SVincenzo Maffione 
123*d12c5ef6SVincenzo Maffione 	/*
1240ff7076bSVincenzo Maffione 	 * Ask the backend for the virtio-net features it is able to
1250ff7076bSVincenzo Maffione 	 * support. Possible features are TSO, UFO and checksum offloading
1260ff7076bSVincenzo Maffione 	 * in both rx and tx direction and for both IPv4 and IPv6.
1270ff7076bSVincenzo Maffione 	 */
1280ff7076bSVincenzo Maffione 	uint64_t (*get_cap)(struct net_backend *be);
1290ff7076bSVincenzo Maffione 
1300ff7076bSVincenzo Maffione 	/*
1310ff7076bSVincenzo Maffione 	 * Tell the backend to enable/disable the specified virtio-net
1320ff7076bSVincenzo Maffione 	 * features (capabilities).
1330ff7076bSVincenzo Maffione 	 */
1340ff7076bSVincenzo Maffione 	int (*set_cap)(struct net_backend *be, uint64_t features,
1350ff7076bSVincenzo Maffione 	    unsigned int vnet_hdr_len);
1360ff7076bSVincenzo Maffione 
1370ff7076bSVincenzo Maffione 	struct pci_vtnet_softc *sc;
1380ff7076bSVincenzo Maffione 	int fd;
1390ff7076bSVincenzo Maffione 
1400ff7076bSVincenzo Maffione 	/*
1410ff7076bSVincenzo Maffione 	 * Length of the virtio-net header used by the backend and the
1420ff7076bSVincenzo Maffione 	 * frontend, respectively. A zero value means that the header
1430ff7076bSVincenzo Maffione 	 * is not used.
1440ff7076bSVincenzo Maffione 	 */
1450ff7076bSVincenzo Maffione 	unsigned int be_vnet_hdr_len;
1460ff7076bSVincenzo Maffione 	unsigned int fe_vnet_hdr_len;
1470ff7076bSVincenzo Maffione 
1480ff7076bSVincenzo Maffione 	/* Size of backend-specific private data. */
1490ff7076bSVincenzo Maffione 	size_t priv_size;
1500ff7076bSVincenzo Maffione 
1510ff7076bSVincenzo Maffione 	/* Room for backend-specific data. */
1520ff7076bSVincenzo Maffione 	char opaque[0];
1530ff7076bSVincenzo Maffione };
1540ff7076bSVincenzo Maffione 
1550ff7076bSVincenzo Maffione SET_DECLARE(net_backend_set, struct net_backend);
1560ff7076bSVincenzo Maffione 
1570ff7076bSVincenzo Maffione #define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)
1580ff7076bSVincenzo Maffione 
1590ff7076bSVincenzo Maffione #define WPRINTF(params) printf params
1600ff7076bSVincenzo Maffione 
1610ff7076bSVincenzo Maffione /*
1620ff7076bSVincenzo Maffione  * The tap backend
1630ff7076bSVincenzo Maffione  */
1640ff7076bSVincenzo Maffione 
/* Private state kept by the tap backend inside net_backend.opaque. */
struct tap_priv {
	/* Read event registered with mevent_add() for the tap descriptor. */
	struct mevent *mevp;
};
1680ff7076bSVincenzo Maffione 
1690ff7076bSVincenzo Maffione static void
1700ff7076bSVincenzo Maffione tap_cleanup(struct net_backend *be)
1710ff7076bSVincenzo Maffione {
1720ff7076bSVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
1730ff7076bSVincenzo Maffione 
1740ff7076bSVincenzo Maffione 	if (priv->mevp) {
1750ff7076bSVincenzo Maffione 		mevent_delete(priv->mevp);
1760ff7076bSVincenzo Maffione 	}
1770ff7076bSVincenzo Maffione 	if (be->fd != -1) {
1780ff7076bSVincenzo Maffione 		close(be->fd);
1790ff7076bSVincenzo Maffione 		be->fd = -1;
1800ff7076bSVincenzo Maffione 	}
1810ff7076bSVincenzo Maffione }
1820ff7076bSVincenzo Maffione 
1830ff7076bSVincenzo Maffione static int
1840ff7076bSVincenzo Maffione tap_init(struct net_backend *be, const char *devname,
1850ff7076bSVincenzo Maffione 	 net_be_rxeof_t cb, void *param)
1860ff7076bSVincenzo Maffione {
1870ff7076bSVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
1880ff7076bSVincenzo Maffione 	char tbuf[80];
1890ff7076bSVincenzo Maffione 	int opt = 1;
1900ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
1910ff7076bSVincenzo Maffione 	cap_rights_t rights;
1920ff7076bSVincenzo Maffione #endif
1930ff7076bSVincenzo Maffione 
1940ff7076bSVincenzo Maffione 	if (cb == NULL) {
1950ff7076bSVincenzo Maffione 		WPRINTF(("TAP backend requires non-NULL callback\n"));
1960ff7076bSVincenzo Maffione 		return (-1);
1970ff7076bSVincenzo Maffione 	}
1980ff7076bSVincenzo Maffione 
1990ff7076bSVincenzo Maffione 	strcpy(tbuf, "/dev/");
2000ff7076bSVincenzo Maffione 	strlcat(tbuf, devname, sizeof(tbuf));
2010ff7076bSVincenzo Maffione 
2022d5fe369SSean Chittenden 	be->fd = open(tbuf, O_RDWR);
2032d5fe369SSean Chittenden 	if (be->fd == -1) {
2040ff7076bSVincenzo Maffione 		WPRINTF(("open of tap device %s failed\n", tbuf));
2050ff7076bSVincenzo Maffione 		goto error;
2060ff7076bSVincenzo Maffione 	}
2070ff7076bSVincenzo Maffione 
2080ff7076bSVincenzo Maffione 	/*
2090ff7076bSVincenzo Maffione 	 * Set non-blocking and register for read
2100ff7076bSVincenzo Maffione 	 * notifications with the event loop
2110ff7076bSVincenzo Maffione 	 */
2122d5fe369SSean Chittenden 	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
2130ff7076bSVincenzo Maffione 		WPRINTF(("tap device O_NONBLOCK failed\n"));
2140ff7076bSVincenzo Maffione 		goto error;
2150ff7076bSVincenzo Maffione 	}
2160ff7076bSVincenzo Maffione 
2170ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
2180ff7076bSVincenzo Maffione 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
2192d5fe369SSean Chittenden 	if (caph_rights_limit(be->fd, &rights) == -1)
2200ff7076bSVincenzo Maffione 		errx(EX_OSERR, "Unable to apply rights for sandbox");
2210ff7076bSVincenzo Maffione #endif
2220ff7076bSVincenzo Maffione 
2232d5fe369SSean Chittenden 	priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
2240ff7076bSVincenzo Maffione 	if (priv->mevp == NULL) {
2250ff7076bSVincenzo Maffione 		WPRINTF(("Could not register event\n"));
2260ff7076bSVincenzo Maffione 		goto error;
2270ff7076bSVincenzo Maffione 	}
2280ff7076bSVincenzo Maffione 
2290ff7076bSVincenzo Maffione 	return (0);
2300ff7076bSVincenzo Maffione 
2310ff7076bSVincenzo Maffione error:
2320ff7076bSVincenzo Maffione 	tap_cleanup(be);
2330ff7076bSVincenzo Maffione 	return (-1);
2340ff7076bSVincenzo Maffione }
2350ff7076bSVincenzo Maffione 
2360ff7076bSVincenzo Maffione /*
2370ff7076bSVincenzo Maffione  * Called to send a buffer chain out to the tap device
2380ff7076bSVincenzo Maffione  */
2390ff7076bSVincenzo Maffione static ssize_t
2400ff7076bSVincenzo Maffione tap_send(struct net_backend *be, struct iovec *iov, int iovcnt)
2410ff7076bSVincenzo Maffione {
2420ff7076bSVincenzo Maffione 	return (writev(be->fd, iov, iovcnt));
2430ff7076bSVincenzo Maffione }
2440ff7076bSVincenzo Maffione 
2450ff7076bSVincenzo Maffione static ssize_t
2460ff7076bSVincenzo Maffione tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
2470ff7076bSVincenzo Maffione {
2480ff7076bSVincenzo Maffione 	ssize_t ret;
2490ff7076bSVincenzo Maffione 
2500ff7076bSVincenzo Maffione 	/* Should never be called without a valid tap fd */
2510ff7076bSVincenzo Maffione 	assert(be->fd != -1);
2520ff7076bSVincenzo Maffione 
2530ff7076bSVincenzo Maffione 	ret = readv(be->fd, iov, iovcnt);
2540ff7076bSVincenzo Maffione 
2550ff7076bSVincenzo Maffione 	if (ret < 0 && errno == EWOULDBLOCK) {
2560ff7076bSVincenzo Maffione 		return (0);
2570ff7076bSVincenzo Maffione 	}
2580ff7076bSVincenzo Maffione 
2590ff7076bSVincenzo Maffione 	return (ret);
2600ff7076bSVincenzo Maffione }
2610ff7076bSVincenzo Maffione 
262*d12c5ef6SVincenzo Maffione static void
263*d12c5ef6SVincenzo Maffione tap_recv_enable(struct net_backend *be)
264*d12c5ef6SVincenzo Maffione {
265*d12c5ef6SVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
266*d12c5ef6SVincenzo Maffione 
267*d12c5ef6SVincenzo Maffione 	mevent_enable(priv->mevp);
268*d12c5ef6SVincenzo Maffione }
269*d12c5ef6SVincenzo Maffione 
270*d12c5ef6SVincenzo Maffione static void
271*d12c5ef6SVincenzo Maffione tap_recv_disable(struct net_backend *be)
272*d12c5ef6SVincenzo Maffione {
273*d12c5ef6SVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
274*d12c5ef6SVincenzo Maffione 
275*d12c5ef6SVincenzo Maffione 	mevent_disable(priv->mevp);
276*d12c5ef6SVincenzo Maffione }
277*d12c5ef6SVincenzo Maffione 
/* Report the virtio-net features offered by the tap backend: none. */
static uint64_t
tap_get_cap(struct net_backend *be)
{
	const uint64_t no_features = 0;	/* no capabilities for now */

	return (no_features);
}
2840ff7076bSVincenzo Maffione 
/*
 * The tap backend supports neither virtio-net features nor a virtio-net
 * header: succeed (0) only when both arguments are zero, fail (-1)
 * otherwise.
 */
static int
tap_set_cap(struct net_backend *be, uint64_t features,
		unsigned vnet_hdr_len)
{

	if (features == 0 && vnet_hdr_len == 0) {
		return (0);
	}

	return (-1);
}
2920ff7076bSVincenzo Maffione 
2930ff7076bSVincenzo Maffione static struct net_backend tap_backend = {
2940ff7076bSVincenzo Maffione 	.prefix = "tap",
2950ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct tap_priv),
2960ff7076bSVincenzo Maffione 	.init = tap_init,
2970ff7076bSVincenzo Maffione 	.cleanup = tap_cleanup,
2980ff7076bSVincenzo Maffione 	.send = tap_send,
2990ff7076bSVincenzo Maffione 	.recv = tap_recv,
300*d12c5ef6SVincenzo Maffione 	.recv_enable = tap_recv_enable,
301*d12c5ef6SVincenzo Maffione 	.recv_disable = tap_recv_disable,
3020ff7076bSVincenzo Maffione 	.get_cap = tap_get_cap,
3030ff7076bSVincenzo Maffione 	.set_cap = tap_set_cap,
3040ff7076bSVincenzo Maffione };
3050ff7076bSVincenzo Maffione 
3060ff7076bSVincenzo Maffione /* A clone of the tap backend, with a different prefix. */
3070ff7076bSVincenzo Maffione static struct net_backend vmnet_backend = {
3080ff7076bSVincenzo Maffione 	.prefix = "vmnet",
3090ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct tap_priv),
3100ff7076bSVincenzo Maffione 	.init = tap_init,
3110ff7076bSVincenzo Maffione 	.cleanup = tap_cleanup,
3120ff7076bSVincenzo Maffione 	.send = tap_send,
3130ff7076bSVincenzo Maffione 	.recv = tap_recv,
314*d12c5ef6SVincenzo Maffione 	.recv_enable = tap_recv_enable,
315*d12c5ef6SVincenzo Maffione 	.recv_disable = tap_recv_disable,
3160ff7076bSVincenzo Maffione 	.get_cap = tap_get_cap,
3170ff7076bSVincenzo Maffione 	.set_cap = tap_set_cap,
3180ff7076bSVincenzo Maffione };
3190ff7076bSVincenzo Maffione 
3200ff7076bSVincenzo Maffione DATA_SET(net_backend_set, tap_backend);
3210ff7076bSVincenzo Maffione DATA_SET(net_backend_set, vmnet_backend);
3220ff7076bSVincenzo Maffione 
3230ff7076bSVincenzo Maffione /*
3240ff7076bSVincenzo Maffione  * The netmap backend
3250ff7076bSVincenzo Maffione  */
3260ff7076bSVincenzo Maffione 
/* The virtio-net features supported by netmap. */
#define NETMAP_FEATURES	(			\
	VIRTIO_NET_F_CSUM |			\
	VIRTIO_NET_F_HOST_TSO4 |		\
	VIRTIO_NET_F_HOST_TSO6 |		\
	VIRTIO_NET_F_HOST_UFO |			\
	VIRTIO_NET_F_GUEST_CSUM |		\
	VIRTIO_NET_F_GUEST_TSO4 |		\
	VIRTIO_NET_F_GUEST_TSO6 |		\
	VIRTIO_NET_F_GUEST_UFO)
3320ff7076bSVincenzo Maffione 
3330ff7076bSVincenzo Maffione struct netmap_priv {
3340ff7076bSVincenzo Maffione 	char ifname[IFNAMSIZ];
3350ff7076bSVincenzo Maffione 	struct nm_desc *nmd;
3360ff7076bSVincenzo Maffione 	uint16_t memid;
3370ff7076bSVincenzo Maffione 	struct netmap_ring *rx;
3380ff7076bSVincenzo Maffione 	struct netmap_ring *tx;
3390ff7076bSVincenzo Maffione 	struct mevent *mevp;
3400ff7076bSVincenzo Maffione 	net_be_rxeof_t cb;
3410ff7076bSVincenzo Maffione 	void *cb_param;
3420ff7076bSVincenzo Maffione };
3430ff7076bSVincenzo Maffione 
3440ff7076bSVincenzo Maffione static void
3450ff7076bSVincenzo Maffione nmreq_init(struct nmreq *req, char *ifname)
3460ff7076bSVincenzo Maffione {
3470ff7076bSVincenzo Maffione 
3480ff7076bSVincenzo Maffione 	memset(req, 0, sizeof(*req));
3490ff7076bSVincenzo Maffione 	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
3500ff7076bSVincenzo Maffione 	req->nr_version = NETMAP_API;
3510ff7076bSVincenzo Maffione }
3520ff7076bSVincenzo Maffione 
3530ff7076bSVincenzo Maffione static int
3540ff7076bSVincenzo Maffione netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
3550ff7076bSVincenzo Maffione {
3560ff7076bSVincenzo Maffione 	int err;
3570ff7076bSVincenzo Maffione 	struct nmreq req;
3580ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
3590ff7076bSVincenzo Maffione 
3600ff7076bSVincenzo Maffione 	nmreq_init(&req, priv->ifname);
3610ff7076bSVincenzo Maffione 	req.nr_cmd = NETMAP_BDG_VNET_HDR;
3620ff7076bSVincenzo Maffione 	req.nr_arg1 = vnet_hdr_len;
3630ff7076bSVincenzo Maffione 	err = ioctl(be->fd, NIOCREGIF, &req);
3640ff7076bSVincenzo Maffione 	if (err) {
3650ff7076bSVincenzo Maffione 		WPRINTF(("Unable to set vnet header length %d\n",
3660ff7076bSVincenzo Maffione 				vnet_hdr_len));
3670ff7076bSVincenzo Maffione 		return (err);
3680ff7076bSVincenzo Maffione 	}
3690ff7076bSVincenzo Maffione 
3700ff7076bSVincenzo Maffione 	be->be_vnet_hdr_len = vnet_hdr_len;
3710ff7076bSVincenzo Maffione 
3720ff7076bSVincenzo Maffione 	return (0);
3730ff7076bSVincenzo Maffione }
3740ff7076bSVincenzo Maffione 
3750ff7076bSVincenzo Maffione static int
3760ff7076bSVincenzo Maffione netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
3770ff7076bSVincenzo Maffione {
3780ff7076bSVincenzo Maffione 	int prev_hdr_len = be->be_vnet_hdr_len;
3790ff7076bSVincenzo Maffione 	int ret;
3800ff7076bSVincenzo Maffione 
3810ff7076bSVincenzo Maffione 	if (vnet_hdr_len == prev_hdr_len) {
3820ff7076bSVincenzo Maffione 		return (1);
3830ff7076bSVincenzo Maffione 	}
3840ff7076bSVincenzo Maffione 
3850ff7076bSVincenzo Maffione 	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
3860ff7076bSVincenzo Maffione 	if (ret) {
3870ff7076bSVincenzo Maffione 		return (0);
3880ff7076bSVincenzo Maffione 	}
3890ff7076bSVincenzo Maffione 
3900ff7076bSVincenzo Maffione 	netmap_set_vnet_hdr_len(be, prev_hdr_len);
3910ff7076bSVincenzo Maffione 
3920ff7076bSVincenzo Maffione 	return (1);
3930ff7076bSVincenzo Maffione }
3940ff7076bSVincenzo Maffione 
3950ff7076bSVincenzo Maffione static uint64_t
3960ff7076bSVincenzo Maffione netmap_get_cap(struct net_backend *be)
3970ff7076bSVincenzo Maffione {
3980ff7076bSVincenzo Maffione 
3990ff7076bSVincenzo Maffione 	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
4000ff7076bSVincenzo Maffione 	    NETMAP_FEATURES : 0);
4010ff7076bSVincenzo Maffione }
4020ff7076bSVincenzo Maffione 
/*
 * Apply the negotiated configuration to the netmap backend. Only the
 * virtio-net header length is acted upon; 'features' is not used here.
 * Returns the result of netmap_set_vnet_hdr_len().
 */
static int
netmap_set_cap(struct net_backend *be, uint64_t features,
	       unsigned vnet_hdr_len)
{
	int rc;

	rc = netmap_set_vnet_hdr_len(be, vnet_hdr_len);

	return (rc);
}
4100ff7076bSVincenzo Maffione 
4110ff7076bSVincenzo Maffione static int
4120ff7076bSVincenzo Maffione netmap_init(struct net_backend *be, const char *devname,
4130ff7076bSVincenzo Maffione 	    net_be_rxeof_t cb, void *param)
4140ff7076bSVincenzo Maffione {
4150ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
4160ff7076bSVincenzo Maffione 
4170ff7076bSVincenzo Maffione 	strlcpy(priv->ifname, devname, sizeof(priv->ifname));
4180ff7076bSVincenzo Maffione 	priv->ifname[sizeof(priv->ifname) - 1] = '\0';
4190ff7076bSVincenzo Maffione 
4200ff7076bSVincenzo Maffione 	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
4210ff7076bSVincenzo Maffione 	if (priv->nmd == NULL) {
4220ff7076bSVincenzo Maffione 		WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n",
4230ff7076bSVincenzo Maffione 			devname, strerror(errno)));
4240ff7076bSVincenzo Maffione 		free(priv);
4250ff7076bSVincenzo Maffione 		return (-1);
4260ff7076bSVincenzo Maffione 	}
4270ff7076bSVincenzo Maffione 
4280ff7076bSVincenzo Maffione 	priv->memid = priv->nmd->req.nr_arg2;
4290ff7076bSVincenzo Maffione 	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
4300ff7076bSVincenzo Maffione 	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
4310ff7076bSVincenzo Maffione 	priv->cb = cb;
4320ff7076bSVincenzo Maffione 	priv->cb_param = param;
4330ff7076bSVincenzo Maffione 	be->fd = priv->nmd->fd;
4340ff7076bSVincenzo Maffione 
4350ff7076bSVincenzo Maffione 	priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
4360ff7076bSVincenzo Maffione 	if (priv->mevp == NULL) {
4370ff7076bSVincenzo Maffione 		WPRINTF(("Could not register event\n"));
4380ff7076bSVincenzo Maffione 		return (-1);
4390ff7076bSVincenzo Maffione 	}
4400ff7076bSVincenzo Maffione 
4410ff7076bSVincenzo Maffione 	return (0);
4420ff7076bSVincenzo Maffione }
4430ff7076bSVincenzo Maffione 
4440ff7076bSVincenzo Maffione static void
4450ff7076bSVincenzo Maffione netmap_cleanup(struct net_backend *be)
4460ff7076bSVincenzo Maffione {
4470ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
4480ff7076bSVincenzo Maffione 
4490ff7076bSVincenzo Maffione 	if (priv->mevp) {
4500ff7076bSVincenzo Maffione 		mevent_delete(priv->mevp);
4510ff7076bSVincenzo Maffione 	}
4520ff7076bSVincenzo Maffione 	if (priv->nmd) {
4530ff7076bSVincenzo Maffione 		nm_close(priv->nmd);
4540ff7076bSVincenzo Maffione 	}
4550ff7076bSVincenzo Maffione 	be->fd = -1;
4560ff7076bSVincenzo Maffione }
4570ff7076bSVincenzo Maffione 
4580ff7076bSVincenzo Maffione static ssize_t
4590ff7076bSVincenzo Maffione netmap_send(struct net_backend *be, struct iovec *iov,
4600ff7076bSVincenzo Maffione 	    int iovcnt)
4610ff7076bSVincenzo Maffione {
4620ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
4630ff7076bSVincenzo Maffione 	struct netmap_ring *ring;
4640ff7076bSVincenzo Maffione 	ssize_t totlen = 0;
4650ff7076bSVincenzo Maffione 	int nm_buf_size;
4660ff7076bSVincenzo Maffione 	int nm_buf_len;
4670ff7076bSVincenzo Maffione 	uint32_t head;
4680ff7076bSVincenzo Maffione 	void *nm_buf;
4690ff7076bSVincenzo Maffione 	int j;
4700ff7076bSVincenzo Maffione 
4710ff7076bSVincenzo Maffione 	ring = priv->tx;
4720ff7076bSVincenzo Maffione 	head = ring->head;
4730ff7076bSVincenzo Maffione 	if (head == ring->tail) {
4740ff7076bSVincenzo Maffione 		WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt)));
4750ff7076bSVincenzo Maffione 		goto txsync;
4760ff7076bSVincenzo Maffione 	}
4770ff7076bSVincenzo Maffione 	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
4780ff7076bSVincenzo Maffione 	nm_buf_size = ring->nr_buf_size;
4790ff7076bSVincenzo Maffione 	nm_buf_len = 0;
4800ff7076bSVincenzo Maffione 
4810ff7076bSVincenzo Maffione 	for (j = 0; j < iovcnt; j++) {
4820ff7076bSVincenzo Maffione 		int iov_frag_size = iov[j].iov_len;
4830ff7076bSVincenzo Maffione 		void *iov_frag_buf = iov[j].iov_base;
4840ff7076bSVincenzo Maffione 
4850ff7076bSVincenzo Maffione 		totlen += iov_frag_size;
4860ff7076bSVincenzo Maffione 
4870ff7076bSVincenzo Maffione 		/*
4880ff7076bSVincenzo Maffione 		 * Split each iovec fragment over more netmap slots, if
4890ff7076bSVincenzo Maffione 		 * necessary.
4900ff7076bSVincenzo Maffione 		 */
4910ff7076bSVincenzo Maffione 		for (;;) {
4920ff7076bSVincenzo Maffione 			int copylen;
4930ff7076bSVincenzo Maffione 
4940ff7076bSVincenzo Maffione 			copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
4950ff7076bSVincenzo Maffione 			memcpy(nm_buf, iov_frag_buf, copylen);
4960ff7076bSVincenzo Maffione 
4970ff7076bSVincenzo Maffione 			iov_frag_buf += copylen;
4980ff7076bSVincenzo Maffione 			iov_frag_size -= copylen;
4990ff7076bSVincenzo Maffione 			nm_buf += copylen;
5000ff7076bSVincenzo Maffione 			nm_buf_size -= copylen;
5010ff7076bSVincenzo Maffione 			nm_buf_len += copylen;
5020ff7076bSVincenzo Maffione 
5030ff7076bSVincenzo Maffione 			if (iov_frag_size == 0) {
5040ff7076bSVincenzo Maffione 				break;
5050ff7076bSVincenzo Maffione 			}
5060ff7076bSVincenzo Maffione 
5070ff7076bSVincenzo Maffione 			ring->slot[head].len = nm_buf_len;
5080ff7076bSVincenzo Maffione 			ring->slot[head].flags = NS_MOREFRAG;
5090ff7076bSVincenzo Maffione 			head = nm_ring_next(ring, head);
5100ff7076bSVincenzo Maffione 			if (head == ring->tail) {
5110ff7076bSVincenzo Maffione 				/*
5120ff7076bSVincenzo Maffione 				 * We ran out of netmap slots while
5130ff7076bSVincenzo Maffione 				 * splitting the iovec fragments.
5140ff7076bSVincenzo Maffione 				 */
5150ff7076bSVincenzo Maffione 				WPRINTF(("No space, drop %zu bytes\n",
5160ff7076bSVincenzo Maffione 				   count_iov(iov, iovcnt)));
5170ff7076bSVincenzo Maffione 				goto txsync;
5180ff7076bSVincenzo Maffione 			}
5190ff7076bSVincenzo Maffione 			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
5200ff7076bSVincenzo Maffione 			nm_buf_size = ring->nr_buf_size;
5210ff7076bSVincenzo Maffione 			nm_buf_len = 0;
5220ff7076bSVincenzo Maffione 		}
5230ff7076bSVincenzo Maffione 	}
5240ff7076bSVincenzo Maffione 
5250ff7076bSVincenzo Maffione 	/* Complete the last slot, which must not have NS_MOREFRAG set. */
5260ff7076bSVincenzo Maffione 	ring->slot[head].len = nm_buf_len;
5270ff7076bSVincenzo Maffione 	ring->slot[head].flags = 0;
5280ff7076bSVincenzo Maffione 	head = nm_ring_next(ring, head);
5290ff7076bSVincenzo Maffione 
5300ff7076bSVincenzo Maffione 	/* Now update ring->head and ring->cur. */
5310ff7076bSVincenzo Maffione 	ring->head = ring->cur = head;
5320ff7076bSVincenzo Maffione txsync:
5330ff7076bSVincenzo Maffione 	ioctl(be->fd, NIOCTXSYNC, NULL);
5340ff7076bSVincenzo Maffione 
5350ff7076bSVincenzo Maffione 	return (totlen);
5360ff7076bSVincenzo Maffione }
5370ff7076bSVincenzo Maffione 
5380ff7076bSVincenzo Maffione static ssize_t
5390ff7076bSVincenzo Maffione netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
5400ff7076bSVincenzo Maffione {
5410ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
5420ff7076bSVincenzo Maffione 	struct netmap_slot *slot = NULL;
5430ff7076bSVincenzo Maffione 	struct netmap_ring *ring;
5440ff7076bSVincenzo Maffione 	void *iov_frag_buf;
5450ff7076bSVincenzo Maffione 	int iov_frag_size;
5460ff7076bSVincenzo Maffione 	ssize_t totlen = 0;
5470ff7076bSVincenzo Maffione 	uint32_t head;
5480ff7076bSVincenzo Maffione 
5490ff7076bSVincenzo Maffione 	assert(iovcnt);
5500ff7076bSVincenzo Maffione 
5510ff7076bSVincenzo Maffione 	ring = priv->rx;
5520ff7076bSVincenzo Maffione 	head = ring->head;
5530ff7076bSVincenzo Maffione 	iov_frag_buf = iov->iov_base;
5540ff7076bSVincenzo Maffione 	iov_frag_size = iov->iov_len;
5550ff7076bSVincenzo Maffione 
5560ff7076bSVincenzo Maffione 	do {
5570ff7076bSVincenzo Maffione 		int nm_buf_len;
5580ff7076bSVincenzo Maffione 		void *nm_buf;
5590ff7076bSVincenzo Maffione 
5600ff7076bSVincenzo Maffione 		if (head == ring->tail) {
5610ff7076bSVincenzo Maffione 			return (0);
5620ff7076bSVincenzo Maffione 		}
5630ff7076bSVincenzo Maffione 
5640ff7076bSVincenzo Maffione 		slot = ring->slot + head;
5650ff7076bSVincenzo Maffione 		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
5660ff7076bSVincenzo Maffione 		nm_buf_len = slot->len;
5670ff7076bSVincenzo Maffione 
5680ff7076bSVincenzo Maffione 		for (;;) {
5690ff7076bSVincenzo Maffione 			int copylen = nm_buf_len < iov_frag_size ?
5700ff7076bSVincenzo Maffione 			    nm_buf_len : iov_frag_size;
5710ff7076bSVincenzo Maffione 
5720ff7076bSVincenzo Maffione 			memcpy(iov_frag_buf, nm_buf, copylen);
5730ff7076bSVincenzo Maffione 			nm_buf += copylen;
5740ff7076bSVincenzo Maffione 			nm_buf_len -= copylen;
5750ff7076bSVincenzo Maffione 			iov_frag_buf += copylen;
5760ff7076bSVincenzo Maffione 			iov_frag_size -= copylen;
5770ff7076bSVincenzo Maffione 			totlen += copylen;
5780ff7076bSVincenzo Maffione 
5790ff7076bSVincenzo Maffione 			if (nm_buf_len == 0) {
5800ff7076bSVincenzo Maffione 				break;
5810ff7076bSVincenzo Maffione 			}
5820ff7076bSVincenzo Maffione 
5830ff7076bSVincenzo Maffione 			iov++;
5840ff7076bSVincenzo Maffione 			iovcnt--;
5850ff7076bSVincenzo Maffione 			if (iovcnt == 0) {
5860ff7076bSVincenzo Maffione 				/* No space to receive. */
5870ff7076bSVincenzo Maffione 				WPRINTF(("Short iov, drop %zd bytes\n",
5880ff7076bSVincenzo Maffione 				    totlen));
5890ff7076bSVincenzo Maffione 				return (-ENOSPC);
5900ff7076bSVincenzo Maffione 			}
5910ff7076bSVincenzo Maffione 			iov_frag_buf = iov->iov_base;
5920ff7076bSVincenzo Maffione 			iov_frag_size = iov->iov_len;
5930ff7076bSVincenzo Maffione 		}
5940ff7076bSVincenzo Maffione 
5950ff7076bSVincenzo Maffione 		head = nm_ring_next(ring, head);
5960ff7076bSVincenzo Maffione 
5970ff7076bSVincenzo Maffione 	} while (slot->flags & NS_MOREFRAG);
5980ff7076bSVincenzo Maffione 
5990ff7076bSVincenzo Maffione 	/* Release slots to netmap. */
6000ff7076bSVincenzo Maffione 	ring->head = ring->cur = head;
6010ff7076bSVincenzo Maffione 
6020ff7076bSVincenzo Maffione 	return (totlen);
6030ff7076bSVincenzo Maffione }
6040ff7076bSVincenzo Maffione 
605*d12c5ef6SVincenzo Maffione static void
606*d12c5ef6SVincenzo Maffione netmap_recv_enable(struct net_backend *be)
607*d12c5ef6SVincenzo Maffione {
608*d12c5ef6SVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
609*d12c5ef6SVincenzo Maffione 
610*d12c5ef6SVincenzo Maffione 	mevent_enable(priv->mevp);
611*d12c5ef6SVincenzo Maffione }
612*d12c5ef6SVincenzo Maffione 
613*d12c5ef6SVincenzo Maffione static void
614*d12c5ef6SVincenzo Maffione netmap_recv_disable(struct net_backend *be)
615*d12c5ef6SVincenzo Maffione {
616*d12c5ef6SVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
617*d12c5ef6SVincenzo Maffione 
618*d12c5ef6SVincenzo Maffione 	mevent_disable(priv->mevp);
619*d12c5ef6SVincenzo Maffione }
620*d12c5ef6SVincenzo Maffione 
6210ff7076bSVincenzo Maffione static struct net_backend netmap_backend = {
6220ff7076bSVincenzo Maffione 	.prefix = "netmap",
6230ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct netmap_priv),
6240ff7076bSVincenzo Maffione 	.init = netmap_init,
6250ff7076bSVincenzo Maffione 	.cleanup = netmap_cleanup,
6260ff7076bSVincenzo Maffione 	.send = netmap_send,
6270ff7076bSVincenzo Maffione 	.recv = netmap_recv,
628*d12c5ef6SVincenzo Maffione 	.recv_enable = netmap_recv_enable,
629*d12c5ef6SVincenzo Maffione 	.recv_disable = netmap_recv_disable,
6300ff7076bSVincenzo Maffione 	.get_cap = netmap_get_cap,
6310ff7076bSVincenzo Maffione 	.set_cap = netmap_set_cap,
6320ff7076bSVincenzo Maffione };
6330ff7076bSVincenzo Maffione 
6340ff7076bSVincenzo Maffione /* A clone of the netmap backend, with a different prefix. */
6350ff7076bSVincenzo Maffione static struct net_backend vale_backend = {
6360ff7076bSVincenzo Maffione 	.prefix = "vale",
6370ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct netmap_priv),
6380ff7076bSVincenzo Maffione 	.init = netmap_init,
6390ff7076bSVincenzo Maffione 	.cleanup = netmap_cleanup,
6400ff7076bSVincenzo Maffione 	.send = netmap_send,
6410ff7076bSVincenzo Maffione 	.recv = netmap_recv,
642*d12c5ef6SVincenzo Maffione 	.recv_enable = netmap_recv_enable,
643*d12c5ef6SVincenzo Maffione 	.recv_disable = netmap_recv_disable,
6440ff7076bSVincenzo Maffione 	.get_cap = netmap_get_cap,
6450ff7076bSVincenzo Maffione 	.set_cap = netmap_set_cap,
6460ff7076bSVincenzo Maffione };
6470ff7076bSVincenzo Maffione 
/*
 * Add both templates to net_backend_set, the linker set that netbe_init()
 * walks to find the backend matching a device name.
 */
DATA_SET(net_backend_set, netmap_backend);
DATA_SET(net_backend_set, vale_backend);
6500ff7076bSVincenzo Maffione 
6510ff7076bSVincenzo Maffione /*
6520ff7076bSVincenzo Maffione  * Initialize a backend and attach to the frontend.
6530ff7076bSVincenzo Maffione  * This is called during frontend initialization.
6540ff7076bSVincenzo Maffione  *  @pbe is a pointer to the backend to be initialized
6550ff7076bSVincenzo Maffione  *  @devname is the backend-name as supplied on the command line,
6560ff7076bSVincenzo Maffione  * 	e.g. -s 2:0,frontend-name,backend-name[,other-args]
6570ff7076bSVincenzo Maffione  *  @cb is the receive callback supplied by the frontend,
6580ff7076bSVincenzo Maffione  *	and it is invoked in the event loop when a receive
6590ff7076bSVincenzo Maffione  *	event is generated in the hypervisor,
6600ff7076bSVincenzo Maffione  *  @param is a pointer to the frontend, and normally used as
6610ff7076bSVincenzo Maffione  *	the argument for the callback.
6620ff7076bSVincenzo Maffione  */
6630ff7076bSVincenzo Maffione int
6640ff7076bSVincenzo Maffione netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb,
6650ff7076bSVincenzo Maffione     void *param)
6660ff7076bSVincenzo Maffione {
6670ff7076bSVincenzo Maffione 	struct net_backend **pbe, *nbe, *tbe = NULL;
6680ff7076bSVincenzo Maffione 	int err;
6690ff7076bSVincenzo Maffione 
6700ff7076bSVincenzo Maffione 	/*
6710ff7076bSVincenzo Maffione 	 * Find the network backend that matches the user-provided
6720ff7076bSVincenzo Maffione 	 * device name. net_backend_set is built using a linker set.
6730ff7076bSVincenzo Maffione 	 */
6740ff7076bSVincenzo Maffione 	SET_FOREACH(pbe, net_backend_set) {
6750ff7076bSVincenzo Maffione 		if (strncmp(devname, (*pbe)->prefix,
6760ff7076bSVincenzo Maffione 		    strlen((*pbe)->prefix)) == 0) {
6770ff7076bSVincenzo Maffione 			tbe = *pbe;
6780ff7076bSVincenzo Maffione 			assert(tbe->init != NULL);
6790ff7076bSVincenzo Maffione 			assert(tbe->cleanup != NULL);
6800ff7076bSVincenzo Maffione 			assert(tbe->send != NULL);
6810ff7076bSVincenzo Maffione 			assert(tbe->recv != NULL);
6820ff7076bSVincenzo Maffione 			assert(tbe->get_cap != NULL);
6830ff7076bSVincenzo Maffione 			assert(tbe->set_cap != NULL);
6840ff7076bSVincenzo Maffione 			break;
6850ff7076bSVincenzo Maffione 		}
6860ff7076bSVincenzo Maffione 	}
6870ff7076bSVincenzo Maffione 
6880ff7076bSVincenzo Maffione 	*ret = NULL;
6890ff7076bSVincenzo Maffione 	if (tbe == NULL)
6900ff7076bSVincenzo Maffione 		return (EINVAL);
6910ff7076bSVincenzo Maffione 	nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
6920ff7076bSVincenzo Maffione 	*nbe = *tbe;	/* copy the template */
6930ff7076bSVincenzo Maffione 	nbe->fd = -1;
6940ff7076bSVincenzo Maffione 	nbe->sc = param;
6950ff7076bSVincenzo Maffione 	nbe->be_vnet_hdr_len = 0;
6960ff7076bSVincenzo Maffione 	nbe->fe_vnet_hdr_len = 0;
6970ff7076bSVincenzo Maffione 
6980ff7076bSVincenzo Maffione 	/* Initialize the backend. */
6990ff7076bSVincenzo Maffione 	err = nbe->init(nbe, devname, cb, param);
7000ff7076bSVincenzo Maffione 	if (err) {
7010ff7076bSVincenzo Maffione 		free(nbe);
7020ff7076bSVincenzo Maffione 		return (err);
7030ff7076bSVincenzo Maffione 	}
7040ff7076bSVincenzo Maffione 
7050ff7076bSVincenzo Maffione 	*ret = nbe;
7060ff7076bSVincenzo Maffione 
7070ff7076bSVincenzo Maffione 	return (0);
7080ff7076bSVincenzo Maffione }
7090ff7076bSVincenzo Maffione 
7100ff7076bSVincenzo Maffione void
7110ff7076bSVincenzo Maffione netbe_cleanup(struct net_backend *be)
7120ff7076bSVincenzo Maffione {
7130ff7076bSVincenzo Maffione 
7140ff7076bSVincenzo Maffione 	if (be != NULL) {
7150ff7076bSVincenzo Maffione 		be->cleanup(be);
7160ff7076bSVincenzo Maffione 		free(be);
7170ff7076bSVincenzo Maffione 	}
7180ff7076bSVincenzo Maffione }
7190ff7076bSVincenzo Maffione 
7200ff7076bSVincenzo Maffione uint64_t
7210ff7076bSVincenzo Maffione netbe_get_cap(struct net_backend *be)
7220ff7076bSVincenzo Maffione {
7230ff7076bSVincenzo Maffione 
7240ff7076bSVincenzo Maffione 	assert(be != NULL);
7250ff7076bSVincenzo Maffione 	return (be->get_cap(be));
7260ff7076bSVincenzo Maffione }
7270ff7076bSVincenzo Maffione 
7280ff7076bSVincenzo Maffione int
7290ff7076bSVincenzo Maffione netbe_set_cap(struct net_backend *be, uint64_t features,
7300ff7076bSVincenzo Maffione 	      unsigned vnet_hdr_len)
7310ff7076bSVincenzo Maffione {
7320ff7076bSVincenzo Maffione 	int ret;
7330ff7076bSVincenzo Maffione 
7340ff7076bSVincenzo Maffione 	assert(be != NULL);
7350ff7076bSVincenzo Maffione 
7360ff7076bSVincenzo Maffione 	/* There are only three valid lengths, i.e., 0, 10 and 12. */
7370ff7076bSVincenzo Maffione 	if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
7380ff7076bSVincenzo Maffione 		&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
7390ff7076bSVincenzo Maffione 		return (-1);
7400ff7076bSVincenzo Maffione 
7410ff7076bSVincenzo Maffione 	be->fe_vnet_hdr_len = vnet_hdr_len;
7420ff7076bSVincenzo Maffione 
7430ff7076bSVincenzo Maffione 	ret = be->set_cap(be, features, vnet_hdr_len);
7440ff7076bSVincenzo Maffione 	assert(be->be_vnet_hdr_len == 0 ||
7450ff7076bSVincenzo Maffione 	       be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
7460ff7076bSVincenzo Maffione 
7470ff7076bSVincenzo Maffione 	return (ret);
7480ff7076bSVincenzo Maffione }
7490ff7076bSVincenzo Maffione 
/*
 * Drop the first @tlen bytes from the scatter/gather list @iov.
 *
 * Only the first segment is touched: its length is reduced by @tlen and,
 * if bytes remain, its base pointer is advanced. If the first segment is
 * consumed entirely, *@iovcnt is decremented and the second segment is
 * returned instead.
 *
 * Returns a pointer to the first segment of the trimmed list.
 */
static __inline struct iovec *
iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
{
	struct iovec *first = &iov[0];

	/* XXX short-cut: assume first segment is >= tlen */
	assert(first->iov_len >= tlen);

	first->iov_len -= tlen;
	if (first->iov_len != 0) {
		/* Data is left in the first segment: just skip the prefix. */
		first->iov_base = (void *)((uintptr_t)first->iov_base + tlen);
		return (first);
	}

	/* First segment fully consumed: continue from the next one. */
	assert(*iovcnt > 1);
	*iovcnt -= 1;

	return (first + 1);
}
7700ff7076bSVincenzo Maffione 
7710ff7076bSVincenzo Maffione ssize_t
7720ff7076bSVincenzo Maffione netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt)
7730ff7076bSVincenzo Maffione {
7740ff7076bSVincenzo Maffione 
7750ff7076bSVincenzo Maffione 	assert(be != NULL);
7760ff7076bSVincenzo Maffione 	if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
7770ff7076bSVincenzo Maffione 		/*
7780ff7076bSVincenzo Maffione 		 * The frontend uses a virtio-net header, but the backend
7790ff7076bSVincenzo Maffione 		 * does not. We ignore it (as it must be all zeroes) and
7800ff7076bSVincenzo Maffione 		 * strip it.
7810ff7076bSVincenzo Maffione 		 */
7820ff7076bSVincenzo Maffione 		assert(be->be_vnet_hdr_len == 0);
7830ff7076bSVincenzo Maffione 		iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
7840ff7076bSVincenzo Maffione 	}
7850ff7076bSVincenzo Maffione 
7860ff7076bSVincenzo Maffione 	return (be->send(be, iov, iovcnt));
7870ff7076bSVincenzo Maffione }
7880ff7076bSVincenzo Maffione 
7890ff7076bSVincenzo Maffione /*
7900ff7076bSVincenzo Maffione  * Try to read a packet from the backend, without blocking.
7910ff7076bSVincenzo Maffione  * If no packets are available, return 0. In case of success, return
7920ff7076bSVincenzo Maffione  * the length of the packet just read. Return -1 in case of errors.
7930ff7076bSVincenzo Maffione  */
7940ff7076bSVincenzo Maffione ssize_t
7950ff7076bSVincenzo Maffione netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
7960ff7076bSVincenzo Maffione {
7970ff7076bSVincenzo Maffione 	/* Length of prepended virtio-net header. */
7980ff7076bSVincenzo Maffione 	unsigned int hlen = be->fe_vnet_hdr_len;
7990ff7076bSVincenzo Maffione 	int ret;
8000ff7076bSVincenzo Maffione 
8010ff7076bSVincenzo Maffione 	assert(be != NULL);
8020ff7076bSVincenzo Maffione 
8030ff7076bSVincenzo Maffione 	if (hlen && hlen != be->be_vnet_hdr_len) {
8040ff7076bSVincenzo Maffione 		/*
8050ff7076bSVincenzo Maffione 		 * The frontend uses a virtio-net header, but the backend
8060ff7076bSVincenzo Maffione 		 * does not. We need to prepend a zeroed header.
8070ff7076bSVincenzo Maffione 		 */
8080ff7076bSVincenzo Maffione 		struct virtio_net_rxhdr *vh;
8090ff7076bSVincenzo Maffione 
8100ff7076bSVincenzo Maffione 		assert(be->be_vnet_hdr_len == 0);
8110ff7076bSVincenzo Maffione 
8120ff7076bSVincenzo Maffione 		/*
8130ff7076bSVincenzo Maffione 		 * Get a pointer to the rx header, and use the
8140ff7076bSVincenzo Maffione 		 * data immediately following it for the packet buffer.
8150ff7076bSVincenzo Maffione 		 */
8160ff7076bSVincenzo Maffione 		vh = iov[0].iov_base;
8170ff7076bSVincenzo Maffione 		iov = iov_trim(iov, &iovcnt, hlen);
8180ff7076bSVincenzo Maffione 
8190ff7076bSVincenzo Maffione 		/*
8200ff7076bSVincenzo Maffione 		 * The only valid field in the rx packet header is the
8210ff7076bSVincenzo Maffione 		 * number of buffers if merged rx bufs were negotiated.
8220ff7076bSVincenzo Maffione 		 */
8230ff7076bSVincenzo Maffione 		memset(vh, 0, hlen);
8240ff7076bSVincenzo Maffione 		if (hlen == VNET_HDR_LEN) {
8250ff7076bSVincenzo Maffione 			vh->vrh_bufs = 1;
8260ff7076bSVincenzo Maffione 		}
8270ff7076bSVincenzo Maffione 	}
8280ff7076bSVincenzo Maffione 
8290ff7076bSVincenzo Maffione 	ret = be->recv(be, iov, iovcnt);
8300ff7076bSVincenzo Maffione 	if (ret > 0) {
8310ff7076bSVincenzo Maffione 		ret += hlen;
8320ff7076bSVincenzo Maffione 	}
8330ff7076bSVincenzo Maffione 
8340ff7076bSVincenzo Maffione 	return (ret);
8350ff7076bSVincenzo Maffione }
8360ff7076bSVincenzo Maffione 
/*
 * Read a packet from the backend and discard it.
 * Returns the size of the discarded packet or zero if no packet was available.
 * A negative error code is returned in case of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the scratch buffer is only used to discard frames,
	 * so there is no need for it to be per-vtnet or locked.
	 * We only make it large enough for TSO-sized segment.
	 */
	static uint8_t scratch[65536 + 64];
	struct iovec sink = {
		.iov_base = scratch,
		.iov_len = sizeof(scratch),
	};

	return (netbe_recv(be, &sink, 1));
}
8580ff7076bSVincenzo Maffione 
859*d12c5ef6SVincenzo Maffione void
860*d12c5ef6SVincenzo Maffione netbe_rx_disable(struct net_backend *be)
861*d12c5ef6SVincenzo Maffione {
862*d12c5ef6SVincenzo Maffione 
863*d12c5ef6SVincenzo Maffione 	return be->recv_enable(be);
864*d12c5ef6SVincenzo Maffione }
865*d12c5ef6SVincenzo Maffione 
866*d12c5ef6SVincenzo Maffione void
867*d12c5ef6SVincenzo Maffione netbe_rx_enable(struct net_backend *be)
868*d12c5ef6SVincenzo Maffione {
869*d12c5ef6SVincenzo Maffione 
870*d12c5ef6SVincenzo Maffione 	return be->recv_disable(be);
871*d12c5ef6SVincenzo Maffione }
872