xref: /freebsd/usr.sbin/bhyve/net_backends.c (revision 2d5fe36980f2bb5666e85d1a433ea7e38d78092a)
10ff7076bSVincenzo Maffione /*-
290db4ba9SVincenzo Maffione  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
390db4ba9SVincenzo Maffione  *
40ff7076bSVincenzo Maffione  * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
50ff7076bSVincenzo Maffione  *
60ff7076bSVincenzo Maffione  * Redistribution and use in source and binary forms, with or without
70ff7076bSVincenzo Maffione  * modification, are permitted provided that the following conditions
80ff7076bSVincenzo Maffione  * are met:
90ff7076bSVincenzo Maffione  * 1. Redistributions of source code must retain the above copyright
100ff7076bSVincenzo Maffione  *    notice, this list of conditions and the following disclaimer.
110ff7076bSVincenzo Maffione  * 2. Redistributions in binary form must reproduce the above copyright
120ff7076bSVincenzo Maffione  *    notice, this list of conditions and the following disclaimer in the
130ff7076bSVincenzo Maffione  *    documentation and/or other materials provided with the distribution.
140ff7076bSVincenzo Maffione  *
150ff7076bSVincenzo Maffione  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
160ff7076bSVincenzo Maffione  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
170ff7076bSVincenzo Maffione  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
180ff7076bSVincenzo Maffione  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
190ff7076bSVincenzo Maffione  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
200ff7076bSVincenzo Maffione  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
210ff7076bSVincenzo Maffione  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
220ff7076bSVincenzo Maffione  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
230ff7076bSVincenzo Maffione  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
240ff7076bSVincenzo Maffione  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
250ff7076bSVincenzo Maffione  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
260ff7076bSVincenzo Maffione  *
270ff7076bSVincenzo Maffione  * $FreeBSD$
280ff7076bSVincenzo Maffione  */
290ff7076bSVincenzo Maffione 
300ff7076bSVincenzo Maffione /*
310ff7076bSVincenzo Maffione  * This file implements multiple network backends (tap, netmap, ...),
320ff7076bSVincenzo Maffione  * to be used by network frontends such as virtio-net and e1000.
330ff7076bSVincenzo Maffione  * The API to access the backend (e.g. send/receive packets, negotiate
340ff7076bSVincenzo Maffione  * features) is exported by net_backends.h.
350ff7076bSVincenzo Maffione  */
360ff7076bSVincenzo Maffione 
378cd0c1acSVincenzo Maffione #include <sys/cdefs.h>
388cd0c1acSVincenzo Maffione __FBSDID("$FreeBSD$");
398cd0c1acSVincenzo Maffione 
400ff7076bSVincenzo Maffione #include <sys/types.h>		/* u_short etc */
410ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
420ff7076bSVincenzo Maffione #include <sys/capsicum.h>
430ff7076bSVincenzo Maffione #endif
440ff7076bSVincenzo Maffione #include <sys/ioctl.h>
450ff7076bSVincenzo Maffione #include <sys/mman.h>
460ff7076bSVincenzo Maffione #include <sys/uio.h>
470ff7076bSVincenzo Maffione 
480ff7076bSVincenzo Maffione #include <net/if.h>
490ff7076bSVincenzo Maffione #include <net/netmap.h>
500ff7076bSVincenzo Maffione #include <net/netmap_virt.h>
510ff7076bSVincenzo Maffione #define NETMAP_WITH_LIBS
520ff7076bSVincenzo Maffione #include <net/netmap_user.h>
530ff7076bSVincenzo Maffione 
540ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
550ff7076bSVincenzo Maffione #include <capsicum_helpers.h>
560ff7076bSVincenzo Maffione #endif
570ff7076bSVincenzo Maffione #include <err.h>
580ff7076bSVincenzo Maffione #include <errno.h>
590ff7076bSVincenzo Maffione #include <fcntl.h>
600ff7076bSVincenzo Maffione #include <stdio.h>
610ff7076bSVincenzo Maffione #include <stdlib.h>
620ff7076bSVincenzo Maffione #include <stdint.h>
630ff7076bSVincenzo Maffione #include <string.h>
640ff7076bSVincenzo Maffione #include <unistd.h>
650ff7076bSVincenzo Maffione #include <sysexits.h>
660ff7076bSVincenzo Maffione #include <assert.h>
670ff7076bSVincenzo Maffione #include <pthread.h>
680ff7076bSVincenzo Maffione #include <pthread_np.h>
690ff7076bSVincenzo Maffione #include <poll.h>
700ff7076bSVincenzo Maffione #include <assert.h>
710ff7076bSVincenzo Maffione 
720ff7076bSVincenzo Maffione 
730ff7076bSVincenzo Maffione #include "iov.h"
740ff7076bSVincenzo Maffione #include "mevent.h"
750ff7076bSVincenzo Maffione #include "net_backends.h"
760ff7076bSVincenzo Maffione 
770ff7076bSVincenzo Maffione #include <sys/linker_set.h>
780ff7076bSVincenzo Maffione 
790ff7076bSVincenzo Maffione /*
800ff7076bSVincenzo Maffione  * Each network backend registers a set of function pointers that are
810ff7076bSVincenzo Maffione  * used to implement the net backends API.
820ff7076bSVincenzo Maffione  * This might need to be exposed if we implement backends in separate files.
830ff7076bSVincenzo Maffione  */
840ff7076bSVincenzo Maffione struct net_backend {
850ff7076bSVincenzo Maffione 	const char *prefix;	/* prefix matching this backend */
860ff7076bSVincenzo Maffione 
870ff7076bSVincenzo Maffione 	/*
880ff7076bSVincenzo Maffione 	 * Routines used to initialize and cleanup the resources needed
890ff7076bSVincenzo Maffione 	 * by a backend. The cleanup function is used internally,
900ff7076bSVincenzo Maffione 	 * and should not be called by the frontend.
910ff7076bSVincenzo Maffione 	 */
920ff7076bSVincenzo Maffione 	int (*init)(struct net_backend *be, const char *devname,
930ff7076bSVincenzo Maffione 	    net_be_rxeof_t cb, void *param);
940ff7076bSVincenzo Maffione 	void (*cleanup)(struct net_backend *be);
950ff7076bSVincenzo Maffione 
960ff7076bSVincenzo Maffione 	/*
970ff7076bSVincenzo Maffione 	 * Called to serve a guest transmit request. The scatter-gather
980ff7076bSVincenzo Maffione 	 * vector provided by the caller has 'iovcnt' elements and contains
990ff7076bSVincenzo Maffione 	 * the packet to send.
1000ff7076bSVincenzo Maffione 	 */
1010ff7076bSVincenzo Maffione 	ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt);
1020ff7076bSVincenzo Maffione 
1030ff7076bSVincenzo Maffione 	/*
1040ff7076bSVincenzo Maffione 	 * Called to receive a packet from the backend. When the function
1050ff7076bSVincenzo Maffione 	 * returns a positive value 'len', the scatter-gather vector
1060ff7076bSVincenzo Maffione 	 * provided by the caller contains a packet with such length.
1070ff7076bSVincenzo Maffione 	 * The function returns 0 if the backend doesn't have a new packet to
1080ff7076bSVincenzo Maffione 	 * receive.
1090ff7076bSVincenzo Maffione 	 */
1100ff7076bSVincenzo Maffione 	ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
1110ff7076bSVincenzo Maffione 
1120ff7076bSVincenzo Maffione 	/*
1130ff7076bSVincenzo Maffione 	 * Ask the backend for the virtio-net features it is able to
1140ff7076bSVincenzo Maffione 	 * support. Possible features are TSO, UFO and checksum offloading
1150ff7076bSVincenzo Maffione 	 * in both rx and tx direction and for both IPv4 and IPv6.
1160ff7076bSVincenzo Maffione 	 */
1170ff7076bSVincenzo Maffione 	uint64_t (*get_cap)(struct net_backend *be);
1180ff7076bSVincenzo Maffione 
1190ff7076bSVincenzo Maffione 	/*
1200ff7076bSVincenzo Maffione 	 * Tell the backend to enable/disable the specified virtio-net
1210ff7076bSVincenzo Maffione 	 * features (capabilities).
1220ff7076bSVincenzo Maffione 	 */
1230ff7076bSVincenzo Maffione 	int (*set_cap)(struct net_backend *be, uint64_t features,
1240ff7076bSVincenzo Maffione 	    unsigned int vnet_hdr_len);
1250ff7076bSVincenzo Maffione 
1260ff7076bSVincenzo Maffione 	struct pci_vtnet_softc *sc;
1270ff7076bSVincenzo Maffione 	int fd;
1280ff7076bSVincenzo Maffione 
1290ff7076bSVincenzo Maffione 	/*
1300ff7076bSVincenzo Maffione 	 * Length of the virtio-net header used by the backend and the
1310ff7076bSVincenzo Maffione 	 * frontend, respectively. A zero value means that the header
1320ff7076bSVincenzo Maffione 	 * is not used.
1330ff7076bSVincenzo Maffione 	 */
1340ff7076bSVincenzo Maffione 	unsigned int be_vnet_hdr_len;
1350ff7076bSVincenzo Maffione 	unsigned int fe_vnet_hdr_len;
1360ff7076bSVincenzo Maffione 
1370ff7076bSVincenzo Maffione 	/* Size of backend-specific private data. */
1380ff7076bSVincenzo Maffione 	size_t priv_size;
1390ff7076bSVincenzo Maffione 
1400ff7076bSVincenzo Maffione 	/* Room for backend-specific data. */
1410ff7076bSVincenzo Maffione 	char opaque[0];
1420ff7076bSVincenzo Maffione };
1430ff7076bSVincenzo Maffione 
/* Declare the linker set that collects the available network backends. */
SET_DECLARE(net_backend_set, struct net_backend);

/* Size of the virtio-net receive header structure. */
#define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)

/*
 * Print a warning with printf(). The argument list must be wrapped in an
 * extra pair of parentheses, e.g. WPRINTF(("value %d\n", v)).
 */
#define WPRINTF(params) printf params
1490ff7076bSVincenzo Maffione 
1500ff7076bSVincenzo Maffione /*
1510ff7076bSVincenzo Maffione  * The tap backend
1520ff7076bSVincenzo Maffione  */
1530ff7076bSVincenzo Maffione 
/* Private state of the tap backend, stored in net_backend.opaque. */
struct tap_priv {
	/* Read event registered by tap_init(), removed by tap_cleanup(). */
	struct mevent *mevp;
};
1570ff7076bSVincenzo Maffione 
1580ff7076bSVincenzo Maffione static void
1590ff7076bSVincenzo Maffione tap_cleanup(struct net_backend *be)
1600ff7076bSVincenzo Maffione {
1610ff7076bSVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
1620ff7076bSVincenzo Maffione 
1630ff7076bSVincenzo Maffione 	if (priv->mevp) {
1640ff7076bSVincenzo Maffione 		mevent_delete(priv->mevp);
1650ff7076bSVincenzo Maffione 	}
1660ff7076bSVincenzo Maffione 	if (be->fd != -1) {
1670ff7076bSVincenzo Maffione 		close(be->fd);
1680ff7076bSVincenzo Maffione 		be->fd = -1;
1690ff7076bSVincenzo Maffione 	}
1700ff7076bSVincenzo Maffione }
1710ff7076bSVincenzo Maffione 
1720ff7076bSVincenzo Maffione static int
1730ff7076bSVincenzo Maffione tap_init(struct net_backend *be, const char *devname,
1740ff7076bSVincenzo Maffione 	 net_be_rxeof_t cb, void *param)
1750ff7076bSVincenzo Maffione {
1760ff7076bSVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
1770ff7076bSVincenzo Maffione 	char tbuf[80];
1780ff7076bSVincenzo Maffione 	int opt = 1;
1790ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
1800ff7076bSVincenzo Maffione 	cap_rights_t rights;
1810ff7076bSVincenzo Maffione #endif
1820ff7076bSVincenzo Maffione 
1830ff7076bSVincenzo Maffione 	if (cb == NULL) {
1840ff7076bSVincenzo Maffione 		WPRINTF(("TAP backend requires non-NULL callback\n"));
1850ff7076bSVincenzo Maffione 		return (-1);
1860ff7076bSVincenzo Maffione 	}
1870ff7076bSVincenzo Maffione 
1880ff7076bSVincenzo Maffione 	strcpy(tbuf, "/dev/");
1890ff7076bSVincenzo Maffione 	strlcat(tbuf, devname, sizeof(tbuf));
1900ff7076bSVincenzo Maffione 
191*2d5fe369SSean Chittenden 	be->fd = open(tbuf, O_RDWR);
192*2d5fe369SSean Chittenden 	if (be->fd == -1) {
1930ff7076bSVincenzo Maffione 		WPRINTF(("open of tap device %s failed\n", tbuf));
1940ff7076bSVincenzo Maffione 		goto error;
1950ff7076bSVincenzo Maffione 	}
1960ff7076bSVincenzo Maffione 
1970ff7076bSVincenzo Maffione 	/*
1980ff7076bSVincenzo Maffione 	 * Set non-blocking and register for read
1990ff7076bSVincenzo Maffione 	 * notifications with the event loop
2000ff7076bSVincenzo Maffione 	 */
201*2d5fe369SSean Chittenden 	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
2020ff7076bSVincenzo Maffione 		WPRINTF(("tap device O_NONBLOCK failed\n"));
2030ff7076bSVincenzo Maffione 		goto error;
2040ff7076bSVincenzo Maffione 	}
2050ff7076bSVincenzo Maffione 
2060ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
2070ff7076bSVincenzo Maffione 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
208*2d5fe369SSean Chittenden 	if (caph_rights_limit(be->fd, &rights) == -1)
2090ff7076bSVincenzo Maffione 		errx(EX_OSERR, "Unable to apply rights for sandbox");
2100ff7076bSVincenzo Maffione #endif
2110ff7076bSVincenzo Maffione 
212*2d5fe369SSean Chittenden 	priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
2130ff7076bSVincenzo Maffione 	if (priv->mevp == NULL) {
2140ff7076bSVincenzo Maffione 		WPRINTF(("Could not register event\n"));
2150ff7076bSVincenzo Maffione 		goto error;
2160ff7076bSVincenzo Maffione 	}
2170ff7076bSVincenzo Maffione 
2180ff7076bSVincenzo Maffione 	return (0);
2190ff7076bSVincenzo Maffione 
2200ff7076bSVincenzo Maffione error:
2210ff7076bSVincenzo Maffione 	tap_cleanup(be);
2220ff7076bSVincenzo Maffione 	return (-1);
2230ff7076bSVincenzo Maffione }
2240ff7076bSVincenzo Maffione 
2250ff7076bSVincenzo Maffione /*
2260ff7076bSVincenzo Maffione  * Called to send a buffer chain out to the tap device
2270ff7076bSVincenzo Maffione  */
2280ff7076bSVincenzo Maffione static ssize_t
2290ff7076bSVincenzo Maffione tap_send(struct net_backend *be, struct iovec *iov, int iovcnt)
2300ff7076bSVincenzo Maffione {
2310ff7076bSVincenzo Maffione 	return (writev(be->fd, iov, iovcnt));
2320ff7076bSVincenzo Maffione }
2330ff7076bSVincenzo Maffione 
2340ff7076bSVincenzo Maffione static ssize_t
2350ff7076bSVincenzo Maffione tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
2360ff7076bSVincenzo Maffione {
2370ff7076bSVincenzo Maffione 	ssize_t ret;
2380ff7076bSVincenzo Maffione 
2390ff7076bSVincenzo Maffione 	/* Should never be called without a valid tap fd */
2400ff7076bSVincenzo Maffione 	assert(be->fd != -1);
2410ff7076bSVincenzo Maffione 
2420ff7076bSVincenzo Maffione 	ret = readv(be->fd, iov, iovcnt);
2430ff7076bSVincenzo Maffione 
2440ff7076bSVincenzo Maffione 	if (ret < 0 && errno == EWOULDBLOCK) {
2450ff7076bSVincenzo Maffione 		return (0);
2460ff7076bSVincenzo Maffione 	}
2470ff7076bSVincenzo Maffione 
2480ff7076bSVincenzo Maffione 	return (ret);
2490ff7076bSVincenzo Maffione }
2500ff7076bSVincenzo Maffione 
/*
 * Report the virtio-net features supported by the tap backend.
 * The backend argument is not used: the tap backend currently
 * advertises no features at all.
 */
static uint64_t
tap_get_cap(struct net_backend *be)
{
	const uint64_t no_features = 0;

	return (no_features);
}
2570ff7076bSVincenzo Maffione 
/*
 * Accept a feature set for the tap backend. Since the backend supports
 * no features and no virtio-net header, only the all-zero request
 * succeeds (returns 0); anything else is rejected with -1.
 */
static int
tap_set_cap(struct net_backend *be, uint64_t features,
		unsigned vnet_hdr_len)
{

	if (features == 0 && vnet_hdr_len == 0) {
		return (0);
	}

	return (-1);
}
2650ff7076bSVincenzo Maffione 
2660ff7076bSVincenzo Maffione static struct net_backend tap_backend = {
2670ff7076bSVincenzo Maffione 	.prefix = "tap",
2680ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct tap_priv),
2690ff7076bSVincenzo Maffione 	.init = tap_init,
2700ff7076bSVincenzo Maffione 	.cleanup = tap_cleanup,
2710ff7076bSVincenzo Maffione 	.send = tap_send,
2720ff7076bSVincenzo Maffione 	.recv = tap_recv,
2730ff7076bSVincenzo Maffione 	.get_cap = tap_get_cap,
2740ff7076bSVincenzo Maffione 	.set_cap = tap_set_cap,
2750ff7076bSVincenzo Maffione };
2760ff7076bSVincenzo Maffione 
2770ff7076bSVincenzo Maffione /* A clone of the tap backend, with a different prefix. */
2780ff7076bSVincenzo Maffione static struct net_backend vmnet_backend = {
2790ff7076bSVincenzo Maffione 	.prefix = "vmnet",
2800ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct tap_priv),
2810ff7076bSVincenzo Maffione 	.init = tap_init,
2820ff7076bSVincenzo Maffione 	.cleanup = tap_cleanup,
2830ff7076bSVincenzo Maffione 	.send = tap_send,
2840ff7076bSVincenzo Maffione 	.recv = tap_recv,
2850ff7076bSVincenzo Maffione 	.get_cap = tap_get_cap,
2860ff7076bSVincenzo Maffione 	.set_cap = tap_set_cap,
2870ff7076bSVincenzo Maffione };
2880ff7076bSVincenzo Maffione 
/* Add the tap and vmnet descriptors to the net_backend_set linker set. */
DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);
2910ff7076bSVincenzo Maffione 
2920ff7076bSVincenzo Maffione /*
2930ff7076bSVincenzo Maffione  * The netmap backend
2940ff7076bSVincenzo Maffione  */
2950ff7076bSVincenzo Maffione 
/*
 * The virtio-net features supported by netmap: checksum, TSO (IPv4 and
 * IPv6) and UFO flags, both the HOST_* and the GUEST_* variants.
 */
#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
3010ff7076bSVincenzo Maffione 
/* Private state of the netmap backend, stored in net_backend.opaque. */
struct netmap_priv {
	char ifname[IFNAMSIZ];		/* copy of the device name passed to init */
	struct nm_desc *nmd;		/* descriptor returned by nm_open() */
	uint16_t memid;			/* taken from nmd->req.nr_arg2 at init */
	struct netmap_ring *rx;		/* receive ring 0 of the port */
	struct netmap_ring *tx;		/* transmit ring 0 of the port */
	struct mevent *mevp;		/* read event registered at init */
	net_be_rxeof_t cb;		/* receive callback given to init */
	void *cb_param;			/* argument given to init for 'cb' */
};
3120ff7076bSVincenzo Maffione 
3130ff7076bSVincenzo Maffione static void
3140ff7076bSVincenzo Maffione nmreq_init(struct nmreq *req, char *ifname)
3150ff7076bSVincenzo Maffione {
3160ff7076bSVincenzo Maffione 
3170ff7076bSVincenzo Maffione 	memset(req, 0, sizeof(*req));
3180ff7076bSVincenzo Maffione 	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
3190ff7076bSVincenzo Maffione 	req->nr_version = NETMAP_API;
3200ff7076bSVincenzo Maffione }
3210ff7076bSVincenzo Maffione 
3220ff7076bSVincenzo Maffione static int
3230ff7076bSVincenzo Maffione netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
3240ff7076bSVincenzo Maffione {
3250ff7076bSVincenzo Maffione 	int err;
3260ff7076bSVincenzo Maffione 	struct nmreq req;
3270ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
3280ff7076bSVincenzo Maffione 
3290ff7076bSVincenzo Maffione 	nmreq_init(&req, priv->ifname);
3300ff7076bSVincenzo Maffione 	req.nr_cmd = NETMAP_BDG_VNET_HDR;
3310ff7076bSVincenzo Maffione 	req.nr_arg1 = vnet_hdr_len;
3320ff7076bSVincenzo Maffione 	err = ioctl(be->fd, NIOCREGIF, &req);
3330ff7076bSVincenzo Maffione 	if (err) {
3340ff7076bSVincenzo Maffione 		WPRINTF(("Unable to set vnet header length %d\n",
3350ff7076bSVincenzo Maffione 				vnet_hdr_len));
3360ff7076bSVincenzo Maffione 		return (err);
3370ff7076bSVincenzo Maffione 	}
3380ff7076bSVincenzo Maffione 
3390ff7076bSVincenzo Maffione 	be->be_vnet_hdr_len = vnet_hdr_len;
3400ff7076bSVincenzo Maffione 
3410ff7076bSVincenzo Maffione 	return (0);
3420ff7076bSVincenzo Maffione }
3430ff7076bSVincenzo Maffione 
3440ff7076bSVincenzo Maffione static int
3450ff7076bSVincenzo Maffione netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
3460ff7076bSVincenzo Maffione {
3470ff7076bSVincenzo Maffione 	int prev_hdr_len = be->be_vnet_hdr_len;
3480ff7076bSVincenzo Maffione 	int ret;
3490ff7076bSVincenzo Maffione 
3500ff7076bSVincenzo Maffione 	if (vnet_hdr_len == prev_hdr_len) {
3510ff7076bSVincenzo Maffione 		return (1);
3520ff7076bSVincenzo Maffione 	}
3530ff7076bSVincenzo Maffione 
3540ff7076bSVincenzo Maffione 	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
3550ff7076bSVincenzo Maffione 	if (ret) {
3560ff7076bSVincenzo Maffione 		return (0);
3570ff7076bSVincenzo Maffione 	}
3580ff7076bSVincenzo Maffione 
3590ff7076bSVincenzo Maffione 	netmap_set_vnet_hdr_len(be, prev_hdr_len);
3600ff7076bSVincenzo Maffione 
3610ff7076bSVincenzo Maffione 	return (1);
3620ff7076bSVincenzo Maffione }
3630ff7076bSVincenzo Maffione 
3640ff7076bSVincenzo Maffione static uint64_t
3650ff7076bSVincenzo Maffione netmap_get_cap(struct net_backend *be)
3660ff7076bSVincenzo Maffione {
3670ff7076bSVincenzo Maffione 
3680ff7076bSVincenzo Maffione 	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
3690ff7076bSVincenzo Maffione 	    NETMAP_FEATURES : 0);
3700ff7076bSVincenzo Maffione }
3710ff7076bSVincenzo Maffione 
/*
 * Apply the negotiated configuration to the netmap backend. Only the
 * virtio-net header length is programmed; 'features' is not examined.
 * Returns the result of netmap_set_vnet_hdr_len().
 */
static int
netmap_set_cap(struct net_backend *be, uint64_t features,
	       unsigned vnet_hdr_len)
{
	int rc;

	rc = netmap_set_vnet_hdr_len(be, vnet_hdr_len);

	return (rc);
}
3790ff7076bSVincenzo Maffione 
3800ff7076bSVincenzo Maffione static int
3810ff7076bSVincenzo Maffione netmap_init(struct net_backend *be, const char *devname,
3820ff7076bSVincenzo Maffione 	    net_be_rxeof_t cb, void *param)
3830ff7076bSVincenzo Maffione {
3840ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
3850ff7076bSVincenzo Maffione 
3860ff7076bSVincenzo Maffione 	strlcpy(priv->ifname, devname, sizeof(priv->ifname));
3870ff7076bSVincenzo Maffione 	priv->ifname[sizeof(priv->ifname) - 1] = '\0';
3880ff7076bSVincenzo Maffione 
3890ff7076bSVincenzo Maffione 	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
3900ff7076bSVincenzo Maffione 	if (priv->nmd == NULL) {
3910ff7076bSVincenzo Maffione 		WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n",
3920ff7076bSVincenzo Maffione 			devname, strerror(errno)));
3930ff7076bSVincenzo Maffione 		free(priv);
3940ff7076bSVincenzo Maffione 		return (-1);
3950ff7076bSVincenzo Maffione 	}
3960ff7076bSVincenzo Maffione 
3970ff7076bSVincenzo Maffione 	priv->memid = priv->nmd->req.nr_arg2;
3980ff7076bSVincenzo Maffione 	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
3990ff7076bSVincenzo Maffione 	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
4000ff7076bSVincenzo Maffione 	priv->cb = cb;
4010ff7076bSVincenzo Maffione 	priv->cb_param = param;
4020ff7076bSVincenzo Maffione 	be->fd = priv->nmd->fd;
4030ff7076bSVincenzo Maffione 
4040ff7076bSVincenzo Maffione 	priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
4050ff7076bSVincenzo Maffione 	if (priv->mevp == NULL) {
4060ff7076bSVincenzo Maffione 		WPRINTF(("Could not register event\n"));
4070ff7076bSVincenzo Maffione 		return (-1);
4080ff7076bSVincenzo Maffione 	}
4090ff7076bSVincenzo Maffione 
4100ff7076bSVincenzo Maffione 	return (0);
4110ff7076bSVincenzo Maffione }
4120ff7076bSVincenzo Maffione 
4130ff7076bSVincenzo Maffione static void
4140ff7076bSVincenzo Maffione netmap_cleanup(struct net_backend *be)
4150ff7076bSVincenzo Maffione {
4160ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
4170ff7076bSVincenzo Maffione 
4180ff7076bSVincenzo Maffione 	if (priv->mevp) {
4190ff7076bSVincenzo Maffione 		mevent_delete(priv->mevp);
4200ff7076bSVincenzo Maffione 	}
4210ff7076bSVincenzo Maffione 	if (priv->nmd) {
4220ff7076bSVincenzo Maffione 		nm_close(priv->nmd);
4230ff7076bSVincenzo Maffione 	}
4240ff7076bSVincenzo Maffione 	be->fd = -1;
4250ff7076bSVincenzo Maffione }
4260ff7076bSVincenzo Maffione 
4270ff7076bSVincenzo Maffione static ssize_t
4280ff7076bSVincenzo Maffione netmap_send(struct net_backend *be, struct iovec *iov,
4290ff7076bSVincenzo Maffione 	    int iovcnt)
4300ff7076bSVincenzo Maffione {
4310ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
4320ff7076bSVincenzo Maffione 	struct netmap_ring *ring;
4330ff7076bSVincenzo Maffione 	ssize_t totlen = 0;
4340ff7076bSVincenzo Maffione 	int nm_buf_size;
4350ff7076bSVincenzo Maffione 	int nm_buf_len;
4360ff7076bSVincenzo Maffione 	uint32_t head;
4370ff7076bSVincenzo Maffione 	void *nm_buf;
4380ff7076bSVincenzo Maffione 	int j;
4390ff7076bSVincenzo Maffione 
4400ff7076bSVincenzo Maffione 	ring = priv->tx;
4410ff7076bSVincenzo Maffione 	head = ring->head;
4420ff7076bSVincenzo Maffione 	if (head == ring->tail) {
4430ff7076bSVincenzo Maffione 		WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt)));
4440ff7076bSVincenzo Maffione 		goto txsync;
4450ff7076bSVincenzo Maffione 	}
4460ff7076bSVincenzo Maffione 	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
4470ff7076bSVincenzo Maffione 	nm_buf_size = ring->nr_buf_size;
4480ff7076bSVincenzo Maffione 	nm_buf_len = 0;
4490ff7076bSVincenzo Maffione 
4500ff7076bSVincenzo Maffione 	for (j = 0; j < iovcnt; j++) {
4510ff7076bSVincenzo Maffione 		int iov_frag_size = iov[j].iov_len;
4520ff7076bSVincenzo Maffione 		void *iov_frag_buf = iov[j].iov_base;
4530ff7076bSVincenzo Maffione 
4540ff7076bSVincenzo Maffione 		totlen += iov_frag_size;
4550ff7076bSVincenzo Maffione 
4560ff7076bSVincenzo Maffione 		/*
4570ff7076bSVincenzo Maffione 		 * Split each iovec fragment over more netmap slots, if
4580ff7076bSVincenzo Maffione 		 * necessary.
4590ff7076bSVincenzo Maffione 		 */
4600ff7076bSVincenzo Maffione 		for (;;) {
4610ff7076bSVincenzo Maffione 			int copylen;
4620ff7076bSVincenzo Maffione 
4630ff7076bSVincenzo Maffione 			copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
4640ff7076bSVincenzo Maffione 			memcpy(nm_buf, iov_frag_buf, copylen);
4650ff7076bSVincenzo Maffione 
4660ff7076bSVincenzo Maffione 			iov_frag_buf += copylen;
4670ff7076bSVincenzo Maffione 			iov_frag_size -= copylen;
4680ff7076bSVincenzo Maffione 			nm_buf += copylen;
4690ff7076bSVincenzo Maffione 			nm_buf_size -= copylen;
4700ff7076bSVincenzo Maffione 			nm_buf_len += copylen;
4710ff7076bSVincenzo Maffione 
4720ff7076bSVincenzo Maffione 			if (iov_frag_size == 0) {
4730ff7076bSVincenzo Maffione 				break;
4740ff7076bSVincenzo Maffione 			}
4750ff7076bSVincenzo Maffione 
4760ff7076bSVincenzo Maffione 			ring->slot[head].len = nm_buf_len;
4770ff7076bSVincenzo Maffione 			ring->slot[head].flags = NS_MOREFRAG;
4780ff7076bSVincenzo Maffione 			head = nm_ring_next(ring, head);
4790ff7076bSVincenzo Maffione 			if (head == ring->tail) {
4800ff7076bSVincenzo Maffione 				/*
4810ff7076bSVincenzo Maffione 				 * We ran out of netmap slots while
4820ff7076bSVincenzo Maffione 				 * splitting the iovec fragments.
4830ff7076bSVincenzo Maffione 				 */
4840ff7076bSVincenzo Maffione 				WPRINTF(("No space, drop %zu bytes\n",
4850ff7076bSVincenzo Maffione 				   count_iov(iov, iovcnt)));
4860ff7076bSVincenzo Maffione 				goto txsync;
4870ff7076bSVincenzo Maffione 			}
4880ff7076bSVincenzo Maffione 			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
4890ff7076bSVincenzo Maffione 			nm_buf_size = ring->nr_buf_size;
4900ff7076bSVincenzo Maffione 			nm_buf_len = 0;
4910ff7076bSVincenzo Maffione 		}
4920ff7076bSVincenzo Maffione 	}
4930ff7076bSVincenzo Maffione 
4940ff7076bSVincenzo Maffione 	/* Complete the last slot, which must not have NS_MOREFRAG set. */
4950ff7076bSVincenzo Maffione 	ring->slot[head].len = nm_buf_len;
4960ff7076bSVincenzo Maffione 	ring->slot[head].flags = 0;
4970ff7076bSVincenzo Maffione 	head = nm_ring_next(ring, head);
4980ff7076bSVincenzo Maffione 
4990ff7076bSVincenzo Maffione 	/* Now update ring->head and ring->cur. */
5000ff7076bSVincenzo Maffione 	ring->head = ring->cur = head;
5010ff7076bSVincenzo Maffione txsync:
5020ff7076bSVincenzo Maffione 	ioctl(be->fd, NIOCTXSYNC, NULL);
5030ff7076bSVincenzo Maffione 
5040ff7076bSVincenzo Maffione 	return (totlen);
5050ff7076bSVincenzo Maffione }
5060ff7076bSVincenzo Maffione 
5070ff7076bSVincenzo Maffione static ssize_t
5080ff7076bSVincenzo Maffione netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
5090ff7076bSVincenzo Maffione {
5100ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
5110ff7076bSVincenzo Maffione 	struct netmap_slot *slot = NULL;
5120ff7076bSVincenzo Maffione 	struct netmap_ring *ring;
5130ff7076bSVincenzo Maffione 	void *iov_frag_buf;
5140ff7076bSVincenzo Maffione 	int iov_frag_size;
5150ff7076bSVincenzo Maffione 	ssize_t totlen = 0;
5160ff7076bSVincenzo Maffione 	uint32_t head;
5170ff7076bSVincenzo Maffione 
5180ff7076bSVincenzo Maffione 	assert(iovcnt);
5190ff7076bSVincenzo Maffione 
5200ff7076bSVincenzo Maffione 	ring = priv->rx;
5210ff7076bSVincenzo Maffione 	head = ring->head;
5220ff7076bSVincenzo Maffione 	iov_frag_buf = iov->iov_base;
5230ff7076bSVincenzo Maffione 	iov_frag_size = iov->iov_len;
5240ff7076bSVincenzo Maffione 
5250ff7076bSVincenzo Maffione 	do {
5260ff7076bSVincenzo Maffione 		int nm_buf_len;
5270ff7076bSVincenzo Maffione 		void *nm_buf;
5280ff7076bSVincenzo Maffione 
5290ff7076bSVincenzo Maffione 		if (head == ring->tail) {
5300ff7076bSVincenzo Maffione 			return (0);
5310ff7076bSVincenzo Maffione 		}
5320ff7076bSVincenzo Maffione 
5330ff7076bSVincenzo Maffione 		slot = ring->slot + head;
5340ff7076bSVincenzo Maffione 		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
5350ff7076bSVincenzo Maffione 		nm_buf_len = slot->len;
5360ff7076bSVincenzo Maffione 
5370ff7076bSVincenzo Maffione 		for (;;) {
5380ff7076bSVincenzo Maffione 			int copylen = nm_buf_len < iov_frag_size ?
5390ff7076bSVincenzo Maffione 			    nm_buf_len : iov_frag_size;
5400ff7076bSVincenzo Maffione 
5410ff7076bSVincenzo Maffione 			memcpy(iov_frag_buf, nm_buf, copylen);
5420ff7076bSVincenzo Maffione 			nm_buf += copylen;
5430ff7076bSVincenzo Maffione 			nm_buf_len -= copylen;
5440ff7076bSVincenzo Maffione 			iov_frag_buf += copylen;
5450ff7076bSVincenzo Maffione 			iov_frag_size -= copylen;
5460ff7076bSVincenzo Maffione 			totlen += copylen;
5470ff7076bSVincenzo Maffione 
5480ff7076bSVincenzo Maffione 			if (nm_buf_len == 0) {
5490ff7076bSVincenzo Maffione 				break;
5500ff7076bSVincenzo Maffione 			}
5510ff7076bSVincenzo Maffione 
5520ff7076bSVincenzo Maffione 			iov++;
5530ff7076bSVincenzo Maffione 			iovcnt--;
5540ff7076bSVincenzo Maffione 			if (iovcnt == 0) {
5550ff7076bSVincenzo Maffione 				/* No space to receive. */
5560ff7076bSVincenzo Maffione 				WPRINTF(("Short iov, drop %zd bytes\n",
5570ff7076bSVincenzo Maffione 				    totlen));
5580ff7076bSVincenzo Maffione 				return (-ENOSPC);
5590ff7076bSVincenzo Maffione 			}
5600ff7076bSVincenzo Maffione 			iov_frag_buf = iov->iov_base;
5610ff7076bSVincenzo Maffione 			iov_frag_size = iov->iov_len;
5620ff7076bSVincenzo Maffione 		}
5630ff7076bSVincenzo Maffione 
5640ff7076bSVincenzo Maffione 		head = nm_ring_next(ring, head);
5650ff7076bSVincenzo Maffione 
5660ff7076bSVincenzo Maffione 	} while (slot->flags & NS_MOREFRAG);
5670ff7076bSVincenzo Maffione 
5680ff7076bSVincenzo Maffione 	/* Release slots to netmap. */
5690ff7076bSVincenzo Maffione 	ring->head = ring->cur = head;
5700ff7076bSVincenzo Maffione 
5710ff7076bSVincenzo Maffione 	return (totlen);
5720ff7076bSVincenzo Maffione }
5730ff7076bSVincenzo Maffione 
5740ff7076bSVincenzo Maffione static struct net_backend netmap_backend = {
5750ff7076bSVincenzo Maffione 	.prefix = "netmap",
5760ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct netmap_priv),
5770ff7076bSVincenzo Maffione 	.init = netmap_init,
5780ff7076bSVincenzo Maffione 	.cleanup = netmap_cleanup,
5790ff7076bSVincenzo Maffione 	.send = netmap_send,
5800ff7076bSVincenzo Maffione 	.recv = netmap_recv,
5810ff7076bSVincenzo Maffione 	.get_cap = netmap_get_cap,
5820ff7076bSVincenzo Maffione 	.set_cap = netmap_set_cap,
5830ff7076bSVincenzo Maffione };
5840ff7076bSVincenzo Maffione 
5850ff7076bSVincenzo Maffione /* A clone of the netmap backend, with a different prefix. */
5860ff7076bSVincenzo Maffione static struct net_backend vale_backend = {
5870ff7076bSVincenzo Maffione 	.prefix = "vale",
5880ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct netmap_priv),
5890ff7076bSVincenzo Maffione 	.init = netmap_init,
5900ff7076bSVincenzo Maffione 	.cleanup = netmap_cleanup,
5910ff7076bSVincenzo Maffione 	.send = netmap_send,
5920ff7076bSVincenzo Maffione 	.recv = netmap_recv,
5930ff7076bSVincenzo Maffione 	.get_cap = netmap_get_cap,
5940ff7076bSVincenzo Maffione 	.set_cap = netmap_set_cap,
5950ff7076bSVincenzo Maffione };
5960ff7076bSVincenzo Maffione 
/* Add the netmap and vale descriptors to the net_backend_set linker set. */
DATA_SET(net_backend_set, netmap_backend);
DATA_SET(net_backend_set, vale_backend);
5990ff7076bSVincenzo Maffione 
6000ff7076bSVincenzo Maffione /*
6010ff7076bSVincenzo Maffione  * Initialize a backend and attach to the frontend.
6020ff7076bSVincenzo Maffione  * This is called during frontend initialization.
6030ff7076bSVincenzo Maffione  *  @pbe is a pointer to the backend to be initialized
6040ff7076bSVincenzo Maffione  *  @devname is the backend-name as supplied on the command line,
6050ff7076bSVincenzo Maffione  * 	e.g. -s 2:0,frontend-name,backend-name[,other-args]
6060ff7076bSVincenzo Maffione  *  @cb is the receive callback supplied by the frontend,
6070ff7076bSVincenzo Maffione  *	and it is invoked in the event loop when a receive
6080ff7076bSVincenzo Maffione  *	event is generated in the hypervisor,
6090ff7076bSVincenzo Maffione  *  @param is a pointer to the frontend, and normally used as
6100ff7076bSVincenzo Maffione  *	the argument for the callback.
6110ff7076bSVincenzo Maffione  */
6120ff7076bSVincenzo Maffione int
6130ff7076bSVincenzo Maffione netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb,
6140ff7076bSVincenzo Maffione     void *param)
6150ff7076bSVincenzo Maffione {
6160ff7076bSVincenzo Maffione 	struct net_backend **pbe, *nbe, *tbe = NULL;
6170ff7076bSVincenzo Maffione 	int err;
6180ff7076bSVincenzo Maffione 
6190ff7076bSVincenzo Maffione 	/*
6200ff7076bSVincenzo Maffione 	 * Find the network backend that matches the user-provided
6210ff7076bSVincenzo Maffione 	 * device name. net_backend_set is built using a linker set.
6220ff7076bSVincenzo Maffione 	 */
6230ff7076bSVincenzo Maffione 	SET_FOREACH(pbe, net_backend_set) {
6240ff7076bSVincenzo Maffione 		if (strncmp(devname, (*pbe)->prefix,
6250ff7076bSVincenzo Maffione 		    strlen((*pbe)->prefix)) == 0) {
6260ff7076bSVincenzo Maffione 			tbe = *pbe;
6270ff7076bSVincenzo Maffione 			assert(tbe->init != NULL);
6280ff7076bSVincenzo Maffione 			assert(tbe->cleanup != NULL);
6290ff7076bSVincenzo Maffione 			assert(tbe->send != NULL);
6300ff7076bSVincenzo Maffione 			assert(tbe->recv != NULL);
6310ff7076bSVincenzo Maffione 			assert(tbe->get_cap != NULL);
6320ff7076bSVincenzo Maffione 			assert(tbe->set_cap != NULL);
6330ff7076bSVincenzo Maffione 			break;
6340ff7076bSVincenzo Maffione 		}
6350ff7076bSVincenzo Maffione 	}
6360ff7076bSVincenzo Maffione 
6370ff7076bSVincenzo Maffione 	*ret = NULL;
6380ff7076bSVincenzo Maffione 	if (tbe == NULL)
6390ff7076bSVincenzo Maffione 		return (EINVAL);
6400ff7076bSVincenzo Maffione 	nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
6410ff7076bSVincenzo Maffione 	*nbe = *tbe;	/* copy the template */
6420ff7076bSVincenzo Maffione 	nbe->fd = -1;
6430ff7076bSVincenzo Maffione 	nbe->sc = param;
6440ff7076bSVincenzo Maffione 	nbe->be_vnet_hdr_len = 0;
6450ff7076bSVincenzo Maffione 	nbe->fe_vnet_hdr_len = 0;
6460ff7076bSVincenzo Maffione 
6470ff7076bSVincenzo Maffione 	/* Initialize the backend. */
6480ff7076bSVincenzo Maffione 	err = nbe->init(nbe, devname, cb, param);
6490ff7076bSVincenzo Maffione 	if (err) {
6500ff7076bSVincenzo Maffione 		free(nbe);
6510ff7076bSVincenzo Maffione 		return (err);
6520ff7076bSVincenzo Maffione 	}
6530ff7076bSVincenzo Maffione 
6540ff7076bSVincenzo Maffione 	*ret = nbe;
6550ff7076bSVincenzo Maffione 
6560ff7076bSVincenzo Maffione 	return (0);
6570ff7076bSVincenzo Maffione }
6580ff7076bSVincenzo Maffione 
6590ff7076bSVincenzo Maffione void
6600ff7076bSVincenzo Maffione netbe_cleanup(struct net_backend *be)
6610ff7076bSVincenzo Maffione {
6620ff7076bSVincenzo Maffione 
6630ff7076bSVincenzo Maffione 	if (be != NULL) {
6640ff7076bSVincenzo Maffione 		be->cleanup(be);
6650ff7076bSVincenzo Maffione 		free(be);
6660ff7076bSVincenzo Maffione 	}
6670ff7076bSVincenzo Maffione }
6680ff7076bSVincenzo Maffione 
6690ff7076bSVincenzo Maffione uint64_t
6700ff7076bSVincenzo Maffione netbe_get_cap(struct net_backend *be)
6710ff7076bSVincenzo Maffione {
6720ff7076bSVincenzo Maffione 
6730ff7076bSVincenzo Maffione 	assert(be != NULL);
6740ff7076bSVincenzo Maffione 	return (be->get_cap(be));
6750ff7076bSVincenzo Maffione }
6760ff7076bSVincenzo Maffione 
6770ff7076bSVincenzo Maffione int
6780ff7076bSVincenzo Maffione netbe_set_cap(struct net_backend *be, uint64_t features,
6790ff7076bSVincenzo Maffione 	      unsigned vnet_hdr_len)
6800ff7076bSVincenzo Maffione {
6810ff7076bSVincenzo Maffione 	int ret;
6820ff7076bSVincenzo Maffione 
6830ff7076bSVincenzo Maffione 	assert(be != NULL);
6840ff7076bSVincenzo Maffione 
6850ff7076bSVincenzo Maffione 	/* There are only three valid lengths, i.e., 0, 10 and 12. */
6860ff7076bSVincenzo Maffione 	if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
6870ff7076bSVincenzo Maffione 		&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
6880ff7076bSVincenzo Maffione 		return (-1);
6890ff7076bSVincenzo Maffione 
6900ff7076bSVincenzo Maffione 	be->fe_vnet_hdr_len = vnet_hdr_len;
6910ff7076bSVincenzo Maffione 
6920ff7076bSVincenzo Maffione 	ret = be->set_cap(be, features, vnet_hdr_len);
6930ff7076bSVincenzo Maffione 	assert(be->be_vnet_hdr_len == 0 ||
6940ff7076bSVincenzo Maffione 	       be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
6950ff7076bSVincenzo Maffione 
6960ff7076bSVincenzo Maffione 	return (ret);
6970ff7076bSVincenzo Maffione }
6980ff7076bSVincenzo Maffione 
/*
 * Drop the first @tlen bytes from the scatter/gather list @iov.
 *
 * The first segment is shortened in place. If bytes remain in it, its
 * base is advanced by @tlen and @iov is returned. If it becomes empty,
 * *@iovcnt is decremented and a pointer to the second segment is
 * returned; in that case the first segment's base is left untouched.
 */
static __inline struct iovec *
iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
{

	/* XXX short-cut: assume first segment is >= tlen */
	assert(iov[0].iov_len >= tlen);

	iov->iov_len -= tlen;
	if (iov->iov_len != 0) {
		/* Data is left in the first segment: skip the prefix. */
		iov->iov_base = (void *)((uintptr_t)iov->iov_base + tlen);
		return (iov);
	}

	/* First segment fully consumed: there must be another one. */
	assert(*iovcnt > 1);
	*iovcnt -= 1;

	return (iov + 1);
}
7190ff7076bSVincenzo Maffione 
7200ff7076bSVincenzo Maffione ssize_t
7210ff7076bSVincenzo Maffione netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt)
7220ff7076bSVincenzo Maffione {
7230ff7076bSVincenzo Maffione 
7240ff7076bSVincenzo Maffione 	assert(be != NULL);
7250ff7076bSVincenzo Maffione 	if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
7260ff7076bSVincenzo Maffione 		/*
7270ff7076bSVincenzo Maffione 		 * The frontend uses a virtio-net header, but the backend
7280ff7076bSVincenzo Maffione 		 * does not. We ignore it (as it must be all zeroes) and
7290ff7076bSVincenzo Maffione 		 * strip it.
7300ff7076bSVincenzo Maffione 		 */
7310ff7076bSVincenzo Maffione 		assert(be->be_vnet_hdr_len == 0);
7320ff7076bSVincenzo Maffione 		iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
7330ff7076bSVincenzo Maffione 	}
7340ff7076bSVincenzo Maffione 
7350ff7076bSVincenzo Maffione 	return (be->send(be, iov, iovcnt));
7360ff7076bSVincenzo Maffione }
7370ff7076bSVincenzo Maffione 
7380ff7076bSVincenzo Maffione /*
7390ff7076bSVincenzo Maffione  * Try to read a packet from the backend, without blocking.
7400ff7076bSVincenzo Maffione  * If no packets are available, return 0. In case of success, return
7410ff7076bSVincenzo Maffione  * the length of the packet just read. Return -1 in case of errors.
7420ff7076bSVincenzo Maffione  */
7430ff7076bSVincenzo Maffione ssize_t
7440ff7076bSVincenzo Maffione netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
7450ff7076bSVincenzo Maffione {
7460ff7076bSVincenzo Maffione 	/* Length of prepended virtio-net header. */
7470ff7076bSVincenzo Maffione 	unsigned int hlen = be->fe_vnet_hdr_len;
7480ff7076bSVincenzo Maffione 	int ret;
7490ff7076bSVincenzo Maffione 
7500ff7076bSVincenzo Maffione 	assert(be != NULL);
7510ff7076bSVincenzo Maffione 
7520ff7076bSVincenzo Maffione 	if (hlen && hlen != be->be_vnet_hdr_len) {
7530ff7076bSVincenzo Maffione 		/*
7540ff7076bSVincenzo Maffione 		 * The frontend uses a virtio-net header, but the backend
7550ff7076bSVincenzo Maffione 		 * does not. We need to prepend a zeroed header.
7560ff7076bSVincenzo Maffione 		 */
7570ff7076bSVincenzo Maffione 		struct virtio_net_rxhdr *vh;
7580ff7076bSVincenzo Maffione 
7590ff7076bSVincenzo Maffione 		assert(be->be_vnet_hdr_len == 0);
7600ff7076bSVincenzo Maffione 
7610ff7076bSVincenzo Maffione 		/*
7620ff7076bSVincenzo Maffione 		 * Get a pointer to the rx header, and use the
7630ff7076bSVincenzo Maffione 		 * data immediately following it for the packet buffer.
7640ff7076bSVincenzo Maffione 		 */
7650ff7076bSVincenzo Maffione 		vh = iov[0].iov_base;
7660ff7076bSVincenzo Maffione 		iov = iov_trim(iov, &iovcnt, hlen);
7670ff7076bSVincenzo Maffione 
7680ff7076bSVincenzo Maffione 		/*
7690ff7076bSVincenzo Maffione 		 * The only valid field in the rx packet header is the
7700ff7076bSVincenzo Maffione 		 * number of buffers if merged rx bufs were negotiated.
7710ff7076bSVincenzo Maffione 		 */
7720ff7076bSVincenzo Maffione 		memset(vh, 0, hlen);
7730ff7076bSVincenzo Maffione 		if (hlen == VNET_HDR_LEN) {
7740ff7076bSVincenzo Maffione 			vh->vrh_bufs = 1;
7750ff7076bSVincenzo Maffione 		}
7760ff7076bSVincenzo Maffione 	}
7770ff7076bSVincenzo Maffione 
7780ff7076bSVincenzo Maffione 	ret = be->recv(be, iov, iovcnt);
7790ff7076bSVincenzo Maffione 	if (ret > 0) {
7800ff7076bSVincenzo Maffione 		ret += hlen;
7810ff7076bSVincenzo Maffione 	}
7820ff7076bSVincenzo Maffione 
7830ff7076bSVincenzo Maffione 	return (ret);
7840ff7076bSVincenzo Maffione }
7850ff7076bSVincenzo Maffione 
/*
 * Read a packet from the backend and discard it.
 * Returns the size of the discarded packet or zero if no packet was available.
 * A negative error code is returned in case of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the dummybuf is only used to discard frames,
	 * so there is no need for it to be per-vtnet or locked.
	 * We only make it large enough for TSO-sized segment.
	 */
	static uint8_t dummybuf[65536 + 64];
	/* Single-segment scatter/gather list covering the scratch buffer. */
	struct iovec sink = {
		.iov_base = dummybuf,
		.iov_len = sizeof(dummybuf),
	};

	return (netbe_recv(be, &sink, 1));
}
8070ff7076bSVincenzo Maffione 
808