xref: /freebsd/usr.sbin/bhyve/net_backends.c (revision 0ff7076bdbc6dae5ea44c0acdb567e1cede199d1)
1*0ff7076bSVincenzo Maffione /*-
2*0ff7076bSVincenzo Maffione  * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
3*0ff7076bSVincenzo Maffione  *
4*0ff7076bSVincenzo Maffione  * Redistribution and use in source and binary forms, with or without
5*0ff7076bSVincenzo Maffione  * modification, are permitted provided that the following conditions
6*0ff7076bSVincenzo Maffione  * are met:
7*0ff7076bSVincenzo Maffione  * 1. Redistributions of source code must retain the above copyright
8*0ff7076bSVincenzo Maffione  *    notice, this list of conditions and the following disclaimer.
9*0ff7076bSVincenzo Maffione  * 2. Redistributions in binary form must reproduce the above copyright
10*0ff7076bSVincenzo Maffione  *    notice, this list of conditions and the following disclaimer in the
11*0ff7076bSVincenzo Maffione  *    documentation and/or other materials provided with the distribution.
12*0ff7076bSVincenzo Maffione  *
13*0ff7076bSVincenzo Maffione  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
14*0ff7076bSVincenzo Maffione  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15*0ff7076bSVincenzo Maffione  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16*0ff7076bSVincenzo Maffione  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
17*0ff7076bSVincenzo Maffione  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
18*0ff7076bSVincenzo Maffione  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
19*0ff7076bSVincenzo Maffione  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
20*0ff7076bSVincenzo Maffione  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21*0ff7076bSVincenzo Maffione  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
22*0ff7076bSVincenzo Maffione  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
23*0ff7076bSVincenzo Maffione  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24*0ff7076bSVincenzo Maffione  *
25*0ff7076bSVincenzo Maffione  * $FreeBSD$
26*0ff7076bSVincenzo Maffione  */
27*0ff7076bSVincenzo Maffione 
28*0ff7076bSVincenzo Maffione /*
29*0ff7076bSVincenzo Maffione  * This file implements multiple network backends (tap, netmap, ...),
30*0ff7076bSVincenzo Maffione  * to be used by network frontends such as virtio-net and e1000.
31*0ff7076bSVincenzo Maffione  * The API to access the backend (e.g. send/receive packets, negotiate
32*0ff7076bSVincenzo Maffione  * features) is exported by net_backends.h.
33*0ff7076bSVincenzo Maffione  */
34*0ff7076bSVincenzo Maffione 
35*0ff7076bSVincenzo Maffione #include <sys/types.h>		/* u_short etc */
36*0ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
37*0ff7076bSVincenzo Maffione #include <sys/capsicum.h>
38*0ff7076bSVincenzo Maffione #endif
39*0ff7076bSVincenzo Maffione #include <sys/cdefs.h>
40*0ff7076bSVincenzo Maffione #include <sys/ioctl.h>
41*0ff7076bSVincenzo Maffione #include <sys/mman.h>
42*0ff7076bSVincenzo Maffione #include <sys/uio.h>
43*0ff7076bSVincenzo Maffione 
44*0ff7076bSVincenzo Maffione #include <net/if.h>
45*0ff7076bSVincenzo Maffione #include <net/netmap.h>
46*0ff7076bSVincenzo Maffione #include <net/netmap_virt.h>
47*0ff7076bSVincenzo Maffione #define NETMAP_WITH_LIBS
48*0ff7076bSVincenzo Maffione #include <net/netmap_user.h>
49*0ff7076bSVincenzo Maffione 
50*0ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
51*0ff7076bSVincenzo Maffione #include <capsicum_helpers.h>
52*0ff7076bSVincenzo Maffione #endif
53*0ff7076bSVincenzo Maffione #include <err.h>
54*0ff7076bSVincenzo Maffione #include <errno.h>
55*0ff7076bSVincenzo Maffione #include <fcntl.h>
56*0ff7076bSVincenzo Maffione #include <stdio.h>
57*0ff7076bSVincenzo Maffione #include <stdlib.h>
58*0ff7076bSVincenzo Maffione #include <stdint.h>
59*0ff7076bSVincenzo Maffione #include <string.h>
60*0ff7076bSVincenzo Maffione #include <unistd.h>
61*0ff7076bSVincenzo Maffione #include <sysexits.h>
62*0ff7076bSVincenzo Maffione #include <assert.h>
63*0ff7076bSVincenzo Maffione #include <pthread.h>
64*0ff7076bSVincenzo Maffione #include <pthread_np.h>
65*0ff7076bSVincenzo Maffione #include <poll.h>
66*0ff7076bSVincenzo Maffione #include <assert.h>
67*0ff7076bSVincenzo Maffione 
68*0ff7076bSVincenzo Maffione 
69*0ff7076bSVincenzo Maffione #include "iov.h"
70*0ff7076bSVincenzo Maffione #include "mevent.h"
71*0ff7076bSVincenzo Maffione #include "net_backends.h"
72*0ff7076bSVincenzo Maffione 
73*0ff7076bSVincenzo Maffione #include <sys/linker_set.h>
74*0ff7076bSVincenzo Maffione 
75*0ff7076bSVincenzo Maffione /*
76*0ff7076bSVincenzo Maffione  * Each network backend registers a set of function pointers that are
77*0ff7076bSVincenzo Maffione  * used to implement the net backends API.
78*0ff7076bSVincenzo Maffione  * This might need to be exposed if we implement backends in separate files.
79*0ff7076bSVincenzo Maffione  */
80*0ff7076bSVincenzo Maffione struct net_backend {
81*0ff7076bSVincenzo Maffione 	const char *prefix;	/* prefix matching this backend */
82*0ff7076bSVincenzo Maffione 
83*0ff7076bSVincenzo Maffione 	/*
84*0ff7076bSVincenzo Maffione 	 * Routines used to initialize and cleanup the resources needed
85*0ff7076bSVincenzo Maffione 	 * by a backend. The cleanup function is used internally,
86*0ff7076bSVincenzo Maffione 	 * and should not be called by the frontend.
87*0ff7076bSVincenzo Maffione 	 */
88*0ff7076bSVincenzo Maffione 	int (*init)(struct net_backend *be, const char *devname,
89*0ff7076bSVincenzo Maffione 	    net_be_rxeof_t cb, void *param);
90*0ff7076bSVincenzo Maffione 	void (*cleanup)(struct net_backend *be);
91*0ff7076bSVincenzo Maffione 
92*0ff7076bSVincenzo Maffione 	/*
93*0ff7076bSVincenzo Maffione 	 * Called to serve a guest transmit request. The scatter-gather
94*0ff7076bSVincenzo Maffione 	 * vector provided by the caller has 'iovcnt' elements and contains
95*0ff7076bSVincenzo Maffione 	 * the packet to send.
96*0ff7076bSVincenzo Maffione 	 */
97*0ff7076bSVincenzo Maffione 	ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt);
98*0ff7076bSVincenzo Maffione 
99*0ff7076bSVincenzo Maffione 	/*
100*0ff7076bSVincenzo Maffione 	 * Called to receive a packet from the backend. When the function
101*0ff7076bSVincenzo Maffione 	 * returns a positive value 'len', the scatter-gather vector
102*0ff7076bSVincenzo Maffione 	 * provided by the caller contains a packet with such length.
103*0ff7076bSVincenzo Maffione 	 * The function returns 0 if the backend doesn't have a new packet to
104*0ff7076bSVincenzo Maffione 	 * receive.
105*0ff7076bSVincenzo Maffione 	 */
106*0ff7076bSVincenzo Maffione 	ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
107*0ff7076bSVincenzo Maffione 
108*0ff7076bSVincenzo Maffione 	/*
109*0ff7076bSVincenzo Maffione 	 * Ask the backend for the virtio-net features it is able to
110*0ff7076bSVincenzo Maffione 	 * support. Possible features are TSO, UFO and checksum offloading
111*0ff7076bSVincenzo Maffione 	 * in both rx and tx direction and for both IPv4 and IPv6.
112*0ff7076bSVincenzo Maffione 	 */
113*0ff7076bSVincenzo Maffione 	uint64_t (*get_cap)(struct net_backend *be);
114*0ff7076bSVincenzo Maffione 
115*0ff7076bSVincenzo Maffione 	/*
116*0ff7076bSVincenzo Maffione 	 * Tell the backend to enable/disable the specified virtio-net
117*0ff7076bSVincenzo Maffione 	 * features (capabilities).
118*0ff7076bSVincenzo Maffione 	 */
119*0ff7076bSVincenzo Maffione 	int (*set_cap)(struct net_backend *be, uint64_t features,
120*0ff7076bSVincenzo Maffione 	    unsigned int vnet_hdr_len);
121*0ff7076bSVincenzo Maffione 
122*0ff7076bSVincenzo Maffione 	struct pci_vtnet_softc *sc;
123*0ff7076bSVincenzo Maffione 	int fd;
124*0ff7076bSVincenzo Maffione 
125*0ff7076bSVincenzo Maffione 	/*
126*0ff7076bSVincenzo Maffione 	 * Length of the virtio-net header used by the backend and the
127*0ff7076bSVincenzo Maffione 	 * frontend, respectively. A zero value means that the header
128*0ff7076bSVincenzo Maffione 	 * is not used.
129*0ff7076bSVincenzo Maffione 	 */
130*0ff7076bSVincenzo Maffione 	unsigned int be_vnet_hdr_len;
131*0ff7076bSVincenzo Maffione 	unsigned int fe_vnet_hdr_len;
132*0ff7076bSVincenzo Maffione 
133*0ff7076bSVincenzo Maffione 	/* Size of backend-specific private data. */
134*0ff7076bSVincenzo Maffione 	size_t priv_size;
135*0ff7076bSVincenzo Maffione 
136*0ff7076bSVincenzo Maffione 	/* Room for backend-specific data. */
137*0ff7076bSVincenzo Maffione 	char opaque[0];
138*0ff7076bSVincenzo Maffione };
139*0ff7076bSVincenzo Maffione 
140*0ff7076bSVincenzo Maffione SET_DECLARE(net_backend_set, struct net_backend);
141*0ff7076bSVincenzo Maffione 
142*0ff7076bSVincenzo Maffione #define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)
143*0ff7076bSVincenzo Maffione 
144*0ff7076bSVincenzo Maffione #define WPRINTF(params) printf params
145*0ff7076bSVincenzo Maffione 
/*
 * The tap backend
 */

/* Private state kept by the tap (and vmnet) backend. */
struct tap_priv {
	/* Event-loop registration obtained from mevent_add(). */
	struct mevent *mevp;
};
153*0ff7076bSVincenzo Maffione 
154*0ff7076bSVincenzo Maffione static void
155*0ff7076bSVincenzo Maffione tap_cleanup(struct net_backend *be)
156*0ff7076bSVincenzo Maffione {
157*0ff7076bSVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
158*0ff7076bSVincenzo Maffione 
159*0ff7076bSVincenzo Maffione 	if (priv->mevp) {
160*0ff7076bSVincenzo Maffione 		mevent_delete(priv->mevp);
161*0ff7076bSVincenzo Maffione 	}
162*0ff7076bSVincenzo Maffione 	if (be->fd != -1) {
163*0ff7076bSVincenzo Maffione 		close(be->fd);
164*0ff7076bSVincenzo Maffione 		be->fd = -1;
165*0ff7076bSVincenzo Maffione 	}
166*0ff7076bSVincenzo Maffione }
167*0ff7076bSVincenzo Maffione 
168*0ff7076bSVincenzo Maffione static int
169*0ff7076bSVincenzo Maffione tap_init(struct net_backend *be, const char *devname,
170*0ff7076bSVincenzo Maffione 	 net_be_rxeof_t cb, void *param)
171*0ff7076bSVincenzo Maffione {
172*0ff7076bSVincenzo Maffione 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
173*0ff7076bSVincenzo Maffione 	char tbuf[80];
174*0ff7076bSVincenzo Maffione 	int fd;
175*0ff7076bSVincenzo Maffione 	int opt = 1;
176*0ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
177*0ff7076bSVincenzo Maffione 	cap_rights_t rights;
178*0ff7076bSVincenzo Maffione #endif
179*0ff7076bSVincenzo Maffione 
180*0ff7076bSVincenzo Maffione 	if (cb == NULL) {
181*0ff7076bSVincenzo Maffione 		WPRINTF(("TAP backend requires non-NULL callback\n"));
182*0ff7076bSVincenzo Maffione 		return (-1);
183*0ff7076bSVincenzo Maffione 	}
184*0ff7076bSVincenzo Maffione 
185*0ff7076bSVincenzo Maffione 	strcpy(tbuf, "/dev/");
186*0ff7076bSVincenzo Maffione 	strlcat(tbuf, devname, sizeof(tbuf));
187*0ff7076bSVincenzo Maffione 
188*0ff7076bSVincenzo Maffione 	fd = open(tbuf, O_RDWR);
189*0ff7076bSVincenzo Maffione 	if (fd == -1) {
190*0ff7076bSVincenzo Maffione 		WPRINTF(("open of tap device %s failed\n", tbuf));
191*0ff7076bSVincenzo Maffione 		goto error;
192*0ff7076bSVincenzo Maffione 	}
193*0ff7076bSVincenzo Maffione 
194*0ff7076bSVincenzo Maffione 	/*
195*0ff7076bSVincenzo Maffione 	 * Set non-blocking and register for read
196*0ff7076bSVincenzo Maffione 	 * notifications with the event loop
197*0ff7076bSVincenzo Maffione 	 */
198*0ff7076bSVincenzo Maffione 	if (ioctl(fd, FIONBIO, &opt) < 0) {
199*0ff7076bSVincenzo Maffione 		WPRINTF(("tap device O_NONBLOCK failed\n"));
200*0ff7076bSVincenzo Maffione 		goto error;
201*0ff7076bSVincenzo Maffione 	}
202*0ff7076bSVincenzo Maffione 
203*0ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM
204*0ff7076bSVincenzo Maffione 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
205*0ff7076bSVincenzo Maffione 	if (caph_rights_limit(fd, &rights) == -1)
206*0ff7076bSVincenzo Maffione 		errx(EX_OSERR, "Unable to apply rights for sandbox");
207*0ff7076bSVincenzo Maffione #endif
208*0ff7076bSVincenzo Maffione 
209*0ff7076bSVincenzo Maffione 	priv->mevp = mevent_add(fd, EVF_READ, cb, param);
210*0ff7076bSVincenzo Maffione 	if (priv->mevp == NULL) {
211*0ff7076bSVincenzo Maffione 		WPRINTF(("Could not register event\n"));
212*0ff7076bSVincenzo Maffione 		goto error;
213*0ff7076bSVincenzo Maffione 	}
214*0ff7076bSVincenzo Maffione 
215*0ff7076bSVincenzo Maffione 	be->fd = fd;
216*0ff7076bSVincenzo Maffione 
217*0ff7076bSVincenzo Maffione 	return (0);
218*0ff7076bSVincenzo Maffione 
219*0ff7076bSVincenzo Maffione error:
220*0ff7076bSVincenzo Maffione 	tap_cleanup(be);
221*0ff7076bSVincenzo Maffione 	return (-1);
222*0ff7076bSVincenzo Maffione }
223*0ff7076bSVincenzo Maffione 
224*0ff7076bSVincenzo Maffione /*
225*0ff7076bSVincenzo Maffione  * Called to send a buffer chain out to the tap device
226*0ff7076bSVincenzo Maffione  */
227*0ff7076bSVincenzo Maffione static ssize_t
228*0ff7076bSVincenzo Maffione tap_send(struct net_backend *be, struct iovec *iov, int iovcnt)
229*0ff7076bSVincenzo Maffione {
230*0ff7076bSVincenzo Maffione 	return (writev(be->fd, iov, iovcnt));
231*0ff7076bSVincenzo Maffione }
232*0ff7076bSVincenzo Maffione 
233*0ff7076bSVincenzo Maffione static ssize_t
234*0ff7076bSVincenzo Maffione tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
235*0ff7076bSVincenzo Maffione {
236*0ff7076bSVincenzo Maffione 	ssize_t ret;
237*0ff7076bSVincenzo Maffione 
238*0ff7076bSVincenzo Maffione 	/* Should never be called without a valid tap fd */
239*0ff7076bSVincenzo Maffione 	assert(be->fd != -1);
240*0ff7076bSVincenzo Maffione 
241*0ff7076bSVincenzo Maffione 	ret = readv(be->fd, iov, iovcnt);
242*0ff7076bSVincenzo Maffione 
243*0ff7076bSVincenzo Maffione 	if (ret < 0 && errno == EWOULDBLOCK) {
244*0ff7076bSVincenzo Maffione 		return (0);
245*0ff7076bSVincenzo Maffione 	}
246*0ff7076bSVincenzo Maffione 
247*0ff7076bSVincenzo Maffione 	return (ret);
248*0ff7076bSVincenzo Maffione }
249*0ff7076bSVincenzo Maffione 
/*
 * Report the virtio-net features supported by the tap backend.
 * The backend argument is unused; the result is always the empty set.
 */
static uint64_t
tap_get_cap(struct net_backend *be)
{
	uint64_t caps = 0;	/* no capabilities for now */

	return (caps);
}
256*0ff7076bSVincenzo Maffione 
/*
 * Accept a feature negotiation request for the tap backend.
 *
 * Since the backend supports neither offload features nor a virtio-net
 * header, the only acceptable request is the empty one: returns 0 when
 * both 'features' and 'vnet_hdr_len' are zero, -1 otherwise.
 */
static int
tap_set_cap(struct net_backend *be, uint64_t features,
		unsigned vnet_hdr_len)
{

	if (features == 0 && vnet_hdr_len == 0)
		return (0);

	return (-1);
}
264*0ff7076bSVincenzo Maffione 
265*0ff7076bSVincenzo Maffione static struct net_backend tap_backend = {
266*0ff7076bSVincenzo Maffione 	.prefix = "tap",
267*0ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct tap_priv),
268*0ff7076bSVincenzo Maffione 	.init = tap_init,
269*0ff7076bSVincenzo Maffione 	.cleanup = tap_cleanup,
270*0ff7076bSVincenzo Maffione 	.send = tap_send,
271*0ff7076bSVincenzo Maffione 	.recv = tap_recv,
272*0ff7076bSVincenzo Maffione 	.get_cap = tap_get_cap,
273*0ff7076bSVincenzo Maffione 	.set_cap = tap_set_cap,
274*0ff7076bSVincenzo Maffione };
275*0ff7076bSVincenzo Maffione 
276*0ff7076bSVincenzo Maffione /* A clone of the tap backend, with a different prefix. */
277*0ff7076bSVincenzo Maffione static struct net_backend vmnet_backend = {
278*0ff7076bSVincenzo Maffione 	.prefix = "vmnet",
279*0ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct tap_priv),
280*0ff7076bSVincenzo Maffione 	.init = tap_init,
281*0ff7076bSVincenzo Maffione 	.cleanup = tap_cleanup,
282*0ff7076bSVincenzo Maffione 	.send = tap_send,
283*0ff7076bSVincenzo Maffione 	.recv = tap_recv,
284*0ff7076bSVincenzo Maffione 	.get_cap = tap_get_cap,
285*0ff7076bSVincenzo Maffione 	.set_cap = tap_set_cap,
286*0ff7076bSVincenzo Maffione };
287*0ff7076bSVincenzo Maffione 
288*0ff7076bSVincenzo Maffione DATA_SET(net_backend_set, tap_backend);
289*0ff7076bSVincenzo Maffione DATA_SET(net_backend_set, vmnet_backend);
290*0ff7076bSVincenzo Maffione 
/*
 * The netmap backend
 */

/*
 * The virtio-net features supported by netmap: checksum, TSO (v4/v6)
 * and UFO, in both the host and the guest flavour.
 */
#define NETMAP_FEATURES ( \
		VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM | \
		VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_GUEST_TSO4 | \
		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_GUEST_TSO6 | \
		VIRTIO_NET_F_HOST_UFO | VIRTIO_NET_F_GUEST_UFO)
300*0ff7076bSVincenzo Maffione 
301*0ff7076bSVincenzo Maffione struct netmap_priv {
302*0ff7076bSVincenzo Maffione 	char ifname[IFNAMSIZ];
303*0ff7076bSVincenzo Maffione 	struct nm_desc *nmd;
304*0ff7076bSVincenzo Maffione 	uint16_t memid;
305*0ff7076bSVincenzo Maffione 	struct netmap_ring *rx;
306*0ff7076bSVincenzo Maffione 	struct netmap_ring *tx;
307*0ff7076bSVincenzo Maffione 	struct mevent *mevp;
308*0ff7076bSVincenzo Maffione 	net_be_rxeof_t cb;
309*0ff7076bSVincenzo Maffione 	void *cb_param;
310*0ff7076bSVincenzo Maffione };
311*0ff7076bSVincenzo Maffione 
312*0ff7076bSVincenzo Maffione static void
313*0ff7076bSVincenzo Maffione nmreq_init(struct nmreq *req, char *ifname)
314*0ff7076bSVincenzo Maffione {
315*0ff7076bSVincenzo Maffione 
316*0ff7076bSVincenzo Maffione 	memset(req, 0, sizeof(*req));
317*0ff7076bSVincenzo Maffione 	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
318*0ff7076bSVincenzo Maffione 	req->nr_version = NETMAP_API;
319*0ff7076bSVincenzo Maffione }
320*0ff7076bSVincenzo Maffione 
321*0ff7076bSVincenzo Maffione static int
322*0ff7076bSVincenzo Maffione netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
323*0ff7076bSVincenzo Maffione {
324*0ff7076bSVincenzo Maffione 	int err;
325*0ff7076bSVincenzo Maffione 	struct nmreq req;
326*0ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
327*0ff7076bSVincenzo Maffione 
328*0ff7076bSVincenzo Maffione 	nmreq_init(&req, priv->ifname);
329*0ff7076bSVincenzo Maffione 	req.nr_cmd = NETMAP_BDG_VNET_HDR;
330*0ff7076bSVincenzo Maffione 	req.nr_arg1 = vnet_hdr_len;
331*0ff7076bSVincenzo Maffione 	err = ioctl(be->fd, NIOCREGIF, &req);
332*0ff7076bSVincenzo Maffione 	if (err) {
333*0ff7076bSVincenzo Maffione 		WPRINTF(("Unable to set vnet header length %d\n",
334*0ff7076bSVincenzo Maffione 				vnet_hdr_len));
335*0ff7076bSVincenzo Maffione 		return (err);
336*0ff7076bSVincenzo Maffione 	}
337*0ff7076bSVincenzo Maffione 
338*0ff7076bSVincenzo Maffione 	be->be_vnet_hdr_len = vnet_hdr_len;
339*0ff7076bSVincenzo Maffione 
340*0ff7076bSVincenzo Maffione 	return (0);
341*0ff7076bSVincenzo Maffione }
342*0ff7076bSVincenzo Maffione 
343*0ff7076bSVincenzo Maffione static int
344*0ff7076bSVincenzo Maffione netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
345*0ff7076bSVincenzo Maffione {
346*0ff7076bSVincenzo Maffione 	int prev_hdr_len = be->be_vnet_hdr_len;
347*0ff7076bSVincenzo Maffione 	int ret;
348*0ff7076bSVincenzo Maffione 
349*0ff7076bSVincenzo Maffione 	if (vnet_hdr_len == prev_hdr_len) {
350*0ff7076bSVincenzo Maffione 		return (1);
351*0ff7076bSVincenzo Maffione 	}
352*0ff7076bSVincenzo Maffione 
353*0ff7076bSVincenzo Maffione 	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
354*0ff7076bSVincenzo Maffione 	if (ret) {
355*0ff7076bSVincenzo Maffione 		return (0);
356*0ff7076bSVincenzo Maffione 	}
357*0ff7076bSVincenzo Maffione 
358*0ff7076bSVincenzo Maffione 	netmap_set_vnet_hdr_len(be, prev_hdr_len);
359*0ff7076bSVincenzo Maffione 
360*0ff7076bSVincenzo Maffione 	return (1);
361*0ff7076bSVincenzo Maffione }
362*0ff7076bSVincenzo Maffione 
363*0ff7076bSVincenzo Maffione static uint64_t
364*0ff7076bSVincenzo Maffione netmap_get_cap(struct net_backend *be)
365*0ff7076bSVincenzo Maffione {
366*0ff7076bSVincenzo Maffione 
367*0ff7076bSVincenzo Maffione 	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
368*0ff7076bSVincenzo Maffione 	    NETMAP_FEATURES : 0);
369*0ff7076bSVincenzo Maffione }
370*0ff7076bSVincenzo Maffione 
/*
 * Apply the negotiated configuration to the netmap backend. Only the
 * virtio-net header length is acted upon; 'features' is not used here.
 * Returns the result of netmap_set_vnet_hdr_len().
 */
static int
netmap_set_cap(struct net_backend *be, uint64_t features,
	       unsigned vnet_hdr_len)
{
	int rc;

	rc = netmap_set_vnet_hdr_len(be, vnet_hdr_len);

	return (rc);
}
378*0ff7076bSVincenzo Maffione 
379*0ff7076bSVincenzo Maffione static int
380*0ff7076bSVincenzo Maffione netmap_init(struct net_backend *be, const char *devname,
381*0ff7076bSVincenzo Maffione 	    net_be_rxeof_t cb, void *param)
382*0ff7076bSVincenzo Maffione {
383*0ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
384*0ff7076bSVincenzo Maffione 
385*0ff7076bSVincenzo Maffione 	strlcpy(priv->ifname, devname, sizeof(priv->ifname));
386*0ff7076bSVincenzo Maffione 	priv->ifname[sizeof(priv->ifname) - 1] = '\0';
387*0ff7076bSVincenzo Maffione 
388*0ff7076bSVincenzo Maffione 	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
389*0ff7076bSVincenzo Maffione 	if (priv->nmd == NULL) {
390*0ff7076bSVincenzo Maffione 		WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n",
391*0ff7076bSVincenzo Maffione 			devname, strerror(errno)));
392*0ff7076bSVincenzo Maffione 		free(priv);
393*0ff7076bSVincenzo Maffione 		return (-1);
394*0ff7076bSVincenzo Maffione 	}
395*0ff7076bSVincenzo Maffione 
396*0ff7076bSVincenzo Maffione 	priv->memid = priv->nmd->req.nr_arg2;
397*0ff7076bSVincenzo Maffione 	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
398*0ff7076bSVincenzo Maffione 	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
399*0ff7076bSVincenzo Maffione 	priv->cb = cb;
400*0ff7076bSVincenzo Maffione 	priv->cb_param = param;
401*0ff7076bSVincenzo Maffione 	be->fd = priv->nmd->fd;
402*0ff7076bSVincenzo Maffione 
403*0ff7076bSVincenzo Maffione 	priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
404*0ff7076bSVincenzo Maffione 	if (priv->mevp == NULL) {
405*0ff7076bSVincenzo Maffione 		WPRINTF(("Could not register event\n"));
406*0ff7076bSVincenzo Maffione 		return (-1);
407*0ff7076bSVincenzo Maffione 	}
408*0ff7076bSVincenzo Maffione 
409*0ff7076bSVincenzo Maffione 	return (0);
410*0ff7076bSVincenzo Maffione }
411*0ff7076bSVincenzo Maffione 
412*0ff7076bSVincenzo Maffione static void
413*0ff7076bSVincenzo Maffione netmap_cleanup(struct net_backend *be)
414*0ff7076bSVincenzo Maffione {
415*0ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
416*0ff7076bSVincenzo Maffione 
417*0ff7076bSVincenzo Maffione 	if (priv->mevp) {
418*0ff7076bSVincenzo Maffione 		mevent_delete(priv->mevp);
419*0ff7076bSVincenzo Maffione 	}
420*0ff7076bSVincenzo Maffione 	if (priv->nmd) {
421*0ff7076bSVincenzo Maffione 		nm_close(priv->nmd);
422*0ff7076bSVincenzo Maffione 	}
423*0ff7076bSVincenzo Maffione 	be->fd = -1;
424*0ff7076bSVincenzo Maffione }
425*0ff7076bSVincenzo Maffione 
426*0ff7076bSVincenzo Maffione static ssize_t
427*0ff7076bSVincenzo Maffione netmap_send(struct net_backend *be, struct iovec *iov,
428*0ff7076bSVincenzo Maffione 	    int iovcnt)
429*0ff7076bSVincenzo Maffione {
430*0ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
431*0ff7076bSVincenzo Maffione 	struct netmap_ring *ring;
432*0ff7076bSVincenzo Maffione 	ssize_t totlen = 0;
433*0ff7076bSVincenzo Maffione 	int nm_buf_size;
434*0ff7076bSVincenzo Maffione 	int nm_buf_len;
435*0ff7076bSVincenzo Maffione 	uint32_t head;
436*0ff7076bSVincenzo Maffione 	void *nm_buf;
437*0ff7076bSVincenzo Maffione 	int j;
438*0ff7076bSVincenzo Maffione 
439*0ff7076bSVincenzo Maffione 	ring = priv->tx;
440*0ff7076bSVincenzo Maffione 	head = ring->head;
441*0ff7076bSVincenzo Maffione 	if (head == ring->tail) {
442*0ff7076bSVincenzo Maffione 		WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt)));
443*0ff7076bSVincenzo Maffione 		goto txsync;
444*0ff7076bSVincenzo Maffione 	}
445*0ff7076bSVincenzo Maffione 	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
446*0ff7076bSVincenzo Maffione 	nm_buf_size = ring->nr_buf_size;
447*0ff7076bSVincenzo Maffione 	nm_buf_len = 0;
448*0ff7076bSVincenzo Maffione 
449*0ff7076bSVincenzo Maffione 	for (j = 0; j < iovcnt; j++) {
450*0ff7076bSVincenzo Maffione 		int iov_frag_size = iov[j].iov_len;
451*0ff7076bSVincenzo Maffione 		void *iov_frag_buf = iov[j].iov_base;
452*0ff7076bSVincenzo Maffione 
453*0ff7076bSVincenzo Maffione 		totlen += iov_frag_size;
454*0ff7076bSVincenzo Maffione 
455*0ff7076bSVincenzo Maffione 		/*
456*0ff7076bSVincenzo Maffione 		 * Split each iovec fragment over more netmap slots, if
457*0ff7076bSVincenzo Maffione 		 * necessary.
458*0ff7076bSVincenzo Maffione 		 */
459*0ff7076bSVincenzo Maffione 		for (;;) {
460*0ff7076bSVincenzo Maffione 			int copylen;
461*0ff7076bSVincenzo Maffione 
462*0ff7076bSVincenzo Maffione 			copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
463*0ff7076bSVincenzo Maffione 			memcpy(nm_buf, iov_frag_buf, copylen);
464*0ff7076bSVincenzo Maffione 
465*0ff7076bSVincenzo Maffione 			iov_frag_buf += copylen;
466*0ff7076bSVincenzo Maffione 			iov_frag_size -= copylen;
467*0ff7076bSVincenzo Maffione 			nm_buf += copylen;
468*0ff7076bSVincenzo Maffione 			nm_buf_size -= copylen;
469*0ff7076bSVincenzo Maffione 			nm_buf_len += copylen;
470*0ff7076bSVincenzo Maffione 
471*0ff7076bSVincenzo Maffione 			if (iov_frag_size == 0) {
472*0ff7076bSVincenzo Maffione 				break;
473*0ff7076bSVincenzo Maffione 			}
474*0ff7076bSVincenzo Maffione 
475*0ff7076bSVincenzo Maffione 			ring->slot[head].len = nm_buf_len;
476*0ff7076bSVincenzo Maffione 			ring->slot[head].flags = NS_MOREFRAG;
477*0ff7076bSVincenzo Maffione 			head = nm_ring_next(ring, head);
478*0ff7076bSVincenzo Maffione 			if (head == ring->tail) {
479*0ff7076bSVincenzo Maffione 				/*
480*0ff7076bSVincenzo Maffione 				 * We ran out of netmap slots while
481*0ff7076bSVincenzo Maffione 				 * splitting the iovec fragments.
482*0ff7076bSVincenzo Maffione 				 */
483*0ff7076bSVincenzo Maffione 				WPRINTF(("No space, drop %zu bytes\n",
484*0ff7076bSVincenzo Maffione 				   count_iov(iov, iovcnt)));
485*0ff7076bSVincenzo Maffione 				goto txsync;
486*0ff7076bSVincenzo Maffione 			}
487*0ff7076bSVincenzo Maffione 			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
488*0ff7076bSVincenzo Maffione 			nm_buf_size = ring->nr_buf_size;
489*0ff7076bSVincenzo Maffione 			nm_buf_len = 0;
490*0ff7076bSVincenzo Maffione 		}
491*0ff7076bSVincenzo Maffione 	}
492*0ff7076bSVincenzo Maffione 
493*0ff7076bSVincenzo Maffione 	/* Complete the last slot, which must not have NS_MOREFRAG set. */
494*0ff7076bSVincenzo Maffione 	ring->slot[head].len = nm_buf_len;
495*0ff7076bSVincenzo Maffione 	ring->slot[head].flags = 0;
496*0ff7076bSVincenzo Maffione 	head = nm_ring_next(ring, head);
497*0ff7076bSVincenzo Maffione 
498*0ff7076bSVincenzo Maffione 	/* Now update ring->head and ring->cur. */
499*0ff7076bSVincenzo Maffione 	ring->head = ring->cur = head;
500*0ff7076bSVincenzo Maffione txsync:
501*0ff7076bSVincenzo Maffione 	ioctl(be->fd, NIOCTXSYNC, NULL);
502*0ff7076bSVincenzo Maffione 
503*0ff7076bSVincenzo Maffione 	return (totlen);
504*0ff7076bSVincenzo Maffione }
505*0ff7076bSVincenzo Maffione 
506*0ff7076bSVincenzo Maffione static ssize_t
507*0ff7076bSVincenzo Maffione netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
508*0ff7076bSVincenzo Maffione {
509*0ff7076bSVincenzo Maffione 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
510*0ff7076bSVincenzo Maffione 	struct netmap_slot *slot = NULL;
511*0ff7076bSVincenzo Maffione 	struct netmap_ring *ring;
512*0ff7076bSVincenzo Maffione 	void *iov_frag_buf;
513*0ff7076bSVincenzo Maffione 	int iov_frag_size;
514*0ff7076bSVincenzo Maffione 	ssize_t totlen = 0;
515*0ff7076bSVincenzo Maffione 	uint32_t head;
516*0ff7076bSVincenzo Maffione 
517*0ff7076bSVincenzo Maffione 	assert(iovcnt);
518*0ff7076bSVincenzo Maffione 
519*0ff7076bSVincenzo Maffione 	ring = priv->rx;
520*0ff7076bSVincenzo Maffione 	head = ring->head;
521*0ff7076bSVincenzo Maffione 	iov_frag_buf = iov->iov_base;
522*0ff7076bSVincenzo Maffione 	iov_frag_size = iov->iov_len;
523*0ff7076bSVincenzo Maffione 
524*0ff7076bSVincenzo Maffione 	do {
525*0ff7076bSVincenzo Maffione 		int nm_buf_len;
526*0ff7076bSVincenzo Maffione 		void *nm_buf;
527*0ff7076bSVincenzo Maffione 
528*0ff7076bSVincenzo Maffione 		if (head == ring->tail) {
529*0ff7076bSVincenzo Maffione 			return (0);
530*0ff7076bSVincenzo Maffione 		}
531*0ff7076bSVincenzo Maffione 
532*0ff7076bSVincenzo Maffione 		slot = ring->slot + head;
533*0ff7076bSVincenzo Maffione 		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
534*0ff7076bSVincenzo Maffione 		nm_buf_len = slot->len;
535*0ff7076bSVincenzo Maffione 
536*0ff7076bSVincenzo Maffione 		for (;;) {
537*0ff7076bSVincenzo Maffione 			int copylen = nm_buf_len < iov_frag_size ?
538*0ff7076bSVincenzo Maffione 			    nm_buf_len : iov_frag_size;
539*0ff7076bSVincenzo Maffione 
540*0ff7076bSVincenzo Maffione 			memcpy(iov_frag_buf, nm_buf, copylen);
541*0ff7076bSVincenzo Maffione 			nm_buf += copylen;
542*0ff7076bSVincenzo Maffione 			nm_buf_len -= copylen;
543*0ff7076bSVincenzo Maffione 			iov_frag_buf += copylen;
544*0ff7076bSVincenzo Maffione 			iov_frag_size -= copylen;
545*0ff7076bSVincenzo Maffione 			totlen += copylen;
546*0ff7076bSVincenzo Maffione 
547*0ff7076bSVincenzo Maffione 			if (nm_buf_len == 0) {
548*0ff7076bSVincenzo Maffione 				break;
549*0ff7076bSVincenzo Maffione 			}
550*0ff7076bSVincenzo Maffione 
551*0ff7076bSVincenzo Maffione 			iov++;
552*0ff7076bSVincenzo Maffione 			iovcnt--;
553*0ff7076bSVincenzo Maffione 			if (iovcnt == 0) {
554*0ff7076bSVincenzo Maffione 				/* No space to receive. */
555*0ff7076bSVincenzo Maffione 				WPRINTF(("Short iov, drop %zd bytes\n",
556*0ff7076bSVincenzo Maffione 				    totlen));
557*0ff7076bSVincenzo Maffione 				return (-ENOSPC);
558*0ff7076bSVincenzo Maffione 			}
559*0ff7076bSVincenzo Maffione 			iov_frag_buf = iov->iov_base;
560*0ff7076bSVincenzo Maffione 			iov_frag_size = iov->iov_len;
561*0ff7076bSVincenzo Maffione 		}
562*0ff7076bSVincenzo Maffione 
563*0ff7076bSVincenzo Maffione 		head = nm_ring_next(ring, head);
564*0ff7076bSVincenzo Maffione 
565*0ff7076bSVincenzo Maffione 	} while (slot->flags & NS_MOREFRAG);
566*0ff7076bSVincenzo Maffione 
567*0ff7076bSVincenzo Maffione 	/* Release slots to netmap. */
568*0ff7076bSVincenzo Maffione 	ring->head = ring->cur = head;
569*0ff7076bSVincenzo Maffione 
570*0ff7076bSVincenzo Maffione 	return (totlen);
571*0ff7076bSVincenzo Maffione }
572*0ff7076bSVincenzo Maffione 
573*0ff7076bSVincenzo Maffione static struct net_backend netmap_backend = {
574*0ff7076bSVincenzo Maffione 	.prefix = "netmap",
575*0ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct netmap_priv),
576*0ff7076bSVincenzo Maffione 	.init = netmap_init,
577*0ff7076bSVincenzo Maffione 	.cleanup = netmap_cleanup,
578*0ff7076bSVincenzo Maffione 	.send = netmap_send,
579*0ff7076bSVincenzo Maffione 	.recv = netmap_recv,
580*0ff7076bSVincenzo Maffione 	.get_cap = netmap_get_cap,
581*0ff7076bSVincenzo Maffione 	.set_cap = netmap_set_cap,
582*0ff7076bSVincenzo Maffione };
583*0ff7076bSVincenzo Maffione 
584*0ff7076bSVincenzo Maffione /* A clone of the netmap backend, with a different prefix. */
585*0ff7076bSVincenzo Maffione static struct net_backend vale_backend = {
586*0ff7076bSVincenzo Maffione 	.prefix = "vale",
587*0ff7076bSVincenzo Maffione 	.priv_size = sizeof(struct netmap_priv),
588*0ff7076bSVincenzo Maffione 	.init = netmap_init,
589*0ff7076bSVincenzo Maffione 	.cleanup = netmap_cleanup,
590*0ff7076bSVincenzo Maffione 	.send = netmap_send,
591*0ff7076bSVincenzo Maffione 	.recv = netmap_recv,
592*0ff7076bSVincenzo Maffione 	.get_cap = netmap_get_cap,
593*0ff7076bSVincenzo Maffione 	.set_cap = netmap_set_cap,
594*0ff7076bSVincenzo Maffione };
595*0ff7076bSVincenzo Maffione 
/*
 * Add both templates to net_backend_set, the linker set that
 * netbe_init() walks when matching a device name against a prefix.
 */
DATA_SET(net_backend_set, netmap_backend);
DATA_SET(net_backend_set, vale_backend);
598*0ff7076bSVincenzo Maffione 
599*0ff7076bSVincenzo Maffione /*
600*0ff7076bSVincenzo Maffione  * Initialize a backend and attach to the frontend.
601*0ff7076bSVincenzo Maffione  * This is called during frontend initialization.
602*0ff7076bSVincenzo Maffione  *  @pbe is a pointer to the backend to be initialized
603*0ff7076bSVincenzo Maffione  *  @devname is the backend-name as supplied on the command line,
604*0ff7076bSVincenzo Maffione  * 	e.g. -s 2:0,frontend-name,backend-name[,other-args]
605*0ff7076bSVincenzo Maffione  *  @cb is the receive callback supplied by the frontend,
606*0ff7076bSVincenzo Maffione  *	and it is invoked in the event loop when a receive
607*0ff7076bSVincenzo Maffione  *	event is generated in the hypervisor,
608*0ff7076bSVincenzo Maffione  *  @param is a pointer to the frontend, and normally used as
609*0ff7076bSVincenzo Maffione  *	the argument for the callback.
610*0ff7076bSVincenzo Maffione  */
611*0ff7076bSVincenzo Maffione int
612*0ff7076bSVincenzo Maffione netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb,
613*0ff7076bSVincenzo Maffione     void *param)
614*0ff7076bSVincenzo Maffione {
615*0ff7076bSVincenzo Maffione 	struct net_backend **pbe, *nbe, *tbe = NULL;
616*0ff7076bSVincenzo Maffione 	int err;
617*0ff7076bSVincenzo Maffione 
618*0ff7076bSVincenzo Maffione 	/*
619*0ff7076bSVincenzo Maffione 	 * Find the network backend that matches the user-provided
620*0ff7076bSVincenzo Maffione 	 * device name. net_backend_set is built using a linker set.
621*0ff7076bSVincenzo Maffione 	 */
622*0ff7076bSVincenzo Maffione 	SET_FOREACH(pbe, net_backend_set) {
623*0ff7076bSVincenzo Maffione 		if (strncmp(devname, (*pbe)->prefix,
624*0ff7076bSVincenzo Maffione 		    strlen((*pbe)->prefix)) == 0) {
625*0ff7076bSVincenzo Maffione 			tbe = *pbe;
626*0ff7076bSVincenzo Maffione 			assert(tbe->init != NULL);
627*0ff7076bSVincenzo Maffione 			assert(tbe->cleanup != NULL);
628*0ff7076bSVincenzo Maffione 			assert(tbe->send != NULL);
629*0ff7076bSVincenzo Maffione 			assert(tbe->recv != NULL);
630*0ff7076bSVincenzo Maffione 			assert(tbe->get_cap != NULL);
631*0ff7076bSVincenzo Maffione 			assert(tbe->set_cap != NULL);
632*0ff7076bSVincenzo Maffione 			break;
633*0ff7076bSVincenzo Maffione 		}
634*0ff7076bSVincenzo Maffione 	}
635*0ff7076bSVincenzo Maffione 
636*0ff7076bSVincenzo Maffione 	*ret = NULL;
637*0ff7076bSVincenzo Maffione 	if (tbe == NULL)
638*0ff7076bSVincenzo Maffione 		return (EINVAL);
639*0ff7076bSVincenzo Maffione 	nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
640*0ff7076bSVincenzo Maffione 	*nbe = *tbe;	/* copy the template */
641*0ff7076bSVincenzo Maffione 	nbe->fd = -1;
642*0ff7076bSVincenzo Maffione 	nbe->sc = param;
643*0ff7076bSVincenzo Maffione 	nbe->be_vnet_hdr_len = 0;
644*0ff7076bSVincenzo Maffione 	nbe->fe_vnet_hdr_len = 0;
645*0ff7076bSVincenzo Maffione 
646*0ff7076bSVincenzo Maffione 	/* Initialize the backend. */
647*0ff7076bSVincenzo Maffione 	err = nbe->init(nbe, devname, cb, param);
648*0ff7076bSVincenzo Maffione 	if (err) {
649*0ff7076bSVincenzo Maffione 		free(nbe);
650*0ff7076bSVincenzo Maffione 		return (err);
651*0ff7076bSVincenzo Maffione 	}
652*0ff7076bSVincenzo Maffione 
653*0ff7076bSVincenzo Maffione 	*ret = nbe;
654*0ff7076bSVincenzo Maffione 
655*0ff7076bSVincenzo Maffione 	return (0);
656*0ff7076bSVincenzo Maffione }
657*0ff7076bSVincenzo Maffione 
658*0ff7076bSVincenzo Maffione void
659*0ff7076bSVincenzo Maffione netbe_cleanup(struct net_backend *be)
660*0ff7076bSVincenzo Maffione {
661*0ff7076bSVincenzo Maffione 
662*0ff7076bSVincenzo Maffione 	if (be != NULL) {
663*0ff7076bSVincenzo Maffione 		be->cleanup(be);
664*0ff7076bSVincenzo Maffione 		free(be);
665*0ff7076bSVincenzo Maffione 	}
666*0ff7076bSVincenzo Maffione }
667*0ff7076bSVincenzo Maffione 
668*0ff7076bSVincenzo Maffione uint64_t
669*0ff7076bSVincenzo Maffione netbe_get_cap(struct net_backend *be)
670*0ff7076bSVincenzo Maffione {
671*0ff7076bSVincenzo Maffione 
672*0ff7076bSVincenzo Maffione 	assert(be != NULL);
673*0ff7076bSVincenzo Maffione 	return (be->get_cap(be));
674*0ff7076bSVincenzo Maffione }
675*0ff7076bSVincenzo Maffione 
676*0ff7076bSVincenzo Maffione int
677*0ff7076bSVincenzo Maffione netbe_set_cap(struct net_backend *be, uint64_t features,
678*0ff7076bSVincenzo Maffione 	      unsigned vnet_hdr_len)
679*0ff7076bSVincenzo Maffione {
680*0ff7076bSVincenzo Maffione 	int ret;
681*0ff7076bSVincenzo Maffione 
682*0ff7076bSVincenzo Maffione 	assert(be != NULL);
683*0ff7076bSVincenzo Maffione 
684*0ff7076bSVincenzo Maffione 	/* There are only three valid lengths, i.e., 0, 10 and 12. */
685*0ff7076bSVincenzo Maffione 	if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
686*0ff7076bSVincenzo Maffione 		&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
687*0ff7076bSVincenzo Maffione 		return (-1);
688*0ff7076bSVincenzo Maffione 
689*0ff7076bSVincenzo Maffione 	be->fe_vnet_hdr_len = vnet_hdr_len;
690*0ff7076bSVincenzo Maffione 
691*0ff7076bSVincenzo Maffione 	ret = be->set_cap(be, features, vnet_hdr_len);
692*0ff7076bSVincenzo Maffione 	assert(be->be_vnet_hdr_len == 0 ||
693*0ff7076bSVincenzo Maffione 	       be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
694*0ff7076bSVincenzo Maffione 
695*0ff7076bSVincenzo Maffione 	return (ret);
696*0ff7076bSVincenzo Maffione }
697*0ff7076bSVincenzo Maffione 
/*
 * Drop the first @tlen bytes from the scatter-gather list @iov.
 *
 * The first segment is shrunk in place. If it becomes empty it is
 * skipped: *@iovcnt is decremented and a pointer to the second segment
 * is returned. Otherwise its base is advanced by @tlen and a pointer
 * to the first segment is returned.
 */
static __inline struct iovec *
iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
{
	struct iovec *first = &iov[0];

	/* XXX short-cut: assume first segment is >= tlen */
	assert(first->iov_len >= tlen);

	first->iov_len -= tlen;
	if (first->iov_len != 0) {
		/* Bytes are left over: just move past the trimmed part. */
		first->iov_base = (void *)((uintptr_t)first->iov_base + tlen);
		return (first);
	}

	/* First segment fully consumed: there must be another one. */
	assert(*iovcnt > 1);
	*iovcnt -= 1;

	return (first + 1);
}
718*0ff7076bSVincenzo Maffione 
719*0ff7076bSVincenzo Maffione ssize_t
720*0ff7076bSVincenzo Maffione netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt)
721*0ff7076bSVincenzo Maffione {
722*0ff7076bSVincenzo Maffione 
723*0ff7076bSVincenzo Maffione 	assert(be != NULL);
724*0ff7076bSVincenzo Maffione 	if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
725*0ff7076bSVincenzo Maffione 		/*
726*0ff7076bSVincenzo Maffione 		 * The frontend uses a virtio-net header, but the backend
727*0ff7076bSVincenzo Maffione 		 * does not. We ignore it (as it must be all zeroes) and
728*0ff7076bSVincenzo Maffione 		 * strip it.
729*0ff7076bSVincenzo Maffione 		 */
730*0ff7076bSVincenzo Maffione 		assert(be->be_vnet_hdr_len == 0);
731*0ff7076bSVincenzo Maffione 		iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
732*0ff7076bSVincenzo Maffione 	}
733*0ff7076bSVincenzo Maffione 
734*0ff7076bSVincenzo Maffione 	return (be->send(be, iov, iovcnt));
735*0ff7076bSVincenzo Maffione }
736*0ff7076bSVincenzo Maffione 
737*0ff7076bSVincenzo Maffione /*
738*0ff7076bSVincenzo Maffione  * Try to read a packet from the backend, without blocking.
739*0ff7076bSVincenzo Maffione  * If no packets are available, return 0. In case of success, return
740*0ff7076bSVincenzo Maffione  * the length of the packet just read. Return -1 in case of errors.
741*0ff7076bSVincenzo Maffione  */
742*0ff7076bSVincenzo Maffione ssize_t
743*0ff7076bSVincenzo Maffione netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
744*0ff7076bSVincenzo Maffione {
745*0ff7076bSVincenzo Maffione 	/* Length of prepended virtio-net header. */
746*0ff7076bSVincenzo Maffione 	unsigned int hlen = be->fe_vnet_hdr_len;
747*0ff7076bSVincenzo Maffione 	int ret;
748*0ff7076bSVincenzo Maffione 
749*0ff7076bSVincenzo Maffione 	assert(be != NULL);
750*0ff7076bSVincenzo Maffione 
751*0ff7076bSVincenzo Maffione 	if (hlen && hlen != be->be_vnet_hdr_len) {
752*0ff7076bSVincenzo Maffione 		/*
753*0ff7076bSVincenzo Maffione 		 * The frontend uses a virtio-net header, but the backend
754*0ff7076bSVincenzo Maffione 		 * does not. We need to prepend a zeroed header.
755*0ff7076bSVincenzo Maffione 		 */
756*0ff7076bSVincenzo Maffione 		struct virtio_net_rxhdr *vh;
757*0ff7076bSVincenzo Maffione 
758*0ff7076bSVincenzo Maffione 		assert(be->be_vnet_hdr_len == 0);
759*0ff7076bSVincenzo Maffione 
760*0ff7076bSVincenzo Maffione 		/*
761*0ff7076bSVincenzo Maffione 		 * Get a pointer to the rx header, and use the
762*0ff7076bSVincenzo Maffione 		 * data immediately following it for the packet buffer.
763*0ff7076bSVincenzo Maffione 		 */
764*0ff7076bSVincenzo Maffione 		vh = iov[0].iov_base;
765*0ff7076bSVincenzo Maffione 		iov = iov_trim(iov, &iovcnt, hlen);
766*0ff7076bSVincenzo Maffione 
767*0ff7076bSVincenzo Maffione 		/*
768*0ff7076bSVincenzo Maffione 		 * The only valid field in the rx packet header is the
769*0ff7076bSVincenzo Maffione 		 * number of buffers if merged rx bufs were negotiated.
770*0ff7076bSVincenzo Maffione 		 */
771*0ff7076bSVincenzo Maffione 		memset(vh, 0, hlen);
772*0ff7076bSVincenzo Maffione 		if (hlen == VNET_HDR_LEN) {
773*0ff7076bSVincenzo Maffione 			vh->vrh_bufs = 1;
774*0ff7076bSVincenzo Maffione 		}
775*0ff7076bSVincenzo Maffione 	}
776*0ff7076bSVincenzo Maffione 
777*0ff7076bSVincenzo Maffione 	ret = be->recv(be, iov, iovcnt);
778*0ff7076bSVincenzo Maffione 	if (ret > 0) {
779*0ff7076bSVincenzo Maffione 		ret += hlen;
780*0ff7076bSVincenzo Maffione 	}
781*0ff7076bSVincenzo Maffione 
782*0ff7076bSVincenzo Maffione 	return (ret);
783*0ff7076bSVincenzo Maffione }
784*0ff7076bSVincenzo Maffione 
/*
 * Read a packet from the backend into a scratch buffer and throw it
 * away. The return value is that of netbe_recv(): the size of the
 * discarded packet, zero if no packet was available, or a negative
 * error code in case of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the dummybuf is only used to discard frames,
	 * so there is no need for it to be per-vtnet or locked.
	 * We only make it large enough for TSO-sized segment.
	 */
	static uint8_t dummybuf[65536 + 64];
	struct iovec iov = {
		.iov_base = dummybuf,
		.iov_len = sizeof(dummybuf),
	};

	return (netbe_recv(be, &iov, 1));
}
806*0ff7076bSVincenzo Maffione 
807