xref: /freebsd/usr.sbin/bhyve/net_backends.c (revision 6f63e88c0166ed3e5f2805a9e667c7d24d304cf1)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
19  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * $FreeBSD$
28  */
29 
30 /*
31  * This file implements multiple network backends (tap, netmap, ...),
32  * to be used by network frontends such as virtio-net and e1000.
33  * The API to access the backend (e.g. send/receive packets, negotiate
34  * features) is exported by net_backends.h.
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/types.h>		/* u_short etc */
41 #ifndef WITHOUT_CAPSICUM
42 #include <sys/capsicum.h>
43 #endif
44 #include <sys/ioctl.h>
45 #include <sys/mman.h>
46 #include <sys/uio.h>
47 
48 #include <net/if.h>
49 #include <net/netmap.h>
50 #include <net/netmap_virt.h>
51 #define NETMAP_WITH_LIBS
52 #include <net/netmap_user.h>
53 
54 #ifndef WITHOUT_CAPSICUM
55 #include <capsicum_helpers.h>
56 #endif
57 #include <err.h>
58 #include <errno.h>
59 #include <fcntl.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <stdint.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <sysexits.h>
66 #include <assert.h>
67 #include <pthread.h>
68 #include <pthread_np.h>
69 #include <poll.h>
70 #include <assert.h>
71 
72 
73 #include "debug.h"
74 #include "iov.h"
75 #include "mevent.h"
76 #include "net_backends.h"
77 
78 #include <sys/linker_set.h>
79 
80 /*
81  * Each network backend registers a set of function pointers that are
82  * used to implement the net backends API.
83  * This might need to be exposed if we implement backends in separate files.
84  */
85 struct net_backend {
86 	const char *prefix;	/* prefix matching this backend */
87 
88 	/*
89 	 * Routines used to initialize and cleanup the resources needed
90 	 * by a backend. The cleanup function is used internally,
91 	 * and should not be called by the frontend.
92 	 */
93 	int (*init)(struct net_backend *be, const char *devname,
94 	    net_be_rxeof_t cb, void *param);
95 	void (*cleanup)(struct net_backend *be);
96 
97 	/*
98 	 * Called to serve a guest transmit request. The scatter-gather
99 	 * vector provided by the caller has 'iovcnt' elements and contains
100 	 * the packet to send.
101 	 */
102 	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
103 	    int iovcnt);
104 
105 	/*
106 	 * Get the length of the next packet that can be received from
107 	 * the backend. If no packets are currently available, this
108 	 * function returns 0.
109 	 */
110 	ssize_t (*peek_recvlen)(struct net_backend *be);
111 
112 	/*
113 	 * Called to receive a packet from the backend. When the function
114 	 * returns a positive value 'len', the scatter-gather vector
115 	 * provided by the caller contains a packet with such length.
116 	 * The function returns 0 if the backend doesn't have a new packet to
117 	 * receive.
118 	 */
119 	ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
120 	    int iovcnt);
121 
122 	/*
123 	 * Ask the backend to enable or disable receive operation in the
124 	 * backend. On return from a disable operation, it is guaranteed
125 	 * that the receive callback won't be called until receive is
126 	 * enabled again. Note however that it is up to the caller to make
127 	 * sure that netbe_recv() is not currently being executed by another
128 	 * thread.
129 	 */
130 	void (*recv_enable)(struct net_backend *be);
131 	void (*recv_disable)(struct net_backend *be);
132 
133 	/*
134 	 * Ask the backend for the virtio-net features it is able to
135 	 * support. Possible features are TSO, UFO and checksum offloading
136 	 * in both rx and tx direction and for both IPv4 and IPv6.
137 	 */
138 	uint64_t (*get_cap)(struct net_backend *be);
139 
140 	/*
141 	 * Tell the backend to enable/disable the specified virtio-net
142 	 * features (capabilities).
143 	 */
144 	int (*set_cap)(struct net_backend *be, uint64_t features,
145 	    unsigned int vnet_hdr_len);
146 
147 	struct pci_vtnet_softc *sc;
148 	int fd;
149 
150 	/*
151 	 * Length of the virtio-net header used by the backend and the
152 	 * frontend, respectively. A zero value means that the header
153 	 * is not used.
154 	 */
155 	unsigned int be_vnet_hdr_len;
156 	unsigned int fe_vnet_hdr_len;
157 
158 	/* Size of backend-specific private data. */
159 	size_t priv_size;
160 
161 	/* Room for backend-specific data. */
162 	char opaque[0];
163 };
164 
165 SET_DECLARE(net_backend_set, struct net_backend);
166 
167 #define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)
168 
169 #define WPRINTF(params) PRINTLN params
170 
171 /*
172  * The tap backend
173  */
174 
struct tap_priv {
	/* Read event registered on the tap descriptor by tap_init(). */
	struct mevent *mevp;

	/*
	 * Bounce buffer used to implement the peek_recvlen callback:
	 * tap_peek_recvlen() reads the next packet in here to learn its
	 * length, and tap_recv() later copies it out. In the future the
	 * same information may come from the kevent data instead.
	 */
	char bbuf[64 * 1024];

	/* Length of the packet held in bbuf; 0 when the buffer is empty. */
	ssize_t bbuflen;
};
185 
186 static void
187 tap_cleanup(struct net_backend *be)
188 {
189 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
190 
191 	if (priv->mevp) {
192 		mevent_delete(priv->mevp);
193 	}
194 	if (be->fd != -1) {
195 		close(be->fd);
196 		be->fd = -1;
197 	}
198 }
199 
200 static int
201 tap_init(struct net_backend *be, const char *devname,
202 	 net_be_rxeof_t cb, void *param)
203 {
204 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
205 	char tbuf[80];
206 	int opt = 1;
207 #ifndef WITHOUT_CAPSICUM
208 	cap_rights_t rights;
209 #endif
210 
211 	if (cb == NULL) {
212 		WPRINTF(("TAP backend requires non-NULL callback"));
213 		return (-1);
214 	}
215 
216 	strcpy(tbuf, "/dev/");
217 	strlcat(tbuf, devname, sizeof(tbuf));
218 
219 	be->fd = open(tbuf, O_RDWR);
220 	if (be->fd == -1) {
221 		WPRINTF(("open of tap device %s failed", tbuf));
222 		goto error;
223 	}
224 
225 	/*
226 	 * Set non-blocking and register for read
227 	 * notifications with the event loop
228 	 */
229 	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
230 		WPRINTF(("tap device O_NONBLOCK failed"));
231 		goto error;
232 	}
233 
234 #ifndef WITHOUT_CAPSICUM
235 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
236 	if (caph_rights_limit(be->fd, &rights) == -1)
237 		errx(EX_OSERR, "Unable to apply rights for sandbox");
238 #endif
239 
240 	memset(priv->bbuf, 0, sizeof(priv->bbuf));
241 	priv->bbuflen = 0;
242 
243 	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
244 	if (priv->mevp == NULL) {
245 		WPRINTF(("Could not register event"));
246 		goto error;
247 	}
248 
249 	return (0);
250 
251 error:
252 	tap_cleanup(be);
253 	return (-1);
254 }
255 
256 /*
257  * Called to send a buffer chain out to the tap device
258  */
259 static ssize_t
260 tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
261 {
262 	return (writev(be->fd, iov, iovcnt));
263 }
264 
265 static ssize_t
266 tap_peek_recvlen(struct net_backend *be)
267 {
268 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
269 	ssize_t ret;
270 
271 	if (priv->bbuflen > 0) {
272 		/*
273 		 * We already have a packet in the bounce buffer.
274 		 * Just return its length.
275 		 */
276 		return priv->bbuflen;
277 	}
278 
279 	/*
280 	 * Read the next packet (if any) into the bounce buffer, so
281 	 * that we get to know its length and we can return that
282 	 * to the caller.
283 	 */
284 	ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
285 	if (ret < 0 && errno == EWOULDBLOCK) {
286 		return (0);
287 	}
288 
289 	if (ret > 0)
290 		priv->bbuflen = ret;
291 
292 	return (ret);
293 }
294 
295 static ssize_t
296 tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
297 {
298 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
299 	ssize_t ret;
300 
301 	if (priv->bbuflen > 0) {
302 		/*
303 		 * A packet is available in the bounce buffer, so
304 		 * we read it from there.
305 		 */
306 		ret = buf_to_iov(priv->bbuf, priv->bbuflen,
307 		    iov, iovcnt, 0);
308 
309 		/* Mark the bounce buffer as empty. */
310 		priv->bbuflen = 0;
311 
312 		return (ret);
313 	}
314 
315 	ret = readv(be->fd, iov, iovcnt);
316 	if (ret < 0 && errno == EWOULDBLOCK) {
317 		return (0);
318 	}
319 
320 	return (ret);
321 }
322 
323 static void
324 tap_recv_enable(struct net_backend *be)
325 {
326 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
327 
328 	mevent_enable(priv->mevp);
329 }
330 
331 static void
332 tap_recv_disable(struct net_backend *be)
333 {
334 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
335 
336 	mevent_disable(priv->mevp);
337 }
338 
/* Report the supported virtio-net features: none for now. */
static uint64_t
tap_get_cap(struct net_backend *be)
{
	const uint64_t no_features = 0;

	(void)be;

	return (no_features);
}
345 
/*
 * Accept only the "nothing enabled" configuration: returns 0 when both
 * 'features' and 'vnet_hdr_len' are zero, -1 otherwise.
 */
static int
tap_set_cap(struct net_backend *be, uint64_t features,
		unsigned vnet_hdr_len)
{
	(void)be;

	if (features != 0 || vnet_hdr_len != 0)
		return (-1);

	return (0);
}
353 
354 static struct net_backend tap_backend = {
355 	.prefix = "tap",
356 	.priv_size = sizeof(struct tap_priv),
357 	.init = tap_init,
358 	.cleanup = tap_cleanup,
359 	.send = tap_send,
360 	.peek_recvlen = tap_peek_recvlen,
361 	.recv = tap_recv,
362 	.recv_enable = tap_recv_enable,
363 	.recv_disable = tap_recv_disable,
364 	.get_cap = tap_get_cap,
365 	.set_cap = tap_set_cap,
366 };
367 
368 /* A clone of the tap backend, with a different prefix. */
369 static struct net_backend vmnet_backend = {
370 	.prefix = "vmnet",
371 	.priv_size = sizeof(struct tap_priv),
372 	.init = tap_init,
373 	.cleanup = tap_cleanup,
374 	.send = tap_send,
375 	.peek_recvlen = tap_peek_recvlen,
376 	.recv = tap_recv,
377 	.recv_enable = tap_recv_enable,
378 	.recv_disable = tap_recv_disable,
379 	.get_cap = tap_get_cap,
380 	.set_cap = tap_set_cap,
381 };
382 
/*
 * Add the tap and vmnet templates to net_backend_set, the linker set
 * that netbe_init() walks to find a backend by device-name prefix.
 */
DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);
385 
386 /*
387  * The netmap backend
388  */
389 
/*
 * The virtio-net features supported by netmap: checksum, TSO (IPv4 and
 * IPv6) and UFO offloads, for both the host and the guest direction.
 */
#define NETMAP_FEATURES ( \
		VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM | \
		VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_GUEST_TSO4 | \
		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_GUEST_TSO6 | \
		VIRTIO_NET_F_HOST_UFO | VIRTIO_NET_F_GUEST_UFO)
395 
/*
 * Private state of a netmap (or vale) backend, stored in the opaque
 * area of struct net_backend and filled in by netmap_init().
 */
struct netmap_priv {
	char ifname[IFNAMSIZ];		/* copy of the device name passed to init */
	struct nm_desc *nmd;		/* descriptor returned by nm_open() */
	uint16_t memid;			/* taken from nmd->req.nr_arg2 */
	struct netmap_ring *rx;		/* receive ring 0 of the port */
	struct netmap_ring *tx;		/* transmit ring 0 of the port */
	struct mevent *mevp;		/* read event on the netmap descriptor */
	net_be_rxeof_t cb;		/* receive callback supplied to init */
	void *cb_param;			/* argument supplied along with cb */
};
406 
407 static void
408 nmreq_init(struct nmreq *req, char *ifname)
409 {
410 
411 	memset(req, 0, sizeof(*req));
412 	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
413 	req->nr_version = NETMAP_API;
414 }
415 
416 static int
417 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
418 {
419 	int err;
420 	struct nmreq req;
421 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
422 
423 	nmreq_init(&req, priv->ifname);
424 	req.nr_cmd = NETMAP_BDG_VNET_HDR;
425 	req.nr_arg1 = vnet_hdr_len;
426 	err = ioctl(be->fd, NIOCREGIF, &req);
427 	if (err) {
428 		WPRINTF(("Unable to set vnet header length %d",
429 				vnet_hdr_len));
430 		return (err);
431 	}
432 
433 	be->be_vnet_hdr_len = vnet_hdr_len;
434 
435 	return (0);
436 }
437 
438 static int
439 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
440 {
441 	int prev_hdr_len = be->be_vnet_hdr_len;
442 	int ret;
443 
444 	if (vnet_hdr_len == prev_hdr_len) {
445 		return (1);
446 	}
447 
448 	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
449 	if (ret) {
450 		return (0);
451 	}
452 
453 	netmap_set_vnet_hdr_len(be, prev_hdr_len);
454 
455 	return (1);
456 }
457 
458 static uint64_t
459 netmap_get_cap(struct net_backend *be)
460 {
461 
462 	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
463 	    NETMAP_FEATURES : 0);
464 }
465 
/*
 * Apply the negotiated configuration. Only the header length is
 * programmed into netmap; 'features' is not consulted here.
 */
static int
netmap_set_cap(struct net_backend *be, uint64_t features,
	       unsigned vnet_hdr_len)
{
	int rc;

	(void)features;
	rc = netmap_set_vnet_hdr_len(be, vnet_hdr_len);

	return (rc);
}
473 
474 static int
475 netmap_init(struct net_backend *be, const char *devname,
476 	    net_be_rxeof_t cb, void *param)
477 {
478 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
479 
480 	strlcpy(priv->ifname, devname, sizeof(priv->ifname));
481 	priv->ifname[sizeof(priv->ifname) - 1] = '\0';
482 
483 	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
484 	if (priv->nmd == NULL) {
485 		WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)",
486 			devname, strerror(errno)));
487 		free(priv);
488 		return (-1);
489 	}
490 
491 	priv->memid = priv->nmd->req.nr_arg2;
492 	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
493 	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
494 	priv->cb = cb;
495 	priv->cb_param = param;
496 	be->fd = priv->nmd->fd;
497 
498 	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
499 	if (priv->mevp == NULL) {
500 		WPRINTF(("Could not register event"));
501 		return (-1);
502 	}
503 
504 	return (0);
505 }
506 
507 static void
508 netmap_cleanup(struct net_backend *be)
509 {
510 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
511 
512 	if (priv->mevp) {
513 		mevent_delete(priv->mevp);
514 	}
515 	if (priv->nmd) {
516 		nm_close(priv->nmd);
517 	}
518 	be->fd = -1;
519 }
520 
521 static ssize_t
522 netmap_send(struct net_backend *be, const struct iovec *iov,
523 	    int iovcnt)
524 {
525 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
526 	struct netmap_ring *ring;
527 	ssize_t totlen = 0;
528 	int nm_buf_size;
529 	int nm_buf_len;
530 	uint32_t head;
531 	void *nm_buf;
532 	int j;
533 
534 	ring = priv->tx;
535 	head = ring->head;
536 	if (head == ring->tail) {
537 		WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt)));
538 		goto txsync;
539 	}
540 	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
541 	nm_buf_size = ring->nr_buf_size;
542 	nm_buf_len = 0;
543 
544 	for (j = 0; j < iovcnt; j++) {
545 		int iov_frag_size = iov[j].iov_len;
546 		void *iov_frag_buf = iov[j].iov_base;
547 
548 		totlen += iov_frag_size;
549 
550 		/*
551 		 * Split each iovec fragment over more netmap slots, if
552 		 * necessary.
553 		 */
554 		for (;;) {
555 			int copylen;
556 
557 			copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
558 			memcpy(nm_buf, iov_frag_buf, copylen);
559 
560 			iov_frag_buf += copylen;
561 			iov_frag_size -= copylen;
562 			nm_buf += copylen;
563 			nm_buf_size -= copylen;
564 			nm_buf_len += copylen;
565 
566 			if (iov_frag_size == 0) {
567 				break;
568 			}
569 
570 			ring->slot[head].len = nm_buf_len;
571 			ring->slot[head].flags = NS_MOREFRAG;
572 			head = nm_ring_next(ring, head);
573 			if (head == ring->tail) {
574 				/*
575 				 * We ran out of netmap slots while
576 				 * splitting the iovec fragments.
577 				 */
578 				WPRINTF(("No space, drop %zu bytes",
579 				   count_iov(iov, iovcnt)));
580 				goto txsync;
581 			}
582 			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
583 			nm_buf_size = ring->nr_buf_size;
584 			nm_buf_len = 0;
585 		}
586 	}
587 
588 	/* Complete the last slot, which must not have NS_MOREFRAG set. */
589 	ring->slot[head].len = nm_buf_len;
590 	ring->slot[head].flags = 0;
591 	head = nm_ring_next(ring, head);
592 
593 	/* Now update ring->head and ring->cur. */
594 	ring->head = ring->cur = head;
595 txsync:
596 	ioctl(be->fd, NIOCTXSYNC, NULL);
597 
598 	return (totlen);
599 }
600 
601 static ssize_t
602 netmap_peek_recvlen(struct net_backend *be)
603 {
604 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
605 	struct netmap_ring *ring = priv->rx;
606 	uint32_t head = ring->head;
607 	ssize_t totlen = 0;
608 
609 	while (head != ring->tail) {
610 		struct netmap_slot *slot = ring->slot + head;
611 
612 		totlen += slot->len;
613 		if ((slot->flags & NS_MOREFRAG) == 0)
614 			break;
615 		head = nm_ring_next(ring, head);
616 	}
617 
618 	return (totlen);
619 }
620 
621 static ssize_t
622 netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
623 {
624 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
625 	struct netmap_slot *slot = NULL;
626 	struct netmap_ring *ring;
627 	void *iov_frag_buf;
628 	int iov_frag_size;
629 	ssize_t totlen = 0;
630 	uint32_t head;
631 
632 	assert(iovcnt);
633 
634 	ring = priv->rx;
635 	head = ring->head;
636 	iov_frag_buf = iov->iov_base;
637 	iov_frag_size = iov->iov_len;
638 
639 	do {
640 		int nm_buf_len;
641 		void *nm_buf;
642 
643 		if (head == ring->tail) {
644 			return (0);
645 		}
646 
647 		slot = ring->slot + head;
648 		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
649 		nm_buf_len = slot->len;
650 
651 		for (;;) {
652 			int copylen = nm_buf_len < iov_frag_size ?
653 			    nm_buf_len : iov_frag_size;
654 
655 			memcpy(iov_frag_buf, nm_buf, copylen);
656 			nm_buf += copylen;
657 			nm_buf_len -= copylen;
658 			iov_frag_buf += copylen;
659 			iov_frag_size -= copylen;
660 			totlen += copylen;
661 
662 			if (nm_buf_len == 0) {
663 				break;
664 			}
665 
666 			iov++;
667 			iovcnt--;
668 			if (iovcnt == 0) {
669 				/* No space to receive. */
670 				WPRINTF(("Short iov, drop %zd bytes",
671 				    totlen));
672 				return (-ENOSPC);
673 			}
674 			iov_frag_buf = iov->iov_base;
675 			iov_frag_size = iov->iov_len;
676 		}
677 
678 		head = nm_ring_next(ring, head);
679 
680 	} while (slot->flags & NS_MOREFRAG);
681 
682 	/* Release slots to netmap. */
683 	ring->head = ring->cur = head;
684 
685 	return (totlen);
686 }
687 
688 static void
689 netmap_recv_enable(struct net_backend *be)
690 {
691 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
692 
693 	mevent_enable(priv->mevp);
694 }
695 
696 static void
697 netmap_recv_disable(struct net_backend *be)
698 {
699 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
700 
701 	mevent_disable(priv->mevp);
702 }
703 
704 static struct net_backend netmap_backend = {
705 	.prefix = "netmap",
706 	.priv_size = sizeof(struct netmap_priv),
707 	.init = netmap_init,
708 	.cleanup = netmap_cleanup,
709 	.send = netmap_send,
710 	.peek_recvlen = netmap_peek_recvlen,
711 	.recv = netmap_recv,
712 	.recv_enable = netmap_recv_enable,
713 	.recv_disable = netmap_recv_disable,
714 	.get_cap = netmap_get_cap,
715 	.set_cap = netmap_set_cap,
716 };
717 
718 /* A clone of the netmap backend, with a different prefix. */
719 static struct net_backend vale_backend = {
720 	.prefix = "vale",
721 	.priv_size = sizeof(struct netmap_priv),
722 	.init = netmap_init,
723 	.cleanup = netmap_cleanup,
724 	.send = netmap_send,
725 	.peek_recvlen = netmap_peek_recvlen,
726 	.recv = netmap_recv,
727 	.recv_enable = netmap_recv_enable,
728 	.recv_disable = netmap_recv_disable,
729 	.get_cap = netmap_get_cap,
730 	.set_cap = netmap_set_cap,
731 };
732 
/*
 * Add the netmap and vale templates to net_backend_set, the linker set
 * that netbe_init() walks to find a backend by device-name prefix.
 */
DATA_SET(net_backend_set, netmap_backend);
DATA_SET(net_backend_set, vale_backend);
735 
736 /*
737  * Initialize a backend and attach to the frontend.
738  * This is called during frontend initialization.
739  *  @pbe is a pointer to the backend to be initialized
740  *  @devname is the backend-name as supplied on the command line,
741  * 	e.g. -s 2:0,frontend-name,backend-name[,other-args]
742  *  @cb is the receive callback supplied by the frontend,
743  *	and it is invoked in the event loop when a receive
744  *	event is generated in the hypervisor,
745  *  @param is a pointer to the frontend, and normally used as
746  *	the argument for the callback.
747  */
748 int
749 netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb,
750     void *param)
751 {
752 	struct net_backend **pbe, *nbe, *tbe = NULL;
753 	int err;
754 
755 	/*
756 	 * Find the network backend that matches the user-provided
757 	 * device name. net_backend_set is built using a linker set.
758 	 */
759 	SET_FOREACH(pbe, net_backend_set) {
760 		if (strncmp(devname, (*pbe)->prefix,
761 		    strlen((*pbe)->prefix)) == 0) {
762 			tbe = *pbe;
763 			assert(tbe->init != NULL);
764 			assert(tbe->cleanup != NULL);
765 			assert(tbe->send != NULL);
766 			assert(tbe->recv != NULL);
767 			assert(tbe->get_cap != NULL);
768 			assert(tbe->set_cap != NULL);
769 			break;
770 		}
771 	}
772 
773 	*ret = NULL;
774 	if (tbe == NULL)
775 		return (EINVAL);
776 	nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
777 	*nbe = *tbe;	/* copy the template */
778 	nbe->fd = -1;
779 	nbe->sc = param;
780 	nbe->be_vnet_hdr_len = 0;
781 	nbe->fe_vnet_hdr_len = 0;
782 
783 	/* Initialize the backend. */
784 	err = nbe->init(nbe, devname, cb, param);
785 	if (err) {
786 		free(nbe);
787 		return (err);
788 	}
789 
790 	*ret = nbe;
791 
792 	return (0);
793 }
794 
795 void
796 netbe_cleanup(struct net_backend *be)
797 {
798 
799 	if (be != NULL) {
800 		be->cleanup(be);
801 		free(be);
802 	}
803 }
804 
805 uint64_t
806 netbe_get_cap(struct net_backend *be)
807 {
808 
809 	assert(be != NULL);
810 	return (be->get_cap(be));
811 }
812 
813 int
814 netbe_set_cap(struct net_backend *be, uint64_t features,
815 	      unsigned vnet_hdr_len)
816 {
817 	int ret;
818 
819 	assert(be != NULL);
820 
821 	/* There are only three valid lengths, i.e., 0, 10 and 12. */
822 	if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
823 		&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
824 		return (-1);
825 
826 	be->fe_vnet_hdr_len = vnet_hdr_len;
827 
828 	ret = be->set_cap(be, features, vnet_hdr_len);
829 	assert(be->be_vnet_hdr_len == 0 ||
830 	       be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
831 
832 	return (ret);
833 }
834 
835 ssize_t
836 netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
837 {
838 
839 	return (be->send(be, iov, iovcnt));
840 }
841 
842 ssize_t
843 netbe_peek_recvlen(struct net_backend *be)
844 {
845 
846 	return (be->peek_recvlen(be));
847 }
848 
849 /*
850  * Try to read a packet from the backend, without blocking.
851  * If no packets are available, return 0. In case of success, return
852  * the length of the packet just read. Return -1 in case of errors.
853  */
854 ssize_t
855 netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
856 {
857 
858 	return (be->recv(be, iov, iovcnt));
859 }
860 
/*
 * Read a packet from the backend and throw it away.
 * Returns the size of the discarded packet, or zero if no packet was
 * available. A negative error code is returned in case of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the dummybuf is only used to discard frames,
	 * so there is no need for it to be per-vtnet or locked.
	 * We only make it large enough for TSO-sized segment.
	 */
	static uint8_t dummybuf[65536 + 64];
	struct iovec sink = {
		.iov_base = dummybuf,
		.iov_len = sizeof(dummybuf),
	};

	return (netbe_recv(be, &sink, 1));
}
882 
883 void
884 netbe_rx_disable(struct net_backend *be)
885 {
886 
887 	return be->recv_disable(be);
888 }
889 
890 void
891 netbe_rx_enable(struct net_backend *be)
892 {
893 
894 	return be->recv_enable(be);
895 }
896 
897 size_t
898 netbe_get_vnet_hdr_len(struct net_backend *be)
899 {
900 
901 	return (be->be_vnet_hdr_len);
902 }
903