/*-
 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * This file implements multiple network backends (tap, netmap, ...),
 * to be used by network frontends such as virtio-net and e1000.
 * The API to access the backend (e.g. send/receive packets, negotiate
 * features) is exported by net_backends.h.
 */

#include <sys/types.h>		/* u_short etc */
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/cdefs.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <net/if.h>
#include <net/netmap.h>
#include <net/netmap_virt.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sysexits.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>
#include <poll.h>
#include <assert.h>


#include "iov.h"
#include "mevent.h"
#include "net_backends.h"

#include <sys/linker_set.h>
74*0ff7076bSVincenzo Maffione 75*0ff7076bSVincenzo Maffione /* 76*0ff7076bSVincenzo Maffione * Each network backend registers a set of function pointers that are 77*0ff7076bSVincenzo Maffione * used to implement the net backends API. 78*0ff7076bSVincenzo Maffione * This might need to be exposed if we implement backends in separate files. 79*0ff7076bSVincenzo Maffione */ 80*0ff7076bSVincenzo Maffione struct net_backend { 81*0ff7076bSVincenzo Maffione const char *prefix; /* prefix matching this backend */ 82*0ff7076bSVincenzo Maffione 83*0ff7076bSVincenzo Maffione /* 84*0ff7076bSVincenzo Maffione * Routines used to initialize and cleanup the resources needed 85*0ff7076bSVincenzo Maffione * by a backend. The cleanup function is used internally, 86*0ff7076bSVincenzo Maffione * and should not be called by the frontend. 87*0ff7076bSVincenzo Maffione */ 88*0ff7076bSVincenzo Maffione int (*init)(struct net_backend *be, const char *devname, 89*0ff7076bSVincenzo Maffione net_be_rxeof_t cb, void *param); 90*0ff7076bSVincenzo Maffione void (*cleanup)(struct net_backend *be); 91*0ff7076bSVincenzo Maffione 92*0ff7076bSVincenzo Maffione /* 93*0ff7076bSVincenzo Maffione * Called to serve a guest transmit request. The scatter-gather 94*0ff7076bSVincenzo Maffione * vector provided by the caller has 'iovcnt' elements and contains 95*0ff7076bSVincenzo Maffione * the packet to send. 96*0ff7076bSVincenzo Maffione */ 97*0ff7076bSVincenzo Maffione ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt); 98*0ff7076bSVincenzo Maffione 99*0ff7076bSVincenzo Maffione /* 100*0ff7076bSVincenzo Maffione * Called to receive a packet from the backend. When the function 101*0ff7076bSVincenzo Maffione * returns a positive value 'len', the scatter-gather vector 102*0ff7076bSVincenzo Maffione * provided by the caller contains a packet with such length. 
103*0ff7076bSVincenzo Maffione * The function returns 0 if the backend doesn't have a new packet to 104*0ff7076bSVincenzo Maffione * receive. 105*0ff7076bSVincenzo Maffione */ 106*0ff7076bSVincenzo Maffione ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt); 107*0ff7076bSVincenzo Maffione 108*0ff7076bSVincenzo Maffione /* 109*0ff7076bSVincenzo Maffione * Ask the backend for the virtio-net features it is able to 110*0ff7076bSVincenzo Maffione * support. Possible features are TSO, UFO and checksum offloading 111*0ff7076bSVincenzo Maffione * in both rx and tx direction and for both IPv4 and IPv6. 112*0ff7076bSVincenzo Maffione */ 113*0ff7076bSVincenzo Maffione uint64_t (*get_cap)(struct net_backend *be); 114*0ff7076bSVincenzo Maffione 115*0ff7076bSVincenzo Maffione /* 116*0ff7076bSVincenzo Maffione * Tell the backend to enable/disable the specified virtio-net 117*0ff7076bSVincenzo Maffione * features (capabilities). 118*0ff7076bSVincenzo Maffione */ 119*0ff7076bSVincenzo Maffione int (*set_cap)(struct net_backend *be, uint64_t features, 120*0ff7076bSVincenzo Maffione unsigned int vnet_hdr_len); 121*0ff7076bSVincenzo Maffione 122*0ff7076bSVincenzo Maffione struct pci_vtnet_softc *sc; 123*0ff7076bSVincenzo Maffione int fd; 124*0ff7076bSVincenzo Maffione 125*0ff7076bSVincenzo Maffione /* 126*0ff7076bSVincenzo Maffione * Length of the virtio-net header used by the backend and the 127*0ff7076bSVincenzo Maffione * frontend, respectively. A zero value means that the header 128*0ff7076bSVincenzo Maffione * is not used. 129*0ff7076bSVincenzo Maffione */ 130*0ff7076bSVincenzo Maffione unsigned int be_vnet_hdr_len; 131*0ff7076bSVincenzo Maffione unsigned int fe_vnet_hdr_len; 132*0ff7076bSVincenzo Maffione 133*0ff7076bSVincenzo Maffione /* Size of backend-specific private data. */ 134*0ff7076bSVincenzo Maffione size_t priv_size; 135*0ff7076bSVincenzo Maffione 136*0ff7076bSVincenzo Maffione /* Room for backend-specific data. 
*/ 137*0ff7076bSVincenzo Maffione char opaque[0]; 138*0ff7076bSVincenzo Maffione }; 139*0ff7076bSVincenzo Maffione 140*0ff7076bSVincenzo Maffione SET_DECLARE(net_backend_set, struct net_backend); 141*0ff7076bSVincenzo Maffione 142*0ff7076bSVincenzo Maffione #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr) 143*0ff7076bSVincenzo Maffione 144*0ff7076bSVincenzo Maffione #define WPRINTF(params) printf params 145*0ff7076bSVincenzo Maffione 146*0ff7076bSVincenzo Maffione /* 147*0ff7076bSVincenzo Maffione * The tap backend 148*0ff7076bSVincenzo Maffione */ 149*0ff7076bSVincenzo Maffione 150*0ff7076bSVincenzo Maffione struct tap_priv { 151*0ff7076bSVincenzo Maffione struct mevent *mevp; 152*0ff7076bSVincenzo Maffione }; 153*0ff7076bSVincenzo Maffione 154*0ff7076bSVincenzo Maffione static void 155*0ff7076bSVincenzo Maffione tap_cleanup(struct net_backend *be) 156*0ff7076bSVincenzo Maffione { 157*0ff7076bSVincenzo Maffione struct tap_priv *priv = (struct tap_priv *)be->opaque; 158*0ff7076bSVincenzo Maffione 159*0ff7076bSVincenzo Maffione if (priv->mevp) { 160*0ff7076bSVincenzo Maffione mevent_delete(priv->mevp); 161*0ff7076bSVincenzo Maffione } 162*0ff7076bSVincenzo Maffione if (be->fd != -1) { 163*0ff7076bSVincenzo Maffione close(be->fd); 164*0ff7076bSVincenzo Maffione be->fd = -1; 165*0ff7076bSVincenzo Maffione } 166*0ff7076bSVincenzo Maffione } 167*0ff7076bSVincenzo Maffione 168*0ff7076bSVincenzo Maffione static int 169*0ff7076bSVincenzo Maffione tap_init(struct net_backend *be, const char *devname, 170*0ff7076bSVincenzo Maffione net_be_rxeof_t cb, void *param) 171*0ff7076bSVincenzo Maffione { 172*0ff7076bSVincenzo Maffione struct tap_priv *priv = (struct tap_priv *)be->opaque; 173*0ff7076bSVincenzo Maffione char tbuf[80]; 174*0ff7076bSVincenzo Maffione int fd; 175*0ff7076bSVincenzo Maffione int opt = 1; 176*0ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM 177*0ff7076bSVincenzo Maffione cap_rights_t rights; 178*0ff7076bSVincenzo Maffione #endif 179*0ff7076bSVincenzo 
Maffione 180*0ff7076bSVincenzo Maffione if (cb == NULL) { 181*0ff7076bSVincenzo Maffione WPRINTF(("TAP backend requires non-NULL callback\n")); 182*0ff7076bSVincenzo Maffione return (-1); 183*0ff7076bSVincenzo Maffione } 184*0ff7076bSVincenzo Maffione 185*0ff7076bSVincenzo Maffione strcpy(tbuf, "/dev/"); 186*0ff7076bSVincenzo Maffione strlcat(tbuf, devname, sizeof(tbuf)); 187*0ff7076bSVincenzo Maffione 188*0ff7076bSVincenzo Maffione fd = open(tbuf, O_RDWR); 189*0ff7076bSVincenzo Maffione if (fd == -1) { 190*0ff7076bSVincenzo Maffione WPRINTF(("open of tap device %s failed\n", tbuf)); 191*0ff7076bSVincenzo Maffione goto error; 192*0ff7076bSVincenzo Maffione } 193*0ff7076bSVincenzo Maffione 194*0ff7076bSVincenzo Maffione /* 195*0ff7076bSVincenzo Maffione * Set non-blocking and register for read 196*0ff7076bSVincenzo Maffione * notifications with the event loop 197*0ff7076bSVincenzo Maffione */ 198*0ff7076bSVincenzo Maffione if (ioctl(fd, FIONBIO, &opt) < 0) { 199*0ff7076bSVincenzo Maffione WPRINTF(("tap device O_NONBLOCK failed\n")); 200*0ff7076bSVincenzo Maffione goto error; 201*0ff7076bSVincenzo Maffione } 202*0ff7076bSVincenzo Maffione 203*0ff7076bSVincenzo Maffione #ifndef WITHOUT_CAPSICUM 204*0ff7076bSVincenzo Maffione cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 205*0ff7076bSVincenzo Maffione if (caph_rights_limit(fd, &rights) == -1) 206*0ff7076bSVincenzo Maffione errx(EX_OSERR, "Unable to apply rights for sandbox"); 207*0ff7076bSVincenzo Maffione #endif 208*0ff7076bSVincenzo Maffione 209*0ff7076bSVincenzo Maffione priv->mevp = mevent_add(fd, EVF_READ, cb, param); 210*0ff7076bSVincenzo Maffione if (priv->mevp == NULL) { 211*0ff7076bSVincenzo Maffione WPRINTF(("Could not register event\n")); 212*0ff7076bSVincenzo Maffione goto error; 213*0ff7076bSVincenzo Maffione } 214*0ff7076bSVincenzo Maffione 215*0ff7076bSVincenzo Maffione be->fd = fd; 216*0ff7076bSVincenzo Maffione 217*0ff7076bSVincenzo Maffione return (0); 218*0ff7076bSVincenzo Maffione 
219*0ff7076bSVincenzo Maffione error: 220*0ff7076bSVincenzo Maffione tap_cleanup(be); 221*0ff7076bSVincenzo Maffione return (-1); 222*0ff7076bSVincenzo Maffione } 223*0ff7076bSVincenzo Maffione 224*0ff7076bSVincenzo Maffione /* 225*0ff7076bSVincenzo Maffione * Called to send a buffer chain out to the tap device 226*0ff7076bSVincenzo Maffione */ 227*0ff7076bSVincenzo Maffione static ssize_t 228*0ff7076bSVincenzo Maffione tap_send(struct net_backend *be, struct iovec *iov, int iovcnt) 229*0ff7076bSVincenzo Maffione { 230*0ff7076bSVincenzo Maffione return (writev(be->fd, iov, iovcnt)); 231*0ff7076bSVincenzo Maffione } 232*0ff7076bSVincenzo Maffione 233*0ff7076bSVincenzo Maffione static ssize_t 234*0ff7076bSVincenzo Maffione tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 235*0ff7076bSVincenzo Maffione { 236*0ff7076bSVincenzo Maffione ssize_t ret; 237*0ff7076bSVincenzo Maffione 238*0ff7076bSVincenzo Maffione /* Should never be called without a valid tap fd */ 239*0ff7076bSVincenzo Maffione assert(be->fd != -1); 240*0ff7076bSVincenzo Maffione 241*0ff7076bSVincenzo Maffione ret = readv(be->fd, iov, iovcnt); 242*0ff7076bSVincenzo Maffione 243*0ff7076bSVincenzo Maffione if (ret < 0 && errno == EWOULDBLOCK) { 244*0ff7076bSVincenzo Maffione return (0); 245*0ff7076bSVincenzo Maffione } 246*0ff7076bSVincenzo Maffione 247*0ff7076bSVincenzo Maffione return (ret); 248*0ff7076bSVincenzo Maffione } 249*0ff7076bSVincenzo Maffione 250*0ff7076bSVincenzo Maffione static uint64_t 251*0ff7076bSVincenzo Maffione tap_get_cap(struct net_backend *be) 252*0ff7076bSVincenzo Maffione { 253*0ff7076bSVincenzo Maffione 254*0ff7076bSVincenzo Maffione return (0); /* no capabilities for now */ 255*0ff7076bSVincenzo Maffione } 256*0ff7076bSVincenzo Maffione 257*0ff7076bSVincenzo Maffione static int 258*0ff7076bSVincenzo Maffione tap_set_cap(struct net_backend *be, uint64_t features, 259*0ff7076bSVincenzo Maffione unsigned vnet_hdr_len) 260*0ff7076bSVincenzo Maffione { 
261*0ff7076bSVincenzo Maffione 262*0ff7076bSVincenzo Maffione return ((features || vnet_hdr_len) ? -1 : 0); 263*0ff7076bSVincenzo Maffione } 264*0ff7076bSVincenzo Maffione 265*0ff7076bSVincenzo Maffione static struct net_backend tap_backend = { 266*0ff7076bSVincenzo Maffione .prefix = "tap", 267*0ff7076bSVincenzo Maffione .priv_size = sizeof(struct tap_priv), 268*0ff7076bSVincenzo Maffione .init = tap_init, 269*0ff7076bSVincenzo Maffione .cleanup = tap_cleanup, 270*0ff7076bSVincenzo Maffione .send = tap_send, 271*0ff7076bSVincenzo Maffione .recv = tap_recv, 272*0ff7076bSVincenzo Maffione .get_cap = tap_get_cap, 273*0ff7076bSVincenzo Maffione .set_cap = tap_set_cap, 274*0ff7076bSVincenzo Maffione }; 275*0ff7076bSVincenzo Maffione 276*0ff7076bSVincenzo Maffione /* A clone of the tap backend, with a different prefix. */ 277*0ff7076bSVincenzo Maffione static struct net_backend vmnet_backend = { 278*0ff7076bSVincenzo Maffione .prefix = "vmnet", 279*0ff7076bSVincenzo Maffione .priv_size = sizeof(struct tap_priv), 280*0ff7076bSVincenzo Maffione .init = tap_init, 281*0ff7076bSVincenzo Maffione .cleanup = tap_cleanup, 282*0ff7076bSVincenzo Maffione .send = tap_send, 283*0ff7076bSVincenzo Maffione .recv = tap_recv, 284*0ff7076bSVincenzo Maffione .get_cap = tap_get_cap, 285*0ff7076bSVincenzo Maffione .set_cap = tap_set_cap, 286*0ff7076bSVincenzo Maffione }; 287*0ff7076bSVincenzo Maffione 288*0ff7076bSVincenzo Maffione DATA_SET(net_backend_set, tap_backend); 289*0ff7076bSVincenzo Maffione DATA_SET(net_backend_set, vmnet_backend); 290*0ff7076bSVincenzo Maffione 291*0ff7076bSVincenzo Maffione /* 292*0ff7076bSVincenzo Maffione * The netmap backend 293*0ff7076bSVincenzo Maffione */ 294*0ff7076bSVincenzo Maffione 295*0ff7076bSVincenzo Maffione /* The virtio-net features supported by netmap. 
*/ 296*0ff7076bSVincenzo Maffione #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ 297*0ff7076bSVincenzo Maffione VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ 298*0ff7076bSVincenzo Maffione VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ 299*0ff7076bSVincenzo Maffione VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO) 300*0ff7076bSVincenzo Maffione 301*0ff7076bSVincenzo Maffione struct netmap_priv { 302*0ff7076bSVincenzo Maffione char ifname[IFNAMSIZ]; 303*0ff7076bSVincenzo Maffione struct nm_desc *nmd; 304*0ff7076bSVincenzo Maffione uint16_t memid; 305*0ff7076bSVincenzo Maffione struct netmap_ring *rx; 306*0ff7076bSVincenzo Maffione struct netmap_ring *tx; 307*0ff7076bSVincenzo Maffione struct mevent *mevp; 308*0ff7076bSVincenzo Maffione net_be_rxeof_t cb; 309*0ff7076bSVincenzo Maffione void *cb_param; 310*0ff7076bSVincenzo Maffione }; 311*0ff7076bSVincenzo Maffione 312*0ff7076bSVincenzo Maffione static void 313*0ff7076bSVincenzo Maffione nmreq_init(struct nmreq *req, char *ifname) 314*0ff7076bSVincenzo Maffione { 315*0ff7076bSVincenzo Maffione 316*0ff7076bSVincenzo Maffione memset(req, 0, sizeof(*req)); 317*0ff7076bSVincenzo Maffione strlcpy(req->nr_name, ifname, sizeof(req->nr_name)); 318*0ff7076bSVincenzo Maffione req->nr_version = NETMAP_API; 319*0ff7076bSVincenzo Maffione } 320*0ff7076bSVincenzo Maffione 321*0ff7076bSVincenzo Maffione static int 322*0ff7076bSVincenzo Maffione netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) 323*0ff7076bSVincenzo Maffione { 324*0ff7076bSVincenzo Maffione int err; 325*0ff7076bSVincenzo Maffione struct nmreq req; 326*0ff7076bSVincenzo Maffione struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 327*0ff7076bSVincenzo Maffione 328*0ff7076bSVincenzo Maffione nmreq_init(&req, priv->ifname); 329*0ff7076bSVincenzo Maffione req.nr_cmd = NETMAP_BDG_VNET_HDR; 330*0ff7076bSVincenzo Maffione req.nr_arg1 = vnet_hdr_len; 331*0ff7076bSVincenzo Maffione err = ioctl(be->fd, NIOCREGIF, 
&req); 332*0ff7076bSVincenzo Maffione if (err) { 333*0ff7076bSVincenzo Maffione WPRINTF(("Unable to set vnet header length %d\n", 334*0ff7076bSVincenzo Maffione vnet_hdr_len)); 335*0ff7076bSVincenzo Maffione return (err); 336*0ff7076bSVincenzo Maffione } 337*0ff7076bSVincenzo Maffione 338*0ff7076bSVincenzo Maffione be->be_vnet_hdr_len = vnet_hdr_len; 339*0ff7076bSVincenzo Maffione 340*0ff7076bSVincenzo Maffione return (0); 341*0ff7076bSVincenzo Maffione } 342*0ff7076bSVincenzo Maffione 343*0ff7076bSVincenzo Maffione static int 344*0ff7076bSVincenzo Maffione netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len) 345*0ff7076bSVincenzo Maffione { 346*0ff7076bSVincenzo Maffione int prev_hdr_len = be->be_vnet_hdr_len; 347*0ff7076bSVincenzo Maffione int ret; 348*0ff7076bSVincenzo Maffione 349*0ff7076bSVincenzo Maffione if (vnet_hdr_len == prev_hdr_len) { 350*0ff7076bSVincenzo Maffione return (1); 351*0ff7076bSVincenzo Maffione } 352*0ff7076bSVincenzo Maffione 353*0ff7076bSVincenzo Maffione ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len); 354*0ff7076bSVincenzo Maffione if (ret) { 355*0ff7076bSVincenzo Maffione return (0); 356*0ff7076bSVincenzo Maffione } 357*0ff7076bSVincenzo Maffione 358*0ff7076bSVincenzo Maffione netmap_set_vnet_hdr_len(be, prev_hdr_len); 359*0ff7076bSVincenzo Maffione 360*0ff7076bSVincenzo Maffione return (1); 361*0ff7076bSVincenzo Maffione } 362*0ff7076bSVincenzo Maffione 363*0ff7076bSVincenzo Maffione static uint64_t 364*0ff7076bSVincenzo Maffione netmap_get_cap(struct net_backend *be) 365*0ff7076bSVincenzo Maffione { 366*0ff7076bSVincenzo Maffione 367*0ff7076bSVincenzo Maffione return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ? 
368*0ff7076bSVincenzo Maffione NETMAP_FEATURES : 0); 369*0ff7076bSVincenzo Maffione } 370*0ff7076bSVincenzo Maffione 371*0ff7076bSVincenzo Maffione static int 372*0ff7076bSVincenzo Maffione netmap_set_cap(struct net_backend *be, uint64_t features, 373*0ff7076bSVincenzo Maffione unsigned vnet_hdr_len) 374*0ff7076bSVincenzo Maffione { 375*0ff7076bSVincenzo Maffione 376*0ff7076bSVincenzo Maffione return (netmap_set_vnet_hdr_len(be, vnet_hdr_len)); 377*0ff7076bSVincenzo Maffione } 378*0ff7076bSVincenzo Maffione 379*0ff7076bSVincenzo Maffione static int 380*0ff7076bSVincenzo Maffione netmap_init(struct net_backend *be, const char *devname, 381*0ff7076bSVincenzo Maffione net_be_rxeof_t cb, void *param) 382*0ff7076bSVincenzo Maffione { 383*0ff7076bSVincenzo Maffione struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 384*0ff7076bSVincenzo Maffione 385*0ff7076bSVincenzo Maffione strlcpy(priv->ifname, devname, sizeof(priv->ifname)); 386*0ff7076bSVincenzo Maffione priv->ifname[sizeof(priv->ifname) - 1] = '\0'; 387*0ff7076bSVincenzo Maffione 388*0ff7076bSVincenzo Maffione priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); 389*0ff7076bSVincenzo Maffione if (priv->nmd == NULL) { 390*0ff7076bSVincenzo Maffione WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n", 391*0ff7076bSVincenzo Maffione devname, strerror(errno))); 392*0ff7076bSVincenzo Maffione free(priv); 393*0ff7076bSVincenzo Maffione return (-1); 394*0ff7076bSVincenzo Maffione } 395*0ff7076bSVincenzo Maffione 396*0ff7076bSVincenzo Maffione priv->memid = priv->nmd->req.nr_arg2; 397*0ff7076bSVincenzo Maffione priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0); 398*0ff7076bSVincenzo Maffione priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0); 399*0ff7076bSVincenzo Maffione priv->cb = cb; 400*0ff7076bSVincenzo Maffione priv->cb_param = param; 401*0ff7076bSVincenzo Maffione be->fd = priv->nmd->fd; 402*0ff7076bSVincenzo Maffione 403*0ff7076bSVincenzo Maffione priv->mevp = mevent_add(be->fd, EVF_READ, cb, 
param); 404*0ff7076bSVincenzo Maffione if (priv->mevp == NULL) { 405*0ff7076bSVincenzo Maffione WPRINTF(("Could not register event\n")); 406*0ff7076bSVincenzo Maffione return (-1); 407*0ff7076bSVincenzo Maffione } 408*0ff7076bSVincenzo Maffione 409*0ff7076bSVincenzo Maffione return (0); 410*0ff7076bSVincenzo Maffione } 411*0ff7076bSVincenzo Maffione 412*0ff7076bSVincenzo Maffione static void 413*0ff7076bSVincenzo Maffione netmap_cleanup(struct net_backend *be) 414*0ff7076bSVincenzo Maffione { 415*0ff7076bSVincenzo Maffione struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 416*0ff7076bSVincenzo Maffione 417*0ff7076bSVincenzo Maffione if (priv->mevp) { 418*0ff7076bSVincenzo Maffione mevent_delete(priv->mevp); 419*0ff7076bSVincenzo Maffione } 420*0ff7076bSVincenzo Maffione if (priv->nmd) { 421*0ff7076bSVincenzo Maffione nm_close(priv->nmd); 422*0ff7076bSVincenzo Maffione } 423*0ff7076bSVincenzo Maffione be->fd = -1; 424*0ff7076bSVincenzo Maffione } 425*0ff7076bSVincenzo Maffione 426*0ff7076bSVincenzo Maffione static ssize_t 427*0ff7076bSVincenzo Maffione netmap_send(struct net_backend *be, struct iovec *iov, 428*0ff7076bSVincenzo Maffione int iovcnt) 429*0ff7076bSVincenzo Maffione { 430*0ff7076bSVincenzo Maffione struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 431*0ff7076bSVincenzo Maffione struct netmap_ring *ring; 432*0ff7076bSVincenzo Maffione ssize_t totlen = 0; 433*0ff7076bSVincenzo Maffione int nm_buf_size; 434*0ff7076bSVincenzo Maffione int nm_buf_len; 435*0ff7076bSVincenzo Maffione uint32_t head; 436*0ff7076bSVincenzo Maffione void *nm_buf; 437*0ff7076bSVincenzo Maffione int j; 438*0ff7076bSVincenzo Maffione 439*0ff7076bSVincenzo Maffione ring = priv->tx; 440*0ff7076bSVincenzo Maffione head = ring->head; 441*0ff7076bSVincenzo Maffione if (head == ring->tail) { 442*0ff7076bSVincenzo Maffione WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt))); 443*0ff7076bSVincenzo Maffione goto txsync; 444*0ff7076bSVincenzo Maffione } 
445*0ff7076bSVincenzo Maffione nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 446*0ff7076bSVincenzo Maffione nm_buf_size = ring->nr_buf_size; 447*0ff7076bSVincenzo Maffione nm_buf_len = 0; 448*0ff7076bSVincenzo Maffione 449*0ff7076bSVincenzo Maffione for (j = 0; j < iovcnt; j++) { 450*0ff7076bSVincenzo Maffione int iov_frag_size = iov[j].iov_len; 451*0ff7076bSVincenzo Maffione void *iov_frag_buf = iov[j].iov_base; 452*0ff7076bSVincenzo Maffione 453*0ff7076bSVincenzo Maffione totlen += iov_frag_size; 454*0ff7076bSVincenzo Maffione 455*0ff7076bSVincenzo Maffione /* 456*0ff7076bSVincenzo Maffione * Split each iovec fragment over more netmap slots, if 457*0ff7076bSVincenzo Maffione * necessary. 458*0ff7076bSVincenzo Maffione */ 459*0ff7076bSVincenzo Maffione for (;;) { 460*0ff7076bSVincenzo Maffione int copylen; 461*0ff7076bSVincenzo Maffione 462*0ff7076bSVincenzo Maffione copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size; 463*0ff7076bSVincenzo Maffione memcpy(nm_buf, iov_frag_buf, copylen); 464*0ff7076bSVincenzo Maffione 465*0ff7076bSVincenzo Maffione iov_frag_buf += copylen; 466*0ff7076bSVincenzo Maffione iov_frag_size -= copylen; 467*0ff7076bSVincenzo Maffione nm_buf += copylen; 468*0ff7076bSVincenzo Maffione nm_buf_size -= copylen; 469*0ff7076bSVincenzo Maffione nm_buf_len += copylen; 470*0ff7076bSVincenzo Maffione 471*0ff7076bSVincenzo Maffione if (iov_frag_size == 0) { 472*0ff7076bSVincenzo Maffione break; 473*0ff7076bSVincenzo Maffione } 474*0ff7076bSVincenzo Maffione 475*0ff7076bSVincenzo Maffione ring->slot[head].len = nm_buf_len; 476*0ff7076bSVincenzo Maffione ring->slot[head].flags = NS_MOREFRAG; 477*0ff7076bSVincenzo Maffione head = nm_ring_next(ring, head); 478*0ff7076bSVincenzo Maffione if (head == ring->tail) { 479*0ff7076bSVincenzo Maffione /* 480*0ff7076bSVincenzo Maffione * We ran out of netmap slots while 481*0ff7076bSVincenzo Maffione * splitting the iovec fragments. 
482*0ff7076bSVincenzo Maffione */ 483*0ff7076bSVincenzo Maffione WPRINTF(("No space, drop %zu bytes\n", 484*0ff7076bSVincenzo Maffione count_iov(iov, iovcnt))); 485*0ff7076bSVincenzo Maffione goto txsync; 486*0ff7076bSVincenzo Maffione } 487*0ff7076bSVincenzo Maffione nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 488*0ff7076bSVincenzo Maffione nm_buf_size = ring->nr_buf_size; 489*0ff7076bSVincenzo Maffione nm_buf_len = 0; 490*0ff7076bSVincenzo Maffione } 491*0ff7076bSVincenzo Maffione } 492*0ff7076bSVincenzo Maffione 493*0ff7076bSVincenzo Maffione /* Complete the last slot, which must not have NS_MOREFRAG set. */ 494*0ff7076bSVincenzo Maffione ring->slot[head].len = nm_buf_len; 495*0ff7076bSVincenzo Maffione ring->slot[head].flags = 0; 496*0ff7076bSVincenzo Maffione head = nm_ring_next(ring, head); 497*0ff7076bSVincenzo Maffione 498*0ff7076bSVincenzo Maffione /* Now update ring->head and ring->cur. */ 499*0ff7076bSVincenzo Maffione ring->head = ring->cur = head; 500*0ff7076bSVincenzo Maffione txsync: 501*0ff7076bSVincenzo Maffione ioctl(be->fd, NIOCTXSYNC, NULL); 502*0ff7076bSVincenzo Maffione 503*0ff7076bSVincenzo Maffione return (totlen); 504*0ff7076bSVincenzo Maffione } 505*0ff7076bSVincenzo Maffione 506*0ff7076bSVincenzo Maffione static ssize_t 507*0ff7076bSVincenzo Maffione netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 508*0ff7076bSVincenzo Maffione { 509*0ff7076bSVincenzo Maffione struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 510*0ff7076bSVincenzo Maffione struct netmap_slot *slot = NULL; 511*0ff7076bSVincenzo Maffione struct netmap_ring *ring; 512*0ff7076bSVincenzo Maffione void *iov_frag_buf; 513*0ff7076bSVincenzo Maffione int iov_frag_size; 514*0ff7076bSVincenzo Maffione ssize_t totlen = 0; 515*0ff7076bSVincenzo Maffione uint32_t head; 516*0ff7076bSVincenzo Maffione 517*0ff7076bSVincenzo Maffione assert(iovcnt); 518*0ff7076bSVincenzo Maffione 519*0ff7076bSVincenzo Maffione ring = priv->rx; 520*0ff7076bSVincenzo 
Maffione head = ring->head; 521*0ff7076bSVincenzo Maffione iov_frag_buf = iov->iov_base; 522*0ff7076bSVincenzo Maffione iov_frag_size = iov->iov_len; 523*0ff7076bSVincenzo Maffione 524*0ff7076bSVincenzo Maffione do { 525*0ff7076bSVincenzo Maffione int nm_buf_len; 526*0ff7076bSVincenzo Maffione void *nm_buf; 527*0ff7076bSVincenzo Maffione 528*0ff7076bSVincenzo Maffione if (head == ring->tail) { 529*0ff7076bSVincenzo Maffione return (0); 530*0ff7076bSVincenzo Maffione } 531*0ff7076bSVincenzo Maffione 532*0ff7076bSVincenzo Maffione slot = ring->slot + head; 533*0ff7076bSVincenzo Maffione nm_buf = NETMAP_BUF(ring, slot->buf_idx); 534*0ff7076bSVincenzo Maffione nm_buf_len = slot->len; 535*0ff7076bSVincenzo Maffione 536*0ff7076bSVincenzo Maffione for (;;) { 537*0ff7076bSVincenzo Maffione int copylen = nm_buf_len < iov_frag_size ? 538*0ff7076bSVincenzo Maffione nm_buf_len : iov_frag_size; 539*0ff7076bSVincenzo Maffione 540*0ff7076bSVincenzo Maffione memcpy(iov_frag_buf, nm_buf, copylen); 541*0ff7076bSVincenzo Maffione nm_buf += copylen; 542*0ff7076bSVincenzo Maffione nm_buf_len -= copylen; 543*0ff7076bSVincenzo Maffione iov_frag_buf += copylen; 544*0ff7076bSVincenzo Maffione iov_frag_size -= copylen; 545*0ff7076bSVincenzo Maffione totlen += copylen; 546*0ff7076bSVincenzo Maffione 547*0ff7076bSVincenzo Maffione if (nm_buf_len == 0) { 548*0ff7076bSVincenzo Maffione break; 549*0ff7076bSVincenzo Maffione } 550*0ff7076bSVincenzo Maffione 551*0ff7076bSVincenzo Maffione iov++; 552*0ff7076bSVincenzo Maffione iovcnt--; 553*0ff7076bSVincenzo Maffione if (iovcnt == 0) { 554*0ff7076bSVincenzo Maffione /* No space to receive. 
*/ 555*0ff7076bSVincenzo Maffione WPRINTF(("Short iov, drop %zd bytes\n", 556*0ff7076bSVincenzo Maffione totlen)); 557*0ff7076bSVincenzo Maffione return (-ENOSPC); 558*0ff7076bSVincenzo Maffione } 559*0ff7076bSVincenzo Maffione iov_frag_buf = iov->iov_base; 560*0ff7076bSVincenzo Maffione iov_frag_size = iov->iov_len; 561*0ff7076bSVincenzo Maffione } 562*0ff7076bSVincenzo Maffione 563*0ff7076bSVincenzo Maffione head = nm_ring_next(ring, head); 564*0ff7076bSVincenzo Maffione 565*0ff7076bSVincenzo Maffione } while (slot->flags & NS_MOREFRAG); 566*0ff7076bSVincenzo Maffione 567*0ff7076bSVincenzo Maffione /* Release slots to netmap. */ 568*0ff7076bSVincenzo Maffione ring->head = ring->cur = head; 569*0ff7076bSVincenzo Maffione 570*0ff7076bSVincenzo Maffione return (totlen); 571*0ff7076bSVincenzo Maffione } 572*0ff7076bSVincenzo Maffione 573*0ff7076bSVincenzo Maffione static struct net_backend netmap_backend = { 574*0ff7076bSVincenzo Maffione .prefix = "netmap", 575*0ff7076bSVincenzo Maffione .priv_size = sizeof(struct netmap_priv), 576*0ff7076bSVincenzo Maffione .init = netmap_init, 577*0ff7076bSVincenzo Maffione .cleanup = netmap_cleanup, 578*0ff7076bSVincenzo Maffione .send = netmap_send, 579*0ff7076bSVincenzo Maffione .recv = netmap_recv, 580*0ff7076bSVincenzo Maffione .get_cap = netmap_get_cap, 581*0ff7076bSVincenzo Maffione .set_cap = netmap_set_cap, 582*0ff7076bSVincenzo Maffione }; 583*0ff7076bSVincenzo Maffione 584*0ff7076bSVincenzo Maffione /* A clone of the netmap backend, with a different prefix. 
*/ 585*0ff7076bSVincenzo Maffione static struct net_backend vale_backend = { 586*0ff7076bSVincenzo Maffione .prefix = "vale", 587*0ff7076bSVincenzo Maffione .priv_size = sizeof(struct netmap_priv), 588*0ff7076bSVincenzo Maffione .init = netmap_init, 589*0ff7076bSVincenzo Maffione .cleanup = netmap_cleanup, 590*0ff7076bSVincenzo Maffione .send = netmap_send, 591*0ff7076bSVincenzo Maffione .recv = netmap_recv, 592*0ff7076bSVincenzo Maffione .get_cap = netmap_get_cap, 593*0ff7076bSVincenzo Maffione .set_cap = netmap_set_cap, 594*0ff7076bSVincenzo Maffione }; 595*0ff7076bSVincenzo Maffione 596*0ff7076bSVincenzo Maffione DATA_SET(net_backend_set, netmap_backend); 597*0ff7076bSVincenzo Maffione DATA_SET(net_backend_set, vale_backend); 598*0ff7076bSVincenzo Maffione 599*0ff7076bSVincenzo Maffione /* 600*0ff7076bSVincenzo Maffione * Initialize a backend and attach to the frontend. 601*0ff7076bSVincenzo Maffione * This is called during frontend initialization. 602*0ff7076bSVincenzo Maffione * @pbe is a pointer to the backend to be initialized 603*0ff7076bSVincenzo Maffione * @devname is the backend-name as supplied on the command line, 604*0ff7076bSVincenzo Maffione * e.g. -s 2:0,frontend-name,backend-name[,other-args] 605*0ff7076bSVincenzo Maffione * @cb is the receive callback supplied by the frontend, 606*0ff7076bSVincenzo Maffione * and it is invoked in the event loop when a receive 607*0ff7076bSVincenzo Maffione * event is generated in the hypervisor, 608*0ff7076bSVincenzo Maffione * @param is a pointer to the frontend, and normally used as 609*0ff7076bSVincenzo Maffione * the argument for the callback. 
610*0ff7076bSVincenzo Maffione */ 611*0ff7076bSVincenzo Maffione int 612*0ff7076bSVincenzo Maffione netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, 613*0ff7076bSVincenzo Maffione void *param) 614*0ff7076bSVincenzo Maffione { 615*0ff7076bSVincenzo Maffione struct net_backend **pbe, *nbe, *tbe = NULL; 616*0ff7076bSVincenzo Maffione int err; 617*0ff7076bSVincenzo Maffione 618*0ff7076bSVincenzo Maffione /* 619*0ff7076bSVincenzo Maffione * Find the network backend that matches the user-provided 620*0ff7076bSVincenzo Maffione * device name. net_backend_set is built using a linker set. 621*0ff7076bSVincenzo Maffione */ 622*0ff7076bSVincenzo Maffione SET_FOREACH(pbe, net_backend_set) { 623*0ff7076bSVincenzo Maffione if (strncmp(devname, (*pbe)->prefix, 624*0ff7076bSVincenzo Maffione strlen((*pbe)->prefix)) == 0) { 625*0ff7076bSVincenzo Maffione tbe = *pbe; 626*0ff7076bSVincenzo Maffione assert(tbe->init != NULL); 627*0ff7076bSVincenzo Maffione assert(tbe->cleanup != NULL); 628*0ff7076bSVincenzo Maffione assert(tbe->send != NULL); 629*0ff7076bSVincenzo Maffione assert(tbe->recv != NULL); 630*0ff7076bSVincenzo Maffione assert(tbe->get_cap != NULL); 631*0ff7076bSVincenzo Maffione assert(tbe->set_cap != NULL); 632*0ff7076bSVincenzo Maffione break; 633*0ff7076bSVincenzo Maffione } 634*0ff7076bSVincenzo Maffione } 635*0ff7076bSVincenzo Maffione 636*0ff7076bSVincenzo Maffione *ret = NULL; 637*0ff7076bSVincenzo Maffione if (tbe == NULL) 638*0ff7076bSVincenzo Maffione return (EINVAL); 639*0ff7076bSVincenzo Maffione nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); 640*0ff7076bSVincenzo Maffione *nbe = *tbe; /* copy the template */ 641*0ff7076bSVincenzo Maffione nbe->fd = -1; 642*0ff7076bSVincenzo Maffione nbe->sc = param; 643*0ff7076bSVincenzo Maffione nbe->be_vnet_hdr_len = 0; 644*0ff7076bSVincenzo Maffione nbe->fe_vnet_hdr_len = 0; 645*0ff7076bSVincenzo Maffione 646*0ff7076bSVincenzo Maffione /* Initialize the backend. 
*/ 647*0ff7076bSVincenzo Maffione err = nbe->init(nbe, devname, cb, param); 648*0ff7076bSVincenzo Maffione if (err) { 649*0ff7076bSVincenzo Maffione free(nbe); 650*0ff7076bSVincenzo Maffione return (err); 651*0ff7076bSVincenzo Maffione } 652*0ff7076bSVincenzo Maffione 653*0ff7076bSVincenzo Maffione *ret = nbe; 654*0ff7076bSVincenzo Maffione 655*0ff7076bSVincenzo Maffione return (0); 656*0ff7076bSVincenzo Maffione } 657*0ff7076bSVincenzo Maffione 658*0ff7076bSVincenzo Maffione void 659*0ff7076bSVincenzo Maffione netbe_cleanup(struct net_backend *be) 660*0ff7076bSVincenzo Maffione { 661*0ff7076bSVincenzo Maffione 662*0ff7076bSVincenzo Maffione if (be != NULL) { 663*0ff7076bSVincenzo Maffione be->cleanup(be); 664*0ff7076bSVincenzo Maffione free(be); 665*0ff7076bSVincenzo Maffione } 666*0ff7076bSVincenzo Maffione } 667*0ff7076bSVincenzo Maffione 668*0ff7076bSVincenzo Maffione uint64_t 669*0ff7076bSVincenzo Maffione netbe_get_cap(struct net_backend *be) 670*0ff7076bSVincenzo Maffione { 671*0ff7076bSVincenzo Maffione 672*0ff7076bSVincenzo Maffione assert(be != NULL); 673*0ff7076bSVincenzo Maffione return (be->get_cap(be)); 674*0ff7076bSVincenzo Maffione } 675*0ff7076bSVincenzo Maffione 676*0ff7076bSVincenzo Maffione int 677*0ff7076bSVincenzo Maffione netbe_set_cap(struct net_backend *be, uint64_t features, 678*0ff7076bSVincenzo Maffione unsigned vnet_hdr_len) 679*0ff7076bSVincenzo Maffione { 680*0ff7076bSVincenzo Maffione int ret; 681*0ff7076bSVincenzo Maffione 682*0ff7076bSVincenzo Maffione assert(be != NULL); 683*0ff7076bSVincenzo Maffione 684*0ff7076bSVincenzo Maffione /* There are only three valid lengths, i.e., 0, 10 and 12. 
*/ 685*0ff7076bSVincenzo Maffione if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN 686*0ff7076bSVincenzo Maffione && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t))) 687*0ff7076bSVincenzo Maffione return (-1); 688*0ff7076bSVincenzo Maffione 689*0ff7076bSVincenzo Maffione be->fe_vnet_hdr_len = vnet_hdr_len; 690*0ff7076bSVincenzo Maffione 691*0ff7076bSVincenzo Maffione ret = be->set_cap(be, features, vnet_hdr_len); 692*0ff7076bSVincenzo Maffione assert(be->be_vnet_hdr_len == 0 || 693*0ff7076bSVincenzo Maffione be->be_vnet_hdr_len == be->fe_vnet_hdr_len); 694*0ff7076bSVincenzo Maffione 695*0ff7076bSVincenzo Maffione return (ret); 696*0ff7076bSVincenzo Maffione } 697*0ff7076bSVincenzo Maffione 698*0ff7076bSVincenzo Maffione static __inline struct iovec * 699*0ff7076bSVincenzo Maffione iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen) 700*0ff7076bSVincenzo Maffione { 701*0ff7076bSVincenzo Maffione struct iovec *riov; 702*0ff7076bSVincenzo Maffione 703*0ff7076bSVincenzo Maffione /* XXX short-cut: assume first segment is >= tlen */ 704*0ff7076bSVincenzo Maffione assert(iov[0].iov_len >= tlen); 705*0ff7076bSVincenzo Maffione 706*0ff7076bSVincenzo Maffione iov[0].iov_len -= tlen; 707*0ff7076bSVincenzo Maffione if (iov[0].iov_len == 0) { 708*0ff7076bSVincenzo Maffione assert(*iovcnt > 1); 709*0ff7076bSVincenzo Maffione *iovcnt -= 1; 710*0ff7076bSVincenzo Maffione riov = &iov[1]; 711*0ff7076bSVincenzo Maffione } else { 712*0ff7076bSVincenzo Maffione iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); 713*0ff7076bSVincenzo Maffione riov = &iov[0]; 714*0ff7076bSVincenzo Maffione } 715*0ff7076bSVincenzo Maffione 716*0ff7076bSVincenzo Maffione return (riov); 717*0ff7076bSVincenzo Maffione } 718*0ff7076bSVincenzo Maffione 719*0ff7076bSVincenzo Maffione ssize_t 720*0ff7076bSVincenzo Maffione netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt) 721*0ff7076bSVincenzo Maffione { 722*0ff7076bSVincenzo Maffione 723*0ff7076bSVincenzo Maffione assert(be 
!= NULL); 724*0ff7076bSVincenzo Maffione if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) { 725*0ff7076bSVincenzo Maffione /* 726*0ff7076bSVincenzo Maffione * The frontend uses a virtio-net header, but the backend 727*0ff7076bSVincenzo Maffione * does not. We ignore it (as it must be all zeroes) and 728*0ff7076bSVincenzo Maffione * strip it. 729*0ff7076bSVincenzo Maffione */ 730*0ff7076bSVincenzo Maffione assert(be->be_vnet_hdr_len == 0); 731*0ff7076bSVincenzo Maffione iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len); 732*0ff7076bSVincenzo Maffione } 733*0ff7076bSVincenzo Maffione 734*0ff7076bSVincenzo Maffione return (be->send(be, iov, iovcnt)); 735*0ff7076bSVincenzo Maffione } 736*0ff7076bSVincenzo Maffione 737*0ff7076bSVincenzo Maffione /* 738*0ff7076bSVincenzo Maffione * Try to read a packet from the backend, without blocking. 739*0ff7076bSVincenzo Maffione * If no packets are available, return 0. In case of success, return 740*0ff7076bSVincenzo Maffione * the length of the packet just read. Return -1 in case of errors. 741*0ff7076bSVincenzo Maffione */ 742*0ff7076bSVincenzo Maffione ssize_t 743*0ff7076bSVincenzo Maffione netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 744*0ff7076bSVincenzo Maffione { 745*0ff7076bSVincenzo Maffione /* Length of prepended virtio-net header. */ 746*0ff7076bSVincenzo Maffione unsigned int hlen = be->fe_vnet_hdr_len; 747*0ff7076bSVincenzo Maffione int ret; 748*0ff7076bSVincenzo Maffione 749*0ff7076bSVincenzo Maffione assert(be != NULL); 750*0ff7076bSVincenzo Maffione 751*0ff7076bSVincenzo Maffione if (hlen && hlen != be->be_vnet_hdr_len) { 752*0ff7076bSVincenzo Maffione /* 753*0ff7076bSVincenzo Maffione * The frontend uses a virtio-net header, but the backend 754*0ff7076bSVincenzo Maffione * does not. We need to prepend a zeroed header. 
755*0ff7076bSVincenzo Maffione */ 756*0ff7076bSVincenzo Maffione struct virtio_net_rxhdr *vh; 757*0ff7076bSVincenzo Maffione 758*0ff7076bSVincenzo Maffione assert(be->be_vnet_hdr_len == 0); 759*0ff7076bSVincenzo Maffione 760*0ff7076bSVincenzo Maffione /* 761*0ff7076bSVincenzo Maffione * Get a pointer to the rx header, and use the 762*0ff7076bSVincenzo Maffione * data immediately following it for the packet buffer. 763*0ff7076bSVincenzo Maffione */ 764*0ff7076bSVincenzo Maffione vh = iov[0].iov_base; 765*0ff7076bSVincenzo Maffione iov = iov_trim(iov, &iovcnt, hlen); 766*0ff7076bSVincenzo Maffione 767*0ff7076bSVincenzo Maffione /* 768*0ff7076bSVincenzo Maffione * The only valid field in the rx packet header is the 769*0ff7076bSVincenzo Maffione * number of buffers if merged rx bufs were negotiated. 770*0ff7076bSVincenzo Maffione */ 771*0ff7076bSVincenzo Maffione memset(vh, 0, hlen); 772*0ff7076bSVincenzo Maffione if (hlen == VNET_HDR_LEN) { 773*0ff7076bSVincenzo Maffione vh->vrh_bufs = 1; 774*0ff7076bSVincenzo Maffione } 775*0ff7076bSVincenzo Maffione } 776*0ff7076bSVincenzo Maffione 777*0ff7076bSVincenzo Maffione ret = be->recv(be, iov, iovcnt); 778*0ff7076bSVincenzo Maffione if (ret > 0) { 779*0ff7076bSVincenzo Maffione ret += hlen; 780*0ff7076bSVincenzo Maffione } 781*0ff7076bSVincenzo Maffione 782*0ff7076bSVincenzo Maffione return (ret); 783*0ff7076bSVincenzo Maffione } 784*0ff7076bSVincenzo Maffione 785*0ff7076bSVincenzo Maffione /* 786*0ff7076bSVincenzo Maffione * Read a packet from the backend and discard it. 787*0ff7076bSVincenzo Maffione * Returns the size of the discarded packet or zero if no packet was available. 788*0ff7076bSVincenzo Maffione * A negative error code is returned in case of read error. 
789*0ff7076bSVincenzo Maffione */ 790*0ff7076bSVincenzo Maffione ssize_t 791*0ff7076bSVincenzo Maffione netbe_rx_discard(struct net_backend *be) 792*0ff7076bSVincenzo Maffione { 793*0ff7076bSVincenzo Maffione /* 794*0ff7076bSVincenzo Maffione * MP note: the dummybuf is only used to discard frames, 795*0ff7076bSVincenzo Maffione * so there is no need for it to be per-vtnet or locked. 796*0ff7076bSVincenzo Maffione * We only make it large enough for TSO-sized segment. 797*0ff7076bSVincenzo Maffione */ 798*0ff7076bSVincenzo Maffione static uint8_t dummybuf[65536 + 64]; 799*0ff7076bSVincenzo Maffione struct iovec iov; 800*0ff7076bSVincenzo Maffione 801*0ff7076bSVincenzo Maffione iov.iov_base = dummybuf; 802*0ff7076bSVincenzo Maffione iov.iov_len = sizeof(dummybuf); 803*0ff7076bSVincenzo Maffione 804*0ff7076bSVincenzo Maffione return netbe_recv(be, &iov, 1); 805*0ff7076bSVincenzo Maffione } 806*0ff7076bSVincenzo Maffione 807