/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * This file implements multiple network backends (tap, netmap, ...),
 * to be used by network frontends such as virtio-net and e1000.
 * The API to access the backend (e.g. send/receive packets, negotiate
 * features) is exported by net_backends.h.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>		/* u_short etc */
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <net/if.h>
#include <net/netmap.h>
#include <net/netmap_virt.h>
/* Must be defined before including netmap_user.h to get the nm_* helpers. */
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sysexits.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>
#include <poll.h>
#include <assert.h>


#include "debug.h"
#include "iov.h"
#include "mevent.h"
#include "net_backends.h"

#include <sys/linker_set.h>

/*
 * Each network backend registers a set of function pointers that are
 * used to implement the net backends API.
 * This might need to be exposed if we implement backends in separate files.
 */
struct net_backend {
	const char *prefix;	/* prefix matching this backend */

	/*
	 * Routines used to initialize and cleanup the resources needed
	 * by a backend. The cleanup function is used internally,
	 * and should not be called by the frontend.
	 */
	int (*init)(struct net_backend *be, const char *devname,
	    net_be_rxeof_t cb, void *param);
	void (*cleanup)(struct net_backend *be);

	/*
	 * Called to serve a guest transmit request. The scatter-gather
	 * vector provided by the caller has 'iovcnt' elements and contains
	 * the packet to send.
	 */
	ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt);

	/*
	 * Called to receive a packet from the backend. When the function
	 * returns a positive value 'len', the scatter-gather vector
	 * provided by the caller contains a packet with such length.
	 * The function returns 0 if the backend doesn't have a new packet to
	 * receive.
	 */
	ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);

	/*
	 * Ask the backend to enable or disable receive operation in the
	 * backend. On return from a disable operation, it is guaranteed
	 * that the receive callback won't be called until receive is
	 * enabled again. Note however that it is up to the caller to make
	 * sure that netbe_recv() is not currently being executed by another
	 * thread.
	 */
	void (*recv_enable)(struct net_backend *be);
	void (*recv_disable)(struct net_backend *be);

	/*
	 * Ask the backend for the virtio-net features it is able to
	 * support. Possible features are TSO, UFO and checksum offloading
	 * in both rx and tx direction and for both IPv4 and IPv6.
	 */
	uint64_t (*get_cap)(struct net_backend *be);

	/*
	 * Tell the backend to enable/disable the specified virtio-net
	 * features (capabilities).
	 */
	int (*set_cap)(struct net_backend *be, uint64_t features,
	    unsigned int vnet_hdr_len);

	/* Opaque frontend pointer, stored by netbe_init() for the backend. */
	struct pci_vtnet_softc *sc;
	/* File descriptor of the backing device (tap or netmap port). */
	int fd;

	/*
	 * Length of the virtio-net header used by the backend and the
	 * frontend, respectively. A zero value means that the header
	 * is not used.
	 */
	unsigned int be_vnet_hdr_len;
	unsigned int fe_vnet_hdr_len;

	/* Size of backend-specific private data. */
	size_t priv_size;

	/*
	 * Room for backend-specific data: netbe_init() allocates
	 * sizeof(struct net_backend) + priv_size in one chunk, so this
	 * trailing array is embedded in the same allocation.
	 */
	char opaque[0];
};

SET_DECLARE(net_backend_set, struct net_backend);

#define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)

#define WPRINTF(params) PRINTLN params

/*
 * The tap backend
 */

/* Private state of the tap backend: just the registered read event. */
struct tap_priv {
	struct mevent *mevp;
};

/*
 * Release the resources acquired by tap_init(): the mevent registration
 * (if any) and the tap device file descriptor.
 */
static void
tap_cleanup(struct net_backend *be)
{
	struct tap_priv *priv = (struct tap_priv *)be->opaque;

	if (priv->mevp) {
		mevent_delete(priv->mevp);
	}
	if (be->fd != -1) {
		close(be->fd);
		be->fd = -1;
	}
}

/*
 * Open /dev/<devname>, switch it to non-blocking mode, limit its
 * capsicum rights and register 'cb' as a (initially disabled) read
 * event handler. Returns 0 on success, -1 on failure.
 */
static int
tap_init(struct net_backend *be, const char *devname,
	 net_be_rxeof_t cb, void *param)
{
	struct tap_priv *priv = (struct tap_priv *)be->opaque;
	char tbuf[80];
	int opt = 1;
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif

	if (cb == NULL) {
		WPRINTF(("TAP backend requires non-NULL callback"));
		return (-1);
	}

	strcpy(tbuf, "/dev/");
	strlcat(tbuf, devname, sizeof(tbuf));

	be->fd = open(tbuf, O_RDWR);
	if (be->fd == -1) {
		WPRINTF(("open of tap device %s failed", tbuf));
		goto error;
	}

	/*
	 * Set non-blocking and register for read
	 * notifications with the event loop
	 */
	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
		WPRINTF(("tap device O_NONBLOCK failed"));
		goto error;
	}

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
	if (caph_rights_limit(be->fd, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (priv->mevp == NULL) {
		WPRINTF(("Could not register event"));
		goto error;
	}

	return (0);

error:
	tap_cleanup(be);
	return (-1);
}

/*
 * Called to send a buffer chain out to the tap device
 */
static ssize_t
tap_send(struct net_backend *be, struct iovec *iov, int iovcnt)
{

	return (writev(be->fd, iov, iovcnt));
}

/*
 * Read one packet from the tap device into 'iov'. Returns 0 when no
 * packet is pending (EWOULDBLOCK), otherwise the readv() result.
 */
static ssize_t
tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
{
	ssize_t ret;

	/* Should never be called without a valid tap fd */
	assert(be->fd != -1);

	ret = readv(be->fd, iov, iovcnt);

	if (ret < 0 && errno == EWOULDBLOCK) {
		return (0);
	}

	return (ret);
}

static void
tap_recv_enable(struct net_backend *be)
{
	struct tap_priv *priv = (struct tap_priv *)be->opaque;

	mevent_enable(priv->mevp);
}

static void
tap_recv_disable(struct net_backend *be)
{
	struct tap_priv *priv = (struct tap_priv *)be->opaque;

	mevent_disable(priv->mevp);
}

static uint64_t
tap_get_cap(struct net_backend *be)
{

	return (0); /* no capabilities for now */
}

/* tap offers no offloads: only features==0 and vnet_hdr_len==0 succeed. */
static int
tap_set_cap(struct net_backend *be, uint64_t features,
	    unsigned vnet_hdr_len)
{

	return ((features || vnet_hdr_len) ? -1 : 0);
}

static struct net_backend tap_backend = {
	.prefix = "tap",
	.priv_size = sizeof(struct tap_priv),
	.init = tap_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};

/* A clone of the tap backend, with a different prefix. */
static struct net_backend vmnet_backend = {
	.prefix = "vmnet",
	.priv_size = sizeof(struct tap_priv),
	.init = tap_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};

DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);

/*
 * The netmap backend
 */

/* The virtio-net features supported by netmap.
*/ 329 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ 330 VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ 331 VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ 332 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO | \ 333 VIRTIO_NET_F_MRG_RXBUF) 334 335 struct netmap_priv { 336 char ifname[IFNAMSIZ]; 337 struct nm_desc *nmd; 338 uint16_t memid; 339 struct netmap_ring *rx; 340 struct netmap_ring *tx; 341 struct mevent *mevp; 342 net_be_rxeof_t cb; 343 void *cb_param; 344 }; 345 346 static void 347 nmreq_init(struct nmreq *req, char *ifname) 348 { 349 350 memset(req, 0, sizeof(*req)); 351 strlcpy(req->nr_name, ifname, sizeof(req->nr_name)); 352 req->nr_version = NETMAP_API; 353 } 354 355 static int 356 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) 357 { 358 int err; 359 struct nmreq req; 360 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 361 362 nmreq_init(&req, priv->ifname); 363 req.nr_cmd = NETMAP_BDG_VNET_HDR; 364 req.nr_arg1 = vnet_hdr_len; 365 err = ioctl(be->fd, NIOCREGIF, &req); 366 if (err) { 367 WPRINTF(("Unable to set vnet header length %d", 368 vnet_hdr_len)); 369 return (err); 370 } 371 372 be->be_vnet_hdr_len = vnet_hdr_len; 373 374 return (0); 375 } 376 377 static int 378 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len) 379 { 380 int prev_hdr_len = be->be_vnet_hdr_len; 381 int ret; 382 383 if (vnet_hdr_len == prev_hdr_len) { 384 return (1); 385 } 386 387 ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len); 388 if (ret) { 389 return (0); 390 } 391 392 netmap_set_vnet_hdr_len(be, prev_hdr_len); 393 394 return (1); 395 } 396 397 static uint64_t 398 netmap_get_cap(struct net_backend *be) 399 { 400 401 return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ? 
402 NETMAP_FEATURES : 0); 403 } 404 405 static int 406 netmap_set_cap(struct net_backend *be, uint64_t features, 407 unsigned vnet_hdr_len) 408 { 409 410 return (netmap_set_vnet_hdr_len(be, vnet_hdr_len)); 411 } 412 413 static int 414 netmap_init(struct net_backend *be, const char *devname, 415 net_be_rxeof_t cb, void *param) 416 { 417 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 418 419 strlcpy(priv->ifname, devname, sizeof(priv->ifname)); 420 priv->ifname[sizeof(priv->ifname) - 1] = '\0'; 421 422 priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); 423 if (priv->nmd == NULL) { 424 WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)", 425 devname, strerror(errno))); 426 free(priv); 427 return (-1); 428 } 429 430 priv->memid = priv->nmd->req.nr_arg2; 431 priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0); 432 priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0); 433 priv->cb = cb; 434 priv->cb_param = param; 435 be->fd = priv->nmd->fd; 436 437 priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); 438 if (priv->mevp == NULL) { 439 WPRINTF(("Could not register event")); 440 return (-1); 441 } 442 443 return (0); 444 } 445 446 static void 447 netmap_cleanup(struct net_backend *be) 448 { 449 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 450 451 if (priv->mevp) { 452 mevent_delete(priv->mevp); 453 } 454 if (priv->nmd) { 455 nm_close(priv->nmd); 456 } 457 be->fd = -1; 458 } 459 460 static ssize_t 461 netmap_send(struct net_backend *be, struct iovec *iov, 462 int iovcnt) 463 { 464 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 465 struct netmap_ring *ring; 466 ssize_t totlen = 0; 467 int nm_buf_size; 468 int nm_buf_len; 469 uint32_t head; 470 void *nm_buf; 471 int j; 472 473 ring = priv->tx; 474 head = ring->head; 475 if (head == ring->tail) { 476 WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt))); 477 goto txsync; 478 } 479 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 480 nm_buf_size = 
ring->nr_buf_size; 481 nm_buf_len = 0; 482 483 for (j = 0; j < iovcnt; j++) { 484 int iov_frag_size = iov[j].iov_len; 485 void *iov_frag_buf = iov[j].iov_base; 486 487 totlen += iov_frag_size; 488 489 /* 490 * Split each iovec fragment over more netmap slots, if 491 * necessary. 492 */ 493 for (;;) { 494 int copylen; 495 496 copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size; 497 memcpy(nm_buf, iov_frag_buf, copylen); 498 499 iov_frag_buf += copylen; 500 iov_frag_size -= copylen; 501 nm_buf += copylen; 502 nm_buf_size -= copylen; 503 nm_buf_len += copylen; 504 505 if (iov_frag_size == 0) { 506 break; 507 } 508 509 ring->slot[head].len = nm_buf_len; 510 ring->slot[head].flags = NS_MOREFRAG; 511 head = nm_ring_next(ring, head); 512 if (head == ring->tail) { 513 /* 514 * We ran out of netmap slots while 515 * splitting the iovec fragments. 516 */ 517 WPRINTF(("No space, drop %zu bytes", 518 count_iov(iov, iovcnt))); 519 goto txsync; 520 } 521 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 522 nm_buf_size = ring->nr_buf_size; 523 nm_buf_len = 0; 524 } 525 } 526 527 /* Complete the last slot, which must not have NS_MOREFRAG set. */ 528 ring->slot[head].len = nm_buf_len; 529 ring->slot[head].flags = 0; 530 head = nm_ring_next(ring, head); 531 532 /* Now update ring->head and ring->cur. 
*/ 533 ring->head = ring->cur = head; 534 txsync: 535 ioctl(be->fd, NIOCTXSYNC, NULL); 536 537 return (totlen); 538 } 539 540 static ssize_t 541 netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 542 { 543 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 544 struct netmap_slot *slot = NULL; 545 struct netmap_ring *ring; 546 void *iov_frag_buf; 547 int iov_frag_size; 548 ssize_t totlen = 0; 549 uint32_t head; 550 551 assert(iovcnt); 552 553 ring = priv->rx; 554 head = ring->head; 555 iov_frag_buf = iov->iov_base; 556 iov_frag_size = iov->iov_len; 557 558 do { 559 int nm_buf_len; 560 void *nm_buf; 561 562 if (head == ring->tail) { 563 return (0); 564 } 565 566 slot = ring->slot + head; 567 nm_buf = NETMAP_BUF(ring, slot->buf_idx); 568 nm_buf_len = slot->len; 569 570 for (;;) { 571 int copylen = nm_buf_len < iov_frag_size ? 572 nm_buf_len : iov_frag_size; 573 574 memcpy(iov_frag_buf, nm_buf, copylen); 575 nm_buf += copylen; 576 nm_buf_len -= copylen; 577 iov_frag_buf += copylen; 578 iov_frag_size -= copylen; 579 totlen += copylen; 580 581 if (nm_buf_len == 0) { 582 break; 583 } 584 585 iov++; 586 iovcnt--; 587 if (iovcnt == 0) { 588 /* No space to receive. */ 589 WPRINTF(("Short iov, drop %zd bytes", 590 totlen)); 591 return (-ENOSPC); 592 } 593 iov_frag_buf = iov->iov_base; 594 iov_frag_size = iov->iov_len; 595 } 596 597 head = nm_ring_next(ring, head); 598 599 } while (slot->flags & NS_MOREFRAG); 600 601 /* Release slots to netmap. 
*/ 602 ring->head = ring->cur = head; 603 604 return (totlen); 605 } 606 607 static void 608 netmap_recv_enable(struct net_backend *be) 609 { 610 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 611 612 mevent_enable(priv->mevp); 613 } 614 615 static void 616 netmap_recv_disable(struct net_backend *be) 617 { 618 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 619 620 mevent_disable(priv->mevp); 621 } 622 623 static struct net_backend netmap_backend = { 624 .prefix = "netmap", 625 .priv_size = sizeof(struct netmap_priv), 626 .init = netmap_init, 627 .cleanup = netmap_cleanup, 628 .send = netmap_send, 629 .recv = netmap_recv, 630 .recv_enable = netmap_recv_enable, 631 .recv_disable = netmap_recv_disable, 632 .get_cap = netmap_get_cap, 633 .set_cap = netmap_set_cap, 634 }; 635 636 /* A clone of the netmap backend, with a different prefix. */ 637 static struct net_backend vale_backend = { 638 .prefix = "vale", 639 .priv_size = sizeof(struct netmap_priv), 640 .init = netmap_init, 641 .cleanup = netmap_cleanup, 642 .send = netmap_send, 643 .recv = netmap_recv, 644 .recv_enable = netmap_recv_enable, 645 .recv_disable = netmap_recv_disable, 646 .get_cap = netmap_get_cap, 647 .set_cap = netmap_set_cap, 648 }; 649 650 DATA_SET(net_backend_set, netmap_backend); 651 DATA_SET(net_backend_set, vale_backend); 652 653 /* 654 * Initialize a backend and attach to the frontend. 655 * This is called during frontend initialization. 656 * @pbe is a pointer to the backend to be initialized 657 * @devname is the backend-name as supplied on the command line, 658 * e.g. -s 2:0,frontend-name,backend-name[,other-args] 659 * @cb is the receive callback supplied by the frontend, 660 * and it is invoked in the event loop when a receive 661 * event is generated in the hypervisor, 662 * @param is a pointer to the frontend, and normally used as 663 * the argument for the callback. 
664 */ 665 int 666 netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, 667 void *param) 668 { 669 struct net_backend **pbe, *nbe, *tbe = NULL; 670 int err; 671 672 /* 673 * Find the network backend that matches the user-provided 674 * device name. net_backend_set is built using a linker set. 675 */ 676 SET_FOREACH(pbe, net_backend_set) { 677 if (strncmp(devname, (*pbe)->prefix, 678 strlen((*pbe)->prefix)) == 0) { 679 tbe = *pbe; 680 assert(tbe->init != NULL); 681 assert(tbe->cleanup != NULL); 682 assert(tbe->send != NULL); 683 assert(tbe->recv != NULL); 684 assert(tbe->get_cap != NULL); 685 assert(tbe->set_cap != NULL); 686 break; 687 } 688 } 689 690 *ret = NULL; 691 if (tbe == NULL) 692 return (EINVAL); 693 nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); 694 *nbe = *tbe; /* copy the template */ 695 nbe->fd = -1; 696 nbe->sc = param; 697 nbe->be_vnet_hdr_len = 0; 698 nbe->fe_vnet_hdr_len = 0; 699 700 /* Initialize the backend. */ 701 err = nbe->init(nbe, devname, cb, param); 702 if (err) { 703 free(nbe); 704 return (err); 705 } 706 707 *ret = nbe; 708 709 return (0); 710 } 711 712 void 713 netbe_cleanup(struct net_backend *be) 714 { 715 716 if (be != NULL) { 717 be->cleanup(be); 718 free(be); 719 } 720 } 721 722 uint64_t 723 netbe_get_cap(struct net_backend *be) 724 { 725 726 assert(be != NULL); 727 return (be->get_cap(be)); 728 } 729 730 int 731 netbe_set_cap(struct net_backend *be, uint64_t features, 732 unsigned vnet_hdr_len) 733 { 734 int ret; 735 736 assert(be != NULL); 737 738 /* There are only three valid lengths, i.e., 0, 10 and 12. 
*/ 739 if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN 740 && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t))) 741 return (-1); 742 743 be->fe_vnet_hdr_len = vnet_hdr_len; 744 745 ret = be->set_cap(be, features, vnet_hdr_len); 746 assert(be->be_vnet_hdr_len == 0 || 747 be->be_vnet_hdr_len == be->fe_vnet_hdr_len); 748 749 return (ret); 750 } 751 752 static __inline struct iovec * 753 iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen) 754 { 755 struct iovec *riov; 756 757 /* XXX short-cut: assume first segment is >= tlen */ 758 assert(iov[0].iov_len >= tlen); 759 760 iov[0].iov_len -= tlen; 761 if (iov[0].iov_len == 0) { 762 assert(*iovcnt > 1); 763 *iovcnt -= 1; 764 riov = &iov[1]; 765 } else { 766 iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); 767 riov = &iov[0]; 768 } 769 770 return (riov); 771 } 772 773 ssize_t 774 netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt) 775 { 776 777 assert(be != NULL); 778 if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) { 779 /* 780 * The frontend uses a virtio-net header, but the backend 781 * does not. We ignore it (as it must be all zeroes) and 782 * strip it. 783 */ 784 assert(be->be_vnet_hdr_len == 0); 785 iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len); 786 } 787 788 return (be->send(be, iov, iovcnt)); 789 } 790 791 /* 792 * Try to read a packet from the backend, without blocking. 793 * If no packets are available, return 0. In case of success, return 794 * the length of the packet just read. Return -1 in case of errors. 795 */ 796 ssize_t 797 netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 798 { 799 /* Length of prepended virtio-net header. */ 800 unsigned int hlen = be->fe_vnet_hdr_len; 801 int ret; 802 803 assert(be != NULL); 804 805 if (hlen && hlen != be->be_vnet_hdr_len) { 806 /* 807 * The frontend uses a virtio-net header, but the backend 808 * does not. We need to prepend a zeroed header. 
809 */ 810 struct virtio_net_rxhdr *vh; 811 812 assert(be->be_vnet_hdr_len == 0); 813 814 /* 815 * Get a pointer to the rx header, and use the 816 * data immediately following it for the packet buffer. 817 */ 818 vh = iov[0].iov_base; 819 iov = iov_trim(iov, &iovcnt, hlen); 820 821 /* 822 * The only valid field in the rx packet header is the 823 * number of buffers if merged rx bufs were negotiated. 824 */ 825 memset(vh, 0, hlen); 826 if (hlen == VNET_HDR_LEN) { 827 vh->vrh_bufs = 1; 828 } 829 } 830 831 ret = be->recv(be, iov, iovcnt); 832 if (ret > 0) { 833 ret += hlen; 834 } 835 836 return (ret); 837 } 838 839 /* 840 * Read a packet from the backend and discard it. 841 * Returns the size of the discarded packet or zero if no packet was available. 842 * A negative error code is returned in case of read error. 843 */ 844 ssize_t 845 netbe_rx_discard(struct net_backend *be) 846 { 847 /* 848 * MP note: the dummybuf is only used to discard frames, 849 * so there is no need for it to be per-vtnet or locked. 850 * We only make it large enough for TSO-sized segment. 851 */ 852 static uint8_t dummybuf[65536 + 64]; 853 struct iovec iov; 854 855 iov.iov_base = dummybuf; 856 iov.iov_len = sizeof(dummybuf); 857 858 return netbe_recv(be, &iov, 1); 859 } 860 861 void 862 netbe_rx_disable(struct net_backend *be) 863 { 864 865 return be->recv_disable(be); 866 } 867 868 void 869 netbe_rx_enable(struct net_backend *be) 870 { 871 872 return be->recv_enable(be); 873 } 874