/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * This file implements multiple network backends (tap, netmap, ...),
 * to be used by network frontends such as virtio-net and e1000.
 * The API to access the backend (e.g. send/receive packets, negotiate
 * features) is exported by net_backends.h.
35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/types.h> /* u_short etc */ 41 #ifndef WITHOUT_CAPSICUM 42 #include <sys/capsicum.h> 43 #endif 44 #include <sys/ioctl.h> 45 #include <sys/mman.h> 46 #include <sys/uio.h> 47 48 #include <net/if.h> 49 #include <net/netmap.h> 50 #include <net/netmap_virt.h> 51 #define NETMAP_WITH_LIBS 52 #include <net/netmap_user.h> 53 54 #ifndef WITHOUT_CAPSICUM 55 #include <capsicum_helpers.h> 56 #endif 57 #include <err.h> 58 #include <errno.h> 59 #include <fcntl.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <stdint.h> 63 #include <string.h> 64 #include <unistd.h> 65 #include <sysexits.h> 66 #include <assert.h> 67 #include <pthread.h> 68 #include <pthread_np.h> 69 #include <poll.h> 70 #include <assert.h> 71 72 73 #include "iov.h" 74 #include "mevent.h" 75 #include "net_backends.h" 76 77 #include <sys/linker_set.h> 78 79 /* 80 * Each network backend registers a set of function pointers that are 81 * used to implement the net backends API. 82 * This might need to be exposed if we implement backends in separate files. 83 */ 84 struct net_backend { 85 const char *prefix; /* prefix matching this backend */ 86 87 /* 88 * Routines used to initialize and cleanup the resources needed 89 * by a backend. The cleanup function is used internally, 90 * and should not be called by the frontend. 91 */ 92 int (*init)(struct net_backend *be, const char *devname, 93 net_be_rxeof_t cb, void *param); 94 void (*cleanup)(struct net_backend *be); 95 96 /* 97 * Called to serve a guest transmit request. The scatter-gather 98 * vector provided by the caller has 'iovcnt' elements and contains 99 * the packet to send. 100 */ 101 ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt); 102 103 /* 104 * Called to receive a packet from the backend. When the function 105 * returns a positive value 'len', the scatter-gather vector 106 * provided by the caller contains a packet with such length. 
107 * The function returns 0 if the backend doesn't have a new packet to 108 * receive. 109 */ 110 ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt); 111 112 /* 113 * Ask the backend to enable or disable receive operation in the 114 * backend. On return from a disable operation, it is guaranteed 115 * that the receive callback won't be called until receive is 116 * enabled again. Note however that it is up to the caller to make 117 * sure that netbe_recv() is not currently being executed by another 118 * thread. 119 */ 120 void (*recv_enable)(struct net_backend *be); 121 void (*recv_disable)(struct net_backend *be); 122 123 /* 124 * Ask the backend for the virtio-net features it is able to 125 * support. Possible features are TSO, UFO and checksum offloading 126 * in both rx and tx direction and for both IPv4 and IPv6. 127 */ 128 uint64_t (*get_cap)(struct net_backend *be); 129 130 /* 131 * Tell the backend to enable/disable the specified virtio-net 132 * features (capabilities). 133 */ 134 int (*set_cap)(struct net_backend *be, uint64_t features, 135 unsigned int vnet_hdr_len); 136 137 struct pci_vtnet_softc *sc; 138 int fd; 139 140 /* 141 * Length of the virtio-net header used by the backend and the 142 * frontend, respectively. A zero value means that the header 143 * is not used. 144 */ 145 unsigned int be_vnet_hdr_len; 146 unsigned int fe_vnet_hdr_len; 147 148 /* Size of backend-specific private data. */ 149 size_t priv_size; 150 151 /* Room for backend-specific data. 
*/ 152 char opaque[0]; 153 }; 154 155 SET_DECLARE(net_backend_set, struct net_backend); 156 157 #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr) 158 159 #define WPRINTF(params) printf params 160 161 /* 162 * The tap backend 163 */ 164 165 struct tap_priv { 166 struct mevent *mevp; 167 }; 168 169 static void 170 tap_cleanup(struct net_backend *be) 171 { 172 struct tap_priv *priv = (struct tap_priv *)be->opaque; 173 174 if (priv->mevp) { 175 mevent_delete(priv->mevp); 176 } 177 if (be->fd != -1) { 178 close(be->fd); 179 be->fd = -1; 180 } 181 } 182 183 static int 184 tap_init(struct net_backend *be, const char *devname, 185 net_be_rxeof_t cb, void *param) 186 { 187 struct tap_priv *priv = (struct tap_priv *)be->opaque; 188 char tbuf[80]; 189 int opt = 1; 190 #ifndef WITHOUT_CAPSICUM 191 cap_rights_t rights; 192 #endif 193 194 if (cb == NULL) { 195 WPRINTF(("TAP backend requires non-NULL callback\n")); 196 return (-1); 197 } 198 199 strcpy(tbuf, "/dev/"); 200 strlcat(tbuf, devname, sizeof(tbuf)); 201 202 be->fd = open(tbuf, O_RDWR); 203 if (be->fd == -1) { 204 WPRINTF(("open of tap device %s failed\n", tbuf)); 205 goto error; 206 } 207 208 /* 209 * Set non-blocking and register for read 210 * notifications with the event loop 211 */ 212 if (ioctl(be->fd, FIONBIO, &opt) < 0) { 213 WPRINTF(("tap device O_NONBLOCK failed\n")); 214 goto error; 215 } 216 217 #ifndef WITHOUT_CAPSICUM 218 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 219 if (caph_rights_limit(be->fd, &rights) == -1) 220 errx(EX_OSERR, "Unable to apply rights for sandbox"); 221 #endif 222 223 priv->mevp = mevent_add(be->fd, EVF_READ, cb, param); 224 if (priv->mevp == NULL) { 225 WPRINTF(("Could not register event\n")); 226 goto error; 227 } 228 229 return (0); 230 231 error: 232 tap_cleanup(be); 233 return (-1); 234 } 235 236 /* 237 * Called to send a buffer chain out to the tap device 238 */ 239 static ssize_t 240 tap_send(struct net_backend *be, struct iovec *iov, int iovcnt) 241 { 242 
return (writev(be->fd, iov, iovcnt)); 243 } 244 245 static ssize_t 246 tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 247 { 248 ssize_t ret; 249 250 /* Should never be called without a valid tap fd */ 251 assert(be->fd != -1); 252 253 ret = readv(be->fd, iov, iovcnt); 254 255 if (ret < 0 && errno == EWOULDBLOCK) { 256 return (0); 257 } 258 259 return (ret); 260 } 261 262 static void 263 tap_recv_enable(struct net_backend *be) 264 { 265 struct tap_priv *priv = (struct tap_priv *)be->opaque; 266 267 mevent_enable(priv->mevp); 268 } 269 270 static void 271 tap_recv_disable(struct net_backend *be) 272 { 273 struct tap_priv *priv = (struct tap_priv *)be->opaque; 274 275 mevent_disable(priv->mevp); 276 } 277 278 static uint64_t 279 tap_get_cap(struct net_backend *be) 280 { 281 282 return (0); /* no capabilities for now */ 283 } 284 285 static int 286 tap_set_cap(struct net_backend *be, uint64_t features, 287 unsigned vnet_hdr_len) 288 { 289 290 return ((features || vnet_hdr_len) ? -1 : 0); 291 } 292 293 static struct net_backend tap_backend = { 294 .prefix = "tap", 295 .priv_size = sizeof(struct tap_priv), 296 .init = tap_init, 297 .cleanup = tap_cleanup, 298 .send = tap_send, 299 .recv = tap_recv, 300 .recv_enable = tap_recv_enable, 301 .recv_disable = tap_recv_disable, 302 .get_cap = tap_get_cap, 303 .set_cap = tap_set_cap, 304 }; 305 306 /* A clone of the tap backend, with a different prefix. */ 307 static struct net_backend vmnet_backend = { 308 .prefix = "vmnet", 309 .priv_size = sizeof(struct tap_priv), 310 .init = tap_init, 311 .cleanup = tap_cleanup, 312 .send = tap_send, 313 .recv = tap_recv, 314 .recv_enable = tap_recv_enable, 315 .recv_disable = tap_recv_disable, 316 .get_cap = tap_get_cap, 317 .set_cap = tap_set_cap, 318 }; 319 320 DATA_SET(net_backend_set, tap_backend); 321 DATA_SET(net_backend_set, vmnet_backend); 322 323 /* 324 * The netmap backend 325 */ 326 327 /* The virtio-net features supported by netmap. 
*/ 328 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ 329 VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ 330 VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ 331 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO) 332 333 struct netmap_priv { 334 char ifname[IFNAMSIZ]; 335 struct nm_desc *nmd; 336 uint16_t memid; 337 struct netmap_ring *rx; 338 struct netmap_ring *tx; 339 struct mevent *mevp; 340 net_be_rxeof_t cb; 341 void *cb_param; 342 }; 343 344 static void 345 nmreq_init(struct nmreq *req, char *ifname) 346 { 347 348 memset(req, 0, sizeof(*req)); 349 strlcpy(req->nr_name, ifname, sizeof(req->nr_name)); 350 req->nr_version = NETMAP_API; 351 } 352 353 static int 354 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) 355 { 356 int err; 357 struct nmreq req; 358 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 359 360 nmreq_init(&req, priv->ifname); 361 req.nr_cmd = NETMAP_BDG_VNET_HDR; 362 req.nr_arg1 = vnet_hdr_len; 363 err = ioctl(be->fd, NIOCREGIF, &req); 364 if (err) { 365 WPRINTF(("Unable to set vnet header length %d\n", 366 vnet_hdr_len)); 367 return (err); 368 } 369 370 be->be_vnet_hdr_len = vnet_hdr_len; 371 372 return (0); 373 } 374 375 static int 376 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len) 377 { 378 int prev_hdr_len = be->be_vnet_hdr_len; 379 int ret; 380 381 if (vnet_hdr_len == prev_hdr_len) { 382 return (1); 383 } 384 385 ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len); 386 if (ret) { 387 return (0); 388 } 389 390 netmap_set_vnet_hdr_len(be, prev_hdr_len); 391 392 return (1); 393 } 394 395 static uint64_t 396 netmap_get_cap(struct net_backend *be) 397 { 398 399 return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ? 
400 NETMAP_FEATURES : 0); 401 } 402 403 static int 404 netmap_set_cap(struct net_backend *be, uint64_t features, 405 unsigned vnet_hdr_len) 406 { 407 408 return (netmap_set_vnet_hdr_len(be, vnet_hdr_len)); 409 } 410 411 static int 412 netmap_init(struct net_backend *be, const char *devname, 413 net_be_rxeof_t cb, void *param) 414 { 415 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 416 417 strlcpy(priv->ifname, devname, sizeof(priv->ifname)); 418 priv->ifname[sizeof(priv->ifname) - 1] = '\0'; 419 420 priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); 421 if (priv->nmd == NULL) { 422 WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n", 423 devname, strerror(errno))); 424 free(priv); 425 return (-1); 426 } 427 428 priv->memid = priv->nmd->req.nr_arg2; 429 priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0); 430 priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0); 431 priv->cb = cb; 432 priv->cb_param = param; 433 be->fd = priv->nmd->fd; 434 435 priv->mevp = mevent_add(be->fd, EVF_READ, cb, param); 436 if (priv->mevp == NULL) { 437 WPRINTF(("Could not register event\n")); 438 return (-1); 439 } 440 441 return (0); 442 } 443 444 static void 445 netmap_cleanup(struct net_backend *be) 446 { 447 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 448 449 if (priv->mevp) { 450 mevent_delete(priv->mevp); 451 } 452 if (priv->nmd) { 453 nm_close(priv->nmd); 454 } 455 be->fd = -1; 456 } 457 458 static ssize_t 459 netmap_send(struct net_backend *be, struct iovec *iov, 460 int iovcnt) 461 { 462 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 463 struct netmap_ring *ring; 464 ssize_t totlen = 0; 465 int nm_buf_size; 466 int nm_buf_len; 467 uint32_t head; 468 void *nm_buf; 469 int j; 470 471 ring = priv->tx; 472 head = ring->head; 473 if (head == ring->tail) { 474 WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt))); 475 goto txsync; 476 } 477 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 478 nm_buf_size = 
ring->nr_buf_size; 479 nm_buf_len = 0; 480 481 for (j = 0; j < iovcnt; j++) { 482 int iov_frag_size = iov[j].iov_len; 483 void *iov_frag_buf = iov[j].iov_base; 484 485 totlen += iov_frag_size; 486 487 /* 488 * Split each iovec fragment over more netmap slots, if 489 * necessary. 490 */ 491 for (;;) { 492 int copylen; 493 494 copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size; 495 memcpy(nm_buf, iov_frag_buf, copylen); 496 497 iov_frag_buf += copylen; 498 iov_frag_size -= copylen; 499 nm_buf += copylen; 500 nm_buf_size -= copylen; 501 nm_buf_len += copylen; 502 503 if (iov_frag_size == 0) { 504 break; 505 } 506 507 ring->slot[head].len = nm_buf_len; 508 ring->slot[head].flags = NS_MOREFRAG; 509 head = nm_ring_next(ring, head); 510 if (head == ring->tail) { 511 /* 512 * We ran out of netmap slots while 513 * splitting the iovec fragments. 514 */ 515 WPRINTF(("No space, drop %zu bytes\n", 516 count_iov(iov, iovcnt))); 517 goto txsync; 518 } 519 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 520 nm_buf_size = ring->nr_buf_size; 521 nm_buf_len = 0; 522 } 523 } 524 525 /* Complete the last slot, which must not have NS_MOREFRAG set. */ 526 ring->slot[head].len = nm_buf_len; 527 ring->slot[head].flags = 0; 528 head = nm_ring_next(ring, head); 529 530 /* Now update ring->head and ring->cur. 
*/ 531 ring->head = ring->cur = head; 532 txsync: 533 ioctl(be->fd, NIOCTXSYNC, NULL); 534 535 return (totlen); 536 } 537 538 static ssize_t 539 netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 540 { 541 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 542 struct netmap_slot *slot = NULL; 543 struct netmap_ring *ring; 544 void *iov_frag_buf; 545 int iov_frag_size; 546 ssize_t totlen = 0; 547 uint32_t head; 548 549 assert(iovcnt); 550 551 ring = priv->rx; 552 head = ring->head; 553 iov_frag_buf = iov->iov_base; 554 iov_frag_size = iov->iov_len; 555 556 do { 557 int nm_buf_len; 558 void *nm_buf; 559 560 if (head == ring->tail) { 561 return (0); 562 } 563 564 slot = ring->slot + head; 565 nm_buf = NETMAP_BUF(ring, slot->buf_idx); 566 nm_buf_len = slot->len; 567 568 for (;;) { 569 int copylen = nm_buf_len < iov_frag_size ? 570 nm_buf_len : iov_frag_size; 571 572 memcpy(iov_frag_buf, nm_buf, copylen); 573 nm_buf += copylen; 574 nm_buf_len -= copylen; 575 iov_frag_buf += copylen; 576 iov_frag_size -= copylen; 577 totlen += copylen; 578 579 if (nm_buf_len == 0) { 580 break; 581 } 582 583 iov++; 584 iovcnt--; 585 if (iovcnt == 0) { 586 /* No space to receive. */ 587 WPRINTF(("Short iov, drop %zd bytes\n", 588 totlen)); 589 return (-ENOSPC); 590 } 591 iov_frag_buf = iov->iov_base; 592 iov_frag_size = iov->iov_len; 593 } 594 595 head = nm_ring_next(ring, head); 596 597 } while (slot->flags & NS_MOREFRAG); 598 599 /* Release slots to netmap. 
*/ 600 ring->head = ring->cur = head; 601 602 return (totlen); 603 } 604 605 static void 606 netmap_recv_enable(struct net_backend *be) 607 { 608 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 609 610 mevent_enable(priv->mevp); 611 } 612 613 static void 614 netmap_recv_disable(struct net_backend *be) 615 { 616 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 617 618 mevent_disable(priv->mevp); 619 } 620 621 static struct net_backend netmap_backend = { 622 .prefix = "netmap", 623 .priv_size = sizeof(struct netmap_priv), 624 .init = netmap_init, 625 .cleanup = netmap_cleanup, 626 .send = netmap_send, 627 .recv = netmap_recv, 628 .recv_enable = netmap_recv_enable, 629 .recv_disable = netmap_recv_disable, 630 .get_cap = netmap_get_cap, 631 .set_cap = netmap_set_cap, 632 }; 633 634 /* A clone of the netmap backend, with a different prefix. */ 635 static struct net_backend vale_backend = { 636 .prefix = "vale", 637 .priv_size = sizeof(struct netmap_priv), 638 .init = netmap_init, 639 .cleanup = netmap_cleanup, 640 .send = netmap_send, 641 .recv = netmap_recv, 642 .recv_enable = netmap_recv_enable, 643 .recv_disable = netmap_recv_disable, 644 .get_cap = netmap_get_cap, 645 .set_cap = netmap_set_cap, 646 }; 647 648 DATA_SET(net_backend_set, netmap_backend); 649 DATA_SET(net_backend_set, vale_backend); 650 651 /* 652 * Initialize a backend and attach to the frontend. 653 * This is called during frontend initialization. 654 * @pbe is a pointer to the backend to be initialized 655 * @devname is the backend-name as supplied on the command line, 656 * e.g. -s 2:0,frontend-name,backend-name[,other-args] 657 * @cb is the receive callback supplied by the frontend, 658 * and it is invoked in the event loop when a receive 659 * event is generated in the hypervisor, 660 * @param is a pointer to the frontend, and normally used as 661 * the argument for the callback. 
662 */ 663 int 664 netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, 665 void *param) 666 { 667 struct net_backend **pbe, *nbe, *tbe = NULL; 668 int err; 669 670 /* 671 * Find the network backend that matches the user-provided 672 * device name. net_backend_set is built using a linker set. 673 */ 674 SET_FOREACH(pbe, net_backend_set) { 675 if (strncmp(devname, (*pbe)->prefix, 676 strlen((*pbe)->prefix)) == 0) { 677 tbe = *pbe; 678 assert(tbe->init != NULL); 679 assert(tbe->cleanup != NULL); 680 assert(tbe->send != NULL); 681 assert(tbe->recv != NULL); 682 assert(tbe->get_cap != NULL); 683 assert(tbe->set_cap != NULL); 684 break; 685 } 686 } 687 688 *ret = NULL; 689 if (tbe == NULL) 690 return (EINVAL); 691 nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); 692 *nbe = *tbe; /* copy the template */ 693 nbe->fd = -1; 694 nbe->sc = param; 695 nbe->be_vnet_hdr_len = 0; 696 nbe->fe_vnet_hdr_len = 0; 697 698 /* Initialize the backend. */ 699 err = nbe->init(nbe, devname, cb, param); 700 if (err) { 701 free(nbe); 702 return (err); 703 } 704 705 *ret = nbe; 706 707 return (0); 708 } 709 710 void 711 netbe_cleanup(struct net_backend *be) 712 { 713 714 if (be != NULL) { 715 be->cleanup(be); 716 free(be); 717 } 718 } 719 720 uint64_t 721 netbe_get_cap(struct net_backend *be) 722 { 723 724 assert(be != NULL); 725 return (be->get_cap(be)); 726 } 727 728 int 729 netbe_set_cap(struct net_backend *be, uint64_t features, 730 unsigned vnet_hdr_len) 731 { 732 int ret; 733 734 assert(be != NULL); 735 736 /* There are only three valid lengths, i.e., 0, 10 and 12. 
*/ 737 if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN 738 && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t))) 739 return (-1); 740 741 be->fe_vnet_hdr_len = vnet_hdr_len; 742 743 ret = be->set_cap(be, features, vnet_hdr_len); 744 assert(be->be_vnet_hdr_len == 0 || 745 be->be_vnet_hdr_len == be->fe_vnet_hdr_len); 746 747 return (ret); 748 } 749 750 static __inline struct iovec * 751 iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen) 752 { 753 struct iovec *riov; 754 755 /* XXX short-cut: assume first segment is >= tlen */ 756 assert(iov[0].iov_len >= tlen); 757 758 iov[0].iov_len -= tlen; 759 if (iov[0].iov_len == 0) { 760 assert(*iovcnt > 1); 761 *iovcnt -= 1; 762 riov = &iov[1]; 763 } else { 764 iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); 765 riov = &iov[0]; 766 } 767 768 return (riov); 769 } 770 771 ssize_t 772 netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt) 773 { 774 775 assert(be != NULL); 776 if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) { 777 /* 778 * The frontend uses a virtio-net header, but the backend 779 * does not. We ignore it (as it must be all zeroes) and 780 * strip it. 781 */ 782 assert(be->be_vnet_hdr_len == 0); 783 iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len); 784 } 785 786 return (be->send(be, iov, iovcnt)); 787 } 788 789 /* 790 * Try to read a packet from the backend, without blocking. 791 * If no packets are available, return 0. In case of success, return 792 * the length of the packet just read. Return -1 in case of errors. 793 */ 794 ssize_t 795 netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 796 { 797 /* Length of prepended virtio-net header. */ 798 unsigned int hlen = be->fe_vnet_hdr_len; 799 int ret; 800 801 assert(be != NULL); 802 803 if (hlen && hlen != be->be_vnet_hdr_len) { 804 /* 805 * The frontend uses a virtio-net header, but the backend 806 * does not. We need to prepend a zeroed header. 
807 */ 808 struct virtio_net_rxhdr *vh; 809 810 assert(be->be_vnet_hdr_len == 0); 811 812 /* 813 * Get a pointer to the rx header, and use the 814 * data immediately following it for the packet buffer. 815 */ 816 vh = iov[0].iov_base; 817 iov = iov_trim(iov, &iovcnt, hlen); 818 819 /* 820 * The only valid field in the rx packet header is the 821 * number of buffers if merged rx bufs were negotiated. 822 */ 823 memset(vh, 0, hlen); 824 if (hlen == VNET_HDR_LEN) { 825 vh->vrh_bufs = 1; 826 } 827 } 828 829 ret = be->recv(be, iov, iovcnt); 830 if (ret > 0) { 831 ret += hlen; 832 } 833 834 return (ret); 835 } 836 837 /* 838 * Read a packet from the backend and discard it. 839 * Returns the size of the discarded packet or zero if no packet was available. 840 * A negative error code is returned in case of read error. 841 */ 842 ssize_t 843 netbe_rx_discard(struct net_backend *be) 844 { 845 /* 846 * MP note: the dummybuf is only used to discard frames, 847 * so there is no need for it to be per-vtnet or locked. 848 * We only make it large enough for TSO-sized segment. 849 */ 850 static uint8_t dummybuf[65536 + 64]; 851 struct iovec iov; 852 853 iov.iov_base = dummybuf; 854 iov.iov_len = sizeof(dummybuf); 855 856 return netbe_recv(be, &iov, 1); 857 } 858 859 void 860 netbe_rx_disable(struct net_backend *be) 861 { 862 863 return be->recv_enable(be); 864 } 865 866 void 867 netbe_rx_enable(struct net_backend *be) 868 { 869 870 return be->recv_disable(be); 871 } 872