1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, 20 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 21 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 30 /* 31 * This file implements multiple network backends (tap, netmap, ...), 32 * to be used by network frontends such as virtio-net and e1000. 33 * The API to access the backend (e.g. send/receive packets, negotiate 34 * features) is exported by net_backends.h. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/types.h> /* u_short etc */ 41 #ifndef WITHOUT_CAPSICUM 42 #include <sys/capsicum.h> 43 #endif 44 #include <sys/ioctl.h> 45 #include <sys/mman.h> 46 #include <sys/uio.h> 47 48 #include <net/if.h> 49 #include <net/netmap.h> 50 #include <net/netmap_virt.h> 51 #define NETMAP_WITH_LIBS 52 #include <net/netmap_user.h> 53 54 #ifndef WITHOUT_CAPSICUM 55 #include <capsicum_helpers.h> 56 #endif 57 #include <err.h> 58 #include <errno.h> 59 #include <fcntl.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <stdint.h> 63 #include <string.h> 64 #include <unistd.h> 65 #include <sysexits.h> 66 #include <assert.h> 67 #include <pthread.h> 68 #include <pthread_np.h> 69 #include <poll.h> 70 #include <assert.h> 71 72 73 #include "iov.h" 74 #include "mevent.h" 75 #include "net_backends.h" 76 77 #include <sys/linker_set.h> 78 79 /* 80 * Each network backend registers a set of function pointers that are 81 * used to implement the net backends API. 82 * This might need to be exposed if we implement backends in separate files. 83 */ 84 struct net_backend { 85 const char *prefix; /* prefix matching this backend */ 86 87 /* 88 * Routines used to initialize and cleanup the resources needed 89 * by a backend. The cleanup function is used internally, 90 * and should not be called by the frontend. 91 */ 92 int (*init)(struct net_backend *be, const char *devname, 93 net_be_rxeof_t cb, void *param); 94 void (*cleanup)(struct net_backend *be); 95 96 /* 97 * Called to serve a guest transmit request. The scatter-gather 98 * vector provided by the caller has 'iovcnt' elements and contains 99 * the packet to send. 100 */ 101 ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt); 102 103 /* 104 * Called to receive a packet from the backend. When the function 105 * returns a positive value 'len', the scatter-gather vector 106 * provided by the caller contains a packet with such length. 107 * The function returns 0 if the backend doesn't have a new packet to 108 * receive. 109 */ 110 ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt); 111 112 /* 113 * Ask the backend to enable or disable receive operation in the 114 * backend. On return from a disable operation, it is guaranteed 115 * that the receive callback won't be called until receive is 116 * enabled again. Note however that it is up to the caller to make 117 * sure that netbe_recv() is not currently being executed by another 118 * thread. 119 */ 120 void (*recv_enable)(struct net_backend *be); 121 void (*recv_disable)(struct net_backend *be); 122 123 /* 124 * Ask the backend for the virtio-net features it is able to 125 * support. Possible features are TSO, UFO and checksum offloading 126 * in both rx and tx direction and for both IPv4 and IPv6. 127 */ 128 uint64_t (*get_cap)(struct net_backend *be); 129 130 /* 131 * Tell the backend to enable/disable the specified virtio-net 132 * features (capabilities). 133 */ 134 int (*set_cap)(struct net_backend *be, uint64_t features, 135 unsigned int vnet_hdr_len); 136 137 struct pci_vtnet_softc *sc; 138 int fd; 139 140 /* 141 * Length of the virtio-net header used by the backend and the 142 * frontend, respectively. A zero value means that the header 143 * is not used. 144 */ 145 unsigned int be_vnet_hdr_len; 146 unsigned int fe_vnet_hdr_len; 147 148 /* Size of backend-specific private data. */ 149 size_t priv_size; 150 151 /* Room for backend-specific data. */ 152 char opaque[0]; 153 }; 154 155 SET_DECLARE(net_backend_set, struct net_backend); 156 157 #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr) 158 159 #define WPRINTF(params) printf params 160 161 /* 162 * The tap backend 163 */ 164 165 struct tap_priv { 166 struct mevent *mevp; 167 }; 168 169 static void 170 tap_cleanup(struct net_backend *be) 171 { 172 struct tap_priv *priv = (struct tap_priv *)be->opaque; 173 174 if (priv->mevp) { 175 mevent_delete(priv->mevp); 176 } 177 if (be->fd != -1) { 178 close(be->fd); 179 be->fd = -1; 180 } 181 } 182 183 static int 184 tap_init(struct net_backend *be, const char *devname, 185 net_be_rxeof_t cb, void *param) 186 { 187 struct tap_priv *priv = (struct tap_priv *)be->opaque; 188 char tbuf[80]; 189 int opt = 1; 190 #ifndef WITHOUT_CAPSICUM 191 cap_rights_t rights; 192 #endif 193 194 if (cb == NULL) { 195 WPRINTF(("TAP backend requires non-NULL callback\n\r")); 196 return (-1); 197 } 198 199 strcpy(tbuf, "/dev/"); 200 strlcat(tbuf, devname, sizeof(tbuf)); 201 202 be->fd = open(tbuf, O_RDWR); 203 if (be->fd == -1) { 204 WPRINTF(("open of tap device %s failed\n\r", tbuf)); 205 goto error; 206 } 207 208 /* 209 * Set non-blocking and register for read 210 * notifications with the event loop 211 */ 212 if (ioctl(be->fd, FIONBIO, &opt) < 0) { 213 WPRINTF(("tap device O_NONBLOCK failed\n\r")); 214 goto error; 215 } 216 217 #ifndef WITHOUT_CAPSICUM 218 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 219 if (caph_rights_limit(be->fd, &rights) == -1) 220 errx(EX_OSERR, "Unable to apply rights for sandbox"); 221 #endif 222 223 priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); 224 if (priv->mevp == NULL) { 225 WPRINTF(("Could not register event\n\r")); 226 goto error; 227 } 228 229 return (0); 230 231 error: 232 tap_cleanup(be); 233 return (-1); 234 } 235 236 /* 237 * Called to send a buffer chain out to the tap device 238 */ 239 static ssize_t 240 tap_send(struct net_backend *be, struct iovec *iov, int iovcnt) 241 { 242 return (writev(be->fd, iov, iovcnt)); 243 } 244 245 static ssize_t 246 tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 247 { 248 ssize_t ret; 249 250 /* Should never be called without a valid tap fd */ 251 assert(be->fd != -1); 252 253 ret = readv(be->fd, iov, iovcnt); 254 255 if (ret < 0 && errno == EWOULDBLOCK) { 256 return (0); 257 } 258 259 return (ret); 260 } 261 262 static void 263 tap_recv_enable(struct net_backend *be) 264 { 265 struct tap_priv *priv = (struct tap_priv *)be->opaque; 266 267 mevent_enable(priv->mevp); 268 } 269 270 static void 271 tap_recv_disable(struct net_backend *be) 272 { 273 struct tap_priv *priv = (struct tap_priv *)be->opaque; 274 275 mevent_disable(priv->mevp); 276 } 277 278 static uint64_t 279 tap_get_cap(struct net_backend *be) 280 { 281 282 return (0); /* no capabilities for now */ 283 } 284 285 static int 286 tap_set_cap(struct net_backend *be, uint64_t features, 287 unsigned vnet_hdr_len) 288 { 289 290 return ((features || vnet_hdr_len) ? -1 : 0); 291 } 292 293 static struct net_backend tap_backend = { 294 .prefix = "tap", 295 .priv_size = sizeof(struct tap_priv), 296 .init = tap_init, 297 .cleanup = tap_cleanup, 298 .send = tap_send, 299 .recv = tap_recv, 300 .recv_enable = tap_recv_enable, 301 .recv_disable = tap_recv_disable, 302 .get_cap = tap_get_cap, 303 .set_cap = tap_set_cap, 304 }; 305 306 /* A clone of the tap backend, with a different prefix. */ 307 static struct net_backend vmnet_backend = { 308 .prefix = "vmnet", 309 .priv_size = sizeof(struct tap_priv), 310 .init = tap_init, 311 .cleanup = tap_cleanup, 312 .send = tap_send, 313 .recv = tap_recv, 314 .recv_enable = tap_recv_enable, 315 .recv_disable = tap_recv_disable, 316 .get_cap = tap_get_cap, 317 .set_cap = tap_set_cap, 318 }; 319 320 DATA_SET(net_backend_set, tap_backend); 321 DATA_SET(net_backend_set, vmnet_backend); 322 323 /* 324 * The netmap backend 325 */ 326 327 /* The virtio-net features supported by netmap. */ 328 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ 329 VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ 330 VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ 331 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO | \ 332 VIRTIO_NET_F_MRG_RXBUF) 333 334 struct netmap_priv { 335 char ifname[IFNAMSIZ]; 336 struct nm_desc *nmd; 337 uint16_t memid; 338 struct netmap_ring *rx; 339 struct netmap_ring *tx; 340 struct mevent *mevp; 341 net_be_rxeof_t cb; 342 void *cb_param; 343 }; 344 345 static void 346 nmreq_init(struct nmreq *req, char *ifname) 347 { 348 349 memset(req, 0, sizeof(*req)); 350 strlcpy(req->nr_name, ifname, sizeof(req->nr_name)); 351 req->nr_version = NETMAP_API; 352 } 353 354 static int 355 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) 356 { 357 int err; 358 struct nmreq req; 359 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 360 361 nmreq_init(&req, priv->ifname); 362 req.nr_cmd = NETMAP_BDG_VNET_HDR; 363 req.nr_arg1 = vnet_hdr_len; 364 err = ioctl(be->fd, NIOCREGIF, &req); 365 if (err) { 366 WPRINTF(("Unable to set vnet header length %d\n\r", 367 vnet_hdr_len)); 368 return (err); 369 } 370 371 be->be_vnet_hdr_len = vnet_hdr_len; 372 373 return (0); 374 } 375 376 static int 377 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len) 378 { 379 int prev_hdr_len = be->be_vnet_hdr_len; 380 int ret; 381 382 if (vnet_hdr_len == prev_hdr_len) { 383 return (1); 384 } 385 386 ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len); 387 if (ret) { 388 return (0); 389 } 390 391 netmap_set_vnet_hdr_len(be, prev_hdr_len); 392 393 return (1); 394 } 395 396 static uint64_t 397 netmap_get_cap(struct net_backend *be) 398 { 399 400 return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ? 401 NETMAP_FEATURES : 0); 402 } 403 404 static int 405 netmap_set_cap(struct net_backend *be, uint64_t features, 406 unsigned vnet_hdr_len) 407 { 408 409 return (netmap_set_vnet_hdr_len(be, vnet_hdr_len)); 410 } 411 412 static int 413 netmap_init(struct net_backend *be, const char *devname, 414 net_be_rxeof_t cb, void *param) 415 { 416 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 417 418 strlcpy(priv->ifname, devname, sizeof(priv->ifname)); 419 priv->ifname[sizeof(priv->ifname) - 1] = '\0'; 420 421 priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); 422 if (priv->nmd == NULL) { 423 WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n\r", 424 devname, strerror(errno))); 425 free(priv); 426 return (-1); 427 } 428 429 priv->memid = priv->nmd->req.nr_arg2; 430 priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0); 431 priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0); 432 priv->cb = cb; 433 priv->cb_param = param; 434 be->fd = priv->nmd->fd; 435 436 priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); 437 if (priv->mevp == NULL) { 438 WPRINTF(("Could not register event\n\r")); 439 return (-1); 440 } 441 442 return (0); 443 } 444 445 static void 446 netmap_cleanup(struct net_backend *be) 447 { 448 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 449 450 if (priv->mevp) { 451 mevent_delete(priv->mevp); 452 } 453 if (priv->nmd) { 454 nm_close(priv->nmd); 455 } 456 be->fd = -1; 457 } 458 459 static ssize_t 460 netmap_send(struct net_backend *be, struct iovec *iov, 461 int iovcnt) 462 { 463 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 464 struct netmap_ring *ring; 465 ssize_t totlen = 0; 466 int nm_buf_size; 467 int nm_buf_len; 468 uint32_t head; 469 void *nm_buf; 470 int j; 471 472 ring = priv->tx; 473 head = ring->head; 474 if (head == ring->tail) { 475 WPRINTF(("No space, drop %zu bytes\n\r", count_iov(iov, iovcnt))); 476 goto txsync; 477 } 478 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 479 nm_buf_size = ring->nr_buf_size; 480 nm_buf_len = 0; 481 482 for (j = 0; j < iovcnt; j++) { 483 int iov_frag_size = iov[j].iov_len; 484 void *iov_frag_buf = iov[j].iov_base; 485 486 totlen += iov_frag_size; 487 488 /* 489 * Split each iovec fragment over more netmap slots, if 490 * necessary. 491 */ 492 for (;;) { 493 int copylen; 494 495 copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size; 496 memcpy(nm_buf, iov_frag_buf, copylen); 497 498 iov_frag_buf += copylen; 499 iov_frag_size -= copylen; 500 nm_buf += copylen; 501 nm_buf_size -= copylen; 502 nm_buf_len += copylen; 503 504 if (iov_frag_size == 0) { 505 break; 506 } 507 508 ring->slot[head].len = nm_buf_len; 509 ring->slot[head].flags = NS_MOREFRAG; 510 head = nm_ring_next(ring, head); 511 if (head == ring->tail) { 512 /* 513 * We ran out of netmap slots while 514 * splitting the iovec fragments. 515 */ 516 WPRINTF(("No space, drop %zu bytes\n\r", 517 count_iov(iov, iovcnt))); 518 goto txsync; 519 } 520 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 521 nm_buf_size = ring->nr_buf_size; 522 nm_buf_len = 0; 523 } 524 } 525 526 /* Complete the last slot, which must not have NS_MOREFRAG set. */ 527 ring->slot[head].len = nm_buf_len; 528 ring->slot[head].flags = 0; 529 head = nm_ring_next(ring, head); 530 531 /* Now update ring->head and ring->cur. */ 532 ring->head = ring->cur = head; 533 txsync: 534 ioctl(be->fd, NIOCTXSYNC, NULL); 535 536 return (totlen); 537 } 538 539 static ssize_t 540 netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 541 { 542 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 543 struct netmap_slot *slot = NULL; 544 struct netmap_ring *ring; 545 void *iov_frag_buf; 546 int iov_frag_size; 547 ssize_t totlen = 0; 548 uint32_t head; 549 550 assert(iovcnt); 551 552 ring = priv->rx; 553 head = ring->head; 554 iov_frag_buf = iov->iov_base; 555 iov_frag_size = iov->iov_len; 556 557 do { 558 int nm_buf_len; 559 void *nm_buf; 560 561 if (head == ring->tail) { 562 return (0); 563 } 564 565 slot = ring->slot + head; 566 nm_buf = NETMAP_BUF(ring, slot->buf_idx); 567 nm_buf_len = slot->len; 568 569 for (;;) { 570 int copylen = nm_buf_len < iov_frag_size ? 571 nm_buf_len : iov_frag_size; 572 573 memcpy(iov_frag_buf, nm_buf, copylen); 574 nm_buf += copylen; 575 nm_buf_len -= copylen; 576 iov_frag_buf += copylen; 577 iov_frag_size -= copylen; 578 totlen += copylen; 579 580 if (nm_buf_len == 0) { 581 break; 582 } 583 584 iov++; 585 iovcnt--; 586 if (iovcnt == 0) { 587 /* No space to receive. */ 588 WPRINTF(("Short iov, drop %zd bytes\n\r", 589 totlen)); 590 return (-ENOSPC); 591 } 592 iov_frag_buf = iov->iov_base; 593 iov_frag_size = iov->iov_len; 594 } 595 596 head = nm_ring_next(ring, head); 597 598 } while (slot->flags & NS_MOREFRAG); 599 600 /* Release slots to netmap. */ 601 ring->head = ring->cur = head; 602 603 return (totlen); 604 } 605 606 static void 607 netmap_recv_enable(struct net_backend *be) 608 { 609 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 610 611 mevent_enable(priv->mevp); 612 } 613 614 static void 615 netmap_recv_disable(struct net_backend *be) 616 { 617 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 618 619 mevent_disable(priv->mevp); 620 } 621 622 static struct net_backend netmap_backend = { 623 .prefix = "netmap", 624 .priv_size = sizeof(struct netmap_priv), 625 .init = netmap_init, 626 .cleanup = netmap_cleanup, 627 .send = netmap_send, 628 .recv = netmap_recv, 629 .recv_enable = netmap_recv_enable, 630 .recv_disable = netmap_recv_disable, 631 .get_cap = netmap_get_cap, 632 .set_cap = netmap_set_cap, 633 }; 634 635 /* A clone of the netmap backend, with a different prefix. */ 636 static struct net_backend vale_backend = { 637 .prefix = "vale", 638 .priv_size = sizeof(struct netmap_priv), 639 .init = netmap_init, 640 .cleanup = netmap_cleanup, 641 .send = netmap_send, 642 .recv = netmap_recv, 643 .recv_enable = netmap_recv_enable, 644 .recv_disable = netmap_recv_disable, 645 .get_cap = netmap_get_cap, 646 .set_cap = netmap_set_cap, 647 }; 648 649 DATA_SET(net_backend_set, netmap_backend); 650 DATA_SET(net_backend_set, vale_backend); 651 652 /* 653 * Initialize a backend and attach to the frontend. 654 * This is called during frontend initialization. 655 * @pbe is a pointer to the backend to be initialized 656 * @devname is the backend-name as supplied on the command line, 657 * e.g. -s 2:0,frontend-name,backend-name[,other-args] 658 * @cb is the receive callback supplied by the frontend, 659 * and it is invoked in the event loop when a receive 660 * event is generated in the hypervisor, 661 * @param is a pointer to the frontend, and normally used as 662 * the argument for the callback. 663 */ 664 int 665 netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, 666 void *param) 667 { 668 struct net_backend **pbe, *nbe, *tbe = NULL; 669 int err; 670 671 /* 672 * Find the network backend that matches the user-provided 673 * device name. net_backend_set is built using a linker set. 674 */ 675 SET_FOREACH(pbe, net_backend_set) { 676 if (strncmp(devname, (*pbe)->prefix, 677 strlen((*pbe)->prefix)) == 0) { 678 tbe = *pbe; 679 assert(tbe->init != NULL); 680 assert(tbe->cleanup != NULL); 681 assert(tbe->send != NULL); 682 assert(tbe->recv != NULL); 683 assert(tbe->get_cap != NULL); 684 assert(tbe->set_cap != NULL); 685 break; 686 } 687 } 688 689 *ret = NULL; 690 if (tbe == NULL) 691 return (EINVAL); 692 nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); 693 *nbe = *tbe; /* copy the template */ 694 nbe->fd = -1; 695 nbe->sc = param; 696 nbe->be_vnet_hdr_len = 0; 697 nbe->fe_vnet_hdr_len = 0; 698 699 /* Initialize the backend. */ 700 err = nbe->init(nbe, devname, cb, param); 701 if (err) { 702 free(nbe); 703 return (err); 704 } 705 706 *ret = nbe; 707 708 return (0); 709 } 710 711 void 712 netbe_cleanup(struct net_backend *be) 713 { 714 715 if (be != NULL) { 716 be->cleanup(be); 717 free(be); 718 } 719 } 720 721 uint64_t 722 netbe_get_cap(struct net_backend *be) 723 { 724 725 assert(be != NULL); 726 return (be->get_cap(be)); 727 } 728 729 int 730 netbe_set_cap(struct net_backend *be, uint64_t features, 731 unsigned vnet_hdr_len) 732 { 733 int ret; 734 735 assert(be != NULL); 736 737 /* There are only three valid lengths, i.e., 0, 10 and 12. */ 738 if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN 739 && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t))) 740 return (-1); 741 742 be->fe_vnet_hdr_len = vnet_hdr_len; 743 744 ret = be->set_cap(be, features, vnet_hdr_len); 745 assert(be->be_vnet_hdr_len == 0 || 746 be->be_vnet_hdr_len == be->fe_vnet_hdr_len); 747 748 return (ret); 749 } 750 751 static __inline struct iovec * 752 iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen) 753 { 754 struct iovec *riov; 755 756 /* XXX short-cut: assume first segment is >= tlen */ 757 assert(iov[0].iov_len >= tlen); 758 759 iov[0].iov_len -= tlen; 760 if (iov[0].iov_len == 0) { 761 assert(*iovcnt > 1); 762 *iovcnt -= 1; 763 riov = &iov[1]; 764 } else { 765 iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); 766 riov = &iov[0]; 767 } 768 769 return (riov); 770 } 771 772 ssize_t 773 netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt) 774 { 775 776 assert(be != NULL); 777 if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) { 778 /* 779 * The frontend uses a virtio-net header, but the backend 780 * does not. We ignore it (as it must be all zeroes) and 781 * strip it. 782 */ 783 assert(be->be_vnet_hdr_len == 0); 784 iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len); 785 } 786 787 return (be->send(be, iov, iovcnt)); 788 } 789 790 /* 791 * Try to read a packet from the backend, without blocking. 792 * If no packets are available, return 0. In case of success, return 793 * the length of the packet just read. Return -1 in case of errors. 794 */ 795 ssize_t 796 netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 797 { 798 /* Length of prepended virtio-net header. */ 799 unsigned int hlen = be->fe_vnet_hdr_len; 800 int ret; 801 802 assert(be != NULL); 803 804 if (hlen && hlen != be->be_vnet_hdr_len) { 805 /* 806 * The frontend uses a virtio-net header, but the backend 807 * does not. We need to prepend a zeroed header. 808 */ 809 struct virtio_net_rxhdr *vh; 810 811 assert(be->be_vnet_hdr_len == 0); 812 813 /* 814 * Get a pointer to the rx header, and use the 815 * data immediately following it for the packet buffer. 816 */ 817 vh = iov[0].iov_base; 818 iov = iov_trim(iov, &iovcnt, hlen); 819 820 /* 821 * The only valid field in the rx packet header is the 822 * number of buffers if merged rx bufs were negotiated. 823 */ 824 memset(vh, 0, hlen); 825 if (hlen == VNET_HDR_LEN) { 826 vh->vrh_bufs = 1; 827 } 828 } 829 830 ret = be->recv(be, iov, iovcnt); 831 if (ret > 0) { 832 ret += hlen; 833 } 834 835 return (ret); 836 } 837 838 /* 839 * Read a packet from the backend and discard it. 840 * Returns the size of the discarded packet or zero if no packet was available. 841 * A negative error code is returned in case of read error. 842 */ 843 ssize_t 844 netbe_rx_discard(struct net_backend *be) 845 { 846 /* 847 * MP note: the dummybuf is only used to discard frames, 848 * so there is no need for it to be per-vtnet or locked. 849 * We only make it large enough for TSO-sized segment. 850 */ 851 static uint8_t dummybuf[65536 + 64]; 852 struct iovec iov; 853 854 iov.iov_base = dummybuf; 855 iov.iov_len = sizeof(dummybuf); 856 857 return netbe_recv(be, &iov, 1); 858 } 859 860 void 861 netbe_rx_disable(struct net_backend *be) 862 { 863 864 return be->recv_disable(be); 865 } 866 867 void 868 netbe_rx_enable(struct net_backend *be) 869 { 870 871 return be->recv_enable(be); 872 } 873