/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * This file implements multiple network backends (tap, netmap, ...),
 * to be used by network frontends such as virtio-net and e1000.
 * The API to access the backend (e.g. send/receive packets, negotiate
 * features) is exported by net_backends.h.
35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/types.h> /* u_short etc */ 41 #ifndef WITHOUT_CAPSICUM 42 #include <sys/capsicum.h> 43 #endif 44 #include <sys/ioctl.h> 45 #include <sys/mman.h> 46 #include <sys/uio.h> 47 48 #include <net/if.h> 49 #include <net/netmap.h> 50 #include <net/netmap_virt.h> 51 #define NETMAP_WITH_LIBS 52 #include <net/netmap_user.h> 53 54 #ifndef WITHOUT_CAPSICUM 55 #include <capsicum_helpers.h> 56 #endif 57 #include <err.h> 58 #include <errno.h> 59 #include <fcntl.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <stdint.h> 63 #include <string.h> 64 #include <unistd.h> 65 #include <sysexits.h> 66 #include <assert.h> 67 #include <pthread.h> 68 #include <pthread_np.h> 69 #include <poll.h> 70 #include <assert.h> 71 72 73 #include "iov.h" 74 #include "mevent.h" 75 #include "net_backends.h" 76 77 #include <sys/linker_set.h> 78 79 /* 80 * Each network backend registers a set of function pointers that are 81 * used to implement the net backends API. 82 * This might need to be exposed if we implement backends in separate files. 83 */ 84 struct net_backend { 85 const char *prefix; /* prefix matching this backend */ 86 87 /* 88 * Routines used to initialize and cleanup the resources needed 89 * by a backend. The cleanup function is used internally, 90 * and should not be called by the frontend. 91 */ 92 int (*init)(struct net_backend *be, const char *devname, 93 net_be_rxeof_t cb, void *param); 94 void (*cleanup)(struct net_backend *be); 95 96 /* 97 * Called to serve a guest transmit request. The scatter-gather 98 * vector provided by the caller has 'iovcnt' elements and contains 99 * the packet to send. 100 */ 101 ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt); 102 103 /* 104 * Called to receive a packet from the backend. When the function 105 * returns a positive value 'len', the scatter-gather vector 106 * provided by the caller contains a packet with such length. 
107 * The function returns 0 if the backend doesn't have a new packet to 108 * receive. 109 */ 110 ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt); 111 112 /* 113 * Ask the backend for the virtio-net features it is able to 114 * support. Possible features are TSO, UFO and checksum offloading 115 * in both rx and tx direction and for both IPv4 and IPv6. 116 */ 117 uint64_t (*get_cap)(struct net_backend *be); 118 119 /* 120 * Tell the backend to enable/disable the specified virtio-net 121 * features (capabilities). 122 */ 123 int (*set_cap)(struct net_backend *be, uint64_t features, 124 unsigned int vnet_hdr_len); 125 126 struct pci_vtnet_softc *sc; 127 int fd; 128 129 /* 130 * Length of the virtio-net header used by the backend and the 131 * frontend, respectively. A zero value means that the header 132 * is not used. 133 */ 134 unsigned int be_vnet_hdr_len; 135 unsigned int fe_vnet_hdr_len; 136 137 /* Size of backend-specific private data. */ 138 size_t priv_size; 139 140 /* Room for backend-specific data. 
*/ 141 char opaque[0]; 142 }; 143 144 SET_DECLARE(net_backend_set, struct net_backend); 145 146 #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr) 147 148 #define WPRINTF(params) printf params 149 150 /* 151 * The tap backend 152 */ 153 154 struct tap_priv { 155 struct mevent *mevp; 156 }; 157 158 static void 159 tap_cleanup(struct net_backend *be) 160 { 161 struct tap_priv *priv = (struct tap_priv *)be->opaque; 162 163 if (priv->mevp) { 164 mevent_delete(priv->mevp); 165 } 166 if (be->fd != -1) { 167 close(be->fd); 168 be->fd = -1; 169 } 170 } 171 172 static int 173 tap_init(struct net_backend *be, const char *devname, 174 net_be_rxeof_t cb, void *param) 175 { 176 struct tap_priv *priv = (struct tap_priv *)be->opaque; 177 char tbuf[80]; 178 int opt = 1; 179 #ifndef WITHOUT_CAPSICUM 180 cap_rights_t rights; 181 #endif 182 183 if (cb == NULL) { 184 WPRINTF(("TAP backend requires non-NULL callback\n")); 185 return (-1); 186 } 187 188 strcpy(tbuf, "/dev/"); 189 strlcat(tbuf, devname, sizeof(tbuf)); 190 191 be->fd = open(tbuf, O_RDWR); 192 if (be->fd == -1) { 193 WPRINTF(("open of tap device %s failed\n", tbuf)); 194 goto error; 195 } 196 197 /* 198 * Set non-blocking and register for read 199 * notifications with the event loop 200 */ 201 if (ioctl(be->fd, FIONBIO, &opt) < 0) { 202 WPRINTF(("tap device O_NONBLOCK failed\n")); 203 goto error; 204 } 205 206 #ifndef WITHOUT_CAPSICUM 207 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 208 if (caph_rights_limit(be->fd, &rights) == -1) 209 errx(EX_OSERR, "Unable to apply rights for sandbox"); 210 #endif 211 212 priv->mevp = mevent_add(be->fd, EVF_READ, cb, param); 213 if (priv->mevp == NULL) { 214 WPRINTF(("Could not register event\n")); 215 goto error; 216 } 217 218 return (0); 219 220 error: 221 tap_cleanup(be); 222 return (-1); 223 } 224 225 /* 226 * Called to send a buffer chain out to the tap device 227 */ 228 static ssize_t 229 tap_send(struct net_backend *be, struct iovec *iov, int iovcnt) 230 { 231 
return (writev(be->fd, iov, iovcnt)); 232 } 233 234 static ssize_t 235 tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 236 { 237 ssize_t ret; 238 239 /* Should never be called without a valid tap fd */ 240 assert(be->fd != -1); 241 242 ret = readv(be->fd, iov, iovcnt); 243 244 if (ret < 0 && errno == EWOULDBLOCK) { 245 return (0); 246 } 247 248 return (ret); 249 } 250 251 static uint64_t 252 tap_get_cap(struct net_backend *be) 253 { 254 255 return (0); /* no capabilities for now */ 256 } 257 258 static int 259 tap_set_cap(struct net_backend *be, uint64_t features, 260 unsigned vnet_hdr_len) 261 { 262 263 return ((features || vnet_hdr_len) ? -1 : 0); 264 } 265 266 static struct net_backend tap_backend = { 267 .prefix = "tap", 268 .priv_size = sizeof(struct tap_priv), 269 .init = tap_init, 270 .cleanup = tap_cleanup, 271 .send = tap_send, 272 .recv = tap_recv, 273 .get_cap = tap_get_cap, 274 .set_cap = tap_set_cap, 275 }; 276 277 /* A clone of the tap backend, with a different prefix. */ 278 static struct net_backend vmnet_backend = { 279 .prefix = "vmnet", 280 .priv_size = sizeof(struct tap_priv), 281 .init = tap_init, 282 .cleanup = tap_cleanup, 283 .send = tap_send, 284 .recv = tap_recv, 285 .get_cap = tap_get_cap, 286 .set_cap = tap_set_cap, 287 }; 288 289 DATA_SET(net_backend_set, tap_backend); 290 DATA_SET(net_backend_set, vmnet_backend); 291 292 /* 293 * The netmap backend 294 */ 295 296 /* The virtio-net features supported by netmap. 
*/ 297 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ 298 VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ 299 VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ 300 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO) 301 302 struct netmap_priv { 303 char ifname[IFNAMSIZ]; 304 struct nm_desc *nmd; 305 uint16_t memid; 306 struct netmap_ring *rx; 307 struct netmap_ring *tx; 308 struct mevent *mevp; 309 net_be_rxeof_t cb; 310 void *cb_param; 311 }; 312 313 static void 314 nmreq_init(struct nmreq *req, char *ifname) 315 { 316 317 memset(req, 0, sizeof(*req)); 318 strlcpy(req->nr_name, ifname, sizeof(req->nr_name)); 319 req->nr_version = NETMAP_API; 320 } 321 322 static int 323 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) 324 { 325 int err; 326 struct nmreq req; 327 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 328 329 nmreq_init(&req, priv->ifname); 330 req.nr_cmd = NETMAP_BDG_VNET_HDR; 331 req.nr_arg1 = vnet_hdr_len; 332 err = ioctl(be->fd, NIOCREGIF, &req); 333 if (err) { 334 WPRINTF(("Unable to set vnet header length %d\n", 335 vnet_hdr_len)); 336 return (err); 337 } 338 339 be->be_vnet_hdr_len = vnet_hdr_len; 340 341 return (0); 342 } 343 344 static int 345 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len) 346 { 347 int prev_hdr_len = be->be_vnet_hdr_len; 348 int ret; 349 350 if (vnet_hdr_len == prev_hdr_len) { 351 return (1); 352 } 353 354 ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len); 355 if (ret) { 356 return (0); 357 } 358 359 netmap_set_vnet_hdr_len(be, prev_hdr_len); 360 361 return (1); 362 } 363 364 static uint64_t 365 netmap_get_cap(struct net_backend *be) 366 { 367 368 return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ? 
369 NETMAP_FEATURES : 0); 370 } 371 372 static int 373 netmap_set_cap(struct net_backend *be, uint64_t features, 374 unsigned vnet_hdr_len) 375 { 376 377 return (netmap_set_vnet_hdr_len(be, vnet_hdr_len)); 378 } 379 380 static int 381 netmap_init(struct net_backend *be, const char *devname, 382 net_be_rxeof_t cb, void *param) 383 { 384 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 385 386 strlcpy(priv->ifname, devname, sizeof(priv->ifname)); 387 priv->ifname[sizeof(priv->ifname) - 1] = '\0'; 388 389 priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); 390 if (priv->nmd == NULL) { 391 WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n", 392 devname, strerror(errno))); 393 free(priv); 394 return (-1); 395 } 396 397 priv->memid = priv->nmd->req.nr_arg2; 398 priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0); 399 priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0); 400 priv->cb = cb; 401 priv->cb_param = param; 402 be->fd = priv->nmd->fd; 403 404 priv->mevp = mevent_add(be->fd, EVF_READ, cb, param); 405 if (priv->mevp == NULL) { 406 WPRINTF(("Could not register event\n")); 407 return (-1); 408 } 409 410 return (0); 411 } 412 413 static void 414 netmap_cleanup(struct net_backend *be) 415 { 416 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 417 418 if (priv->mevp) { 419 mevent_delete(priv->mevp); 420 } 421 if (priv->nmd) { 422 nm_close(priv->nmd); 423 } 424 be->fd = -1; 425 } 426 427 static ssize_t 428 netmap_send(struct net_backend *be, struct iovec *iov, 429 int iovcnt) 430 { 431 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 432 struct netmap_ring *ring; 433 ssize_t totlen = 0; 434 int nm_buf_size; 435 int nm_buf_len; 436 uint32_t head; 437 void *nm_buf; 438 int j; 439 440 ring = priv->tx; 441 head = ring->head; 442 if (head == ring->tail) { 443 WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt))); 444 goto txsync; 445 } 446 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 447 nm_buf_size = 
ring->nr_buf_size; 448 nm_buf_len = 0; 449 450 for (j = 0; j < iovcnt; j++) { 451 int iov_frag_size = iov[j].iov_len; 452 void *iov_frag_buf = iov[j].iov_base; 453 454 totlen += iov_frag_size; 455 456 /* 457 * Split each iovec fragment over more netmap slots, if 458 * necessary. 459 */ 460 for (;;) { 461 int copylen; 462 463 copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size; 464 memcpy(nm_buf, iov_frag_buf, copylen); 465 466 iov_frag_buf += copylen; 467 iov_frag_size -= copylen; 468 nm_buf += copylen; 469 nm_buf_size -= copylen; 470 nm_buf_len += copylen; 471 472 if (iov_frag_size == 0) { 473 break; 474 } 475 476 ring->slot[head].len = nm_buf_len; 477 ring->slot[head].flags = NS_MOREFRAG; 478 head = nm_ring_next(ring, head); 479 if (head == ring->tail) { 480 /* 481 * We ran out of netmap slots while 482 * splitting the iovec fragments. 483 */ 484 WPRINTF(("No space, drop %zu bytes\n", 485 count_iov(iov, iovcnt))); 486 goto txsync; 487 } 488 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 489 nm_buf_size = ring->nr_buf_size; 490 nm_buf_len = 0; 491 } 492 } 493 494 /* Complete the last slot, which must not have NS_MOREFRAG set. */ 495 ring->slot[head].len = nm_buf_len; 496 ring->slot[head].flags = 0; 497 head = nm_ring_next(ring, head); 498 499 /* Now update ring->head and ring->cur. 
*/ 500 ring->head = ring->cur = head; 501 txsync: 502 ioctl(be->fd, NIOCTXSYNC, NULL); 503 504 return (totlen); 505 } 506 507 static ssize_t 508 netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 509 { 510 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 511 struct netmap_slot *slot = NULL; 512 struct netmap_ring *ring; 513 void *iov_frag_buf; 514 int iov_frag_size; 515 ssize_t totlen = 0; 516 uint32_t head; 517 518 assert(iovcnt); 519 520 ring = priv->rx; 521 head = ring->head; 522 iov_frag_buf = iov->iov_base; 523 iov_frag_size = iov->iov_len; 524 525 do { 526 int nm_buf_len; 527 void *nm_buf; 528 529 if (head == ring->tail) { 530 return (0); 531 } 532 533 slot = ring->slot + head; 534 nm_buf = NETMAP_BUF(ring, slot->buf_idx); 535 nm_buf_len = slot->len; 536 537 for (;;) { 538 int copylen = nm_buf_len < iov_frag_size ? 539 nm_buf_len : iov_frag_size; 540 541 memcpy(iov_frag_buf, nm_buf, copylen); 542 nm_buf += copylen; 543 nm_buf_len -= copylen; 544 iov_frag_buf += copylen; 545 iov_frag_size -= copylen; 546 totlen += copylen; 547 548 if (nm_buf_len == 0) { 549 break; 550 } 551 552 iov++; 553 iovcnt--; 554 if (iovcnt == 0) { 555 /* No space to receive. */ 556 WPRINTF(("Short iov, drop %zd bytes\n", 557 totlen)); 558 return (-ENOSPC); 559 } 560 iov_frag_buf = iov->iov_base; 561 iov_frag_size = iov->iov_len; 562 } 563 564 head = nm_ring_next(ring, head); 565 566 } while (slot->flags & NS_MOREFRAG); 567 568 /* Release slots to netmap. */ 569 ring->head = ring->cur = head; 570 571 return (totlen); 572 } 573 574 static struct net_backend netmap_backend = { 575 .prefix = "netmap", 576 .priv_size = sizeof(struct netmap_priv), 577 .init = netmap_init, 578 .cleanup = netmap_cleanup, 579 .send = netmap_send, 580 .recv = netmap_recv, 581 .get_cap = netmap_get_cap, 582 .set_cap = netmap_set_cap, 583 }; 584 585 /* A clone of the netmap backend, with a different prefix. 
*/ 586 static struct net_backend vale_backend = { 587 .prefix = "vale", 588 .priv_size = sizeof(struct netmap_priv), 589 .init = netmap_init, 590 .cleanup = netmap_cleanup, 591 .send = netmap_send, 592 .recv = netmap_recv, 593 .get_cap = netmap_get_cap, 594 .set_cap = netmap_set_cap, 595 }; 596 597 DATA_SET(net_backend_set, netmap_backend); 598 DATA_SET(net_backend_set, vale_backend); 599 600 /* 601 * Initialize a backend and attach to the frontend. 602 * This is called during frontend initialization. 603 * @pbe is a pointer to the backend to be initialized 604 * @devname is the backend-name as supplied on the command line, 605 * e.g. -s 2:0,frontend-name,backend-name[,other-args] 606 * @cb is the receive callback supplied by the frontend, 607 * and it is invoked in the event loop when a receive 608 * event is generated in the hypervisor, 609 * @param is a pointer to the frontend, and normally used as 610 * the argument for the callback. 611 */ 612 int 613 netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, 614 void *param) 615 { 616 struct net_backend **pbe, *nbe, *tbe = NULL; 617 int err; 618 619 /* 620 * Find the network backend that matches the user-provided 621 * device name. net_backend_set is built using a linker set. 622 */ 623 SET_FOREACH(pbe, net_backend_set) { 624 if (strncmp(devname, (*pbe)->prefix, 625 strlen((*pbe)->prefix)) == 0) { 626 tbe = *pbe; 627 assert(tbe->init != NULL); 628 assert(tbe->cleanup != NULL); 629 assert(tbe->send != NULL); 630 assert(tbe->recv != NULL); 631 assert(tbe->get_cap != NULL); 632 assert(tbe->set_cap != NULL); 633 break; 634 } 635 } 636 637 *ret = NULL; 638 if (tbe == NULL) 639 return (EINVAL); 640 nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); 641 *nbe = *tbe; /* copy the template */ 642 nbe->fd = -1; 643 nbe->sc = param; 644 nbe->be_vnet_hdr_len = 0; 645 nbe->fe_vnet_hdr_len = 0; 646 647 /* Initialize the backend. 
*/ 648 err = nbe->init(nbe, devname, cb, param); 649 if (err) { 650 free(nbe); 651 return (err); 652 } 653 654 *ret = nbe; 655 656 return (0); 657 } 658 659 void 660 netbe_cleanup(struct net_backend *be) 661 { 662 663 if (be != NULL) { 664 be->cleanup(be); 665 free(be); 666 } 667 } 668 669 uint64_t 670 netbe_get_cap(struct net_backend *be) 671 { 672 673 assert(be != NULL); 674 return (be->get_cap(be)); 675 } 676 677 int 678 netbe_set_cap(struct net_backend *be, uint64_t features, 679 unsigned vnet_hdr_len) 680 { 681 int ret; 682 683 assert(be != NULL); 684 685 /* There are only three valid lengths, i.e., 0, 10 and 12. */ 686 if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN 687 && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t))) 688 return (-1); 689 690 be->fe_vnet_hdr_len = vnet_hdr_len; 691 692 ret = be->set_cap(be, features, vnet_hdr_len); 693 assert(be->be_vnet_hdr_len == 0 || 694 be->be_vnet_hdr_len == be->fe_vnet_hdr_len); 695 696 return (ret); 697 } 698 699 static __inline struct iovec * 700 iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen) 701 { 702 struct iovec *riov; 703 704 /* XXX short-cut: assume first segment is >= tlen */ 705 assert(iov[0].iov_len >= tlen); 706 707 iov[0].iov_len -= tlen; 708 if (iov[0].iov_len == 0) { 709 assert(*iovcnt > 1); 710 *iovcnt -= 1; 711 riov = &iov[1]; 712 } else { 713 iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); 714 riov = &iov[0]; 715 } 716 717 return (riov); 718 } 719 720 ssize_t 721 netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt) 722 { 723 724 assert(be != NULL); 725 if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) { 726 /* 727 * The frontend uses a virtio-net header, but the backend 728 * does not. We ignore it (as it must be all zeroes) and 729 * strip it. 
730 */ 731 assert(be->be_vnet_hdr_len == 0); 732 iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len); 733 } 734 735 return (be->send(be, iov, iovcnt)); 736 } 737 738 /* 739 * Try to read a packet from the backend, without blocking. 740 * If no packets are available, return 0. In case of success, return 741 * the length of the packet just read. Return -1 in case of errors. 742 */ 743 ssize_t 744 netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt) 745 { 746 /* Length of prepended virtio-net header. */ 747 unsigned int hlen = be->fe_vnet_hdr_len; 748 int ret; 749 750 assert(be != NULL); 751 752 if (hlen && hlen != be->be_vnet_hdr_len) { 753 /* 754 * The frontend uses a virtio-net header, but the backend 755 * does not. We need to prepend a zeroed header. 756 */ 757 struct virtio_net_rxhdr *vh; 758 759 assert(be->be_vnet_hdr_len == 0); 760 761 /* 762 * Get a pointer to the rx header, and use the 763 * data immediately following it for the packet buffer. 764 */ 765 vh = iov[0].iov_base; 766 iov = iov_trim(iov, &iovcnt, hlen); 767 768 /* 769 * The only valid field in the rx packet header is the 770 * number of buffers if merged rx bufs were negotiated. 771 */ 772 memset(vh, 0, hlen); 773 if (hlen == VNET_HDR_LEN) { 774 vh->vrh_bufs = 1; 775 } 776 } 777 778 ret = be->recv(be, iov, iovcnt); 779 if (ret > 0) { 780 ret += hlen; 781 } 782 783 return (ret); 784 } 785 786 /* 787 * Read a packet from the backend and discard it. 788 * Returns the size of the discarded packet or zero if no packet was available. 789 * A negative error code is returned in case of read error. 790 */ 791 ssize_t 792 netbe_rx_discard(struct net_backend *be) 793 { 794 /* 795 * MP note: the dummybuf is only used to discard frames, 796 * so there is no need for it to be per-vtnet or locked. 797 * We only make it large enough for TSO-sized segment. 
798 */ 799 static uint8_t dummybuf[65536 + 64]; 800 struct iovec iov; 801 802 iov.iov_base = dummybuf; 803 iov.iov_len = sizeof(dummybuf); 804 805 return netbe_recv(be, &iov, 1); 806 } 807 808