1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, 20 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 21 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 30 /* 31 * This file implements multiple network backends (tap, netmap, ...), 32 * to be used by network frontends such as virtio-net and e1000. 33 * The API to access the backend (e.g. send/receive packets, negotiate 34 * features) is exported by net_backends.h. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/types.h> /* u_short etc */ 41 #ifndef WITHOUT_CAPSICUM 42 #include <sys/capsicum.h> 43 #endif 44 #include <sys/ioctl.h> 45 #include <sys/mman.h> 46 #include <sys/uio.h> 47 48 #include <net/if.h> 49 #include <net/netmap.h> 50 #include <net/netmap_virt.h> 51 #define NETMAP_WITH_LIBS 52 #include <net/netmap_user.h> 53 54 #ifndef WITHOUT_CAPSICUM 55 #include <capsicum_helpers.h> 56 #endif 57 #include <err.h> 58 #include <errno.h> 59 #include <fcntl.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <stdint.h> 63 #include <string.h> 64 #include <unistd.h> 65 #include <sysexits.h> 66 #include <assert.h> 67 #include <pthread.h> 68 #include <pthread_np.h> 69 #include <poll.h> 70 #include <assert.h> 71 72 73 #include "debug.h" 74 #include "iov.h" 75 #include "mevent.h" 76 #include "net_backends.h" 77 78 #include <sys/linker_set.h> 79 80 /* 81 * Each network backend registers a set of function pointers that are 82 * used to implement the net backends API. 83 * This might need to be exposed if we implement backends in separate files. 84 */ 85 struct net_backend { 86 const char *prefix; /* prefix matching this backend */ 87 88 /* 89 * Routines used to initialize and cleanup the resources needed 90 * by a backend. The cleanup function is used internally, 91 * and should not be called by the frontend. 92 */ 93 int (*init)(struct net_backend *be, const char *devname, 94 net_be_rxeof_t cb, void *param); 95 void (*cleanup)(struct net_backend *be); 96 97 /* 98 * Called to serve a guest transmit request. The scatter-gather 99 * vector provided by the caller has 'iovcnt' elements and contains 100 * the packet to send. 101 */ 102 ssize_t (*send)(struct net_backend *be, const struct iovec *iov, 103 int iovcnt); 104 105 /* 106 * Get the length of the next packet that can be received from 107 * the backend. If no packets are currently available, this 108 * function returns 0. 109 */ 110 ssize_t (*peek_recvlen)(struct net_backend *be); 111 112 /* 113 * Called to receive a packet from the backend. When the function 114 * returns a positive value 'len', the scatter-gather vector 115 * provided by the caller contains a packet with such length. 116 * The function returns 0 if the backend doesn't have a new packet to 117 * receive. 118 */ 119 ssize_t (*recv)(struct net_backend *be, const struct iovec *iov, 120 int iovcnt); 121 122 /* 123 * Ask the backend to enable or disable receive operation in the 124 * backend. On return from a disable operation, it is guaranteed 125 * that the receive callback won't be called until receive is 126 * enabled again. Note however that it is up to the caller to make 127 * sure that netbe_recv() is not currently being executed by another 128 * thread. 129 */ 130 void (*recv_enable)(struct net_backend *be); 131 void (*recv_disable)(struct net_backend *be); 132 133 /* 134 * Ask the backend for the virtio-net features it is able to 135 * support. Possible features are TSO, UFO and checksum offloading 136 * in both rx and tx direction and for both IPv4 and IPv6. 137 */ 138 uint64_t (*get_cap)(struct net_backend *be); 139 140 /* 141 * Tell the backend to enable/disable the specified virtio-net 142 * features (capabilities). 143 */ 144 int (*set_cap)(struct net_backend *be, uint64_t features, 145 unsigned int vnet_hdr_len); 146 147 struct pci_vtnet_softc *sc; 148 int fd; 149 150 /* 151 * Length of the virtio-net header used by the backend and the 152 * frontend, respectively. A zero value means that the header 153 * is not used. 154 */ 155 unsigned int be_vnet_hdr_len; 156 unsigned int fe_vnet_hdr_len; 157 158 /* Size of backend-specific private data. */ 159 size_t priv_size; 160 161 /* Room for backend-specific data. */ 162 char opaque[0]; 163 }; 164 165 SET_DECLARE(net_backend_set, struct net_backend); 166 167 #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr) 168 169 #define WPRINTF(params) PRINTLN params 170 171 /* 172 * The tap backend 173 */ 174 175 struct tap_priv { 176 struct mevent *mevp; 177 /* 178 * A bounce buffer that allows us to implement the peek_recvlen 179 * callback. In the future we may get the same information from 180 * the kevent data. 181 */ 182 char bbuf[1 << 16]; 183 ssize_t bbuflen; 184 }; 185 186 static void 187 tap_cleanup(struct net_backend *be) 188 { 189 struct tap_priv *priv = (struct tap_priv *)be->opaque; 190 191 if (priv->mevp) { 192 mevent_delete(priv->mevp); 193 } 194 if (be->fd != -1) { 195 close(be->fd); 196 be->fd = -1; 197 } 198 } 199 200 static int 201 tap_init(struct net_backend *be, const char *devname, 202 net_be_rxeof_t cb, void *param) 203 { 204 struct tap_priv *priv = (struct tap_priv *)be->opaque; 205 char tbuf[80]; 206 int opt = 1; 207 #ifndef WITHOUT_CAPSICUM 208 cap_rights_t rights; 209 #endif 210 211 if (cb == NULL) { 212 WPRINTF(("TAP backend requires non-NULL callback")); 213 return (-1); 214 } 215 216 strcpy(tbuf, "/dev/"); 217 strlcat(tbuf, devname, sizeof(tbuf)); 218 219 be->fd = open(tbuf, O_RDWR); 220 if (be->fd == -1) { 221 WPRINTF(("open of tap device %s failed", tbuf)); 222 goto error; 223 } 224 225 /* 226 * Set non-blocking and register for read 227 * notifications with the event loop 228 */ 229 if (ioctl(be->fd, FIONBIO, &opt) < 0) { 230 WPRINTF(("tap device O_NONBLOCK failed")); 231 goto error; 232 } 233 234 #ifndef WITHOUT_CAPSICUM 235 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 236 if (caph_rights_limit(be->fd, &rights) == -1) 237 errx(EX_OSERR, "Unable to apply rights for sandbox"); 238 #endif 239 240 memset(priv->bbuf, 0, sizeof(priv->bbuf)); 241 priv->bbuflen = 0; 242 243 priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); 244 if (priv->mevp == NULL) { 245 WPRINTF(("Could not register event")); 246 goto error; 247 } 248 249 return (0); 250 251 error: 252 tap_cleanup(be); 253 return (-1); 254 } 255 256 /* 257 * Called to send a buffer chain out to the tap device 258 */ 259 static ssize_t 260 tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt) 261 { 262 return (writev(be->fd, iov, iovcnt)); 263 } 264 265 static ssize_t 266 tap_peek_recvlen(struct net_backend *be) 267 { 268 struct tap_priv *priv = (struct tap_priv *)be->opaque; 269 ssize_t ret; 270 271 if (priv->bbuflen > 0) { 272 /* 273 * We already have a packet in the bounce buffer. 274 * Just return its length. 275 */ 276 return priv->bbuflen; 277 } 278 279 /* 280 * Read the next packet (if any) into the bounce buffer, so 281 * that we get to know its length and we can return that 282 * to the caller. 283 */ 284 ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf)); 285 if (ret < 0 && errno == EWOULDBLOCK) { 286 return (0); 287 } 288 289 if (ret > 0) 290 priv->bbuflen = ret; 291 292 return (ret); 293 } 294 295 static ssize_t 296 tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) 297 { 298 struct tap_priv *priv = (struct tap_priv *)be->opaque; 299 ssize_t ret; 300 301 if (priv->bbuflen > 0) { 302 /* 303 * A packet is available in the bounce buffer, so 304 * we read it from there. 305 */ 306 ret = buf_to_iov(priv->bbuf, priv->bbuflen, 307 iov, iovcnt, 0); 308 309 /* Mark the bounce buffer as empty. */ 310 priv->bbuflen = 0; 311 312 return (ret); 313 } 314 315 ret = readv(be->fd, iov, iovcnt); 316 if (ret < 0 && errno == EWOULDBLOCK) { 317 return (0); 318 } 319 320 return (ret); 321 } 322 323 static void 324 tap_recv_enable(struct net_backend *be) 325 { 326 struct tap_priv *priv = (struct tap_priv *)be->opaque; 327 328 mevent_enable(priv->mevp); 329 } 330 331 static void 332 tap_recv_disable(struct net_backend *be) 333 { 334 struct tap_priv *priv = (struct tap_priv *)be->opaque; 335 336 mevent_disable(priv->mevp); 337 } 338 339 static uint64_t 340 tap_get_cap(struct net_backend *be) 341 { 342 343 return (0); /* no capabilities for now */ 344 } 345 346 static int 347 tap_set_cap(struct net_backend *be, uint64_t features, 348 unsigned vnet_hdr_len) 349 { 350 351 return ((features || vnet_hdr_len) ? -1 : 0); 352 } 353 354 static struct net_backend tap_backend = { 355 .prefix = "tap", 356 .priv_size = sizeof(struct tap_priv), 357 .init = tap_init, 358 .cleanup = tap_cleanup, 359 .send = tap_send, 360 .peek_recvlen = tap_peek_recvlen, 361 .recv = tap_recv, 362 .recv_enable = tap_recv_enable, 363 .recv_disable = tap_recv_disable, 364 .get_cap = tap_get_cap, 365 .set_cap = tap_set_cap, 366 }; 367 368 /* A clone of the tap backend, with a different prefix. */ 369 static struct net_backend vmnet_backend = { 370 .prefix = "vmnet", 371 .priv_size = sizeof(struct tap_priv), 372 .init = tap_init, 373 .cleanup = tap_cleanup, 374 .send = tap_send, 375 .peek_recvlen = tap_peek_recvlen, 376 .recv = tap_recv, 377 .recv_enable = tap_recv_enable, 378 .recv_disable = tap_recv_disable, 379 .get_cap = tap_get_cap, 380 .set_cap = tap_set_cap, 381 }; 382 383 DATA_SET(net_backend_set, tap_backend); 384 DATA_SET(net_backend_set, vmnet_backend); 385 386 /* 387 * The netmap backend 388 */ 389 390 /* The virtio-net features supported by netmap. */ 391 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ 392 VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ 393 VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ 394 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO) 395 396 struct netmap_priv { 397 char ifname[IFNAMSIZ]; 398 struct nm_desc *nmd; 399 uint16_t memid; 400 struct netmap_ring *rx; 401 struct netmap_ring *tx; 402 struct mevent *mevp; 403 net_be_rxeof_t cb; 404 void *cb_param; 405 }; 406 407 static void 408 nmreq_init(struct nmreq *req, char *ifname) 409 { 410 411 memset(req, 0, sizeof(*req)); 412 strlcpy(req->nr_name, ifname, sizeof(req->nr_name)); 413 req->nr_version = NETMAP_API; 414 } 415 416 static int 417 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) 418 { 419 int err; 420 struct nmreq req; 421 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 422 423 nmreq_init(&req, priv->ifname); 424 req.nr_cmd = NETMAP_BDG_VNET_HDR; 425 req.nr_arg1 = vnet_hdr_len; 426 err = ioctl(be->fd, NIOCREGIF, &req); 427 if (err) { 428 WPRINTF(("Unable to set vnet header length %d", 429 vnet_hdr_len)); 430 return (err); 431 } 432 433 be->be_vnet_hdr_len = vnet_hdr_len; 434 435 return (0); 436 } 437 438 static int 439 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len) 440 { 441 int prev_hdr_len = be->be_vnet_hdr_len; 442 int ret; 443 444 if (vnet_hdr_len == prev_hdr_len) { 445 return (1); 446 } 447 448 ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len); 449 if (ret) { 450 return (0); 451 } 452 453 netmap_set_vnet_hdr_len(be, prev_hdr_len); 454 455 return (1); 456 } 457 458 static uint64_t 459 netmap_get_cap(struct net_backend *be) 460 { 461 462 return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ? 463 NETMAP_FEATURES : 0); 464 } 465 466 static int 467 netmap_set_cap(struct net_backend *be, uint64_t features, 468 unsigned vnet_hdr_len) 469 { 470 471 return (netmap_set_vnet_hdr_len(be, vnet_hdr_len)); 472 } 473 474 static int 475 netmap_init(struct net_backend *be, const char *devname, 476 net_be_rxeof_t cb, void *param) 477 { 478 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 479 480 strlcpy(priv->ifname, devname, sizeof(priv->ifname)); 481 priv->ifname[sizeof(priv->ifname) - 1] = '\0'; 482 483 priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); 484 if (priv->nmd == NULL) { 485 WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)", 486 devname, strerror(errno))); 487 free(priv); 488 return (-1); 489 } 490 491 priv->memid = priv->nmd->req.nr_arg2; 492 priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0); 493 priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0); 494 priv->cb = cb; 495 priv->cb_param = param; 496 be->fd = priv->nmd->fd; 497 498 priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); 499 if (priv->mevp == NULL) { 500 WPRINTF(("Could not register event")); 501 return (-1); 502 } 503 504 return (0); 505 } 506 507 static void 508 netmap_cleanup(struct net_backend *be) 509 { 510 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 511 512 if (priv->mevp) { 513 mevent_delete(priv->mevp); 514 } 515 if (priv->nmd) { 516 nm_close(priv->nmd); 517 } 518 be->fd = -1; 519 } 520 521 static ssize_t 522 netmap_send(struct net_backend *be, const struct iovec *iov, 523 int iovcnt) 524 { 525 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 526 struct netmap_ring *ring; 527 ssize_t totlen = 0; 528 int nm_buf_size; 529 int nm_buf_len; 530 uint32_t head; 531 void *nm_buf; 532 int j; 533 534 ring = priv->tx; 535 head = ring->head; 536 if (head == ring->tail) { 537 WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt))); 538 goto txsync; 539 } 540 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 541 nm_buf_size = ring->nr_buf_size; 542 nm_buf_len = 0; 543 544 for (j = 0; j < iovcnt; j++) { 545 int iov_frag_size = iov[j].iov_len; 546 void *iov_frag_buf = iov[j].iov_base; 547 548 totlen += iov_frag_size; 549 550 /* 551 * Split each iovec fragment over more netmap slots, if 552 * necessary. 553 */ 554 for (;;) { 555 int copylen; 556 557 copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size; 558 memcpy(nm_buf, iov_frag_buf, copylen); 559 560 iov_frag_buf += copylen; 561 iov_frag_size -= copylen; 562 nm_buf += copylen; 563 nm_buf_size -= copylen; 564 nm_buf_len += copylen; 565 566 if (iov_frag_size == 0) { 567 break; 568 } 569 570 ring->slot[head].len = nm_buf_len; 571 ring->slot[head].flags = NS_MOREFRAG; 572 head = nm_ring_next(ring, head); 573 if (head == ring->tail) { 574 /* 575 * We ran out of netmap slots while 576 * splitting the iovec fragments. 577 */ 578 WPRINTF(("No space, drop %zu bytes", 579 count_iov(iov, iovcnt))); 580 goto txsync; 581 } 582 nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); 583 nm_buf_size = ring->nr_buf_size; 584 nm_buf_len = 0; 585 } 586 } 587 588 /* Complete the last slot, which must not have NS_MOREFRAG set. */ 589 ring->slot[head].len = nm_buf_len; 590 ring->slot[head].flags = 0; 591 head = nm_ring_next(ring, head); 592 593 /* Now update ring->head and ring->cur. */ 594 ring->head = ring->cur = head; 595 txsync: 596 ioctl(be->fd, NIOCTXSYNC, NULL); 597 598 return (totlen); 599 } 600 601 static ssize_t 602 netmap_peek_recvlen(struct net_backend *be) 603 { 604 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 605 struct netmap_ring *ring = priv->rx; 606 uint32_t head = ring->head; 607 ssize_t totlen = 0; 608 609 while (head != ring->tail) { 610 struct netmap_slot *slot = ring->slot + head; 611 612 totlen += slot->len; 613 if ((slot->flags & NS_MOREFRAG) == 0) 614 break; 615 head = nm_ring_next(ring, head); 616 } 617 618 return (totlen); 619 } 620 621 static ssize_t 622 netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) 623 { 624 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 625 struct netmap_slot *slot = NULL; 626 struct netmap_ring *ring; 627 void *iov_frag_buf; 628 int iov_frag_size; 629 ssize_t totlen = 0; 630 uint32_t head; 631 632 assert(iovcnt); 633 634 ring = priv->rx; 635 head = ring->head; 636 iov_frag_buf = iov->iov_base; 637 iov_frag_size = iov->iov_len; 638 639 do { 640 int nm_buf_len; 641 void *nm_buf; 642 643 if (head == ring->tail) { 644 return (0); 645 } 646 647 slot = ring->slot + head; 648 nm_buf = NETMAP_BUF(ring, slot->buf_idx); 649 nm_buf_len = slot->len; 650 651 for (;;) { 652 int copylen = nm_buf_len < iov_frag_size ? 653 nm_buf_len : iov_frag_size; 654 655 memcpy(iov_frag_buf, nm_buf, copylen); 656 nm_buf += copylen; 657 nm_buf_len -= copylen; 658 iov_frag_buf += copylen; 659 iov_frag_size -= copylen; 660 totlen += copylen; 661 662 if (nm_buf_len == 0) { 663 break; 664 } 665 666 iov++; 667 iovcnt--; 668 if (iovcnt == 0) { 669 /* No space to receive. */ 670 WPRINTF(("Short iov, drop %zd bytes", 671 totlen)); 672 return (-ENOSPC); 673 } 674 iov_frag_buf = iov->iov_base; 675 iov_frag_size = iov->iov_len; 676 } 677 678 head = nm_ring_next(ring, head); 679 680 } while (slot->flags & NS_MOREFRAG); 681 682 /* Release slots to netmap. */ 683 ring->head = ring->cur = head; 684 685 return (totlen); 686 } 687 688 static void 689 netmap_recv_enable(struct net_backend *be) 690 { 691 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 692 693 mevent_enable(priv->mevp); 694 } 695 696 static void 697 netmap_recv_disable(struct net_backend *be) 698 { 699 struct netmap_priv *priv = (struct netmap_priv *)be->opaque; 700 701 mevent_disable(priv->mevp); 702 } 703 704 static struct net_backend netmap_backend = { 705 .prefix = "netmap", 706 .priv_size = sizeof(struct netmap_priv), 707 .init = netmap_init, 708 .cleanup = netmap_cleanup, 709 .send = netmap_send, 710 .peek_recvlen = netmap_peek_recvlen, 711 .recv = netmap_recv, 712 .recv_enable = netmap_recv_enable, 713 .recv_disable = netmap_recv_disable, 714 .get_cap = netmap_get_cap, 715 .set_cap = netmap_set_cap, 716 }; 717 718 /* A clone of the netmap backend, with a different prefix. */ 719 static struct net_backend vale_backend = { 720 .prefix = "vale", 721 .priv_size = sizeof(struct netmap_priv), 722 .init = netmap_init, 723 .cleanup = netmap_cleanup, 724 .send = netmap_send, 725 .peek_recvlen = netmap_peek_recvlen, 726 .recv = netmap_recv, 727 .recv_enable = netmap_recv_enable, 728 .recv_disable = netmap_recv_disable, 729 .get_cap = netmap_get_cap, 730 .set_cap = netmap_set_cap, 731 }; 732 733 DATA_SET(net_backend_set, netmap_backend); 734 DATA_SET(net_backend_set, vale_backend); 735 736 /* 737 * Initialize a backend and attach to the frontend. 738 * This is called during frontend initialization. 739 * @pbe is a pointer to the backend to be initialized 740 * @devname is the backend-name as supplied on the command line, 741 * e.g. -s 2:0,frontend-name,backend-name[,other-args] 742 * @cb is the receive callback supplied by the frontend, 743 * and it is invoked in the event loop when a receive 744 * event is generated in the hypervisor, 745 * @param is a pointer to the frontend, and normally used as 746 * the argument for the callback. 747 */ 748 int 749 netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, 750 void *param) 751 { 752 struct net_backend **pbe, *nbe, *tbe = NULL; 753 int err; 754 755 /* 756 * Find the network backend that matches the user-provided 757 * device name. net_backend_set is built using a linker set. 758 */ 759 SET_FOREACH(pbe, net_backend_set) { 760 if (strncmp(devname, (*pbe)->prefix, 761 strlen((*pbe)->prefix)) == 0) { 762 tbe = *pbe; 763 assert(tbe->init != NULL); 764 assert(tbe->cleanup != NULL); 765 assert(tbe->send != NULL); 766 assert(tbe->recv != NULL); 767 assert(tbe->get_cap != NULL); 768 assert(tbe->set_cap != NULL); 769 break; 770 } 771 } 772 773 *ret = NULL; 774 if (tbe == NULL) 775 return (EINVAL); 776 nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); 777 *nbe = *tbe; /* copy the template */ 778 nbe->fd = -1; 779 nbe->sc = param; 780 nbe->be_vnet_hdr_len = 0; 781 nbe->fe_vnet_hdr_len = 0; 782 783 /* Initialize the backend. */ 784 err = nbe->init(nbe, devname, cb, param); 785 if (err) { 786 free(nbe); 787 return (err); 788 } 789 790 *ret = nbe; 791 792 return (0); 793 } 794 795 void 796 netbe_cleanup(struct net_backend *be) 797 { 798 799 if (be != NULL) { 800 be->cleanup(be); 801 free(be); 802 } 803 } 804 805 uint64_t 806 netbe_get_cap(struct net_backend *be) 807 { 808 809 assert(be != NULL); 810 return (be->get_cap(be)); 811 } 812 813 int 814 netbe_set_cap(struct net_backend *be, uint64_t features, 815 unsigned vnet_hdr_len) 816 { 817 int ret; 818 819 assert(be != NULL); 820 821 /* There are only three valid lengths, i.e., 0, 10 and 12. */ 822 if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN 823 && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t))) 824 return (-1); 825 826 be->fe_vnet_hdr_len = vnet_hdr_len; 827 828 ret = be->set_cap(be, features, vnet_hdr_len); 829 assert(be->be_vnet_hdr_len == 0 || 830 be->be_vnet_hdr_len == be->fe_vnet_hdr_len); 831 832 return (ret); 833 } 834 835 ssize_t 836 netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt) 837 { 838 839 return (be->send(be, iov, iovcnt)); 840 } 841 842 ssize_t 843 netbe_peek_recvlen(struct net_backend *be) 844 { 845 846 return (be->peek_recvlen(be)); 847 } 848 849 /* 850 * Try to read a packet from the backend, without blocking. 851 * If no packets are available, return 0. In case of success, return 852 * the length of the packet just read. Return -1 in case of errors. 853 */ 854 ssize_t 855 netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) 856 { 857 858 return (be->recv(be, iov, iovcnt)); 859 } 860 861 /* 862 * Read a packet from the backend and discard it. 863 * Returns the size of the discarded packet or zero if no packet was available. 864 * A negative error code is returned in case of read error. 865 */ 866 ssize_t 867 netbe_rx_discard(struct net_backend *be) 868 { 869 /* 870 * MP note: the dummybuf is only used to discard frames, 871 * so there is no need for it to be per-vtnet or locked. 872 * We only make it large enough for TSO-sized segment. 873 */ 874 static uint8_t dummybuf[65536 + 64]; 875 struct iovec iov; 876 877 iov.iov_base = dummybuf; 878 iov.iov_len = sizeof(dummybuf); 879 880 return netbe_recv(be, &iov, 1); 881 } 882 883 void 884 netbe_rx_disable(struct net_backend *be) 885 { 886 887 return be->recv_disable(be); 888 } 889 890 void 891 netbe_rx_enable(struct net_backend *be) 892 { 893 894 return be->recv_enable(be); 895 } 896 897 size_t 898 netbe_get_vnet_hdr_len(struct net_backend *be) 899 { 900 901 return (be->be_vnet_hdr_len); 902 } 903