1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008 The FreeBSD Foundation 5 * Copyright (c) 2009-2021 Bjoern A. Zeeb <bz@FreeBSD.org> 6 * 7 * This software was developed by CK Software GmbH under sponsorship 8 * from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * A pair of virtual back-to-back connected ethernet like interfaces 34 * (``two interfaces with a virtual cross-over cable''). 35 * 36 * This is mostly intended to be used to provide connectivity between 37 * different virtual network stack instances. 38 */ 39 40 #include <sys/cdefs.h> 41 __FBSDID("$FreeBSD$"); 42 43 #include "opt_rss.h" 44 #include "opt_inet.h" 45 #include "opt_inet6.h" 46 47 #include <sys/param.h> 48 #include <sys/bus.h> 49 #include <sys/hash.h> 50 #include <sys/interrupt.h> 51 #include <sys/jail.h> 52 #include <sys/kernel.h> 53 #include <sys/libkern.h> 54 #include <sys/malloc.h> 55 #include <sys/mbuf.h> 56 #include <sys/module.h> 57 #include <sys/proc.h> 58 #include <sys/queue.h> 59 #include <sys/sched.h> 60 #include <sys/smp.h> 61 #include <sys/socket.h> 62 #include <sys/sockio.h> 63 #include <sys/taskqueue.h> 64 65 #include <net/bpf.h> 66 #include <net/ethernet.h> 67 #include <net/if.h> 68 #include <net/if_var.h> 69 #include <net/if_clone.h> 70 #include <net/if_media.h> 71 #include <net/if_var.h> 72 #include <net/if_private.h> 73 #include <net/if_types.h> 74 #include <net/netisr.h> 75 #ifdef RSS 76 #include <net/rss_config.h> 77 #ifdef INET 78 #include <netinet/in_rss.h> 79 #endif 80 #ifdef INET6 81 #include <netinet6/in6_rss.h> 82 #endif 83 #endif 84 #include <net/vnet.h> 85 86 static const char epairname[] = "epair"; 87 #define RXRSIZE 4096 /* Probably overkill by 4-8x. */ 88 89 static MALLOC_DEFINE(M_EPAIR, epairname, 90 "Pair of virtual cross-over connected Ethernet-like interfaces"); 91 92 VNET_DEFINE_STATIC(struct if_clone *, epair_cloner); 93 #define V_epair_cloner VNET(epair_cloner) 94 95 static unsigned int next_index = 0; 96 #define EPAIR_LOCK_INIT() mtx_init(&epair_n_index_mtx, "epairidx", \ 97 NULL, MTX_DEF) 98 #define EPAIR_LOCK_DESTROY() mtx_destroy(&epair_n_index_mtx) 99 #define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx) 100 #define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx) 101 102 struct epair_softc; 103 struct epair_queue { 104 struct mtx mtx; 105 struct mbufq q; 106 int id; 107 enum { 108 EPAIR_QUEUE_IDLE, 109 EPAIR_QUEUE_WAKING, 110 EPAIR_QUEUE_RUNNING, 111 } state; 112 struct task tx_task; 113 struct epair_softc *sc; 114 }; 115 116 static struct mtx epair_n_index_mtx; 117 struct epair_softc { 118 struct ifnet *ifp; /* This ifp. */ 119 struct ifnet *oifp; /* other ifp of pair. */ 120 int num_queues; 121 struct epair_queue *queues; 122 struct ifmedia media; /* Media config (fake). */ 123 STAILQ_ENTRY(epair_softc) entry; 124 }; 125 126 struct epair_tasks_t { 127 int tasks; 128 struct taskqueue *tq[MAXCPU]; 129 }; 130 131 static struct epair_tasks_t epair_tasks; 132 133 static void 134 epair_clear_mbuf(struct mbuf *m) 135 { 136 M_ASSERTPKTHDR(m); 137 138 /* Remove any CSUM_SND_TAG as ether_input will barf. */ 139 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { 140 m_snd_tag_rele(m->m_pkthdr.snd_tag); 141 m->m_pkthdr.snd_tag = NULL; 142 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 143 } 144 145 /* Clear vlan information. */ 146 m->m_flags &= ~M_VLANTAG; 147 m->m_pkthdr.ether_vtag = 0; 148 149 m_tag_delete_nonpersistent(m); 150 } 151 152 static void 153 epair_tx_start_deferred(void *arg, int pending) 154 { 155 struct epair_queue *q = (struct epair_queue *)arg; 156 if_t ifp; 157 struct mbuf *m, *n; 158 bool resched; 159 160 ifp = q->sc->ifp; 161 162 if_ref(ifp); 163 CURVNET_SET(ifp->if_vnet); 164 165 mtx_lock(&q->mtx); 166 m = mbufq_flush(&q->q); 167 q->state = EPAIR_QUEUE_RUNNING; 168 mtx_unlock(&q->mtx); 169 170 while (m != NULL) { 171 n = STAILQ_NEXT(m, m_stailqpkt); 172 m->m_nextpkt = NULL; 173 if_input(ifp, m); 174 m = n; 175 } 176 177 /* 178 * Avoid flushing the queue more than once per task. We can otherwise 179 * end up starving ourselves in a multi-epair routing configuration. 180 */ 181 mtx_lock(&q->mtx); 182 if (mbufq_len(&q->q) > 0) { 183 resched = true; 184 q->state = EPAIR_QUEUE_WAKING; 185 } else { 186 resched = false; 187 q->state = EPAIR_QUEUE_IDLE; 188 } 189 mtx_unlock(&q->mtx); 190 191 if (resched) 192 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 193 194 CURVNET_RESTORE(); 195 if_rele(ifp); 196 } 197 198 static struct epair_queue * 199 epair_select_queue(struct epair_softc *sc, struct mbuf *m) 200 { 201 uint32_t bucket; 202 #ifdef RSS 203 struct ether_header *eh; 204 int ret; 205 206 ret = rss_m2bucket(m, &bucket); 207 if (ret) { 208 /* Actually hash the packet. */ 209 eh = mtod(m, struct ether_header *); 210 211 switch (ntohs(eh->ether_type)) { 212 #ifdef INET 213 case ETHERTYPE_IP: 214 rss_soft_m2cpuid_v4(m, 0, &bucket); 215 break; 216 #endif 217 #ifdef INET6 218 case ETHERTYPE_IPV6: 219 rss_soft_m2cpuid_v6(m, 0, &bucket); 220 break; 221 #endif 222 default: 223 bucket = 0; 224 break; 225 } 226 } 227 bucket %= sc->num_queues; 228 #else 229 bucket = 0; 230 #endif 231 return (&sc->queues[bucket]); 232 } 233 234 static void 235 epair_prepare_mbuf(struct mbuf *m, struct ifnet *src_ifp) 236 { 237 M_ASSERTPKTHDR(m); 238 epair_clear_mbuf(m); 239 if_setrcvif(m, src_ifp); 240 M_SETFIB(m, src_ifp->if_fib); 241 242 MPASS(m->m_nextpkt == NULL); 243 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 244 } 245 246 static void 247 epair_menq(struct mbuf *m, struct epair_softc *osc) 248 { 249 struct epair_queue *q; 250 struct ifnet *ifp, *oifp; 251 int error, len; 252 bool mcast; 253 254 /* 255 * I know this looks weird. We pass the "other sc" as we need that one 256 * and can get both ifps from it as well. 257 */ 258 oifp = osc->ifp; 259 ifp = osc->oifp; 260 261 epair_prepare_mbuf(m, oifp); 262 263 /* Save values as once the mbuf is queued, it's not ours anymore. */ 264 len = m->m_pkthdr.len; 265 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; 266 267 q = epair_select_queue(osc, m); 268 269 mtx_lock(&q->mtx); 270 if (q->state == EPAIR_QUEUE_IDLE) { 271 q->state = EPAIR_QUEUE_WAKING; 272 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 273 } 274 error = mbufq_enqueue(&q->q, m); 275 mtx_unlock(&q->mtx); 276 277 if (error != 0) { 278 m_freem(m); 279 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 280 } else { 281 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 282 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 283 if (mcast) 284 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 285 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 286 } 287 } 288 289 static void 290 epair_start(struct ifnet *ifp) 291 { 292 struct mbuf *m; 293 struct epair_softc *sc; 294 struct ifnet *oifp; 295 296 /* 297 * We get packets here from ether_output via if_handoff() 298 * and need to put them into the input queue of the oifp 299 * and will put the packet into the receive-queue (rxq) of the 300 * other interface (oifp) of our pair. 301 */ 302 sc = ifp->if_softc; 303 oifp = sc->oifp; 304 sc = oifp->if_softc; 305 for (;;) { 306 IFQ_DEQUEUE(&ifp->if_snd, m); 307 if (m == NULL) 308 break; 309 M_ASSERTPKTHDR(m); 310 BPF_MTAP(ifp, m); 311 312 /* In case either interface is not usable drop the packet. */ 313 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 314 (ifp->if_flags & IFF_UP) == 0 || 315 (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 316 (oifp->if_flags & IFF_UP) == 0) { 317 m_freem(m); 318 continue; 319 } 320 321 epair_menq(m, sc); 322 } 323 } 324 325 static int 326 epair_transmit(struct ifnet *ifp, struct mbuf *m) 327 { 328 struct epair_softc *sc; 329 struct ifnet *oifp; 330 #ifdef ALTQ 331 int len; 332 bool mcast; 333 #endif 334 335 if (m == NULL) 336 return (0); 337 M_ASSERTPKTHDR(m); 338 339 /* 340 * We could just transmit this, but it makes testing easier if we're a 341 * little bit more like real hardware. 342 * Allow just that little bit extra for ethernet (and vlan) headers. 343 */ 344 if (m->m_pkthdr.len > (ifp->if_mtu + sizeof(struct ether_vlan_header))) { 345 m_freem(m); 346 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 347 return (E2BIG); 348 } 349 350 /* 351 * We are not going to use the interface en/dequeue mechanism 352 * on the TX side. We are called from ether_output_frame() 353 * and will put the packet into the receive-queue (rxq) of the 354 * other interface (oifp) of our pair. 355 */ 356 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 357 m_freem(m); 358 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 359 return (ENXIO); 360 } 361 if ((ifp->if_flags & IFF_UP) == 0) { 362 m_freem(m); 363 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 364 return (ENETDOWN); 365 } 366 367 BPF_MTAP(ifp, m); 368 369 /* 370 * In case the outgoing interface is not usable, 371 * drop the packet. 372 */ 373 sc = ifp->if_softc; 374 oifp = sc->oifp; 375 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 376 (oifp->if_flags & IFF_UP) == 0) { 377 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 378 m_freem(m); 379 return (0); 380 } 381 382 #ifdef ALTQ 383 len = m->m_pkthdr.len; 384 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; 385 int error = 0; 386 387 /* Support ALTQ via the classic if_start() path. */ 388 IF_LOCK(&ifp->if_snd); 389 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 390 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 391 if (error) 392 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 393 IF_UNLOCK(&ifp->if_snd); 394 if (!error) { 395 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 396 if (mcast) 397 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 398 epair_start(ifp); 399 } 400 return (error); 401 } 402 IF_UNLOCK(&ifp->if_snd); 403 #endif 404 405 epair_menq(m, oifp->if_softc); 406 return (0); 407 } 408 409 static void 410 epair_qflush(struct ifnet *ifp __unused) 411 { 412 } 413 414 static int 415 epair_media_change(struct ifnet *ifp __unused) 416 { 417 418 /* Do nothing. */ 419 return (0); 420 } 421 422 static void 423 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 424 { 425 426 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 427 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 428 } 429 430 static int 431 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 432 { 433 struct epair_softc *sc; 434 struct ifreq *ifr; 435 int error; 436 437 ifr = (struct ifreq *)data; 438 switch (cmd) { 439 case SIOCSIFFLAGS: 440 case SIOCADDMULTI: 441 case SIOCDELMULTI: 442 error = 0; 443 break; 444 445 case SIOCSIFMEDIA: 446 case SIOCGIFMEDIA: 447 sc = ifp->if_softc; 448 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 449 break; 450 451 case SIOCSIFMTU: 452 /* We basically allow all kinds of MTUs. */ 453 ifp->if_mtu = ifr->ifr_mtu; 454 error = 0; 455 break; 456 457 default: 458 /* Let the common ethernet handler process this. */ 459 error = ether_ioctl(ifp, cmd, data); 460 break; 461 } 462 463 return (error); 464 } 465 466 static void 467 epair_init(void *dummy __unused) 468 { 469 } 470 471 /* 472 * Interface cloning functions. 473 * We use our private ones so that we can create/destroy our secondary 474 * device along with the primary one. 475 */ 476 static int 477 epair_clone_match(struct if_clone *ifc, const char *name) 478 { 479 const char *cp; 480 481 /* 482 * Our base name is epair. 483 * Our interfaces will be named epair<n>[ab]. 484 * So accept anything of the following list: 485 * - epair 486 * - epair<n> 487 * but not the epair<n>[ab] versions. 488 */ 489 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 490 return (0); 491 492 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 493 if (*cp < '0' || *cp > '9') 494 return (0); 495 } 496 497 return (1); 498 } 499 500 static void 501 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb) 502 { 503 struct ifnet *ifp; 504 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 505 506 ifp = scb->ifp; 507 /* Copy epairNa etheraddr and change the last byte. */ 508 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN); 509 eaddr[5] = 0x0b; 510 ether_ifattach(ifp, eaddr); 511 512 if_clone_addif(ifc, ifp); 513 } 514 515 static struct epair_softc * 516 epair_alloc_sc(struct if_clone *ifc) 517 { 518 struct epair_softc *sc; 519 520 struct ifnet *ifp = if_alloc(IFT_ETHER); 521 if (ifp == NULL) 522 return (NULL); 523 524 sc = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 525 sc->ifp = ifp; 526 sc->num_queues = epair_tasks.tasks; 527 sc->queues = mallocarray(sc->num_queues, sizeof(struct epair_queue), 528 M_EPAIR, M_WAITOK); 529 for (int i = 0; i < sc->num_queues; i++) { 530 struct epair_queue *q = &sc->queues[i]; 531 q->id = i; 532 q->state = EPAIR_QUEUE_IDLE; 533 mtx_init(&q->mtx, "epairq", NULL, MTX_DEF | MTX_NEW); 534 mbufq_init(&q->q, RXRSIZE); 535 q->sc = sc; 536 NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q); 537 } 538 539 /* Initialise pseudo media types. */ 540 ifmedia_init(&sc->media, 0, epair_media_change, epair_media_status); 541 ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_T, 0, NULL); 542 ifmedia_set(&sc->media, IFM_ETHER | IFM_10G_T); 543 544 return (sc); 545 } 546 547 static void 548 epair_setup_ifp(struct epair_softc *sc, char *name, int unit) 549 { 550 struct ifnet *ifp = sc->ifp; 551 552 ifp->if_softc = sc; 553 strlcpy(ifp->if_xname, name, IFNAMSIZ); 554 ifp->if_dname = epairname; 555 ifp->if_dunit = unit; 556 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 557 ifp->if_capabilities = IFCAP_VLAN_MTU; 558 ifp->if_capenable = IFCAP_VLAN_MTU; 559 ifp->if_transmit = epair_transmit; 560 ifp->if_qflush = epair_qflush; 561 ifp->if_start = epair_start; 562 ifp->if_ioctl = epair_ioctl; 563 ifp->if_init = epair_init; 564 if_setsendqlen(ifp, ifqmaxlen); 565 if_setsendqready(ifp); 566 567 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 568 } 569 570 static void 571 epair_generate_mac(struct epair_softc *sc, uint8_t *eaddr) 572 { 573 uint32_t key[3]; 574 uint32_t hash; 575 uint64_t hostid; 576 577 EPAIR_LOCK(); 578 #ifdef SMP 579 /* Get an approximate distribution. */ 580 hash = next_index % mp_ncpus; 581 #else 582 hash = 0; 583 #endif 584 EPAIR_UNLOCK(); 585 586 /* 587 * Calculate the etheraddr hashing the hostid and the 588 * interface index. The result would be hopefully unique. 589 * Note that the "a" component of an epair instance may get moved 590 * to a different VNET after creation. In that case its index 591 * will be freed and the index can get reused by new epair instance. 592 * Make sure we do not create same etheraddr again. 593 */ 594 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid); 595 if (hostid == 0) 596 arc4rand(&hostid, sizeof(hostid), 0); 597 598 struct ifnet *ifp = sc->ifp; 599 EPAIR_LOCK(); 600 if (ifp->if_index > next_index) 601 next_index = ifp->if_index; 602 else 603 next_index++; 604 605 key[0] = (uint32_t)next_index; 606 EPAIR_UNLOCK(); 607 key[1] = (uint32_t)(hostid & 0xffffffff); 608 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff); 609 hash = jenkins_hash32(key, 3, 0); 610 611 eaddr[0] = 0x02; 612 memcpy(&eaddr[1], &hash, 4); 613 eaddr[5] = 0x0a; 614 } 615 616 static void 617 epair_free_sc(struct epair_softc *sc) 618 { 619 if (sc == NULL) 620 return; 621 622 if_free(sc->ifp); 623 ifmedia_removeall(&sc->media); 624 for (int i = 0; i < sc->num_queues; i++) { 625 struct epair_queue *q = &sc->queues[i]; 626 mtx_destroy(&q->mtx); 627 } 628 free(sc->queues, M_EPAIR); 629 free(sc, M_EPAIR); 630 } 631 632 static void 633 epair_set_state(struct ifnet *ifp, bool running) 634 { 635 if (running) { 636 ifp->if_drv_flags |= IFF_DRV_RUNNING; 637 if_link_state_change(ifp, LINK_STATE_UP); 638 } else { 639 if_link_state_change(ifp, LINK_STATE_DOWN); 640 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 641 } 642 } 643 644 static int 645 epair_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit) 646 { 647 int error = 0, unit, wildcard; 648 char *dp; 649 650 /* Try to see if a special unit was requested. */ 651 error = ifc_name2unit(name, &unit); 652 if (error != 0) 653 return (error); 654 wildcard = (unit < 0); 655 656 error = ifc_alloc_unit(ifc, &unit); 657 if (error != 0) 658 return (error); 659 660 /* 661 * If no unit had been given, we need to adjust the ifName. 662 * Also make sure there is space for our extra [ab] suffix. 663 */ 664 for (dp = name; *dp != '\0'; dp++); 665 if (wildcard) { 666 int slen = snprintf(dp, len - (dp - name), "%d", unit); 667 if (slen > len - (dp - name) - 1) { 668 /* ifName too long. */ 669 error = ENOSPC; 670 goto done; 671 } 672 dp += slen; 673 } 674 if (len - (dp - name) - 1 < 1) { 675 /* No space left for our [ab] suffix. */ 676 error = ENOSPC; 677 goto done; 678 } 679 *dp = 'b'; 680 /* Must not change dp so we can replace 'a' by 'b' later. */ 681 *(dp+1) = '\0'; 682 683 /* Check if 'a' and 'b' interfaces already exist. */ 684 if (ifunit(name) != NULL) { 685 error = EEXIST; 686 goto done; 687 } 688 689 *dp = 'a'; 690 if (ifunit(name) != NULL) { 691 error = EEXIST; 692 goto done; 693 } 694 *punit = unit; 695 done: 696 if (error != 0) 697 ifc_free_unit(ifc, unit); 698 699 return (error); 700 } 701 702 static int 703 epair_clone_create(struct if_clone *ifc, char *name, size_t len, 704 struct ifc_data *ifd, struct ifnet **ifpp) 705 { 706 struct epair_softc *sca, *scb; 707 struct ifnet *ifp; 708 char *dp; 709 int error, unit; 710 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 711 712 error = epair_handle_unit(ifc, name, len, &unit); 713 if (error != 0) 714 return (error); 715 716 /* Allocate memory for both [ab] interfaces */ 717 sca = epair_alloc_sc(ifc); 718 scb = epair_alloc_sc(ifc); 719 if (sca == NULL || scb == NULL) { 720 epair_free_sc(sca); 721 epair_free_sc(scb); 722 ifc_free_unit(ifc, unit); 723 return (ENOSPC); 724 } 725 726 /* 727 * Cross-reference the interfaces so we will be able to free both. 728 */ 729 sca->oifp = scb->ifp; 730 scb->oifp = sca->ifp; 731 732 /* Finish initialization of interface <n>a. */ 733 ifp = sca->ifp; 734 epair_setup_ifp(sca, name, unit); 735 epair_generate_mac(sca, eaddr); 736 737 ether_ifattach(ifp, eaddr); 738 739 /* Swap the name and finish initialization of interface <n>b. */ 740 dp = name + strlen(name) - 1; 741 *dp = 'b'; 742 743 epair_setup_ifp(scb, name, unit); 744 745 ifp = scb->ifp; 746 /* We need to play some tricks here for the second interface. */ 747 strlcpy(name, epairname, len); 748 /* Correctly set the name for the cloner list. */ 749 strlcpy(name, scb->ifp->if_xname, len); 750 751 epair_clone_add(ifc, scb); 752 753 /* 754 * Restore name to <n>a as the ifp for this will go into the 755 * cloner list for the initial call. 756 */ 757 strlcpy(name, sca->ifp->if_xname, len); 758 759 /* Tell the world, that we are ready to rock. */ 760 epair_set_state(sca->ifp, true); 761 epair_set_state(scb->ifp, true); 762 763 *ifpp = sca->ifp; 764 765 return (0); 766 } 767 768 static void 769 epair_drain_rings(struct epair_softc *sc) 770 { 771 for (int i = 0; i < sc->num_queues; i++) { 772 struct epair_queue *q; 773 struct mbuf *m, *n; 774 775 q = &sc->queues[i]; 776 mtx_lock(&q->mtx); 777 m = mbufq_flush(&q->q); 778 mtx_unlock(&q->mtx); 779 780 for (; m != NULL; m = n) { 781 n = m->m_nextpkt; 782 m_freem(m); 783 } 784 } 785 } 786 787 static int 788 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 789 { 790 struct ifnet *oifp; 791 struct epair_softc *sca, *scb; 792 int unit, error; 793 794 /* 795 * In case we called into if_clone_destroyif() ourselves 796 * again to remove the second interface, the softc will be 797 * NULL. In that case so not do anything but return success. 798 */ 799 if (ifp->if_softc == NULL) 800 return (0); 801 802 unit = ifp->if_dunit; 803 sca = ifp->if_softc; 804 oifp = sca->oifp; 805 scb = oifp->if_softc; 806 807 /* Frist get the interfaces down and detached. */ 808 epair_set_state(ifp, false); 809 epair_set_state(oifp, false); 810 811 ether_ifdetach(ifp); 812 ether_ifdetach(oifp); 813 814 /* Third free any queued packets and all the resources. */ 815 CURVNET_SET_QUIET(oifp->if_vnet); 816 epair_drain_rings(scb); 817 oifp->if_softc = NULL; 818 error = if_clone_destroyif(ifc, oifp); 819 if (error) 820 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 821 __func__, error); 822 epair_free_sc(scb); 823 CURVNET_RESTORE(); 824 825 epair_drain_rings(sca); 826 epair_free_sc(sca); 827 828 /* Last free the cloner unit. */ 829 ifc_free_unit(ifc, unit); 830 831 return (0); 832 } 833 834 static void 835 vnet_epair_init(const void *unused __unused) 836 { 837 struct if_clone_addreq req = { 838 .match_f = epair_clone_match, 839 .create_f = epair_clone_create, 840 .destroy_f = epair_clone_destroy, 841 }; 842 V_epair_cloner = ifc_attach_cloner(epairname, &req); 843 } 844 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 845 vnet_epair_init, NULL); 846 847 static void 848 vnet_epair_uninit(const void *unused __unused) 849 { 850 851 ifc_detach_cloner(V_epair_cloner); 852 } 853 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 854 vnet_epair_uninit, NULL); 855 856 static int 857 epair_mod_init(void) 858 { 859 char name[32]; 860 epair_tasks.tasks = 0; 861 862 #ifdef RSS 863 int cpu; 864 865 CPU_FOREACH(cpu) { 866 cpuset_t cpu_mask; 867 868 /* Pin to this CPU so we get appropriate NUMA allocations. */ 869 thread_lock(curthread); 870 sched_bind(curthread, cpu); 871 thread_unlock(curthread); 872 873 snprintf(name, sizeof(name), "epair_task_%d", cpu); 874 875 epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK, 876 taskqueue_thread_enqueue, 877 &epair_tasks.tq[cpu]); 878 CPU_SETOF(cpu, &cpu_mask); 879 taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET, 880 &cpu_mask, "%s", name); 881 882 epair_tasks.tasks++; 883 } 884 thread_lock(curthread); 885 sched_unbind(curthread); 886 thread_unlock(curthread); 887 #else 888 snprintf(name, sizeof(name), "epair_task"); 889 890 epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK, 891 taskqueue_thread_enqueue, 892 &epair_tasks.tq[0]); 893 taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name); 894 895 epair_tasks.tasks = 1; 896 #endif 897 898 return (0); 899 } 900 901 static void 902 epair_mod_cleanup(void) 903 { 904 905 for (int i = 0; i < epair_tasks.tasks; i++) { 906 taskqueue_drain_all(epair_tasks.tq[i]); 907 taskqueue_free(epair_tasks.tq[i]); 908 } 909 } 910 911 static int 912 epair_modevent(module_t mod, int type, void *data) 913 { 914 int ret; 915 916 switch (type) { 917 case MOD_LOAD: 918 EPAIR_LOCK_INIT(); 919 ret = epair_mod_init(); 920 if (ret != 0) 921 return (ret); 922 if (bootverbose) 923 printf("%s: %s initialized.\n", __func__, epairname); 924 break; 925 case MOD_UNLOAD: 926 epair_mod_cleanup(); 927 EPAIR_LOCK_DESTROY(); 928 if (bootverbose) 929 printf("%s: %s unloaded.\n", __func__, epairname); 930 break; 931 default: 932 return (EOPNOTSUPP); 933 } 934 return (0); 935 } 936 937 static moduledata_t epair_mod = { 938 "if_epair", 939 epair_modevent, 940 0 941 }; 942 943 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 944 MODULE_VERSION(if_epair, 3); 945