1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008 The FreeBSD Foundation 5 * Copyright (c) 2009-2021 Bjoern A. Zeeb <bz@FreeBSD.org> 6 * 7 * This software was developed by CK Software GmbH under sponsorship 8 * from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * A pair of virtual back-to-back connected ethernet like interfaces 34 * (``two interfaces with a virtual cross-over cable''). 35 * 36 * This is mostly intended to be used to provide connectivity between 37 * different virtual network stack instances. 38 */ 39 40 #include <sys/cdefs.h> 41 #include "opt_rss.h" 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 45 #include <sys/param.h> 46 #include <sys/bus.h> 47 #include <sys/hash.h> 48 #include <sys/interrupt.h> 49 #include <sys/jail.h> 50 #include <sys/kernel.h> 51 #include <sys/libkern.h> 52 #include <sys/malloc.h> 53 #include <sys/mbuf.h> 54 #include <sys/module.h> 55 #include <sys/proc.h> 56 #include <sys/queue.h> 57 #include <sys/sched.h> 58 #include <sys/smp.h> 59 #include <sys/socket.h> 60 #include <sys/sockio.h> 61 #include <sys/sysctl.h> 62 #include <sys/taskqueue.h> 63 64 #include <net/bpf.h> 65 #include <net/ethernet.h> 66 #include <net/if.h> 67 #include <net/if_var.h> 68 #include <net/if_clone.h> 69 #include <net/if_media.h> 70 #include <net/if_var.h> 71 #include <net/if_private.h> 72 #include <net/if_types.h> 73 #include <net/netisr.h> 74 #ifdef RSS 75 #include <net/rss_config.h> 76 #ifdef INET 77 #include <netinet/in_rss.h> 78 #endif 79 #ifdef INET6 80 #include <netinet6/in6_rss.h> 81 #endif 82 #endif 83 #include <net/vnet.h> 84 85 static const char epairname[] = "epair"; 86 #define RXRSIZE 4096 /* Probably overkill by 4-8x. */ 87 88 static MALLOC_DEFINE(M_EPAIR, epairname, 89 "Pair of virtual cross-over connected Ethernet-like interfaces"); 90 91 VNET_DEFINE_STATIC(struct if_clone *, epair_cloner); 92 #define V_epair_cloner VNET(epair_cloner) 93 94 static unsigned int next_index = 0; 95 #define EPAIR_LOCK_INIT() mtx_init(&epair_n_index_mtx, "epairidx", \ 96 NULL, MTX_DEF) 97 #define EPAIR_LOCK_DESTROY() mtx_destroy(&epair_n_index_mtx) 98 #define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx) 99 #define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx) 100 101 SYSCTL_DECL(_net_link); 102 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 103 "Pair of virtual cross-over connected Ethernet-like interfaces"); 104 105 static bool use_ether_gen_addr = true; 106 SYSCTL_BOOL(_net_link_epair, OID_AUTO, ether_gen_addr, CTLFLAG_RWTUN, 107 &use_ether_gen_addr, false, 108 "Generate MAC with FreeBSD OUI using ether_gen_addr(9)"); 109 110 struct epair_softc; 111 struct epair_queue { 112 struct mtx mtx; 113 struct mbufq q; 114 int id; 115 enum { 116 EPAIR_QUEUE_IDLE, 117 EPAIR_QUEUE_WAKING, 118 EPAIR_QUEUE_RUNNING, 119 } state; 120 struct task tx_task; 121 struct epair_softc *sc; 122 }; 123 124 static struct mtx epair_n_index_mtx; 125 struct epair_softc { 126 struct ifnet *ifp; /* This ifp. */ 127 struct ifnet *oifp; /* other ifp of pair. */ 128 int num_queues; 129 struct epair_queue *queues; 130 struct ifmedia media; /* Media config (fake). */ 131 STAILQ_ENTRY(epair_softc) entry; 132 }; 133 134 struct epair_tasks_t { 135 int tasks; 136 struct taskqueue *tq[MAXCPU]; 137 }; 138 139 static struct epair_tasks_t epair_tasks; 140 141 static void 142 epair_clear_mbuf(struct mbuf *m) 143 { 144 M_ASSERTPKTHDR(m); 145 146 /* Remove any CSUM_SND_TAG as ether_input will barf. */ 147 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { 148 m_snd_tag_rele(m->m_pkthdr.snd_tag); 149 m->m_pkthdr.snd_tag = NULL; 150 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 151 } 152 153 /* Clear vlan information. */ 154 m->m_flags &= ~M_VLANTAG; 155 m->m_pkthdr.ether_vtag = 0; 156 157 m_tag_delete_nonpersistent(m); 158 } 159 160 static void 161 epair_tx_start_deferred(void *arg, int pending) 162 { 163 struct epair_queue *q = (struct epair_queue *)arg; 164 if_t ifp; 165 struct mbuf *m, *n; 166 bool resched; 167 168 ifp = q->sc->ifp; 169 170 if_ref(ifp); 171 CURVNET_SET(ifp->if_vnet); 172 173 mtx_lock(&q->mtx); 174 m = mbufq_flush(&q->q); 175 q->state = EPAIR_QUEUE_RUNNING; 176 mtx_unlock(&q->mtx); 177 178 while (m != NULL) { 179 n = STAILQ_NEXT(m, m_stailqpkt); 180 m->m_nextpkt = NULL; 181 if_input(ifp, m); 182 m = n; 183 } 184 185 /* 186 * Avoid flushing the queue more than once per task. We can otherwise 187 * end up starving ourselves in a multi-epair routing configuration. 188 */ 189 mtx_lock(&q->mtx); 190 if (!mbufq_empty(&q->q)) { 191 resched = true; 192 q->state = EPAIR_QUEUE_WAKING; 193 } else { 194 resched = false; 195 q->state = EPAIR_QUEUE_IDLE; 196 } 197 mtx_unlock(&q->mtx); 198 199 if (resched) 200 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 201 202 CURVNET_RESTORE(); 203 if_rele(ifp); 204 } 205 206 static struct epair_queue * 207 epair_select_queue(struct epair_softc *sc, struct mbuf *m) 208 { 209 uint32_t bucket; 210 #ifdef RSS 211 struct ether_header *eh; 212 int ret; 213 214 ret = rss_m2bucket(m, &bucket); 215 if (ret) { 216 /* Actually hash the packet. */ 217 eh = mtod(m, struct ether_header *); 218 219 switch (ntohs(eh->ether_type)) { 220 #ifdef INET 221 case ETHERTYPE_IP: 222 rss_soft_m2cpuid_v4(m, 0, &bucket); 223 break; 224 #endif 225 #ifdef INET6 226 case ETHERTYPE_IPV6: 227 rss_soft_m2cpuid_v6(m, 0, &bucket); 228 break; 229 #endif 230 default: 231 bucket = 0; 232 break; 233 } 234 } 235 bucket %= sc->num_queues; 236 #else 237 bucket = 0; 238 #endif 239 return (&sc->queues[bucket]); 240 } 241 242 static void 243 epair_prepare_mbuf(struct mbuf *m, struct ifnet *src_ifp) 244 { 245 M_ASSERTPKTHDR(m); 246 epair_clear_mbuf(m); 247 if_setrcvif(m, src_ifp); 248 M_SETFIB(m, src_ifp->if_fib); 249 250 MPASS(m->m_nextpkt == NULL); 251 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 252 } 253 254 static void 255 epair_menq(struct mbuf *m, struct epair_softc *osc) 256 { 257 struct epair_queue *q; 258 struct ifnet *ifp, *oifp; 259 int error, len; 260 bool mcast; 261 262 /* 263 * I know this looks weird. We pass the "other sc" as we need that one 264 * and can get both ifps from it as well. 265 */ 266 oifp = osc->ifp; 267 ifp = osc->oifp; 268 269 epair_prepare_mbuf(m, oifp); 270 271 /* Save values as once the mbuf is queued, it's not ours anymore. */ 272 len = m->m_pkthdr.len; 273 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; 274 275 q = epair_select_queue(osc, m); 276 277 mtx_lock(&q->mtx); 278 if (q->state == EPAIR_QUEUE_IDLE) { 279 q->state = EPAIR_QUEUE_WAKING; 280 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 281 } 282 error = mbufq_enqueue(&q->q, m); 283 mtx_unlock(&q->mtx); 284 285 if (error != 0) { 286 m_freem(m); 287 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 288 } else { 289 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 290 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 291 if (mcast) 292 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 293 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 294 } 295 } 296 297 static void 298 epair_start(struct ifnet *ifp) 299 { 300 struct mbuf *m; 301 struct epair_softc *sc; 302 struct ifnet *oifp; 303 304 /* 305 * We get packets here from ether_output via if_handoff() 306 * and need to put them into the input queue of the oifp 307 * and will put the packet into the receive-queue (rxq) of the 308 * other interface (oifp) of our pair. 309 */ 310 sc = ifp->if_softc; 311 oifp = sc->oifp; 312 sc = oifp->if_softc; 313 for (;;) { 314 IFQ_DEQUEUE(&ifp->if_snd, m); 315 if (m == NULL) 316 break; 317 M_ASSERTPKTHDR(m); 318 BPF_MTAP(ifp, m); 319 320 /* In case either interface is not usable drop the packet. */ 321 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 322 (ifp->if_flags & IFF_UP) == 0 || 323 (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 324 (oifp->if_flags & IFF_UP) == 0) { 325 m_freem(m); 326 continue; 327 } 328 329 epair_menq(m, sc); 330 } 331 } 332 333 static int 334 epair_transmit(struct ifnet *ifp, struct mbuf *m) 335 { 336 struct epair_softc *sc; 337 struct ifnet *oifp; 338 #ifdef ALTQ 339 int len; 340 bool mcast; 341 #endif 342 343 if (m == NULL) 344 return (0); 345 M_ASSERTPKTHDR(m); 346 347 /* 348 * We could just transmit this, but it makes testing easier if we're a 349 * little bit more like real hardware. 350 * Allow just that little bit extra for ethernet (and vlan) headers. 351 */ 352 if (m->m_pkthdr.len > (ifp->if_mtu + sizeof(struct ether_vlan_header))) { 353 m_freem(m); 354 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 355 return (E2BIG); 356 } 357 358 /* 359 * We are not going to use the interface en/dequeue mechanism 360 * on the TX side. We are called from ether_output_frame() 361 * and will put the packet into the receive-queue (rxq) of the 362 * other interface (oifp) of our pair. 363 */ 364 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 365 m_freem(m); 366 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 367 return (ENXIO); 368 } 369 if ((ifp->if_flags & IFF_UP) == 0) { 370 m_freem(m); 371 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 372 return (ENETDOWN); 373 } 374 375 BPF_MTAP(ifp, m); 376 377 /* 378 * In case the outgoing interface is not usable, 379 * drop the packet. 380 */ 381 sc = ifp->if_softc; 382 oifp = sc->oifp; 383 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 384 (oifp->if_flags & IFF_UP) == 0) { 385 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 386 m_freem(m); 387 return (0); 388 } 389 390 #ifdef ALTQ 391 len = m->m_pkthdr.len; 392 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; 393 int error = 0; 394 395 /* Support ALTQ via the classic if_start() path. */ 396 IF_LOCK(&ifp->if_snd); 397 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 398 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 399 if (error) 400 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 401 IF_UNLOCK(&ifp->if_snd); 402 if (!error) { 403 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 404 if (mcast) 405 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 406 epair_start(ifp); 407 } 408 return (error); 409 } 410 IF_UNLOCK(&ifp->if_snd); 411 #endif 412 413 epair_menq(m, oifp->if_softc); 414 return (0); 415 } 416 417 static void 418 epair_qflush(struct ifnet *ifp __unused) 419 { 420 } 421 422 static int 423 epair_media_change(struct ifnet *ifp __unused) 424 { 425 426 /* Do nothing. */ 427 return (0); 428 } 429 430 static void 431 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 432 { 433 434 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 435 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 436 } 437 438 static int 439 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 440 { 441 struct epair_softc *sc; 442 struct ifreq *ifr; 443 int error; 444 445 ifr = (struct ifreq *)data; 446 switch (cmd) { 447 case SIOCSIFFLAGS: 448 case SIOCADDMULTI: 449 case SIOCDELMULTI: 450 error = 0; 451 break; 452 453 case SIOCSIFMEDIA: 454 case SIOCGIFMEDIA: 455 sc = ifp->if_softc; 456 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 457 break; 458 459 case SIOCSIFMTU: 460 /* We basically allow all kinds of MTUs. */ 461 ifp->if_mtu = ifr->ifr_mtu; 462 error = 0; 463 break; 464 465 default: 466 /* Let the common ethernet handler process this. */ 467 error = ether_ioctl(ifp, cmd, data); 468 break; 469 } 470 471 return (error); 472 } 473 474 static void 475 epair_init(void *dummy __unused) 476 { 477 } 478 479 /* 480 * Interface cloning functions. 481 * We use our private ones so that we can create/destroy our secondary 482 * device along with the primary one. 483 */ 484 static int 485 epair_clone_match(struct if_clone *ifc, const char *name) 486 { 487 const char *cp; 488 489 /* 490 * Our base name is epair. 491 * Our interfaces will be named epair<n>[ab]. 492 * So accept anything of the following list: 493 * - epair 494 * - epair<n> 495 * but not the epair<n>[ab] versions. 496 */ 497 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 498 return (0); 499 500 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 501 if (*cp < '0' || *cp > '9') 502 return (0); 503 } 504 505 return (1); 506 } 507 508 static void 509 epair_generate_mac_byname(struct epair_softc *sc, uint8_t eaddr[]) 510 { 511 struct ether_addr gen_eaddr; 512 int i; 513 514 ether_gen_addr_byname(if_name(sc->ifp), &gen_eaddr); 515 for (i = 0; i < ETHER_ADDR_LEN; i++) 516 eaddr[i] = gen_eaddr.octet[i]; 517 } 518 519 static void 520 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb) 521 { 522 struct ifnet *ifp; 523 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 524 525 ifp = scb->ifp; 526 if (!use_ether_gen_addr) { 527 /* Copy epairNa etheraddr and change the last byte. */ 528 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN); 529 eaddr[5] = 0x0b; 530 } else 531 epair_generate_mac_byname(scb, eaddr); 532 ether_ifattach(ifp, eaddr); 533 534 if_clone_addif(ifc, ifp); 535 } 536 537 static struct epair_softc * 538 epair_alloc_sc(struct if_clone *ifc) 539 { 540 struct epair_softc *sc; 541 542 struct ifnet *ifp = if_alloc(IFT_ETHER); 543 sc = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 544 sc->ifp = ifp; 545 sc->num_queues = epair_tasks.tasks; 546 sc->queues = mallocarray(sc->num_queues, sizeof(struct epair_queue), 547 M_EPAIR, M_WAITOK); 548 for (int i = 0; i < sc->num_queues; i++) { 549 struct epair_queue *q = &sc->queues[i]; 550 q->id = i; 551 q->state = EPAIR_QUEUE_IDLE; 552 mtx_init(&q->mtx, "epairq", NULL, MTX_DEF | MTX_NEW); 553 mbufq_init(&q->q, RXRSIZE); 554 q->sc = sc; 555 NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q); 556 } 557 558 /* Initialise pseudo media types. */ 559 ifmedia_init(&sc->media, 0, epair_media_change, epair_media_status); 560 ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_T, 0, NULL); 561 ifmedia_set(&sc->media, IFM_ETHER | IFM_10G_T); 562 563 return (sc); 564 } 565 566 static void 567 epair_setup_ifp(struct epair_softc *sc, char *name, int unit) 568 { 569 struct ifnet *ifp = sc->ifp; 570 571 ifp->if_softc = sc; 572 strlcpy(ifp->if_xname, name, IFNAMSIZ); 573 ifp->if_dname = epairname; 574 ifp->if_dunit = unit; 575 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 576 ifp->if_capabilities = IFCAP_VLAN_MTU; 577 ifp->if_capenable = IFCAP_VLAN_MTU; 578 ifp->if_transmit = epair_transmit; 579 ifp->if_qflush = epair_qflush; 580 ifp->if_start = epair_start; 581 ifp->if_ioctl = epair_ioctl; 582 ifp->if_init = epair_init; 583 if_setsendqlen(ifp, ifqmaxlen); 584 if_setsendqready(ifp); 585 586 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 587 } 588 589 static void 590 epair_generate_mac(struct epair_softc *sc, uint8_t *eaddr) 591 { 592 uint32_t key[3]; 593 uint32_t hash; 594 uint64_t hostid; 595 596 EPAIR_LOCK(); 597 #ifdef SMP 598 /* Get an approximate distribution. */ 599 hash = next_index % mp_ncpus; 600 #else 601 hash = 0; 602 #endif 603 EPAIR_UNLOCK(); 604 605 /* 606 * Calculate the etheraddr hashing the hostid and the 607 * interface index. The result would be hopefully unique. 608 * Note that the "a" component of an epair instance may get moved 609 * to a different VNET after creation. In that case its index 610 * will be freed and the index can get reused by new epair instance. 611 * Make sure we do not create same etheraddr again. 612 */ 613 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid); 614 if (hostid == 0) 615 arc4rand(&hostid, sizeof(hostid), 0); 616 617 struct ifnet *ifp = sc->ifp; 618 EPAIR_LOCK(); 619 if (ifp->if_index > next_index) 620 next_index = ifp->if_index; 621 else 622 next_index++; 623 624 key[0] = (uint32_t)next_index; 625 EPAIR_UNLOCK(); 626 key[1] = (uint32_t)(hostid & 0xffffffff); 627 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff); 628 hash = jenkins_hash32(key, 3, 0); 629 630 eaddr[0] = 0x02; 631 memcpy(&eaddr[1], &hash, 4); 632 eaddr[5] = 0x0a; 633 } 634 635 static void 636 epair_free_sc(struct epair_softc *sc) 637 { 638 639 if_free(sc->ifp); 640 ifmedia_removeall(&sc->media); 641 for (int i = 0; i < sc->num_queues; i++) { 642 struct epair_queue *q = &sc->queues[i]; 643 mtx_destroy(&q->mtx); 644 } 645 free(sc->queues, M_EPAIR); 646 free(sc, M_EPAIR); 647 } 648 649 static void 650 epair_set_state(struct ifnet *ifp, bool running) 651 { 652 if (running) { 653 ifp->if_drv_flags |= IFF_DRV_RUNNING; 654 if_link_state_change(ifp, LINK_STATE_UP); 655 } else { 656 if_link_state_change(ifp, LINK_STATE_DOWN); 657 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 658 } 659 } 660 661 static int 662 epair_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit) 663 { 664 int error = 0, unit, wildcard; 665 char *dp; 666 667 /* Try to see if a special unit was requested. */ 668 error = ifc_name2unit(name, &unit); 669 if (error != 0) 670 return (error); 671 wildcard = (unit < 0); 672 673 error = ifc_alloc_unit(ifc, &unit); 674 if (error != 0) 675 return (error); 676 677 /* 678 * If no unit had been given, we need to adjust the ifName. 679 * Also make sure there is space for our extra [ab] suffix. 680 */ 681 for (dp = name; *dp != '\0'; dp++); 682 if (wildcard) { 683 int slen = snprintf(dp, len - (dp - name), "%d", unit); 684 if (slen > len - (dp - name) - 1) { 685 /* ifName too long. */ 686 error = ENOSPC; 687 goto done; 688 } 689 dp += slen; 690 } 691 if (len - (dp - name) - 1 < 1) { 692 /* No space left for our [ab] suffix. */ 693 error = ENOSPC; 694 goto done; 695 } 696 *dp = 'b'; 697 /* Must not change dp so we can replace 'a' by 'b' later. */ 698 *(dp+1) = '\0'; 699 700 /* Check if 'a' and 'b' interfaces already exist. */ 701 if (ifunit(name) != NULL) { 702 error = EEXIST; 703 goto done; 704 } 705 706 *dp = 'a'; 707 if (ifunit(name) != NULL) { 708 error = EEXIST; 709 goto done; 710 } 711 *punit = unit; 712 done: 713 if (error != 0) 714 ifc_free_unit(ifc, unit); 715 716 return (error); 717 } 718 719 static int 720 epair_clone_create(struct if_clone *ifc, char *name, size_t len, 721 struct ifc_data *ifd, struct ifnet **ifpp) 722 { 723 struct epair_softc *sca, *scb; 724 struct ifnet *ifp; 725 char *dp; 726 int error, unit; 727 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 728 729 error = epair_handle_unit(ifc, name, len, &unit); 730 if (error != 0) 731 return (error); 732 733 /* Allocate memory for both [ab] interfaces */ 734 sca = epair_alloc_sc(ifc); 735 scb = epair_alloc_sc(ifc); 736 737 /* 738 * Cross-reference the interfaces so we will be able to free both. 739 */ 740 sca->oifp = scb->ifp; 741 scb->oifp = sca->ifp; 742 743 /* Finish initialization of interface <n>a. */ 744 ifp = sca->ifp; 745 epair_setup_ifp(sca, name, unit); 746 if (!use_ether_gen_addr) 747 epair_generate_mac(sca, eaddr); 748 else 749 epair_generate_mac_byname(sca, eaddr); 750 751 ether_ifattach(ifp, eaddr); 752 753 /* Swap the name and finish initialization of interface <n>b. */ 754 dp = name + strlen(name) - 1; 755 *dp = 'b'; 756 757 epair_setup_ifp(scb, name, unit); 758 759 ifp = scb->ifp; 760 /* We need to play some tricks here for the second interface. */ 761 strlcpy(name, epairname, len); 762 /* Correctly set the name for the cloner list. */ 763 strlcpy(name, scb->ifp->if_xname, len); 764 765 epair_clone_add(ifc, scb); 766 767 /* 768 * Restore name to <n>a as the ifp for this will go into the 769 * cloner list for the initial call. 770 */ 771 strlcpy(name, sca->ifp->if_xname, len); 772 773 /* Tell the world, that we are ready to rock. */ 774 epair_set_state(sca->ifp, true); 775 epair_set_state(scb->ifp, true); 776 777 *ifpp = sca->ifp; 778 779 return (0); 780 } 781 782 static void 783 epair_drain_rings(struct epair_softc *sc) 784 { 785 for (int i = 0; i < sc->num_queues; i++) { 786 struct epair_queue *q; 787 struct mbuf *m, *n; 788 789 q = &sc->queues[i]; 790 mtx_lock(&q->mtx); 791 m = mbufq_flush(&q->q); 792 mtx_unlock(&q->mtx); 793 794 for (; m != NULL; m = n) { 795 n = m->m_nextpkt; 796 m_freem(m); 797 } 798 } 799 } 800 801 static int 802 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 803 { 804 struct ifnet *oifp; 805 struct epair_softc *sca, *scb; 806 int unit, error; 807 808 /* 809 * In case we called into if_clone_destroyif() ourselves 810 * again to remove the second interface, the softc will be 811 * NULL. In that case so not do anything but return success. 812 */ 813 if (ifp->if_softc == NULL) 814 return (0); 815 816 unit = ifp->if_dunit; 817 sca = ifp->if_softc; 818 oifp = sca->oifp; 819 scb = oifp->if_softc; 820 821 /* Frist get the interfaces down and detached. */ 822 epair_set_state(ifp, false); 823 epair_set_state(oifp, false); 824 825 ether_ifdetach(ifp); 826 ether_ifdetach(oifp); 827 828 /* Third free any queued packets and all the resources. */ 829 CURVNET_SET_QUIET(oifp->if_vnet); 830 epair_drain_rings(scb); 831 oifp->if_softc = NULL; 832 error = if_clone_destroyif(ifc, oifp); 833 if (error) 834 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 835 __func__, error); 836 epair_free_sc(scb); 837 CURVNET_RESTORE(); 838 839 epair_drain_rings(sca); 840 epair_free_sc(sca); 841 842 /* Last free the cloner unit. */ 843 ifc_free_unit(ifc, unit); 844 845 return (0); 846 } 847 848 static void 849 vnet_epair_init(const void *unused __unused) 850 { 851 struct if_clone_addreq req = { 852 .match_f = epair_clone_match, 853 .create_f = epair_clone_create, 854 .destroy_f = epair_clone_destroy, 855 }; 856 V_epair_cloner = ifc_attach_cloner(epairname, &req); 857 } 858 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 859 vnet_epair_init, NULL); 860 861 static void 862 vnet_epair_uninit(const void *unused __unused) 863 { 864 865 ifc_detach_cloner(V_epair_cloner); 866 } 867 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 868 vnet_epair_uninit, NULL); 869 870 static int 871 epair_mod_init(void) 872 { 873 char name[32]; 874 epair_tasks.tasks = 0; 875 876 #ifdef RSS 877 int cpu; 878 879 CPU_FOREACH(cpu) { 880 cpuset_t cpu_mask; 881 882 /* Pin to this CPU so we get appropriate NUMA allocations. */ 883 thread_lock(curthread); 884 sched_bind(curthread, cpu); 885 thread_unlock(curthread); 886 887 snprintf(name, sizeof(name), "epair_task_%d", cpu); 888 889 epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK, 890 taskqueue_thread_enqueue, 891 &epair_tasks.tq[cpu]); 892 CPU_SETOF(cpu, &cpu_mask); 893 taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET, 894 &cpu_mask, "%s", name); 895 896 epair_tasks.tasks++; 897 } 898 thread_lock(curthread); 899 sched_unbind(curthread); 900 thread_unlock(curthread); 901 #else 902 snprintf(name, sizeof(name), "epair_task"); 903 904 epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK, 905 taskqueue_thread_enqueue, 906 &epair_tasks.tq[0]); 907 taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name); 908 909 epair_tasks.tasks = 1; 910 #endif 911 912 return (0); 913 } 914 915 static void 916 epair_mod_cleanup(void) 917 { 918 919 for (int i = 0; i < epair_tasks.tasks; i++) { 920 taskqueue_drain_all(epair_tasks.tq[i]); 921 taskqueue_free(epair_tasks.tq[i]); 922 } 923 } 924 925 static int 926 epair_modevent(module_t mod, int type, void *data) 927 { 928 int ret; 929 930 switch (type) { 931 case MOD_LOAD: 932 EPAIR_LOCK_INIT(); 933 ret = epair_mod_init(); 934 if (ret != 0) 935 return (ret); 936 if (bootverbose) 937 printf("%s: %s initialized.\n", __func__, epairname); 938 break; 939 case MOD_UNLOAD: 940 epair_mod_cleanup(); 941 EPAIR_LOCK_DESTROY(); 942 if (bootverbose) 943 printf("%s: %s unloaded.\n", __func__, epairname); 944 break; 945 default: 946 return (EOPNOTSUPP); 947 } 948 return (0); 949 } 950 951 static moduledata_t epair_mod = { 952 "if_epair", 953 epair_modevent, 954 0 955 }; 956 957 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 958 MODULE_VERSION(if_epair, 3); 959