1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008 The FreeBSD Foundation 5 * Copyright (c) 2009-2021 Bjoern A. Zeeb <bz@FreeBSD.org> 6 * 7 * This software was developed by CK Software GmbH under sponsorship 8 * from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * A pair of virtual back-to-back connected ethernet like interfaces 34 * (``two interfaces with a virtual cross-over cable''). 35 * 36 * This is mostly intended to be used to provide connectivity between 37 * different virtual network stack instances. 38 */ 39 40 #include <sys/cdefs.h> 41 #include "opt_rss.h" 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 45 #include <sys/param.h> 46 #include <sys/bus.h> 47 #include <sys/hash.h> 48 #include <sys/interrupt.h> 49 #include <sys/jail.h> 50 #include <sys/kernel.h> 51 #include <sys/libkern.h> 52 #include <sys/malloc.h> 53 #include <sys/mbuf.h> 54 #include <sys/module.h> 55 #include <sys/proc.h> 56 #include <sys/queue.h> 57 #include <sys/sched.h> 58 #include <sys/smp.h> 59 #include <sys/socket.h> 60 #include <sys/sockio.h> 61 #include <sys/sysctl.h> 62 #include <sys/taskqueue.h> 63 64 #include <net/bpf.h> 65 #include <net/ethernet.h> 66 #include <net/if.h> 67 #include <net/if_var.h> 68 #include <net/if_clone.h> 69 #include <net/if_media.h> 70 #include <net/if_private.h> 71 #include <net/if_types.h> 72 #include <net/netisr.h> 73 #ifdef RSS 74 #include <net/rss_config.h> 75 #ifdef INET 76 #include <netinet/in_rss.h> 77 #endif 78 #ifdef INET6 79 #include <netinet6/in6_rss.h> 80 #endif 81 #endif 82 #include <net/vnet.h> 83 84 static const char epairname[] = "epair"; 85 #define RXRSIZE 4096 /* Probably overkill by 4-8x. */ 86 87 static MALLOC_DEFINE(M_EPAIR, epairname, 88 "Pair of virtual cross-over connected Ethernet-like interfaces"); 89 90 VNET_DEFINE_STATIC(struct if_clone *, epair_cloner); 91 #define V_epair_cloner VNET(epair_cloner) 92 93 static unsigned int next_index = 0; 94 #define EPAIR_LOCK_INIT() mtx_init(&epair_n_index_mtx, "epairidx", \ 95 NULL, MTX_DEF) 96 #define EPAIR_LOCK_DESTROY() mtx_destroy(&epair_n_index_mtx) 97 #define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx) 98 #define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx) 99 100 SYSCTL_DECL(_net_link); 101 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 102 "Pair of virtual cross-over connected Ethernet-like interfaces"); 103 104 static bool use_ether_gen_addr = true; 105 SYSCTL_BOOL(_net_link_epair, OID_AUTO, ether_gen_addr, CTLFLAG_RWTUN, 106 &use_ether_gen_addr, false, 107 "Generate MAC with FreeBSD OUI using ether_gen_addr(9)"); 108 109 struct epair_softc; 110 struct epair_queue { 111 struct mtx mtx; 112 struct mbufq q; 113 int id; 114 enum { 115 EPAIR_QUEUE_IDLE, 116 EPAIR_QUEUE_WAKING, 117 EPAIR_QUEUE_RUNNING, 118 } state; 119 struct task tx_task; 120 struct epair_softc *sc; 121 }; 122 123 static struct mtx epair_n_index_mtx; 124 struct epair_softc { 125 struct ifnet *ifp; /* This ifp. */ 126 struct ifnet *oifp; /* other ifp of pair. */ 127 int num_queues; 128 struct epair_queue *queues; 129 struct ifmedia media; /* Media config (fake). */ 130 STAILQ_ENTRY(epair_softc) entry; 131 }; 132 133 struct epair_tasks_t { 134 int tasks; 135 struct taskqueue *tq[MAXCPU]; 136 }; 137 138 static struct epair_tasks_t epair_tasks; 139 140 static void 141 epair_clear_mbuf(struct mbuf *m) 142 { 143 M_ASSERTPKTHDR(m); 144 145 /* Remove any CSUM_SND_TAG as ether_input will barf. */ 146 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { 147 m_snd_tag_rele(m->m_pkthdr.snd_tag); 148 m->m_pkthdr.snd_tag = NULL; 149 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 150 } 151 152 /* Clear vlan information. */ 153 m->m_flags &= ~M_VLANTAG; 154 m->m_pkthdr.ether_vtag = 0; 155 156 m_tag_delete_nonpersistent(m); 157 } 158 159 static void 160 epair_tx_start_deferred(void *arg, int pending) 161 { 162 struct epair_queue *q = (struct epair_queue *)arg; 163 if_t ifp; 164 struct mbuf *m, *n; 165 bool resched; 166 167 ifp = q->sc->ifp; 168 169 if_ref(ifp); 170 CURVNET_SET(ifp->if_vnet); 171 172 mtx_lock(&q->mtx); 173 m = mbufq_flush(&q->q); 174 q->state = EPAIR_QUEUE_RUNNING; 175 mtx_unlock(&q->mtx); 176 177 while (m != NULL) { 178 n = STAILQ_NEXT(m, m_stailqpkt); 179 m->m_nextpkt = NULL; 180 if_input(ifp, m); 181 m = n; 182 } 183 184 /* 185 * Avoid flushing the queue more than once per task. We can otherwise 186 * end up starving ourselves in a multi-epair routing configuration. 187 */ 188 mtx_lock(&q->mtx); 189 if (!mbufq_empty(&q->q)) { 190 resched = true; 191 q->state = EPAIR_QUEUE_WAKING; 192 } else { 193 resched = false; 194 q->state = EPAIR_QUEUE_IDLE; 195 } 196 mtx_unlock(&q->mtx); 197 198 if (resched) 199 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 200 201 CURVNET_RESTORE(); 202 if_rele(ifp); 203 } 204 205 static struct epair_queue * 206 epair_select_queue(struct epair_softc *sc, struct mbuf *m) 207 { 208 uint32_t bucket; 209 #ifdef RSS 210 struct ether_header *eh; 211 int ret; 212 213 ret = rss_m2bucket(m, &bucket); 214 if (ret) { 215 /* Actually hash the packet. */ 216 eh = mtod(m, struct ether_header *); 217 218 switch (ntohs(eh->ether_type)) { 219 #ifdef INET 220 case ETHERTYPE_IP: 221 rss_soft_m2cpuid_v4(m, 0, &bucket); 222 break; 223 #endif 224 #ifdef INET6 225 case ETHERTYPE_IPV6: 226 rss_soft_m2cpuid_v6(m, 0, &bucket); 227 break; 228 #endif 229 default: 230 bucket = 0; 231 break; 232 } 233 } 234 bucket %= sc->num_queues; 235 #else 236 bucket = 0; 237 #endif 238 return (&sc->queues[bucket]); 239 } 240 241 static void 242 epair_prepare_mbuf(struct mbuf *m, struct ifnet *src_ifp) 243 { 244 M_ASSERTPKTHDR(m); 245 epair_clear_mbuf(m); 246 if_setrcvif(m, src_ifp); 247 M_SETFIB(m, src_ifp->if_fib); 248 249 MPASS(m->m_nextpkt == NULL); 250 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 251 } 252 253 static void 254 epair_menq(struct mbuf *m, struct epair_softc *osc) 255 { 256 struct epair_queue *q; 257 struct ifnet *ifp, *oifp; 258 int error, len; 259 bool mcast; 260 261 /* 262 * I know this looks weird. We pass the "other sc" as we need that one 263 * and can get both ifps from it as well. 264 */ 265 oifp = osc->ifp; 266 ifp = osc->oifp; 267 268 epair_prepare_mbuf(m, oifp); 269 270 /* Save values as once the mbuf is queued, it's not ours anymore. */ 271 len = m->m_pkthdr.len; 272 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; 273 274 q = epair_select_queue(osc, m); 275 276 mtx_lock(&q->mtx); 277 if (q->state == EPAIR_QUEUE_IDLE) { 278 q->state = EPAIR_QUEUE_WAKING; 279 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 280 } 281 error = mbufq_enqueue(&q->q, m); 282 mtx_unlock(&q->mtx); 283 284 if (error != 0) { 285 m_freem(m); 286 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 287 } else { 288 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 289 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 290 if (mcast) 291 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 292 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 293 } 294 } 295 296 static void 297 epair_start(struct ifnet *ifp) 298 { 299 struct mbuf *m; 300 struct epair_softc *sc; 301 struct ifnet *oifp; 302 303 /* 304 * We get packets here from ether_output via if_handoff() 305 * and need to put them into the input queue of the oifp 306 * and will put the packet into the receive-queue (rxq) of the 307 * other interface (oifp) of our pair. 308 */ 309 sc = ifp->if_softc; 310 oifp = sc->oifp; 311 sc = oifp->if_softc; 312 for (;;) { 313 IFQ_DEQUEUE(&ifp->if_snd, m); 314 if (m == NULL) 315 break; 316 M_ASSERTPKTHDR(m); 317 BPF_MTAP(ifp, m); 318 319 /* In case either interface is not usable drop the packet. */ 320 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 321 (ifp->if_flags & IFF_UP) == 0 || 322 (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 323 (oifp->if_flags & IFF_UP) == 0) { 324 m_freem(m); 325 continue; 326 } 327 328 epair_menq(m, sc); 329 } 330 } 331 332 static int 333 epair_transmit(struct ifnet *ifp, struct mbuf *m) 334 { 335 struct epair_softc *sc; 336 struct ifnet *oifp; 337 #ifdef ALTQ 338 int len; 339 bool mcast; 340 #endif 341 342 if (m == NULL) 343 return (0); 344 M_ASSERTPKTHDR(m); 345 346 /* 347 * We could just transmit this, but it makes testing easier if we're a 348 * little bit more like real hardware. 349 * Allow just that little bit extra for ethernet (and vlan) headers. 350 */ 351 if (m->m_pkthdr.len > (ifp->if_mtu + sizeof(struct ether_vlan_header))) { 352 m_freem(m); 353 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 354 return (E2BIG); 355 } 356 357 /* 358 * We are not going to use the interface en/dequeue mechanism 359 * on the TX side. We are called from ether_output_frame() 360 * and will put the packet into the receive-queue (rxq) of the 361 * other interface (oifp) of our pair. 362 */ 363 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 364 m_freem(m); 365 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 366 return (ENXIO); 367 } 368 if ((ifp->if_flags & IFF_UP) == 0) { 369 m_freem(m); 370 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 371 return (ENETDOWN); 372 } 373 374 BPF_MTAP(ifp, m); 375 376 /* 377 * In case the outgoing interface is not usable, 378 * drop the packet. 379 */ 380 sc = ifp->if_softc; 381 oifp = sc->oifp; 382 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 383 (oifp->if_flags & IFF_UP) == 0) { 384 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 385 m_freem(m); 386 return (0); 387 } 388 389 #ifdef ALTQ 390 len = m->m_pkthdr.len; 391 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; 392 int error = 0; 393 394 /* Support ALTQ via the classic if_start() path. */ 395 IF_LOCK(&ifp->if_snd); 396 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 397 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 398 if (error) 399 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 400 IF_UNLOCK(&ifp->if_snd); 401 if (!error) { 402 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 403 if (mcast) 404 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 405 epair_start(ifp); 406 } 407 return (error); 408 } 409 IF_UNLOCK(&ifp->if_snd); 410 #endif 411 412 epair_menq(m, oifp->if_softc); 413 return (0); 414 } 415 416 static void 417 epair_qflush(struct ifnet *ifp __unused) 418 { 419 } 420 421 static int 422 epair_media_change(struct ifnet *ifp __unused) 423 { 424 425 /* Do nothing. */ 426 return (0); 427 } 428 429 static void 430 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 431 { 432 433 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 434 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 435 } 436 437 static int 438 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 439 { 440 struct epair_softc *sc; 441 struct ifreq *ifr; 442 int error; 443 444 ifr = (struct ifreq *)data; 445 switch (cmd) { 446 case SIOCSIFFLAGS: 447 case SIOCADDMULTI: 448 case SIOCDELMULTI: 449 error = 0; 450 break; 451 452 case SIOCSIFMEDIA: 453 case SIOCGIFMEDIA: 454 sc = ifp->if_softc; 455 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 456 break; 457 458 case SIOCSIFMTU: 459 /* We basically allow all kinds of MTUs. */ 460 ifp->if_mtu = ifr->ifr_mtu; 461 error = 0; 462 break; 463 464 default: 465 /* Let the common ethernet handler process this. */ 466 error = ether_ioctl(ifp, cmd, data); 467 break; 468 } 469 470 return (error); 471 } 472 473 static void 474 epair_init(void *dummy __unused) 475 { 476 } 477 478 /* 479 * Interface cloning functions. 480 * We use our private ones so that we can create/destroy our secondary 481 * device along with the primary one. 482 */ 483 static int 484 epair_clone_match(struct if_clone *ifc, const char *name) 485 { 486 const char *cp; 487 488 /* 489 * Our base name is epair. 490 * Our interfaces will be named epair<n>[ab]. 491 * So accept anything of the following list: 492 * - epair 493 * - epair<n> 494 * but not the epair<n>[ab] versions. 495 */ 496 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 497 return (0); 498 499 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 500 if (*cp < '0' || *cp > '9') 501 return (0); 502 } 503 504 return (1); 505 } 506 507 static void 508 epair_generate_mac_byname(struct epair_softc *sc, uint8_t eaddr[]) 509 { 510 struct ether_addr gen_eaddr; 511 int i; 512 513 ether_gen_addr_byname(if_name(sc->ifp), &gen_eaddr); 514 for (i = 0; i < ETHER_ADDR_LEN; i++) 515 eaddr[i] = gen_eaddr.octet[i]; 516 } 517 518 static void 519 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb) 520 { 521 struct ifnet *ifp; 522 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 523 524 ifp = scb->ifp; 525 if (!use_ether_gen_addr) { 526 /* Copy epairNa etheraddr and change the last byte. */ 527 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN); 528 eaddr[5] = 0x0b; 529 } else 530 epair_generate_mac_byname(scb, eaddr); 531 ether_ifattach(ifp, eaddr); 532 533 if_clone_addif(ifc, ifp); 534 } 535 536 static struct epair_softc * 537 epair_alloc_sc(struct if_clone *ifc) 538 { 539 struct epair_softc *sc; 540 541 struct ifnet *ifp = if_alloc(IFT_ETHER); 542 sc = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 543 sc->ifp = ifp; 544 sc->num_queues = epair_tasks.tasks; 545 sc->queues = mallocarray(sc->num_queues, sizeof(struct epair_queue), 546 M_EPAIR, M_WAITOK); 547 for (int i = 0; i < sc->num_queues; i++) { 548 struct epair_queue *q = &sc->queues[i]; 549 q->id = i; 550 q->state = EPAIR_QUEUE_IDLE; 551 mtx_init(&q->mtx, "epairq", NULL, MTX_DEF | MTX_NEW); 552 mbufq_init(&q->q, RXRSIZE); 553 q->sc = sc; 554 NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q); 555 } 556 557 /* Initialise pseudo media types. */ 558 ifmedia_init(&sc->media, 0, epair_media_change, epair_media_status); 559 ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_T, 0, NULL); 560 ifmedia_set(&sc->media, IFM_ETHER | IFM_10G_T); 561 562 return (sc); 563 } 564 565 static void 566 epair_setup_ifp(struct epair_softc *sc, char *name, int unit) 567 { 568 struct ifnet *ifp = sc->ifp; 569 570 ifp->if_softc = sc; 571 strlcpy(ifp->if_xname, name, IFNAMSIZ); 572 ifp->if_dname = epairname; 573 ifp->if_dunit = unit; 574 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 575 ifp->if_capabilities = IFCAP_VLAN_MTU; 576 ifp->if_capenable = IFCAP_VLAN_MTU; 577 ifp->if_transmit = epair_transmit; 578 ifp->if_qflush = epair_qflush; 579 ifp->if_start = epair_start; 580 ifp->if_ioctl = epair_ioctl; 581 ifp->if_init = epair_init; 582 if_setsendqlen(ifp, ifqmaxlen); 583 if_setsendqready(ifp); 584 585 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 586 } 587 588 static void 589 epair_generate_mac(struct epair_softc *sc, uint8_t *eaddr) 590 { 591 uint32_t key[3]; 592 uint32_t hash; 593 uint64_t hostid; 594 595 EPAIR_LOCK(); 596 #ifdef SMP 597 /* Get an approximate distribution. */ 598 hash = next_index % mp_ncpus; 599 #else 600 hash = 0; 601 #endif 602 EPAIR_UNLOCK(); 603 604 /* 605 * Calculate the etheraddr hashing the hostid and the 606 * interface index. The result would be hopefully unique. 607 * Note that the "a" component of an epair instance may get moved 608 * to a different VNET after creation. In that case its index 609 * will be freed and the index can get reused by new epair instance. 610 * Make sure we do not create same etheraddr again. 611 */ 612 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid); 613 if (hostid == 0) 614 arc4rand(&hostid, sizeof(hostid), 0); 615 616 struct ifnet *ifp = sc->ifp; 617 EPAIR_LOCK(); 618 if (ifp->if_index > next_index) 619 next_index = ifp->if_index; 620 else 621 next_index++; 622 623 key[0] = (uint32_t)next_index; 624 EPAIR_UNLOCK(); 625 key[1] = (uint32_t)(hostid & 0xffffffff); 626 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff); 627 hash = jenkins_hash32(key, 3, 0); 628 629 eaddr[0] = 0x02; 630 memcpy(&eaddr[1], &hash, 4); 631 eaddr[5] = 0x0a; 632 } 633 634 static void 635 epair_free_sc(struct epair_softc *sc) 636 { 637 638 if_free(sc->ifp); 639 ifmedia_removeall(&sc->media); 640 for (int i = 0; i < sc->num_queues; i++) { 641 struct epair_queue *q = &sc->queues[i]; 642 mtx_destroy(&q->mtx); 643 } 644 free(sc->queues, M_EPAIR); 645 free(sc, M_EPAIR); 646 } 647 648 static void 649 epair_set_state(struct ifnet *ifp, bool running) 650 { 651 if (running) { 652 ifp->if_drv_flags |= IFF_DRV_RUNNING; 653 if_link_state_change(ifp, LINK_STATE_UP); 654 } else { 655 if_link_state_change(ifp, LINK_STATE_DOWN); 656 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 657 } 658 } 659 660 static int 661 epair_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit) 662 { 663 int error = 0, unit, wildcard; 664 char *dp; 665 666 /* Try to see if a special unit was requested. */ 667 error = ifc_name2unit(name, &unit); 668 if (error != 0) 669 return (error); 670 wildcard = (unit < 0); 671 672 error = ifc_alloc_unit(ifc, &unit); 673 if (error != 0) 674 return (error); 675 676 /* 677 * If no unit had been given, we need to adjust the ifName. 678 * Also make sure there is space for our extra [ab] suffix. 679 */ 680 for (dp = name; *dp != '\0'; dp++); 681 if (wildcard) { 682 int slen = snprintf(dp, len - (dp - name), "%d", unit); 683 if (slen > len - (dp - name) - 1) { 684 /* ifName too long. */ 685 error = ENOSPC; 686 goto done; 687 } 688 dp += slen; 689 } 690 if (len - (dp - name) - 1 < 1) { 691 /* No space left for our [ab] suffix. */ 692 error = ENOSPC; 693 goto done; 694 } 695 *dp = 'b'; 696 /* Must not change dp so we can replace 'a' by 'b' later. */ 697 *(dp+1) = '\0'; 698 699 /* Check if 'a' and 'b' interfaces already exist. */ 700 if (ifunit(name) != NULL) { 701 error = EEXIST; 702 goto done; 703 } 704 705 *dp = 'a'; 706 if (ifunit(name) != NULL) { 707 error = EEXIST; 708 goto done; 709 } 710 *punit = unit; 711 done: 712 if (error != 0) 713 ifc_free_unit(ifc, unit); 714 715 return (error); 716 } 717 718 static int 719 epair_clone_create(struct if_clone *ifc, char *name, size_t len, 720 struct ifc_data *ifd, struct ifnet **ifpp) 721 { 722 struct epair_softc *sca, *scb; 723 struct ifnet *ifp; 724 char *dp; 725 int error, unit; 726 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 727 728 error = epair_handle_unit(ifc, name, len, &unit); 729 if (error != 0) 730 return (error); 731 732 /* Allocate memory for both [ab] interfaces */ 733 sca = epair_alloc_sc(ifc); 734 scb = epair_alloc_sc(ifc); 735 736 /* 737 * Cross-reference the interfaces so we will be able to free both. 738 */ 739 sca->oifp = scb->ifp; 740 scb->oifp = sca->ifp; 741 742 /* Finish initialization of interface <n>a. */ 743 ifp = sca->ifp; 744 epair_setup_ifp(sca, name, unit); 745 if (!use_ether_gen_addr) 746 epair_generate_mac(sca, eaddr); 747 else 748 epair_generate_mac_byname(sca, eaddr); 749 750 ether_ifattach(ifp, eaddr); 751 752 /* Swap the name and finish initialization of interface <n>b. */ 753 dp = name + strlen(name) - 1; 754 *dp = 'b'; 755 756 epair_setup_ifp(scb, name, unit); 757 758 ifp = scb->ifp; 759 /* We need to play some tricks here for the second interface. */ 760 strlcpy(name, epairname, len); 761 /* Correctly set the name for the cloner list. */ 762 strlcpy(name, scb->ifp->if_xname, len); 763 764 epair_clone_add(ifc, scb); 765 766 /* 767 * Restore name to <n>a as the ifp for this will go into the 768 * cloner list for the initial call. 769 */ 770 strlcpy(name, sca->ifp->if_xname, len); 771 772 /* Tell the world, that we are ready to rock. */ 773 epair_set_state(sca->ifp, true); 774 epair_set_state(scb->ifp, true); 775 776 *ifpp = sca->ifp; 777 778 return (0); 779 } 780 781 static void 782 epair_drain_rings(struct epair_softc *sc) 783 { 784 for (int i = 0; i < sc->num_queues; i++) { 785 struct epair_queue *q; 786 struct mbuf *m, *n; 787 788 q = &sc->queues[i]; 789 mtx_lock(&q->mtx); 790 m = mbufq_flush(&q->q); 791 mtx_unlock(&q->mtx); 792 793 for (; m != NULL; m = n) { 794 n = m->m_nextpkt; 795 m_freem(m); 796 } 797 } 798 } 799 800 static int 801 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 802 { 803 struct ifnet *oifp; 804 struct epair_softc *sca, *scb; 805 int unit, error; 806 807 /* 808 * In case we called into if_clone_destroyif() ourselves 809 * again to remove the second interface, the softc will be 810 * NULL. In that case so not do anything but return success. 811 */ 812 if (ifp->if_softc == NULL) 813 return (0); 814 815 unit = ifp->if_dunit; 816 sca = ifp->if_softc; 817 oifp = sca->oifp; 818 scb = oifp->if_softc; 819 820 /* Frist get the interfaces down and detached. */ 821 epair_set_state(ifp, false); 822 epair_set_state(oifp, false); 823 824 ether_ifdetach(ifp); 825 ether_ifdetach(oifp); 826 827 /* Third free any queued packets and all the resources. */ 828 CURVNET_SET_QUIET(oifp->if_vnet); 829 epair_drain_rings(scb); 830 oifp->if_softc = NULL; 831 error = if_clone_destroyif(ifc, oifp); 832 if (error) 833 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 834 __func__, error); 835 epair_free_sc(scb); 836 CURVNET_RESTORE(); 837 838 epair_drain_rings(sca); 839 epair_free_sc(sca); 840 841 /* Last free the cloner unit. */ 842 ifc_free_unit(ifc, unit); 843 844 return (0); 845 } 846 847 static void 848 vnet_epair_init(const void *unused __unused) 849 { 850 struct if_clone_addreq req = { 851 .match_f = epair_clone_match, 852 .create_f = epair_clone_create, 853 .destroy_f = epair_clone_destroy, 854 }; 855 V_epair_cloner = ifc_attach_cloner(epairname, &req); 856 } 857 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 858 vnet_epair_init, NULL); 859 860 static void 861 vnet_epair_uninit(const void *unused __unused) 862 { 863 864 ifc_detach_cloner(V_epair_cloner); 865 } 866 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 867 vnet_epair_uninit, NULL); 868 869 static int 870 epair_mod_init(void) 871 { 872 char name[32]; 873 epair_tasks.tasks = 0; 874 875 #ifdef RSS 876 int cpu; 877 878 CPU_FOREACH(cpu) { 879 cpuset_t cpu_mask; 880 881 /* Pin to this CPU so we get appropriate NUMA allocations. */ 882 thread_lock(curthread); 883 sched_bind(curthread, cpu); 884 thread_unlock(curthread); 885 886 snprintf(name, sizeof(name), "epair_task_%d", cpu); 887 888 epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK, 889 taskqueue_thread_enqueue, 890 &epair_tasks.tq[cpu]); 891 CPU_SETOF(cpu, &cpu_mask); 892 taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET, 893 &cpu_mask, "%s", name); 894 895 epair_tasks.tasks++; 896 } 897 thread_lock(curthread); 898 sched_unbind(curthread); 899 thread_unlock(curthread); 900 #else 901 snprintf(name, sizeof(name), "epair_task"); 902 903 epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK, 904 taskqueue_thread_enqueue, 905 &epair_tasks.tq[0]); 906 taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name); 907 908 epair_tasks.tasks = 1; 909 #endif 910 911 return (0); 912 } 913 914 static void 915 epair_mod_cleanup(void) 916 { 917 918 for (int i = 0; i < epair_tasks.tasks; i++) { 919 taskqueue_drain_all(epair_tasks.tq[i]); 920 taskqueue_free(epair_tasks.tq[i]); 921 } 922 } 923 924 static int 925 epair_modevent(module_t mod, int type, void *data) 926 { 927 int ret; 928 929 switch (type) { 930 case MOD_LOAD: 931 EPAIR_LOCK_INIT(); 932 ret = epair_mod_init(); 933 if (ret != 0) 934 return (ret); 935 if (bootverbose) 936 printf("%s: %s initialized.\n", __func__, epairname); 937 break; 938 case MOD_UNLOAD: 939 epair_mod_cleanup(); 940 EPAIR_LOCK_DESTROY(); 941 if (bootverbose) 942 printf("%s: %s unloaded.\n", __func__, epairname); 943 break; 944 default: 945 return (EOPNOTSUPP); 946 } 947 return (0); 948 } 949 950 static moduledata_t epair_mod = { 951 "if_epair", 952 epair_modevent, 953 0 954 }; 955 956 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 957 MODULE_VERSION(if_epair, 3); 958