1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2008 The FreeBSD Foundation 5 * Copyright (c) 2009-2021 Bjoern A. Zeeb <bz@FreeBSD.org> 6 * 7 * This software was developed by CK Software GmbH under sponsorship 8 * from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * A pair of virtual back-to-back connected ethernet like interfaces 34 * (``two interfaces with a virtual cross-over cable''). 35 * 36 * This is mostly intended to be used to provide connectivity between 37 * different virtual network stack instances. 38 */ 39 40 #include <sys/cdefs.h> 41 #include "opt_rss.h" 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 45 #include <sys/param.h> 46 #include <sys/bus.h> 47 #include <sys/hash.h> 48 #include <sys/interrupt.h> 49 #include <sys/jail.h> 50 #include <sys/kernel.h> 51 #include <sys/libkern.h> 52 #include <sys/malloc.h> 53 #include <sys/mbuf.h> 54 #include <sys/module.h> 55 #include <sys/proc.h> 56 #include <sys/queue.h> 57 #include <sys/sched.h> 58 #include <sys/smp.h> 59 #include <sys/socket.h> 60 #include <sys/sockio.h> 61 #include <sys/sysctl.h> 62 #include <sys/taskqueue.h> 63 64 #include <net/bpf.h> 65 #include <net/ethernet.h> 66 #include <net/if.h> 67 #include <net/if_var.h> 68 #include <net/if_clone.h> 69 #include <net/if_media.h> 70 #include <net/if_private.h> 71 #include <net/if_types.h> 72 #include <net/if_vlan_var.h> 73 #include <net/netisr.h> 74 #ifdef RSS 75 #include <net/rss_config.h> 76 #ifdef INET 77 #include <netinet/in_rss.h> 78 #endif 79 #ifdef INET6 80 #include <netinet6/in6_rss.h> 81 #endif 82 #endif 83 #include <net/vnet.h> 84 85 static const char epairname[] = "epair"; 86 #define RXRSIZE 4096 /* Probably overkill by 4-8x. */ 87 88 static MALLOC_DEFINE(M_EPAIR, epairname, 89 "Pair of virtual cross-over connected Ethernet-like interfaces"); 90 91 VNET_DEFINE_STATIC(struct if_clone *, epair_cloner); 92 #define V_epair_cloner VNET(epair_cloner) 93 94 static unsigned int next_index = 0; 95 #define EPAIR_LOCK_INIT() mtx_init(&epair_n_index_mtx, "epairidx", \ 96 NULL, MTX_DEF) 97 #define EPAIR_LOCK_DESTROY() mtx_destroy(&epair_n_index_mtx) 98 #define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx) 99 #define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx) 100 101 SYSCTL_DECL(_net_link); 102 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 103 "Pair of virtual cross-over connected Ethernet-like interfaces"); 104 105 static bool use_ether_gen_addr = true; 106 SYSCTL_BOOL(_net_link_epair, OID_AUTO, ether_gen_addr, CTLFLAG_RWTUN, 107 &use_ether_gen_addr, false, 108 "Generate MAC with FreeBSD OUI using ether_gen_addr(9)"); 109 110 struct epair_softc; 111 struct epair_queue { 112 struct mtx mtx; 113 struct mbufq q; 114 int id; 115 enum { 116 EPAIR_QUEUE_IDLE, 117 EPAIR_QUEUE_WAKING, 118 EPAIR_QUEUE_RUNNING, 119 } state; 120 struct task tx_task; 121 struct epair_softc *sc; 122 }; 123 124 static struct mtx epair_n_index_mtx; 125 struct epair_softc { 126 struct ifnet *ifp; /* This ifp. */ 127 struct ifnet *oifp; /* other ifp of pair. */ 128 int num_queues; 129 struct epair_queue *queues; 130 struct ifmedia media; /* Media config (fake). */ 131 STAILQ_ENTRY(epair_softc) entry; 132 }; 133 134 struct epair_tasks_t { 135 int tasks; 136 struct taskqueue *tq[MAXCPU]; 137 }; 138 139 static struct epair_tasks_t epair_tasks; 140 141 static void 142 epair_clear_mbuf(struct mbuf *m) 143 { 144 M_ASSERTPKTHDR(m); 145 146 /* Remove any CSUM_SND_TAG as ether_input will barf. */ 147 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { 148 m_snd_tag_rele(m->m_pkthdr.snd_tag); 149 m->m_pkthdr.snd_tag = NULL; 150 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 151 } 152 153 /* Clear vlan information. */ 154 m->m_flags &= ~M_VLANTAG; 155 m->m_pkthdr.ether_vtag = 0; 156 157 m_tag_delete_nonpersistent(m); 158 } 159 160 static void 161 epair_tx_start_deferred(void *arg, int pending) 162 { 163 struct epair_queue *q = (struct epair_queue *)arg; 164 if_t ifp; 165 struct mbuf *m, *n; 166 bool resched; 167 168 ifp = q->sc->ifp; 169 170 if_ref(ifp); 171 CURVNET_SET(ifp->if_vnet); 172 173 mtx_lock(&q->mtx); 174 m = mbufq_flush(&q->q); 175 q->state = EPAIR_QUEUE_RUNNING; 176 mtx_unlock(&q->mtx); 177 178 while (m != NULL) { 179 n = STAILQ_NEXT(m, m_stailqpkt); 180 m->m_nextpkt = NULL; 181 if_input(ifp, m); 182 m = n; 183 } 184 185 /* 186 * Avoid flushing the queue more than once per task. We can otherwise 187 * end up starving ourselves in a multi-epair routing configuration. 188 */ 189 mtx_lock(&q->mtx); 190 if (!mbufq_empty(&q->q)) { 191 resched = true; 192 q->state = EPAIR_QUEUE_WAKING; 193 } else { 194 resched = false; 195 q->state = EPAIR_QUEUE_IDLE; 196 } 197 mtx_unlock(&q->mtx); 198 199 if (resched) 200 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 201 202 CURVNET_RESTORE(); 203 if_rele(ifp); 204 } 205 206 static struct epair_queue * 207 epair_select_queue(struct epair_softc *sc, struct mbuf *m) 208 { 209 uint32_t bucket; 210 #ifdef RSS 211 struct ether_header *eh; 212 int ret; 213 214 ret = rss_m2bucket(m, &bucket); 215 if (ret) { 216 /* Actually hash the packet. */ 217 eh = mtod(m, struct ether_header *); 218 219 switch (ntohs(eh->ether_type)) { 220 #ifdef INET 221 case ETHERTYPE_IP: 222 rss_soft_m2cpuid_v4(m, 0, &bucket); 223 break; 224 #endif 225 #ifdef INET6 226 case ETHERTYPE_IPV6: 227 rss_soft_m2cpuid_v6(m, 0, &bucket); 228 break; 229 #endif 230 default: 231 bucket = 0; 232 break; 233 } 234 } 235 bucket %= sc->num_queues; 236 #else 237 bucket = 0; 238 #endif 239 return (&sc->queues[bucket]); 240 } 241 242 static void 243 epair_prepare_mbuf(struct mbuf *m, struct ifnet *src_ifp) 244 { 245 M_ASSERTPKTHDR(m); 246 epair_clear_mbuf(m); 247 if_setrcvif(m, src_ifp); 248 M_SETFIB(m, src_ifp->if_fib); 249 250 MPASS(m->m_nextpkt == NULL); 251 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 252 } 253 254 static void 255 epair_menq(struct mbuf *m, struct epair_softc *osc) 256 { 257 struct epair_queue *q; 258 struct ifnet *ifp, *oifp; 259 int error, len; 260 bool mcast; 261 262 /* 263 * I know this looks weird. We pass the "other sc" as we need that one 264 * and can get both ifps from it as well. 265 */ 266 oifp = osc->ifp; 267 ifp = osc->oifp; 268 269 epair_prepare_mbuf(m, oifp); 270 271 /* Save values as once the mbuf is queued, it's not ours anymore. */ 272 len = m->m_pkthdr.len; 273 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; 274 275 q = epair_select_queue(osc, m); 276 277 mtx_lock(&q->mtx); 278 if (q->state == EPAIR_QUEUE_IDLE) { 279 q->state = EPAIR_QUEUE_WAKING; 280 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 281 } 282 error = mbufq_enqueue(&q->q, m); 283 mtx_unlock(&q->mtx); 284 285 if (error != 0) { 286 m_freem(m); 287 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 288 } else { 289 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 290 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 291 if (mcast) 292 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 293 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 294 } 295 } 296 297 static void 298 epair_start(struct ifnet *ifp) 299 { 300 struct mbuf *m; 301 struct epair_softc *sc; 302 struct ifnet *oifp; 303 304 /* 305 * We get packets here from ether_output via if_handoff() 306 * and need to put them into the input queue of the oifp 307 * and will put the packet into the receive-queue (rxq) of the 308 * other interface (oifp) of our pair. 309 */ 310 sc = ifp->if_softc; 311 oifp = sc->oifp; 312 sc = oifp->if_softc; 313 for (;;) { 314 IFQ_DEQUEUE(&ifp->if_snd, m); 315 if (m == NULL) 316 break; 317 M_ASSERTPKTHDR(m); 318 BPF_MTAP(ifp, m); 319 320 /* In case either interface is not usable drop the packet. */ 321 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 322 (ifp->if_flags & IFF_UP) == 0 || 323 (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 324 (oifp->if_flags & IFF_UP) == 0) { 325 m_freem(m); 326 continue; 327 } 328 329 epair_menq(m, sc); 330 } 331 } 332 333 static int 334 epair_transmit(struct ifnet *ifp, struct mbuf *m) 335 { 336 struct epair_softc *sc; 337 struct ifnet *oifp; 338 #ifdef ALTQ 339 int len; 340 bool mcast; 341 #endif 342 343 if (m == NULL) 344 return (0); 345 M_ASSERTPKTHDR(m); 346 347 /* 348 * We could just transmit this, but it makes testing easier if we're a 349 * little bit more like real hardware. 350 * Allow just that little bit extra for ethernet (and vlan) headers. 351 */ 352 if (m->m_pkthdr.len > (ifp->if_mtu + sizeof(struct ether_vlan_header))) { 353 m_freem(m); 354 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 355 return (E2BIG); 356 } 357 358 /* 359 * We are not going to use the interface en/dequeue mechanism 360 * on the TX side. We are called from ether_output_frame() 361 * and will put the packet into the receive-queue (rxq) of the 362 * other interface (oifp) of our pair. 363 */ 364 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 365 m_freem(m); 366 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 367 return (ENXIO); 368 } 369 if ((ifp->if_flags & IFF_UP) == 0) { 370 m_freem(m); 371 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 372 return (ENETDOWN); 373 } 374 375 BPF_MTAP(ifp, m); 376 377 /* 378 * In case the outgoing interface is not usable, 379 * drop the packet. 380 */ 381 sc = ifp->if_softc; 382 oifp = sc->oifp; 383 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 384 (oifp->if_flags & IFF_UP) == 0) { 385 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 386 m_freem(m); 387 return (0); 388 } 389 390 #ifdef ALTQ 391 len = m->m_pkthdr.len; 392 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0; 393 int error = 0; 394 395 /* Support ALTQ via the classic if_start() path. */ 396 IF_LOCK(&ifp->if_snd); 397 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 398 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 399 if (error) 400 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 401 IF_UNLOCK(&ifp->if_snd); 402 if (!error) { 403 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 404 if (mcast) 405 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 406 epair_start(ifp); 407 } 408 return (error); 409 } 410 IF_UNLOCK(&ifp->if_snd); 411 #endif 412 413 epair_menq(m, oifp->if_softc); 414 return (0); 415 } 416 417 static void 418 epair_qflush(struct ifnet *ifp __unused) 419 { 420 } 421 422 static int 423 epair_media_change(struct ifnet *ifp __unused) 424 { 425 426 /* Do nothing. */ 427 return (0); 428 } 429 430 static void 431 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 432 { 433 434 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 435 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 436 } 437 438 /* 439 * Update ifp->if_hwassist according to the current value of ifp->if_capenable. 440 */ 441 static void 442 epair_caps_changed(struct ifnet *ifp) 443 { 444 uint64_t hwassist = 0; 445 446 if (ifp->if_capenable & IFCAP_TXCSUM) 447 hwassist |= CSUM_IP_TCP | CSUM_IP_UDP; 448 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 449 hwassist |= CSUM_IP6_TCP | CSUM_IP6_UDP; 450 ifp->if_hwassist = hwassist; 451 } 452 453 static int 454 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 455 { 456 struct epair_softc *sc; 457 struct ifreq *ifr; 458 int error; 459 460 ifr = (struct ifreq *)data; 461 switch (cmd) { 462 case SIOCSIFFLAGS: 463 case SIOCADDMULTI: 464 case SIOCDELMULTI: 465 error = 0; 466 break; 467 468 case SIOCSIFMEDIA: 469 case SIOCGIFMEDIA: 470 sc = ifp->if_softc; 471 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 472 break; 473 474 case SIOCSIFMTU: 475 /* We basically allow all kinds of MTUs. */ 476 ifp->if_mtu = ifr->ifr_mtu; 477 error = 0; 478 break; 479 480 case SIOCGIFCAP: 481 ifr->ifr_reqcap = ifp->if_capabilities; 482 ifr->ifr_curcap = ifp->if_capenable; 483 error = 0; 484 break; 485 case SIOCSIFCAP: 486 /* 487 * Enable/disable capabilities as requested, besides 488 * IFCAP_RXCSUM(_IPV6), which always remain enabled. 489 * Incoming packets may have the mbuf flag CSUM_DATA_VALID set. 490 * Without IFCAP_RXCSUM(_IPV6), this flag would have to be 491 * removed, which does not seem helpful. 492 */ 493 ifp->if_capenable = ifr->ifr_reqcap | IFCAP_RXCSUM | 494 IFCAP_RXCSUM_IPV6; 495 epair_caps_changed(ifp); 496 /* 497 * If IFCAP_TXCSUM(_IPV6) has been changed, change it on the 498 * other epair interface as well. 499 * A bridge disables IFCAP_TXCSUM(_IPV6) when adding one epair 500 * interface if another interface in the bridge has it disabled. 501 * In that case this capability needs to be disabled on the 502 * other epair interface to avoid sending packets in the bridge 503 * that rely on this capability. 504 */ 505 sc = ifp->if_softc; 506 if ((ifp->if_capenable ^ sc->oifp->if_capenable) & 507 (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6)) { 508 sc->oifp->if_capenable &= 509 ~(IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); 510 sc->oifp->if_capenable |= ifp->if_capenable & 511 (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); 512 epair_caps_changed(sc->oifp); 513 } 514 VLAN_CAPABILITIES(ifp); 515 error = 0; 516 break; 517 518 default: 519 /* Let the common ethernet handler process this. */ 520 error = ether_ioctl(ifp, cmd, data); 521 break; 522 } 523 524 return (error); 525 } 526 527 static void 528 epair_init(void *dummy __unused) 529 { 530 } 531 532 /* 533 * Interface cloning functions. 534 * We use our private ones so that we can create/destroy our secondary 535 * device along with the primary one. 536 */ 537 static int 538 epair_clone_match(struct if_clone *ifc, const char *name) 539 { 540 const char *cp; 541 542 /* 543 * Our base name is epair. 544 * Our interfaces will be named epair<n>[ab]. 545 * So accept anything of the following list: 546 * - epair 547 * - epair<n> 548 * but not the epair<n>[ab] versions. 549 */ 550 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 551 return (0); 552 553 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 554 if (*cp < '0' || *cp > '9') 555 return (0); 556 } 557 558 return (1); 559 } 560 561 static void 562 epair_generate_mac_byname(struct epair_softc *sc, uint8_t eaddr[]) 563 { 564 struct ether_addr gen_eaddr; 565 int i; 566 567 ether_gen_addr_byname(if_name(sc->ifp), &gen_eaddr); 568 for (i = 0; i < ETHER_ADDR_LEN; i++) 569 eaddr[i] = gen_eaddr.octet[i]; 570 } 571 572 static void 573 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb) 574 { 575 struct ifnet *ifp; 576 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 577 578 ifp = scb->ifp; 579 if (!use_ether_gen_addr) { 580 /* Copy epairNa etheraddr and change the last byte. */ 581 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN); 582 eaddr[5] = 0x0b; 583 } else 584 epair_generate_mac_byname(scb, eaddr); 585 ether_ifattach(ifp, eaddr); 586 587 if_clone_addif(ifc, ifp); 588 } 589 590 static struct epair_softc * 591 epair_alloc_sc(struct if_clone *ifc) 592 { 593 struct epair_softc *sc; 594 595 struct ifnet *ifp = if_alloc(IFT_ETHER); 596 sc = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 597 sc->ifp = ifp; 598 sc->num_queues = epair_tasks.tasks; 599 sc->queues = mallocarray(sc->num_queues, sizeof(struct epair_queue), 600 M_EPAIR, M_WAITOK); 601 for (int i = 0; i < sc->num_queues; i++) { 602 struct epair_queue *q = &sc->queues[i]; 603 q->id = i; 604 q->state = EPAIR_QUEUE_IDLE; 605 mtx_init(&q->mtx, "epairq", NULL, MTX_DEF | MTX_NEW); 606 mbufq_init(&q->q, RXRSIZE); 607 q->sc = sc; 608 NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q); 609 } 610 611 /* Initialise pseudo media types. */ 612 ifmedia_init(&sc->media, 0, epair_media_change, epair_media_status); 613 ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_T, 0, NULL); 614 ifmedia_set(&sc->media, IFM_ETHER | IFM_10G_T); 615 616 return (sc); 617 } 618 619 static void 620 epair_setup_ifp(struct epair_softc *sc, char *name, int unit) 621 { 622 struct ifnet *ifp = sc->ifp; 623 624 ifp->if_softc = sc; 625 strlcpy(ifp->if_xname, name, IFNAMSIZ); 626 ifp->if_dname = epairname; 627 ifp->if_dunit = unit; 628 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 629 ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_TXCSUM | 630 IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; 631 ifp->if_capenable = IFCAP_VLAN_MTU | IFCAP_TXCSUM | 632 IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; 633 epair_caps_changed(ifp); 634 ifp->if_transmit = epair_transmit; 635 ifp->if_qflush = epair_qflush; 636 ifp->if_start = epair_start; 637 ifp->if_ioctl = epair_ioctl; 638 ifp->if_init = epair_init; 639 if_setsendqlen(ifp, ifqmaxlen); 640 if_setsendqready(ifp); 641 642 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 643 } 644 645 static void 646 epair_generate_mac(struct epair_softc *sc, uint8_t *eaddr) 647 { 648 uint32_t key[3]; 649 uint32_t hash; 650 uint64_t hostid; 651 652 EPAIR_LOCK(); 653 #ifdef SMP 654 /* Get an approximate distribution. */ 655 hash = next_index % mp_ncpus; 656 #else 657 hash = 0; 658 #endif 659 EPAIR_UNLOCK(); 660 661 /* 662 * Calculate the etheraddr hashing the hostid and the 663 * interface index. The result would be hopefully unique. 664 * Note that the "a" component of an epair instance may get moved 665 * to a different VNET after creation. In that case its index 666 * will be freed and the index can get reused by new epair instance. 667 * Make sure we do not create same etheraddr again. 668 */ 669 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid); 670 if (hostid == 0) 671 arc4rand(&hostid, sizeof(hostid), 0); 672 673 struct ifnet *ifp = sc->ifp; 674 EPAIR_LOCK(); 675 if (ifp->if_index > next_index) 676 next_index = ifp->if_index; 677 else 678 next_index++; 679 680 key[0] = (uint32_t)next_index; 681 EPAIR_UNLOCK(); 682 key[1] = (uint32_t)(hostid & 0xffffffff); 683 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff); 684 hash = jenkins_hash32(key, 3, 0); 685 686 eaddr[0] = 0x02; 687 memcpy(&eaddr[1], &hash, 4); 688 eaddr[5] = 0x0a; 689 } 690 691 static void 692 epair_free_sc(struct epair_softc *sc) 693 { 694 695 if_free(sc->ifp); 696 ifmedia_removeall(&sc->media); 697 for (int i = 0; i < sc->num_queues; i++) { 698 struct epair_queue *q = &sc->queues[i]; 699 mtx_destroy(&q->mtx); 700 } 701 free(sc->queues, M_EPAIR); 702 free(sc, M_EPAIR); 703 } 704 705 static void 706 epair_set_state(struct ifnet *ifp, bool running) 707 { 708 if (running) { 709 ifp->if_drv_flags |= IFF_DRV_RUNNING; 710 if_link_state_change(ifp, LINK_STATE_UP); 711 } else { 712 if_link_state_change(ifp, LINK_STATE_DOWN); 713 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 714 } 715 } 716 717 static int 718 epair_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit) 719 { 720 int error = 0, unit, wildcard; 721 char *dp; 722 723 /* Try to see if a special unit was requested. */ 724 error = ifc_name2unit(name, &unit); 725 if (error != 0) 726 return (error); 727 wildcard = (unit < 0); 728 729 error = ifc_alloc_unit(ifc, &unit); 730 if (error != 0) 731 return (error); 732 733 /* 734 * If no unit had been given, we need to adjust the ifName. 735 * Also make sure there is space for our extra [ab] suffix. 736 */ 737 for (dp = name; *dp != '\0'; dp++); 738 if (wildcard) { 739 int slen = snprintf(dp, len - (dp - name), "%d", unit); 740 if (slen > len - (dp - name) - 1) { 741 /* ifName too long. */ 742 error = ENOSPC; 743 goto done; 744 } 745 dp += slen; 746 } 747 if (len - (dp - name) - 1 < 1) { 748 /* No space left for our [ab] suffix. */ 749 error = ENOSPC; 750 goto done; 751 } 752 *dp = 'b'; 753 /* Must not change dp so we can replace 'a' by 'b' later. */ 754 *(dp+1) = '\0'; 755 756 /* Check if 'a' and 'b' interfaces already exist. */ 757 if (ifunit(name) != NULL) { 758 error = EEXIST; 759 goto done; 760 } 761 762 *dp = 'a'; 763 if (ifunit(name) != NULL) { 764 error = EEXIST; 765 goto done; 766 } 767 *punit = unit; 768 done: 769 if (error != 0) 770 ifc_free_unit(ifc, unit); 771 772 return (error); 773 } 774 775 static int 776 epair_clone_create(struct if_clone *ifc, char *name, size_t len, 777 struct ifc_data *ifd, struct ifnet **ifpp) 778 { 779 struct epair_softc *sca, *scb; 780 struct ifnet *ifp; 781 char *dp; 782 int error, unit; 783 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 784 785 error = epair_handle_unit(ifc, name, len, &unit); 786 if (error != 0) 787 return (error); 788 789 /* Allocate memory for both [ab] interfaces */ 790 sca = epair_alloc_sc(ifc); 791 scb = epair_alloc_sc(ifc); 792 793 /* 794 * Cross-reference the interfaces so we will be able to free both. 795 */ 796 sca->oifp = scb->ifp; 797 scb->oifp = sca->ifp; 798 799 /* Finish initialization of interface <n>a. */ 800 ifp = sca->ifp; 801 epair_setup_ifp(sca, name, unit); 802 if (!use_ether_gen_addr) 803 epair_generate_mac(sca, eaddr); 804 else 805 epair_generate_mac_byname(sca, eaddr); 806 807 ether_ifattach(ifp, eaddr); 808 809 /* Swap the name and finish initialization of interface <n>b. */ 810 dp = name + strlen(name) - 1; 811 *dp = 'b'; 812 813 epair_setup_ifp(scb, name, unit); 814 815 ifp = scb->ifp; 816 /* We need to play some tricks here for the second interface. */ 817 strlcpy(name, epairname, len); 818 /* Correctly set the name for the cloner list. */ 819 strlcpy(name, scb->ifp->if_xname, len); 820 821 epair_clone_add(ifc, scb); 822 823 /* 824 * Restore name to <n>a as the ifp for this will go into the 825 * cloner list for the initial call. 826 */ 827 strlcpy(name, sca->ifp->if_xname, len); 828 829 /* Tell the world, that we are ready to rock. */ 830 epair_set_state(sca->ifp, true); 831 epair_set_state(scb->ifp, true); 832 833 *ifpp = sca->ifp; 834 835 return (0); 836 } 837 838 static void 839 epair_drain_rings(struct epair_softc *sc) 840 { 841 for (int i = 0; i < sc->num_queues; i++) { 842 struct epair_queue *q; 843 struct mbuf *m, *n; 844 845 q = &sc->queues[i]; 846 mtx_lock(&q->mtx); 847 m = mbufq_flush(&q->q); 848 mtx_unlock(&q->mtx); 849 850 for (; m != NULL; m = n) { 851 n = m->m_nextpkt; 852 m_freem(m); 853 } 854 } 855 } 856 857 static int 858 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 859 { 860 struct ifnet *oifp; 861 struct epair_softc *sca, *scb; 862 int unit, error; 863 864 /* 865 * In case we called into if_clone_destroyif() ourselves 866 * again to remove the second interface, the softc will be 867 * NULL. In that case so not do anything but return success. 868 */ 869 if (ifp->if_softc == NULL) 870 return (0); 871 872 unit = ifp->if_dunit; 873 sca = ifp->if_softc; 874 oifp = sca->oifp; 875 scb = oifp->if_softc; 876 877 /* Frist get the interfaces down and detached. */ 878 epair_set_state(ifp, false); 879 epair_set_state(oifp, false); 880 881 ether_ifdetach(ifp); 882 ether_ifdetach(oifp); 883 884 /* Third free any queued packets and all the resources. */ 885 CURVNET_SET_QUIET(oifp->if_vnet); 886 epair_drain_rings(scb); 887 oifp->if_softc = NULL; 888 error = if_clone_destroyif(ifc, oifp); 889 if (error) 890 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 891 __func__, error); 892 epair_free_sc(scb); 893 CURVNET_RESTORE(); 894 895 epair_drain_rings(sca); 896 epair_free_sc(sca); 897 898 /* Last free the cloner unit. */ 899 ifc_free_unit(ifc, unit); 900 901 return (0); 902 } 903 904 static void 905 vnet_epair_init(const void *unused __unused) 906 { 907 struct if_clone_addreq req = { 908 .match_f = epair_clone_match, 909 .create_f = epair_clone_create, 910 .destroy_f = epair_clone_destroy, 911 }; 912 V_epair_cloner = ifc_attach_cloner(epairname, &req); 913 } 914 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 915 vnet_epair_init, NULL); 916 917 static void 918 vnet_epair_uninit(const void *unused __unused) 919 { 920 921 ifc_detach_cloner(V_epair_cloner); 922 } 923 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 924 vnet_epair_uninit, NULL); 925 926 static int 927 epair_mod_init(void) 928 { 929 char name[32]; 930 epair_tasks.tasks = 0; 931 932 #ifdef RSS 933 int cpu; 934 935 CPU_FOREACH(cpu) { 936 cpuset_t cpu_mask; 937 938 /* Pin to this CPU so we get appropriate NUMA allocations. */ 939 thread_lock(curthread); 940 sched_bind(curthread, cpu); 941 thread_unlock(curthread); 942 943 snprintf(name, sizeof(name), "epair_task_%d", cpu); 944 945 epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK, 946 taskqueue_thread_enqueue, 947 &epair_tasks.tq[cpu]); 948 CPU_SETOF(cpu, &cpu_mask); 949 taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET, 950 &cpu_mask, "%s", name); 951 952 epair_tasks.tasks++; 953 } 954 thread_lock(curthread); 955 sched_unbind(curthread); 956 thread_unlock(curthread); 957 #else 958 snprintf(name, sizeof(name), "epair_task"); 959 960 epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK, 961 taskqueue_thread_enqueue, 962 &epair_tasks.tq[0]); 963 taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name); 964 965 epair_tasks.tasks = 1; 966 #endif 967 968 return (0); 969 } 970 971 static void 972 epair_mod_cleanup(void) 973 { 974 975 for (int i = 0; i < epair_tasks.tasks; i++) { 976 taskqueue_drain_all(epair_tasks.tq[i]); 977 taskqueue_free(epair_tasks.tq[i]); 978 } 979 } 980 981 static int 982 epair_modevent(module_t mod, int type, void *data) 983 { 984 int ret; 985 986 switch (type) { 987 case MOD_LOAD: 988 EPAIR_LOCK_INIT(); 989 ret = epair_mod_init(); 990 if (ret != 0) 991 return (ret); 992 if (bootverbose) 993 printf("%s: %s initialized.\n", __func__, epairname); 994 break; 995 case MOD_UNLOAD: 996 epair_mod_cleanup(); 997 EPAIR_LOCK_DESTROY(); 998 if (bootverbose) 999 printf("%s: %s unloaded.\n", __func__, epairname); 1000 break; 1001 default: 1002 return (EOPNOTSUPP); 1003 } 1004 return (0); 1005 } 1006 1007 static moduledata_t epair_mod = { 1008 "if_epair", 1009 epair_modevent, 1010 0 1011 }; 1012 1013 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 1014 MODULE_VERSION(if_epair, 3); 1015