1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008 The FreeBSD Foundation 5 * Copyright (c) 2009-2021 Bjoern A. Zeeb <bz@FreeBSD.org> 6 * 7 * This software was developed by CK Software GmbH under sponsorship 8 * from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * A pair of virtual back-to-back connected ethernet like interfaces 34 * (``two interfaces with a virtual cross-over cable''). 35 * 36 * This is mostly intended to be used to provide connectivity between 37 * different virtual network stack instances. 38 */ 39 40 #include <sys/cdefs.h> 41 __FBSDID("$FreeBSD$"); 42 43 #include "opt_rss.h" 44 #include "opt_inet.h" 45 #include "opt_inet6.h" 46 47 #include <sys/param.h> 48 #include <sys/hash.h> 49 #include <sys/jail.h> 50 #include <sys/kernel.h> 51 #include <sys/libkern.h> 52 #include <sys/malloc.h> 53 #include <sys/mbuf.h> 54 #include <sys/module.h> 55 #include <sys/proc.h> 56 #include <sys/queue.h> 57 #include <sys/sched.h> 58 #include <sys/smp.h> 59 #include <sys/socket.h> 60 #include <sys/sockio.h> 61 #include <sys/taskqueue.h> 62 #include <sys/types.h> 63 #include <sys/buf_ring.h> 64 #include <sys/bus.h> 65 #include <sys/interrupt.h> 66 67 #include <net/bpf.h> 68 #include <net/ethernet.h> 69 #include <net/if.h> 70 #include <net/if_var.h> 71 #include <net/if_clone.h> 72 #include <net/if_media.h> 73 #include <net/if_var.h> 74 #include <net/if_types.h> 75 #include <net/netisr.h> 76 #ifdef RSS 77 #include <net/rss_config.h> 78 #ifdef INET 79 #include <netinet/in_rss.h> 80 #endif 81 #ifdef INET6 82 #include <netinet6/in6_rss.h> 83 #endif 84 #endif 85 #include <net/vnet.h> 86 87 static const char epairname[] = "epair"; 88 #define RXRSIZE 4096 /* Probably overkill by 4-8x. */ 89 90 static MALLOC_DEFINE(M_EPAIR, epairname, 91 "Pair of virtual cross-over connected Ethernet-like interfaces"); 92 93 VNET_DEFINE_STATIC(struct if_clone *, epair_cloner); 94 #define V_epair_cloner VNET(epair_cloner) 95 96 static unsigned int next_index = 0; 97 #define EPAIR_LOCK_INIT() mtx_init(&epair_n_index_mtx, "epairidx", \ 98 NULL, MTX_DEF) 99 #define EPAIR_LOCK_DESTROY() mtx_destroy(&epair_n_index_mtx) 100 #define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx) 101 #define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx) 102 103 #define BIT_QUEUE_TASK 0 104 #define BIT_MBUF_QUEUED 1 105 106 struct epair_softc; 107 struct epair_queue { 108 int id; 109 struct buf_ring *rxring[2]; 110 volatile int ridx; /* 0 || 1 */ 111 volatile long state; /* taskqueue coordination */ 112 struct task tx_task; 113 struct epair_softc *sc; 114 }; 115 116 static struct mtx epair_n_index_mtx; 117 struct epair_softc { 118 struct ifnet *ifp; /* This ifp. */ 119 struct ifnet *oifp; /* other ifp of pair. */ 120 int num_queues; 121 struct epair_queue *queues; 122 struct ifmedia media; /* Media config (fake). */ 123 STAILQ_ENTRY(epair_softc) entry; 124 }; 125 126 struct epair_tasks_t { 127 int tasks; 128 struct taskqueue *tq[MAXCPU]; 129 }; 130 131 static struct epair_tasks_t epair_tasks; 132 133 static void 134 epair_clear_mbuf(struct mbuf *m) 135 { 136 /* Remove any CSUM_SND_TAG as ether_input will barf. */ 137 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { 138 m_snd_tag_rele(m->m_pkthdr.snd_tag); 139 m->m_pkthdr.snd_tag = NULL; 140 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 141 } 142 143 m_tag_delete_nonpersistent(m); 144 } 145 146 static void 147 epair_if_input(struct epair_softc *sc, struct epair_queue *q, int ridx) 148 { 149 struct ifnet *ifp; 150 struct mbuf *m; 151 152 ifp = sc->ifp; 153 CURVNET_SET(ifp->if_vnet); 154 while (! buf_ring_empty(q->rxring[ridx])) { 155 m = buf_ring_dequeue_mc(q->rxring[ridx]); 156 if (m == NULL) 157 continue; 158 159 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 160 (*ifp->if_input)(ifp, m); 161 162 } 163 CURVNET_RESTORE(); 164 } 165 166 static void 167 epair_tx_start_deferred(void *arg, int pending) 168 { 169 struct epair_queue *q = (struct epair_queue *)arg; 170 struct epair_softc *sc = q->sc; 171 int ridx, nidx; 172 173 if_ref(sc->ifp); 174 ridx = atomic_load_int(&q->ridx); 175 do { 176 nidx = (ridx == 0) ? 1 : 0; 177 } while (!atomic_fcmpset_int(&q->ridx, &ridx, nidx)); 178 epair_if_input(sc, q, ridx); 179 180 atomic_clear_long(&q->state, (1 << BIT_QUEUE_TASK)); 181 if (atomic_testandclear_long(&q->state, BIT_MBUF_QUEUED)) 182 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 183 184 if_rele(sc->ifp); 185 } 186 187 static struct epair_queue * 188 epair_select_queue(struct epair_softc *sc, const struct mbuf *m) 189 { 190 uint32_t bucket; 191 #ifdef RSS 192 struct ether_header *eh; 193 194 ret = rss_m2bucket(m, &bucket); 195 if (ret) { 196 /* Actually hash the packet. */ 197 eh = mtod(m, struct ether_header *); 198 199 switch (ntohs(eh->ether_type)) { 200 #ifdef INET 201 case ETHERTYPE_IP: 202 rss_soft_m2cpuid_v4(m, 0, &bucket); 203 break; 204 #endif 205 #ifdef INET6 206 case ETHERTYPE_IPV6: 207 rss_soft_m2cpuid_v6(m, 0, &bucket); 208 break; 209 #endif 210 default: 211 bucket = 0; 212 break; 213 } 214 } 215 bucket %= sc->num_queues; 216 #else 217 bucket = 0; 218 #endif 219 return (&sc->queues[bucket]); 220 } 221 222 static void 223 epair_prepare_mbuf(struct mbuf *m, struct ifnet *src_ifp) 224 { 225 M_ASSERTPKTHDR(m); 226 epair_clear_mbuf(m); 227 if_setrcvif(m, src_ifp); 228 M_SETFIB(m, src_ifp->if_fib); 229 230 MPASS(m->m_nextpkt == NULL); 231 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 232 } 233 234 static void 235 epair_menq(struct mbuf *m, struct epair_softc *osc) 236 { 237 struct ifnet *ifp, *oifp; 238 int len, ret; 239 int ridx; 240 short mflags; 241 242 /* 243 * I know this looks weird. We pass the "other sc" as we need that one 244 * and can get both ifps from it as well. 245 */ 246 oifp = osc->ifp; 247 ifp = osc->oifp; 248 249 epair_prepare_mbuf(m, oifp); 250 251 /* Save values as once the mbuf is queued, it's not ours anymore. */ 252 len = m->m_pkthdr.len; 253 mflags = m->m_flags; 254 255 struct epair_queue *q = epair_select_queue(osc, m); 256 257 atomic_set_long(&q->state, (1 << BIT_MBUF_QUEUED)); 258 ridx = atomic_load_int(&q->ridx); 259 ret = buf_ring_enqueue(q->rxring[ridx], m); 260 if (ret != 0) { 261 /* Ring is full. */ 262 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 263 m_freem(m); 264 return; 265 } 266 267 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 268 /* 269 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 270 * but as we bypass all this we have to duplicate 271 * the logic another time. 272 */ 273 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 274 if (mflags & (M_BCAST|M_MCAST)) 275 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 276 /* Someone else received the packet. */ 277 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 278 279 if (!atomic_testandset_long(&q->state, BIT_QUEUE_TASK)) 280 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); 281 } 282 283 static void 284 epair_start(struct ifnet *ifp) 285 { 286 struct mbuf *m; 287 struct epair_softc *sc; 288 struct ifnet *oifp; 289 290 /* 291 * We get packets here from ether_output via if_handoff() 292 * and need to put them into the input queue of the oifp 293 * and will put the packet into the receive-queue (rxq) of the 294 * other interface (oifp) of our pair. 295 */ 296 sc = ifp->if_softc; 297 oifp = sc->oifp; 298 sc = oifp->if_softc; 299 for (;;) { 300 IFQ_DEQUEUE(&ifp->if_snd, m); 301 if (m == NULL) 302 break; 303 M_ASSERTPKTHDR(m); 304 BPF_MTAP(ifp, m); 305 306 /* In case either interface is not usable drop the packet. */ 307 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 308 (ifp->if_flags & IFF_UP) == 0 || 309 (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 310 (oifp->if_flags & IFF_UP) == 0) { 311 m_freem(m); 312 continue; 313 } 314 315 epair_menq(m, sc); 316 } 317 } 318 319 static int 320 epair_transmit(struct ifnet *ifp, struct mbuf *m) 321 { 322 struct epair_softc *sc; 323 struct ifnet *oifp; 324 #ifdef ALTQ 325 int len; 326 short mflags; 327 #endif 328 329 if (m == NULL) 330 return (0); 331 M_ASSERTPKTHDR(m); 332 333 /* 334 * We are not going to use the interface en/dequeue mechanism 335 * on the TX side. We are called from ether_output_frame() 336 * and will put the packet into the receive-queue (rxq) of the 337 * other interface (oifp) of our pair. 338 */ 339 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 340 m_freem(m); 341 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 342 return (ENXIO); 343 } 344 if ((ifp->if_flags & IFF_UP) == 0) { 345 m_freem(m); 346 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 347 return (ENETDOWN); 348 } 349 350 BPF_MTAP(ifp, m); 351 352 /* 353 * In case the outgoing interface is not usable, 354 * drop the packet. 355 */ 356 sc = ifp->if_softc; 357 oifp = sc->oifp; 358 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 359 (oifp->if_flags & IFF_UP) == 0) { 360 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 361 m_freem(m); 362 return (0); 363 } 364 365 #ifdef ALTQ 366 len = m->m_pkthdr.len; 367 mflags = m->m_flags; 368 int error = 0; 369 370 /* Support ALTQ via the classic if_start() path. */ 371 IF_LOCK(&ifp->if_snd); 372 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 373 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 374 if (error) 375 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 376 IF_UNLOCK(&ifp->if_snd); 377 if (!error) { 378 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 379 if (mflags & (M_BCAST|M_MCAST)) 380 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 381 epair_start(ifp); 382 } 383 return (error); 384 } 385 IF_UNLOCK(&ifp->if_snd); 386 #endif 387 388 epair_menq(m, oifp->if_softc); 389 return (0); 390 } 391 392 static void 393 epair_qflush(struct ifnet *ifp __unused) 394 { 395 } 396 397 static int 398 epair_media_change(struct ifnet *ifp __unused) 399 { 400 401 /* Do nothing. */ 402 return (0); 403 } 404 405 static void 406 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 407 { 408 409 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 410 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 411 } 412 413 static int 414 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 415 { 416 struct epair_softc *sc; 417 struct ifreq *ifr; 418 int error; 419 420 ifr = (struct ifreq *)data; 421 switch (cmd) { 422 case SIOCSIFFLAGS: 423 case SIOCADDMULTI: 424 case SIOCDELMULTI: 425 error = 0; 426 break; 427 428 case SIOCSIFMEDIA: 429 case SIOCGIFMEDIA: 430 sc = ifp->if_softc; 431 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 432 break; 433 434 case SIOCSIFMTU: 435 /* We basically allow all kinds of MTUs. */ 436 ifp->if_mtu = ifr->ifr_mtu; 437 error = 0; 438 break; 439 440 default: 441 /* Let the common ethernet handler process this. */ 442 error = ether_ioctl(ifp, cmd, data); 443 break; 444 } 445 446 return (error); 447 } 448 449 static void 450 epair_init(void *dummy __unused) 451 { 452 } 453 454 /* 455 * Interface cloning functions. 456 * We use our private ones so that we can create/destroy our secondary 457 * device along with the primary one. 458 */ 459 static int 460 epair_clone_match(struct if_clone *ifc, const char *name) 461 { 462 const char *cp; 463 464 /* 465 * Our base name is epair. 466 * Our interfaces will be named epair<n>[ab]. 467 * So accept anything of the following list: 468 * - epair 469 * - epair<n> 470 * but not the epair<n>[ab] versions. 471 */ 472 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 473 return (0); 474 475 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 476 if (*cp < '0' || *cp > '9') 477 return (0); 478 } 479 480 return (1); 481 } 482 483 static void 484 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb) 485 { 486 struct ifnet *ifp; 487 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 488 489 ifp = scb->ifp; 490 /* Copy epairNa etheraddr and change the last byte. */ 491 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN); 492 eaddr[5] = 0x0b; 493 ether_ifattach(ifp, eaddr); 494 495 if_clone_addif(ifc, ifp); 496 } 497 498 static struct epair_softc * 499 epair_alloc_sc(struct if_clone *ifc) 500 { 501 struct epair_softc *sc; 502 503 struct ifnet *ifp = if_alloc(IFT_ETHER); 504 if (ifp == NULL) 505 return (NULL); 506 507 sc = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 508 sc->ifp = ifp; 509 sc->num_queues = epair_tasks.tasks; 510 sc->queues = mallocarray(sc->num_queues, sizeof(struct epair_queue), 511 M_EPAIR, M_WAITOK); 512 for (int i = 0; i < sc->num_queues; i++) { 513 struct epair_queue *q = &sc->queues[i]; 514 q->id = i; 515 q->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL); 516 q->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL); 517 q->ridx = 0; 518 q->state = 0; 519 q->sc = sc; 520 NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q); 521 } 522 523 /* Initialise pseudo media types. */ 524 ifmedia_init(&sc->media, 0, epair_media_change, epair_media_status); 525 ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_T, 0, NULL); 526 ifmedia_set(&sc->media, IFM_ETHER | IFM_10G_T); 527 528 return (sc); 529 } 530 531 static void 532 epair_setup_ifp(struct epair_softc *sc, char *name, int unit) 533 { 534 struct ifnet *ifp = sc->ifp; 535 536 ifp->if_softc = sc; 537 strlcpy(ifp->if_xname, name, IFNAMSIZ); 538 ifp->if_dname = epairname; 539 ifp->if_dunit = unit; 540 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 541 ifp->if_flags |= IFF_KNOWSEPOCH; 542 ifp->if_capabilities = IFCAP_VLAN_MTU; 543 ifp->if_capenable = IFCAP_VLAN_MTU; 544 ifp->if_transmit = epair_transmit; 545 ifp->if_qflush = epair_qflush; 546 ifp->if_start = epair_start; 547 ifp->if_ioctl = epair_ioctl; 548 ifp->if_init = epair_init; 549 if_setsendqlen(ifp, ifqmaxlen); 550 if_setsendqready(ifp); 551 552 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 553 } 554 555 static void 556 epair_generate_mac(struct epair_softc *sc, uint8_t *eaddr) 557 { 558 uint32_t key[3]; 559 uint32_t hash; 560 uint64_t hostid; 561 562 EPAIR_LOCK(); 563 #ifdef SMP 564 /* Get an approximate distribution. */ 565 hash = next_index % mp_ncpus; 566 #else 567 hash = 0; 568 #endif 569 EPAIR_UNLOCK(); 570 571 /* 572 * Calculate the etheraddr hashing the hostid and the 573 * interface index. The result would be hopefully unique. 574 * Note that the "a" component of an epair instance may get moved 575 * to a different VNET after creation. In that case its index 576 * will be freed and the index can get reused by new epair instance. 577 * Make sure we do not create same etheraddr again. 578 */ 579 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid); 580 if (hostid == 0) 581 arc4rand(&hostid, sizeof(hostid), 0); 582 583 struct ifnet *ifp = sc->ifp; 584 EPAIR_LOCK(); 585 if (ifp->if_index > next_index) 586 next_index = ifp->if_index; 587 else 588 next_index++; 589 590 key[0] = (uint32_t)next_index; 591 EPAIR_UNLOCK(); 592 key[1] = (uint32_t)(hostid & 0xffffffff); 593 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff); 594 hash = jenkins_hash32(key, 3, 0); 595 596 eaddr[0] = 0x02; 597 memcpy(&eaddr[1], &hash, 4); 598 eaddr[5] = 0x0a; 599 } 600 601 static void 602 epair_free_sc(struct epair_softc *sc) 603 { 604 if (sc == NULL) 605 return; 606 607 if_free(sc->ifp); 608 ifmedia_removeall(&sc->media); 609 for (int i = 0; i < sc->num_queues; i++) { 610 struct epair_queue *q = &sc->queues[i]; 611 buf_ring_free(q->rxring[0], M_EPAIR); 612 buf_ring_free(q->rxring[1], M_EPAIR); 613 } 614 free(sc->queues, M_EPAIR); 615 free(sc, M_EPAIR); 616 } 617 618 static void 619 epair_set_state(struct ifnet *ifp, bool running) 620 { 621 if (running) { 622 ifp->if_drv_flags |= IFF_DRV_RUNNING; 623 if_link_state_change(ifp, LINK_STATE_UP); 624 } else { 625 if_link_state_change(ifp, LINK_STATE_DOWN); 626 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 627 } 628 } 629 630 static int 631 epair_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit) 632 { 633 int error = 0, unit, wildcard; 634 char *dp; 635 636 /* Try to see if a special unit was requested. */ 637 error = ifc_name2unit(name, &unit); 638 if (error != 0) 639 return (error); 640 wildcard = (unit < 0); 641 642 error = ifc_alloc_unit(ifc, &unit); 643 if (error != 0) 644 return (error); 645 646 /* 647 * If no unit had been given, we need to adjust the ifName. 648 * Also make sure there is space for our extra [ab] suffix. 649 */ 650 for (dp = name; *dp != '\0'; dp++); 651 if (wildcard) { 652 int slen = snprintf(dp, len - (dp - name), "%d", unit); 653 if (slen > len - (dp - name) - 1) { 654 /* ifName too long. */ 655 error = ENOSPC; 656 goto done; 657 } 658 dp += slen; 659 } 660 if (len - (dp - name) - 1 < 1) { 661 /* No space left for our [ab] suffix. */ 662 error = ENOSPC; 663 goto done; 664 } 665 *dp = 'b'; 666 /* Must not change dp so we can replace 'a' by 'b' later. */ 667 *(dp+1) = '\0'; 668 669 /* Check if 'a' and 'b' interfaces already exist. */ 670 if (ifunit(name) != NULL) { 671 error = EEXIST; 672 goto done; 673 } 674 675 *dp = 'a'; 676 if (ifunit(name) != NULL) { 677 error = EEXIST; 678 goto done; 679 } 680 *punit = unit; 681 done: 682 if (error != 0) 683 ifc_free_unit(ifc, unit); 684 685 return (error); 686 } 687 688 static int 689 epair_clone_create(struct if_clone *ifc, char *name, size_t len, 690 struct ifc_data *ifd, struct ifnet **ifpp) 691 { 692 struct epair_softc *sca, *scb; 693 struct ifnet *ifp; 694 char *dp; 695 int error, unit; 696 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 697 698 error = epair_handle_unit(ifc, name, len, &unit); 699 if (error != 0) 700 return (error); 701 702 /* Allocate memory for both [ab] interfaces */ 703 sca = epair_alloc_sc(ifc); 704 scb = epair_alloc_sc(ifc); 705 if (sca == NULL || scb == NULL) { 706 epair_free_sc(sca); 707 epair_free_sc(scb); 708 ifc_free_unit(ifc, unit); 709 return (ENOSPC); 710 } 711 712 /* 713 * Cross-reference the interfaces so we will be able to free both. 714 */ 715 sca->oifp = scb->ifp; 716 scb->oifp = sca->ifp; 717 718 /* Finish initialization of interface <n>a. */ 719 ifp = sca->ifp; 720 epair_setup_ifp(sca, name, unit); 721 epair_generate_mac(sca, eaddr); 722 723 ether_ifattach(ifp, eaddr); 724 725 /* Swap the name and finish initialization of interface <n>b. */ 726 dp = name + strlen(name) - 1; 727 *dp = 'b'; 728 729 epair_setup_ifp(scb, name, unit); 730 731 ifp = scb->ifp; 732 /* We need to play some tricks here for the second interface. */ 733 strlcpy(name, epairname, len); 734 /* Correctly set the name for the cloner list. */ 735 strlcpy(name, scb->ifp->if_xname, len); 736 737 epair_clone_add(ifc, scb); 738 739 /* 740 * Restore name to <n>a as the ifp for this will go into the 741 * cloner list for the initial call. 742 */ 743 strlcpy(name, sca->ifp->if_xname, len); 744 745 /* Tell the world, that we are ready to rock. */ 746 epair_set_state(sca->ifp, true); 747 epair_set_state(scb->ifp, true); 748 749 *ifpp = sca->ifp; 750 751 return (0); 752 } 753 754 static void 755 epair_drain_rings(struct epair_softc *sc) 756 { 757 int ridx; 758 struct mbuf *m; 759 760 for (ridx = 0; ridx < 2; ridx++) { 761 for (int i = 0; i < sc->num_queues; i++) { 762 struct epair_queue *q = &sc->queues[i]; 763 do { 764 m = buf_ring_dequeue_sc(q->rxring[ridx]); 765 if (m == NULL) 766 break; 767 m_freem(m); 768 } while (1); 769 } 770 } 771 } 772 773 static int 774 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 775 { 776 struct ifnet *oifp; 777 struct epair_softc *sca, *scb; 778 int unit, error; 779 780 /* 781 * In case we called into if_clone_destroyif() ourselves 782 * again to remove the second interface, the softc will be 783 * NULL. In that case so not do anything but return success. 784 */ 785 if (ifp->if_softc == NULL) 786 return (0); 787 788 unit = ifp->if_dunit; 789 sca = ifp->if_softc; 790 oifp = sca->oifp; 791 scb = oifp->if_softc; 792 793 /* Frist get the interfaces down and detached. */ 794 epair_set_state(ifp, false); 795 epair_set_state(oifp, false); 796 797 ether_ifdetach(ifp); 798 ether_ifdetach(oifp); 799 800 /* Third free any queued packets and all the resources. */ 801 CURVNET_SET_QUIET(oifp->if_vnet); 802 epair_drain_rings(scb); 803 oifp->if_softc = NULL; 804 error = if_clone_destroyif(ifc, oifp); 805 if (error) 806 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 807 __func__, error); 808 epair_free_sc(scb); 809 CURVNET_RESTORE(); 810 811 epair_drain_rings(sca); 812 epair_free_sc(sca); 813 814 /* Last free the cloner unit. */ 815 ifc_free_unit(ifc, unit); 816 817 return (0); 818 } 819 820 static void 821 vnet_epair_init(const void *unused __unused) 822 { 823 struct if_clone_addreq req = { 824 .match_f = epair_clone_match, 825 .create_f = epair_clone_create, 826 .destroy_f = epair_clone_destroy, 827 }; 828 V_epair_cloner = ifc_attach_cloner(epairname, &req); 829 } 830 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 831 vnet_epair_init, NULL); 832 833 static void 834 vnet_epair_uninit(const void *unused __unused) 835 { 836 837 ifc_detach_cloner(V_epair_cloner); 838 } 839 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 840 vnet_epair_uninit, NULL); 841 842 static int 843 epair_mod_init(void) 844 { 845 char name[32]; 846 epair_tasks.tasks = 0; 847 848 #ifdef RSS 849 int cpu; 850 851 CPU_FOREACH(cpu) { 852 cpuset_t cpu_mask; 853 854 /* Pin to this CPU so we get appropriate NUMA allocations. */ 855 thread_lock(curthread); 856 sched_bind(curthread, cpu); 857 thread_unlock(curthread); 858 859 snprintf(name, sizeof(name), "epair_task_%d", cpu); 860 861 epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK, 862 taskqueue_thread_enqueue, 863 &epair_tasks.tq[cpu]); 864 CPU_SETOF(cpu, &cpu_mask); 865 taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET, 866 &cpu_mask, "%s", name); 867 868 epair_tasks.tasks++; 869 } 870 thread_lock(curthread); 871 sched_unbind(curthread); 872 thread_unlock(curthread); 873 #else 874 snprintf(name, sizeof(name), "epair_task"); 875 876 epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK, 877 taskqueue_thread_enqueue, 878 &epair_tasks.tq[0]); 879 taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name); 880 881 epair_tasks.tasks = 1; 882 #endif 883 884 return (0); 885 } 886 887 static void 888 epair_mod_cleanup(void) 889 { 890 891 for (int i = 0; i < epair_tasks.tasks; i++) { 892 taskqueue_drain_all(epair_tasks.tq[i]); 893 taskqueue_free(epair_tasks.tq[i]); 894 } 895 } 896 897 static int 898 epair_modevent(module_t mod, int type, void *data) 899 { 900 int ret; 901 902 switch (type) { 903 case MOD_LOAD: 904 EPAIR_LOCK_INIT(); 905 ret = epair_mod_init(); 906 if (ret != 0) 907 return (ret); 908 if (bootverbose) 909 printf("%s: %s initialized.\n", __func__, epairname); 910 break; 911 case MOD_UNLOAD: 912 epair_mod_cleanup(); 913 EPAIR_LOCK_DESTROY(); 914 if (bootverbose) 915 printf("%s: %s unloaded.\n", __func__, epairname); 916 break; 917 default: 918 return (EOPNOTSUPP); 919 } 920 return (0); 921 } 922 923 static moduledata_t epair_mod = { 924 "if_epair", 925 epair_modevent, 926 0 927 }; 928 929 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 930 MODULE_VERSION(if_epair, 3); 931