/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2008 The FreeBSD Foundation
 * Copyright (c) 2009-2021 Bjoern A. Zeeb <bz@FreeBSD.org>
 *
 * This software was developed by CK Software GmbH under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * A pair of virtual back-to-back connected Ethernet-like interfaces
 * (``two interfaces with a virtual cross-over cable'').
 *
 * This is mostly intended to be used to provide connectivity between
 * different virtual network stack instances.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_rss.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/hash.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/taskqueue.h>
#include <sys/types.h>
#include <sys/buf_ring.h>
#include <sys/bus.h>
#include <sys/interrupt.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/netisr.h>
#ifdef RSS
#include <net/rss_config.h>
#ifdef INET
#include <netinet/in_rss.h>
#endif
#ifdef INET6
#include <netinet6/in6_rss.h>
#endif
#endif
#include <net/vnet.h>

static int epair_clone_match(struct if_clone *, const char *);
static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int epair_clone_destroy(struct if_clone *, struct ifnet *);

static const char epairname[] = "epair";
#define	RXRSIZE	4096	/* Probably overkill by 4-8x. */
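
/*
 * Illustrative userland usage (not referenced by the code below), e.g.:
 *
 *	ifconfig epair create		# creates epair0a and epair0b
 *	ifconfig epair0b vnet <jail>	# move the b end into a vnet jail
 */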

static MALLOC_DEFINE(M_EPAIR, epairname,
    "Pair of virtual cross-over connected Ethernet-like interfaces");

VNET_DEFINE_STATIC(struct if_clone *, epair_cloner);
#define	V_epair_cloner	VNET(epair_cloner)

static unsigned int next_index = 0;
#define	EPAIR_LOCK_INIT()	mtx_init(&epair_n_index_mtx, "epairidx", \
				    NULL, MTX_DEF)
#define	EPAIR_LOCK_DESTROY()	mtx_destroy(&epair_n_index_mtx)
#define	EPAIR_LOCK()		mtx_lock(&epair_n_index_mtx)
#define	EPAIR_UNLOCK()		mtx_unlock(&epair_n_index_mtx)

struct epair_softc;
struct epair_queue {
	int			 id;
	struct buf_ring		*rxring[2];
	volatile int		 ridx;		/* 0 || 1 */
	struct task		 tx_task;
	struct epair_softc	*sc;
};

static struct mtx epair_n_index_mtx;
struct epair_softc {
	struct ifnet		*ifp;		/* This ifp. */
	struct ifnet		*oifp;		/* other ifp of pair. */
	int			 num_queues;
	struct epair_queue	*queues;
	struct ifmedia		 media;		/* Media config (fake). */
	STAILQ_ENTRY(epair_softc) entry;
};

struct epair_tasks_t {
	int			 tasks;
	struct taskqueue	*tq[MAXCPU];
};

static struct epair_tasks_t epair_tasks;

static void
epair_clear_mbuf(struct mbuf *m)
{
	/* Remove any CSUM_SND_TAG as ether_input will barf. */
	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
		m_snd_tag_rele(m->m_pkthdr.snd_tag);
		m->m_pkthdr.snd_tag = NULL;
		m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
	}

	m_tag_delete_nonpersistent(m);
}

static void
epair_if_input(struct epair_softc *sc, struct epair_queue *q, int ridx)
{
	struct ifnet *ifp;
	struct mbuf *m;

	ifp = sc->ifp;
	CURVNET_SET(ifp->if_vnet);
	while (! buf_ring_empty(q->rxring[ridx])) {
		m = buf_ring_dequeue_mc(q->rxring[ridx]);
		if (m == NULL)
			continue;

		MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
		(*ifp->if_input)(ifp, m);

	}
	CURVNET_RESTORE();
}

static void
epair_tx_start_deferred(void *arg, int pending)
{
	struct epair_queue *q = (struct epair_queue *)arg;
	struct epair_softc *sc = q->sc;
	int ridx, nidx;

	if_ref(sc->ifp);
	ridx = atomic_load_int(&q->ridx);
	do {
		nidx = (ridx == 0) ? 1 : 0;
	} while (!atomic_fcmpset_int(&q->ridx, &ridx, nidx));
	epair_if_input(sc, q, ridx);

	if (! buf_ring_empty(q->rxring[nidx]))
		taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task);

	if_rele(sc->ifp);
}

static int
epair_menq(struct mbuf *m, struct epair_softc *osc)
{
	struct ifnet *ifp, *oifp;
	int len, ret;
	int ridx;
	short mflags;
	struct epair_queue *q = NULL;
	uint32_t bucket;
	bool was_empty;
#ifdef RSS
	struct ether_header *eh;
#endif

	/*
	 * I know this looks weird. We pass the "other sc" as we need that one
	 * and can get both ifps from it as well.
	 */
	oifp = osc->ifp;
	ifp = osc->oifp;

	M_ASSERTPKTHDR(m);
	epair_clear_mbuf(m);
	if_setrcvif(m, oifp);
	M_SETFIB(m, oifp->if_fib);

	/* Save values as once the mbuf is queued, it's not ours anymore. */
	len = m->m_pkthdr.len;
	mflags = m->m_flags;

	MPASS(m->m_nextpkt == NULL);
	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);

#ifdef RSS
	ret = rss_m2bucket(m, &bucket);
	if (ret) {
		/* Actually hash the packet. */
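		/*
		 * rss_m2bucket() could not map the mbuf to an RSS bucket
		 * (e.g. no usable flowid), so hash the IP/IPv6 header in
		 * software below, keyed off the Ethernet type.
		 */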
		eh = mtod(m, struct ether_header *);

		switch (ntohs(eh->ether_type)) {
#ifdef INET
		case ETHERTYPE_IP:
			rss_soft_m2cpuid_v4(m, 0, &bucket);
			break;
#endif
#ifdef INET6
		case ETHERTYPE_IPV6:
			rss_soft_m2cpuid_v6(m, 0, &bucket);
			break;
#endif
		default:
			bucket = 0;
			break;
		}
	}
	bucket %= osc->num_queues;
#else
	bucket = 0;
#endif
	q = &osc->queues[bucket];

	ridx = atomic_load_int(&q->ridx);
	was_empty = buf_ring_empty(q->rxring[ridx]);
	ret = buf_ring_enqueue(q->rxring[ridx], m);
	if (ret != 0) {
		/* Ring is full. */
		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
		m_freem(m);
		goto done;
	}

	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
	/*
	 * IFQ_HANDOFF_ADJ/if_handoff() update statistics,
	 * but as we bypass all this we have to duplicate
	 * the logic another time.
	 */
	if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
	if (mflags & (M_BCAST|M_MCAST))
		if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
	/* Someone else received the packet. */
	if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);

done:
	if (was_empty)
		taskqueue_enqueue(epair_tasks.tq[bucket], &q->tx_task);

	return (0);
}

static void
epair_start(struct ifnet *ifp)
{
	struct mbuf *m;
	struct epair_softc *sc;
	struct ifnet *oifp;

	/*
	 * We get packets here from ether_output via if_handoff()
	 * and need to put them into the receive queue (rxq) of the
	 * other interface (oifp) of our pair.
	 */
	sc = ifp->if_softc;
	oifp = sc->oifp;
	sc = oifp->if_softc;
	for (;;) {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		M_ASSERTPKTHDR(m);
		BPF_MTAP(ifp, m);

		/* In case either interface is not usable drop the packet. */
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
		    (ifp->if_flags & IFF_UP) == 0 ||
		    (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
		    (oifp->if_flags & IFF_UP) == 0) {
			m_freem(m);
			continue;
		}

		(void) epair_menq(m, sc);
	}
}

static int
epair_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct epair_softc *sc;
	struct ifnet *oifp;
	int error;
#ifdef ALTQ
	int len;
	short mflags;
#endif

	if (m == NULL)
		return (0);
	M_ASSERTPKTHDR(m);

	/*
	 * We are not going to use the interface en/dequeue mechanism
	 * on the TX side. We are called from ether_output_frame()
	 * and will put the packet into the receive-queue (rxq) of the
	 * other interface (oifp) of our pair.
	 */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		m_freem(m);
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENXIO);
	}
	if ((ifp->if_flags & IFF_UP) == 0) {
		m_freem(m);
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENETDOWN);
	}

	BPF_MTAP(ifp, m);

	/*
	 * In case the outgoing interface is not usable,
	 * drop the packet.
	 */
	sc = ifp->if_softc;
	oifp = sc->oifp;
	if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    (oifp->if_flags & IFF_UP) == 0) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		m_freem(m);
		return (0);
	}

#ifdef ALTQ
	len = m->m_pkthdr.len;
	mflags = m->m_flags;

	/* Support ALTQ via the classic if_start() path. */
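	/*
	 * With ALTQ enabled on the send queue, enqueue there and let
	 * epair_start() drain it; otherwise fall through and hand the
	 * packet to epair_menq() directly below.
	 */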
	IF_LOCK(&ifp->if_snd);
	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
		if (error)
			if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
		IF_UNLOCK(&ifp->if_snd);
		if (!error) {
			if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
			if (mflags & (M_BCAST|M_MCAST))
				if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
			epair_start(ifp);
		}
		return (error);
	}
	IF_UNLOCK(&ifp->if_snd);
#endif

	error = epair_menq(m, oifp->if_softc);
	return (error);
}

static int
epair_media_change(struct ifnet *ifp __unused)
{

	/* Do nothing. */
	return (0);
}

static void
epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr)
{

	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
	imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
}

static int
epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct epair_softc *sc;
	struct ifreq *ifr;
	int error;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFFLAGS:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = 0;
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		sc = ifp->if_softc;
		error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd);
		break;

	case SIOCSIFMTU:
		/* We basically allow all kinds of MTUs. */
		ifp->if_mtu = ifr->ifr_mtu;
		error = 0;
		break;

	default:
		/* Let the common ethernet handler process this. */
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	return (error);
}

static void
epair_init(void *dummy __unused)
{
}

/*
 * Interface cloning functions.
 * We use our private ones so that we can create/destroy our secondary
 * device along with the primary one.
 */
static int
epair_clone_match(struct if_clone *ifc, const char *name)
{
	const char *cp;

	/*
	 * Our base name is epair.
	 * Our interfaces will be named epair<n>[ab].
	 * So accept anything of the following list:
	 * - epair
	 * - epair<n>
	 * but not the epair<n>[ab] versions.
	 */
	if (strncmp(epairname, name, sizeof(epairname)-1) != 0)
		return (0);

	for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) {
		if (*cp < '0' || *cp > '9')
			return (0);
	}

	return (1);
}

static void
epair_clone_add(struct if_clone *ifc, struct epair_softc *scb)
{
	struct ifnet *ifp;
	uint8_t eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */

	ifp = scb->ifp;
	/* Copy epairNa etheraddr and change the last byte. */
	memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
	eaddr[5] = 0x0b;
	ether_ifattach(ifp, eaddr);

	if_clone_addif(ifc, ifp);
}

static int
epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
{
	struct epair_softc *sca, *scb;
	struct ifnet *ifp;
	char *dp;
	int error, unit, wildcard;
	uint64_t hostid;
	uint32_t key[3];
	uint32_t hash;
	uint8_t eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */

	/* Try to see if a special unit was requested. */
	error = ifc_name2unit(name, &unit);
	if (error != 0)
		return (error);
	wildcard = (unit < 0);

	error = ifc_alloc_unit(ifc, &unit);
	if (error != 0)
		return (error);

	/*
	 * If no unit had been given, we need to adjust the ifName.
	 * Also make sure there is space for our extra [ab] suffix.
	 */
	for (dp = name; *dp != '\0'; dp++);
	if (wildcard) {
		error = snprintf(dp, len - (dp - name), "%d", unit);
		if (error > len - (dp - name) - 1) {
			/* ifName too long. */
			ifc_free_unit(ifc, unit);
			return (ENOSPC);
		}
		dp += error;
	}
	if (len - (dp - name) - 1 < 1) {
		/* No space left for our [ab] suffix. */
		ifc_free_unit(ifc, unit);
		return (ENOSPC);
	}
	*dp = 'b';
	/* Must not change dp so we can replace 'a' by 'b' later. */
	*(dp+1) = '\0';

	/* Check if 'a' and 'b' interfaces already exist. */
	if (ifunit(name) != NULL)
		return (EEXIST);
	*dp = 'a';
	if (ifunit(name) != NULL)
		return (EEXIST);

	/* Allocate memory for both [ab] interfaces */
	sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
	sca->ifp = if_alloc(IFT_ETHER);
	sca->num_queues = epair_tasks.tasks;
	if (sca->ifp == NULL) {
		free(sca, M_EPAIR);
		ifc_free_unit(ifc, unit);
		return (ENOSPC);
	}
	sca->queues = mallocarray(sca->num_queues, sizeof(struct epair_queue),
	    M_EPAIR, M_WAITOK);
	for (int i = 0; i < sca->num_queues; i++) {
		struct epair_queue *q = &sca->queues[i];
		q->id = i;
		q->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
		q->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
		q->ridx = 0;
		q->sc = sca;
		NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q);
	}

	scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
	scb->ifp = if_alloc(IFT_ETHER);
	scb->num_queues = epair_tasks.tasks;
	if (scb->ifp == NULL) {
		free(scb, M_EPAIR);
		if_free(sca->ifp);
		free(sca, M_EPAIR);
		ifc_free_unit(ifc, unit);
		return (ENOSPC);
	}
	scb->queues = mallocarray(scb->num_queues, sizeof(struct epair_queue),
	    M_EPAIR, M_WAITOK);
	for (int i = 0; i < scb->num_queues; i++) {
		struct epair_queue *q = &scb->queues[i];
		q->id = i;
		q->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
		q->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
		q->ridx = 0;
		q->sc = scb;
		NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q);
	}

	/*
	 * Cross-reference the interfaces so we will be able to free both.
	 */
	sca->oifp = scb->ifp;
	scb->oifp = sca->ifp;

	EPAIR_LOCK();
#ifdef SMP
	/* Get an approximate distribution. */
	hash = next_index % mp_ncpus;
#else
	hash = 0;
#endif
	EPAIR_UNLOCK();

	/* Initialise pseudo media types. */
	ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
	ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
	ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
	ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
	ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
	ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);

	/* Finish initialization of interface <n>a. */
	ifp = sca->ifp;
	ifp->if_softc = sca;
	strlcpy(ifp->if_xname, name, IFNAMSIZ);
	ifp->if_dname = epairname;
	ifp->if_dunit = unit;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_flags |= IFF_KNOWSEPOCH;
	ifp->if_capabilities = IFCAP_VLAN_MTU;
	ifp->if_capenable = IFCAP_VLAN_MTU;
	ifp->if_start = epair_start;
	ifp->if_ioctl = epair_ioctl;
	ifp->if_init = epair_init;
	if_setsendqlen(ifp, ifqmaxlen);
	if_setsendqready(ifp);

	/*
	 * Calculate the etheraddr by hashing the hostid and the
	 * interface index. The result should hopefully be unique.
	 * Note that the "a" component of an epair instance may get moved
	 * to a different VNET after creation. In that case its index
	 * will be freed and the index can get reused by a new epair instance.
	 * Make sure we do not create the same etheraddr again.
	 */
	getcredhostid(curthread->td_ucred, (unsigned long *)&hostid);
	if (hostid == 0)
		arc4rand(&hostid, sizeof(hostid), 0);

	EPAIR_LOCK();
	if (ifp->if_index > next_index)
		next_index = ifp->if_index;
	else
		next_index++;

	key[0] = (uint32_t)next_index;
	EPAIR_UNLOCK();
	key[1] = (uint32_t)(hostid & 0xffffffff);
	key[2] = (uint32_t)((hostid >> 32) & 0xffffffff);
	hash = jenkins_hash32(key, 3, 0);

	eaddr[0] = 0x02;
	memcpy(&eaddr[1], &hash, 4);
	eaddr[5] = 0x0a;
	ether_ifattach(ifp, eaddr);
	ifp->if_baudrate = IF_Gbps(10);	/* arbitrary maximum */
	ifp->if_transmit = epair_transmit;

	/* Swap the name and finish initialization of interface <n>b. */
	*dp = 'b';

	ifp = scb->ifp;
	ifp->if_softc = scb;
	strlcpy(ifp->if_xname, name, IFNAMSIZ);
	ifp->if_dname = epairname;
	ifp->if_dunit = unit;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_flags |= IFF_KNOWSEPOCH;
	ifp->if_capabilities = IFCAP_VLAN_MTU;
	ifp->if_capenable = IFCAP_VLAN_MTU;
	ifp->if_start = epair_start;
	ifp->if_ioctl = epair_ioctl;
	ifp->if_init = epair_init;
	if_setsendqlen(ifp, ifqmaxlen);
	if_setsendqready(ifp);
	/* We need to play some tricks here for the second interface. */
	strlcpy(name, epairname, len);

	/* Correctly set the name for the cloner list. */
	strlcpy(name, scb->ifp->if_xname, len);
	epair_clone_add(ifc, scb);

	ifp->if_baudrate = IF_Gbps(10);	/* arbitrary maximum */
	ifp->if_transmit = epair_transmit;

	/*
	 * Restore name to <n>a as the ifp for this will go into the
	 * cloner list for the initial call.
	 */
	strlcpy(name, sca->ifp->if_xname, len);

	/* Tell the world that we are ready to rock. */
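	/*
	 * A virtual cross-over is always "connected", so mark both ends
	 * running and report their link state as UP right away.
	 */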
	sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	if_link_state_change(sca->ifp, LINK_STATE_UP);
	scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	if_link_state_change(scb->ifp, LINK_STATE_UP);

	return (0);
}

static void
epair_drain_rings(struct epair_softc *sc)
{
	int ridx;
	struct mbuf *m;

	for (ridx = 0; ridx < 2; ridx++) {
		for (int i = 0; i < sc->num_queues; i++) {
			struct epair_queue *q = &sc->queues[i];
			do {
				m = buf_ring_dequeue_sc(q->rxring[ridx]);
				if (m == NULL)
					break;
				m_freem(m);
			} while (1);
		}
	}
}

static int
epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
{
	struct ifnet *oifp;
	struct epair_softc *sca, *scb;
	int unit, error;

	/*
	 * In case we called into if_clone_destroyif() ourselves
	 * again to remove the second interface, the softc will be
	 * NULL. In that case do not do anything but return success.
	 */
	if (ifp->if_softc == NULL)
		return (0);

	unit = ifp->if_dunit;
	sca = ifp->if_softc;
	oifp = sca->oifp;
	scb = oifp->if_softc;

	/* First get the interfaces down and detached. */
	if_link_state_change(ifp, LINK_STATE_DOWN);
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	if_link_state_change(oifp, LINK_STATE_DOWN);
	oifp->if_drv_flags &= ~IFF_DRV_RUNNING;

	ether_ifdetach(ifp);
	ether_ifdetach(oifp);

	/* Then free any queued packets and all the resources. */
	CURVNET_SET_QUIET(oifp->if_vnet);
	epair_drain_rings(scb);
	oifp->if_softc = NULL;
	error = if_clone_destroyif(ifc, oifp);
	if (error)
		panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
		    __func__, error);
	if_free(oifp);
	ifmedia_removeall(&scb->media);
	for (int i = 0; i < scb->num_queues; i++) {
		struct epair_queue *q = &scb->queues[i];
		buf_ring_free(q->rxring[0], M_EPAIR);
		buf_ring_free(q->rxring[1], M_EPAIR);
	}
	free(scb->queues, M_EPAIR);
	free(scb, M_EPAIR);
	CURVNET_RESTORE();

	epair_drain_rings(sca);
	if_free(ifp);
	ifmedia_removeall(&sca->media);
	for (int i = 0; i < sca->num_queues; i++) {
		struct epair_queue *q = &sca->queues[i];
		buf_ring_free(q->rxring[0], M_EPAIR);
		buf_ring_free(q->rxring[1], M_EPAIR);
	}
	free(sca->queues, M_EPAIR);
	free(sca, M_EPAIR);

	/* Last free the cloner unit. */
	ifc_free_unit(ifc, unit);

	return (0);
}

static void
vnet_epair_init(const void *unused __unused)
{

	V_epair_cloner = if_clone_advanced(epairname, 0,
	    epair_clone_match, epair_clone_create, epair_clone_destroy);
}
VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_epair_init, NULL);

static void
vnet_epair_uninit(const void *unused __unused)
{

	if_clone_detach(V_epair_cloner);
}
VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
    vnet_epair_uninit, NULL);

static int
epair_mod_init(void)
{
	char name[32];
	epair_tasks.tasks = 0;

#ifdef RSS
	struct pcpu *pcpu;
	int cpu;

	CPU_FOREACH(cpu) {
		cpuset_t cpu_mask;

		/* Pin to this CPU so we get appropriate NUMA allocations. */
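		/*
		 * One single-threaded taskqueue is created per CPU and its
		 * thread is pinned to that CPU via the cpuset below, so
		 * queue i is always drained on CPU i.
		 */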
		pcpu = pcpu_find(cpu);
		thread_lock(curthread);
		sched_bind(curthread, cpu);
		thread_unlock(curthread);

		snprintf(name, sizeof(name), "epair_task_%d", cpu);

		epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK,
		    taskqueue_thread_enqueue,
		    &epair_tasks.tq[cpu]);
		CPU_SETOF(cpu, &cpu_mask);
		taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET,
		    &cpu_mask, "%s", name);

		epair_tasks.tasks++;
	}
#else
	snprintf(name, sizeof(name), "epair_task");

	epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK,
	    taskqueue_thread_enqueue,
	    &epair_tasks.tq[0]);
	taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name);

	epair_tasks.tasks = 1;
#endif

	return (0);
}

static void
epair_mod_cleanup(void)
{

	for (int i = 0; i < epair_tasks.tasks; i++) {
		taskqueue_drain_all(epair_tasks.tq[i]);
		taskqueue_free(epair_tasks.tq[i]);
	}
}

static int
epair_modevent(module_t mod, int type, void *data)
{
	int ret;

	switch (type) {
	case MOD_LOAD:
		EPAIR_LOCK_INIT();
		ret = epair_mod_init();
		if (ret != 0)
			return (ret);
		if (bootverbose)
			printf("%s: %s initialized.\n", __func__, epairname);
		break;
	case MOD_UNLOAD:
		epair_mod_cleanup();
		EPAIR_LOCK_DESTROY();
		if (bootverbose)
			printf("%s: %s unloaded.\n", __func__, epairname);
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t epair_mod = {
	"if_epair",
	epair_modevent,
	0
};

DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
MODULE_VERSION(if_epair, 3);