1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008 The FreeBSD Foundation 5 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 6 * All rights reserved. 7 * 8 * This software was developed by CK Software GmbH under sponsorship 9 * from the FreeBSD Foundation. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * A pair of virtual back-to-back connected ethernet like interfaces 35 * (``two interfaces with a virtual cross-over cable''). 36 * 37 * This is mostly intended to be used to provide connectivity between 38 * different virtual network stack instances. 39 */ 40 /* 41 * Things to re-think once we have more experience: 42 * - ifp->if_reassign function once we can test with vimage. Depending on 43 * how if_vmove() is going to be improved. 44 * - Real random etheraddrs that are checked to be uniquish; we would need 45 * to re-do them in case we move the interface between network stacks 46 * in a private if_reassign function. 47 * In case we bridge to a real interface/network or between indepedent 48 * epairs on multiple stacks/machines, we may need this. 49 * For now let the user handle that case. 50 */ 51 52 #include <sys/cdefs.h> 53 __FBSDID("$FreeBSD$"); 54 55 #include <sys/param.h> 56 #include <sys/hash.h> 57 #include <sys/jail.h> 58 #include <sys/kernel.h> 59 #include <sys/libkern.h> 60 #include <sys/malloc.h> 61 #include <sys/mbuf.h> 62 #include <sys/module.h> 63 #include <sys/proc.h> 64 #include <sys/refcount.h> 65 #include <sys/queue.h> 66 #include <sys/smp.h> 67 #include <sys/socket.h> 68 #include <sys/sockio.h> 69 #include <sys/sysctl.h> 70 #include <sys/types.h> 71 72 #include <net/bpf.h> 73 #include <net/ethernet.h> 74 #include <net/if.h> 75 #include <net/if_var.h> 76 #include <net/if_clone.h> 77 #include <net/if_media.h> 78 #include <net/if_var.h> 79 #include <net/if_types.h> 80 #include <net/netisr.h> 81 #include <net/vnet.h> 82 83 SYSCTL_DECL(_net_link); 84 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 85 "epair sysctl"); 86 87 #ifdef EPAIR_DEBUG 88 static int epair_debug = 0; 89 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 90 &epair_debug, 0, "if_epair(4) debugging."); 91 #define DPRINTF(fmt, arg...) \ 92 if (epair_debug) \ 93 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 94 #else 95 #define DPRINTF(fmt, arg...) 96 #endif 97 98 static void epair_nh_sintr(struct mbuf *); 99 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 100 static void epair_nh_drainedcpu(u_int); 101 102 static void epair_start_locked(struct ifnet *); 103 static int epair_media_change(struct ifnet *); 104 static void epair_media_status(struct ifnet *, struct ifmediareq *); 105 106 static int epair_clone_match(struct if_clone *, const char *); 107 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 108 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 109 110 static const char epairname[] = "epair"; 111 static unsigned int next_index = 0; 112 113 /* Netisr related definitions and sysctl. */ 114 static struct netisr_handler epair_nh = { 115 .nh_name = epairname, 116 .nh_proto = NETISR_EPAIR, 117 .nh_policy = NETISR_POLICY_CPU, 118 .nh_handler = epair_nh_sintr, 119 .nh_m2cpuid = epair_nh_m2cpuid, 120 .nh_drainedcpu = epair_nh_drainedcpu, 121 }; 122 123 static int 124 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 125 { 126 int error, qlimit; 127 128 netisr_getqlimit(&epair_nh, &qlimit); 129 error = sysctl_handle_int(oidp, &qlimit, 0, req); 130 if (error || !req->newptr) 131 return (error); 132 if (qlimit < 1) 133 return (EINVAL); 134 return (netisr_setqlimit(&epair_nh, qlimit)); 135 } 136 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, 137 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 138 sysctl_epair_netisr_maxqlen, "I", 139 "Maximum if_epair(4) netisr \"hw\" queue length"); 140 141 struct epair_softc { 142 struct ifnet *ifp; /* This ifp. */ 143 struct ifnet *oifp; /* other ifp of pair. */ 144 struct ifmedia media; /* Media config (fake). */ 145 u_int refcount; /* # of mbufs in flight. */ 146 u_int cpuid; /* CPU ID assigned upon creation. */ 147 void (*if_qflush)(struct ifnet *); 148 /* Original if_qflush routine. */ 149 }; 150 151 /* 152 * Per-CPU list of ifps with data in the ifq that needs to be flushed 153 * to the netisr ``hw'' queue before we allow any further direct queuing 154 * to the ``hw'' queue. 155 */ 156 struct epair_ifp_drain { 157 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 158 struct ifnet *ifp; 159 }; 160 STAILQ_HEAD(eid_list, epair_ifp_drain); 161 162 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 163 "if_epair", NULL, MTX_DEF) 164 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 165 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 166 MA_OWNED) 167 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 168 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 169 170 #ifdef INVARIANTS 171 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 172 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 173 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 174 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 175 #else 176 #define EPAIR_REFCOUNT_INIT(r, v) 177 #define EPAIR_REFCOUNT_AQUIRE(r) 178 #define EPAIR_REFCOUNT_RELEASE(r) 179 #define EPAIR_REFCOUNT_ASSERT(a, p) 180 #endif 181 182 static MALLOC_DEFINE(M_EPAIR, epairname, 183 "Pair of virtual cross-over connected Ethernet-like interfaces"); 184 185 VNET_DEFINE_STATIC(struct if_clone *, epair_cloner); 186 #define V_epair_cloner VNET(epair_cloner) 187 188 /* 189 * DPCPU area and functions. 190 */ 191 struct epair_dpcpu { 192 struct mtx if_epair_mtx; /* Per-CPU locking. */ 193 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 194 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 195 * data in the ifq. */ 196 }; 197 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 198 199 static void 200 epair_clear_mbuf(struct mbuf *m) 201 { 202 /* Remove any CSUM_SND_TAG as ether_input will barf. */ 203 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { 204 m_snd_tag_rele(m->m_pkthdr.snd_tag); 205 m->m_pkthdr.snd_tag = NULL; 206 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 207 } 208 209 m_tag_delete_nonpersistent(m); 210 } 211 212 static void 213 epair_dpcpu_init(void) 214 { 215 struct epair_dpcpu *epair_dpcpu; 216 struct eid_list *s; 217 u_int cpuid; 218 219 CPU_FOREACH(cpuid) { 220 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 221 222 /* Initialize per-cpu lock. */ 223 EPAIR_LOCK_INIT(epair_dpcpu); 224 225 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 226 epair_dpcpu->epair_drv_flags = 0; 227 228 /* 229 * Initialize per-cpu drain list. 230 * Manually do what STAILQ_HEAD_INITIALIZER would do. 231 */ 232 s = &epair_dpcpu->epair_ifp_drain_list; 233 s->stqh_first = NULL; 234 s->stqh_last = &s->stqh_first; 235 } 236 } 237 238 static void 239 epair_dpcpu_detach(void) 240 { 241 struct epair_dpcpu *epair_dpcpu; 242 u_int cpuid; 243 244 CPU_FOREACH(cpuid) { 245 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 246 247 /* Destroy per-cpu lock. */ 248 EPAIR_LOCK_DESTROY(epair_dpcpu); 249 } 250 } 251 252 /* 253 * Helper functions. 254 */ 255 static u_int 256 cpuid_from_ifp(struct ifnet *ifp) 257 { 258 struct epair_softc *sc; 259 260 if (ifp == NULL) 261 return (0); 262 sc = ifp->if_softc; 263 264 return (sc->cpuid); 265 } 266 267 /* 268 * Netisr handler functions. 269 */ 270 static void 271 epair_nh_sintr(struct mbuf *m) 272 { 273 struct ifnet *ifp; 274 struct epair_softc *sc __unused; 275 276 ifp = m->m_pkthdr.rcvif; 277 (*ifp->if_input)(ifp, m); 278 sc = ifp->if_softc; 279 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 280 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 281 ("%s: ifp=%p sc->refcount not >= 1: %d", 282 __func__, ifp, sc->refcount)); 283 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 284 } 285 286 static struct mbuf * 287 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 288 { 289 290 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 291 292 return (m); 293 } 294 295 static void 296 epair_nh_drainedcpu(u_int cpuid) 297 { 298 struct epair_dpcpu *epair_dpcpu; 299 struct epair_ifp_drain *elm, *tvar; 300 struct ifnet *ifp; 301 302 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 303 EPAIR_LOCK(epair_dpcpu); 304 /* 305 * Assume our "hw" queue and possibly ifq will be emptied 306 * again. In case we will overflow the "hw" queue while 307 * draining, epair_start_locked will set IFF_DRV_OACTIVE 308 * again and we will stop and return. 309 */ 310 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 311 ifp_next, tvar) { 312 ifp = elm->ifp; 313 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 314 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 315 epair_start_locked(ifp); 316 317 IFQ_LOCK(&ifp->if_snd); 318 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 319 struct epair_softc *sc __unused; 320 321 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 322 elm, epair_ifp_drain, ifp_next); 323 /* The cached ifp goes off the list. */ 324 sc = ifp->if_softc; 325 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 326 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 327 ("%s: ifp=%p sc->refcount not >= 1: %d", 328 __func__, ifp, sc->refcount)); 329 free(elm, M_EPAIR); 330 } 331 IFQ_UNLOCK(&ifp->if_snd); 332 333 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 334 /* Our "hw"q overflew again. */ 335 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 336 DPRINTF("hw queue length overflow at %u\n", 337 epair_nh.nh_qlimit); 338 break; 339 } 340 } 341 EPAIR_UNLOCK(epair_dpcpu); 342 } 343 344 /* 345 * Network interface (`if') related functions. 346 */ 347 static void 348 epair_remove_ifp_from_draining(struct ifnet *ifp) 349 { 350 struct epair_dpcpu *epair_dpcpu; 351 struct epair_ifp_drain *elm, *tvar; 352 u_int cpuid; 353 354 CPU_FOREACH(cpuid) { 355 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 356 EPAIR_LOCK(epair_dpcpu); 357 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 358 ifp_next, tvar) { 359 if (ifp == elm->ifp) { 360 struct epair_softc *sc __unused; 361 362 STAILQ_REMOVE( 363 &epair_dpcpu->epair_ifp_drain_list, elm, 364 epair_ifp_drain, ifp_next); 365 /* The cached ifp goes off the list. */ 366 sc = ifp->if_softc; 367 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 368 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 369 ("%s: ifp=%p sc->refcount not >= 1: %d", 370 __func__, ifp, sc->refcount)); 371 free(elm, M_EPAIR); 372 } 373 } 374 EPAIR_UNLOCK(epair_dpcpu); 375 } 376 } 377 378 static int 379 epair_add_ifp_for_draining(struct ifnet *ifp) 380 { 381 struct epair_dpcpu *epair_dpcpu; 382 struct epair_softc *sc; 383 struct epair_ifp_drain *elm = NULL; 384 385 sc = ifp->if_softc; 386 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 387 EPAIR_LOCK_ASSERT(epair_dpcpu); 388 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 389 if (elm->ifp == ifp) 390 break; 391 /* If the ifp is there already, return success. */ 392 if (elm != NULL) 393 return (0); 394 395 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 396 if (elm == NULL) 397 return (ENOMEM); 398 399 elm->ifp = ifp; 400 /* Add a reference for the ifp pointer on the list. */ 401 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 402 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 403 404 return (0); 405 } 406 407 static void 408 epair_start_locked(struct ifnet *ifp) 409 { 410 struct epair_dpcpu *epair_dpcpu; 411 struct mbuf *m; 412 struct epair_softc *sc; 413 struct ifnet *oifp; 414 int error; 415 416 DPRINTF("ifp=%p\n", ifp); 417 sc = ifp->if_softc; 418 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 419 EPAIR_LOCK_ASSERT(epair_dpcpu); 420 421 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 422 return; 423 if ((ifp->if_flags & IFF_UP) == 0) 424 return; 425 426 /* 427 * We get packets here from ether_output via if_handoff() 428 * and need to put them into the input queue of the oifp 429 * and call oifp->if_input() via netisr/epair_sintr(). 430 */ 431 oifp = sc->oifp; 432 sc = oifp->if_softc; 433 for (;;) { 434 IFQ_DEQUEUE(&ifp->if_snd, m); 435 if (m == NULL) 436 break; 437 BPF_MTAP(ifp, m); 438 439 /* 440 * In case the outgoing interface is not usable, 441 * drop the packet. 442 */ 443 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 444 (oifp->if_flags & IFF_UP) ==0) { 445 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 446 m_freem(m); 447 continue; 448 } 449 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 450 451 epair_clear_mbuf(m); 452 453 /* 454 * Add a reference so the interface cannot go while the 455 * packet is in transit as we rely on rcvif to stay valid. 456 */ 457 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 458 m->m_pkthdr.rcvif = oifp; 459 CURVNET_SET_QUIET(oifp->if_vnet); 460 error = netisr_queue(NETISR_EPAIR, m); 461 CURVNET_RESTORE(); 462 if (!error) { 463 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 464 /* Someone else received the packet. */ 465 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 466 } else { 467 /* The packet was freed already. */ 468 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 469 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 470 (void) epair_add_ifp_for_draining(ifp); 471 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 472 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 473 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 474 ("%s: ifp=%p sc->refcount not >= 1: %d", 475 __func__, oifp, sc->refcount)); 476 } 477 } 478 } 479 480 static void 481 epair_start(struct ifnet *ifp) 482 { 483 struct epair_dpcpu *epair_dpcpu; 484 485 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 486 EPAIR_LOCK(epair_dpcpu); 487 epair_start_locked(ifp); 488 EPAIR_UNLOCK(epair_dpcpu); 489 } 490 491 static int 492 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 493 { 494 struct epair_dpcpu *epair_dpcpu; 495 struct epair_softc *sc; 496 struct ifnet *oifp; 497 int error, len; 498 short mflags; 499 500 DPRINTF("ifp=%p m=%p\n", ifp, m); 501 sc = ifp->if_softc; 502 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 503 EPAIR_LOCK_ASSERT(epair_dpcpu); 504 505 if (m == NULL) 506 return (0); 507 508 /* 509 * We are not going to use the interface en/dequeue mechanism 510 * on the TX side. We are called from ether_output_frame() 511 * and will put the packet into the incoming queue of the 512 * other interface of our pair via the netsir. 513 */ 514 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 515 m_freem(m); 516 return (ENXIO); 517 } 518 if ((ifp->if_flags & IFF_UP) == 0) { 519 m_freem(m); 520 return (ENETDOWN); 521 } 522 523 BPF_MTAP(ifp, m); 524 525 /* 526 * In case the outgoing interface is not usable, 527 * drop the packet. 528 */ 529 oifp = sc->oifp; 530 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 531 (oifp->if_flags & IFF_UP) ==0) { 532 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 533 m_freem(m); 534 return (0); 535 } 536 len = m->m_pkthdr.len; 537 mflags = m->m_flags; 538 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 539 540 #ifdef ALTQ 541 /* Support ALTQ via the classic if_start() path. */ 542 IF_LOCK(&ifp->if_snd); 543 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 544 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 545 if (error) 546 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 547 IF_UNLOCK(&ifp->if_snd); 548 if (!error) { 549 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 550 if (mflags & (M_BCAST|M_MCAST)) 551 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 552 553 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 554 epair_start_locked(ifp); 555 else 556 (void)epair_add_ifp_for_draining(ifp); 557 } 558 return (error); 559 } 560 IF_UNLOCK(&ifp->if_snd); 561 #endif 562 563 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 564 /* 565 * Our hardware queue is full, try to fall back 566 * queuing to the ifq but do not call ifp->if_start. 567 * Either we are lucky or the packet is gone. 568 */ 569 IFQ_ENQUEUE(&ifp->if_snd, m, error); 570 if (!error) 571 (void)epair_add_ifp_for_draining(ifp); 572 return (error); 573 } 574 575 epair_clear_mbuf(m); 576 577 sc = oifp->if_softc; 578 /* 579 * Add a reference so the interface cannot go while the 580 * packet is in transit as we rely on rcvif to stay valid. 581 */ 582 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 583 m->m_pkthdr.rcvif = oifp; 584 CURVNET_SET_QUIET(oifp->if_vnet); 585 error = netisr_queue(NETISR_EPAIR, m); 586 CURVNET_RESTORE(); 587 if (!error) { 588 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 589 /* 590 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 591 * but as we bypass all this we have to duplicate 592 * the logic another time. 593 */ 594 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 595 if (mflags & (M_BCAST|M_MCAST)) 596 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 597 /* Someone else received the packet. */ 598 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 599 } else { 600 /* The packet was freed already. */ 601 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 602 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 603 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 604 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 605 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 606 ("%s: ifp=%p sc->refcount not >= 1: %d", 607 __func__, oifp, sc->refcount)); 608 } 609 610 return (error); 611 } 612 613 static int 614 epair_transmit(struct ifnet *ifp, struct mbuf *m) 615 { 616 struct epair_dpcpu *epair_dpcpu; 617 int error; 618 619 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 620 EPAIR_LOCK(epair_dpcpu); 621 error = epair_transmit_locked(ifp, m); 622 EPAIR_UNLOCK(epair_dpcpu); 623 return (error); 624 } 625 626 static void 627 epair_qflush(struct ifnet *ifp) 628 { 629 struct epair_softc *sc; 630 631 sc = ifp->if_softc; 632 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 633 __func__, ifp, sc)); 634 /* 635 * Remove this ifp from all backpointer lists. The interface will not 636 * usable for flushing anyway nor should it have anything to flush 637 * after if_qflush(). 638 */ 639 epair_remove_ifp_from_draining(ifp); 640 641 if (sc->if_qflush) 642 sc->if_qflush(ifp); 643 } 644 645 static int 646 epair_media_change(struct ifnet *ifp __unused) 647 { 648 649 /* Do nothing. */ 650 return (0); 651 } 652 653 static void 654 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 655 { 656 657 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 658 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 659 } 660 661 static int 662 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 663 { 664 struct epair_softc *sc; 665 struct ifreq *ifr; 666 int error; 667 668 ifr = (struct ifreq *)data; 669 switch (cmd) { 670 case SIOCSIFFLAGS: 671 case SIOCADDMULTI: 672 case SIOCDELMULTI: 673 error = 0; 674 break; 675 676 case SIOCSIFMEDIA: 677 case SIOCGIFMEDIA: 678 sc = ifp->if_softc; 679 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 680 break; 681 682 case SIOCSIFMTU: 683 /* We basically allow all kinds of MTUs. */ 684 ifp->if_mtu = ifr->ifr_mtu; 685 error = 0; 686 break; 687 688 default: 689 /* Let the common ethernet handler process this. */ 690 error = ether_ioctl(ifp, cmd, data); 691 break; 692 } 693 694 return (error); 695 } 696 697 static void 698 epair_init(void *dummy __unused) 699 { 700 } 701 702 /* 703 * Interface cloning functions. 704 * We use our private ones so that we can create/destroy our secondary 705 * device along with the primary one. 706 */ 707 static int 708 epair_clone_match(struct if_clone *ifc, const char *name) 709 { 710 const char *cp; 711 712 DPRINTF("name='%s'\n", name); 713 714 /* 715 * Our base name is epair. 716 * Our interfaces will be named epair<n>[ab]. 717 * So accept anything of the following list: 718 * - epair 719 * - epair<n> 720 * but not the epair<n>[ab] versions. 721 */ 722 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 723 return (0); 724 725 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 726 if (*cp < '0' || *cp > '9') 727 return (0); 728 } 729 730 return (1); 731 } 732 733 static void 734 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb) 735 { 736 struct ifnet *ifp; 737 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 738 739 ifp = scb->ifp; 740 /* Copy epairNa etheraddr and change the last byte. */ 741 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN); 742 eaddr[5] = 0x0b; 743 ether_ifattach(ifp, eaddr); 744 745 if_clone_addif(ifc, ifp); 746 } 747 748 static int 749 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 750 { 751 struct epair_softc *sca, *scb; 752 struct ifnet *ifp; 753 char *dp; 754 int error, unit, wildcard; 755 uint64_t hostid; 756 uint32_t key[3]; 757 uint32_t hash; 758 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 759 760 /* Try to see if a special unit was requested. */ 761 error = ifc_name2unit(name, &unit); 762 if (error != 0) 763 return (error); 764 wildcard = (unit < 0); 765 766 error = ifc_alloc_unit(ifc, &unit); 767 if (error != 0) 768 return (error); 769 770 /* 771 * If no unit had been given, we need to adjust the ifName. 772 * Also make sure there is space for our extra [ab] suffix. 773 */ 774 for (dp = name; *dp != '\0'; dp++); 775 if (wildcard) { 776 error = snprintf(dp, len - (dp - name), "%d", unit); 777 if (error > len - (dp - name) - 1) { 778 /* ifName too long. */ 779 ifc_free_unit(ifc, unit); 780 return (ENOSPC); 781 } 782 dp += error; 783 } 784 if (len - (dp - name) - 1 < 1) { 785 /* No space left for our [ab] suffix. */ 786 ifc_free_unit(ifc, unit); 787 return (ENOSPC); 788 } 789 *dp = 'b'; 790 /* Must not change dp so we can replace 'a' by 'b' later. */ 791 *(dp+1) = '\0'; 792 793 /* Check if 'a' and 'b' interfaces already exist. */ 794 if (ifunit(name) != NULL) 795 return (EEXIST); 796 *dp = 'a'; 797 if (ifunit(name) != NULL) 798 return (EEXIST); 799 800 /* Allocate memory for both [ab] interfaces */ 801 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 802 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 803 sca->ifp = if_alloc(IFT_ETHER); 804 if (sca->ifp == NULL) { 805 free(sca, M_EPAIR); 806 ifc_free_unit(ifc, unit); 807 return (ENOSPC); 808 } 809 810 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 811 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 812 scb->ifp = if_alloc(IFT_ETHER); 813 if (scb->ifp == NULL) { 814 free(scb, M_EPAIR); 815 if_free(sca->ifp); 816 free(sca, M_EPAIR); 817 ifc_free_unit(ifc, unit); 818 return (ENOSPC); 819 } 820 821 /* 822 * Cross-reference the interfaces so we will be able to free both. 823 */ 824 sca->oifp = scb->ifp; 825 scb->oifp = sca->ifp; 826 827 /* 828 * Calculate the cpuid for netisr queueing based on the 829 * ifIndex of the interfaces. As long as we cannot configure 830 * this or use cpuset information easily we cannot guarantee 831 * cache locality but we can at least allow parallelism. 832 */ 833 sca->cpuid = 834 netisr_get_cpuid(sca->ifp->if_index); 835 scb->cpuid = 836 netisr_get_cpuid(scb->ifp->if_index); 837 838 /* Initialise pseudo media types. */ 839 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 840 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 841 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 842 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 843 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 844 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 845 846 /* Finish initialization of interface <n>a. */ 847 ifp = sca->ifp; 848 ifp->if_softc = sca; 849 strlcpy(ifp->if_xname, name, IFNAMSIZ); 850 ifp->if_dname = epairname; 851 ifp->if_dunit = unit; 852 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 853 ifp->if_capabilities = IFCAP_VLAN_MTU; 854 ifp->if_capenable = IFCAP_VLAN_MTU; 855 ifp->if_start = epair_start; 856 ifp->if_ioctl = epair_ioctl; 857 ifp->if_init = epair_init; 858 if_setsendqlen(ifp, ifqmaxlen); 859 if_setsendqready(ifp); 860 861 /* 862 * Calculate the etheraddr hashing the hostid and the 863 * interface index. The result would be hopefully unique. 864 * Note that the "a" component of an epair instance may get moved 865 * to a different VNET after creation. In that case its index 866 * will be freed and the index can get reused by new epair instance. 867 * Make sure we do not create same etheraddr again. 868 */ 869 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid); 870 if (hostid == 0) 871 arc4rand(&hostid, sizeof(hostid), 0); 872 873 if (ifp->if_index > next_index) 874 next_index = ifp->if_index; 875 else 876 next_index++; 877 878 key[0] = (uint32_t)next_index; 879 key[1] = (uint32_t)(hostid & 0xffffffff); 880 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff); 881 hash = jenkins_hash32(key, 3, 0); 882 883 eaddr[0] = 0x02; 884 memcpy(&eaddr[1], &hash, 4); 885 eaddr[5] = 0x0a; 886 ether_ifattach(ifp, eaddr); 887 sca->if_qflush = ifp->if_qflush; 888 ifp->if_qflush = epair_qflush; 889 ifp->if_transmit = epair_transmit; 890 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 891 892 /* Swap the name and finish initialization of interface <n>b. */ 893 *dp = 'b'; 894 895 ifp = scb->ifp; 896 ifp->if_softc = scb; 897 strlcpy(ifp->if_xname, name, IFNAMSIZ); 898 ifp->if_dname = epairname; 899 ifp->if_dunit = unit; 900 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 901 ifp->if_capabilities = IFCAP_VLAN_MTU; 902 ifp->if_capenable = IFCAP_VLAN_MTU; 903 ifp->if_start = epair_start; 904 ifp->if_ioctl = epair_ioctl; 905 ifp->if_init = epair_init; 906 if_setsendqlen(ifp, ifqmaxlen); 907 if_setsendqready(ifp); 908 /* We need to play some tricks here for the second interface. */ 909 strlcpy(name, epairname, len); 910 911 /* Correctly set the name for the cloner list. */ 912 strlcpy(name, scb->ifp->if_xname, len); 913 epair_clone_add(ifc, scb); 914 915 scb->if_qflush = ifp->if_qflush; 916 ifp->if_qflush = epair_qflush; 917 ifp->if_transmit = epair_transmit; 918 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 919 920 /* 921 * Restore name to <n>a as the ifp for this will go into the 922 * cloner list for the initial call. 923 */ 924 strlcpy(name, sca->ifp->if_xname, len); 925 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 926 927 /* Tell the world, that we are ready to rock. */ 928 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 929 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 930 if_link_state_change(sca->ifp, LINK_STATE_UP); 931 if_link_state_change(scb->ifp, LINK_STATE_UP); 932 933 return (0); 934 } 935 936 static int 937 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 938 { 939 struct ifnet *oifp; 940 struct epair_softc *sca, *scb; 941 int unit, error; 942 943 DPRINTF("ifp=%p\n", ifp); 944 945 /* 946 * In case we called into if_clone_destroyif() ourselves 947 * again to remove the second interface, the softc will be 948 * NULL. In that case so not do anything but return success. 949 */ 950 if (ifp->if_softc == NULL) 951 return (0); 952 953 unit = ifp->if_dunit; 954 sca = ifp->if_softc; 955 oifp = sca->oifp; 956 scb = oifp->if_softc; 957 958 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 959 if_link_state_change(ifp, LINK_STATE_DOWN); 960 if_link_state_change(oifp, LINK_STATE_DOWN); 961 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 962 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 963 964 /* 965 * Get rid of our second half. As the other of the two 966 * interfaces may reside in a different vnet, we need to 967 * switch before freeing them. 968 */ 969 CURVNET_SET_QUIET(oifp->if_vnet); 970 ether_ifdetach(oifp); 971 /* 972 * Wait for all packets to be dispatched to if_input. 973 * The numbers can only go down as the interface is 974 * detached so there is no need to use atomics. 975 */ 976 DPRINTF("scb refcnt=%u\n", scb->refcount); 977 EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, 978 ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); 979 oifp->if_softc = NULL; 980 error = if_clone_destroyif(ifc, oifp); 981 if (error) 982 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 983 __func__, error); 984 if_free(oifp); 985 ifmedia_removeall(&scb->media); 986 free(scb, M_EPAIR); 987 CURVNET_RESTORE(); 988 989 ether_ifdetach(ifp); 990 /* 991 * Wait for all packets to be dispatched to if_input. 992 */ 993 DPRINTF("sca refcnt=%u\n", sca->refcount); 994 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, 995 ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); 996 if_free(ifp); 997 ifmedia_removeall(&sca->media); 998 free(sca, M_EPAIR); 999 ifc_free_unit(ifc, unit); 1000 1001 return (0); 1002 } 1003 1004 static void 1005 vnet_epair_init(const void *unused __unused) 1006 { 1007 1008 V_epair_cloner = if_clone_advanced(epairname, 0, 1009 epair_clone_match, epair_clone_create, epair_clone_destroy); 1010 #ifdef VIMAGE 1011 netisr_register_vnet(&epair_nh); 1012 #endif 1013 } 1014 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 1015 vnet_epair_init, NULL); 1016 1017 static void 1018 vnet_epair_uninit(const void *unused __unused) 1019 { 1020 1021 #ifdef VIMAGE 1022 netisr_unregister_vnet(&epair_nh); 1023 #endif 1024 if_clone_detach(V_epair_cloner); 1025 } 1026 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 1027 vnet_epair_uninit, NULL); 1028 1029 static void 1030 epair_uninit(const void *unused __unused) 1031 { 1032 netisr_unregister(&epair_nh); 1033 epair_dpcpu_detach(); 1034 if (bootverbose) 1035 printf("%s unloaded.\n", epairname); 1036 } 1037 SYSUNINIT(epair_uninit, SI_SUB_INIT_IF, SI_ORDER_MIDDLE, 1038 epair_uninit, NULL); 1039 1040 static int 1041 epair_modevent(module_t mod, int type, void *data) 1042 { 1043 int qlimit; 1044 1045 switch (type) { 1046 case MOD_LOAD: 1047 /* For now limit us to one global mutex and one inq. */ 1048 epair_dpcpu_init(); 1049 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 1050 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 1051 epair_nh.nh_qlimit = qlimit; 1052 netisr_register(&epair_nh); 1053 if (bootverbose) 1054 printf("%s initialized.\n", epairname); 1055 break; 1056 case MOD_UNLOAD: 1057 /* Handled in epair_uninit() */ 1058 break; 1059 default: 1060 return (EOPNOTSUPP); 1061 } 1062 return (0); 1063 } 1064 1065 static moduledata_t epair_mod = { 1066 "if_epair", 1067 epair_modevent, 1068 0 1069 }; 1070 1071 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 1072 MODULE_VERSION(if_epair, 1); 1073