1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008 The FreeBSD Foundation 5 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 6 * All rights reserved. 7 * 8 * This software was developed by CK Software GmbH under sponsorship 9 * from the FreeBSD Foundation. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * A pair of virtual back-to-back connected ethernet like interfaces 35 * (``two interfaces with a virtual cross-over cable''). 36 * 37 * This is mostly intended to be used to provide connectivity between 38 * different virtual network stack instances. 39 */ 40 /* 41 * Things to re-think once we have more experience: 42 * - ifp->if_reassign function once we can test with vimage. Depending on 43 * how if_vmove() is going to be improved. 44 * - Real random etheraddrs that are checked to be uniquish; we would need 45 * to re-do them in case we move the interface between network stacks 46 * in a private if_reassign function. 47 * In case we bridge to a real interface/network or between indepedent 48 * epairs on multiple stacks/machines, we may need this. 49 * For now let the user handle that case. 50 */ 51 52 #include <sys/cdefs.h> 53 __FBSDID("$FreeBSD$"); 54 55 #include <sys/param.h> 56 #include <sys/hash.h> 57 #include <sys/jail.h> 58 #include <sys/kernel.h> 59 #include <sys/libkern.h> 60 #include <sys/malloc.h> 61 #include <sys/mbuf.h> 62 #include <sys/module.h> 63 #include <sys/proc.h> 64 #include <sys/refcount.h> 65 #include <sys/queue.h> 66 #include <sys/smp.h> 67 #include <sys/socket.h> 68 #include <sys/sockio.h> 69 #include <sys/sysctl.h> 70 #include <sys/types.h> 71 72 #include <net/bpf.h> 73 #include <net/ethernet.h> 74 #include <net/if.h> 75 #include <net/if_var.h> 76 #include <net/if_clone.h> 77 #include <net/if_media.h> 78 #include <net/if_var.h> 79 #include <net/if_types.h> 80 #include <net/netisr.h> 81 #include <net/vnet.h> 82 83 SYSCTL_DECL(_net_link); 84 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 85 86 #ifdef EPAIR_DEBUG 87 static int epair_debug = 0; 88 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 89 &epair_debug, 0, "if_epair(4) debugging."); 90 #define DPRINTF(fmt, arg...) \ 91 if (epair_debug) \ 92 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 93 #else 94 #define DPRINTF(fmt, arg...) 95 #endif 96 97 static void epair_nh_sintr(struct mbuf *); 98 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 99 static void epair_nh_drainedcpu(u_int); 100 101 static void epair_start_locked(struct ifnet *); 102 static int epair_media_change(struct ifnet *); 103 static void epair_media_status(struct ifnet *, struct ifmediareq *); 104 105 static int epair_clone_match(struct if_clone *, const char *); 106 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 107 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 108 109 static const char epairname[] = "epair"; 110 static unsigned int next_index = 0; 111 112 /* Netisr related definitions and sysctl. */ 113 static struct netisr_handler epair_nh = { 114 .nh_name = epairname, 115 .nh_proto = NETISR_EPAIR, 116 .nh_policy = NETISR_POLICY_CPU, 117 .nh_handler = epair_nh_sintr, 118 .nh_m2cpuid = epair_nh_m2cpuid, 119 .nh_drainedcpu = epair_nh_drainedcpu, 120 }; 121 122 static int 123 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 124 { 125 int error, qlimit; 126 127 netisr_getqlimit(&epair_nh, &qlimit); 128 error = sysctl_handle_int(oidp, &qlimit, 0, req); 129 if (error || !req->newptr) 130 return (error); 131 if (qlimit < 1) 132 return (EINVAL); 133 return (netisr_setqlimit(&epair_nh, qlimit)); 134 } 135 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 136 0, 0, sysctl_epair_netisr_maxqlen, "I", 137 "Maximum if_epair(4) netisr \"hw\" queue length"); 138 139 struct epair_softc { 140 struct ifnet *ifp; /* This ifp. */ 141 struct ifnet *oifp; /* other ifp of pair. */ 142 struct ifmedia media; /* Media config (fake). */ 143 u_int refcount; /* # of mbufs in flight. */ 144 u_int cpuid; /* CPU ID assigned upon creation. */ 145 void (*if_qflush)(struct ifnet *); 146 /* Original if_qflush routine. */ 147 }; 148 149 /* 150 * Per-CPU list of ifps with data in the ifq that needs to be flushed 151 * to the netisr ``hw'' queue before we allow any further direct queuing 152 * to the ``hw'' queue. 153 */ 154 struct epair_ifp_drain { 155 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 156 struct ifnet *ifp; 157 }; 158 STAILQ_HEAD(eid_list, epair_ifp_drain); 159 160 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 161 "if_epair", NULL, MTX_DEF) 162 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 163 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 164 MA_OWNED) 165 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 166 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 167 168 #ifdef INVARIANTS 169 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 170 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 171 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 172 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 173 #else 174 #define EPAIR_REFCOUNT_INIT(r, v) 175 #define EPAIR_REFCOUNT_AQUIRE(r) 176 #define EPAIR_REFCOUNT_RELEASE(r) 177 #define EPAIR_REFCOUNT_ASSERT(a, p) 178 #endif 179 180 static MALLOC_DEFINE(M_EPAIR, epairname, 181 "Pair of virtual cross-over connected Ethernet-like interfaces"); 182 183 VNET_DEFINE_STATIC(struct if_clone *, epair_cloner); 184 #define V_epair_cloner VNET(epair_cloner) 185 186 /* 187 * DPCPU area and functions. 188 */ 189 struct epair_dpcpu { 190 struct mtx if_epair_mtx; /* Per-CPU locking. */ 191 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 192 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 193 * data in the ifq. */ 194 }; 195 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 196 197 static void 198 epair_dpcpu_init(void) 199 { 200 struct epair_dpcpu *epair_dpcpu; 201 struct eid_list *s; 202 u_int cpuid; 203 204 CPU_FOREACH(cpuid) { 205 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 206 207 /* Initialize per-cpu lock. */ 208 EPAIR_LOCK_INIT(epair_dpcpu); 209 210 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 211 epair_dpcpu->epair_drv_flags = 0; 212 213 /* 214 * Initialize per-cpu drain list. 215 * Manually do what STAILQ_HEAD_INITIALIZER would do. 216 */ 217 s = &epair_dpcpu->epair_ifp_drain_list; 218 s->stqh_first = NULL; 219 s->stqh_last = &s->stqh_first; 220 } 221 } 222 223 static void 224 epair_dpcpu_detach(void) 225 { 226 struct epair_dpcpu *epair_dpcpu; 227 u_int cpuid; 228 229 CPU_FOREACH(cpuid) { 230 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 231 232 /* Destroy per-cpu lock. */ 233 EPAIR_LOCK_DESTROY(epair_dpcpu); 234 } 235 } 236 237 /* 238 * Helper functions. 239 */ 240 static u_int 241 cpuid_from_ifp(struct ifnet *ifp) 242 { 243 struct epair_softc *sc; 244 245 if (ifp == NULL) 246 return (0); 247 sc = ifp->if_softc; 248 249 return (sc->cpuid); 250 } 251 252 /* 253 * Netisr handler functions. 254 */ 255 static void 256 epair_nh_sintr(struct mbuf *m) 257 { 258 struct ifnet *ifp; 259 struct epair_softc *sc __unused; 260 261 ifp = m->m_pkthdr.rcvif; 262 (*ifp->if_input)(ifp, m); 263 sc = ifp->if_softc; 264 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 265 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 266 ("%s: ifp=%p sc->refcount not >= 1: %d", 267 __func__, ifp, sc->refcount)); 268 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 269 } 270 271 static struct mbuf * 272 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 273 { 274 275 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 276 277 return (m); 278 } 279 280 static void 281 epair_nh_drainedcpu(u_int cpuid) 282 { 283 struct epair_dpcpu *epair_dpcpu; 284 struct epair_ifp_drain *elm, *tvar; 285 struct ifnet *ifp; 286 287 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 288 EPAIR_LOCK(epair_dpcpu); 289 /* 290 * Assume our "hw" queue and possibly ifq will be emptied 291 * again. In case we will overflow the "hw" queue while 292 * draining, epair_start_locked will set IFF_DRV_OACTIVE 293 * again and we will stop and return. 294 */ 295 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 296 ifp_next, tvar) { 297 ifp = elm->ifp; 298 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 299 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 300 epair_start_locked(ifp); 301 302 IFQ_LOCK(&ifp->if_snd); 303 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 304 struct epair_softc *sc __unused; 305 306 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 307 elm, epair_ifp_drain, ifp_next); 308 /* The cached ifp goes off the list. */ 309 sc = ifp->if_softc; 310 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 311 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 312 ("%s: ifp=%p sc->refcount not >= 1: %d", 313 __func__, ifp, sc->refcount)); 314 free(elm, M_EPAIR); 315 } 316 IFQ_UNLOCK(&ifp->if_snd); 317 318 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 319 /* Our "hw"q overflew again. */ 320 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 321 DPRINTF("hw queue length overflow at %u\n", 322 epair_nh.nh_qlimit); 323 break; 324 } 325 } 326 EPAIR_UNLOCK(epair_dpcpu); 327 } 328 329 /* 330 * Network interface (`if') related functions. 331 */ 332 static void 333 epair_remove_ifp_from_draining(struct ifnet *ifp) 334 { 335 struct epair_dpcpu *epair_dpcpu; 336 struct epair_ifp_drain *elm, *tvar; 337 u_int cpuid; 338 339 CPU_FOREACH(cpuid) { 340 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 341 EPAIR_LOCK(epair_dpcpu); 342 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 343 ifp_next, tvar) { 344 if (ifp == elm->ifp) { 345 struct epair_softc *sc __unused; 346 347 STAILQ_REMOVE( 348 &epair_dpcpu->epair_ifp_drain_list, elm, 349 epair_ifp_drain, ifp_next); 350 /* The cached ifp goes off the list. */ 351 sc = ifp->if_softc; 352 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 353 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 354 ("%s: ifp=%p sc->refcount not >= 1: %d", 355 __func__, ifp, sc->refcount)); 356 free(elm, M_EPAIR); 357 } 358 } 359 EPAIR_UNLOCK(epair_dpcpu); 360 } 361 } 362 363 static int 364 epair_add_ifp_for_draining(struct ifnet *ifp) 365 { 366 struct epair_dpcpu *epair_dpcpu; 367 struct epair_softc *sc; 368 struct epair_ifp_drain *elm = NULL; 369 370 sc = ifp->if_softc; 371 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 372 EPAIR_LOCK_ASSERT(epair_dpcpu); 373 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 374 if (elm->ifp == ifp) 375 break; 376 /* If the ifp is there already, return success. */ 377 if (elm != NULL) 378 return (0); 379 380 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 381 if (elm == NULL) 382 return (ENOMEM); 383 384 elm->ifp = ifp; 385 /* Add a reference for the ifp pointer on the list. */ 386 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 387 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 388 389 return (0); 390 } 391 392 static void 393 epair_start_locked(struct ifnet *ifp) 394 { 395 struct epair_dpcpu *epair_dpcpu; 396 struct mbuf *m; 397 struct epair_softc *sc; 398 struct ifnet *oifp; 399 int error; 400 401 DPRINTF("ifp=%p\n", ifp); 402 sc = ifp->if_softc; 403 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 404 EPAIR_LOCK_ASSERT(epair_dpcpu); 405 406 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 407 return; 408 if ((ifp->if_flags & IFF_UP) == 0) 409 return; 410 411 /* 412 * We get packets here from ether_output via if_handoff() 413 * and need to put them into the input queue of the oifp 414 * and call oifp->if_input() via netisr/epair_sintr(). 415 */ 416 oifp = sc->oifp; 417 sc = oifp->if_softc; 418 for (;;) { 419 IFQ_DEQUEUE(&ifp->if_snd, m); 420 if (m == NULL) 421 break; 422 BPF_MTAP(ifp, m); 423 424 /* 425 * In case the outgoing interface is not usable, 426 * drop the packet. 427 */ 428 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 429 (oifp->if_flags & IFF_UP) ==0) { 430 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 431 m_freem(m); 432 continue; 433 } 434 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 435 436 /* 437 * Add a reference so the interface cannot go while the 438 * packet is in transit as we rely on rcvif to stay valid. 439 */ 440 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 441 m->m_pkthdr.rcvif = oifp; 442 CURVNET_SET_QUIET(oifp->if_vnet); 443 error = netisr_queue(NETISR_EPAIR, m); 444 CURVNET_RESTORE(); 445 if (!error) { 446 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 447 /* Someone else received the packet. */ 448 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 449 } else { 450 /* The packet was freed already. */ 451 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 452 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 453 (void) epair_add_ifp_for_draining(ifp); 454 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 455 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 456 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 457 ("%s: ifp=%p sc->refcount not >= 1: %d", 458 __func__, oifp, sc->refcount)); 459 } 460 } 461 } 462 463 static void 464 epair_start(struct ifnet *ifp) 465 { 466 struct epair_dpcpu *epair_dpcpu; 467 468 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 469 EPAIR_LOCK(epair_dpcpu); 470 epair_start_locked(ifp); 471 EPAIR_UNLOCK(epair_dpcpu); 472 } 473 474 static int 475 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 476 { 477 struct epair_dpcpu *epair_dpcpu; 478 struct epair_softc *sc; 479 struct ifnet *oifp; 480 int error, len; 481 short mflags; 482 483 DPRINTF("ifp=%p m=%p\n", ifp, m); 484 sc = ifp->if_softc; 485 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 486 EPAIR_LOCK_ASSERT(epair_dpcpu); 487 488 if (m == NULL) 489 return (0); 490 491 /* 492 * We are not going to use the interface en/dequeue mechanism 493 * on the TX side. We are called from ether_output_frame() 494 * and will put the packet into the incoming queue of the 495 * other interface of our pair via the netsir. 496 */ 497 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 498 m_freem(m); 499 return (ENXIO); 500 } 501 if ((ifp->if_flags & IFF_UP) == 0) { 502 m_freem(m); 503 return (ENETDOWN); 504 } 505 506 BPF_MTAP(ifp, m); 507 508 /* 509 * In case the outgoing interface is not usable, 510 * drop the packet. 511 */ 512 oifp = sc->oifp; 513 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 514 (oifp->if_flags & IFF_UP) ==0) { 515 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 516 m_freem(m); 517 return (0); 518 } 519 len = m->m_pkthdr.len; 520 mflags = m->m_flags; 521 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 522 523 #ifdef ALTQ 524 /* Support ALTQ via the classic if_start() path. */ 525 IF_LOCK(&ifp->if_snd); 526 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 527 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 528 if (error) 529 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 530 IF_UNLOCK(&ifp->if_snd); 531 if (!error) { 532 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 533 if (mflags & (M_BCAST|M_MCAST)) 534 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 535 536 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 537 epair_start_locked(ifp); 538 else 539 (void)epair_add_ifp_for_draining(ifp); 540 } 541 return (error); 542 } 543 IF_UNLOCK(&ifp->if_snd); 544 #endif 545 546 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 547 /* 548 * Our hardware queue is full, try to fall back 549 * queuing to the ifq but do not call ifp->if_start. 550 * Either we are lucky or the packet is gone. 551 */ 552 IFQ_ENQUEUE(&ifp->if_snd, m, error); 553 if (!error) 554 (void)epair_add_ifp_for_draining(ifp); 555 return (error); 556 } 557 sc = oifp->if_softc; 558 /* 559 * Add a reference so the interface cannot go while the 560 * packet is in transit as we rely on rcvif to stay valid. 561 */ 562 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 563 m->m_pkthdr.rcvif = oifp; 564 CURVNET_SET_QUIET(oifp->if_vnet); 565 error = netisr_queue(NETISR_EPAIR, m); 566 CURVNET_RESTORE(); 567 if (!error) { 568 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 569 /* 570 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 571 * but as we bypass all this we have to duplicate 572 * the logic another time. 573 */ 574 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 575 if (mflags & (M_BCAST|M_MCAST)) 576 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 577 /* Someone else received the packet. */ 578 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 579 } else { 580 /* The packet was freed already. */ 581 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 582 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 583 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 584 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 585 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 586 ("%s: ifp=%p sc->refcount not >= 1: %d", 587 __func__, oifp, sc->refcount)); 588 } 589 590 return (error); 591 } 592 593 static int 594 epair_transmit(struct ifnet *ifp, struct mbuf *m) 595 { 596 struct epair_dpcpu *epair_dpcpu; 597 int error; 598 599 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 600 EPAIR_LOCK(epair_dpcpu); 601 error = epair_transmit_locked(ifp, m); 602 EPAIR_UNLOCK(epair_dpcpu); 603 return (error); 604 } 605 606 static void 607 epair_qflush(struct ifnet *ifp) 608 { 609 struct epair_softc *sc; 610 611 sc = ifp->if_softc; 612 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 613 __func__, ifp, sc)); 614 /* 615 * Remove this ifp from all backpointer lists. The interface will not 616 * usable for flushing anyway nor should it have anything to flush 617 * after if_qflush(). 618 */ 619 epair_remove_ifp_from_draining(ifp); 620 621 if (sc->if_qflush) 622 sc->if_qflush(ifp); 623 } 624 625 static int 626 epair_media_change(struct ifnet *ifp __unused) 627 { 628 629 /* Do nothing. */ 630 return (0); 631 } 632 633 static void 634 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 635 { 636 637 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 638 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 639 } 640 641 static int 642 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 643 { 644 struct epair_softc *sc; 645 struct ifreq *ifr; 646 int error; 647 648 ifr = (struct ifreq *)data; 649 switch (cmd) { 650 case SIOCSIFFLAGS: 651 case SIOCADDMULTI: 652 case SIOCDELMULTI: 653 error = 0; 654 break; 655 656 case SIOCSIFMEDIA: 657 case SIOCGIFMEDIA: 658 sc = ifp->if_softc; 659 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 660 break; 661 662 case SIOCSIFMTU: 663 /* We basically allow all kinds of MTUs. */ 664 ifp->if_mtu = ifr->ifr_mtu; 665 error = 0; 666 break; 667 668 default: 669 /* Let the common ethernet handler process this. */ 670 error = ether_ioctl(ifp, cmd, data); 671 break; 672 } 673 674 return (error); 675 } 676 677 static void 678 epair_init(void *dummy __unused) 679 { 680 } 681 682 683 /* 684 * Interface cloning functions. 685 * We use our private ones so that we can create/destroy our secondary 686 * device along with the primary one. 687 */ 688 static int 689 epair_clone_match(struct if_clone *ifc, const char *name) 690 { 691 const char *cp; 692 693 DPRINTF("name='%s'\n", name); 694 695 /* 696 * Our base name is epair. 697 * Our interfaces will be named epair<n>[ab]. 698 * So accept anything of the following list: 699 * - epair 700 * - epair<n> 701 * but not the epair<n>[ab] versions. 702 */ 703 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 704 return (0); 705 706 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 707 if (*cp < '0' || *cp > '9') 708 return (0); 709 } 710 711 return (1); 712 } 713 714 static void 715 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb) 716 { 717 struct ifnet *ifp; 718 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 719 720 ifp = scb->ifp; 721 /* Copy epairNa etheraddr and change the last byte. */ 722 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN); 723 eaddr[5] = 0x0b; 724 ether_ifattach(ifp, eaddr); 725 726 if_clone_addif(ifc, ifp); 727 } 728 729 static int 730 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 731 { 732 struct epair_softc *sca, *scb; 733 struct ifnet *ifp; 734 char *dp; 735 int error, unit, wildcard; 736 uint64_t hostid; 737 uint32_t key[3]; 738 uint32_t hash; 739 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 740 741 /* Try to see if a special unit was requested. */ 742 error = ifc_name2unit(name, &unit); 743 if (error != 0) 744 return (error); 745 wildcard = (unit < 0); 746 747 error = ifc_alloc_unit(ifc, &unit); 748 if (error != 0) 749 return (error); 750 751 /* 752 * If no unit had been given, we need to adjust the ifName. 753 * Also make sure there is space for our extra [ab] suffix. 754 */ 755 for (dp = name; *dp != '\0'; dp++); 756 if (wildcard) { 757 error = snprintf(dp, len - (dp - name), "%d", unit); 758 if (error > len - (dp - name) - 1) { 759 /* ifName too long. */ 760 ifc_free_unit(ifc, unit); 761 return (ENOSPC); 762 } 763 dp += error; 764 } 765 if (len - (dp - name) - 1 < 1) { 766 /* No space left for our [ab] suffix. */ 767 ifc_free_unit(ifc, unit); 768 return (ENOSPC); 769 } 770 *dp = 'b'; 771 /* Must not change dp so we can replace 'a' by 'b' later. */ 772 *(dp+1) = '\0'; 773 774 /* Check if 'a' and 'b' interfaces already exist. */ 775 if (ifunit(name) != NULL) 776 return (EEXIST); 777 *dp = 'a'; 778 if (ifunit(name) != NULL) 779 return (EEXIST); 780 781 /* Allocate memory for both [ab] interfaces */ 782 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 783 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 784 sca->ifp = if_alloc(IFT_ETHER); 785 if (sca->ifp == NULL) { 786 free(sca, M_EPAIR); 787 ifc_free_unit(ifc, unit); 788 return (ENOSPC); 789 } 790 791 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 792 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 793 scb->ifp = if_alloc(IFT_ETHER); 794 if (scb->ifp == NULL) { 795 free(scb, M_EPAIR); 796 if_free(sca->ifp); 797 free(sca, M_EPAIR); 798 ifc_free_unit(ifc, unit); 799 return (ENOSPC); 800 } 801 802 /* 803 * Cross-reference the interfaces so we will be able to free both. 804 */ 805 sca->oifp = scb->ifp; 806 scb->oifp = sca->ifp; 807 808 /* 809 * Calculate the cpuid for netisr queueing based on the 810 * ifIndex of the interfaces. As long as we cannot configure 811 * this or use cpuset information easily we cannot guarantee 812 * cache locality but we can at least allow parallelism. 813 */ 814 sca->cpuid = 815 netisr_get_cpuid(sca->ifp->if_index); 816 scb->cpuid = 817 netisr_get_cpuid(scb->ifp->if_index); 818 819 /* Initialise pseudo media types. */ 820 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 821 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 822 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 823 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 824 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 825 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 826 827 /* Finish initialization of interface <n>a. */ 828 ifp = sca->ifp; 829 ifp->if_softc = sca; 830 strlcpy(ifp->if_xname, name, IFNAMSIZ); 831 ifp->if_dname = epairname; 832 ifp->if_dunit = unit; 833 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 834 ifp->if_capabilities = IFCAP_VLAN_MTU; 835 ifp->if_capenable = IFCAP_VLAN_MTU; 836 ifp->if_start = epair_start; 837 ifp->if_ioctl = epair_ioctl; 838 ifp->if_init = epair_init; 839 if_setsendqlen(ifp, ifqmaxlen); 840 if_setsendqready(ifp); 841 842 /* 843 * Calculate the etheraddr hashing the hostid and the 844 * interface index. The result would be hopefully unique. 845 * Note that the "a" component of an epair instance may get moved 846 * to a different VNET after creation. In that case its index 847 * will be freed and the index can get reused by new epair instance. 848 * Make sure we do not create same etheraddr again. 849 */ 850 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid); 851 if (hostid == 0) 852 arc4rand(&hostid, sizeof(hostid), 0); 853 854 if (ifp->if_index > next_index) 855 next_index = ifp->if_index; 856 else 857 next_index++; 858 859 key[0] = (uint32_t)next_index; 860 key[1] = (uint32_t)(hostid & 0xffffffff); 861 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff); 862 hash = jenkins_hash32(key, 3, 0); 863 864 eaddr[0] = 0x02; 865 memcpy(&eaddr[1], &hash, 4); 866 eaddr[5] = 0x0a; 867 ether_ifattach(ifp, eaddr); 868 sca->if_qflush = ifp->if_qflush; 869 ifp->if_qflush = epair_qflush; 870 ifp->if_transmit = epair_transmit; 871 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 872 873 /* Swap the name and finish initialization of interface <n>b. */ 874 *dp = 'b'; 875 876 ifp = scb->ifp; 877 ifp->if_softc = scb; 878 strlcpy(ifp->if_xname, name, IFNAMSIZ); 879 ifp->if_dname = epairname; 880 ifp->if_dunit = unit; 881 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 882 ifp->if_capabilities = IFCAP_VLAN_MTU; 883 ifp->if_capenable = IFCAP_VLAN_MTU; 884 ifp->if_start = epair_start; 885 ifp->if_ioctl = epair_ioctl; 886 ifp->if_init = epair_init; 887 if_setsendqlen(ifp, ifqmaxlen); 888 if_setsendqready(ifp); 889 /* We need to play some tricks here for the second interface. */ 890 strlcpy(name, epairname, len); 891 892 /* Correctly set the name for the cloner list. */ 893 strlcpy(name, scb->ifp->if_xname, len); 894 epair_clone_add(ifc, scb); 895 896 scb->if_qflush = ifp->if_qflush; 897 ifp->if_qflush = epair_qflush; 898 ifp->if_transmit = epair_transmit; 899 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 900 901 /* 902 * Restore name to <n>a as the ifp for this will go into the 903 * cloner list for the initial call. 904 */ 905 strlcpy(name, sca->ifp->if_xname, len); 906 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 907 908 /* Tell the world, that we are ready to rock. */ 909 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 910 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 911 if_link_state_change(sca->ifp, LINK_STATE_UP); 912 if_link_state_change(scb->ifp, LINK_STATE_UP); 913 914 return (0); 915 } 916 917 static int 918 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 919 { 920 struct ifnet *oifp; 921 struct epair_softc *sca, *scb; 922 int unit, error; 923 924 DPRINTF("ifp=%p\n", ifp); 925 926 /* 927 * In case we called into if_clone_destroyif() ourselves 928 * again to remove the second interface, the softc will be 929 * NULL. In that case so not do anything but return success. 930 */ 931 if (ifp->if_softc == NULL) 932 return (0); 933 934 unit = ifp->if_dunit; 935 sca = ifp->if_softc; 936 oifp = sca->oifp; 937 scb = oifp->if_softc; 938 939 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 940 if_link_state_change(ifp, LINK_STATE_DOWN); 941 if_link_state_change(oifp, LINK_STATE_DOWN); 942 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 943 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 944 945 /* 946 * Get rid of our second half. As the other of the two 947 * interfaces may reside in a different vnet, we need to 948 * switch before freeing them. 949 */ 950 CURVNET_SET_QUIET(oifp->if_vnet); 951 ether_ifdetach(oifp); 952 /* 953 * Wait for all packets to be dispatched to if_input. 954 * The numbers can only go down as the interface is 955 * detached so there is no need to use atomics. 956 */ 957 DPRINTF("scb refcnt=%u\n", scb->refcount); 958 EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, 959 ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); 960 oifp->if_softc = NULL; 961 error = if_clone_destroyif(ifc, oifp); 962 if (error) 963 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 964 __func__, error); 965 if_free(oifp); 966 ifmedia_removeall(&scb->media); 967 free(scb, M_EPAIR); 968 CURVNET_RESTORE(); 969 970 ether_ifdetach(ifp); 971 /* 972 * Wait for all packets to be dispatched to if_input. 973 */ 974 DPRINTF("sca refcnt=%u\n", sca->refcount); 975 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, 976 ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); 977 if_free(ifp); 978 ifmedia_removeall(&sca->media); 979 free(sca, M_EPAIR); 980 ifc_free_unit(ifc, unit); 981 982 return (0); 983 } 984 985 static void 986 vnet_epair_init(const void *unused __unused) 987 { 988 989 V_epair_cloner = if_clone_advanced(epairname, 0, 990 epair_clone_match, epair_clone_create, epair_clone_destroy); 991 #ifdef VIMAGE 992 netisr_register_vnet(&epair_nh); 993 #endif 994 } 995 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 996 vnet_epair_init, NULL); 997 998 static void 999 vnet_epair_uninit(const void *unused __unused) 1000 { 1001 1002 #ifdef VIMAGE 1003 netisr_unregister_vnet(&epair_nh); 1004 #endif 1005 if_clone_detach(V_epair_cloner); 1006 } 1007 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 1008 vnet_epair_uninit, NULL); 1009 1010 static void 1011 epair_uninit(const void *unused __unused) 1012 { 1013 netisr_unregister(&epair_nh); 1014 epair_dpcpu_detach(); 1015 if (bootverbose) 1016 printf("%s unloaded.\n", epairname); 1017 } 1018 SYSUNINIT(epair_uninit, SI_SUB_INIT_IF, SI_ORDER_MIDDLE, 1019 epair_uninit, NULL); 1020 1021 static int 1022 epair_modevent(module_t mod, int type, void *data) 1023 { 1024 int qlimit; 1025 1026 switch (type) { 1027 case MOD_LOAD: 1028 /* For now limit us to one global mutex and one inq. */ 1029 epair_dpcpu_init(); 1030 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 1031 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 1032 epair_nh.nh_qlimit = qlimit; 1033 netisr_register(&epair_nh); 1034 if (bootverbose) 1035 printf("%s initialized.\n", epairname); 1036 break; 1037 case MOD_UNLOAD: 1038 /* Handled in epair_uninit() */ 1039 break; 1040 default: 1041 return (EOPNOTSUPP); 1042 } 1043 return (0); 1044 } 1045 1046 static moduledata_t epair_mod = { 1047 "if_epair", 1048 epair_modevent, 1049 0 1050 }; 1051 1052 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 1053 MODULE_VERSION(if_epair, 1); 1054