1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008 The FreeBSD Foundation 5 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 6 * All rights reserved. 7 * 8 * This software was developed by CK Software GmbH under sponsorship 9 * from the FreeBSD Foundation. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * A pair of virtual back-to-back connected ethernet like interfaces 35 * (``two interfaces with a virtual cross-over cable''). 36 * 37 * This is mostly intended to be used to provide connectivity between 38 * different virtual network stack instances. 39 */ 40 /* 41 * Things to re-think once we have more experience: 42 * - ifp->if_reassign function once we can test with vimage. Depending on 43 * how if_vmove() is going to be improved. 44 * - Real random etheraddrs that are checked to be uniquish; we would need 45 * to re-do them in case we move the interface between network stacks 46 * in a private if_reassign function. 47 * In case we bridge to a real interface/network or between indepedent 48 * epairs on multiple stacks/machines, we may need this. 49 * For now let the user handle that case. 50 */ 51 52 #include <sys/cdefs.h> 53 __FBSDID("$FreeBSD$"); 54 55 #include <sys/param.h> 56 #include <sys/hash.h> 57 #include <sys/jail.h> 58 #include <sys/kernel.h> 59 #include <sys/libkern.h> 60 #include <sys/malloc.h> 61 #include <sys/mbuf.h> 62 #include <sys/module.h> 63 #include <sys/proc.h> 64 #include <sys/refcount.h> 65 #include <sys/queue.h> 66 #include <sys/smp.h> 67 #include <sys/socket.h> 68 #include <sys/sockio.h> 69 #include <sys/sysctl.h> 70 #include <sys/types.h> 71 72 #include <net/bpf.h> 73 #include <net/ethernet.h> 74 #include <net/if.h> 75 #include <net/if_var.h> 76 #include <net/if_clone.h> 77 #include <net/if_media.h> 78 #include <net/if_var.h> 79 #include <net/if_types.h> 80 #include <net/netisr.h> 81 #include <net/vnet.h> 82 83 SYSCTL_DECL(_net_link); 84 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 85 86 #ifdef EPAIR_DEBUG 87 static int epair_debug = 0; 88 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 89 &epair_debug, 0, "if_epair(4) debugging."); 90 #define DPRINTF(fmt, arg...) \ 91 if (epair_debug) \ 92 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 93 #else 94 #define DPRINTF(fmt, arg...) 95 #endif 96 97 static void epair_nh_sintr(struct mbuf *); 98 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 99 static void epair_nh_drainedcpu(u_int); 100 101 static void epair_start_locked(struct ifnet *); 102 static int epair_media_change(struct ifnet *); 103 static void epair_media_status(struct ifnet *, struct ifmediareq *); 104 105 static int epair_clone_match(struct if_clone *, const char *); 106 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 107 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 108 109 static const char epairname[] = "epair"; 110 111 /* Netisr related definitions and sysctl. */ 112 static struct netisr_handler epair_nh = { 113 .nh_name = epairname, 114 .nh_proto = NETISR_EPAIR, 115 .nh_policy = NETISR_POLICY_CPU, 116 .nh_handler = epair_nh_sintr, 117 .nh_m2cpuid = epair_nh_m2cpuid, 118 .nh_drainedcpu = epair_nh_drainedcpu, 119 }; 120 121 static int 122 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 123 { 124 int error, qlimit; 125 126 netisr_getqlimit(&epair_nh, &qlimit); 127 error = sysctl_handle_int(oidp, &qlimit, 0, req); 128 if (error || !req->newptr) 129 return (error); 130 if (qlimit < 1) 131 return (EINVAL); 132 return (netisr_setqlimit(&epair_nh, qlimit)); 133 } 134 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 135 0, 0, sysctl_epair_netisr_maxqlen, "I", 136 "Maximum if_epair(4) netisr \"hw\" queue length"); 137 138 struct epair_softc { 139 struct ifnet *ifp; /* This ifp. */ 140 struct ifnet *oifp; /* other ifp of pair. */ 141 struct ifmedia media; /* Media config (fake). */ 142 u_int refcount; /* # of mbufs in flight. */ 143 u_int cpuid; /* CPU ID assigned upon creation. */ 144 void (*if_qflush)(struct ifnet *); 145 /* Original if_qflush routine. */ 146 }; 147 148 /* 149 * Per-CPU list of ifps with data in the ifq that needs to be flushed 150 * to the netisr ``hw'' queue before we allow any further direct queuing 151 * to the ``hw'' queue. 152 */ 153 struct epair_ifp_drain { 154 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 155 struct ifnet *ifp; 156 }; 157 STAILQ_HEAD(eid_list, epair_ifp_drain); 158 159 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 160 "if_epair", NULL, MTX_DEF) 161 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 162 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 163 MA_OWNED) 164 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 165 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 166 167 #ifdef INVARIANTS 168 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 169 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 170 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 171 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 172 #else 173 #define EPAIR_REFCOUNT_INIT(r, v) 174 #define EPAIR_REFCOUNT_AQUIRE(r) 175 #define EPAIR_REFCOUNT_RELEASE(r) 176 #define EPAIR_REFCOUNT_ASSERT(a, p) 177 #endif 178 179 static MALLOC_DEFINE(M_EPAIR, epairname, 180 "Pair of virtual cross-over connected Ethernet-like interfaces"); 181 182 static VNET_DEFINE(struct if_clone *, epair_cloner); 183 #define V_epair_cloner VNET(epair_cloner) 184 185 /* 186 * DPCPU area and functions. 187 */ 188 struct epair_dpcpu { 189 struct mtx if_epair_mtx; /* Per-CPU locking. */ 190 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 191 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 192 * data in the ifq. */ 193 }; 194 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 195 196 static void 197 epair_dpcpu_init(void) 198 { 199 struct epair_dpcpu *epair_dpcpu; 200 struct eid_list *s; 201 u_int cpuid; 202 203 CPU_FOREACH(cpuid) { 204 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 205 206 /* Initialize per-cpu lock. */ 207 EPAIR_LOCK_INIT(epair_dpcpu); 208 209 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 210 epair_dpcpu->epair_drv_flags = 0; 211 212 /* 213 * Initialize per-cpu drain list. 214 * Manually do what STAILQ_HEAD_INITIALIZER would do. 215 */ 216 s = &epair_dpcpu->epair_ifp_drain_list; 217 s->stqh_first = NULL; 218 s->stqh_last = &s->stqh_first; 219 } 220 } 221 222 static void 223 epair_dpcpu_detach(void) 224 { 225 struct epair_dpcpu *epair_dpcpu; 226 u_int cpuid; 227 228 CPU_FOREACH(cpuid) { 229 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 230 231 /* Destroy per-cpu lock. */ 232 EPAIR_LOCK_DESTROY(epair_dpcpu); 233 } 234 } 235 236 /* 237 * Helper functions. 238 */ 239 static u_int 240 cpuid_from_ifp(struct ifnet *ifp) 241 { 242 struct epair_softc *sc; 243 244 if (ifp == NULL) 245 return (0); 246 sc = ifp->if_softc; 247 248 return (sc->cpuid); 249 } 250 251 /* 252 * Netisr handler functions. 253 */ 254 static void 255 epair_nh_sintr(struct mbuf *m) 256 { 257 struct ifnet *ifp; 258 struct epair_softc *sc __unused; 259 260 ifp = m->m_pkthdr.rcvif; 261 (*ifp->if_input)(ifp, m); 262 sc = ifp->if_softc; 263 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 264 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 265 ("%s: ifp=%p sc->refcount not >= 1: %d", 266 __func__, ifp, sc->refcount)); 267 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 268 } 269 270 static struct mbuf * 271 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 272 { 273 274 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 275 276 return (m); 277 } 278 279 static void 280 epair_nh_drainedcpu(u_int cpuid) 281 { 282 struct epair_dpcpu *epair_dpcpu; 283 struct epair_ifp_drain *elm, *tvar; 284 struct ifnet *ifp; 285 286 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 287 EPAIR_LOCK(epair_dpcpu); 288 /* 289 * Assume our "hw" queue and possibly ifq will be emptied 290 * again. In case we will overflow the "hw" queue while 291 * draining, epair_start_locked will set IFF_DRV_OACTIVE 292 * again and we will stop and return. 293 */ 294 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 295 ifp_next, tvar) { 296 ifp = elm->ifp; 297 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 298 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 299 epair_start_locked(ifp); 300 301 IFQ_LOCK(&ifp->if_snd); 302 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 303 struct epair_softc *sc __unused; 304 305 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 306 elm, epair_ifp_drain, ifp_next); 307 /* The cached ifp goes off the list. */ 308 sc = ifp->if_softc; 309 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 310 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 311 ("%s: ifp=%p sc->refcount not >= 1: %d", 312 __func__, ifp, sc->refcount)); 313 free(elm, M_EPAIR); 314 } 315 IFQ_UNLOCK(&ifp->if_snd); 316 317 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 318 /* Our "hw"q overflew again. */ 319 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 320 DPRINTF("hw queue length overflow at %u\n", 321 epair_nh.nh_qlimit); 322 break; 323 } 324 } 325 EPAIR_UNLOCK(epair_dpcpu); 326 } 327 328 /* 329 * Network interface (`if') related functions. 330 */ 331 static void 332 epair_remove_ifp_from_draining(struct ifnet *ifp) 333 { 334 struct epair_dpcpu *epair_dpcpu; 335 struct epair_ifp_drain *elm, *tvar; 336 u_int cpuid; 337 338 CPU_FOREACH(cpuid) { 339 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 340 EPAIR_LOCK(epair_dpcpu); 341 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 342 ifp_next, tvar) { 343 if (ifp == elm->ifp) { 344 struct epair_softc *sc __unused; 345 346 STAILQ_REMOVE( 347 &epair_dpcpu->epair_ifp_drain_list, elm, 348 epair_ifp_drain, ifp_next); 349 /* The cached ifp goes off the list. */ 350 sc = ifp->if_softc; 351 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 352 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 353 ("%s: ifp=%p sc->refcount not >= 1: %d", 354 __func__, ifp, sc->refcount)); 355 free(elm, M_EPAIR); 356 } 357 } 358 EPAIR_UNLOCK(epair_dpcpu); 359 } 360 } 361 362 static int 363 epair_add_ifp_for_draining(struct ifnet *ifp) 364 { 365 struct epair_dpcpu *epair_dpcpu; 366 struct epair_softc *sc; 367 struct epair_ifp_drain *elm = NULL; 368 369 sc = ifp->if_softc; 370 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 371 EPAIR_LOCK_ASSERT(epair_dpcpu); 372 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 373 if (elm->ifp == ifp) 374 break; 375 /* If the ifp is there already, return success. */ 376 if (elm != NULL) 377 return (0); 378 379 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 380 if (elm == NULL) 381 return (ENOMEM); 382 383 elm->ifp = ifp; 384 /* Add a reference for the ifp pointer on the list. */ 385 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 386 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 387 388 return (0); 389 } 390 391 static void 392 epair_start_locked(struct ifnet *ifp) 393 { 394 struct epair_dpcpu *epair_dpcpu; 395 struct mbuf *m; 396 struct epair_softc *sc; 397 struct ifnet *oifp; 398 int error; 399 400 DPRINTF("ifp=%p\n", ifp); 401 sc = ifp->if_softc; 402 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 403 EPAIR_LOCK_ASSERT(epair_dpcpu); 404 405 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 406 return; 407 if ((ifp->if_flags & IFF_UP) == 0) 408 return; 409 410 /* 411 * We get packets here from ether_output via if_handoff() 412 * and need to put them into the input queue of the oifp 413 * and call oifp->if_input() via netisr/epair_sintr(). 414 */ 415 oifp = sc->oifp; 416 sc = oifp->if_softc; 417 for (;;) { 418 IFQ_DEQUEUE(&ifp->if_snd, m); 419 if (m == NULL) 420 break; 421 BPF_MTAP(ifp, m); 422 423 /* 424 * In case the outgoing interface is not usable, 425 * drop the packet. 426 */ 427 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 428 (oifp->if_flags & IFF_UP) ==0) { 429 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 430 m_freem(m); 431 continue; 432 } 433 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 434 435 /* 436 * Add a reference so the interface cannot go while the 437 * packet is in transit as we rely on rcvif to stay valid. 438 */ 439 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 440 m->m_pkthdr.rcvif = oifp; 441 CURVNET_SET_QUIET(oifp->if_vnet); 442 error = netisr_queue(NETISR_EPAIR, m); 443 CURVNET_RESTORE(); 444 if (!error) { 445 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 446 /* Someone else received the packet. */ 447 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 448 } else { 449 /* The packet was freed already. */ 450 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 451 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 452 (void) epair_add_ifp_for_draining(ifp); 453 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 454 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 455 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 456 ("%s: ifp=%p sc->refcount not >= 1: %d", 457 __func__, oifp, sc->refcount)); 458 } 459 } 460 } 461 462 static void 463 epair_start(struct ifnet *ifp) 464 { 465 struct epair_dpcpu *epair_dpcpu; 466 467 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 468 EPAIR_LOCK(epair_dpcpu); 469 epair_start_locked(ifp); 470 EPAIR_UNLOCK(epair_dpcpu); 471 } 472 473 static int 474 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 475 { 476 struct epair_dpcpu *epair_dpcpu; 477 struct epair_softc *sc; 478 struct ifnet *oifp; 479 int error, len; 480 short mflags; 481 482 DPRINTF("ifp=%p m=%p\n", ifp, m); 483 sc = ifp->if_softc; 484 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 485 EPAIR_LOCK_ASSERT(epair_dpcpu); 486 487 if (m == NULL) 488 return (0); 489 490 /* 491 * We are not going to use the interface en/dequeue mechanism 492 * on the TX side. We are called from ether_output_frame() 493 * and will put the packet into the incoming queue of the 494 * other interface of our pair via the netsir. 495 */ 496 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 497 m_freem(m); 498 return (ENXIO); 499 } 500 if ((ifp->if_flags & IFF_UP) == 0) { 501 m_freem(m); 502 return (ENETDOWN); 503 } 504 505 BPF_MTAP(ifp, m); 506 507 /* 508 * In case the outgoing interface is not usable, 509 * drop the packet. 510 */ 511 oifp = sc->oifp; 512 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 513 (oifp->if_flags & IFF_UP) ==0) { 514 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 515 m_freem(m); 516 return (0); 517 } 518 len = m->m_pkthdr.len; 519 mflags = m->m_flags; 520 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 521 522 #ifdef ALTQ 523 /* Support ALTQ via the classic if_start() path. */ 524 IF_LOCK(&ifp->if_snd); 525 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 526 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 527 if (error) 528 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 529 IF_UNLOCK(&ifp->if_snd); 530 if (!error) { 531 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 532 if (mflags & (M_BCAST|M_MCAST)) 533 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 534 535 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 536 epair_start_locked(ifp); 537 else 538 (void)epair_add_ifp_for_draining(ifp); 539 } 540 return (error); 541 } 542 IF_UNLOCK(&ifp->if_snd); 543 #endif 544 545 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 546 /* 547 * Our hardware queue is full, try to fall back 548 * queuing to the ifq but do not call ifp->if_start. 549 * Either we are lucky or the packet is gone. 550 */ 551 IFQ_ENQUEUE(&ifp->if_snd, m, error); 552 if (!error) 553 (void)epair_add_ifp_for_draining(ifp); 554 return (error); 555 } 556 sc = oifp->if_softc; 557 /* 558 * Add a reference so the interface cannot go while the 559 * packet is in transit as we rely on rcvif to stay valid. 560 */ 561 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 562 m->m_pkthdr.rcvif = oifp; 563 CURVNET_SET_QUIET(oifp->if_vnet); 564 error = netisr_queue(NETISR_EPAIR, m); 565 CURVNET_RESTORE(); 566 if (!error) { 567 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 568 /* 569 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 570 * but as we bypass all this we have to duplicate 571 * the logic another time. 572 */ 573 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 574 if (mflags & (M_BCAST|M_MCAST)) 575 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 576 /* Someone else received the packet. */ 577 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 578 } else { 579 /* The packet was freed already. */ 580 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 581 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 582 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 583 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 584 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 585 ("%s: ifp=%p sc->refcount not >= 1: %d", 586 __func__, oifp, sc->refcount)); 587 } 588 589 return (error); 590 } 591 592 static int 593 epair_transmit(struct ifnet *ifp, struct mbuf *m) 594 { 595 struct epair_dpcpu *epair_dpcpu; 596 int error; 597 598 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 599 EPAIR_LOCK(epair_dpcpu); 600 error = epair_transmit_locked(ifp, m); 601 EPAIR_UNLOCK(epair_dpcpu); 602 return (error); 603 } 604 605 static void 606 epair_qflush(struct ifnet *ifp) 607 { 608 struct epair_softc *sc; 609 610 sc = ifp->if_softc; 611 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 612 __func__, ifp, sc)); 613 /* 614 * Remove this ifp from all backpointer lists. The interface will not 615 * usable for flushing anyway nor should it have anything to flush 616 * after if_qflush(). 617 */ 618 epair_remove_ifp_from_draining(ifp); 619 620 if (sc->if_qflush) 621 sc->if_qflush(ifp); 622 } 623 624 static int 625 epair_media_change(struct ifnet *ifp __unused) 626 { 627 628 /* Do nothing. */ 629 return (0); 630 } 631 632 static void 633 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 634 { 635 636 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 637 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 638 } 639 640 static int 641 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 642 { 643 struct epair_softc *sc; 644 struct ifreq *ifr; 645 int error; 646 647 ifr = (struct ifreq *)data; 648 switch (cmd) { 649 case SIOCSIFFLAGS: 650 case SIOCADDMULTI: 651 case SIOCDELMULTI: 652 error = 0; 653 break; 654 655 case SIOCSIFMEDIA: 656 case SIOCGIFMEDIA: 657 sc = ifp->if_softc; 658 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 659 break; 660 661 case SIOCSIFMTU: 662 /* We basically allow all kinds of MTUs. */ 663 ifp->if_mtu = ifr->ifr_mtu; 664 error = 0; 665 break; 666 667 default: 668 /* Let the common ethernet handler process this. */ 669 error = ether_ioctl(ifp, cmd, data); 670 break; 671 } 672 673 return (error); 674 } 675 676 static void 677 epair_init(void *dummy __unused) 678 { 679 } 680 681 682 /* 683 * Interface cloning functions. 684 * We use our private ones so that we can create/destroy our secondary 685 * device along with the primary one. 686 */ 687 static int 688 epair_clone_match(struct if_clone *ifc, const char *name) 689 { 690 const char *cp; 691 692 DPRINTF("name='%s'\n", name); 693 694 /* 695 * Our base name is epair. 696 * Our interfaces will be named epair<n>[ab]. 697 * So accept anything of the following list: 698 * - epair 699 * - epair<n> 700 * but not the epair<n>[ab] versions. 701 */ 702 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 703 return (0); 704 705 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 706 if (*cp < '0' || *cp > '9') 707 return (0); 708 } 709 710 return (1); 711 } 712 713 static int 714 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 715 { 716 struct epair_softc *sca, *scb; 717 struct ifnet *ifp; 718 char *dp; 719 int error, unit, wildcard; 720 uint64_t hostid; 721 uint32_t key[3]; 722 uint32_t hash; 723 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 724 725 /* 726 * We are abusing params to create our second interface. 727 * Actually we already created it and called if_clone_create() 728 * for it to do the official insertion procedure the moment we knew 729 * it cannot fail anymore. So just do attach it here. 730 */ 731 if (params) { 732 scb = (struct epair_softc *)params; 733 ifp = scb->ifp; 734 /* Copy epairNa etheraddr and change the last byte. */ 735 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN); 736 eaddr[5] = 0x0b; 737 ether_ifattach(ifp, eaddr); 738 /* Correctly set the name for the cloner list. */ 739 strlcpy(name, ifp->if_xname, len); 740 return (0); 741 } 742 743 /* Try to see if a special unit was requested. */ 744 error = ifc_name2unit(name, &unit); 745 if (error != 0) 746 return (error); 747 wildcard = (unit < 0); 748 749 error = ifc_alloc_unit(ifc, &unit); 750 if (error != 0) 751 return (error); 752 753 /* 754 * If no unit had been given, we need to adjust the ifName. 755 * Also make sure there is space for our extra [ab] suffix. 756 */ 757 for (dp = name; *dp != '\0'; dp++); 758 if (wildcard) { 759 error = snprintf(dp, len - (dp - name), "%d", unit); 760 if (error > len - (dp - name) - 1) { 761 /* ifName too long. */ 762 ifc_free_unit(ifc, unit); 763 return (ENOSPC); 764 } 765 dp += error; 766 } 767 if (len - (dp - name) - 1 < 1) { 768 /* No space left for our [ab] suffix. */ 769 ifc_free_unit(ifc, unit); 770 return (ENOSPC); 771 } 772 *dp = 'b'; 773 /* Must not change dp so we can replace 'a' by 'b' later. */ 774 *(dp+1) = '\0'; 775 776 /* Check if 'a' and 'b' interfaces already exist. */ 777 if (ifunit(name) != NULL) 778 return (EEXIST); 779 *dp = 'a'; 780 if (ifunit(name) != NULL) 781 return (EEXIST); 782 783 /* Allocate memory for both [ab] interfaces */ 784 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 785 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 786 sca->ifp = if_alloc(IFT_ETHER); 787 if (sca->ifp == NULL) { 788 free(sca, M_EPAIR); 789 ifc_free_unit(ifc, unit); 790 return (ENOSPC); 791 } 792 793 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 794 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 795 scb->ifp = if_alloc(IFT_ETHER); 796 if (scb->ifp == NULL) { 797 free(scb, M_EPAIR); 798 if_free(sca->ifp); 799 free(sca, M_EPAIR); 800 ifc_free_unit(ifc, unit); 801 return (ENOSPC); 802 } 803 804 /* 805 * Cross-reference the interfaces so we will be able to free both. 806 */ 807 sca->oifp = scb->ifp; 808 scb->oifp = sca->ifp; 809 810 /* 811 * Calculate the cpuid for netisr queueing based on the 812 * ifIndex of the interfaces. As long as we cannot configure 813 * this or use cpuset information easily we cannot guarantee 814 * cache locality but we can at least allow parallelism. 815 */ 816 sca->cpuid = 817 netisr_get_cpuid(sca->ifp->if_index); 818 scb->cpuid = 819 netisr_get_cpuid(scb->ifp->if_index); 820 821 /* Initialise pseudo media types. */ 822 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 823 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 824 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 825 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 826 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 827 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 828 829 /* Finish initialization of interface <n>a. */ 830 ifp = sca->ifp; 831 ifp->if_softc = sca; 832 strlcpy(ifp->if_xname, name, IFNAMSIZ); 833 ifp->if_dname = epairname; 834 ifp->if_dunit = unit; 835 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 836 ifp->if_capabilities = IFCAP_VLAN_MTU; 837 ifp->if_capenable = IFCAP_VLAN_MTU; 838 ifp->if_start = epair_start; 839 ifp->if_ioctl = epair_ioctl; 840 ifp->if_init = epair_init; 841 if_setsendqlen(ifp, ifqmaxlen); 842 if_setsendqready(ifp); 843 844 /* 845 * Calculate the etheraddr hashing the hostid and the 846 * interface index. The result would be hopefully unique 847 */ 848 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid); 849 if (hostid == 0) 850 arc4rand(&hostid, sizeof(hostid), 0); 851 key[0] = (uint32_t)ifp->if_index; 852 key[1] = (uint32_t)(hostid & 0xffffffff); 853 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff); 854 hash = jenkins_hash32(key, 3, 0); 855 856 eaddr[0] = 0x02; 857 memcpy(&eaddr[1], &hash, 4); 858 eaddr[5] = 0x0a; 859 ether_ifattach(ifp, eaddr); 860 sca->if_qflush = ifp->if_qflush; 861 ifp->if_qflush = epair_qflush; 862 ifp->if_transmit = epair_transmit; 863 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 864 865 /* Swap the name and finish initialization of interface <n>b. */ 866 *dp = 'b'; 867 868 ifp = scb->ifp; 869 ifp->if_softc = scb; 870 strlcpy(ifp->if_xname, name, IFNAMSIZ); 871 ifp->if_dname = epairname; 872 ifp->if_dunit = unit; 873 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 874 ifp->if_capabilities = IFCAP_VLAN_MTU; 875 ifp->if_capenable = IFCAP_VLAN_MTU; 876 ifp->if_start = epair_start; 877 ifp->if_ioctl = epair_ioctl; 878 ifp->if_init = epair_init; 879 if_setsendqlen(ifp, ifqmaxlen); 880 if_setsendqready(ifp); 881 /* We need to play some tricks here for the second interface. */ 882 strlcpy(name, epairname, len); 883 error = if_clone_create(name, len, (caddr_t)scb); 884 if (error) 885 panic("%s: if_clone_create() for our 2nd iface failed: %d", 886 __func__, error); 887 scb->if_qflush = ifp->if_qflush; 888 ifp->if_qflush = epair_qflush; 889 ifp->if_transmit = epair_transmit; 890 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 891 892 /* 893 * Restore name to <n>a as the ifp for this will go into the 894 * cloner list for the initial call. 895 */ 896 strlcpy(name, sca->ifp->if_xname, len); 897 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 898 899 /* Tell the world, that we are ready to rock. */ 900 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 901 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 902 if_link_state_change(sca->ifp, LINK_STATE_UP); 903 if_link_state_change(scb->ifp, LINK_STATE_UP); 904 905 return (0); 906 } 907 908 static int 909 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 910 { 911 struct ifnet *oifp; 912 struct epair_softc *sca, *scb; 913 int unit, error; 914 915 DPRINTF("ifp=%p\n", ifp); 916 917 /* 918 * In case we called into if_clone_destroyif() ourselves 919 * again to remove the second interface, the softc will be 920 * NULL. In that case so not do anything but return success. 921 */ 922 if (ifp->if_softc == NULL) 923 return (0); 924 925 unit = ifp->if_dunit; 926 sca = ifp->if_softc; 927 oifp = sca->oifp; 928 scb = oifp->if_softc; 929 930 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 931 if_link_state_change(ifp, LINK_STATE_DOWN); 932 if_link_state_change(oifp, LINK_STATE_DOWN); 933 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 934 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 935 936 /* 937 * Get rid of our second half. As the other of the two 938 * interfaces may reside in a different vnet, we need to 939 * switch before freeing them. 940 */ 941 CURVNET_SET_QUIET(oifp->if_vnet); 942 ether_ifdetach(oifp); 943 /* 944 * Wait for all packets to be dispatched to if_input. 945 * The numbers can only go down as the interface is 946 * detached so there is no need to use atomics. 947 */ 948 DPRINTF("scb refcnt=%u\n", scb->refcount); 949 EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, 950 ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); 951 oifp->if_softc = NULL; 952 error = if_clone_destroyif(ifc, oifp); 953 if (error) 954 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 955 __func__, error); 956 if_free(oifp); 957 ifmedia_removeall(&scb->media); 958 free(scb, M_EPAIR); 959 CURVNET_RESTORE(); 960 961 ether_ifdetach(ifp); 962 /* 963 * Wait for all packets to be dispatched to if_input. 964 */ 965 DPRINTF("sca refcnt=%u\n", sca->refcount); 966 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, 967 ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); 968 if_free(ifp); 969 ifmedia_removeall(&sca->media); 970 free(sca, M_EPAIR); 971 ifc_free_unit(ifc, unit); 972 973 return (0); 974 } 975 976 static void 977 vnet_epair_init(const void *unused __unused) 978 { 979 980 V_epair_cloner = if_clone_advanced(epairname, 0, 981 epair_clone_match, epair_clone_create, epair_clone_destroy); 982 #ifdef VIMAGE 983 netisr_register_vnet(&epair_nh); 984 #endif 985 } 986 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 987 vnet_epair_init, NULL); 988 989 static void 990 vnet_epair_uninit(const void *unused __unused) 991 { 992 993 #ifdef VIMAGE 994 netisr_unregister_vnet(&epair_nh); 995 #endif 996 if_clone_detach(V_epair_cloner); 997 } 998 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 999 vnet_epair_uninit, NULL); 1000 1001 static void 1002 epair_uninit(const void *unused __unused) 1003 { 1004 netisr_unregister(&epair_nh); 1005 epair_dpcpu_detach(); 1006 if (bootverbose) 1007 printf("%s unloaded.\n", epairname); 1008 } 1009 SYSUNINIT(epair_uninit, SI_SUB_INIT_IF, SI_ORDER_MIDDLE, 1010 epair_uninit, NULL); 1011 1012 static int 1013 epair_modevent(module_t mod, int type, void *data) 1014 { 1015 int qlimit; 1016 1017 switch (type) { 1018 case MOD_LOAD: 1019 /* For now limit us to one global mutex and one inq. */ 1020 epair_dpcpu_init(); 1021 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 1022 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 1023 epair_nh.nh_qlimit = qlimit; 1024 netisr_register(&epair_nh); 1025 if (bootverbose) 1026 printf("%s initialized.\n", epairname); 1027 break; 1028 case MOD_UNLOAD: 1029 /* Handled in epair_uninit() */ 1030 break; 1031 default: 1032 return (EOPNOTSUPP); 1033 } 1034 return (0); 1035 } 1036 1037 static moduledata_t epair_mod = { 1038 "if_epair", 1039 epair_modevent, 1040 0 1041 }; 1042 1043 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 1044 MODULE_VERSION(if_epair, 1); 1045