1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008 The FreeBSD Foundation 5 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 6 * All rights reserved. 7 * 8 * This software was developed by CK Software GmbH under sponsorship 9 * from the FreeBSD Foundation. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * A pair of virtual back-to-back connected ethernet like interfaces 35 * (``two interfaces with a virtual cross-over cable''). 36 * 37 * This is mostly intended to be used to provide connectivity between 38 * different virtual network stack instances. 39 */ 40 /* 41 * Things to re-think once we have more experience: 42 * - ifp->if_reassign function once we can test with vimage. Depending on 43 * how if_vmove() is going to be improved. 44 * - Real random etheraddrs that are checked to be uniquish; we would need 45 * to re-do them in case we move the interface between network stacks 46 * in a private if_reassign function. 47 * In case we bridge to a real interface/network or between indepedent 48 * epairs on multiple stacks/machines, we may need this. 49 * For now let the user handle that case. 50 */ 51 52 #include <sys/cdefs.h> 53 __FBSDID("$FreeBSD$"); 54 55 #include <sys/param.h> 56 #include <sys/kernel.h> 57 #include <sys/malloc.h> 58 #include <sys/mbuf.h> 59 #include <sys/module.h> 60 #include <sys/refcount.h> 61 #include <sys/queue.h> 62 #include <sys/smp.h> 63 #include <sys/socket.h> 64 #include <sys/sockio.h> 65 #include <sys/sysctl.h> 66 #include <sys/types.h> 67 68 #include <net/bpf.h> 69 #include <net/ethernet.h> 70 #include <net/if.h> 71 #include <net/if_var.h> 72 #include <net/if_clone.h> 73 #include <net/if_media.h> 74 #include <net/if_var.h> 75 #include <net/if_types.h> 76 #include <net/netisr.h> 77 #include <net/vnet.h> 78 79 SYSCTL_DECL(_net_link); 80 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 81 82 #ifdef EPAIR_DEBUG 83 static int epair_debug = 0; 84 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 85 &epair_debug, 0, "if_epair(4) debugging."); 86 #define DPRINTF(fmt, arg...) \ 87 if (epair_debug) \ 88 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 89 #else 90 #define DPRINTF(fmt, arg...) 91 #endif 92 93 static void epair_nh_sintr(struct mbuf *); 94 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 95 static void epair_nh_drainedcpu(u_int); 96 97 static void epair_start_locked(struct ifnet *); 98 static int epair_media_change(struct ifnet *); 99 static void epair_media_status(struct ifnet *, struct ifmediareq *); 100 101 static int epair_clone_match(struct if_clone *, const char *); 102 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 103 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 104 105 static const char epairname[] = "epair"; 106 107 /* Netisr related definitions and sysctl. */ 108 static struct netisr_handler epair_nh = { 109 .nh_name = epairname, 110 .nh_proto = NETISR_EPAIR, 111 .nh_policy = NETISR_POLICY_CPU, 112 .nh_handler = epair_nh_sintr, 113 .nh_m2cpuid = epair_nh_m2cpuid, 114 .nh_drainedcpu = epair_nh_drainedcpu, 115 }; 116 117 static int 118 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 119 { 120 int error, qlimit; 121 122 netisr_getqlimit(&epair_nh, &qlimit); 123 error = sysctl_handle_int(oidp, &qlimit, 0, req); 124 if (error || !req->newptr) 125 return (error); 126 if (qlimit < 1) 127 return (EINVAL); 128 return (netisr_setqlimit(&epair_nh, qlimit)); 129 } 130 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 131 0, 0, sysctl_epair_netisr_maxqlen, "I", 132 "Maximum if_epair(4) netisr \"hw\" queue length"); 133 134 struct epair_softc { 135 struct ifnet *ifp; /* This ifp. */ 136 struct ifnet *oifp; /* other ifp of pair. */ 137 struct ifmedia media; /* Media config (fake). */ 138 u_int refcount; /* # of mbufs in flight. */ 139 u_int cpuid; /* CPU ID assigned upon creation. */ 140 void (*if_qflush)(struct ifnet *); 141 /* Original if_qflush routine. */ 142 }; 143 144 /* 145 * Per-CPU list of ifps with data in the ifq that needs to be flushed 146 * to the netisr ``hw'' queue before we allow any further direct queuing 147 * to the ``hw'' queue. 148 */ 149 struct epair_ifp_drain { 150 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 151 struct ifnet *ifp; 152 }; 153 STAILQ_HEAD(eid_list, epair_ifp_drain); 154 155 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 156 "if_epair", NULL, MTX_DEF) 157 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 158 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 159 MA_OWNED) 160 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 161 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 162 163 #ifdef INVARIANTS 164 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 165 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 166 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 167 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 168 #else 169 #define EPAIR_REFCOUNT_INIT(r, v) 170 #define EPAIR_REFCOUNT_AQUIRE(r) 171 #define EPAIR_REFCOUNT_RELEASE(r) 172 #define EPAIR_REFCOUNT_ASSERT(a, p) 173 #endif 174 175 static MALLOC_DEFINE(M_EPAIR, epairname, 176 "Pair of virtual cross-over connected Ethernet-like interfaces"); 177 178 static VNET_DEFINE(struct if_clone *, epair_cloner); 179 #define V_epair_cloner VNET(epair_cloner) 180 181 /* 182 * DPCPU area and functions. 183 */ 184 struct epair_dpcpu { 185 struct mtx if_epair_mtx; /* Per-CPU locking. */ 186 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 187 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 188 * data in the ifq. */ 189 }; 190 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 191 192 static void 193 epair_dpcpu_init(void) 194 { 195 struct epair_dpcpu *epair_dpcpu; 196 struct eid_list *s; 197 u_int cpuid; 198 199 CPU_FOREACH(cpuid) { 200 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 201 202 /* Initialize per-cpu lock. */ 203 EPAIR_LOCK_INIT(epair_dpcpu); 204 205 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 206 epair_dpcpu->epair_drv_flags = 0; 207 208 /* 209 * Initialize per-cpu drain list. 210 * Manually do what STAILQ_HEAD_INITIALIZER would do. 211 */ 212 s = &epair_dpcpu->epair_ifp_drain_list; 213 s->stqh_first = NULL; 214 s->stqh_last = &s->stqh_first; 215 } 216 } 217 218 static void 219 epair_dpcpu_detach(void) 220 { 221 struct epair_dpcpu *epair_dpcpu; 222 u_int cpuid; 223 224 CPU_FOREACH(cpuid) { 225 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 226 227 /* Destroy per-cpu lock. */ 228 EPAIR_LOCK_DESTROY(epair_dpcpu); 229 } 230 } 231 232 /* 233 * Helper functions. 234 */ 235 static u_int 236 cpuid_from_ifp(struct ifnet *ifp) 237 { 238 struct epair_softc *sc; 239 240 if (ifp == NULL) 241 return (0); 242 sc = ifp->if_softc; 243 244 return (sc->cpuid); 245 } 246 247 /* 248 * Netisr handler functions. 249 */ 250 static void 251 epair_nh_sintr(struct mbuf *m) 252 { 253 struct ifnet *ifp; 254 struct epair_softc *sc; 255 256 ifp = m->m_pkthdr.rcvif; 257 (*ifp->if_input)(ifp, m); 258 sc = ifp->if_softc; 259 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 260 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 261 ("%s: ifp=%p sc->refcount not >= 1: %d", 262 __func__, ifp, sc->refcount)); 263 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 264 } 265 266 static struct mbuf * 267 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 268 { 269 270 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 271 272 return (m); 273 } 274 275 static void 276 epair_nh_drainedcpu(u_int cpuid) 277 { 278 struct epair_dpcpu *epair_dpcpu; 279 struct epair_ifp_drain *elm, *tvar; 280 struct ifnet *ifp; 281 282 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 283 EPAIR_LOCK(epair_dpcpu); 284 /* 285 * Assume our "hw" queue and possibly ifq will be emptied 286 * again. In case we will overflow the "hw" queue while 287 * draining, epair_start_locked will set IFF_DRV_OACTIVE 288 * again and we will stop and return. 289 */ 290 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 291 ifp_next, tvar) { 292 ifp = elm->ifp; 293 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 294 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 295 epair_start_locked(ifp); 296 297 IFQ_LOCK(&ifp->if_snd); 298 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 299 struct epair_softc *sc; 300 301 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 302 elm, epair_ifp_drain, ifp_next); 303 /* The cached ifp goes off the list. */ 304 sc = ifp->if_softc; 305 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 306 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 307 ("%s: ifp=%p sc->refcount not >= 1: %d", 308 __func__, ifp, sc->refcount)); 309 free(elm, M_EPAIR); 310 } 311 IFQ_UNLOCK(&ifp->if_snd); 312 313 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 314 /* Our "hw"q overflew again. */ 315 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 316 DPRINTF("hw queue length overflow at %u\n", 317 epair_nh.nh_qlimit); 318 break; 319 } 320 } 321 EPAIR_UNLOCK(epair_dpcpu); 322 } 323 324 /* 325 * Network interface (`if') related functions. 326 */ 327 static void 328 epair_remove_ifp_from_draining(struct ifnet *ifp) 329 { 330 struct epair_dpcpu *epair_dpcpu; 331 struct epair_ifp_drain *elm, *tvar; 332 u_int cpuid; 333 334 CPU_FOREACH(cpuid) { 335 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 336 EPAIR_LOCK(epair_dpcpu); 337 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 338 ifp_next, tvar) { 339 if (ifp == elm->ifp) { 340 struct epair_softc *sc; 341 342 STAILQ_REMOVE( 343 &epair_dpcpu->epair_ifp_drain_list, elm, 344 epair_ifp_drain, ifp_next); 345 /* The cached ifp goes off the list. */ 346 sc = ifp->if_softc; 347 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 348 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 349 ("%s: ifp=%p sc->refcount not >= 1: %d", 350 __func__, ifp, sc->refcount)); 351 free(elm, M_EPAIR); 352 } 353 } 354 EPAIR_UNLOCK(epair_dpcpu); 355 } 356 } 357 358 static int 359 epair_add_ifp_for_draining(struct ifnet *ifp) 360 { 361 struct epair_dpcpu *epair_dpcpu; 362 struct epair_softc *sc; 363 struct epair_ifp_drain *elm = NULL; 364 365 sc = ifp->if_softc; 366 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 367 EPAIR_LOCK_ASSERT(epair_dpcpu); 368 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 369 if (elm->ifp == ifp) 370 break; 371 /* If the ifp is there already, return success. */ 372 if (elm != NULL) 373 return (0); 374 375 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 376 if (elm == NULL) 377 return (ENOMEM); 378 379 elm->ifp = ifp; 380 /* Add a reference for the ifp pointer on the list. */ 381 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 382 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 383 384 return (0); 385 } 386 387 static void 388 epair_start_locked(struct ifnet *ifp) 389 { 390 struct epair_dpcpu *epair_dpcpu; 391 struct mbuf *m; 392 struct epair_softc *sc; 393 struct ifnet *oifp; 394 int error; 395 396 DPRINTF("ifp=%p\n", ifp); 397 sc = ifp->if_softc; 398 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 399 EPAIR_LOCK_ASSERT(epair_dpcpu); 400 401 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 402 return; 403 if ((ifp->if_flags & IFF_UP) == 0) 404 return; 405 406 /* 407 * We get packets here from ether_output via if_handoff() 408 * and need to put them into the input queue of the oifp 409 * and call oifp->if_input() via netisr/epair_sintr(). 410 */ 411 oifp = sc->oifp; 412 sc = oifp->if_softc; 413 for (;;) { 414 IFQ_DEQUEUE(&ifp->if_snd, m); 415 if (m == NULL) 416 break; 417 BPF_MTAP(ifp, m); 418 419 /* 420 * In case the outgoing interface is not usable, 421 * drop the packet. 422 */ 423 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 424 (oifp->if_flags & IFF_UP) ==0) { 425 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 426 m_freem(m); 427 continue; 428 } 429 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 430 431 /* 432 * Add a reference so the interface cannot go while the 433 * packet is in transit as we rely on rcvif to stay valid. 434 */ 435 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 436 m->m_pkthdr.rcvif = oifp; 437 CURVNET_SET_QUIET(oifp->if_vnet); 438 error = netisr_queue(NETISR_EPAIR, m); 439 CURVNET_RESTORE(); 440 if (!error) { 441 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 442 /* Someone else received the packet. */ 443 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 444 } else { 445 /* The packet was freed already. */ 446 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 447 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 448 (void) epair_add_ifp_for_draining(ifp); 449 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 450 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 451 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 452 ("%s: ifp=%p sc->refcount not >= 1: %d", 453 __func__, oifp, sc->refcount)); 454 } 455 } 456 } 457 458 static void 459 epair_start(struct ifnet *ifp) 460 { 461 struct epair_dpcpu *epair_dpcpu; 462 463 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 464 EPAIR_LOCK(epair_dpcpu); 465 epair_start_locked(ifp); 466 EPAIR_UNLOCK(epair_dpcpu); 467 } 468 469 static int 470 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 471 { 472 struct epair_dpcpu *epair_dpcpu; 473 struct epair_softc *sc; 474 struct ifnet *oifp; 475 int error, len; 476 short mflags; 477 478 DPRINTF("ifp=%p m=%p\n", ifp, m); 479 sc = ifp->if_softc; 480 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 481 EPAIR_LOCK_ASSERT(epair_dpcpu); 482 483 if (m == NULL) 484 return (0); 485 486 /* 487 * We are not going to use the interface en/dequeue mechanism 488 * on the TX side. We are called from ether_output_frame() 489 * and will put the packet into the incoming queue of the 490 * other interface of our pair via the netsir. 491 */ 492 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 493 m_freem(m); 494 return (ENXIO); 495 } 496 if ((ifp->if_flags & IFF_UP) == 0) { 497 m_freem(m); 498 return (ENETDOWN); 499 } 500 501 BPF_MTAP(ifp, m); 502 503 /* 504 * In case the outgoing interface is not usable, 505 * drop the packet. 506 */ 507 oifp = sc->oifp; 508 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 509 (oifp->if_flags & IFF_UP) ==0) { 510 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 511 m_freem(m); 512 return (0); 513 } 514 len = m->m_pkthdr.len; 515 mflags = m->m_flags; 516 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 517 518 #ifdef ALTQ 519 /* Support ALTQ via the classic if_start() path. */ 520 IF_LOCK(&ifp->if_snd); 521 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 522 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 523 if (error) 524 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 525 IF_UNLOCK(&ifp->if_snd); 526 if (!error) { 527 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 528 if (mflags & (M_BCAST|M_MCAST)) 529 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 530 531 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 532 epair_start_locked(ifp); 533 else 534 (void)epair_add_ifp_for_draining(ifp); 535 } 536 return (error); 537 } 538 IF_UNLOCK(&ifp->if_snd); 539 #endif 540 541 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 542 /* 543 * Our hardware queue is full, try to fall back 544 * queuing to the ifq but do not call ifp->if_start. 545 * Either we are lucky or the packet is gone. 546 */ 547 IFQ_ENQUEUE(&ifp->if_snd, m, error); 548 if (!error) 549 (void)epair_add_ifp_for_draining(ifp); 550 return (error); 551 } 552 sc = oifp->if_softc; 553 /* 554 * Add a reference so the interface cannot go while the 555 * packet is in transit as we rely on rcvif to stay valid. 556 */ 557 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 558 m->m_pkthdr.rcvif = oifp; 559 CURVNET_SET_QUIET(oifp->if_vnet); 560 error = netisr_queue(NETISR_EPAIR, m); 561 CURVNET_RESTORE(); 562 if (!error) { 563 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 564 /* 565 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 566 * but as we bypass all this we have to duplicate 567 * the logic another time. 568 */ 569 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 570 if (mflags & (M_BCAST|M_MCAST)) 571 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 572 /* Someone else received the packet. */ 573 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 574 } else { 575 /* The packet was freed already. */ 576 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 577 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 578 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 579 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 580 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 581 ("%s: ifp=%p sc->refcount not >= 1: %d", 582 __func__, oifp, sc->refcount)); 583 } 584 585 return (error); 586 } 587 588 static int 589 epair_transmit(struct ifnet *ifp, struct mbuf *m) 590 { 591 struct epair_dpcpu *epair_dpcpu; 592 int error; 593 594 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 595 EPAIR_LOCK(epair_dpcpu); 596 error = epair_transmit_locked(ifp, m); 597 EPAIR_UNLOCK(epair_dpcpu); 598 return (error); 599 } 600 601 static void 602 epair_qflush(struct ifnet *ifp) 603 { 604 struct epair_softc *sc; 605 606 sc = ifp->if_softc; 607 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 608 __func__, ifp, sc)); 609 /* 610 * Remove this ifp from all backpointer lists. The interface will not 611 * usable for flushing anyway nor should it have anything to flush 612 * after if_qflush(). 613 */ 614 epair_remove_ifp_from_draining(ifp); 615 616 if (sc->if_qflush) 617 sc->if_qflush(ifp); 618 } 619 620 static int 621 epair_media_change(struct ifnet *ifp __unused) 622 { 623 624 /* Do nothing. */ 625 return (0); 626 } 627 628 static void 629 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 630 { 631 632 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 633 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 634 } 635 636 static int 637 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 638 { 639 struct epair_softc *sc; 640 struct ifreq *ifr; 641 int error; 642 643 ifr = (struct ifreq *)data; 644 switch (cmd) { 645 case SIOCSIFFLAGS: 646 case SIOCADDMULTI: 647 case SIOCDELMULTI: 648 error = 0; 649 break; 650 651 case SIOCSIFMEDIA: 652 case SIOCGIFMEDIA: 653 sc = ifp->if_softc; 654 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 655 break; 656 657 case SIOCSIFMTU: 658 /* We basically allow all kinds of MTUs. */ 659 ifp->if_mtu = ifr->ifr_mtu; 660 error = 0; 661 break; 662 663 default: 664 /* Let the common ethernet handler process this. */ 665 error = ether_ioctl(ifp, cmd, data); 666 break; 667 } 668 669 return (error); 670 } 671 672 static void 673 epair_init(void *dummy __unused) 674 { 675 } 676 677 678 /* 679 * Interface cloning functions. 680 * We use our private ones so that we can create/destroy our secondary 681 * device along with the primary one. 682 */ 683 static int 684 epair_clone_match(struct if_clone *ifc, const char *name) 685 { 686 const char *cp; 687 688 DPRINTF("name='%s'\n", name); 689 690 /* 691 * Our base name is epair. 692 * Our interfaces will be named epair<n>[ab]. 693 * So accept anything of the following list: 694 * - epair 695 * - epair<n> 696 * but not the epair<n>[ab] versions. 697 */ 698 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 699 return (0); 700 701 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 702 if (*cp < '0' || *cp > '9') 703 return (0); 704 } 705 706 return (1); 707 } 708 709 static int 710 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 711 { 712 struct epair_softc *sca, *scb; 713 struct ifnet *ifp; 714 char *dp; 715 int error, unit, wildcard; 716 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 717 718 /* 719 * We are abusing params to create our second interface. 720 * Actually we already created it and called if_clone_create() 721 * for it to do the official insertion procedure the moment we knew 722 * it cannot fail anymore. So just do attach it here. 723 */ 724 if (params) { 725 scb = (struct epair_softc *)params; 726 ifp = scb->ifp; 727 /* Assign a hopefully unique, locally administered etheraddr. */ 728 eaddr[0] = 0x02; 729 eaddr[3] = (ifp->if_index >> 8) & 0xff; 730 eaddr[4] = ifp->if_index & 0xff; 731 eaddr[5] = 0x0b; 732 ether_ifattach(ifp, eaddr); 733 /* Correctly set the name for the cloner list. */ 734 strlcpy(name, scb->ifp->if_xname, len); 735 return (0); 736 } 737 738 /* Try to see if a special unit was requested. */ 739 error = ifc_name2unit(name, &unit); 740 if (error != 0) 741 return (error); 742 wildcard = (unit < 0); 743 744 error = ifc_alloc_unit(ifc, &unit); 745 if (error != 0) 746 return (error); 747 748 /* 749 * If no unit had been given, we need to adjust the ifName. 750 * Also make sure there is space for our extra [ab] suffix. 751 */ 752 for (dp = name; *dp != '\0'; dp++); 753 if (wildcard) { 754 error = snprintf(dp, len - (dp - name), "%d", unit); 755 if (error > len - (dp - name) - 1) { 756 /* ifName too long. */ 757 ifc_free_unit(ifc, unit); 758 return (ENOSPC); 759 } 760 dp += error; 761 } 762 if (len - (dp - name) - 1 < 1) { 763 /* No space left for our [ab] suffix. */ 764 ifc_free_unit(ifc, unit); 765 return (ENOSPC); 766 } 767 *dp = 'b'; 768 /* Must not change dp so we can replace 'a' by 'b' later. */ 769 *(dp+1) = '\0'; 770 771 /* Check if 'a' and 'b' interfaces already exist. */ 772 if (ifunit(name) != NULL) 773 return (EEXIST); 774 *dp = 'a'; 775 if (ifunit(name) != NULL) 776 return (EEXIST); 777 778 /* Allocate memory for both [ab] interfaces */ 779 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 780 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 781 sca->ifp = if_alloc(IFT_ETHER); 782 if (sca->ifp == NULL) { 783 free(sca, M_EPAIR); 784 ifc_free_unit(ifc, unit); 785 return (ENOSPC); 786 } 787 788 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 789 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 790 scb->ifp = if_alloc(IFT_ETHER); 791 if (scb->ifp == NULL) { 792 free(scb, M_EPAIR); 793 if_free(sca->ifp); 794 free(sca, M_EPAIR); 795 ifc_free_unit(ifc, unit); 796 return (ENOSPC); 797 } 798 799 /* 800 * Cross-reference the interfaces so we will be able to free both. 801 */ 802 sca->oifp = scb->ifp; 803 scb->oifp = sca->ifp; 804 805 /* 806 * Calculate the cpuid for netisr queueing based on the 807 * ifIndex of the interfaces. As long as we cannot configure 808 * this or use cpuset information easily we cannot guarantee 809 * cache locality but we can at least allow parallelism. 810 */ 811 sca->cpuid = 812 netisr_get_cpuid(sca->ifp->if_index); 813 scb->cpuid = 814 netisr_get_cpuid(scb->ifp->if_index); 815 816 /* Initialise pseudo media types. */ 817 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 818 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 819 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 820 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 821 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 822 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 823 824 /* Finish initialization of interface <n>a. */ 825 ifp = sca->ifp; 826 ifp->if_softc = sca; 827 strlcpy(ifp->if_xname, name, IFNAMSIZ); 828 ifp->if_dname = epairname; 829 ifp->if_dunit = unit; 830 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 831 ifp->if_capabilities = IFCAP_VLAN_MTU; 832 ifp->if_capenable = IFCAP_VLAN_MTU; 833 ifp->if_start = epair_start; 834 ifp->if_ioctl = epair_ioctl; 835 ifp->if_init = epair_init; 836 if_setsendqlen(ifp, ifqmaxlen); 837 if_setsendqready(ifp); 838 /* Assign a hopefully unique, locally administered etheraddr. */ 839 eaddr[0] = 0x02; 840 eaddr[3] = (ifp->if_index >> 8) & 0xff; 841 eaddr[4] = ifp->if_index & 0xff; 842 eaddr[5] = 0x0a; 843 ether_ifattach(ifp, eaddr); 844 sca->if_qflush = ifp->if_qflush; 845 ifp->if_qflush = epair_qflush; 846 ifp->if_transmit = epair_transmit; 847 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 848 849 /* Swap the name and finish initialization of interface <n>b. */ 850 *dp = 'b'; 851 852 ifp = scb->ifp; 853 ifp->if_softc = scb; 854 strlcpy(ifp->if_xname, name, IFNAMSIZ); 855 ifp->if_dname = epairname; 856 ifp->if_dunit = unit; 857 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 858 ifp->if_capabilities = IFCAP_VLAN_MTU; 859 ifp->if_capenable = IFCAP_VLAN_MTU; 860 ifp->if_start = epair_start; 861 ifp->if_ioctl = epair_ioctl; 862 ifp->if_init = epair_init; 863 if_setsendqlen(ifp, ifqmaxlen); 864 if_setsendqready(ifp); 865 /* We need to play some tricks here for the second interface. */ 866 strlcpy(name, epairname, len); 867 error = if_clone_create(name, len, (caddr_t)scb); 868 if (error) 869 panic("%s: if_clone_create() for our 2nd iface failed: %d", 870 __func__, error); 871 scb->if_qflush = ifp->if_qflush; 872 ifp->if_qflush = epair_qflush; 873 ifp->if_transmit = epair_transmit; 874 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 875 876 /* 877 * Restore name to <n>a as the ifp for this will go into the 878 * cloner list for the initial call. 879 */ 880 strlcpy(name, sca->ifp->if_xname, len); 881 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 882 883 /* Tell the world, that we are ready to rock. */ 884 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 885 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 886 if_link_state_change(sca->ifp, LINK_STATE_UP); 887 if_link_state_change(scb->ifp, LINK_STATE_UP); 888 889 return (0); 890 } 891 892 static int 893 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 894 { 895 struct ifnet *oifp; 896 struct epair_softc *sca, *scb; 897 int unit, error; 898 899 DPRINTF("ifp=%p\n", ifp); 900 901 /* 902 * In case we called into if_clone_destroyif() ourselves 903 * again to remove the second interface, the softc will be 904 * NULL. In that case so not do anything but return success. 905 */ 906 if (ifp->if_softc == NULL) 907 return (0); 908 909 unit = ifp->if_dunit; 910 sca = ifp->if_softc; 911 oifp = sca->oifp; 912 scb = oifp->if_softc; 913 914 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 915 if_link_state_change(ifp, LINK_STATE_DOWN); 916 if_link_state_change(oifp, LINK_STATE_DOWN); 917 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 918 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 919 920 /* 921 * Get rid of our second half. As the other of the two 922 * interfaces may reside in a different vnet, we need to 923 * switch before freeing them. 924 */ 925 CURVNET_SET_QUIET(oifp->if_vnet); 926 ether_ifdetach(oifp); 927 /* 928 * Wait for all packets to be dispatched to if_input. 929 * The numbers can only go down as the interface is 930 * detached so there is no need to use atomics. 931 */ 932 DPRINTF("scb refcnt=%u\n", scb->refcount); 933 EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, 934 ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); 935 oifp->if_softc = NULL; 936 error = if_clone_destroyif(ifc, oifp); 937 if (error) 938 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 939 __func__, error); 940 if_free(oifp); 941 ifmedia_removeall(&scb->media); 942 free(scb, M_EPAIR); 943 CURVNET_RESTORE(); 944 945 ether_ifdetach(ifp); 946 /* 947 * Wait for all packets to be dispatched to if_input. 948 */ 949 DPRINTF("sca refcnt=%u\n", sca->refcount); 950 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, 951 ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); 952 if_free(ifp); 953 ifmedia_removeall(&sca->media); 954 free(sca, M_EPAIR); 955 ifc_free_unit(ifc, unit); 956 957 return (0); 958 } 959 960 static void 961 vnet_epair_init(const void *unused __unused) 962 { 963 964 V_epair_cloner = if_clone_advanced(epairname, 0, 965 epair_clone_match, epair_clone_create, epair_clone_destroy); 966 #ifdef VIMAGE 967 netisr_register_vnet(&epair_nh); 968 #endif 969 } 970 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 971 vnet_epair_init, NULL); 972 973 static void 974 vnet_epair_uninit(const void *unused __unused) 975 { 976 977 #ifdef VIMAGE 978 netisr_unregister_vnet(&epair_nh); 979 #endif 980 if_clone_detach(V_epair_cloner); 981 } 982 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 983 vnet_epair_uninit, NULL); 984 985 static void 986 epair_uninit(const void *unused __unused) 987 { 988 netisr_unregister(&epair_nh); 989 epair_dpcpu_detach(); 990 if (bootverbose) 991 printf("%s unloaded.\n", epairname); 992 } 993 SYSUNINIT(epair_uninit, SI_SUB_INIT_IF, SI_ORDER_MIDDLE, 994 epair_uninit, NULL); 995 996 static int 997 epair_modevent(module_t mod, int type, void *data) 998 { 999 int qlimit; 1000 1001 switch (type) { 1002 case MOD_LOAD: 1003 /* For now limit us to one global mutex and one inq. */ 1004 epair_dpcpu_init(); 1005 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 1006 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 1007 epair_nh.nh_qlimit = qlimit; 1008 netisr_register(&epair_nh); 1009 if (bootverbose) 1010 printf("%s initialized.\n", epairname); 1011 break; 1012 case MOD_UNLOAD: 1013 /* Handled in epair_uninit() */ 1014 break; 1015 default: 1016 return (EOPNOTSUPP); 1017 } 1018 return (0); 1019 } 1020 1021 static moduledata_t epair_mod = { 1022 "if_epair", 1023 epair_modevent, 1024 0 1025 }; 1026 1027 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 1028 MODULE_VERSION(if_epair, 1); 1029