1 /*- 2 * Copyright (c) 2008 The FreeBSD Foundation 3 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by CK Software GmbH under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * A pair of virtual back-to-back connected ethernet like interfaces 33 * (``two interfaces with a virtual cross-over cable''). 34 * 35 * This is mostly intended to be used to provide connectivity between 36 * different virtual network stack instances. 37 */ 38 /* 39 * Things to re-think once we have more experience: 40 * - ifp->if_reassign function once we can test with vimage. Depending on 41 * how if_vmove() is going to be improved. 42 * - Real random etheraddrs that are checked to be uniquish; we would need 43 * to re-do them in case we move the interface between network stacks 44 * in a private if_reassign function. 45 * In case we bridge to a real interface/network or between indepedent 46 * epairs on multiple stacks/machines, we may need this. 47 * For now let the user handle that case. 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include <sys/param.h> 54 #include <sys/kernel.h> 55 #include <sys/mbuf.h> 56 #include <sys/module.h> 57 #include <sys/refcount.h> 58 #include <sys/queue.h> 59 #include <sys/smp.h> 60 #include <sys/socket.h> 61 #include <sys/sockio.h> 62 #include <sys/sysctl.h> 63 #include <sys/types.h> 64 65 #include <net/bpf.h> 66 #include <net/ethernet.h> 67 #include <net/if.h> 68 #include <net/if_clone.h> 69 #include <net/if_var.h> 70 #include <net/if_types.h> 71 #include <net/netisr.h> 72 #include <net/vnet.h> 73 74 #define EPAIRNAME "epair" 75 76 SYSCTL_DECL(_net_link); 77 SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 78 79 #ifdef EPAIR_DEBUG 80 static int epair_debug = 0; 81 SYSCTL_XINT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 82 &epair_debug, 0, "if_epair(4) debugging."); 83 #define DPRINTF(fmt, arg...) \ 84 if (epair_debug) \ 85 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 86 #else 87 #define DPRINTF(fmt, arg...) 88 #endif 89 90 static void epair_nh_sintr(struct mbuf *); 91 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 92 static void epair_nh_drainedcpu(u_int); 93 94 static void epair_start_locked(struct ifnet *); 95 96 static int epair_clone_match(struct if_clone *, const char *); 97 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 98 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 99 100 /* Netisr realted definitions and sysctl. */ 101 static struct netisr_handler epair_nh = { 102 .nh_name = EPAIRNAME, 103 .nh_proto = NETISR_EPAIR, 104 .nh_policy = NETISR_POLICY_CPU, 105 .nh_handler = epair_nh_sintr, 106 .nh_m2cpuid = epair_nh_m2cpuid, 107 .nh_drainedcpu = epair_nh_drainedcpu, 108 }; 109 110 static int 111 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 112 { 113 int error, qlimit; 114 115 netisr_getqlimit(&epair_nh, &qlimit); 116 error = sysctl_handle_int(oidp, &qlimit, 0, req); 117 if (error || !req->newptr) 118 return (error); 119 if (qlimit < 1) 120 return (EINVAL); 121 return (netisr_setqlimit(&epair_nh, qlimit)); 122 } 123 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 124 0, 0, sysctl_epair_netisr_maxqlen, "I", 125 "Maximum if_epair(4) netisr \"hw\" queue length"); 126 127 struct epair_softc { 128 struct ifnet *ifp; /* This ifp. */ 129 struct ifnet *oifp; /* other ifp of pair. */ 130 u_int refcount; /* # of mbufs in flight. */ 131 u_int cpuid; /* CPU ID assigned upon creation. */ 132 void (*if_qflush)(struct ifnet *); 133 /* Original if_qflush routine. */ 134 }; 135 136 /* 137 * Per-CPU list of ifps with data in the ifq that needs to be flushed 138 * to the netisr ``hw'' queue before we allow any further direct queuing 139 * to the ``hw'' queue. 140 */ 141 struct epair_ifp_drain { 142 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 143 struct ifnet *ifp; 144 }; 145 STAILQ_HEAD(eid_list, epair_ifp_drain); 146 147 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 148 "if_epair", NULL, MTX_DEF) 149 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 150 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 151 MA_OWNED) 152 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 153 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 154 155 #ifdef INVARIANTS 156 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 157 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 158 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 159 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 160 #else 161 #define EPAIR_REFCOUNT_INIT(r, v) 162 #define EPAIR_REFCOUNT_AQUIRE(r) 163 #define EPAIR_REFCOUNT_RELEASE(r) 164 #define EPAIR_REFCOUNT_ASSERT(a, p) 165 #endif 166 167 static MALLOC_DEFINE(M_EPAIR, EPAIRNAME, 168 "Pair of virtual cross-over connected Ethernet-like interfaces"); 169 170 static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER( 171 EPAIRNAME, NULL, IF_MAXUNIT, 172 NULL, epair_clone_match, epair_clone_create, epair_clone_destroy); 173 174 /* 175 * DPCPU area and functions. 176 */ 177 struct epair_dpcpu { 178 struct mtx if_epair_mtx; /* Per-CPU locking. */ 179 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 180 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 181 * data in the ifq. */ 182 }; 183 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 184 185 static void 186 epair_dpcpu_init(void) 187 { 188 struct epair_dpcpu *epair_dpcpu; 189 struct eid_list *s; 190 u_int cpuid; 191 192 for (cpuid = 0; cpuid <= mp_maxid; cpuid++) { 193 if (CPU_ABSENT(cpuid)) 194 continue; 195 196 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 197 198 /* Initialize per-cpu lock. */ 199 EPAIR_LOCK_INIT(epair_dpcpu); 200 201 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 202 epair_dpcpu->epair_drv_flags = 0; 203 204 /* 205 * Initialize per-cpu drain list. 206 * Manually do what STAILQ_HEAD_INITIALIZER would do. 207 */ 208 s = &epair_dpcpu->epair_ifp_drain_list; 209 s->stqh_first = NULL; 210 s->stqh_last = &s->stqh_first; 211 } 212 } 213 214 static void 215 epair_dpcpu_detach(void) 216 { 217 struct epair_dpcpu *epair_dpcpu; 218 u_int cpuid; 219 220 for (cpuid = 0; cpuid <= mp_maxid; cpuid++) { 221 if (CPU_ABSENT(cpuid)) 222 continue; 223 224 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 225 226 /* Destroy per-cpu lock. */ 227 EPAIR_LOCK_DESTROY(epair_dpcpu); 228 } 229 } 230 231 /* 232 * Helper functions. 233 */ 234 static u_int 235 cpuid_from_ifp(struct ifnet *ifp) 236 { 237 struct epair_softc *sc; 238 239 if (ifp == NULL) 240 return (0); 241 sc = ifp->if_softc; 242 243 return (sc->cpuid); 244 } 245 246 /* 247 * Netisr handler functions. 248 */ 249 static void 250 epair_nh_sintr(struct mbuf *m) 251 { 252 struct ifnet *ifp; 253 struct epair_softc *sc; 254 255 ifp = m->m_pkthdr.rcvif; 256 (*ifp->if_input)(ifp, m); 257 sc = ifp->if_softc; 258 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 259 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 260 ("%s: ifp=%p sc->refcount not >= 1: %d", 261 __func__, ifp, sc->refcount)); 262 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 263 } 264 265 static struct mbuf * 266 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 267 { 268 269 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 270 271 return (m); 272 } 273 274 static void 275 epair_nh_drainedcpu(u_int cpuid) 276 { 277 struct epair_dpcpu *epair_dpcpu; 278 struct epair_ifp_drain *elm, *tvar; 279 struct ifnet *ifp; 280 281 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 282 EPAIR_LOCK(epair_dpcpu); 283 /* 284 * Assume our "hw" queue and possibly ifq will be emptied 285 * again. In case we will overflow the "hw" queue while 286 * draining, epair_start_locked will set IFF_DRV_OACTIVE 287 * again and we will stop and return. 288 */ 289 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 290 ifp_next, tvar) { 291 ifp = elm->ifp; 292 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 293 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 294 epair_start_locked(ifp); 295 296 IFQ_LOCK(&ifp->if_snd); 297 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 298 struct epair_softc *sc; 299 300 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 301 elm, epair_ifp_drain, ifp_next); 302 /* The cached ifp goes off the list. */ 303 sc = ifp->if_softc; 304 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 305 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 306 ("%s: ifp=%p sc->refcount not >= 1: %d", 307 __func__, ifp, sc->refcount)); 308 free(elm, M_EPAIR); 309 } 310 IFQ_UNLOCK(&ifp->if_snd); 311 312 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 313 /* Our "hw"q overflew again. */ 314 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE 315 DPRINTF("hw queue length overflow at %u\n", 316 epair_nh.nh_qlimit); 317 break; 318 } 319 } 320 EPAIR_UNLOCK(epair_dpcpu); 321 } 322 323 /* 324 * Network interface (`if') related functions. 325 */ 326 static void 327 epair_remove_ifp_from_draining(struct ifnet *ifp) 328 { 329 struct epair_dpcpu *epair_dpcpu; 330 struct epair_ifp_drain *elm, *tvar; 331 u_int cpuid; 332 333 for (cpuid = 0; cpuid <= mp_maxid; cpuid++) { 334 if (CPU_ABSENT(cpuid)) 335 continue; 336 337 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 338 EPAIR_LOCK(epair_dpcpu); 339 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 340 ifp_next, tvar) { 341 if (ifp == elm->ifp) { 342 struct epair_softc *sc; 343 344 STAILQ_REMOVE( 345 &epair_dpcpu->epair_ifp_drain_list, elm, 346 epair_ifp_drain, ifp_next); 347 /* The cached ifp goes off the list. */ 348 sc = ifp->if_softc; 349 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 350 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 351 ("%s: ifp=%p sc->refcount not >= 1: %d", 352 __func__, ifp, sc->refcount)); 353 free(elm, M_EPAIR); 354 } 355 } 356 EPAIR_UNLOCK(epair_dpcpu); 357 } 358 } 359 360 static int 361 epair_add_ifp_for_draining(struct ifnet *ifp) 362 { 363 struct epair_dpcpu *epair_dpcpu; 364 struct epair_softc *sc; 365 struct epair_ifp_drain *elm = NULL; 366 367 sc = ifp->if_softc; 368 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 369 EPAIR_LOCK_ASSERT(epair_dpcpu); 370 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 371 if (elm->ifp == ifp) 372 break; 373 /* If the ifp is there already, return success. */ 374 if (elm != NULL) 375 return (0); 376 377 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 378 if (elm == NULL) 379 return (ENOMEM); 380 381 elm->ifp = ifp; 382 /* Add a reference for the ifp pointer on the list. */ 383 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 384 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 385 386 return (0); 387 } 388 389 static void 390 epair_start_locked(struct ifnet *ifp) 391 { 392 struct epair_dpcpu *epair_dpcpu; 393 struct mbuf *m; 394 struct epair_softc *sc; 395 struct ifnet *oifp; 396 int error; 397 398 DPRINTF("ifp=%p\n", ifp); 399 sc = ifp->if_softc; 400 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 401 EPAIR_LOCK_ASSERT(epair_dpcpu); 402 403 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 404 return; 405 if ((ifp->if_flags & IFF_UP) == 0) 406 return; 407 408 /* 409 * We get patckets here from ether_output via if_handoff() 410 * and ned to put them into the input queue of the oifp 411 * and call oifp->if_input() via netisr/epair_sintr(). 412 */ 413 oifp = sc->oifp; 414 sc = oifp->if_softc; 415 for (;;) { 416 IFQ_DEQUEUE(&ifp->if_snd, m); 417 if (m == NULL) 418 break; 419 BPF_MTAP(ifp, m); 420 421 /* 422 * In case the outgoing interface is not usable, 423 * drop the packet. 424 */ 425 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 426 (oifp->if_flags & IFF_UP) ==0) { 427 ifp->if_oerrors++; 428 m_freem(m); 429 continue; 430 } 431 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 432 433 /* 434 * Add a reference so the interface cannot go while the 435 * packet is in transit as we rely on rcvif to stay valid. 436 */ 437 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 438 m->m_pkthdr.rcvif = oifp; 439 CURVNET_SET_QUIET(oifp->if_vnet); 440 error = netisr_queue(NETISR_EPAIR, m); 441 CURVNET_RESTORE(); 442 if (!error) { 443 ifp->if_opackets++; 444 /* Someone else received the packet. */ 445 oifp->if_ipackets++; 446 } else { 447 /* The packet was freed already. */ 448 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 449 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 450 (void) epair_add_ifp_for_draining(ifp); 451 ifp->if_oerrors++; 452 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 453 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 454 ("%s: ifp=%p sc->refcount not >= 1: %d", 455 __func__, oifp, sc->refcount)); 456 } 457 } 458 } 459 460 static void 461 epair_start(struct ifnet *ifp) 462 { 463 struct epair_dpcpu *epair_dpcpu; 464 465 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 466 EPAIR_LOCK(epair_dpcpu); 467 epair_start_locked(ifp); 468 EPAIR_UNLOCK(epair_dpcpu); 469 } 470 471 static int 472 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 473 { 474 struct epair_dpcpu *epair_dpcpu; 475 struct epair_softc *sc; 476 struct ifnet *oifp; 477 int error, len; 478 short mflags; 479 480 DPRINTF("ifp=%p m=%p\n", ifp, m); 481 sc = ifp->if_softc; 482 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 483 EPAIR_LOCK_ASSERT(epair_dpcpu); 484 485 if (m == NULL) 486 return (0); 487 488 /* 489 * We are not going to use the interface en/dequeue mechanism 490 * on the TX side. We are called from ether_output_frame() 491 * and will put the packet into the incoming queue of the 492 * other interface of our pair via the netsir. 493 */ 494 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 495 m_freem(m); 496 return (ENXIO); 497 } 498 if ((ifp->if_flags & IFF_UP) == 0) { 499 m_freem(m); 500 return (ENETDOWN); 501 } 502 503 BPF_MTAP(ifp, m); 504 505 /* 506 * In case the outgoing interface is not usable, 507 * drop the packet. 508 */ 509 oifp = sc->oifp; 510 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 511 (oifp->if_flags & IFF_UP) ==0) { 512 ifp->if_oerrors++; 513 m_freem(m); 514 return (0); 515 } 516 len = m->m_pkthdr.len; 517 mflags = m->m_flags; 518 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 519 520 #ifdef ALTQ 521 /* Support ALTQ via the clasic if_start() path. */ 522 IF_LOCK(&ifp->if_snd); 523 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 524 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 525 if (error) 526 ifp->if_snd.ifq_drops++; 527 IF_UNLOCK(&ifp->if_snd); 528 if (!error) { 529 ifp->if_obytes += len; 530 if (mflags & (M_BCAST|M_MCAST)) 531 ifp->if_omcasts++; 532 533 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 534 epair_start_locked(ifp); 535 else 536 (void)epair_add_ifp_for_draining(ifp); 537 } 538 return (error); 539 } 540 IF_UNLOCK(&ifp->if_snd); 541 #endif 542 543 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 544 /* 545 * Our hardware queue is full, try to fall back 546 * queuing to the ifq but do not call ifp->if_start. 547 * Either we are lucky or the packet is gone. 548 */ 549 IFQ_ENQUEUE(&ifp->if_snd, m, error); 550 if (!error) 551 (void)epair_add_ifp_for_draining(ifp); 552 return (error); 553 } 554 sc = oifp->if_softc; 555 /* 556 * Add a reference so the interface cannot go while the 557 * packet is in transit as we rely on rcvif to stay valid. 558 */ 559 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 560 m->m_pkthdr.rcvif = oifp; 561 CURVNET_SET_QUIET(oifp->if_vnet); 562 error = netisr_queue(NETISR_EPAIR, m); 563 CURVNET_RESTORE(); 564 if (!error) { 565 ifp->if_opackets++; 566 /* 567 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 568 * but as we bypass all this we have to duplicate 569 * the logic another time. 570 */ 571 ifp->if_obytes += len; 572 if (mflags & (M_BCAST|M_MCAST)) 573 ifp->if_omcasts++; 574 /* Someone else received the packet. */ 575 oifp->if_ipackets++; 576 } else { 577 /* The packet was freed already. */ 578 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 579 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 580 ifp->if_oerrors++; 581 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 582 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 583 ("%s: ifp=%p sc->refcount not >= 1: %d", 584 __func__, oifp, sc->refcount)); 585 } 586 587 return (error); 588 } 589 590 static int 591 epair_transmit(struct ifnet *ifp, struct mbuf *m) 592 { 593 struct epair_dpcpu *epair_dpcpu; 594 int error; 595 596 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 597 EPAIR_LOCK(epair_dpcpu); 598 error = epair_transmit_locked(ifp, m); 599 EPAIR_UNLOCK(epair_dpcpu); 600 return (error); 601 } 602 603 static void 604 epair_qflush(struct ifnet *ifp) 605 { 606 struct epair_softc *sc; 607 608 sc = ifp->if_softc; 609 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 610 __func__, ifp, sc)); 611 /* 612 * Remove this ifp from all backpointer lists. The interface will not 613 * usable for flushing anyway nor should it have anything to flush 614 * after if_qflush(). 615 */ 616 epair_remove_ifp_from_draining(ifp); 617 618 if (sc->if_qflush) 619 sc->if_qflush(ifp); 620 } 621 622 static int 623 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 624 { 625 struct ifreq *ifr; 626 int error; 627 628 ifr = (struct ifreq *)data; 629 switch (cmd) { 630 case SIOCSIFFLAGS: 631 case SIOCADDMULTI: 632 case SIOCDELMULTI: 633 error = 0; 634 break; 635 636 case SIOCSIFMTU: 637 /* We basically allow all kinds of MTUs. */ 638 ifp->if_mtu = ifr->ifr_mtu; 639 error = 0; 640 break; 641 642 default: 643 /* Let the common ethernet handler process this. */ 644 error = ether_ioctl(ifp, cmd, data); 645 break; 646 } 647 648 return (error); 649 } 650 651 static void 652 epair_init(void *dummy __unused) 653 { 654 } 655 656 657 /* 658 * Interface cloning functions. 659 * We use our private ones so that we can create/destroy our secondary 660 * device along with the primary one. 661 */ 662 static int 663 epair_clone_match(struct if_clone *ifc, const char *name) 664 { 665 const char *cp; 666 667 DPRINTF("name='%s'\n", name); 668 669 /* 670 * Our base name is epair. 671 * Our interfaces will be named epair<n>[ab]. 672 * So accept anything of the following list: 673 * - epair 674 * - epair<n> 675 * but not the epair<n>[ab] versions. 676 */ 677 if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0) 678 return (0); 679 680 for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) { 681 if (*cp < '0' || *cp > '9') 682 return (0); 683 } 684 685 return (1); 686 } 687 688 static int 689 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 690 { 691 struct epair_softc *sca, *scb; 692 struct ifnet *ifp; 693 char *dp; 694 int error, unit, wildcard; 695 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 696 697 /* 698 * We are abusing params to create our second interface. 699 * Actually we already created it and called if_clone_createif() 700 * for it to do the official insertion procedure the moment we knew 701 * it cannot fail anymore. So just do attach it here. 702 */ 703 if (params) { 704 scb = (struct epair_softc *)params; 705 ifp = scb->ifp; 706 /* Assign a hopefully unique, locally administered etheraddr. */ 707 eaddr[0] = 0x02; 708 eaddr[3] = (ifp->if_index >> 8) & 0xff; 709 eaddr[4] = ifp->if_index & 0xff; 710 eaddr[5] = 0x0b; 711 ether_ifattach(ifp, eaddr); 712 /* Correctly set the name for the cloner list. */ 713 strlcpy(name, scb->ifp->if_xname, len); 714 return (0); 715 } 716 717 /* Try to see if a special unit was requested. */ 718 error = ifc_name2unit(name, &unit); 719 if (error != 0) 720 return (error); 721 wildcard = (unit < 0); 722 723 error = ifc_alloc_unit(ifc, &unit); 724 if (error != 0) 725 return (error); 726 727 /* 728 * If no unit had been given, we need to adjust the ifName. 729 * Also make sure there is space for our extra [ab] suffix. 730 */ 731 for (dp = name; *dp != '\0'; dp++); 732 if (wildcard) { 733 error = snprintf(dp, len - (dp - name), "%d", unit); 734 if (error > len - (dp - name) - 1) { 735 /* ifName too long. */ 736 ifc_free_unit(ifc, unit); 737 return (ENOSPC); 738 } 739 dp += error; 740 } 741 if (len - (dp - name) - 1 < 1) { 742 /* No space left for our [ab] suffix. */ 743 ifc_free_unit(ifc, unit); 744 return (ENOSPC); 745 } 746 *dp = 'a'; 747 /* Must not change dp so we can replace 'a' by 'b' later. */ 748 *(dp+1) = '\0'; 749 750 /* Allocate memory for both [ab] interfaces */ 751 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 752 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 753 sca->ifp = if_alloc(IFT_ETHER); 754 if (sca->ifp == NULL) { 755 free(sca, M_EPAIR); 756 ifc_free_unit(ifc, unit); 757 return (ENOSPC); 758 } 759 760 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 761 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 762 scb->ifp = if_alloc(IFT_ETHER); 763 if (scb->ifp == NULL) { 764 free(scb, M_EPAIR); 765 if_free(sca->ifp); 766 free(sca, M_EPAIR); 767 ifc_free_unit(ifc, unit); 768 return (ENOSPC); 769 } 770 771 /* 772 * Cross-reference the interfaces so we will be able to free both. 773 */ 774 sca->oifp = scb->ifp; 775 scb->oifp = sca->ifp; 776 777 /* 778 * Calculate the cpuid for netisr queueing based on the 779 * ifIndex of the interfaces. As long as we cannot configure 780 * this or use cpuset information easily we cannot guarantee 781 * cache locality but we can at least allow parallelism. 782 */ 783 sca->cpuid = 784 netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); 785 scb->cpuid = 786 netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); 787 788 /* Finish initialization of interface <n>a. */ 789 ifp = sca->ifp; 790 ifp->if_softc = sca; 791 strlcpy(ifp->if_xname, name, IFNAMSIZ); 792 ifp->if_dname = ifc->ifc_name; 793 ifp->if_dunit = unit; 794 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 795 ifp->if_start = epair_start; 796 ifp->if_ioctl = epair_ioctl; 797 ifp->if_init = epair_init; 798 ifp->if_snd.ifq_maxlen = ifqmaxlen; 799 /* Assign a hopefully unique, locally administered etheraddr. */ 800 eaddr[0] = 0x02; 801 eaddr[3] = (ifp->if_index >> 8) & 0xff; 802 eaddr[4] = ifp->if_index & 0xff; 803 eaddr[5] = 0x0a; 804 ether_ifattach(ifp, eaddr); 805 sca->if_qflush = ifp->if_qflush; 806 ifp->if_qflush = epair_qflush; 807 ifp->if_transmit = epair_transmit; 808 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 809 810 /* Swap the name and finish initialization of interface <n>b. */ 811 *dp = 'b'; 812 813 ifp = scb->ifp; 814 ifp->if_softc = scb; 815 strlcpy(ifp->if_xname, name, IFNAMSIZ); 816 ifp->if_dname = ifc->ifc_name; 817 ifp->if_dunit = unit; 818 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 819 ifp->if_start = epair_start; 820 ifp->if_ioctl = epair_ioctl; 821 ifp->if_init = epair_init; 822 ifp->if_snd.ifq_maxlen = ifqmaxlen; 823 /* We need to play some tricks here for the second interface. */ 824 strlcpy(name, EPAIRNAME, len); 825 error = if_clone_create(name, len, (caddr_t)scb); 826 if (error) 827 panic("%s: if_clone_createif() for our 2nd iface failed: %d", 828 __func__, error); 829 scb->if_qflush = ifp->if_qflush; 830 ifp->if_qflush = epair_qflush; 831 ifp->if_transmit = epair_transmit; 832 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 833 834 /* 835 * Restore name to <n>a as the ifp for this will go into the 836 * cloner list for the initial call. 837 */ 838 strlcpy(name, sca->ifp->if_xname, len); 839 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 840 841 /* Tell the world, that we are ready to rock. */ 842 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 843 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 844 845 return (0); 846 } 847 848 static int 849 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 850 { 851 struct ifnet *oifp; 852 struct epair_softc *sca, *scb; 853 int unit, error; 854 855 DPRINTF("ifp=%p\n", ifp); 856 857 /* 858 * In case we called into if_clone_destroyif() ourselves 859 * again to remove the second interface, the softc will be 860 * NULL. In that case so not do anything but return success. 861 */ 862 if (ifp->if_softc == NULL) 863 return (0); 864 865 unit = ifp->if_dunit; 866 sca = ifp->if_softc; 867 oifp = sca->oifp; 868 scb = oifp->if_softc; 869 870 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 871 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 872 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 873 ether_ifdetach(oifp); 874 ether_ifdetach(ifp); 875 /* 876 * Wait for all packets to be dispatched to if_input. 877 * The numbers can only go down as the interfaces are 878 * detached so there is no need to use atomics. 879 */ 880 DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount); 881 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1, 882 ("%s: ifp=%p sca->refcount!=1: %d || ifp=%p scb->refcount!=1: %d", 883 __func__, ifp, sca->refcount, oifp, scb->refcount)); 884 885 /* 886 * Get rid of our second half. 887 */ 888 oifp->if_softc = NULL; 889 error = if_clone_destroyif(ifc, oifp); 890 if (error) 891 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 892 __func__, error); 893 894 /* 895 * Finish cleaning up. Free them and release the unit. 896 * As the other of the two interfaces my reside in a different vnet, 897 * we need to switch before freeing them. 898 */ 899 CURVNET_SET_QUIET(oifp->if_vnet); 900 if_free_type(oifp, IFT_ETHER); 901 CURVNET_RESTORE(); 902 if_free_type(ifp, IFT_ETHER); 903 free(scb, M_EPAIR); 904 free(sca, M_EPAIR); 905 ifc_free_unit(ifc, unit); 906 907 return (0); 908 } 909 910 static int 911 epair_modevent(module_t mod, int type, void *data) 912 { 913 int qlimit; 914 915 switch (type) { 916 case MOD_LOAD: 917 /* For now limit us to one global mutex and one inq. */ 918 epair_dpcpu_init(); 919 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 920 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 921 epair_nh.nh_qlimit = qlimit; 922 netisr_register(&epair_nh); 923 if_clone_attach(&epair_cloner); 924 if (bootverbose) 925 printf("%s initialized.\n", EPAIRNAME); 926 break; 927 case MOD_UNLOAD: 928 if_clone_detach(&epair_cloner); 929 netisr_unregister(&epair_nh); 930 epair_dpcpu_detach(); 931 if (bootverbose) 932 printf("%s unloaded.\n", EPAIRNAME); 933 break; 934 default: 935 return (EOPNOTSUPP); 936 } 937 return (0); 938 } 939 940 static moduledata_t epair_mod = { 941 "if_epair", 942 epair_modevent, 943 0 944 }; 945 946 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 947 MODULE_VERSION(if_epair, 1); 948