1 /*- 2 * Copyright (c) 2008 The FreeBSD Foundation 3 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by CK Software GmbH under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * A pair of virtual back-to-back connected ethernet like interfaces 33 * (``two interfaces with a virtual cross-over cable''). 34 * 35 * This is mostly intended to be used to provide connectivity between 36 * different virtual network stack instances. 37 */ 38 /* 39 * Things to re-think once we have more experience: 40 * - ifp->if_reassign function once we can test with vimage. Depending on 41 * how if_vmove() is going to be improved. 42 * - Real random etheraddrs that are checked to be uniquish; we would need 43 * to re-do them in case we move the interface between network stacks 44 * in a private if_reassign function. 45 * In case we bridge to a real interface/network or between indepedent 46 * epairs on multiple stacks/machines, we may need this. 47 * For now let the user handle that case. 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include <sys/param.h> 54 #include <sys/kernel.h> 55 #include <sys/mbuf.h> 56 #include <sys/module.h> 57 #include <sys/refcount.h> 58 #include <sys/queue.h> 59 #include <sys/smp.h> 60 #include <sys/socket.h> 61 #include <sys/sockio.h> 62 #include <sys/sysctl.h> 63 #include <sys/types.h> 64 65 #include <net/bpf.h> 66 #include <net/ethernet.h> 67 #include <net/if.h> 68 #include <net/if_clone.h> 69 #include <net/if_var.h> 70 #include <net/if_types.h> 71 #include <net/netisr.h> 72 #include <net/vnet.h> 73 74 #define EPAIRNAME "epair" 75 76 SYSCTL_DECL(_net_link); 77 SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 78 79 #ifdef EPAIR_DEBUG 80 static int epair_debug = 0; 81 SYSCTL_XINT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 82 &epair_debug, 0, "if_epair(4) debugging."); 83 #define DPRINTF(fmt, arg...) \ 84 if (epair_debug) \ 85 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 86 #else 87 #define DPRINTF(fmt, arg...) 88 #endif 89 90 static void epair_nh_sintr(struct mbuf *); 91 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 92 static void epair_nh_drainedcpu(u_int); 93 94 static void epair_start_locked(struct ifnet *); 95 96 static int epair_clone_match(struct if_clone *, const char *); 97 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 98 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 99 100 /* Netisr realted definitions and sysctl. */ 101 static struct netisr_handler epair_nh = { 102 .nh_name = EPAIRNAME, 103 .nh_proto = NETISR_EPAIR, 104 .nh_policy = NETISR_POLICY_CPU, 105 .nh_handler = epair_nh_sintr, 106 .nh_m2cpuid = epair_nh_m2cpuid, 107 .nh_drainedcpu = epair_nh_drainedcpu, 108 }; 109 110 static int 111 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 112 { 113 int error, qlimit; 114 115 netisr_getqlimit(&epair_nh, &qlimit); 116 error = sysctl_handle_int(oidp, &qlimit, 0, req); 117 if (error || !req->newptr) 118 return (error); 119 if (qlimit < 1) 120 return (EINVAL); 121 return (netisr_setqlimit(&epair_nh, qlimit)); 122 } 123 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 124 0, 0, sysctl_epair_netisr_maxqlen, "I", 125 "Maximum if_epair(4) netisr \"hw\" queue length"); 126 127 struct epair_softc { 128 struct ifnet *ifp; /* This ifp. */ 129 struct ifnet *oifp; /* other ifp of pair. */ 130 u_int refcount; /* # of mbufs in flight. */ 131 u_int cpuid; /* CPU ID assigned upon creation. */ 132 void (*if_qflush)(struct ifnet *); 133 /* Original if_qflush routine. */ 134 }; 135 136 /* 137 * Per-CPU list of ifps with data in the ifq that needs to be flushed 138 * to the netisr ``hw'' queue before we allow any further direct queuing 139 * to the ``hw'' queue. 140 */ 141 struct epair_ifp_drain { 142 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 143 struct ifnet *ifp; 144 }; 145 STAILQ_HEAD(eid_list, epair_ifp_drain); 146 147 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 148 "if_epair", NULL, MTX_DEF) 149 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 150 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 151 MA_OWNED) 152 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 153 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 154 155 #ifdef INVARIANTS 156 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 157 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 158 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 159 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 160 #else 161 #define EPAIR_REFCOUNT_INIT(r, v) 162 #define EPAIR_REFCOUNT_AQUIRE(r) 163 #define EPAIR_REFCOUNT_RELEASE(r) 164 #define EPAIR_REFCOUNT_ASSERT(a, p) 165 #endif 166 167 static MALLOC_DEFINE(M_EPAIR, EPAIRNAME, 168 "Pair of virtual cross-over connected Ethernet-like interfaces"); 169 170 static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER( 171 EPAIRNAME, NULL, IF_MAXUNIT, 172 NULL, epair_clone_match, epair_clone_create, epair_clone_destroy); 173 174 /* 175 * DPCPU area and functions. 176 */ 177 struct epair_dpcpu { 178 struct mtx if_epair_mtx; /* Per-CPU locking. */ 179 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 180 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 181 * data in the ifq. */ 182 }; 183 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 184 185 static void 186 epair_dpcpu_init(void) 187 { 188 struct epair_dpcpu *epair_dpcpu; 189 struct eid_list *s; 190 u_int cpuid; 191 192 CPU_FOREACH(cpuid) { 193 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 194 195 /* Initialize per-cpu lock. */ 196 EPAIR_LOCK_INIT(epair_dpcpu); 197 198 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 199 epair_dpcpu->epair_drv_flags = 0; 200 201 /* 202 * Initialize per-cpu drain list. 203 * Manually do what STAILQ_HEAD_INITIALIZER would do. 204 */ 205 s = &epair_dpcpu->epair_ifp_drain_list; 206 s->stqh_first = NULL; 207 s->stqh_last = &s->stqh_first; 208 } 209 } 210 211 static void 212 epair_dpcpu_detach(void) 213 { 214 struct epair_dpcpu *epair_dpcpu; 215 u_int cpuid; 216 217 CPU_FOREACH(cpuid) { 218 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 219 220 /* Destroy per-cpu lock. */ 221 EPAIR_LOCK_DESTROY(epair_dpcpu); 222 } 223 } 224 225 /* 226 * Helper functions. 227 */ 228 static u_int 229 cpuid_from_ifp(struct ifnet *ifp) 230 { 231 struct epair_softc *sc; 232 233 if (ifp == NULL) 234 return (0); 235 sc = ifp->if_softc; 236 237 return (sc->cpuid); 238 } 239 240 /* 241 * Netisr handler functions. 242 */ 243 static void 244 epair_nh_sintr(struct mbuf *m) 245 { 246 struct ifnet *ifp; 247 struct epair_softc *sc; 248 249 ifp = m->m_pkthdr.rcvif; 250 (*ifp->if_input)(ifp, m); 251 sc = ifp->if_softc; 252 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 253 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 254 ("%s: ifp=%p sc->refcount not >= 1: %d", 255 __func__, ifp, sc->refcount)); 256 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 257 } 258 259 static struct mbuf * 260 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 261 { 262 263 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 264 265 return (m); 266 } 267 268 static void 269 epair_nh_drainedcpu(u_int cpuid) 270 { 271 struct epair_dpcpu *epair_dpcpu; 272 struct epair_ifp_drain *elm, *tvar; 273 struct ifnet *ifp; 274 275 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 276 EPAIR_LOCK(epair_dpcpu); 277 /* 278 * Assume our "hw" queue and possibly ifq will be emptied 279 * again. In case we will overflow the "hw" queue while 280 * draining, epair_start_locked will set IFF_DRV_OACTIVE 281 * again and we will stop and return. 282 */ 283 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 284 ifp_next, tvar) { 285 ifp = elm->ifp; 286 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 287 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 288 epair_start_locked(ifp); 289 290 IFQ_LOCK(&ifp->if_snd); 291 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 292 struct epair_softc *sc; 293 294 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 295 elm, epair_ifp_drain, ifp_next); 296 /* The cached ifp goes off the list. */ 297 sc = ifp->if_softc; 298 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 299 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 300 ("%s: ifp=%p sc->refcount not >= 1: %d", 301 __func__, ifp, sc->refcount)); 302 free(elm, M_EPAIR); 303 } 304 IFQ_UNLOCK(&ifp->if_snd); 305 306 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 307 /* Our "hw"q overflew again. */ 308 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE 309 DPRINTF("hw queue length overflow at %u\n", 310 epair_nh.nh_qlimit); 311 break; 312 } 313 } 314 EPAIR_UNLOCK(epair_dpcpu); 315 } 316 317 /* 318 * Network interface (`if') related functions. 319 */ 320 static void 321 epair_remove_ifp_from_draining(struct ifnet *ifp) 322 { 323 struct epair_dpcpu *epair_dpcpu; 324 struct epair_ifp_drain *elm, *tvar; 325 u_int cpuid; 326 327 CPU_FOREACH(cpuid) { 328 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 329 EPAIR_LOCK(epair_dpcpu); 330 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 331 ifp_next, tvar) { 332 if (ifp == elm->ifp) { 333 struct epair_softc *sc; 334 335 STAILQ_REMOVE( 336 &epair_dpcpu->epair_ifp_drain_list, elm, 337 epair_ifp_drain, ifp_next); 338 /* The cached ifp goes off the list. */ 339 sc = ifp->if_softc; 340 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 341 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 342 ("%s: ifp=%p sc->refcount not >= 1: %d", 343 __func__, ifp, sc->refcount)); 344 free(elm, M_EPAIR); 345 } 346 } 347 EPAIR_UNLOCK(epair_dpcpu); 348 } 349 } 350 351 static int 352 epair_add_ifp_for_draining(struct ifnet *ifp) 353 { 354 struct epair_dpcpu *epair_dpcpu; 355 struct epair_softc *sc; 356 struct epair_ifp_drain *elm = NULL; 357 358 sc = ifp->if_softc; 359 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 360 EPAIR_LOCK_ASSERT(epair_dpcpu); 361 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 362 if (elm->ifp == ifp) 363 break; 364 /* If the ifp is there already, return success. */ 365 if (elm != NULL) 366 return (0); 367 368 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 369 if (elm == NULL) 370 return (ENOMEM); 371 372 elm->ifp = ifp; 373 /* Add a reference for the ifp pointer on the list. */ 374 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 375 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 376 377 return (0); 378 } 379 380 static void 381 epair_start_locked(struct ifnet *ifp) 382 { 383 struct epair_dpcpu *epair_dpcpu; 384 struct mbuf *m; 385 struct epair_softc *sc; 386 struct ifnet *oifp; 387 int error; 388 389 DPRINTF("ifp=%p\n", ifp); 390 sc = ifp->if_softc; 391 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 392 EPAIR_LOCK_ASSERT(epair_dpcpu); 393 394 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 395 return; 396 if ((ifp->if_flags & IFF_UP) == 0) 397 return; 398 399 /* 400 * We get patckets here from ether_output via if_handoff() 401 * and ned to put them into the input queue of the oifp 402 * and call oifp->if_input() via netisr/epair_sintr(). 403 */ 404 oifp = sc->oifp; 405 sc = oifp->if_softc; 406 for (;;) { 407 IFQ_DEQUEUE(&ifp->if_snd, m); 408 if (m == NULL) 409 break; 410 BPF_MTAP(ifp, m); 411 412 /* 413 * In case the outgoing interface is not usable, 414 * drop the packet. 415 */ 416 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 417 (oifp->if_flags & IFF_UP) ==0) { 418 ifp->if_oerrors++; 419 m_freem(m); 420 continue; 421 } 422 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 423 424 /* 425 * Add a reference so the interface cannot go while the 426 * packet is in transit as we rely on rcvif to stay valid. 427 */ 428 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 429 m->m_pkthdr.rcvif = oifp; 430 CURVNET_SET_QUIET(oifp->if_vnet); 431 error = netisr_queue(NETISR_EPAIR, m); 432 CURVNET_RESTORE(); 433 if (!error) { 434 ifp->if_opackets++; 435 /* Someone else received the packet. */ 436 oifp->if_ipackets++; 437 } else { 438 /* The packet was freed already. */ 439 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 440 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 441 (void) epair_add_ifp_for_draining(ifp); 442 ifp->if_oerrors++; 443 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 444 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 445 ("%s: ifp=%p sc->refcount not >= 1: %d", 446 __func__, oifp, sc->refcount)); 447 } 448 } 449 } 450 451 static void 452 epair_start(struct ifnet *ifp) 453 { 454 struct epair_dpcpu *epair_dpcpu; 455 456 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 457 EPAIR_LOCK(epair_dpcpu); 458 epair_start_locked(ifp); 459 EPAIR_UNLOCK(epair_dpcpu); 460 } 461 462 static int 463 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 464 { 465 struct epair_dpcpu *epair_dpcpu; 466 struct epair_softc *sc; 467 struct ifnet *oifp; 468 int error, len; 469 short mflags; 470 471 DPRINTF("ifp=%p m=%p\n", ifp, m); 472 sc = ifp->if_softc; 473 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 474 EPAIR_LOCK_ASSERT(epair_dpcpu); 475 476 if (m == NULL) 477 return (0); 478 479 /* 480 * We are not going to use the interface en/dequeue mechanism 481 * on the TX side. We are called from ether_output_frame() 482 * and will put the packet into the incoming queue of the 483 * other interface of our pair via the netsir. 484 */ 485 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 486 m_freem(m); 487 return (ENXIO); 488 } 489 if ((ifp->if_flags & IFF_UP) == 0) { 490 m_freem(m); 491 return (ENETDOWN); 492 } 493 494 BPF_MTAP(ifp, m); 495 496 /* 497 * In case the outgoing interface is not usable, 498 * drop the packet. 499 */ 500 oifp = sc->oifp; 501 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 502 (oifp->if_flags & IFF_UP) ==0) { 503 ifp->if_oerrors++; 504 m_freem(m); 505 return (0); 506 } 507 len = m->m_pkthdr.len; 508 mflags = m->m_flags; 509 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 510 511 #ifdef ALTQ 512 /* Support ALTQ via the clasic if_start() path. */ 513 IF_LOCK(&ifp->if_snd); 514 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 515 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 516 if (error) 517 ifp->if_snd.ifq_drops++; 518 IF_UNLOCK(&ifp->if_snd); 519 if (!error) { 520 ifp->if_obytes += len; 521 if (mflags & (M_BCAST|M_MCAST)) 522 ifp->if_omcasts++; 523 524 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 525 epair_start_locked(ifp); 526 else 527 (void)epair_add_ifp_for_draining(ifp); 528 } 529 return (error); 530 } 531 IF_UNLOCK(&ifp->if_snd); 532 #endif 533 534 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 535 /* 536 * Our hardware queue is full, try to fall back 537 * queuing to the ifq but do not call ifp->if_start. 538 * Either we are lucky or the packet is gone. 539 */ 540 IFQ_ENQUEUE(&ifp->if_snd, m, error); 541 if (!error) 542 (void)epair_add_ifp_for_draining(ifp); 543 return (error); 544 } 545 sc = oifp->if_softc; 546 /* 547 * Add a reference so the interface cannot go while the 548 * packet is in transit as we rely on rcvif to stay valid. 549 */ 550 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 551 m->m_pkthdr.rcvif = oifp; 552 CURVNET_SET_QUIET(oifp->if_vnet); 553 error = netisr_queue(NETISR_EPAIR, m); 554 CURVNET_RESTORE(); 555 if (!error) { 556 ifp->if_opackets++; 557 /* 558 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 559 * but as we bypass all this we have to duplicate 560 * the logic another time. 561 */ 562 ifp->if_obytes += len; 563 if (mflags & (M_BCAST|M_MCAST)) 564 ifp->if_omcasts++; 565 /* Someone else received the packet. */ 566 oifp->if_ipackets++; 567 } else { 568 /* The packet was freed already. */ 569 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 570 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 571 ifp->if_oerrors++; 572 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 573 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 574 ("%s: ifp=%p sc->refcount not >= 1: %d", 575 __func__, oifp, sc->refcount)); 576 } 577 578 return (error); 579 } 580 581 static int 582 epair_transmit(struct ifnet *ifp, struct mbuf *m) 583 { 584 struct epair_dpcpu *epair_dpcpu; 585 int error; 586 587 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 588 EPAIR_LOCK(epair_dpcpu); 589 error = epair_transmit_locked(ifp, m); 590 EPAIR_UNLOCK(epair_dpcpu); 591 return (error); 592 } 593 594 static void 595 epair_qflush(struct ifnet *ifp) 596 { 597 struct epair_softc *sc; 598 599 sc = ifp->if_softc; 600 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 601 __func__, ifp, sc)); 602 /* 603 * Remove this ifp from all backpointer lists. The interface will not 604 * usable for flushing anyway nor should it have anything to flush 605 * after if_qflush(). 606 */ 607 epair_remove_ifp_from_draining(ifp); 608 609 if (sc->if_qflush) 610 sc->if_qflush(ifp); 611 } 612 613 static int 614 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 615 { 616 struct ifreq *ifr; 617 int error; 618 619 ifr = (struct ifreq *)data; 620 switch (cmd) { 621 case SIOCSIFFLAGS: 622 case SIOCADDMULTI: 623 case SIOCDELMULTI: 624 error = 0; 625 break; 626 627 case SIOCSIFMTU: 628 /* We basically allow all kinds of MTUs. */ 629 ifp->if_mtu = ifr->ifr_mtu; 630 error = 0; 631 break; 632 633 default: 634 /* Let the common ethernet handler process this. */ 635 error = ether_ioctl(ifp, cmd, data); 636 break; 637 } 638 639 return (error); 640 } 641 642 static void 643 epair_init(void *dummy __unused) 644 { 645 } 646 647 648 /* 649 * Interface cloning functions. 650 * We use our private ones so that we can create/destroy our secondary 651 * device along with the primary one. 652 */ 653 static int 654 epair_clone_match(struct if_clone *ifc, const char *name) 655 { 656 const char *cp; 657 658 DPRINTF("name='%s'\n", name); 659 660 /* 661 * Our base name is epair. 662 * Our interfaces will be named epair<n>[ab]. 663 * So accept anything of the following list: 664 * - epair 665 * - epair<n> 666 * but not the epair<n>[ab] versions. 667 */ 668 if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0) 669 return (0); 670 671 for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) { 672 if (*cp < '0' || *cp > '9') 673 return (0); 674 } 675 676 return (1); 677 } 678 679 static int 680 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 681 { 682 struct epair_softc *sca, *scb; 683 struct ifnet *ifp; 684 char *dp; 685 int error, unit, wildcard; 686 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 687 688 /* 689 * We are abusing params to create our second interface. 690 * Actually we already created it and called if_clone_createif() 691 * for it to do the official insertion procedure the moment we knew 692 * it cannot fail anymore. So just do attach it here. 693 */ 694 if (params) { 695 scb = (struct epair_softc *)params; 696 ifp = scb->ifp; 697 /* Assign a hopefully unique, locally administered etheraddr. */ 698 eaddr[0] = 0x02; 699 eaddr[3] = (ifp->if_index >> 8) & 0xff; 700 eaddr[4] = ifp->if_index & 0xff; 701 eaddr[5] = 0x0b; 702 ether_ifattach(ifp, eaddr); 703 /* Correctly set the name for the cloner list. */ 704 strlcpy(name, scb->ifp->if_xname, len); 705 return (0); 706 } 707 708 /* Try to see if a special unit was requested. */ 709 error = ifc_name2unit(name, &unit); 710 if (error != 0) 711 return (error); 712 wildcard = (unit < 0); 713 714 error = ifc_alloc_unit(ifc, &unit); 715 if (error != 0) 716 return (error); 717 718 /* 719 * If no unit had been given, we need to adjust the ifName. 720 * Also make sure there is space for our extra [ab] suffix. 721 */ 722 for (dp = name; *dp != '\0'; dp++); 723 if (wildcard) { 724 error = snprintf(dp, len - (dp - name), "%d", unit); 725 if (error > len - (dp - name) - 1) { 726 /* ifName too long. */ 727 ifc_free_unit(ifc, unit); 728 return (ENOSPC); 729 } 730 dp += error; 731 } 732 if (len - (dp - name) - 1 < 1) { 733 /* No space left for our [ab] suffix. */ 734 ifc_free_unit(ifc, unit); 735 return (ENOSPC); 736 } 737 *dp = 'a'; 738 /* Must not change dp so we can replace 'a' by 'b' later. */ 739 *(dp+1) = '\0'; 740 741 /* Allocate memory for both [ab] interfaces */ 742 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 743 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 744 sca->ifp = if_alloc(IFT_ETHER); 745 if (sca->ifp == NULL) { 746 free(sca, M_EPAIR); 747 ifc_free_unit(ifc, unit); 748 return (ENOSPC); 749 } 750 751 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 752 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 753 scb->ifp = if_alloc(IFT_ETHER); 754 if (scb->ifp == NULL) { 755 free(scb, M_EPAIR); 756 if_free(sca->ifp); 757 free(sca, M_EPAIR); 758 ifc_free_unit(ifc, unit); 759 return (ENOSPC); 760 } 761 762 /* 763 * Cross-reference the interfaces so we will be able to free both. 764 */ 765 sca->oifp = scb->ifp; 766 scb->oifp = sca->ifp; 767 768 /* 769 * Calculate the cpuid for netisr queueing based on the 770 * ifIndex of the interfaces. As long as we cannot configure 771 * this or use cpuset information easily we cannot guarantee 772 * cache locality but we can at least allow parallelism. 773 */ 774 sca->cpuid = 775 netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); 776 scb->cpuid = 777 netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); 778 779 /* Finish initialization of interface <n>a. */ 780 ifp = sca->ifp; 781 ifp->if_softc = sca; 782 strlcpy(ifp->if_xname, name, IFNAMSIZ); 783 ifp->if_dname = ifc->ifc_name; 784 ifp->if_dunit = unit; 785 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 786 ifp->if_start = epair_start; 787 ifp->if_ioctl = epair_ioctl; 788 ifp->if_init = epair_init; 789 ifp->if_snd.ifq_maxlen = ifqmaxlen; 790 /* Assign a hopefully unique, locally administered etheraddr. */ 791 eaddr[0] = 0x02; 792 eaddr[3] = (ifp->if_index >> 8) & 0xff; 793 eaddr[4] = ifp->if_index & 0xff; 794 eaddr[5] = 0x0a; 795 ether_ifattach(ifp, eaddr); 796 sca->if_qflush = ifp->if_qflush; 797 ifp->if_qflush = epair_qflush; 798 ifp->if_transmit = epair_transmit; 799 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 800 801 /* Swap the name and finish initialization of interface <n>b. */ 802 *dp = 'b'; 803 804 ifp = scb->ifp; 805 ifp->if_softc = scb; 806 strlcpy(ifp->if_xname, name, IFNAMSIZ); 807 ifp->if_dname = ifc->ifc_name; 808 ifp->if_dunit = unit; 809 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 810 ifp->if_start = epair_start; 811 ifp->if_ioctl = epair_ioctl; 812 ifp->if_init = epair_init; 813 ifp->if_snd.ifq_maxlen = ifqmaxlen; 814 /* We need to play some tricks here for the second interface. */ 815 strlcpy(name, EPAIRNAME, len); 816 error = if_clone_create(name, len, (caddr_t)scb); 817 if (error) 818 panic("%s: if_clone_createif() for our 2nd iface failed: %d", 819 __func__, error); 820 scb->if_qflush = ifp->if_qflush; 821 ifp->if_qflush = epair_qflush; 822 ifp->if_transmit = epair_transmit; 823 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 824 825 /* 826 * Restore name to <n>a as the ifp for this will go into the 827 * cloner list for the initial call. 828 */ 829 strlcpy(name, sca->ifp->if_xname, len); 830 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 831 832 /* Tell the world, that we are ready to rock. */ 833 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 834 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 835 836 return (0); 837 } 838 839 static int 840 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 841 { 842 struct ifnet *oifp; 843 struct epair_softc *sca, *scb; 844 int unit, error; 845 846 DPRINTF("ifp=%p\n", ifp); 847 848 /* 849 * In case we called into if_clone_destroyif() ourselves 850 * again to remove the second interface, the softc will be 851 * NULL. In that case so not do anything but return success. 852 */ 853 if (ifp->if_softc == NULL) 854 return (0); 855 856 unit = ifp->if_dunit; 857 sca = ifp->if_softc; 858 oifp = sca->oifp; 859 scb = oifp->if_softc; 860 861 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 862 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 863 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 864 ether_ifdetach(oifp); 865 ether_ifdetach(ifp); 866 /* 867 * Wait for all packets to be dispatched to if_input. 868 * The numbers can only go down as the interfaces are 869 * detached so there is no need to use atomics. 870 */ 871 DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount); 872 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1, 873 ("%s: ifp=%p sca->refcount!=1: %d || ifp=%p scb->refcount!=1: %d", 874 __func__, ifp, sca->refcount, oifp, scb->refcount)); 875 876 /* 877 * Get rid of our second half. 878 */ 879 oifp->if_softc = NULL; 880 error = if_clone_destroyif(ifc, oifp); 881 if (error) 882 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 883 __func__, error); 884 885 /* 886 * Finish cleaning up. Free them and release the unit. 887 * As the other of the two interfaces my reside in a different vnet, 888 * we need to switch before freeing them. 889 */ 890 CURVNET_SET_QUIET(oifp->if_vnet); 891 if_free_type(oifp, IFT_ETHER); 892 CURVNET_RESTORE(); 893 if_free_type(ifp, IFT_ETHER); 894 free(scb, M_EPAIR); 895 free(sca, M_EPAIR); 896 ifc_free_unit(ifc, unit); 897 898 return (0); 899 } 900 901 static int 902 epair_modevent(module_t mod, int type, void *data) 903 { 904 int qlimit; 905 906 switch (type) { 907 case MOD_LOAD: 908 /* For now limit us to one global mutex and one inq. */ 909 epair_dpcpu_init(); 910 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 911 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 912 epair_nh.nh_qlimit = qlimit; 913 netisr_register(&epair_nh); 914 if_clone_attach(&epair_cloner); 915 if (bootverbose) 916 printf("%s initialized.\n", EPAIRNAME); 917 break; 918 case MOD_UNLOAD: 919 if_clone_detach(&epair_cloner); 920 netisr_unregister(&epair_nh); 921 epair_dpcpu_detach(); 922 if (bootverbose) 923 printf("%s unloaded.\n", EPAIRNAME); 924 break; 925 default: 926 return (EOPNOTSUPP); 927 } 928 return (0); 929 } 930 931 static moduledata_t epair_mod = { 932 "if_epair", 933 epair_modevent, 934 0 935 }; 936 937 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 938 MODULE_VERSION(if_epair, 1); 939