1 /*- 2 * Copyright (c) 2008 The FreeBSD Foundation 3 * Copyright (c) 2009 Bjoern A. Zeeb <bz@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by CK Software GmbH under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * A pair of virtual back-to-back connected ethernet like interfaces 33 * (``two interfaces with a virtual cross-over cable''). 34 * 35 * This is mostly intended to be used to provide connectivity between 36 * different virtual network stack instances. 37 */ 38 /* 39 * Things to re-think once we have more experience: 40 * - ifp->if_reassign function once we can test with vimage. Depending on 41 * how if_vmove() is going to be improved. 42 * - Real random etheraddrs that are checked to be uniquish; we would need 43 * to re-do them in case we move the interface between network stacks 44 * in a private if_reassign function. 45 * In case we bridge to a real interface/network or between indepedent 46 * epairs on multiple stacks/machines, we may need this. 47 * For now let the user handle that case. 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include <sys/param.h> 54 #include <sys/kernel.h> 55 #include <sys/mbuf.h> 56 #include <sys/module.h> 57 #include <sys/refcount.h> 58 #include <sys/queue.h> 59 #include <sys/smp.h> 60 #include <sys/socket.h> 61 #include <sys/sockio.h> 62 #include <sys/sysctl.h> 63 #include <sys/types.h> 64 65 #include <net/bpf.h> 66 #include <net/ethernet.h> 67 #include <net/if.h> 68 #include <net/if_clone.h> 69 #include <net/if_var.h> 70 #include <net/if_types.h> 71 #include <net/netisr.h> 72 #include <net/vnet.h> 73 74 #define EPAIRNAME "epair" 75 76 SYSCTL_DECL(_net_link); 77 SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 78 79 #ifdef EPAIR_DEBUG 80 static int epair_debug = 0; 81 SYSCTL_XINT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 82 &epair_debug, 0, "if_epair(4) debugging."); 83 #define DPRINTF(fmt, arg...) \ 84 if (epair_debug) \ 85 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 86 #else 87 #define DPRINTF(fmt, arg...) 88 #endif 89 90 static void epair_nh_sintr(struct mbuf *); 91 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 92 static void epair_nh_drainedcpu(u_int); 93 94 static void epair_start_locked(struct ifnet *); 95 96 static int epair_clone_match(struct if_clone *, const char *); 97 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 98 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 99 100 /* Netisr realted definitions and sysctl. */ 101 static struct netisr_handler epair_nh = { 102 .nh_name = EPAIRNAME, 103 .nh_proto = NETISR_EPAIR, 104 .nh_policy = NETISR_POLICY_CPU, 105 .nh_handler = epair_nh_sintr, 106 .nh_m2cpuid = epair_nh_m2cpuid, 107 .nh_drainedcpu = epair_nh_drainedcpu, 108 }; 109 110 static int 111 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 112 { 113 int error, qlimit; 114 115 netisr_getqlimit(&epair_nh, &qlimit); 116 error = sysctl_handle_int(oidp, &qlimit, 0, req); 117 if (error || !req->newptr) 118 return (error); 119 if (qlimit < 1) 120 return (EINVAL); 121 return (netisr_setqlimit(&epair_nh, qlimit)); 122 } 123 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 124 0, 0, sysctl_epair_netisr_maxqlen, "I", 125 "Maximum if_epair(4) netisr \"hw\" queue length"); 126 127 struct epair_softc { 128 struct ifnet *ifp; /* This ifp. */ 129 struct ifnet *oifp; /* other ifp of pair. */ 130 u_int refcount; /* # of mbufs in flight. */ 131 u_int cpuid; /* CPU ID assigned upon creation. */ 132 void (*if_qflush)(struct ifnet *); 133 /* Original if_qflush routine. */ 134 }; 135 136 /* 137 * Per-CPU list of ifps with data in the ifq that needs to be flushed 138 * to the netisr ``hw'' queue before we allow any further direct queuing 139 * to the ``hw'' queue. 140 */ 141 struct epair_ifp_drain { 142 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 143 struct ifnet *ifp; 144 }; 145 STAILQ_HEAD(eid_list, epair_ifp_drain); 146 147 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 148 "if_epair", NULL, MTX_DEF) 149 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 150 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 151 MA_OWNED) 152 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 153 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 154 155 #ifdef INVARIANTS 156 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 157 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 158 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 159 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 160 #else 161 #define EPAIR_REFCOUNT_INIT(r, v) 162 #define EPAIR_REFCOUNT_AQUIRE(r) 163 #define EPAIR_REFCOUNT_RELEASE(r) 164 #define EPAIR_REFCOUNT_ASSERT(a, p) 165 #endif 166 167 static MALLOC_DEFINE(M_EPAIR, EPAIRNAME, 168 "Pair of virtual cross-over connected Ethernet-like interfaces"); 169 170 static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER( 171 EPAIRNAME, NULL, IF_MAXUNIT, 172 NULL, epair_clone_match, epair_clone_create, epair_clone_destroy); 173 174 /* 175 * DPCPU area and functions. 176 */ 177 struct epair_dpcpu { 178 struct mtx if_epair_mtx; /* Per-CPU locking. */ 179 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 180 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 181 * data in the ifq. */ 182 }; 183 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 184 185 static void 186 epair_dpcpu_init(void) 187 { 188 struct epair_dpcpu *epair_dpcpu; 189 struct eid_list *s; 190 u_int cpuid; 191 192 for (cpuid = 0; cpuid <= mp_maxid; cpuid++) { 193 if (CPU_ABSENT(cpuid)) 194 continue; 195 196 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 197 198 /* Initialize per-cpu lock. */ 199 EPAIR_LOCK_INIT(epair_dpcpu); 200 201 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 202 epair_dpcpu->epair_drv_flags = 0; 203 204 /* 205 * Initialize per-cpu drain list. 206 * Manually do what STAILQ_HEAD_INITIALIZER would do. 207 */ 208 s = &epair_dpcpu->epair_ifp_drain_list; 209 s->stqh_first = NULL; 210 s->stqh_last = &s->stqh_first; 211 } 212 } 213 214 static void 215 epair_dpcpu_detach(void) 216 { 217 struct epair_dpcpu *epair_dpcpu; 218 u_int cpuid; 219 220 for (cpuid = 0; cpuid <= mp_maxid; cpuid++) { 221 if (CPU_ABSENT(cpuid)) 222 continue; 223 224 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 225 226 /* Destroy per-cpu lock. */ 227 EPAIR_LOCK_DESTROY(epair_dpcpu); 228 } 229 } 230 231 /* 232 * Helper functions. 233 */ 234 static u_int 235 cpuid_from_ifp(struct ifnet *ifp) 236 { 237 struct epair_softc *sc; 238 239 if (ifp == NULL) 240 return (0); 241 sc = ifp->if_softc; 242 243 return (sc->cpuid); 244 } 245 246 /* 247 * Netisr handler functions. 248 */ 249 static void 250 epair_nh_sintr(struct mbuf *m) 251 { 252 struct ifnet *ifp; 253 struct epair_softc *sc; 254 255 ifp = m->m_pkthdr.rcvif; 256 (*ifp->if_input)(ifp, m); 257 sc = ifp->if_softc; 258 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 259 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 260 } 261 262 static struct mbuf * 263 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 264 { 265 266 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 267 268 return (m); 269 } 270 271 static void 272 epair_nh_drainedcpu(u_int cpuid) 273 { 274 struct epair_dpcpu *epair_dpcpu; 275 struct epair_ifp_drain *elm, *tvar; 276 struct ifnet *ifp; 277 278 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 279 EPAIR_LOCK(epair_dpcpu); 280 /* 281 * Assume our "hw" queue and possibly ifq will be emptied 282 * again. In case we will overflow the "hw" queue while 283 * draining, epair_start_locked will set IFF_DRV_OACTIVE 284 * again and we will stop and return. 285 */ 286 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 287 ifp_next, tvar) { 288 ifp = elm->ifp; 289 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 290 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 291 epair_start_locked(ifp); 292 293 IFQ_LOCK(&ifp->if_snd); 294 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 295 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 296 elm, epair_ifp_drain, ifp_next); 297 free(elm, M_EPAIR); 298 } 299 IFQ_UNLOCK(&ifp->if_snd); 300 301 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 302 /* Our "hw"q overflew again. */ 303 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE 304 DPRINTF("hw queue length overflow at %u\n", 305 epair_nh.nh_qlimit); 306 break; 307 } 308 } 309 EPAIR_UNLOCK(epair_dpcpu); 310 } 311 312 /* 313 * Network interface (`if') related functions. 314 */ 315 static int 316 epair_add_ifp_for_draining(struct ifnet *ifp) 317 { 318 struct epair_dpcpu *epair_dpcpu; 319 struct epair_softc *sc = sc = ifp->if_softc; 320 struct epair_ifp_drain *elm = NULL; 321 322 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 323 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 324 if (elm->ifp == ifp) 325 break; 326 /* If the ipf is there already, return success. */ 327 if (elm != NULL) 328 return (0); 329 330 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 331 if (elm == NULL) 332 return (ENOMEM); 333 334 elm->ifp = ifp; 335 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 336 337 return (0); 338 } 339 340 static void 341 epair_start_locked(struct ifnet *ifp) 342 { 343 struct epair_dpcpu *epair_dpcpu; 344 struct mbuf *m; 345 struct epair_softc *sc; 346 struct ifnet *oifp; 347 int error; 348 349 DPRINTF("ifp=%p\n", ifp); 350 sc = ifp->if_softc; 351 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 352 EPAIR_LOCK_ASSERT(epair_dpcpu); 353 354 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 355 return; 356 if ((ifp->if_flags & IFF_UP) == 0) 357 return; 358 359 /* 360 * We get patckets here from ether_output via if_handoff() 361 * and ned to put them into the input queue of the oifp 362 * and call oifp->if_input() via netisr/epair_sintr(). 363 */ 364 oifp = sc->oifp; 365 sc = oifp->if_softc; 366 for (;;) { 367 IFQ_DEQUEUE(&ifp->if_snd, m); 368 if (m == NULL) 369 break; 370 BPF_MTAP(ifp, m); 371 372 /* 373 * In case the outgoing interface is not usable, 374 * drop the packet. 375 */ 376 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 377 (oifp->if_flags & IFF_UP) ==0) { 378 ifp->if_oerrors++; 379 m_freem(m); 380 continue; 381 } 382 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 383 384 /* 385 * Add a reference so the interface cannot go while the 386 * packet is in transit as we rely on rcvif to stay valid. 387 */ 388 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 389 m->m_pkthdr.rcvif = oifp; 390 CURVNET_SET_QUIET(oifp->if_vnet); 391 error = netisr_queue(NETISR_EPAIR, m); 392 CURVNET_RESTORE(); 393 if (!error) { 394 ifp->if_opackets++; 395 /* Someone else received the packet. */ 396 oifp->if_ipackets++; 397 } else { 398 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 399 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 400 if (epair_add_ifp_for_draining(ifp)) { 401 ifp->if_oerrors++; 402 m_freem(m); 403 } 404 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 405 } 406 } 407 } 408 409 static void 410 epair_start(struct ifnet *ifp) 411 { 412 struct epair_dpcpu *epair_dpcpu; 413 414 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 415 EPAIR_LOCK(epair_dpcpu); 416 epair_start_locked(ifp); 417 EPAIR_UNLOCK(epair_dpcpu); 418 } 419 420 static int 421 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 422 { 423 struct epair_dpcpu *epair_dpcpu; 424 struct epair_softc *sc; 425 struct ifnet *oifp; 426 int error, len; 427 short mflags; 428 429 DPRINTF("ifp=%p m=%p\n", ifp, m); 430 sc = ifp->if_softc; 431 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 432 EPAIR_LOCK_ASSERT(epair_dpcpu); 433 434 if (m == NULL) 435 return (0); 436 437 /* 438 * We are not going to use the interface en/dequeue mechanism 439 * on the TX side. We are called from ether_output_frame() 440 * and will put the packet into the incoming queue of the 441 * other interface of our pair via the netsir. 442 */ 443 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 444 m_freem(m); 445 return (ENXIO); 446 } 447 if ((ifp->if_flags & IFF_UP) == 0) { 448 m_freem(m); 449 return (ENETDOWN); 450 } 451 452 BPF_MTAP(ifp, m); 453 454 /* 455 * In case the outgoing interface is not usable, 456 * drop the packet. 457 */ 458 oifp = sc->oifp; 459 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 460 (oifp->if_flags & IFF_UP) ==0) { 461 ifp->if_oerrors++; 462 m_freem(m); 463 return (0); 464 } 465 len = m->m_pkthdr.len; 466 mflags = m->m_flags; 467 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 468 469 #ifdef ALTQ 470 /* Support ALTQ via the clasic if_start() path. */ 471 IF_LOCK(&ifp->if_snd); 472 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 473 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 474 if (error) 475 ifp->if_snd.ifq_drops++; 476 IF_UNLOCK(&ifp->if_snd); 477 if (!error) { 478 ifp->if_obytes += len; 479 if (mflags & (M_BCAST|M_MCAST)) 480 ifp->if_omcasts++; 481 482 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 483 epair_start_locked(ifp); 484 else 485 (void)epair_add_ifp_for_draining(ifp); 486 } 487 return (error); 488 } 489 IF_UNLOCK(&ifp->if_snd); 490 #endif 491 492 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 493 /* 494 * Our hardware queue is full, try to fall back 495 * queuing to the ifq but do not call ifp->if_start. 496 * Either we are lucky or the packet is gone. 497 */ 498 IFQ_ENQUEUE(&ifp->if_snd, m, error); 499 if (!error) 500 (void)epair_add_ifp_for_draining(ifp); 501 return (error); 502 } 503 sc = oifp->if_softc; 504 /* 505 * Add a reference so the interface cannot go while the 506 * packet is in transit as we rely on rcvif to stay valid. 507 */ 508 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 509 m->m_pkthdr.rcvif = oifp; 510 CURVNET_SET_QUIET(oifp->if_vnet); 511 error = netisr_queue(NETISR_EPAIR, m); 512 CURVNET_RESTORE(); 513 if (!error) { 514 ifp->if_opackets++; 515 /* 516 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 517 * but as we bypass all this we have to duplicate 518 * the logic another time. 519 */ 520 ifp->if_obytes += len; 521 if (mflags & (M_BCAST|M_MCAST)) 522 ifp->if_omcasts++; 523 /* Someone else received the packet. */ 524 oifp->if_ipackets++; 525 } else { 526 /* The packet was freed already. */ 527 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 528 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 529 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 530 } 531 532 return (error); 533 } 534 535 static int 536 epair_transmit(struct ifnet *ifp, struct mbuf *m) 537 { 538 struct epair_dpcpu *epair_dpcpu; 539 int error; 540 541 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 542 EPAIR_LOCK(epair_dpcpu); 543 error = epair_transmit_locked(ifp, m); 544 EPAIR_UNLOCK(epair_dpcpu); 545 return (error); 546 } 547 548 static void 549 epair_qflush(struct ifnet *ifp) 550 { 551 struct epair_dpcpu *epair_dpcpu; 552 struct epair_softc *sc; 553 struct ifaltq *ifq; 554 555 sc = ifp->if_softc; 556 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 557 EPAIR_LOCK(epair_dpcpu); 558 ifq = &ifp->if_snd; 559 DPRINTF("ifp=%p sc refcnt=%u ifq_len=%u\n", 560 ifp, sc->refcount, ifq->ifq_len); 561 /* 562 * Instead of calling EPAIR_REFCOUNT_RELEASE(&sc->refcount); 563 * n times, just subtract for the cleanup. 564 */ 565 sc->refcount -= ifq->ifq_len; 566 EPAIR_UNLOCK(epair_dpcpu); 567 if (sc->if_qflush) 568 sc->if_qflush(ifp); 569 } 570 571 static int 572 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 573 { 574 struct ifreq *ifr; 575 int error; 576 577 ifr = (struct ifreq *)data; 578 switch (cmd) { 579 case SIOCSIFFLAGS: 580 case SIOCADDMULTI: 581 case SIOCDELMULTI: 582 error = 0; 583 break; 584 585 case SIOCSIFMTU: 586 /* We basically allow all kinds of MTUs. */ 587 ifp->if_mtu = ifr->ifr_mtu; 588 error = 0; 589 break; 590 591 default: 592 /* Let the common ethernet handler process this. */ 593 error = ether_ioctl(ifp, cmd, data); 594 break; 595 } 596 597 return (error); 598 } 599 600 static void 601 epair_init(void *dummy __unused) 602 { 603 } 604 605 606 /* 607 * Interface cloning functions. 608 * We use our private ones so that we can create/destroy our secondary 609 * device along with the primary one. 610 */ 611 static int 612 epair_clone_match(struct if_clone *ifc, const char *name) 613 { 614 const char *cp; 615 616 DPRINTF("name='%s'\n", name); 617 618 /* 619 * Our base name is epair. 620 * Our interfaces will be named epair<n>[ab]. 621 * So accept anything of the following list: 622 * - epair 623 * - epair<n> 624 * but not the epair<n>[ab] versions. 625 */ 626 if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0) 627 return (0); 628 629 for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) { 630 if (*cp < '0' || *cp > '9') 631 return (0); 632 } 633 634 return (1); 635 } 636 637 static int 638 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 639 { 640 struct epair_softc *sca, *scb; 641 struct ifnet *ifp; 642 char *dp; 643 int error, unit, wildcard; 644 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 645 646 /* 647 * We are abusing params to create our second interface. 648 * Actually we already created it and called if_clone_createif() 649 * for it to do the official insertion procedure the moment we knew 650 * it cannot fail anymore. So just do attach it here. 651 */ 652 if (params) { 653 scb = (struct epair_softc *)params; 654 ifp = scb->ifp; 655 /* Assign a hopefully unique, locally administered etheraddr. */ 656 eaddr[0] = 0x02; 657 eaddr[3] = (ifp->if_index >> 8) & 0xff; 658 eaddr[4] = ifp->if_index & 0xff; 659 eaddr[5] = 0x0b; 660 ether_ifattach(ifp, eaddr); 661 /* Correctly set the name for the cloner list. */ 662 strlcpy(name, scb->ifp->if_xname, len); 663 return (0); 664 } 665 666 /* Try to see if a special unit was requested. */ 667 error = ifc_name2unit(name, &unit); 668 if (error != 0) 669 return (error); 670 wildcard = (unit < 0); 671 672 error = ifc_alloc_unit(ifc, &unit); 673 if (error != 0) 674 return (error); 675 676 /* 677 * If no unit had been given, we need to adjust the ifName. 678 * Also make sure there is space for our extra [ab] suffix. 679 */ 680 for (dp = name; *dp != '\0'; dp++); 681 if (wildcard) { 682 error = snprintf(dp, len - (dp - name), "%d", unit); 683 if (error > len - (dp - name) - 1) { 684 /* ifName too long. */ 685 ifc_free_unit(ifc, unit); 686 return (ENOSPC); 687 } 688 dp += error; 689 } 690 if (len - (dp - name) - 1 < 1) { 691 /* No space left for our [ab] suffix. */ 692 ifc_free_unit(ifc, unit); 693 return (ENOSPC); 694 } 695 *dp = 'a'; 696 /* Must not change dp so we can replace 'a' by 'b' later. */ 697 *(dp+1) = '\0'; 698 699 /* Allocate memory for both [ab] interfaces */ 700 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 701 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 702 sca->ifp = if_alloc(IFT_ETHER); 703 if (sca->ifp == NULL) { 704 free(sca, M_EPAIR); 705 ifc_free_unit(ifc, unit); 706 return (ENOSPC); 707 } 708 709 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 710 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 711 scb->ifp = if_alloc(IFT_ETHER); 712 if (scb->ifp == NULL) { 713 free(scb, M_EPAIR); 714 if_free(sca->ifp); 715 free(sca, M_EPAIR); 716 ifc_free_unit(ifc, unit); 717 return (ENOSPC); 718 } 719 720 /* 721 * Cross-reference the interfaces so we will be able to free both. 722 */ 723 sca->oifp = scb->ifp; 724 scb->oifp = sca->ifp; 725 726 /* 727 * Calculate the cpuid for netisr queueing based on the 728 * ifIndex of the interfaces. As long as we cannot configure 729 * this or use cpuset information easily we cannot guarantee 730 * cache locality but we can at least allow parallelism. 731 */ 732 sca->cpuid = 733 netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); 734 scb->cpuid = 735 netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); 736 737 /* Finish initialization of interface <n>a. */ 738 ifp = sca->ifp; 739 ifp->if_softc = sca; 740 strlcpy(ifp->if_xname, name, IFNAMSIZ); 741 ifp->if_dname = ifc->ifc_name; 742 ifp->if_dunit = unit; 743 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 744 ifp->if_start = epair_start; 745 ifp->if_ioctl = epair_ioctl; 746 ifp->if_init = epair_init; 747 ifp->if_snd.ifq_maxlen = ifqmaxlen; 748 /* Assign a hopefully unique, locally administered etheraddr. */ 749 eaddr[0] = 0x02; 750 eaddr[3] = (ifp->if_index >> 8) & 0xff; 751 eaddr[4] = ifp->if_index & 0xff; 752 eaddr[5] = 0x0a; 753 ether_ifattach(ifp, eaddr); 754 sca->if_qflush = ifp->if_qflush; 755 ifp->if_qflush = epair_qflush; 756 ifp->if_transmit = epair_transmit; 757 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 758 759 /* Swap the name and finish initialization of interface <n>b. */ 760 *dp = 'b'; 761 762 ifp = scb->ifp; 763 ifp->if_softc = scb; 764 strlcpy(ifp->if_xname, name, IFNAMSIZ); 765 ifp->if_dname = ifc->ifc_name; 766 ifp->if_dunit = unit; 767 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 768 ifp->if_start = epair_start; 769 ifp->if_ioctl = epair_ioctl; 770 ifp->if_init = epair_init; 771 ifp->if_snd.ifq_maxlen = ifqmaxlen; 772 /* We need to play some tricks here for the second interface. */ 773 strlcpy(name, EPAIRNAME, len); 774 error = if_clone_create(name, len, (caddr_t)scb); 775 if (error) 776 panic("%s: if_clone_createif() for our 2nd iface failed: %d", 777 __func__, error); 778 scb->if_qflush = ifp->if_qflush; 779 ifp->if_qflush = epair_qflush; 780 ifp->if_transmit = epair_transmit; 781 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 782 783 /* 784 * Restore name to <n>a as the ifp for this will go into the 785 * cloner list for the initial call. 786 */ 787 strlcpy(name, sca->ifp->if_xname, len); 788 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 789 790 /* Tell the world, that we are ready to rock. */ 791 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 792 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 793 794 return (0); 795 } 796 797 static int 798 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 799 { 800 struct ifnet *oifp; 801 struct epair_softc *sca, *scb; 802 int unit, error; 803 804 DPRINTF("ifp=%p\n", ifp); 805 806 /* 807 * In case we called into if_clone_destroyif() ourselves 808 * again to remove the second interface, the softc will be 809 * NULL. In that case so not do anything but return success. 810 */ 811 if (ifp->if_softc == NULL) 812 return (0); 813 814 unit = ifp->if_dunit; 815 sca = ifp->if_softc; 816 oifp = sca->oifp; 817 scb = oifp->if_softc; 818 819 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 820 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 821 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 822 ether_ifdetach(oifp); 823 ether_ifdetach(ifp); 824 /* 825 * Wait for all packets to be dispatched to if_input. 826 * The numbers can only go down as the interfaces are 827 * detached so there is no need to use atomics. 828 */ 829 DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount); 830 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1, 831 ("%s: sca->refcount!=1: %d || scb->refcount!=1: %d", 832 __func__, sca->refcount, scb->refcount)); 833 834 /* 835 * Get rid of our second half. 836 */ 837 oifp->if_softc = NULL; 838 error = if_clone_destroyif(ifc, oifp); 839 if (error) 840 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 841 __func__, error); 842 843 /* 844 * Finish cleaning up. Free them and release the unit. 845 * As the other of the two interfaces my reside in a different vnet, 846 * we need to switch before freeing them. 847 */ 848 CURVNET_SET_QUIET(oifp->if_vnet); 849 if_free_type(oifp, IFT_ETHER); 850 CURVNET_RESTORE(); 851 if_free_type(ifp, IFT_ETHER); 852 free(scb, M_EPAIR); 853 free(sca, M_EPAIR); 854 ifc_free_unit(ifc, unit); 855 856 return (0); 857 } 858 859 static int 860 epair_modevent(module_t mod, int type, void *data) 861 { 862 int qlimit; 863 864 switch (type) { 865 case MOD_LOAD: 866 /* For now limit us to one global mutex and one inq. */ 867 epair_dpcpu_init(); 868 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 869 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 870 epair_nh.nh_qlimit = qlimit; 871 netisr_register(&epair_nh); 872 if_clone_attach(&epair_cloner); 873 if (bootverbose) 874 printf("%s initialized.\n", EPAIRNAME); 875 break; 876 case MOD_UNLOAD: 877 if_clone_detach(&epair_cloner); 878 netisr_unregister(&epair_nh); 879 epair_dpcpu_detach(); 880 if (bootverbose) 881 printf("%s unloaded.\n", EPAIRNAME); 882 break; 883 default: 884 return (EOPNOTSUPP); 885 } 886 return (0); 887 } 888 889 static moduledata_t epair_mod = { 890 "if_epair", 891 epair_modevent, 892 0 893 }; 894 895 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 896 MODULE_VERSION(if_epair, 1); 897