1 /*- 2 * Copyright (c) 2008 The FreeBSD Foundation 3 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by CK Software GmbH under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * A pair of virtual back-to-back connected ethernet like interfaces 33 * (``two interfaces with a virtual cross-over cable''). 34 * 35 * This is mostly intended to be used to provide connectivity between 36 * different virtual network stack instances. 37 */ 38 /* 39 * Things to re-think once we have more experience: 40 * - ifp->if_reassign function once we can test with vimage. Depending on 41 * how if_vmove() is going to be improved. 42 * - Real random etheraddrs that are checked to be uniquish; we would need 43 * to re-do them in case we move the interface between network stacks 44 * in a private if_reassign function. 45 * In case we bridge to a real interface/network or between indepedent 46 * epairs on multiple stacks/machines, we may need this. 47 * For now let the user handle that case. 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include <sys/param.h> 54 #include <sys/kernel.h> 55 #include <sys/mbuf.h> 56 #include <sys/module.h> 57 #include <sys/refcount.h> 58 #include <sys/queue.h> 59 #include <sys/smp.h> 60 #include <sys/socket.h> 61 #include <sys/sockio.h> 62 #include <sys/sysctl.h> 63 #include <sys/types.h> 64 65 #include <net/bpf.h> 66 #include <net/ethernet.h> 67 #include <net/if.h> 68 #include <net/if_clone.h> 69 #include <net/if_media.h> 70 #include <net/if_var.h> 71 #include <net/if_types.h> 72 #include <net/netisr.h> 73 #include <net/vnet.h> 74 75 #define EPAIRNAME "epair" 76 77 SYSCTL_DECL(_net_link); 78 SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 79 80 #ifdef EPAIR_DEBUG 81 static int epair_debug = 0; 82 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 83 &epair_debug, 0, "if_epair(4) debugging."); 84 #define DPRINTF(fmt, arg...) \ 85 if (epair_debug) \ 86 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 87 #else 88 #define DPRINTF(fmt, arg...) 89 #endif 90 91 static void epair_nh_sintr(struct mbuf *); 92 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 93 static void epair_nh_drainedcpu(u_int); 94 95 static void epair_start_locked(struct ifnet *); 96 static int epair_media_change(struct ifnet *); 97 static void epair_media_status(struct ifnet *, struct ifmediareq *); 98 99 static int epair_clone_match(struct if_clone *, const char *); 100 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 101 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 102 103 /* Netisr realted definitions and sysctl. */ 104 static struct netisr_handler epair_nh = { 105 .nh_name = EPAIRNAME, 106 .nh_proto = NETISR_EPAIR, 107 .nh_policy = NETISR_POLICY_CPU, 108 .nh_handler = epair_nh_sintr, 109 .nh_m2cpuid = epair_nh_m2cpuid, 110 .nh_drainedcpu = epair_nh_drainedcpu, 111 }; 112 113 static int 114 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 115 { 116 int error, qlimit; 117 118 netisr_getqlimit(&epair_nh, &qlimit); 119 error = sysctl_handle_int(oidp, &qlimit, 0, req); 120 if (error || !req->newptr) 121 return (error); 122 if (qlimit < 1) 123 return (EINVAL); 124 return (netisr_setqlimit(&epair_nh, qlimit)); 125 } 126 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 127 0, 0, sysctl_epair_netisr_maxqlen, "I", 128 "Maximum if_epair(4) netisr \"hw\" queue length"); 129 130 struct epair_softc { 131 struct ifnet *ifp; /* This ifp. */ 132 struct ifnet *oifp; /* other ifp of pair. */ 133 struct ifmedia media; /* Media config (fake). */ 134 u_int refcount; /* # of mbufs in flight. */ 135 u_int cpuid; /* CPU ID assigned upon creation. */ 136 void (*if_qflush)(struct ifnet *); 137 /* Original if_qflush routine. */ 138 }; 139 140 /* 141 * Per-CPU list of ifps with data in the ifq that needs to be flushed 142 * to the netisr ``hw'' queue before we allow any further direct queuing 143 * to the ``hw'' queue. 144 */ 145 struct epair_ifp_drain { 146 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 147 struct ifnet *ifp; 148 }; 149 STAILQ_HEAD(eid_list, epair_ifp_drain); 150 151 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 152 "if_epair", NULL, MTX_DEF) 153 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 154 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 155 MA_OWNED) 156 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 157 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 158 159 #ifdef INVARIANTS 160 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 161 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 162 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 163 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 164 #else 165 #define EPAIR_REFCOUNT_INIT(r, v) 166 #define EPAIR_REFCOUNT_AQUIRE(r) 167 #define EPAIR_REFCOUNT_RELEASE(r) 168 #define EPAIR_REFCOUNT_ASSERT(a, p) 169 #endif 170 171 static MALLOC_DEFINE(M_EPAIR, EPAIRNAME, 172 "Pair of virtual cross-over connected Ethernet-like interfaces"); 173 174 static struct if_clone epair_cloner = IFC_CLONE_INITIALIZER( 175 EPAIRNAME, NULL, IF_MAXUNIT, 176 NULL, epair_clone_match, epair_clone_create, epair_clone_destroy); 177 178 /* 179 * DPCPU area and functions. 180 */ 181 struct epair_dpcpu { 182 struct mtx if_epair_mtx; /* Per-CPU locking. */ 183 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 184 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 185 * data in the ifq. */ 186 }; 187 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 188 189 static void 190 epair_dpcpu_init(void) 191 { 192 struct epair_dpcpu *epair_dpcpu; 193 struct eid_list *s; 194 u_int cpuid; 195 196 CPU_FOREACH(cpuid) { 197 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 198 199 /* Initialize per-cpu lock. */ 200 EPAIR_LOCK_INIT(epair_dpcpu); 201 202 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 203 epair_dpcpu->epair_drv_flags = 0; 204 205 /* 206 * Initialize per-cpu drain list. 207 * Manually do what STAILQ_HEAD_INITIALIZER would do. 208 */ 209 s = &epair_dpcpu->epair_ifp_drain_list; 210 s->stqh_first = NULL; 211 s->stqh_last = &s->stqh_first; 212 } 213 } 214 215 static void 216 epair_dpcpu_detach(void) 217 { 218 struct epair_dpcpu *epair_dpcpu; 219 u_int cpuid; 220 221 CPU_FOREACH(cpuid) { 222 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 223 224 /* Destroy per-cpu lock. */ 225 EPAIR_LOCK_DESTROY(epair_dpcpu); 226 } 227 } 228 229 /* 230 * Helper functions. 231 */ 232 static u_int 233 cpuid_from_ifp(struct ifnet *ifp) 234 { 235 struct epair_softc *sc; 236 237 if (ifp == NULL) 238 return (0); 239 sc = ifp->if_softc; 240 241 return (sc->cpuid); 242 } 243 244 /* 245 * Netisr handler functions. 246 */ 247 static void 248 epair_nh_sintr(struct mbuf *m) 249 { 250 struct ifnet *ifp; 251 struct epair_softc *sc; 252 253 ifp = m->m_pkthdr.rcvif; 254 (*ifp->if_input)(ifp, m); 255 sc = ifp->if_softc; 256 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 257 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 258 ("%s: ifp=%p sc->refcount not >= 1: %d", 259 __func__, ifp, sc->refcount)); 260 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 261 } 262 263 static struct mbuf * 264 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 265 { 266 267 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 268 269 return (m); 270 } 271 272 static void 273 epair_nh_drainedcpu(u_int cpuid) 274 { 275 struct epair_dpcpu *epair_dpcpu; 276 struct epair_ifp_drain *elm, *tvar; 277 struct ifnet *ifp; 278 279 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 280 EPAIR_LOCK(epair_dpcpu); 281 /* 282 * Assume our "hw" queue and possibly ifq will be emptied 283 * again. In case we will overflow the "hw" queue while 284 * draining, epair_start_locked will set IFF_DRV_OACTIVE 285 * again and we will stop and return. 286 */ 287 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 288 ifp_next, tvar) { 289 ifp = elm->ifp; 290 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 291 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 292 epair_start_locked(ifp); 293 294 IFQ_LOCK(&ifp->if_snd); 295 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 296 struct epair_softc *sc; 297 298 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 299 elm, epair_ifp_drain, ifp_next); 300 /* The cached ifp goes off the list. */ 301 sc = ifp->if_softc; 302 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 303 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 304 ("%s: ifp=%p sc->refcount not >= 1: %d", 305 __func__, ifp, sc->refcount)); 306 free(elm, M_EPAIR); 307 } 308 IFQ_UNLOCK(&ifp->if_snd); 309 310 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 311 /* Our "hw"q overflew again. */ 312 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 313 DPRINTF("hw queue length overflow at %u\n", 314 epair_nh.nh_qlimit); 315 break; 316 } 317 } 318 EPAIR_UNLOCK(epair_dpcpu); 319 } 320 321 /* 322 * Network interface (`if') related functions. 323 */ 324 static void 325 epair_remove_ifp_from_draining(struct ifnet *ifp) 326 { 327 struct epair_dpcpu *epair_dpcpu; 328 struct epair_ifp_drain *elm, *tvar; 329 u_int cpuid; 330 331 CPU_FOREACH(cpuid) { 332 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 333 EPAIR_LOCK(epair_dpcpu); 334 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 335 ifp_next, tvar) { 336 if (ifp == elm->ifp) { 337 struct epair_softc *sc; 338 339 STAILQ_REMOVE( 340 &epair_dpcpu->epair_ifp_drain_list, elm, 341 epair_ifp_drain, ifp_next); 342 /* The cached ifp goes off the list. */ 343 sc = ifp->if_softc; 344 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 345 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 346 ("%s: ifp=%p sc->refcount not >= 1: %d", 347 __func__, ifp, sc->refcount)); 348 free(elm, M_EPAIR); 349 } 350 } 351 EPAIR_UNLOCK(epair_dpcpu); 352 } 353 } 354 355 static int 356 epair_add_ifp_for_draining(struct ifnet *ifp) 357 { 358 struct epair_dpcpu *epair_dpcpu; 359 struct epair_softc *sc; 360 struct epair_ifp_drain *elm = NULL; 361 362 sc = ifp->if_softc; 363 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 364 EPAIR_LOCK_ASSERT(epair_dpcpu); 365 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 366 if (elm->ifp == ifp) 367 break; 368 /* If the ifp is there already, return success. */ 369 if (elm != NULL) 370 return (0); 371 372 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 373 if (elm == NULL) 374 return (ENOMEM); 375 376 elm->ifp = ifp; 377 /* Add a reference for the ifp pointer on the list. */ 378 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 379 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 380 381 return (0); 382 } 383 384 static void 385 epair_start_locked(struct ifnet *ifp) 386 { 387 struct epair_dpcpu *epair_dpcpu; 388 struct mbuf *m; 389 struct epair_softc *sc; 390 struct ifnet *oifp; 391 int error; 392 393 DPRINTF("ifp=%p\n", ifp); 394 sc = ifp->if_softc; 395 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 396 EPAIR_LOCK_ASSERT(epair_dpcpu); 397 398 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 399 return; 400 if ((ifp->if_flags & IFF_UP) == 0) 401 return; 402 403 /* 404 * We get patckets here from ether_output via if_handoff() 405 * and ned to put them into the input queue of the oifp 406 * and call oifp->if_input() via netisr/epair_sintr(). 407 */ 408 oifp = sc->oifp; 409 sc = oifp->if_softc; 410 for (;;) { 411 IFQ_DEQUEUE(&ifp->if_snd, m); 412 if (m == NULL) 413 break; 414 BPF_MTAP(ifp, m); 415 416 /* 417 * In case the outgoing interface is not usable, 418 * drop the packet. 419 */ 420 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 421 (oifp->if_flags & IFF_UP) ==0) { 422 ifp->if_oerrors++; 423 m_freem(m); 424 continue; 425 } 426 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 427 428 /* 429 * Add a reference so the interface cannot go while the 430 * packet is in transit as we rely on rcvif to stay valid. 431 */ 432 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 433 m->m_pkthdr.rcvif = oifp; 434 CURVNET_SET_QUIET(oifp->if_vnet); 435 error = netisr_queue(NETISR_EPAIR, m); 436 CURVNET_RESTORE(); 437 if (!error) { 438 ifp->if_opackets++; 439 /* Someone else received the packet. */ 440 oifp->if_ipackets++; 441 } else { 442 /* The packet was freed already. */ 443 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 444 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 445 (void) epair_add_ifp_for_draining(ifp); 446 ifp->if_oerrors++; 447 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 448 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 449 ("%s: ifp=%p sc->refcount not >= 1: %d", 450 __func__, oifp, sc->refcount)); 451 } 452 } 453 } 454 455 static void 456 epair_start(struct ifnet *ifp) 457 { 458 struct epair_dpcpu *epair_dpcpu; 459 460 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 461 EPAIR_LOCK(epair_dpcpu); 462 epair_start_locked(ifp); 463 EPAIR_UNLOCK(epair_dpcpu); 464 } 465 466 static int 467 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 468 { 469 struct epair_dpcpu *epair_dpcpu; 470 struct epair_softc *sc; 471 struct ifnet *oifp; 472 int error, len; 473 short mflags; 474 475 DPRINTF("ifp=%p m=%p\n", ifp, m); 476 sc = ifp->if_softc; 477 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 478 EPAIR_LOCK_ASSERT(epair_dpcpu); 479 480 if (m == NULL) 481 return (0); 482 483 /* 484 * We are not going to use the interface en/dequeue mechanism 485 * on the TX side. We are called from ether_output_frame() 486 * and will put the packet into the incoming queue of the 487 * other interface of our pair via the netsir. 488 */ 489 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 490 m_freem(m); 491 return (ENXIO); 492 } 493 if ((ifp->if_flags & IFF_UP) == 0) { 494 m_freem(m); 495 return (ENETDOWN); 496 } 497 498 BPF_MTAP(ifp, m); 499 500 /* 501 * In case the outgoing interface is not usable, 502 * drop the packet. 503 */ 504 oifp = sc->oifp; 505 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 506 (oifp->if_flags & IFF_UP) ==0) { 507 ifp->if_oerrors++; 508 m_freem(m); 509 return (0); 510 } 511 len = m->m_pkthdr.len; 512 mflags = m->m_flags; 513 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 514 515 #ifdef ALTQ 516 /* Support ALTQ via the clasic if_start() path. */ 517 IF_LOCK(&ifp->if_snd); 518 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 519 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 520 if (error) 521 ifp->if_snd.ifq_drops++; 522 IF_UNLOCK(&ifp->if_snd); 523 if (!error) { 524 ifp->if_obytes += len; 525 if (mflags & (M_BCAST|M_MCAST)) 526 ifp->if_omcasts++; 527 528 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 529 epair_start_locked(ifp); 530 else 531 (void)epair_add_ifp_for_draining(ifp); 532 } 533 return (error); 534 } 535 IF_UNLOCK(&ifp->if_snd); 536 #endif 537 538 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 539 /* 540 * Our hardware queue is full, try to fall back 541 * queuing to the ifq but do not call ifp->if_start. 542 * Either we are lucky or the packet is gone. 543 */ 544 IFQ_ENQUEUE(&ifp->if_snd, m, error); 545 if (!error) 546 (void)epair_add_ifp_for_draining(ifp); 547 return (error); 548 } 549 sc = oifp->if_softc; 550 /* 551 * Add a reference so the interface cannot go while the 552 * packet is in transit as we rely on rcvif to stay valid. 553 */ 554 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 555 m->m_pkthdr.rcvif = oifp; 556 CURVNET_SET_QUIET(oifp->if_vnet); 557 error = netisr_queue(NETISR_EPAIR, m); 558 CURVNET_RESTORE(); 559 if (!error) { 560 ifp->if_opackets++; 561 /* 562 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 563 * but as we bypass all this we have to duplicate 564 * the logic another time. 565 */ 566 ifp->if_obytes += len; 567 if (mflags & (M_BCAST|M_MCAST)) 568 ifp->if_omcasts++; 569 /* Someone else received the packet. */ 570 oifp->if_ipackets++; 571 } else { 572 /* The packet was freed already. */ 573 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 574 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 575 ifp->if_oerrors++; 576 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 577 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 578 ("%s: ifp=%p sc->refcount not >= 1: %d", 579 __func__, oifp, sc->refcount)); 580 } 581 582 return (error); 583 } 584 585 static int 586 epair_transmit(struct ifnet *ifp, struct mbuf *m) 587 { 588 struct epair_dpcpu *epair_dpcpu; 589 int error; 590 591 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 592 EPAIR_LOCK(epair_dpcpu); 593 error = epair_transmit_locked(ifp, m); 594 EPAIR_UNLOCK(epair_dpcpu); 595 return (error); 596 } 597 598 static void 599 epair_qflush(struct ifnet *ifp) 600 { 601 struct epair_softc *sc; 602 603 sc = ifp->if_softc; 604 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 605 __func__, ifp, sc)); 606 /* 607 * Remove this ifp from all backpointer lists. The interface will not 608 * usable for flushing anyway nor should it have anything to flush 609 * after if_qflush(). 610 */ 611 epair_remove_ifp_from_draining(ifp); 612 613 if (sc->if_qflush) 614 sc->if_qflush(ifp); 615 } 616 617 static int 618 epair_media_change(struct ifnet *ifp __unused) 619 { 620 621 /* Do nothing. */ 622 return (0); 623 } 624 625 static void 626 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 627 { 628 629 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 630 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 631 } 632 633 static int 634 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 635 { 636 struct epair_softc *sc; 637 struct ifreq *ifr; 638 int error; 639 640 ifr = (struct ifreq *)data; 641 switch (cmd) { 642 case SIOCSIFFLAGS: 643 case SIOCADDMULTI: 644 case SIOCDELMULTI: 645 error = 0; 646 break; 647 648 case SIOCSIFMEDIA: 649 case SIOCGIFMEDIA: 650 sc = ifp->if_softc; 651 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 652 break; 653 654 case SIOCSIFMTU: 655 /* We basically allow all kinds of MTUs. */ 656 ifp->if_mtu = ifr->ifr_mtu; 657 error = 0; 658 break; 659 660 default: 661 /* Let the common ethernet handler process this. */ 662 error = ether_ioctl(ifp, cmd, data); 663 break; 664 } 665 666 return (error); 667 } 668 669 static void 670 epair_init(void *dummy __unused) 671 { 672 } 673 674 675 /* 676 * Interface cloning functions. 677 * We use our private ones so that we can create/destroy our secondary 678 * device along with the primary one. 679 */ 680 static int 681 epair_clone_match(struct if_clone *ifc, const char *name) 682 { 683 const char *cp; 684 685 DPRINTF("name='%s'\n", name); 686 687 /* 688 * Our base name is epair. 689 * Our interfaces will be named epair<n>[ab]. 690 * So accept anything of the following list: 691 * - epair 692 * - epair<n> 693 * but not the epair<n>[ab] versions. 694 */ 695 if (strncmp(EPAIRNAME, name, sizeof(EPAIRNAME)-1) != 0) 696 return (0); 697 698 for (cp = name + sizeof(EPAIRNAME) - 1; *cp != '\0'; cp++) { 699 if (*cp < '0' || *cp > '9') 700 return (0); 701 } 702 703 return (1); 704 } 705 706 static int 707 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 708 { 709 struct epair_softc *sca, *scb; 710 struct ifnet *ifp; 711 char *dp; 712 int error, unit, wildcard; 713 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 714 715 /* 716 * We are abusing params to create our second interface. 717 * Actually we already created it and called if_clone_createif() 718 * for it to do the official insertion procedure the moment we knew 719 * it cannot fail anymore. So just do attach it here. 720 */ 721 if (params) { 722 scb = (struct epair_softc *)params; 723 ifp = scb->ifp; 724 /* Assign a hopefully unique, locally administered etheraddr. */ 725 eaddr[0] = 0x02; 726 eaddr[3] = (ifp->if_index >> 8) & 0xff; 727 eaddr[4] = ifp->if_index & 0xff; 728 eaddr[5] = 0x0b; 729 ether_ifattach(ifp, eaddr); 730 /* Correctly set the name for the cloner list. */ 731 strlcpy(name, scb->ifp->if_xname, len); 732 return (0); 733 } 734 735 /* Try to see if a special unit was requested. */ 736 error = ifc_name2unit(name, &unit); 737 if (error != 0) 738 return (error); 739 wildcard = (unit < 0); 740 741 error = ifc_alloc_unit(ifc, &unit); 742 if (error != 0) 743 return (error); 744 745 /* 746 * If no unit had been given, we need to adjust the ifName. 747 * Also make sure there is space for our extra [ab] suffix. 748 */ 749 for (dp = name; *dp != '\0'; dp++); 750 if (wildcard) { 751 error = snprintf(dp, len - (dp - name), "%d", unit); 752 if (error > len - (dp - name) - 1) { 753 /* ifName too long. */ 754 ifc_free_unit(ifc, unit); 755 return (ENOSPC); 756 } 757 dp += error; 758 } 759 if (len - (dp - name) - 1 < 1) { 760 /* No space left for our [ab] suffix. */ 761 ifc_free_unit(ifc, unit); 762 return (ENOSPC); 763 } 764 *dp = 'a'; 765 /* Must not change dp so we can replace 'a' by 'b' later. */ 766 *(dp+1) = '\0'; 767 768 /* Allocate memory for both [ab] interfaces */ 769 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 770 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 771 sca->ifp = if_alloc(IFT_ETHER); 772 if (sca->ifp == NULL) { 773 free(sca, M_EPAIR); 774 ifc_free_unit(ifc, unit); 775 return (ENOSPC); 776 } 777 778 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 779 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 780 scb->ifp = if_alloc(IFT_ETHER); 781 if (scb->ifp == NULL) { 782 free(scb, M_EPAIR); 783 if_free(sca->ifp); 784 free(sca, M_EPAIR); 785 ifc_free_unit(ifc, unit); 786 return (ENOSPC); 787 } 788 789 /* 790 * Cross-reference the interfaces so we will be able to free both. 791 */ 792 sca->oifp = scb->ifp; 793 scb->oifp = sca->ifp; 794 795 /* 796 * Calculate the cpuid for netisr queueing based on the 797 * ifIndex of the interfaces. As long as we cannot configure 798 * this or use cpuset information easily we cannot guarantee 799 * cache locality but we can at least allow parallelism. 800 */ 801 sca->cpuid = 802 netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); 803 scb->cpuid = 804 netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); 805 806 /* Finish initialization of interface <n>a. */ 807 ifp = sca->ifp; 808 ifp->if_softc = sca; 809 strlcpy(ifp->if_xname, name, IFNAMSIZ); 810 ifp->if_dname = ifc->ifc_name; 811 ifp->if_dunit = unit; 812 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 813 ifp->if_capabilities = IFCAP_VLAN_MTU; 814 ifp->if_capenable = IFCAP_VLAN_MTU; 815 ifp->if_start = epair_start; 816 ifp->if_ioctl = epair_ioctl; 817 ifp->if_init = epair_init; 818 ifp->if_snd.ifq_maxlen = ifqmaxlen; 819 /* Assign a hopefully unique, locally administered etheraddr. */ 820 eaddr[0] = 0x02; 821 eaddr[3] = (ifp->if_index >> 8) & 0xff; 822 eaddr[4] = ifp->if_index & 0xff; 823 eaddr[5] = 0x0a; 824 ether_ifattach(ifp, eaddr); 825 sca->if_qflush = ifp->if_qflush; 826 ifp->if_qflush = epair_qflush; 827 ifp->if_transmit = epair_transmit; 828 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 829 830 /* Swap the name and finish initialization of interface <n>b. */ 831 *dp = 'b'; 832 833 ifp = scb->ifp; 834 ifp->if_softc = scb; 835 strlcpy(ifp->if_xname, name, IFNAMSIZ); 836 ifp->if_dname = ifc->ifc_name; 837 ifp->if_dunit = unit; 838 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 839 ifp->if_capabilities = IFCAP_VLAN_MTU; 840 ifp->if_capenable = IFCAP_VLAN_MTU; 841 ifp->if_start = epair_start; 842 ifp->if_ioctl = epair_ioctl; 843 ifp->if_init = epair_init; 844 ifp->if_snd.ifq_maxlen = ifqmaxlen; 845 /* We need to play some tricks here for the second interface. */ 846 strlcpy(name, EPAIRNAME, len); 847 error = if_clone_create(name, len, (caddr_t)scb); 848 if (error) 849 panic("%s: if_clone_createif() for our 2nd iface failed: %d", 850 __func__, error); 851 scb->if_qflush = ifp->if_qflush; 852 ifp->if_qflush = epair_qflush; 853 ifp->if_transmit = epair_transmit; 854 ifp->if_baudrate = IF_Gbps(10UL); /* arbitrary maximum */ 855 856 /* 857 * Restore name to <n>a as the ifp for this will go into the 858 * cloner list for the initial call. 859 */ 860 strlcpy(name, sca->ifp->if_xname, len); 861 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 862 863 /* Initialise pseudo media types. */ 864 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 865 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 866 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 867 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 868 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 869 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 870 871 /* Tell the world, that we are ready to rock. */ 872 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 873 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 874 if_link_state_change(sca->ifp, LINK_STATE_UP); 875 if_link_state_change(scb->ifp, LINK_STATE_UP); 876 877 return (0); 878 } 879 880 static int 881 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 882 { 883 struct ifnet *oifp; 884 struct epair_softc *sca, *scb; 885 int unit, error; 886 887 DPRINTF("ifp=%p\n", ifp); 888 889 /* 890 * In case we called into if_clone_destroyif() ourselves 891 * again to remove the second interface, the softc will be 892 * NULL. In that case so not do anything but return success. 893 */ 894 if (ifp->if_softc == NULL) 895 return (0); 896 897 unit = ifp->if_dunit; 898 sca = ifp->if_softc; 899 oifp = sca->oifp; 900 scb = oifp->if_softc; 901 902 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 903 if_link_state_change(ifp, LINK_STATE_DOWN); 904 if_link_state_change(oifp, LINK_STATE_DOWN); 905 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 906 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 907 ether_ifdetach(oifp); 908 ether_ifdetach(ifp); 909 /* 910 * Wait for all packets to be dispatched to if_input. 911 * The numbers can only go down as the interfaces are 912 * detached so there is no need to use atomics. 913 */ 914 DPRINTF("sca refcnt=%u scb refcnt=%u\n", sca->refcount, scb->refcount); 915 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1 && scb->refcount == 1, 916 ("%s: ifp=%p sca->refcount!=1: %d || ifp=%p scb->refcount!=1: %d", 917 __func__, ifp, sca->refcount, oifp, scb->refcount)); 918 919 /* 920 * Get rid of our second half. 921 */ 922 oifp->if_softc = NULL; 923 error = if_clone_destroyif(ifc, oifp); 924 if (error) 925 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 926 __func__, error); 927 928 /* 929 * Finish cleaning up. Free them and release the unit. 930 * As the other of the two interfaces my reside in a different vnet, 931 * we need to switch before freeing them. 932 */ 933 CURVNET_SET_QUIET(oifp->if_vnet); 934 if_free(oifp); 935 CURVNET_RESTORE(); 936 if_free(ifp); 937 ifmedia_removeall(&sca->media); 938 ifmedia_removeall(&scb->media); 939 free(scb, M_EPAIR); 940 free(sca, M_EPAIR); 941 ifc_free_unit(ifc, unit); 942 943 return (0); 944 } 945 946 static int 947 epair_modevent(module_t mod, int type, void *data) 948 { 949 int qlimit; 950 951 switch (type) { 952 case MOD_LOAD: 953 /* For now limit us to one global mutex and one inq. */ 954 epair_dpcpu_init(); 955 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 956 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 957 epair_nh.nh_qlimit = qlimit; 958 netisr_register(&epair_nh); 959 if_clone_attach(&epair_cloner); 960 if (bootverbose) 961 printf("%s initialized.\n", EPAIRNAME); 962 break; 963 case MOD_UNLOAD: 964 if_clone_detach(&epair_cloner); 965 netisr_unregister(&epair_nh); 966 epair_dpcpu_detach(); 967 if (bootverbose) 968 printf("%s unloaded.\n", EPAIRNAME); 969 break; 970 default: 971 return (EOPNOTSUPP); 972 } 973 return (0); 974 } 975 976 static moduledata_t epair_mod = { 977 "if_epair", 978 epair_modevent, 979 0 980 }; 981 982 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 983 MODULE_VERSION(if_epair, 1); 984