1 /*- 2 * Copyright (c) 2008 The FreeBSD Foundation 3 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by CK Software GmbH under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * A pair of virtual back-to-back connected ethernet like interfaces 33 * (``two interfaces with a virtual cross-over cable''). 34 * 35 * This is mostly intended to be used to provide connectivity between 36 * different virtual network stack instances. 37 */ 38 /* 39 * Things to re-think once we have more experience: 40 * - ifp->if_reassign function once we can test with vimage. Depending on 41 * how if_vmove() is going to be improved. 42 * - Real random etheraddrs that are checked to be uniquish; we would need 43 * to re-do them in case we move the interface between network stacks 44 * in a private if_reassign function. 45 * In case we bridge to a real interface/network or between indepedent 46 * epairs on multiple stacks/machines, we may need this. 47 * For now let the user handle that case. 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include <sys/param.h> 54 #include <sys/kernel.h> 55 #include <sys/mbuf.h> 56 #include <sys/module.h> 57 #include <sys/refcount.h> 58 #include <sys/queue.h> 59 #include <sys/smp.h> 60 #include <sys/socket.h> 61 #include <sys/sockio.h> 62 #include <sys/sysctl.h> 63 #include <sys/types.h> 64 65 #include <net/bpf.h> 66 #include <net/ethernet.h> 67 #include <net/if.h> 68 #include <net/if_clone.h> 69 #include <net/if_media.h> 70 #include <net/if_var.h> 71 #include <net/if_types.h> 72 #include <net/netisr.h> 73 #include <net/vnet.h> 74 75 SYSCTL_DECL(_net_link); 76 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 77 78 #ifdef EPAIR_DEBUG 79 static int epair_debug = 0; 80 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 81 &epair_debug, 0, "if_epair(4) debugging."); 82 #define DPRINTF(fmt, arg...) \ 83 if (epair_debug) \ 84 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 85 #else 86 #define DPRINTF(fmt, arg...) 87 #endif 88 89 static void epair_nh_sintr(struct mbuf *); 90 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 91 static void epair_nh_drainedcpu(u_int); 92 93 static void epair_start_locked(struct ifnet *); 94 static int epair_media_change(struct ifnet *); 95 static void epair_media_status(struct ifnet *, struct ifmediareq *); 96 97 static int epair_clone_match(struct if_clone *, const char *); 98 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 99 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 100 101 static const char epairname[] = "epair"; 102 103 /* Netisr realted definitions and sysctl. */ 104 static struct netisr_handler epair_nh = { 105 .nh_name = epairname, 106 .nh_proto = NETISR_EPAIR, 107 .nh_policy = NETISR_POLICY_CPU, 108 .nh_handler = epair_nh_sintr, 109 .nh_m2cpuid = epair_nh_m2cpuid, 110 .nh_drainedcpu = epair_nh_drainedcpu, 111 }; 112 113 static int 114 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 115 { 116 int error, qlimit; 117 118 netisr_getqlimit(&epair_nh, &qlimit); 119 error = sysctl_handle_int(oidp, &qlimit, 0, req); 120 if (error || !req->newptr) 121 return (error); 122 if (qlimit < 1) 123 return (EINVAL); 124 return (netisr_setqlimit(&epair_nh, qlimit)); 125 } 126 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 127 0, 0, sysctl_epair_netisr_maxqlen, "I", 128 "Maximum if_epair(4) netisr \"hw\" queue length"); 129 130 struct epair_softc { 131 struct ifnet *ifp; /* This ifp. */ 132 struct ifnet *oifp; /* other ifp of pair. */ 133 struct ifmedia media; /* Media config (fake). */ 134 u_int refcount; /* # of mbufs in flight. */ 135 u_int cpuid; /* CPU ID assigned upon creation. */ 136 void (*if_qflush)(struct ifnet *); 137 /* Original if_qflush routine. */ 138 }; 139 140 /* 141 * Per-CPU list of ifps with data in the ifq that needs to be flushed 142 * to the netisr ``hw'' queue before we allow any further direct queuing 143 * to the ``hw'' queue. 144 */ 145 struct epair_ifp_drain { 146 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 147 struct ifnet *ifp; 148 }; 149 STAILQ_HEAD(eid_list, epair_ifp_drain); 150 151 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 152 "if_epair", NULL, MTX_DEF) 153 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 154 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 155 MA_OWNED) 156 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 157 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 158 159 #ifdef INVARIANTS 160 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 161 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 162 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 163 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 164 #else 165 #define EPAIR_REFCOUNT_INIT(r, v) 166 #define EPAIR_REFCOUNT_AQUIRE(r) 167 #define EPAIR_REFCOUNT_RELEASE(r) 168 #define EPAIR_REFCOUNT_ASSERT(a, p) 169 #endif 170 171 static MALLOC_DEFINE(M_EPAIR, epairname, 172 "Pair of virtual cross-over connected Ethernet-like interfaces"); 173 174 static struct if_clone *epair_cloner; 175 176 /* 177 * DPCPU area and functions. 178 */ 179 struct epair_dpcpu { 180 struct mtx if_epair_mtx; /* Per-CPU locking. */ 181 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 182 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 183 * data in the ifq. */ 184 }; 185 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 186 187 static void 188 epair_dpcpu_init(void) 189 { 190 struct epair_dpcpu *epair_dpcpu; 191 struct eid_list *s; 192 u_int cpuid; 193 194 CPU_FOREACH(cpuid) { 195 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 196 197 /* Initialize per-cpu lock. */ 198 EPAIR_LOCK_INIT(epair_dpcpu); 199 200 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 201 epair_dpcpu->epair_drv_flags = 0; 202 203 /* 204 * Initialize per-cpu drain list. 205 * Manually do what STAILQ_HEAD_INITIALIZER would do. 206 */ 207 s = &epair_dpcpu->epair_ifp_drain_list; 208 s->stqh_first = NULL; 209 s->stqh_last = &s->stqh_first; 210 } 211 } 212 213 static void 214 epair_dpcpu_detach(void) 215 { 216 struct epair_dpcpu *epair_dpcpu; 217 u_int cpuid; 218 219 CPU_FOREACH(cpuid) { 220 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 221 222 /* Destroy per-cpu lock. */ 223 EPAIR_LOCK_DESTROY(epair_dpcpu); 224 } 225 } 226 227 /* 228 * Helper functions. 229 */ 230 static u_int 231 cpuid_from_ifp(struct ifnet *ifp) 232 { 233 struct epair_softc *sc; 234 235 if (ifp == NULL) 236 return (0); 237 sc = ifp->if_softc; 238 239 return (sc->cpuid); 240 } 241 242 /* 243 * Netisr handler functions. 244 */ 245 static void 246 epair_nh_sintr(struct mbuf *m) 247 { 248 struct ifnet *ifp; 249 struct epair_softc *sc; 250 251 ifp = m->m_pkthdr.rcvif; 252 (*ifp->if_input)(ifp, m); 253 sc = ifp->if_softc; 254 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 255 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 256 ("%s: ifp=%p sc->refcount not >= 1: %d", 257 __func__, ifp, sc->refcount)); 258 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 259 } 260 261 static struct mbuf * 262 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 263 { 264 265 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 266 267 return (m); 268 } 269 270 static void 271 epair_nh_drainedcpu(u_int cpuid) 272 { 273 struct epair_dpcpu *epair_dpcpu; 274 struct epair_ifp_drain *elm, *tvar; 275 struct ifnet *ifp; 276 277 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 278 EPAIR_LOCK(epair_dpcpu); 279 /* 280 * Assume our "hw" queue and possibly ifq will be emptied 281 * again. In case we will overflow the "hw" queue while 282 * draining, epair_start_locked will set IFF_DRV_OACTIVE 283 * again and we will stop and return. 284 */ 285 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 286 ifp_next, tvar) { 287 ifp = elm->ifp; 288 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 289 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 290 epair_start_locked(ifp); 291 292 IFQ_LOCK(&ifp->if_snd); 293 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 294 struct epair_softc *sc; 295 296 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 297 elm, epair_ifp_drain, ifp_next); 298 /* The cached ifp goes off the list. */ 299 sc = ifp->if_softc; 300 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 301 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 302 ("%s: ifp=%p sc->refcount not >= 1: %d", 303 __func__, ifp, sc->refcount)); 304 free(elm, M_EPAIR); 305 } 306 IFQ_UNLOCK(&ifp->if_snd); 307 308 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 309 /* Our "hw"q overflew again. */ 310 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 311 DPRINTF("hw queue length overflow at %u\n", 312 epair_nh.nh_qlimit); 313 break; 314 } 315 } 316 EPAIR_UNLOCK(epair_dpcpu); 317 } 318 319 /* 320 * Network interface (`if') related functions. 321 */ 322 static void 323 epair_remove_ifp_from_draining(struct ifnet *ifp) 324 { 325 struct epair_dpcpu *epair_dpcpu; 326 struct epair_ifp_drain *elm, *tvar; 327 u_int cpuid; 328 329 CPU_FOREACH(cpuid) { 330 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 331 EPAIR_LOCK(epair_dpcpu); 332 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 333 ifp_next, tvar) { 334 if (ifp == elm->ifp) { 335 struct epair_softc *sc; 336 337 STAILQ_REMOVE( 338 &epair_dpcpu->epair_ifp_drain_list, elm, 339 epair_ifp_drain, ifp_next); 340 /* The cached ifp goes off the list. */ 341 sc = ifp->if_softc; 342 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 343 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 344 ("%s: ifp=%p sc->refcount not >= 1: %d", 345 __func__, ifp, sc->refcount)); 346 free(elm, M_EPAIR); 347 } 348 } 349 EPAIR_UNLOCK(epair_dpcpu); 350 } 351 } 352 353 static int 354 epair_add_ifp_for_draining(struct ifnet *ifp) 355 { 356 struct epair_dpcpu *epair_dpcpu; 357 struct epair_softc *sc; 358 struct epair_ifp_drain *elm = NULL; 359 360 sc = ifp->if_softc; 361 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 362 EPAIR_LOCK_ASSERT(epair_dpcpu); 363 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 364 if (elm->ifp == ifp) 365 break; 366 /* If the ifp is there already, return success. */ 367 if (elm != NULL) 368 return (0); 369 370 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 371 if (elm == NULL) 372 return (ENOMEM); 373 374 elm->ifp = ifp; 375 /* Add a reference for the ifp pointer on the list. */ 376 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 377 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 378 379 return (0); 380 } 381 382 static void 383 epair_start_locked(struct ifnet *ifp) 384 { 385 struct epair_dpcpu *epair_dpcpu; 386 struct mbuf *m; 387 struct epair_softc *sc; 388 struct ifnet *oifp; 389 int error; 390 391 DPRINTF("ifp=%p\n", ifp); 392 sc = ifp->if_softc; 393 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 394 EPAIR_LOCK_ASSERT(epair_dpcpu); 395 396 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 397 return; 398 if ((ifp->if_flags & IFF_UP) == 0) 399 return; 400 401 /* 402 * We get patckets here from ether_output via if_handoff() 403 * and ned to put them into the input queue of the oifp 404 * and call oifp->if_input() via netisr/epair_sintr(). 405 */ 406 oifp = sc->oifp; 407 sc = oifp->if_softc; 408 for (;;) { 409 IFQ_DEQUEUE(&ifp->if_snd, m); 410 if (m == NULL) 411 break; 412 BPF_MTAP(ifp, m); 413 414 /* 415 * In case the outgoing interface is not usable, 416 * drop the packet. 417 */ 418 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 419 (oifp->if_flags & IFF_UP) ==0) { 420 ifp->if_oerrors++; 421 m_freem(m); 422 continue; 423 } 424 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 425 426 /* 427 * Add a reference so the interface cannot go while the 428 * packet is in transit as we rely on rcvif to stay valid. 429 */ 430 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 431 m->m_pkthdr.rcvif = oifp; 432 CURVNET_SET_QUIET(oifp->if_vnet); 433 error = netisr_queue(NETISR_EPAIR, m); 434 CURVNET_RESTORE(); 435 if (!error) { 436 ifp->if_opackets++; 437 /* Someone else received the packet. */ 438 oifp->if_ipackets++; 439 } else { 440 /* The packet was freed already. */ 441 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 442 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 443 (void) epair_add_ifp_for_draining(ifp); 444 ifp->if_oerrors++; 445 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 446 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 447 ("%s: ifp=%p sc->refcount not >= 1: %d", 448 __func__, oifp, sc->refcount)); 449 } 450 } 451 } 452 453 static void 454 epair_start(struct ifnet *ifp) 455 { 456 struct epair_dpcpu *epair_dpcpu; 457 458 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 459 EPAIR_LOCK(epair_dpcpu); 460 epair_start_locked(ifp); 461 EPAIR_UNLOCK(epair_dpcpu); 462 } 463 464 static int 465 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 466 { 467 struct epair_dpcpu *epair_dpcpu; 468 struct epair_softc *sc; 469 struct ifnet *oifp; 470 int error, len; 471 short mflags; 472 473 DPRINTF("ifp=%p m=%p\n", ifp, m); 474 sc = ifp->if_softc; 475 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 476 EPAIR_LOCK_ASSERT(epair_dpcpu); 477 478 if (m == NULL) 479 return (0); 480 481 /* 482 * We are not going to use the interface en/dequeue mechanism 483 * on the TX side. We are called from ether_output_frame() 484 * and will put the packet into the incoming queue of the 485 * other interface of our pair via the netsir. 486 */ 487 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 488 m_freem(m); 489 return (ENXIO); 490 } 491 if ((ifp->if_flags & IFF_UP) == 0) { 492 m_freem(m); 493 return (ENETDOWN); 494 } 495 496 BPF_MTAP(ifp, m); 497 498 /* 499 * In case the outgoing interface is not usable, 500 * drop the packet. 501 */ 502 oifp = sc->oifp; 503 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 504 (oifp->if_flags & IFF_UP) ==0) { 505 ifp->if_oerrors++; 506 m_freem(m); 507 return (0); 508 } 509 len = m->m_pkthdr.len; 510 mflags = m->m_flags; 511 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 512 513 #ifdef ALTQ 514 /* Support ALTQ via the clasic if_start() path. */ 515 IF_LOCK(&ifp->if_snd); 516 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 517 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 518 if (error) 519 ifp->if_snd.ifq_drops++; 520 IF_UNLOCK(&ifp->if_snd); 521 if (!error) { 522 ifp->if_obytes += len; 523 if (mflags & (M_BCAST|M_MCAST)) 524 ifp->if_omcasts++; 525 526 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 527 epair_start_locked(ifp); 528 else 529 (void)epair_add_ifp_for_draining(ifp); 530 } 531 return (error); 532 } 533 IF_UNLOCK(&ifp->if_snd); 534 #endif 535 536 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 537 /* 538 * Our hardware queue is full, try to fall back 539 * queuing to the ifq but do not call ifp->if_start. 540 * Either we are lucky or the packet is gone. 541 */ 542 IFQ_ENQUEUE(&ifp->if_snd, m, error); 543 if (!error) 544 (void)epair_add_ifp_for_draining(ifp); 545 return (error); 546 } 547 sc = oifp->if_softc; 548 /* 549 * Add a reference so the interface cannot go while the 550 * packet is in transit as we rely on rcvif to stay valid. 551 */ 552 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 553 m->m_pkthdr.rcvif = oifp; 554 CURVNET_SET_QUIET(oifp->if_vnet); 555 error = netisr_queue(NETISR_EPAIR, m); 556 CURVNET_RESTORE(); 557 if (!error) { 558 ifp->if_opackets++; 559 /* 560 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 561 * but as we bypass all this we have to duplicate 562 * the logic another time. 563 */ 564 ifp->if_obytes += len; 565 if (mflags & (M_BCAST|M_MCAST)) 566 ifp->if_omcasts++; 567 /* Someone else received the packet. */ 568 oifp->if_ipackets++; 569 } else { 570 /* The packet was freed already. */ 571 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 572 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 573 ifp->if_oerrors++; 574 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 575 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 576 ("%s: ifp=%p sc->refcount not >= 1: %d", 577 __func__, oifp, sc->refcount)); 578 } 579 580 return (error); 581 } 582 583 static int 584 epair_transmit(struct ifnet *ifp, struct mbuf *m) 585 { 586 struct epair_dpcpu *epair_dpcpu; 587 int error; 588 589 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 590 EPAIR_LOCK(epair_dpcpu); 591 error = epair_transmit_locked(ifp, m); 592 EPAIR_UNLOCK(epair_dpcpu); 593 return (error); 594 } 595 596 static void 597 epair_qflush(struct ifnet *ifp) 598 { 599 struct epair_softc *sc; 600 601 sc = ifp->if_softc; 602 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 603 __func__, ifp, sc)); 604 /* 605 * Remove this ifp from all backpointer lists. The interface will not 606 * usable for flushing anyway nor should it have anything to flush 607 * after if_qflush(). 608 */ 609 epair_remove_ifp_from_draining(ifp); 610 611 if (sc->if_qflush) 612 sc->if_qflush(ifp); 613 } 614 615 static int 616 epair_media_change(struct ifnet *ifp __unused) 617 { 618 619 /* Do nothing. */ 620 return (0); 621 } 622 623 static void 624 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 625 { 626 627 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 628 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 629 } 630 631 static int 632 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 633 { 634 struct epair_softc *sc; 635 struct ifreq *ifr; 636 int error; 637 638 ifr = (struct ifreq *)data; 639 switch (cmd) { 640 case SIOCSIFFLAGS: 641 case SIOCADDMULTI: 642 case SIOCDELMULTI: 643 error = 0; 644 break; 645 646 case SIOCSIFMEDIA: 647 case SIOCGIFMEDIA: 648 sc = ifp->if_softc; 649 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 650 break; 651 652 case SIOCSIFMTU: 653 /* We basically allow all kinds of MTUs. */ 654 ifp->if_mtu = ifr->ifr_mtu; 655 error = 0; 656 break; 657 658 default: 659 /* Let the common ethernet handler process this. */ 660 error = ether_ioctl(ifp, cmd, data); 661 break; 662 } 663 664 return (error); 665 } 666 667 static void 668 epair_init(void *dummy __unused) 669 { 670 } 671 672 673 /* 674 * Interface cloning functions. 675 * We use our private ones so that we can create/destroy our secondary 676 * device along with the primary one. 677 */ 678 static int 679 epair_clone_match(struct if_clone *ifc, const char *name) 680 { 681 const char *cp; 682 683 DPRINTF("name='%s'\n", name); 684 685 /* 686 * Our base name is epair. 687 * Our interfaces will be named epair<n>[ab]. 688 * So accept anything of the following list: 689 * - epair 690 * - epair<n> 691 * but not the epair<n>[ab] versions. 692 */ 693 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 694 return (0); 695 696 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 697 if (*cp < '0' || *cp > '9') 698 return (0); 699 } 700 701 return (1); 702 } 703 704 static int 705 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 706 { 707 struct epair_softc *sca, *scb; 708 struct ifnet *ifp; 709 char *dp; 710 int error, unit, wildcard; 711 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 712 713 /* 714 * We are abusing params to create our second interface. 715 * Actually we already created it and called if_clone_create() 716 * for it to do the official insertion procedure the moment we knew 717 * it cannot fail anymore. So just do attach it here. 718 */ 719 if (params) { 720 scb = (struct epair_softc *)params; 721 ifp = scb->ifp; 722 /* Assign a hopefully unique, locally administered etheraddr. */ 723 eaddr[0] = 0x02; 724 eaddr[3] = (ifp->if_index >> 8) & 0xff; 725 eaddr[4] = ifp->if_index & 0xff; 726 eaddr[5] = 0x0b; 727 ether_ifattach(ifp, eaddr); 728 /* Correctly set the name for the cloner list. */ 729 strlcpy(name, scb->ifp->if_xname, len); 730 return (0); 731 } 732 733 /* Try to see if a special unit was requested. */ 734 error = ifc_name2unit(name, &unit); 735 if (error != 0) 736 return (error); 737 wildcard = (unit < 0); 738 739 error = ifc_alloc_unit(ifc, &unit); 740 if (error != 0) 741 return (error); 742 743 /* 744 * If no unit had been given, we need to adjust the ifName. 745 * Also make sure there is space for our extra [ab] suffix. 746 */ 747 for (dp = name; *dp != '\0'; dp++); 748 if (wildcard) { 749 error = snprintf(dp, len - (dp - name), "%d", unit); 750 if (error > len - (dp - name) - 1) { 751 /* ifName too long. */ 752 ifc_free_unit(ifc, unit); 753 return (ENOSPC); 754 } 755 dp += error; 756 } 757 if (len - (dp - name) - 1 < 1) { 758 /* No space left for our [ab] suffix. */ 759 ifc_free_unit(ifc, unit); 760 return (ENOSPC); 761 } 762 *dp = 'a'; 763 /* Must not change dp so we can replace 'a' by 'b' later. */ 764 *(dp+1) = '\0'; 765 766 /* Allocate memory for both [ab] interfaces */ 767 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 768 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 769 sca->ifp = if_alloc(IFT_ETHER); 770 if (sca->ifp == NULL) { 771 free(sca, M_EPAIR); 772 ifc_free_unit(ifc, unit); 773 return (ENOSPC); 774 } 775 776 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 777 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 778 scb->ifp = if_alloc(IFT_ETHER); 779 if (scb->ifp == NULL) { 780 free(scb, M_EPAIR); 781 if_free(sca->ifp); 782 free(sca, M_EPAIR); 783 ifc_free_unit(ifc, unit); 784 return (ENOSPC); 785 } 786 787 /* 788 * Cross-reference the interfaces so we will be able to free both. 789 */ 790 sca->oifp = scb->ifp; 791 scb->oifp = sca->ifp; 792 793 /* 794 * Calculate the cpuid for netisr queueing based on the 795 * ifIndex of the interfaces. As long as we cannot configure 796 * this or use cpuset information easily we cannot guarantee 797 * cache locality but we can at least allow parallelism. 798 */ 799 sca->cpuid = 800 netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); 801 scb->cpuid = 802 netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); 803 804 /* Finish initialization of interface <n>a. */ 805 ifp = sca->ifp; 806 ifp->if_softc = sca; 807 strlcpy(ifp->if_xname, name, IFNAMSIZ); 808 ifp->if_dname = epairname; 809 ifp->if_dunit = unit; 810 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 811 ifp->if_capabilities = IFCAP_VLAN_MTU; 812 ifp->if_capenable = IFCAP_VLAN_MTU; 813 ifp->if_start = epair_start; 814 ifp->if_ioctl = epair_ioctl; 815 ifp->if_init = epair_init; 816 ifp->if_snd.ifq_maxlen = ifqmaxlen; 817 /* Assign a hopefully unique, locally administered etheraddr. */ 818 eaddr[0] = 0x02; 819 eaddr[3] = (ifp->if_index >> 8) & 0xff; 820 eaddr[4] = ifp->if_index & 0xff; 821 eaddr[5] = 0x0a; 822 ether_ifattach(ifp, eaddr); 823 sca->if_qflush = ifp->if_qflush; 824 ifp->if_qflush = epair_qflush; 825 ifp->if_transmit = epair_transmit; 826 if_initbaudrate(ifp, IF_Gbps(10)); /* arbitrary maximum */ 827 828 /* Swap the name and finish initialization of interface <n>b. */ 829 *dp = 'b'; 830 831 ifp = scb->ifp; 832 ifp->if_softc = scb; 833 strlcpy(ifp->if_xname, name, IFNAMSIZ); 834 ifp->if_dname = epairname; 835 ifp->if_dunit = unit; 836 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 837 ifp->if_capabilities = IFCAP_VLAN_MTU; 838 ifp->if_capenable = IFCAP_VLAN_MTU; 839 ifp->if_start = epair_start; 840 ifp->if_ioctl = epair_ioctl; 841 ifp->if_init = epair_init; 842 ifp->if_snd.ifq_maxlen = ifqmaxlen; 843 /* We need to play some tricks here for the second interface. */ 844 strlcpy(name, epairname, len); 845 error = if_clone_create(name, len, (caddr_t)scb); 846 if (error) 847 panic("%s: if_clone_create() for our 2nd iface failed: %d", 848 __func__, error); 849 scb->if_qflush = ifp->if_qflush; 850 ifp->if_qflush = epair_qflush; 851 ifp->if_transmit = epair_transmit; 852 if_initbaudrate(ifp, IF_Gbps(10)); /* arbitrary maximum */ 853 854 /* 855 * Restore name to <n>a as the ifp for this will go into the 856 * cloner list for the initial call. 857 */ 858 strlcpy(name, sca->ifp->if_xname, len); 859 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 860 861 /* Initialise pseudo media types. */ 862 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 863 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 864 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 865 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 866 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 867 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 868 869 /* Tell the world, that we are ready to rock. */ 870 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 871 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 872 if_link_state_change(sca->ifp, LINK_STATE_UP); 873 if_link_state_change(scb->ifp, LINK_STATE_UP); 874 875 return (0); 876 } 877 878 static int 879 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 880 { 881 struct ifnet *oifp; 882 struct epair_softc *sca, *scb; 883 int unit, error; 884 885 DPRINTF("ifp=%p\n", ifp); 886 887 /* 888 * In case we called into if_clone_destroyif() ourselves 889 * again to remove the second interface, the softc will be 890 * NULL. In that case so not do anything but return success. 891 */ 892 if (ifp->if_softc == NULL) 893 return (0); 894 895 unit = ifp->if_dunit; 896 sca = ifp->if_softc; 897 oifp = sca->oifp; 898 scb = oifp->if_softc; 899 900 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 901 if_link_state_change(ifp, LINK_STATE_DOWN); 902 if_link_state_change(oifp, LINK_STATE_DOWN); 903 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 904 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 905 906 /* 907 * Get rid of our second half. As the other of the two 908 * interfaces may reside in a different vnet, we need to 909 * switch before freeing them. 910 */ 911 CURVNET_SET_QUIET(oifp->if_vnet); 912 ether_ifdetach(oifp); 913 /* 914 * Wait for all packets to be dispatched to if_input. 915 * The numbers can only go down as the interface is 916 * detached so there is no need to use atomics. 917 */ 918 DPRINTF("scb refcnt=%u\n", scb->refcount); 919 EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, 920 ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); 921 oifp->if_softc = NULL; 922 error = if_clone_destroyif(ifc, oifp); 923 if (error) 924 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 925 __func__, error); 926 if_free(oifp); 927 ifmedia_removeall(&scb->media); 928 free(scb, M_EPAIR); 929 CURVNET_RESTORE(); 930 931 ether_ifdetach(ifp); 932 /* 933 * Wait for all packets to be dispatched to if_input. 934 */ 935 DPRINTF("sca refcnt=%u\n", sca->refcount); 936 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, 937 ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); 938 if_free(ifp); 939 ifmedia_removeall(&sca->media); 940 free(sca, M_EPAIR); 941 ifc_free_unit(ifc, unit); 942 943 return (0); 944 } 945 946 static int 947 epair_modevent(module_t mod, int type, void *data) 948 { 949 int qlimit; 950 951 switch (type) { 952 case MOD_LOAD: 953 /* For now limit us to one global mutex and one inq. */ 954 epair_dpcpu_init(); 955 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 956 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 957 epair_nh.nh_qlimit = qlimit; 958 netisr_register(&epair_nh); 959 epair_cloner = if_clone_advanced(epairname, 0, 960 epair_clone_match, epair_clone_create, epair_clone_destroy); 961 if (bootverbose) 962 printf("%s initialized.\n", epairname); 963 break; 964 case MOD_UNLOAD: 965 if_clone_detach(epair_cloner); 966 netisr_unregister(&epair_nh); 967 epair_dpcpu_detach(); 968 if (bootverbose) 969 printf("%s unloaded.\n", epairname); 970 break; 971 default: 972 return (EOPNOTSUPP); 973 } 974 return (0); 975 } 976 977 static moduledata_t epair_mod = { 978 "if_epair", 979 epair_modevent, 980 0 981 }; 982 983 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 984 MODULE_VERSION(if_epair, 1); 985