/*-
 * Copyright (c) 2008 The FreeBSD Foundation
 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org>
 * All rights reserved.
 *
 * This software was developed by CK Software GmbH under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * A pair of virtual back-to-back connected ethernet like interfaces
 * (``two interfaces with a virtual cross-over cable'').
 *
 * This is mostly intended to be used to provide connectivity between
 * different virtual network stack instances.
 */
/*
 * Things to re-think once we have more experience:
 * - ifp->if_reassign function once we can test with vimage.
Depending on 41 * how if_vmove() is going to be improved. 42 * - Real random etheraddrs that are checked to be uniquish; we would need 43 * to re-do them in case we move the interface between network stacks 44 * in a private if_reassign function. 45 * In case we bridge to a real interface/network or between indepedent 46 * epairs on multiple stacks/machines, we may need this. 47 * For now let the user handle that case. 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include <sys/param.h> 54 #include <sys/kernel.h> 55 #include <sys/mbuf.h> 56 #include <sys/module.h> 57 #include <sys/refcount.h> 58 #include <sys/queue.h> 59 #include <sys/smp.h> 60 #include <sys/socket.h> 61 #include <sys/sockio.h> 62 #include <sys/sysctl.h> 63 #include <sys/types.h> 64 65 #include <net/bpf.h> 66 #include <net/ethernet.h> 67 #include <net/if.h> 68 #include <net/if_var.h> 69 #include <net/if_clone.h> 70 #include <net/if_media.h> 71 #include <net/if_var.h> 72 #include <net/if_types.h> 73 #include <net/netisr.h> 74 #include <net/vnet.h> 75 76 SYSCTL_DECL(_net_link); 77 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 78 79 #ifdef EPAIR_DEBUG 80 static int epair_debug = 0; 81 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 82 &epair_debug, 0, "if_epair(4) debugging."); 83 #define DPRINTF(fmt, arg...) \ 84 if (epair_debug) \ 85 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 86 #else 87 #define DPRINTF(fmt, arg...) 
88 #endif 89 90 static void epair_nh_sintr(struct mbuf *); 91 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 92 static void epair_nh_drainedcpu(u_int); 93 94 static void epair_start_locked(struct ifnet *); 95 static int epair_media_change(struct ifnet *); 96 static void epair_media_status(struct ifnet *, struct ifmediareq *); 97 98 static int epair_clone_match(struct if_clone *, const char *); 99 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 100 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 101 102 static const char epairname[] = "epair"; 103 104 /* Netisr related definitions and sysctl. */ 105 static struct netisr_handler epair_nh = { 106 .nh_name = epairname, 107 .nh_proto = NETISR_EPAIR, 108 .nh_policy = NETISR_POLICY_CPU, 109 .nh_handler = epair_nh_sintr, 110 .nh_m2cpuid = epair_nh_m2cpuid, 111 .nh_drainedcpu = epair_nh_drainedcpu, 112 }; 113 114 static int 115 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 116 { 117 int error, qlimit; 118 119 netisr_getqlimit(&epair_nh, &qlimit); 120 error = sysctl_handle_int(oidp, &qlimit, 0, req); 121 if (error || !req->newptr) 122 return (error); 123 if (qlimit < 1) 124 return (EINVAL); 125 return (netisr_setqlimit(&epair_nh, qlimit)); 126 } 127 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 128 0, 0, sysctl_epair_netisr_maxqlen, "I", 129 "Maximum if_epair(4) netisr \"hw\" queue length"); 130 131 struct epair_softc { 132 struct ifnet *ifp; /* This ifp. */ 133 struct ifnet *oifp; /* other ifp of pair. */ 134 struct ifmedia media; /* Media config (fake). */ 135 u_int refcount; /* # of mbufs in flight. */ 136 u_int cpuid; /* CPU ID assigned upon creation. */ 137 void (*if_qflush)(struct ifnet *); 138 /* Original if_qflush routine. 
*/ 139 }; 140 141 /* 142 * Per-CPU list of ifps with data in the ifq that needs to be flushed 143 * to the netisr ``hw'' queue before we allow any further direct queuing 144 * to the ``hw'' queue. 145 */ 146 struct epair_ifp_drain { 147 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 148 struct ifnet *ifp; 149 }; 150 STAILQ_HEAD(eid_list, epair_ifp_drain); 151 152 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 153 "if_epair", NULL, MTX_DEF) 154 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 155 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 156 MA_OWNED) 157 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 158 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 159 160 #ifdef INVARIANTS 161 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 162 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 163 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 164 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 165 #else 166 #define EPAIR_REFCOUNT_INIT(r, v) 167 #define EPAIR_REFCOUNT_AQUIRE(r) 168 #define EPAIR_REFCOUNT_RELEASE(r) 169 #define EPAIR_REFCOUNT_ASSERT(a, p) 170 #endif 171 172 static MALLOC_DEFINE(M_EPAIR, epairname, 173 "Pair of virtual cross-over connected Ethernet-like interfaces"); 174 175 static VNET_DEFINE(struct if_clone *, epair_cloner); 176 #define V_epair_cloner VNET(epair_cloner) 177 178 /* 179 * DPCPU area and functions. 180 */ 181 struct epair_dpcpu { 182 struct mtx if_epair_mtx; /* Per-CPU locking. */ 183 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 184 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 185 * data in the ifq. */ 186 }; 187 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 188 189 static void 190 epair_dpcpu_init(void) 191 { 192 struct epair_dpcpu *epair_dpcpu; 193 struct eid_list *s; 194 u_int cpuid; 195 196 CPU_FOREACH(cpuid) { 197 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 198 199 /* Initialize per-cpu lock. 
*/ 200 EPAIR_LOCK_INIT(epair_dpcpu); 201 202 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 203 epair_dpcpu->epair_drv_flags = 0; 204 205 /* 206 * Initialize per-cpu drain list. 207 * Manually do what STAILQ_HEAD_INITIALIZER would do. 208 */ 209 s = &epair_dpcpu->epair_ifp_drain_list; 210 s->stqh_first = NULL; 211 s->stqh_last = &s->stqh_first; 212 } 213 } 214 215 static void 216 epair_dpcpu_detach(void) 217 { 218 struct epair_dpcpu *epair_dpcpu; 219 u_int cpuid; 220 221 CPU_FOREACH(cpuid) { 222 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 223 224 /* Destroy per-cpu lock. */ 225 EPAIR_LOCK_DESTROY(epair_dpcpu); 226 } 227 } 228 229 /* 230 * Helper functions. 231 */ 232 static u_int 233 cpuid_from_ifp(struct ifnet *ifp) 234 { 235 struct epair_softc *sc; 236 237 if (ifp == NULL) 238 return (0); 239 sc = ifp->if_softc; 240 241 return (sc->cpuid); 242 } 243 244 /* 245 * Netisr handler functions. 246 */ 247 static void 248 epair_nh_sintr(struct mbuf *m) 249 { 250 struct ifnet *ifp; 251 struct epair_softc *sc; 252 253 ifp = m->m_pkthdr.rcvif; 254 (*ifp->if_input)(ifp, m); 255 sc = ifp->if_softc; 256 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 257 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 258 ("%s: ifp=%p sc->refcount not >= 1: %d", 259 __func__, ifp, sc->refcount)); 260 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 261 } 262 263 static struct mbuf * 264 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 265 { 266 267 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 268 269 return (m); 270 } 271 272 static void 273 epair_nh_drainedcpu(u_int cpuid) 274 { 275 struct epair_dpcpu *epair_dpcpu; 276 struct epair_ifp_drain *elm, *tvar; 277 struct ifnet *ifp; 278 279 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 280 EPAIR_LOCK(epair_dpcpu); 281 /* 282 * Assume our "hw" queue and possibly ifq will be emptied 283 * again. 
In case we will overflow the "hw" queue while 284 * draining, epair_start_locked will set IFF_DRV_OACTIVE 285 * again and we will stop and return. 286 */ 287 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 288 ifp_next, tvar) { 289 ifp = elm->ifp; 290 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 291 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 292 epair_start_locked(ifp); 293 294 IFQ_LOCK(&ifp->if_snd); 295 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 296 struct epair_softc *sc; 297 298 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 299 elm, epair_ifp_drain, ifp_next); 300 /* The cached ifp goes off the list. */ 301 sc = ifp->if_softc; 302 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 303 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 304 ("%s: ifp=%p sc->refcount not >= 1: %d", 305 __func__, ifp, sc->refcount)); 306 free(elm, M_EPAIR); 307 } 308 IFQ_UNLOCK(&ifp->if_snd); 309 310 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 311 /* Our "hw"q overflew again. */ 312 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 313 DPRINTF("hw queue length overflow at %u\n", 314 epair_nh.nh_qlimit); 315 break; 316 } 317 } 318 EPAIR_UNLOCK(epair_dpcpu); 319 } 320 321 /* 322 * Network interface (`if') related functions. 323 */ 324 static void 325 epair_remove_ifp_from_draining(struct ifnet *ifp) 326 { 327 struct epair_dpcpu *epair_dpcpu; 328 struct epair_ifp_drain *elm, *tvar; 329 u_int cpuid; 330 331 CPU_FOREACH(cpuid) { 332 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 333 EPAIR_LOCK(epair_dpcpu); 334 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 335 ifp_next, tvar) { 336 if (ifp == elm->ifp) { 337 struct epair_softc *sc; 338 339 STAILQ_REMOVE( 340 &epair_dpcpu->epair_ifp_drain_list, elm, 341 epair_ifp_drain, ifp_next); 342 /* The cached ifp goes off the list. 
*/ 343 sc = ifp->if_softc; 344 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 345 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 346 ("%s: ifp=%p sc->refcount not >= 1: %d", 347 __func__, ifp, sc->refcount)); 348 free(elm, M_EPAIR); 349 } 350 } 351 EPAIR_UNLOCK(epair_dpcpu); 352 } 353 } 354 355 static int 356 epair_add_ifp_for_draining(struct ifnet *ifp) 357 { 358 struct epair_dpcpu *epair_dpcpu; 359 struct epair_softc *sc; 360 struct epair_ifp_drain *elm = NULL; 361 362 sc = ifp->if_softc; 363 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 364 EPAIR_LOCK_ASSERT(epair_dpcpu); 365 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 366 if (elm->ifp == ifp) 367 break; 368 /* If the ifp is there already, return success. */ 369 if (elm != NULL) 370 return (0); 371 372 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 373 if (elm == NULL) 374 return (ENOMEM); 375 376 elm->ifp = ifp; 377 /* Add a reference for the ifp pointer on the list. */ 378 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 379 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 380 381 return (0); 382 } 383 384 static void 385 epair_start_locked(struct ifnet *ifp) 386 { 387 struct epair_dpcpu *epair_dpcpu; 388 struct mbuf *m; 389 struct epair_softc *sc; 390 struct ifnet *oifp; 391 int error; 392 393 DPRINTF("ifp=%p\n", ifp); 394 sc = ifp->if_softc; 395 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 396 EPAIR_LOCK_ASSERT(epair_dpcpu); 397 398 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 399 return; 400 if ((ifp->if_flags & IFF_UP) == 0) 401 return; 402 403 /* 404 * We get patckets here from ether_output via if_handoff() 405 * and ned to put them into the input queue of the oifp 406 * and call oifp->if_input() via netisr/epair_sintr(). 
407 */ 408 oifp = sc->oifp; 409 sc = oifp->if_softc; 410 for (;;) { 411 IFQ_DEQUEUE(&ifp->if_snd, m); 412 if (m == NULL) 413 break; 414 BPF_MTAP(ifp, m); 415 416 /* 417 * In case the outgoing interface is not usable, 418 * drop the packet. 419 */ 420 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 421 (oifp->if_flags & IFF_UP) ==0) { 422 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 423 m_freem(m); 424 continue; 425 } 426 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 427 428 /* 429 * Add a reference so the interface cannot go while the 430 * packet is in transit as we rely on rcvif to stay valid. 431 */ 432 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 433 m->m_pkthdr.rcvif = oifp; 434 CURVNET_SET_QUIET(oifp->if_vnet); 435 error = netisr_queue(NETISR_EPAIR, m); 436 CURVNET_RESTORE(); 437 if (!error) { 438 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 439 /* Someone else received the packet. */ 440 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 441 } else { 442 /* The packet was freed already. 
*/ 443 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 444 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 445 (void) epair_add_ifp_for_draining(ifp); 446 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 447 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 448 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 449 ("%s: ifp=%p sc->refcount not >= 1: %d", 450 __func__, oifp, sc->refcount)); 451 } 452 } 453 } 454 455 static void 456 epair_start(struct ifnet *ifp) 457 { 458 struct epair_dpcpu *epair_dpcpu; 459 460 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 461 EPAIR_LOCK(epair_dpcpu); 462 epair_start_locked(ifp); 463 EPAIR_UNLOCK(epair_dpcpu); 464 } 465 466 static int 467 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 468 { 469 struct epair_dpcpu *epair_dpcpu; 470 struct epair_softc *sc; 471 struct ifnet *oifp; 472 int error, len; 473 short mflags; 474 475 DPRINTF("ifp=%p m=%p\n", ifp, m); 476 sc = ifp->if_softc; 477 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 478 EPAIR_LOCK_ASSERT(epair_dpcpu); 479 480 if (m == NULL) 481 return (0); 482 483 /* 484 * We are not going to use the interface en/dequeue mechanism 485 * on the TX side. We are called from ether_output_frame() 486 * and will put the packet into the incoming queue of the 487 * other interface of our pair via the netsir. 488 */ 489 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 490 m_freem(m); 491 return (ENXIO); 492 } 493 if ((ifp->if_flags & IFF_UP) == 0) { 494 m_freem(m); 495 return (ENETDOWN); 496 } 497 498 BPF_MTAP(ifp, m); 499 500 /* 501 * In case the outgoing interface is not usable, 502 * drop the packet. 503 */ 504 oifp = sc->oifp; 505 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 506 (oifp->if_flags & IFF_UP) ==0) { 507 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 508 m_freem(m); 509 return (0); 510 } 511 len = m->m_pkthdr.len; 512 mflags = m->m_flags; 513 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 514 515 #ifdef ALTQ 516 /* Support ALTQ via the clasic if_start() path. 
*/ 517 IF_LOCK(&ifp->if_snd); 518 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 519 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 520 if (error) 521 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 522 IF_UNLOCK(&ifp->if_snd); 523 if (!error) { 524 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 525 if (mflags & (M_BCAST|M_MCAST)) 526 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 527 528 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 529 epair_start_locked(ifp); 530 else 531 (void)epair_add_ifp_for_draining(ifp); 532 } 533 return (error); 534 } 535 IF_UNLOCK(&ifp->if_snd); 536 #endif 537 538 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 539 /* 540 * Our hardware queue is full, try to fall back 541 * queuing to the ifq but do not call ifp->if_start. 542 * Either we are lucky or the packet is gone. 543 */ 544 IFQ_ENQUEUE(&ifp->if_snd, m, error); 545 if (!error) 546 (void)epair_add_ifp_for_draining(ifp); 547 return (error); 548 } 549 sc = oifp->if_softc; 550 /* 551 * Add a reference so the interface cannot go while the 552 * packet is in transit as we rely on rcvif to stay valid. 553 */ 554 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 555 m->m_pkthdr.rcvif = oifp; 556 CURVNET_SET_QUIET(oifp->if_vnet); 557 error = netisr_queue(NETISR_EPAIR, m); 558 CURVNET_RESTORE(); 559 if (!error) { 560 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 561 /* 562 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 563 * but as we bypass all this we have to duplicate 564 * the logic another time. 565 */ 566 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 567 if (mflags & (M_BCAST|M_MCAST)) 568 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 569 /* Someone else received the packet. */ 570 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 571 } else { 572 /* The packet was freed already. 
*/ 573 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 574 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 575 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 576 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 577 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 578 ("%s: ifp=%p sc->refcount not >= 1: %d", 579 __func__, oifp, sc->refcount)); 580 } 581 582 return (error); 583 } 584 585 static int 586 epair_transmit(struct ifnet *ifp, struct mbuf *m) 587 { 588 struct epair_dpcpu *epair_dpcpu; 589 int error; 590 591 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 592 EPAIR_LOCK(epair_dpcpu); 593 error = epair_transmit_locked(ifp, m); 594 EPAIR_UNLOCK(epair_dpcpu); 595 return (error); 596 } 597 598 static void 599 epair_qflush(struct ifnet *ifp) 600 { 601 struct epair_softc *sc; 602 603 sc = ifp->if_softc; 604 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 605 __func__, ifp, sc)); 606 /* 607 * Remove this ifp from all backpointer lists. The interface will not 608 * usable for flushing anyway nor should it have anything to flush 609 * after if_qflush(). 610 */ 611 epair_remove_ifp_from_draining(ifp); 612 613 if (sc->if_qflush) 614 sc->if_qflush(ifp); 615 } 616 617 static int 618 epair_media_change(struct ifnet *ifp __unused) 619 { 620 621 /* Do nothing. 
*/ 622 return (0); 623 } 624 625 static void 626 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 627 { 628 629 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 630 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 631 } 632 633 static int 634 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 635 { 636 struct epair_softc *sc; 637 struct ifreq *ifr; 638 int error; 639 640 ifr = (struct ifreq *)data; 641 switch (cmd) { 642 case SIOCSIFFLAGS: 643 case SIOCADDMULTI: 644 case SIOCDELMULTI: 645 error = 0; 646 break; 647 648 case SIOCSIFMEDIA: 649 case SIOCGIFMEDIA: 650 sc = ifp->if_softc; 651 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 652 break; 653 654 case SIOCSIFMTU: 655 /* We basically allow all kinds of MTUs. */ 656 ifp->if_mtu = ifr->ifr_mtu; 657 error = 0; 658 break; 659 660 default: 661 /* Let the common ethernet handler process this. */ 662 error = ether_ioctl(ifp, cmd, data); 663 break; 664 } 665 666 return (error); 667 } 668 669 static void 670 epair_init(void *dummy __unused) 671 { 672 } 673 674 675 /* 676 * Interface cloning functions. 677 * We use our private ones so that we can create/destroy our secondary 678 * device along with the primary one. 679 */ 680 static int 681 epair_clone_match(struct if_clone *ifc, const char *name) 682 { 683 const char *cp; 684 685 DPRINTF("name='%s'\n", name); 686 687 /* 688 * Our base name is epair. 689 * Our interfaces will be named epair<n>[ab]. 690 * So accept anything of the following list: 691 * - epair 692 * - epair<n> 693 * but not the epair<n>[ab] versions. 
694 */ 695 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 696 return (0); 697 698 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 699 if (*cp < '0' || *cp > '9') 700 return (0); 701 } 702 703 return (1); 704 } 705 706 static int 707 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 708 { 709 struct epair_softc *sca, *scb; 710 struct ifnet *ifp; 711 char *dp; 712 int error, unit, wildcard; 713 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 714 715 /* 716 * We are abusing params to create our second interface. 717 * Actually we already created it and called if_clone_create() 718 * for it to do the official insertion procedure the moment we knew 719 * it cannot fail anymore. So just do attach it here. 720 */ 721 if (params) { 722 scb = (struct epair_softc *)params; 723 ifp = scb->ifp; 724 /* Assign a hopefully unique, locally administered etheraddr. */ 725 eaddr[0] = 0x02; 726 eaddr[3] = (ifp->if_index >> 8) & 0xff; 727 eaddr[4] = ifp->if_index & 0xff; 728 eaddr[5] = 0x0b; 729 ether_ifattach(ifp, eaddr); 730 /* Correctly set the name for the cloner list. */ 731 strlcpy(name, scb->ifp->if_xname, len); 732 return (0); 733 } 734 735 /* Try to see if a special unit was requested. */ 736 error = ifc_name2unit(name, &unit); 737 if (error != 0) 738 return (error); 739 wildcard = (unit < 0); 740 741 error = ifc_alloc_unit(ifc, &unit); 742 if (error != 0) 743 return (error); 744 745 /* 746 * If no unit had been given, we need to adjust the ifName. 747 * Also make sure there is space for our extra [ab] suffix. 748 */ 749 for (dp = name; *dp != '\0'; dp++); 750 if (wildcard) { 751 error = snprintf(dp, len - (dp - name), "%d", unit); 752 if (error > len - (dp - name) - 1) { 753 /* ifName too long. */ 754 ifc_free_unit(ifc, unit); 755 return (ENOSPC); 756 } 757 dp += error; 758 } 759 if (len - (dp - name) - 1 < 1) { 760 /* No space left for our [ab] suffix. 
*/ 761 ifc_free_unit(ifc, unit); 762 return (ENOSPC); 763 } 764 *dp = 'b'; 765 /* Must not change dp so we can replace 'a' by 'b' later. */ 766 *(dp+1) = '\0'; 767 768 /* Check if 'a' and 'b' interfaces already exist. */ 769 if (ifunit(name) != NULL) 770 return (EEXIST); 771 *dp = 'a'; 772 if (ifunit(name) != NULL) 773 return (EEXIST); 774 775 /* Allocate memory for both [ab] interfaces */ 776 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 777 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 778 sca->ifp = if_alloc(IFT_ETHER); 779 if (sca->ifp == NULL) { 780 free(sca, M_EPAIR); 781 ifc_free_unit(ifc, unit); 782 return (ENOSPC); 783 } 784 785 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 786 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 787 scb->ifp = if_alloc(IFT_ETHER); 788 if (scb->ifp == NULL) { 789 free(scb, M_EPAIR); 790 if_free(sca->ifp); 791 free(sca, M_EPAIR); 792 ifc_free_unit(ifc, unit); 793 return (ENOSPC); 794 } 795 796 /* 797 * Cross-reference the interfaces so we will be able to free both. 798 */ 799 sca->oifp = scb->ifp; 800 scb->oifp = sca->ifp; 801 802 /* 803 * Calculate the cpuid for netisr queueing based on the 804 * ifIndex of the interfaces. As long as we cannot configure 805 * this or use cpuset information easily we cannot guarantee 806 * cache locality but we can at least allow parallelism. 807 */ 808 sca->cpuid = 809 netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); 810 scb->cpuid = 811 netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); 812 813 /* Finish initialization of interface <n>a. 
*/ 814 ifp = sca->ifp; 815 ifp->if_softc = sca; 816 strlcpy(ifp->if_xname, name, IFNAMSIZ); 817 ifp->if_dname = epairname; 818 ifp->if_dunit = unit; 819 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 820 ifp->if_capabilities = IFCAP_VLAN_MTU; 821 ifp->if_capenable = IFCAP_VLAN_MTU; 822 ifp->if_start = epair_start; 823 ifp->if_ioctl = epair_ioctl; 824 ifp->if_init = epair_init; 825 ifp->if_snd.ifq_maxlen = ifqmaxlen; 826 /* Assign a hopefully unique, locally administered etheraddr. */ 827 eaddr[0] = 0x02; 828 eaddr[3] = (ifp->if_index >> 8) & 0xff; 829 eaddr[4] = ifp->if_index & 0xff; 830 eaddr[5] = 0x0a; 831 ether_ifattach(ifp, eaddr); 832 sca->if_qflush = ifp->if_qflush; 833 ifp->if_qflush = epair_qflush; 834 ifp->if_transmit = epair_transmit; 835 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 836 837 /* Swap the name and finish initialization of interface <n>b. */ 838 *dp = 'b'; 839 840 ifp = scb->ifp; 841 ifp->if_softc = scb; 842 strlcpy(ifp->if_xname, name, IFNAMSIZ); 843 ifp->if_dname = epairname; 844 ifp->if_dunit = unit; 845 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 846 ifp->if_capabilities = IFCAP_VLAN_MTU; 847 ifp->if_capenable = IFCAP_VLAN_MTU; 848 ifp->if_start = epair_start; 849 ifp->if_ioctl = epair_ioctl; 850 ifp->if_init = epair_init; 851 ifp->if_snd.ifq_maxlen = ifqmaxlen; 852 /* We need to play some tricks here for the second interface. */ 853 strlcpy(name, epairname, len); 854 error = if_clone_create(name, len, (caddr_t)scb); 855 if (error) 856 panic("%s: if_clone_create() for our 2nd iface failed: %d", 857 __func__, error); 858 scb->if_qflush = ifp->if_qflush; 859 ifp->if_qflush = epair_qflush; 860 ifp->if_transmit = epair_transmit; 861 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 862 863 /* 864 * Restore name to <n>a as the ifp for this will go into the 865 * cloner list for the initial call. 
866 */ 867 strlcpy(name, sca->ifp->if_xname, len); 868 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 869 870 /* Initialise pseudo media types. */ 871 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 872 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 873 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 874 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 875 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 876 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 877 878 /* Tell the world, that we are ready to rock. */ 879 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 880 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 881 if_link_state_change(sca->ifp, LINK_STATE_UP); 882 if_link_state_change(scb->ifp, LINK_STATE_UP); 883 884 return (0); 885 } 886 887 static int 888 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 889 { 890 struct ifnet *oifp; 891 struct epair_softc *sca, *scb; 892 int unit, error; 893 894 DPRINTF("ifp=%p\n", ifp); 895 896 /* 897 * In case we called into if_clone_destroyif() ourselves 898 * again to remove the second interface, the softc will be 899 * NULL. In that case so not do anything but return success. 900 */ 901 if (ifp->if_softc == NULL) 902 return (0); 903 904 unit = ifp->if_dunit; 905 sca = ifp->if_softc; 906 oifp = sca->oifp; 907 scb = oifp->if_softc; 908 909 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 910 if_link_state_change(ifp, LINK_STATE_DOWN); 911 if_link_state_change(oifp, LINK_STATE_DOWN); 912 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 913 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 914 915 /* 916 * Get rid of our second half. As the other of the two 917 * interfaces may reside in a different vnet, we need to 918 * switch before freeing them. 919 */ 920 CURVNET_SET_QUIET(oifp->if_vnet); 921 ether_ifdetach(oifp); 922 /* 923 * Wait for all packets to be dispatched to if_input. 
924 * The numbers can only go down as the interface is 925 * detached so there is no need to use atomics. 926 */ 927 DPRINTF("scb refcnt=%u\n", scb->refcount); 928 EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, 929 ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); 930 oifp->if_softc = NULL; 931 error = if_clone_destroyif(ifc, oifp); 932 if (error) 933 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 934 __func__, error); 935 if_free(oifp); 936 ifmedia_removeall(&scb->media); 937 free(scb, M_EPAIR); 938 CURVNET_RESTORE(); 939 940 ether_ifdetach(ifp); 941 /* 942 * Wait for all packets to be dispatched to if_input. 943 */ 944 DPRINTF("sca refcnt=%u\n", sca->refcount); 945 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, 946 ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); 947 if_free(ifp); 948 ifmedia_removeall(&sca->media); 949 free(sca, M_EPAIR); 950 ifc_free_unit(ifc, unit); 951 952 return (0); 953 } 954 955 static void 956 vnet_epair_init(const void *unused __unused) 957 { 958 959 V_epair_cloner = if_clone_advanced(epairname, 0, 960 epair_clone_match, epair_clone_create, epair_clone_destroy); 961 } 962 VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 963 vnet_epair_init, NULL); 964 965 static void 966 vnet_epair_uninit(const void *unused __unused) 967 { 968 969 if_clone_detach(V_epair_cloner); 970 } 971 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 972 vnet_epair_uninit, NULL); 973 974 static int 975 epair_modevent(module_t mod, int type, void *data) 976 { 977 int qlimit; 978 979 switch (type) { 980 case MOD_LOAD: 981 /* For now limit us to one global mutex and one inq. */ 982 epair_dpcpu_init(); 983 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. 
*/ 984 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 985 epair_nh.nh_qlimit = qlimit; 986 netisr_register(&epair_nh); 987 if (bootverbose) 988 printf("%s initialized.\n", epairname); 989 break; 990 case MOD_UNLOAD: 991 netisr_unregister(&epair_nh); 992 epair_dpcpu_detach(); 993 if (bootverbose) 994 printf("%s unloaded.\n", epairname); 995 break; 996 default: 997 return (EOPNOTSUPP); 998 } 999 return (0); 1000 } 1001 1002 static moduledata_t epair_mod = { 1003 "if_epair", 1004 epair_modevent, 1005 0 1006 }; 1007 1008 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1009 MODULE_VERSION(if_epair, 1); 1010