1 /*- 2 * Copyright (c) 2008 The FreeBSD Foundation 3 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by CK Software GmbH under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * A pair of virtual back-to-back connected ethernet like interfaces 33 * (``two interfaces with a virtual cross-over cable''). 34 * 35 * This is mostly intended to be used to provide connectivity between 36 * different virtual network stack instances. 37 */ 38 /* 39 * Things to re-think once we have more experience: 40 * - ifp->if_reassign function once we can test with vimage. Depending on 41 * how if_vmove() is going to be improved. 42 * - Real random etheraddrs that are checked to be uniquish; we would need 43 * to re-do them in case we move the interface between network stacks 44 * in a private if_reassign function. 45 * In case we bridge to a real interface/network or between indepedent 46 * epairs on multiple stacks/machines, we may need this. 47 * For now let the user handle that case. 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include <sys/param.h> 54 #include <sys/kernel.h> 55 #include <sys/malloc.h> 56 #include <sys/mbuf.h> 57 #include <sys/module.h> 58 #include <sys/refcount.h> 59 #include <sys/queue.h> 60 #include <sys/smp.h> 61 #include <sys/socket.h> 62 #include <sys/sockio.h> 63 #include <sys/sysctl.h> 64 #include <sys/types.h> 65 #include <sys/libkern.h> 66 67 #include <net/bpf.h> 68 #include <net/ethernet.h> 69 #include <net/if.h> 70 #include <net/if_var.h> 71 #include <net/if_clone.h> 72 #include <net/if_media.h> 73 #include <net/if_var.h> 74 #include <net/if_types.h> 75 #include <net/netisr.h> 76 #include <net/vnet.h> 77 78 SYSCTL_DECL(_net_link); 79 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 80 81 #ifdef EPAIR_DEBUG 82 static int epair_debug = 0; 83 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 84 &epair_debug, 0, "if_epair(4) debugging."); 85 #define DPRINTF(fmt, arg...) \ 86 if (epair_debug) \ 87 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 88 #else 89 #define DPRINTF(fmt, arg...) 90 #endif 91 92 static void epair_nh_sintr(struct mbuf *); 93 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 94 static void epair_nh_drainedcpu(u_int); 95 96 static void epair_start_locked(struct ifnet *); 97 static int epair_media_change(struct ifnet *); 98 static void epair_media_status(struct ifnet *, struct ifmediareq *); 99 100 static int epair_clone_match(struct if_clone *, const char *); 101 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 102 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 103 104 static const char epairname[] = "epair"; 105 106 /* Netisr related definitions and sysctl. */ 107 static struct netisr_handler epair_nh = { 108 .nh_name = epairname, 109 .nh_proto = NETISR_EPAIR, 110 .nh_policy = NETISR_POLICY_CPU, 111 .nh_handler = epair_nh_sintr, 112 .nh_m2cpuid = epair_nh_m2cpuid, 113 .nh_drainedcpu = epair_nh_drainedcpu, 114 }; 115 116 static int 117 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 118 { 119 int error, qlimit; 120 121 netisr_getqlimit(&epair_nh, &qlimit); 122 error = sysctl_handle_int(oidp, &qlimit, 0, req); 123 if (error || !req->newptr) 124 return (error); 125 if (qlimit < 1) 126 return (EINVAL); 127 return (netisr_setqlimit(&epair_nh, qlimit)); 128 } 129 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 130 0, 0, sysctl_epair_netisr_maxqlen, "I", 131 "Maximum if_epair(4) netisr \"hw\" queue length"); 132 133 struct epair_softc { 134 struct ifnet *ifp; /* This ifp. */ 135 struct ifnet *oifp; /* other ifp of pair. */ 136 struct ifmedia media; /* Media config (fake). */ 137 u_int refcount; /* # of mbufs in flight. */ 138 u_int cpuid; /* CPU ID assigned upon creation. */ 139 void (*if_qflush)(struct ifnet *); 140 /* Original if_qflush routine. */ 141 }; 142 143 /* 144 * Per-CPU list of ifps with data in the ifq that needs to be flushed 145 * to the netisr ``hw'' queue before we allow any further direct queuing 146 * to the ``hw'' queue. 147 */ 148 struct epair_ifp_drain { 149 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 150 struct ifnet *ifp; 151 }; 152 STAILQ_HEAD(eid_list, epair_ifp_drain); 153 154 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 155 "if_epair", NULL, MTX_DEF) 156 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 157 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 158 MA_OWNED) 159 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 160 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 161 162 #ifdef INVARIANTS 163 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 164 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 165 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 166 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 167 #else 168 #define EPAIR_REFCOUNT_INIT(r, v) 169 #define EPAIR_REFCOUNT_AQUIRE(r) 170 #define EPAIR_REFCOUNT_RELEASE(r) 171 #define EPAIR_REFCOUNT_ASSERT(a, p) 172 #endif 173 174 static MALLOC_DEFINE(M_EPAIR, epairname, 175 "Pair of virtual cross-over connected Ethernet-like interfaces"); 176 177 static VNET_DEFINE(struct if_clone *, epair_cloner); 178 #define V_epair_cloner VNET(epair_cloner) 179 180 /* 181 * DPCPU area and functions. 182 */ 183 struct epair_dpcpu { 184 struct mtx if_epair_mtx; /* Per-CPU locking. */ 185 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 186 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 187 * data in the ifq. */ 188 }; 189 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 190 191 static void 192 epair_dpcpu_init(void) 193 { 194 struct epair_dpcpu *epair_dpcpu; 195 struct eid_list *s; 196 u_int cpuid; 197 198 CPU_FOREACH(cpuid) { 199 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 200 201 /* Initialize per-cpu lock. */ 202 EPAIR_LOCK_INIT(epair_dpcpu); 203 204 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 205 epair_dpcpu->epair_drv_flags = 0; 206 207 /* 208 * Initialize per-cpu drain list. 209 * Manually do what STAILQ_HEAD_INITIALIZER would do. 210 */ 211 s = &epair_dpcpu->epair_ifp_drain_list; 212 s->stqh_first = NULL; 213 s->stqh_last = &s->stqh_first; 214 } 215 } 216 217 static void 218 epair_dpcpu_detach(void) 219 { 220 struct epair_dpcpu *epair_dpcpu; 221 u_int cpuid; 222 223 CPU_FOREACH(cpuid) { 224 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 225 226 /* Destroy per-cpu lock. */ 227 EPAIR_LOCK_DESTROY(epair_dpcpu); 228 } 229 } 230 231 /* 232 * Helper functions. 233 */ 234 static u_int 235 cpuid_from_ifp(struct ifnet *ifp) 236 { 237 struct epair_softc *sc; 238 239 if (ifp == NULL) 240 return (0); 241 sc = ifp->if_softc; 242 243 return (sc->cpuid); 244 } 245 246 /* 247 * Netisr handler functions. 248 */ 249 static void 250 epair_nh_sintr(struct mbuf *m) 251 { 252 struct ifnet *ifp; 253 struct epair_softc *sc; 254 255 ifp = m->m_pkthdr.rcvif; 256 (*ifp->if_input)(ifp, m); 257 sc = ifp->if_softc; 258 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 259 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 260 ("%s: ifp=%p sc->refcount not >= 1: %d", 261 __func__, ifp, sc->refcount)); 262 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 263 } 264 265 static struct mbuf * 266 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 267 { 268 269 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 270 271 return (m); 272 } 273 274 static void 275 epair_nh_drainedcpu(u_int cpuid) 276 { 277 struct epair_dpcpu *epair_dpcpu; 278 struct epair_ifp_drain *elm, *tvar; 279 struct ifnet *ifp; 280 281 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 282 EPAIR_LOCK(epair_dpcpu); 283 /* 284 * Assume our "hw" queue and possibly ifq will be emptied 285 * again. In case we will overflow the "hw" queue while 286 * draining, epair_start_locked will set IFF_DRV_OACTIVE 287 * again and we will stop and return. 288 */ 289 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 290 ifp_next, tvar) { 291 ifp = elm->ifp; 292 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 293 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 294 epair_start_locked(ifp); 295 296 IFQ_LOCK(&ifp->if_snd); 297 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 298 struct epair_softc *sc; 299 300 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 301 elm, epair_ifp_drain, ifp_next); 302 /* The cached ifp goes off the list. */ 303 sc = ifp->if_softc; 304 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 305 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 306 ("%s: ifp=%p sc->refcount not >= 1: %d", 307 __func__, ifp, sc->refcount)); 308 free(elm, M_EPAIR); 309 } 310 IFQ_UNLOCK(&ifp->if_snd); 311 312 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 313 /* Our "hw"q overflew again. */ 314 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 315 DPRINTF("hw queue length overflow at %u\n", 316 epair_nh.nh_qlimit); 317 break; 318 } 319 } 320 EPAIR_UNLOCK(epair_dpcpu); 321 } 322 323 /* 324 * Network interface (`if') related functions. 325 */ 326 static void 327 epair_remove_ifp_from_draining(struct ifnet *ifp) 328 { 329 struct epair_dpcpu *epair_dpcpu; 330 struct epair_ifp_drain *elm, *tvar; 331 u_int cpuid; 332 333 CPU_FOREACH(cpuid) { 334 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 335 EPAIR_LOCK(epair_dpcpu); 336 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 337 ifp_next, tvar) { 338 if (ifp == elm->ifp) { 339 struct epair_softc *sc; 340 341 STAILQ_REMOVE( 342 &epair_dpcpu->epair_ifp_drain_list, elm, 343 epair_ifp_drain, ifp_next); 344 /* The cached ifp goes off the list. */ 345 sc = ifp->if_softc; 346 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 347 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 348 ("%s: ifp=%p sc->refcount not >= 1: %d", 349 __func__, ifp, sc->refcount)); 350 free(elm, M_EPAIR); 351 } 352 } 353 EPAIR_UNLOCK(epair_dpcpu); 354 } 355 } 356 357 static int 358 epair_add_ifp_for_draining(struct ifnet *ifp) 359 { 360 struct epair_dpcpu *epair_dpcpu; 361 struct epair_softc *sc; 362 struct epair_ifp_drain *elm = NULL; 363 364 sc = ifp->if_softc; 365 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 366 EPAIR_LOCK_ASSERT(epair_dpcpu); 367 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 368 if (elm->ifp == ifp) 369 break; 370 /* If the ifp is there already, return success. */ 371 if (elm != NULL) 372 return (0); 373 374 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 375 if (elm == NULL) 376 return (ENOMEM); 377 378 elm->ifp = ifp; 379 /* Add a reference for the ifp pointer on the list. */ 380 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 381 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 382 383 return (0); 384 } 385 386 static void 387 epair_start_locked(struct ifnet *ifp) 388 { 389 struct epair_dpcpu *epair_dpcpu; 390 struct mbuf *m; 391 struct epair_softc *sc; 392 struct ifnet *oifp; 393 int error; 394 395 DPRINTF("ifp=%p\n", ifp); 396 sc = ifp->if_softc; 397 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 398 EPAIR_LOCK_ASSERT(epair_dpcpu); 399 400 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 401 return; 402 if ((ifp->if_flags & IFF_UP) == 0) 403 return; 404 405 /* 406 * We get patckets here from ether_output via if_handoff() 407 * and ned to put them into the input queue of the oifp 408 * and call oifp->if_input() via netisr/epair_sintr(). 409 */ 410 oifp = sc->oifp; 411 sc = oifp->if_softc; 412 for (;;) { 413 IFQ_DEQUEUE(&ifp->if_snd, m); 414 if (m == NULL) 415 break; 416 BPF_MTAP(ifp, m); 417 418 /* 419 * In case the outgoing interface is not usable, 420 * drop the packet. 421 */ 422 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 423 (oifp->if_flags & IFF_UP) ==0) { 424 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 425 m_freem(m); 426 continue; 427 } 428 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 429 430 /* 431 * Add a reference so the interface cannot go while the 432 * packet is in transit as we rely on rcvif to stay valid. 433 */ 434 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 435 m->m_pkthdr.rcvif = oifp; 436 CURVNET_SET_QUIET(oifp->if_vnet); 437 error = netisr_queue(NETISR_EPAIR, m); 438 CURVNET_RESTORE(); 439 if (!error) { 440 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 441 /* Someone else received the packet. */ 442 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 443 } else { 444 /* The packet was freed already. */ 445 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 446 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 447 (void) epair_add_ifp_for_draining(ifp); 448 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 449 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 450 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 451 ("%s: ifp=%p sc->refcount not >= 1: %d", 452 __func__, oifp, sc->refcount)); 453 } 454 } 455 } 456 457 static void 458 epair_start(struct ifnet *ifp) 459 { 460 struct epair_dpcpu *epair_dpcpu; 461 462 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 463 EPAIR_LOCK(epair_dpcpu); 464 epair_start_locked(ifp); 465 EPAIR_UNLOCK(epair_dpcpu); 466 } 467 468 static int 469 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 470 { 471 struct epair_dpcpu *epair_dpcpu; 472 struct epair_softc *sc; 473 struct ifnet *oifp; 474 int error, len; 475 short mflags; 476 477 DPRINTF("ifp=%p m=%p\n", ifp, m); 478 sc = ifp->if_softc; 479 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 480 EPAIR_LOCK_ASSERT(epair_dpcpu); 481 482 if (m == NULL) 483 return (0); 484 485 /* 486 * We are not going to use the interface en/dequeue mechanism 487 * on the TX side. We are called from ether_output_frame() 488 * and will put the packet into the incoming queue of the 489 * other interface of our pair via the netsir. 490 */ 491 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 492 m_freem(m); 493 return (ENXIO); 494 } 495 if ((ifp->if_flags & IFF_UP) == 0) { 496 m_freem(m); 497 return (ENETDOWN); 498 } 499 500 BPF_MTAP(ifp, m); 501 502 /* 503 * In case the outgoing interface is not usable, 504 * drop the packet. 505 */ 506 oifp = sc->oifp; 507 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 508 (oifp->if_flags & IFF_UP) ==0) { 509 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 510 m_freem(m); 511 return (0); 512 } 513 len = m->m_pkthdr.len; 514 mflags = m->m_flags; 515 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 516 517 #ifdef ALTQ 518 /* Support ALTQ via the classic if_start() path. */ 519 IF_LOCK(&ifp->if_snd); 520 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 521 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 522 if (error) 523 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 524 IF_UNLOCK(&ifp->if_snd); 525 if (!error) { 526 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 527 if (mflags & (M_BCAST|M_MCAST)) 528 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 529 530 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 531 epair_start_locked(ifp); 532 else 533 (void)epair_add_ifp_for_draining(ifp); 534 } 535 return (error); 536 } 537 IF_UNLOCK(&ifp->if_snd); 538 #endif 539 540 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 541 /* 542 * Our hardware queue is full, try to fall back 543 * queuing to the ifq but do not call ifp->if_start. 544 * Either we are lucky or the packet is gone. 545 */ 546 IFQ_ENQUEUE(&ifp->if_snd, m, error); 547 if (!error) 548 (void)epair_add_ifp_for_draining(ifp); 549 return (error); 550 } 551 sc = oifp->if_softc; 552 /* 553 * Add a reference so the interface cannot go while the 554 * packet is in transit as we rely on rcvif to stay valid. 555 */ 556 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 557 m->m_pkthdr.rcvif = oifp; 558 CURVNET_SET_QUIET(oifp->if_vnet); 559 error = netisr_queue(NETISR_EPAIR, m); 560 CURVNET_RESTORE(); 561 if (!error) { 562 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 563 /* 564 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 565 * but as we bypass all this we have to duplicate 566 * the logic another time. 567 */ 568 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 569 if (mflags & (M_BCAST|M_MCAST)) 570 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 571 /* Someone else received the packet. */ 572 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 573 } else { 574 /* The packet was freed already. */ 575 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 576 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 577 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 578 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 579 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 580 ("%s: ifp=%p sc->refcount not >= 1: %d", 581 __func__, oifp, sc->refcount)); 582 } 583 584 return (error); 585 } 586 587 static int 588 epair_transmit(struct ifnet *ifp, struct mbuf *m) 589 { 590 struct epair_dpcpu *epair_dpcpu; 591 int error; 592 593 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 594 EPAIR_LOCK(epair_dpcpu); 595 error = epair_transmit_locked(ifp, m); 596 EPAIR_UNLOCK(epair_dpcpu); 597 return (error); 598 } 599 600 static void 601 epair_qflush(struct ifnet *ifp) 602 { 603 struct epair_softc *sc; 604 605 sc = ifp->if_softc; 606 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 607 __func__, ifp, sc)); 608 /* 609 * Remove this ifp from all backpointer lists. The interface will not 610 * usable for flushing anyway nor should it have anything to flush 611 * after if_qflush(). 612 */ 613 epair_remove_ifp_from_draining(ifp); 614 615 if (sc->if_qflush) 616 sc->if_qflush(ifp); 617 } 618 619 static int 620 epair_media_change(struct ifnet *ifp __unused) 621 { 622 623 /* Do nothing. */ 624 return (0); 625 } 626 627 static void 628 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 629 { 630 631 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 632 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 633 } 634 635 static int 636 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 637 { 638 struct epair_softc *sc; 639 struct ifreq *ifr; 640 int error; 641 642 ifr = (struct ifreq *)data; 643 switch (cmd) { 644 case SIOCSIFFLAGS: 645 case SIOCADDMULTI: 646 case SIOCDELMULTI: 647 error = 0; 648 break; 649 650 case SIOCSIFMEDIA: 651 case SIOCGIFMEDIA: 652 sc = ifp->if_softc; 653 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 654 break; 655 656 case SIOCSIFMTU: 657 /* We basically allow all kinds of MTUs. */ 658 ifp->if_mtu = ifr->ifr_mtu; 659 error = 0; 660 break; 661 662 default: 663 /* Let the common ethernet handler process this. */ 664 error = ether_ioctl(ifp, cmd, data); 665 break; 666 } 667 668 return (error); 669 } 670 671 static void 672 epair_init(void *dummy __unused) 673 { 674 } 675 676 677 /* 678 * Interface cloning functions. 679 * We use our private ones so that we can create/destroy our secondary 680 * device along with the primary one. 681 */ 682 static int 683 epair_clone_match(struct if_clone *ifc, const char *name) 684 { 685 const char *cp; 686 687 DPRINTF("name='%s'\n", name); 688 689 /* 690 * Our base name is epair. 691 * Our interfaces will be named epair<n>[ab]. 692 * So accept anything of the following list: 693 * - epair 694 * - epair<n> 695 * but not the epair<n>[ab] versions. 696 */ 697 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 698 return (0); 699 700 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 701 if (*cp < '0' || *cp > '9') 702 return (0); 703 } 704 705 return (1); 706 } 707 708 static int 709 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 710 { 711 struct epair_softc *sca, *scb; 712 struct ifnet *ifp; 713 char *dp; 714 int error, unit, wildcard; 715 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 716 717 /* 718 * We are abusing params to create our second interface. 719 * Actually we already created it and called if_clone_create() 720 * for it to do the official insertion procedure the moment we knew 721 * it cannot fail anymore. So just do attach it here. 722 */ 723 if (params) { 724 scb = (struct epair_softc *)params; 725 ifp = scb->ifp; 726 /* Assign a hopefully unique, locally administered etheraddr. */ 727 eaddr[0] = 0x02; 728 eaddr[1] = arc4random() & 0xff; 729 eaddr[3] = (ifp->if_index >> 8) & 0xff; 730 eaddr[4] = ifp->if_index & 0xff; 731 eaddr[5] = 0x0b; 732 ether_ifattach(ifp, eaddr); 733 /* Correctly set the name for the cloner list. */ 734 strlcpy(name, scb->ifp->if_xname, len); 735 return (0); 736 } 737 738 /* Try to see if a special unit was requested. */ 739 error = ifc_name2unit(name, &unit); 740 if (error != 0) 741 return (error); 742 wildcard = (unit < 0); 743 744 error = ifc_alloc_unit(ifc, &unit); 745 if (error != 0) 746 return (error); 747 748 /* 749 * If no unit had been given, we need to adjust the ifName. 750 * Also make sure there is space for our extra [ab] suffix. 751 */ 752 for (dp = name; *dp != '\0'; dp++); 753 if (wildcard) { 754 error = snprintf(dp, len - (dp - name), "%d", unit); 755 if (error > len - (dp - name) - 1) { 756 /* ifName too long. */ 757 ifc_free_unit(ifc, unit); 758 return (ENOSPC); 759 } 760 dp += error; 761 } 762 if (len - (dp - name) - 1 < 1) { 763 /* No space left for our [ab] suffix. */ 764 ifc_free_unit(ifc, unit); 765 return (ENOSPC); 766 } 767 *dp = 'b'; 768 /* Must not change dp so we can replace 'a' by 'b' later. */ 769 *(dp+1) = '\0'; 770 771 /* Check if 'a' and 'b' interfaces already exist. */ 772 if (ifunit(name) != NULL) 773 return (EEXIST); 774 *dp = 'a'; 775 if (ifunit(name) != NULL) 776 return (EEXIST); 777 778 /* Allocate memory for both [ab] interfaces */ 779 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 780 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 781 sca->ifp = if_alloc(IFT_ETHER); 782 if (sca->ifp == NULL) { 783 free(sca, M_EPAIR); 784 ifc_free_unit(ifc, unit); 785 return (ENOSPC); 786 } 787 788 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 789 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 790 scb->ifp = if_alloc(IFT_ETHER); 791 if (scb->ifp == NULL) { 792 free(scb, M_EPAIR); 793 if_free(sca->ifp); 794 free(sca, M_EPAIR); 795 ifc_free_unit(ifc, unit); 796 return (ENOSPC); 797 } 798 799 /* 800 * Cross-reference the interfaces so we will be able to free both. 801 */ 802 sca->oifp = scb->ifp; 803 scb->oifp = sca->ifp; 804 805 /* 806 * Calculate the cpuid for netisr queueing based on the 807 * ifIndex of the interfaces. As long as we cannot configure 808 * this or use cpuset information easily we cannot guarantee 809 * cache locality but we can at least allow parallelism. 810 */ 811 sca->cpuid = 812 netisr_get_cpuid(sca->ifp->if_index); 813 scb->cpuid = 814 netisr_get_cpuid(scb->ifp->if_index); 815 816 /* Initialise pseudo media types. */ 817 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 818 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 819 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 820 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 821 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 822 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 823 824 /* Finish initialization of interface <n>a. */ 825 ifp = sca->ifp; 826 ifp->if_softc = sca; 827 strlcpy(ifp->if_xname, name, IFNAMSIZ); 828 ifp->if_dname = epairname; 829 ifp->if_dunit = unit; 830 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 831 ifp->if_capabilities = IFCAP_VLAN_MTU; 832 ifp->if_capenable = IFCAP_VLAN_MTU; 833 ifp->if_start = epair_start; 834 ifp->if_ioctl = epair_ioctl; 835 ifp->if_init = epair_init; 836 ifp->if_snd.ifq_maxlen = ifqmaxlen; 837 /* Assign a hopefully unique, locally administered etheraddr. */ 838 eaddr[0] = 0x02; 839 eaddr[3] = (ifp->if_index >> 8) & 0xff; 840 eaddr[4] = ifp->if_index & 0xff; 841 eaddr[5] = 0x0a; 842 ether_ifattach(ifp, eaddr); 843 sca->if_qflush = ifp->if_qflush; 844 ifp->if_qflush = epair_qflush; 845 ifp->if_transmit = epair_transmit; 846 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 847 848 /* Swap the name and finish initialization of interface <n>b. */ 849 *dp = 'b'; 850 851 ifp = scb->ifp; 852 ifp->if_softc = scb; 853 strlcpy(ifp->if_xname, name, IFNAMSIZ); 854 ifp->if_dname = epairname; 855 ifp->if_dunit = unit; 856 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 857 ifp->if_capabilities = IFCAP_VLAN_MTU; 858 ifp->if_capenable = IFCAP_VLAN_MTU; 859 ifp->if_start = epair_start; 860 ifp->if_ioctl = epair_ioctl; 861 ifp->if_init = epair_init; 862 ifp->if_snd.ifq_maxlen = ifqmaxlen; 863 /* We need to play some tricks here for the second interface. */ 864 strlcpy(name, epairname, len); 865 error = if_clone_create(name, len, (caddr_t)scb); 866 if (error) 867 panic("%s: if_clone_create() for our 2nd iface failed: %d", 868 __func__, error); 869 scb->if_qflush = ifp->if_qflush; 870 ifp->if_qflush = epair_qflush; 871 ifp->if_transmit = epair_transmit; 872 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 873 874 /* 875 * Restore name to <n>a as the ifp for this will go into the 876 * cloner list for the initial call. 877 */ 878 strlcpy(name, sca->ifp->if_xname, len); 879 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 880 881 /* Tell the world, that we are ready to rock. */ 882 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 883 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 884 if_link_state_change(sca->ifp, LINK_STATE_UP); 885 if_link_state_change(scb->ifp, LINK_STATE_UP); 886 887 return (0); 888 } 889 890 static int 891 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 892 { 893 struct ifnet *oifp; 894 struct epair_softc *sca, *scb; 895 int unit, error; 896 897 DPRINTF("ifp=%p\n", ifp); 898 899 /* 900 * In case we called into if_clone_destroyif() ourselves 901 * again to remove the second interface, the softc will be 902 * NULL. In that case so not do anything but return success. 903 */ 904 if (ifp->if_softc == NULL) 905 return (0); 906 907 unit = ifp->if_dunit; 908 sca = ifp->if_softc; 909 oifp = sca->oifp; 910 scb = oifp->if_softc; 911 912 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 913 if_link_state_change(ifp, LINK_STATE_DOWN); 914 if_link_state_change(oifp, LINK_STATE_DOWN); 915 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 916 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 917 918 /* 919 * Get rid of our second half. As the other of the two 920 * interfaces may reside in a different vnet, we need to 921 * switch before freeing them. 922 */ 923 CURVNET_SET_QUIET(oifp->if_vnet); 924 ether_ifdetach(oifp); 925 /* 926 * Wait for all packets to be dispatched to if_input. 927 * The numbers can only go down as the interface is 928 * detached so there is no need to use atomics. 929 */ 930 DPRINTF("scb refcnt=%u\n", scb->refcount); 931 EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, 932 ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); 933 oifp->if_softc = NULL; 934 error = if_clone_destroyif(ifc, oifp); 935 if (error) 936 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 937 __func__, error); 938 if_free(oifp); 939 ifmedia_removeall(&scb->media); 940 free(scb, M_EPAIR); 941 CURVNET_RESTORE(); 942 943 ether_ifdetach(ifp); 944 /* 945 * Wait for all packets to be dispatched to if_input. 946 */ 947 DPRINTF("sca refcnt=%u\n", sca->refcount); 948 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, 949 ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); 950 if_free(ifp); 951 ifmedia_removeall(&sca->media); 952 free(sca, M_EPAIR); 953 ifc_free_unit(ifc, unit); 954 955 return (0); 956 } 957 958 static void 959 vnet_epair_init(const void *unused __unused) 960 { 961 962 V_epair_cloner = if_clone_advanced(epairname, 0, 963 epair_clone_match, epair_clone_create, epair_clone_destroy); 964 #ifdef VIMAGE 965 netisr_register_vnet(&epair_nh); 966 #endif 967 } 968 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, 969 vnet_epair_init, NULL); 970 971 static void 972 vnet_epair_uninit(const void *unused __unused) 973 { 974 975 #ifdef VIMAGE 976 netisr_unregister_vnet(&epair_nh); 977 #endif 978 if_clone_detach(V_epair_cloner); 979 } 980 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 981 vnet_epair_uninit, NULL); 982 983 static int 984 epair_modevent(module_t mod, int type, void *data) 985 { 986 int qlimit; 987 988 switch (type) { 989 case MOD_LOAD: 990 /* For now limit us to one global mutex and one inq. */ 991 epair_dpcpu_init(); 992 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 993 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 994 epair_nh.nh_qlimit = qlimit; 995 netisr_register(&epair_nh); 996 if (bootverbose) 997 printf("%s initialized.\n", epairname); 998 break; 999 case MOD_UNLOAD: 1000 netisr_unregister(&epair_nh); 1001 epair_dpcpu_detach(); 1002 if (bootverbose) 1003 printf("%s unloaded.\n", epairname); 1004 break; 1005 default: 1006 return (EOPNOTSUPP); 1007 } 1008 return (0); 1009 } 1010 1011 static moduledata_t epair_mod = { 1012 "if_epair", 1013 epair_modevent, 1014 0 1015 }; 1016 1017 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 1018 MODULE_VERSION(if_epair, 1); 1019