1 /*- 2 * Copyright (c) 2008 The FreeBSD Foundation 3 * Copyright (c) 2009-2010 Bjoern A. Zeeb <bz@FreeBSD.org> 4 * All rights reserved. 5 * 6 * This software was developed by CK Software GmbH under sponsorship 7 * from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * A pair of virtual back-to-back connected ethernet like interfaces 33 * (``two interfaces with a virtual cross-over cable''). 34 * 35 * This is mostly intended to be used to provide connectivity between 36 * different virtual network stack instances. 37 */ 38 /* 39 * Things to re-think once we have more experience: 40 * - ifp->if_reassign function once we can test with vimage. Depending on 41 * how if_vmove() is going to be improved. 42 * - Real random etheraddrs that are checked to be uniquish; we would need 43 * to re-do them in case we move the interface between network stacks 44 * in a private if_reassign function. 45 * In case we bridge to a real interface/network or between indepedent 46 * epairs on multiple stacks/machines, we may need this. 47 * For now let the user handle that case. 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include <sys/param.h> 54 #include <sys/kernel.h> 55 #include <sys/malloc.h> 56 #include <sys/mbuf.h> 57 #include <sys/module.h> 58 #include <sys/refcount.h> 59 #include <sys/queue.h> 60 #include <sys/smp.h> 61 #include <sys/socket.h> 62 #include <sys/sockio.h> 63 #include <sys/sysctl.h> 64 #include <sys/types.h> 65 66 #include <net/bpf.h> 67 #include <net/ethernet.h> 68 #include <net/if.h> 69 #include <net/if_var.h> 70 #include <net/if_clone.h> 71 #include <net/if_media.h> 72 #include <net/if_var.h> 73 #include <net/if_types.h> 74 #include <net/netisr.h> 75 #include <net/vnet.h> 76 77 SYSCTL_DECL(_net_link); 78 static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW, 0, "epair sysctl"); 79 80 #ifdef EPAIR_DEBUG 81 static int epair_debug = 0; 82 SYSCTL_INT(_net_link_epair, OID_AUTO, epair_debug, CTLFLAG_RW, 83 &epair_debug, 0, "if_epair(4) debugging."); 84 #define DPRINTF(fmt, arg...) \ 85 if (epair_debug) \ 86 printf("[%s:%d] " fmt, __func__, __LINE__, ##arg) 87 #else 88 #define DPRINTF(fmt, arg...) 89 #endif 90 91 static void epair_nh_sintr(struct mbuf *); 92 static struct mbuf *epair_nh_m2cpuid(struct mbuf *, uintptr_t, u_int *); 93 static void epair_nh_drainedcpu(u_int); 94 95 static void epair_start_locked(struct ifnet *); 96 static int epair_media_change(struct ifnet *); 97 static void epair_media_status(struct ifnet *, struct ifmediareq *); 98 99 static int epair_clone_match(struct if_clone *, const char *); 100 static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); 101 static int epair_clone_destroy(struct if_clone *, struct ifnet *); 102 103 static const char epairname[] = "epair"; 104 105 /* Netisr related definitions and sysctl. */ 106 static struct netisr_handler epair_nh = { 107 .nh_name = epairname, 108 .nh_proto = NETISR_EPAIR, 109 .nh_policy = NETISR_POLICY_CPU, 110 .nh_handler = epair_nh_sintr, 111 .nh_m2cpuid = epair_nh_m2cpuid, 112 .nh_drainedcpu = epair_nh_drainedcpu, 113 }; 114 115 static int 116 sysctl_epair_netisr_maxqlen(SYSCTL_HANDLER_ARGS) 117 { 118 int error, qlimit; 119 120 netisr_getqlimit(&epair_nh, &qlimit); 121 error = sysctl_handle_int(oidp, &qlimit, 0, req); 122 if (error || !req->newptr) 123 return (error); 124 if (qlimit < 1) 125 return (EINVAL); 126 return (netisr_setqlimit(&epair_nh, qlimit)); 127 } 128 SYSCTL_PROC(_net_link_epair, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 129 0, 0, sysctl_epair_netisr_maxqlen, "I", 130 "Maximum if_epair(4) netisr \"hw\" queue length"); 131 132 struct epair_softc { 133 struct ifnet *ifp; /* This ifp. */ 134 struct ifnet *oifp; /* other ifp of pair. */ 135 struct ifmedia media; /* Media config (fake). */ 136 u_int refcount; /* # of mbufs in flight. */ 137 u_int cpuid; /* CPU ID assigned upon creation. */ 138 void (*if_qflush)(struct ifnet *); 139 /* Original if_qflush routine. */ 140 }; 141 142 /* 143 * Per-CPU list of ifps with data in the ifq that needs to be flushed 144 * to the netisr ``hw'' queue before we allow any further direct queuing 145 * to the ``hw'' queue. 146 */ 147 struct epair_ifp_drain { 148 STAILQ_ENTRY(epair_ifp_drain) ifp_next; 149 struct ifnet *ifp; 150 }; 151 STAILQ_HEAD(eid_list, epair_ifp_drain); 152 153 #define EPAIR_LOCK_INIT(dpcpu) mtx_init(&(dpcpu)->if_epair_mtx, \ 154 "if_epair", NULL, MTX_DEF) 155 #define EPAIR_LOCK_DESTROY(dpcpu) mtx_destroy(&(dpcpu)->if_epair_mtx) 156 #define EPAIR_LOCK_ASSERT(dpcpu) mtx_assert(&(dpcpu)->if_epair_mtx, \ 157 MA_OWNED) 158 #define EPAIR_LOCK(dpcpu) mtx_lock(&(dpcpu)->if_epair_mtx) 159 #define EPAIR_UNLOCK(dpcpu) mtx_unlock(&(dpcpu)->if_epair_mtx) 160 161 #ifdef INVARIANTS 162 #define EPAIR_REFCOUNT_INIT(r, v) refcount_init((r), (v)) 163 #define EPAIR_REFCOUNT_AQUIRE(r) refcount_acquire((r)) 164 #define EPAIR_REFCOUNT_RELEASE(r) refcount_release((r)) 165 #define EPAIR_REFCOUNT_ASSERT(a, p) KASSERT(a, p) 166 #else 167 #define EPAIR_REFCOUNT_INIT(r, v) 168 #define EPAIR_REFCOUNT_AQUIRE(r) 169 #define EPAIR_REFCOUNT_RELEASE(r) 170 #define EPAIR_REFCOUNT_ASSERT(a, p) 171 #endif 172 173 static MALLOC_DEFINE(M_EPAIR, epairname, 174 "Pair of virtual cross-over connected Ethernet-like interfaces"); 175 176 static VNET_DEFINE(struct if_clone *, epair_cloner); 177 #define V_epair_cloner VNET(epair_cloner) 178 179 /* 180 * DPCPU area and functions. 181 */ 182 struct epair_dpcpu { 183 struct mtx if_epair_mtx; /* Per-CPU locking. */ 184 int epair_drv_flags; /* Per-CPU ``hw'' drv flags. */ 185 struct eid_list epair_ifp_drain_list; /* Per-CPU list of ifps with 186 * data in the ifq. */ 187 }; 188 DPCPU_DEFINE(struct epair_dpcpu, epair_dpcpu); 189 190 static void 191 epair_dpcpu_init(void) 192 { 193 struct epair_dpcpu *epair_dpcpu; 194 struct eid_list *s; 195 u_int cpuid; 196 197 CPU_FOREACH(cpuid) { 198 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 199 200 /* Initialize per-cpu lock. */ 201 EPAIR_LOCK_INIT(epair_dpcpu); 202 203 /* Driver flags are per-cpu as are our netisr "hw" queues. */ 204 epair_dpcpu->epair_drv_flags = 0; 205 206 /* 207 * Initialize per-cpu drain list. 208 * Manually do what STAILQ_HEAD_INITIALIZER would do. 209 */ 210 s = &epair_dpcpu->epair_ifp_drain_list; 211 s->stqh_first = NULL; 212 s->stqh_last = &s->stqh_first; 213 } 214 } 215 216 static void 217 epair_dpcpu_detach(void) 218 { 219 struct epair_dpcpu *epair_dpcpu; 220 u_int cpuid; 221 222 CPU_FOREACH(cpuid) { 223 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 224 225 /* Destroy per-cpu lock. */ 226 EPAIR_LOCK_DESTROY(epair_dpcpu); 227 } 228 } 229 230 /* 231 * Helper functions. 232 */ 233 static u_int 234 cpuid_from_ifp(struct ifnet *ifp) 235 { 236 struct epair_softc *sc; 237 238 if (ifp == NULL) 239 return (0); 240 sc = ifp->if_softc; 241 242 return (sc->cpuid); 243 } 244 245 /* 246 * Netisr handler functions. 247 */ 248 static void 249 epair_nh_sintr(struct mbuf *m) 250 { 251 struct ifnet *ifp; 252 struct epair_softc *sc; 253 254 ifp = m->m_pkthdr.rcvif; 255 (*ifp->if_input)(ifp, m); 256 sc = ifp->if_softc; 257 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 258 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 259 ("%s: ifp=%p sc->refcount not >= 1: %d", 260 __func__, ifp, sc->refcount)); 261 DPRINTF("ifp=%p refcount=%u\n", ifp, sc->refcount); 262 } 263 264 static struct mbuf * 265 epair_nh_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) 266 { 267 268 *cpuid = cpuid_from_ifp(m->m_pkthdr.rcvif); 269 270 return (m); 271 } 272 273 static void 274 epair_nh_drainedcpu(u_int cpuid) 275 { 276 struct epair_dpcpu *epair_dpcpu; 277 struct epair_ifp_drain *elm, *tvar; 278 struct ifnet *ifp; 279 280 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 281 EPAIR_LOCK(epair_dpcpu); 282 /* 283 * Assume our "hw" queue and possibly ifq will be emptied 284 * again. In case we will overflow the "hw" queue while 285 * draining, epair_start_locked will set IFF_DRV_OACTIVE 286 * again and we will stop and return. 287 */ 288 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 289 ifp_next, tvar) { 290 ifp = elm->ifp; 291 epair_dpcpu->epair_drv_flags &= ~IFF_DRV_OACTIVE; 292 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 293 epair_start_locked(ifp); 294 295 IFQ_LOCK(&ifp->if_snd); 296 if (IFQ_IS_EMPTY(&ifp->if_snd)) { 297 struct epair_softc *sc; 298 299 STAILQ_REMOVE(&epair_dpcpu->epair_ifp_drain_list, 300 elm, epair_ifp_drain, ifp_next); 301 /* The cached ifp goes off the list. */ 302 sc = ifp->if_softc; 303 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 304 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 305 ("%s: ifp=%p sc->refcount not >= 1: %d", 306 __func__, ifp, sc->refcount)); 307 free(elm, M_EPAIR); 308 } 309 IFQ_UNLOCK(&ifp->if_snd); 310 311 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0) { 312 /* Our "hw"q overflew again. */ 313 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 314 DPRINTF("hw queue length overflow at %u\n", 315 epair_nh.nh_qlimit); 316 break; 317 } 318 } 319 EPAIR_UNLOCK(epair_dpcpu); 320 } 321 322 /* 323 * Network interface (`if') related functions. 324 */ 325 static void 326 epair_remove_ifp_from_draining(struct ifnet *ifp) 327 { 328 struct epair_dpcpu *epair_dpcpu; 329 struct epair_ifp_drain *elm, *tvar; 330 u_int cpuid; 331 332 CPU_FOREACH(cpuid) { 333 epair_dpcpu = DPCPU_ID_PTR(cpuid, epair_dpcpu); 334 EPAIR_LOCK(epair_dpcpu); 335 STAILQ_FOREACH_SAFE(elm, &epair_dpcpu->epair_ifp_drain_list, 336 ifp_next, tvar) { 337 if (ifp == elm->ifp) { 338 struct epair_softc *sc; 339 340 STAILQ_REMOVE( 341 &epair_dpcpu->epair_ifp_drain_list, elm, 342 epair_ifp_drain, ifp_next); 343 /* The cached ifp goes off the list. */ 344 sc = ifp->if_softc; 345 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 346 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 347 ("%s: ifp=%p sc->refcount not >= 1: %d", 348 __func__, ifp, sc->refcount)); 349 free(elm, M_EPAIR); 350 } 351 } 352 EPAIR_UNLOCK(epair_dpcpu); 353 } 354 } 355 356 static int 357 epair_add_ifp_for_draining(struct ifnet *ifp) 358 { 359 struct epair_dpcpu *epair_dpcpu; 360 struct epair_softc *sc; 361 struct epair_ifp_drain *elm = NULL; 362 363 sc = ifp->if_softc; 364 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 365 EPAIR_LOCK_ASSERT(epair_dpcpu); 366 STAILQ_FOREACH(elm, &epair_dpcpu->epair_ifp_drain_list, ifp_next) 367 if (elm->ifp == ifp) 368 break; 369 /* If the ifp is there already, return success. */ 370 if (elm != NULL) 371 return (0); 372 373 elm = malloc(sizeof(struct epair_ifp_drain), M_EPAIR, M_NOWAIT|M_ZERO); 374 if (elm == NULL) 375 return (ENOMEM); 376 377 elm->ifp = ifp; 378 /* Add a reference for the ifp pointer on the list. */ 379 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 380 STAILQ_INSERT_TAIL(&epair_dpcpu->epair_ifp_drain_list, elm, ifp_next); 381 382 return (0); 383 } 384 385 static void 386 epair_start_locked(struct ifnet *ifp) 387 { 388 struct epair_dpcpu *epair_dpcpu; 389 struct mbuf *m; 390 struct epair_softc *sc; 391 struct ifnet *oifp; 392 int error; 393 394 DPRINTF("ifp=%p\n", ifp); 395 sc = ifp->if_softc; 396 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 397 EPAIR_LOCK_ASSERT(epair_dpcpu); 398 399 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 400 return; 401 if ((ifp->if_flags & IFF_UP) == 0) 402 return; 403 404 /* 405 * We get patckets here from ether_output via if_handoff() 406 * and ned to put them into the input queue of the oifp 407 * and call oifp->if_input() via netisr/epair_sintr(). 408 */ 409 oifp = sc->oifp; 410 sc = oifp->if_softc; 411 for (;;) { 412 IFQ_DEQUEUE(&ifp->if_snd, m); 413 if (m == NULL) 414 break; 415 BPF_MTAP(ifp, m); 416 417 /* 418 * In case the outgoing interface is not usable, 419 * drop the packet. 420 */ 421 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 422 (oifp->if_flags & IFF_UP) ==0) { 423 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 424 m_freem(m); 425 continue; 426 } 427 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 428 429 /* 430 * Add a reference so the interface cannot go while the 431 * packet is in transit as we rely on rcvif to stay valid. 432 */ 433 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 434 m->m_pkthdr.rcvif = oifp; 435 CURVNET_SET_QUIET(oifp->if_vnet); 436 error = netisr_queue(NETISR_EPAIR, m); 437 CURVNET_RESTORE(); 438 if (!error) { 439 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 440 /* Someone else received the packet. */ 441 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 442 } else { 443 /* The packet was freed already. */ 444 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 445 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 446 (void) epair_add_ifp_for_draining(ifp); 447 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 448 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 449 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 450 ("%s: ifp=%p sc->refcount not >= 1: %d", 451 __func__, oifp, sc->refcount)); 452 } 453 } 454 } 455 456 static void 457 epair_start(struct ifnet *ifp) 458 { 459 struct epair_dpcpu *epair_dpcpu; 460 461 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 462 EPAIR_LOCK(epair_dpcpu); 463 epair_start_locked(ifp); 464 EPAIR_UNLOCK(epair_dpcpu); 465 } 466 467 static int 468 epair_transmit_locked(struct ifnet *ifp, struct mbuf *m) 469 { 470 struct epair_dpcpu *epair_dpcpu; 471 struct epair_softc *sc; 472 struct ifnet *oifp; 473 int error, len; 474 short mflags; 475 476 DPRINTF("ifp=%p m=%p\n", ifp, m); 477 sc = ifp->if_softc; 478 epair_dpcpu = DPCPU_ID_PTR(sc->cpuid, epair_dpcpu); 479 EPAIR_LOCK_ASSERT(epair_dpcpu); 480 481 if (m == NULL) 482 return (0); 483 484 /* 485 * We are not going to use the interface en/dequeue mechanism 486 * on the TX side. We are called from ether_output_frame() 487 * and will put the packet into the incoming queue of the 488 * other interface of our pair via the netsir. 489 */ 490 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 491 m_freem(m); 492 return (ENXIO); 493 } 494 if ((ifp->if_flags & IFF_UP) == 0) { 495 m_freem(m); 496 return (ENETDOWN); 497 } 498 499 BPF_MTAP(ifp, m); 500 501 /* 502 * In case the outgoing interface is not usable, 503 * drop the packet. 504 */ 505 oifp = sc->oifp; 506 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 507 (oifp->if_flags & IFF_UP) ==0) { 508 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 509 m_freem(m); 510 return (0); 511 } 512 len = m->m_pkthdr.len; 513 mflags = m->m_flags; 514 DPRINTF("packet %s -> %s\n", ifp->if_xname, oifp->if_xname); 515 516 #ifdef ALTQ 517 /* Support ALTQ via the clasic if_start() path. */ 518 IF_LOCK(&ifp->if_snd); 519 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 520 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error); 521 if (error) 522 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 523 IF_UNLOCK(&ifp->if_snd); 524 if (!error) { 525 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 526 if (mflags & (M_BCAST|M_MCAST)) 527 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 528 529 if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) 530 epair_start_locked(ifp); 531 else 532 (void)epair_add_ifp_for_draining(ifp); 533 } 534 return (error); 535 } 536 IF_UNLOCK(&ifp->if_snd); 537 #endif 538 539 if ((epair_dpcpu->epair_drv_flags & IFF_DRV_OACTIVE) != 0) { 540 /* 541 * Our hardware queue is full, try to fall back 542 * queuing to the ifq but do not call ifp->if_start. 543 * Either we are lucky or the packet is gone. 544 */ 545 IFQ_ENQUEUE(&ifp->if_snd, m, error); 546 if (!error) 547 (void)epair_add_ifp_for_draining(ifp); 548 return (error); 549 } 550 sc = oifp->if_softc; 551 /* 552 * Add a reference so the interface cannot go while the 553 * packet is in transit as we rely on rcvif to stay valid. 554 */ 555 EPAIR_REFCOUNT_AQUIRE(&sc->refcount); 556 m->m_pkthdr.rcvif = oifp; 557 CURVNET_SET_QUIET(oifp->if_vnet); 558 error = netisr_queue(NETISR_EPAIR, m); 559 CURVNET_RESTORE(); 560 if (!error) { 561 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 562 /* 563 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, 564 * but as we bypass all this we have to duplicate 565 * the logic another time. 566 */ 567 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 568 if (mflags & (M_BCAST|M_MCAST)) 569 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 570 /* Someone else received the packet. */ 571 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); 572 } else { 573 /* The packet was freed already. */ 574 epair_dpcpu->epair_drv_flags |= IFF_DRV_OACTIVE; 575 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 576 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 577 EPAIR_REFCOUNT_RELEASE(&sc->refcount); 578 EPAIR_REFCOUNT_ASSERT((int)sc->refcount >= 1, 579 ("%s: ifp=%p sc->refcount not >= 1: %d", 580 __func__, oifp, sc->refcount)); 581 } 582 583 return (error); 584 } 585 586 static int 587 epair_transmit(struct ifnet *ifp, struct mbuf *m) 588 { 589 struct epair_dpcpu *epair_dpcpu; 590 int error; 591 592 epair_dpcpu = DPCPU_ID_PTR(cpuid_from_ifp(ifp), epair_dpcpu); 593 EPAIR_LOCK(epair_dpcpu); 594 error = epair_transmit_locked(ifp, m); 595 EPAIR_UNLOCK(epair_dpcpu); 596 return (error); 597 } 598 599 static void 600 epair_qflush(struct ifnet *ifp) 601 { 602 struct epair_softc *sc; 603 604 sc = ifp->if_softc; 605 KASSERT(sc != NULL, ("%s: ifp=%p, epair_softc gone? sc=%p\n", 606 __func__, ifp, sc)); 607 /* 608 * Remove this ifp from all backpointer lists. The interface will not 609 * usable for flushing anyway nor should it have anything to flush 610 * after if_qflush(). 611 */ 612 epair_remove_ifp_from_draining(ifp); 613 614 if (sc->if_qflush) 615 sc->if_qflush(ifp); 616 } 617 618 static int 619 epair_media_change(struct ifnet *ifp __unused) 620 { 621 622 /* Do nothing. */ 623 return (0); 624 } 625 626 static void 627 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr) 628 { 629 630 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 631 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX; 632 } 633 634 static int 635 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 636 { 637 struct epair_softc *sc; 638 struct ifreq *ifr; 639 int error; 640 641 ifr = (struct ifreq *)data; 642 switch (cmd) { 643 case SIOCSIFFLAGS: 644 case SIOCADDMULTI: 645 case SIOCDELMULTI: 646 error = 0; 647 break; 648 649 case SIOCSIFMEDIA: 650 case SIOCGIFMEDIA: 651 sc = ifp->if_softc; 652 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); 653 break; 654 655 case SIOCSIFMTU: 656 /* We basically allow all kinds of MTUs. */ 657 ifp->if_mtu = ifr->ifr_mtu; 658 error = 0; 659 break; 660 661 default: 662 /* Let the common ethernet handler process this. */ 663 error = ether_ioctl(ifp, cmd, data); 664 break; 665 } 666 667 return (error); 668 } 669 670 static void 671 epair_init(void *dummy __unused) 672 { 673 } 674 675 676 /* 677 * Interface cloning functions. 678 * We use our private ones so that we can create/destroy our secondary 679 * device along with the primary one. 680 */ 681 static int 682 epair_clone_match(struct if_clone *ifc, const char *name) 683 { 684 const char *cp; 685 686 DPRINTF("name='%s'\n", name); 687 688 /* 689 * Our base name is epair. 690 * Our interfaces will be named epair<n>[ab]. 691 * So accept anything of the following list: 692 * - epair 693 * - epair<n> 694 * but not the epair<n>[ab] versions. 695 */ 696 if (strncmp(epairname, name, sizeof(epairname)-1) != 0) 697 return (0); 698 699 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) { 700 if (*cp < '0' || *cp > '9') 701 return (0); 702 } 703 704 return (1); 705 } 706 707 static int 708 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 709 { 710 struct epair_softc *sca, *scb; 711 struct ifnet *ifp; 712 char *dp; 713 int error, unit, wildcard; 714 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 715 716 /* 717 * We are abusing params to create our second interface. 718 * Actually we already created it and called if_clone_create() 719 * for it to do the official insertion procedure the moment we knew 720 * it cannot fail anymore. So just do attach it here. 721 */ 722 if (params) { 723 scb = (struct epair_softc *)params; 724 ifp = scb->ifp; 725 /* Assign a hopefully unique, locally administered etheraddr. */ 726 eaddr[0] = 0x02; 727 eaddr[3] = (ifp->if_index >> 8) & 0xff; 728 eaddr[4] = ifp->if_index & 0xff; 729 eaddr[5] = 0x0b; 730 ether_ifattach(ifp, eaddr); 731 /* Correctly set the name for the cloner list. */ 732 strlcpy(name, scb->ifp->if_xname, len); 733 return (0); 734 } 735 736 /* Try to see if a special unit was requested. */ 737 error = ifc_name2unit(name, &unit); 738 if (error != 0) 739 return (error); 740 wildcard = (unit < 0); 741 742 error = ifc_alloc_unit(ifc, &unit); 743 if (error != 0) 744 return (error); 745 746 /* 747 * If no unit had been given, we need to adjust the ifName. 748 * Also make sure there is space for our extra [ab] suffix. 749 */ 750 for (dp = name; *dp != '\0'; dp++); 751 if (wildcard) { 752 error = snprintf(dp, len - (dp - name), "%d", unit); 753 if (error > len - (dp - name) - 1) { 754 /* ifName too long. */ 755 ifc_free_unit(ifc, unit); 756 return (ENOSPC); 757 } 758 dp += error; 759 } 760 if (len - (dp - name) - 1 < 1) { 761 /* No space left for our [ab] suffix. */ 762 ifc_free_unit(ifc, unit); 763 return (ENOSPC); 764 } 765 *dp = 'b'; 766 /* Must not change dp so we can replace 'a' by 'b' later. */ 767 *(dp+1) = '\0'; 768 769 /* Check if 'a' and 'b' interfaces already exist. */ 770 if (ifunit(name) != NULL) 771 return (EEXIST); 772 *dp = 'a'; 773 if (ifunit(name) != NULL) 774 return (EEXIST); 775 776 /* Allocate memory for both [ab] interfaces */ 777 sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 778 EPAIR_REFCOUNT_INIT(&sca->refcount, 1); 779 sca->ifp = if_alloc(IFT_ETHER); 780 if (sca->ifp == NULL) { 781 free(sca, M_EPAIR); 782 ifc_free_unit(ifc, unit); 783 return (ENOSPC); 784 } 785 786 scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); 787 EPAIR_REFCOUNT_INIT(&scb->refcount, 1); 788 scb->ifp = if_alloc(IFT_ETHER); 789 if (scb->ifp == NULL) { 790 free(scb, M_EPAIR); 791 if_free(sca->ifp); 792 free(sca, M_EPAIR); 793 ifc_free_unit(ifc, unit); 794 return (ENOSPC); 795 } 796 797 /* 798 * Cross-reference the interfaces so we will be able to free both. 799 */ 800 sca->oifp = scb->ifp; 801 scb->oifp = sca->ifp; 802 803 /* 804 * Calculate the cpuid for netisr queueing based on the 805 * ifIndex of the interfaces. As long as we cannot configure 806 * this or use cpuset information easily we cannot guarantee 807 * cache locality but we can at least allow parallelism. 808 */ 809 sca->cpuid = 810 netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount()); 811 scb->cpuid = 812 netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount()); 813 814 /* Initialise pseudo media types. */ 815 ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status); 816 ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL); 817 ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T); 818 ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status); 819 ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL); 820 ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T); 821 822 /* Finish initialization of interface <n>a. */ 823 ifp = sca->ifp; 824 ifp->if_softc = sca; 825 strlcpy(ifp->if_xname, name, IFNAMSIZ); 826 ifp->if_dname = epairname; 827 ifp->if_dunit = unit; 828 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 829 ifp->if_capabilities = IFCAP_VLAN_MTU; 830 ifp->if_capenable = IFCAP_VLAN_MTU; 831 ifp->if_start = epair_start; 832 ifp->if_ioctl = epair_ioctl; 833 ifp->if_init = epair_init; 834 ifp->if_snd.ifq_maxlen = ifqmaxlen; 835 /* Assign a hopefully unique, locally administered etheraddr. */ 836 eaddr[0] = 0x02; 837 eaddr[3] = (ifp->if_index >> 8) & 0xff; 838 eaddr[4] = ifp->if_index & 0xff; 839 eaddr[5] = 0x0a; 840 ether_ifattach(ifp, eaddr); 841 sca->if_qflush = ifp->if_qflush; 842 ifp->if_qflush = epair_qflush; 843 ifp->if_transmit = epair_transmit; 844 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 845 846 /* Swap the name and finish initialization of interface <n>b. */ 847 *dp = 'b'; 848 849 ifp = scb->ifp; 850 ifp->if_softc = scb; 851 strlcpy(ifp->if_xname, name, IFNAMSIZ); 852 ifp->if_dname = epairname; 853 ifp->if_dunit = unit; 854 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 855 ifp->if_capabilities = IFCAP_VLAN_MTU; 856 ifp->if_capenable = IFCAP_VLAN_MTU; 857 ifp->if_start = epair_start; 858 ifp->if_ioctl = epair_ioctl; 859 ifp->if_init = epair_init; 860 ifp->if_snd.ifq_maxlen = ifqmaxlen; 861 /* We need to play some tricks here for the second interface. */ 862 strlcpy(name, epairname, len); 863 error = if_clone_create(name, len, (caddr_t)scb); 864 if (error) 865 panic("%s: if_clone_create() for our 2nd iface failed: %d", 866 __func__, error); 867 scb->if_qflush = ifp->if_qflush; 868 ifp->if_qflush = epair_qflush; 869 ifp->if_transmit = epair_transmit; 870 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */ 871 872 /* 873 * Restore name to <n>a as the ifp for this will go into the 874 * cloner list for the initial call. 875 */ 876 strlcpy(name, sca->ifp->if_xname, len); 877 DPRINTF("name='%s/%db' created sca=%p scb=%p\n", name, unit, sca, scb); 878 879 /* Tell the world, that we are ready to rock. */ 880 sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; 881 scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; 882 if_link_state_change(sca->ifp, LINK_STATE_UP); 883 if_link_state_change(scb->ifp, LINK_STATE_UP); 884 885 return (0); 886 } 887 888 static int 889 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 890 { 891 struct ifnet *oifp; 892 struct epair_softc *sca, *scb; 893 int unit, error; 894 895 DPRINTF("ifp=%p\n", ifp); 896 897 /* 898 * In case we called into if_clone_destroyif() ourselves 899 * again to remove the second interface, the softc will be 900 * NULL. In that case so not do anything but return success. 901 */ 902 if (ifp->if_softc == NULL) 903 return (0); 904 905 unit = ifp->if_dunit; 906 sca = ifp->if_softc; 907 oifp = sca->oifp; 908 scb = oifp->if_softc; 909 910 DPRINTF("ifp=%p oifp=%p\n", ifp, oifp); 911 if_link_state_change(ifp, LINK_STATE_DOWN); 912 if_link_state_change(oifp, LINK_STATE_DOWN); 913 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 914 oifp->if_drv_flags &= ~IFF_DRV_RUNNING; 915 916 /* 917 * Get rid of our second half. As the other of the two 918 * interfaces may reside in a different vnet, we need to 919 * switch before freeing them. 920 */ 921 CURVNET_SET_QUIET(oifp->if_vnet); 922 ether_ifdetach(oifp); 923 /* 924 * Wait for all packets to be dispatched to if_input. 925 * The numbers can only go down as the interface is 926 * detached so there is no need to use atomics. 927 */ 928 DPRINTF("scb refcnt=%u\n", scb->refcount); 929 EPAIR_REFCOUNT_ASSERT(scb->refcount == 1, 930 ("%s: ifp=%p scb->refcount!=1: %d", __func__, oifp, scb->refcount)); 931 oifp->if_softc = NULL; 932 error = if_clone_destroyif(ifc, oifp); 933 if (error) 934 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", 935 __func__, error); 936 if_free(oifp); 937 ifmedia_removeall(&scb->media); 938 free(scb, M_EPAIR); 939 CURVNET_RESTORE(); 940 941 ether_ifdetach(ifp); 942 /* 943 * Wait for all packets to be dispatched to if_input. 944 */ 945 DPRINTF("sca refcnt=%u\n", sca->refcount); 946 EPAIR_REFCOUNT_ASSERT(sca->refcount == 1, 947 ("%s: ifp=%p sca->refcount!=1: %d", __func__, ifp, sca->refcount)); 948 if_free(ifp); 949 ifmedia_removeall(&sca->media); 950 free(sca, M_EPAIR); 951 ifc_free_unit(ifc, unit); 952 953 return (0); 954 } 955 956 static void 957 vnet_epair_init(const void *unused __unused) 958 { 959 960 V_epair_cloner = if_clone_advanced(epairname, 0, 961 epair_clone_match, epair_clone_create, epair_clone_destroy); 962 } 963 VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 964 vnet_epair_init, NULL); 965 966 static void 967 vnet_epair_uninit(const void *unused __unused) 968 { 969 970 if_clone_detach(V_epair_cloner); 971 } 972 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 973 vnet_epair_uninit, NULL); 974 975 static int 976 epair_modevent(module_t mod, int type, void *data) 977 { 978 int qlimit; 979 980 switch (type) { 981 case MOD_LOAD: 982 /* For now limit us to one global mutex and one inq. */ 983 epair_dpcpu_init(); 984 epair_nh.nh_qlimit = 42 * ifqmaxlen; /* 42 shall be the number. */ 985 if (TUNABLE_INT_FETCH("net.link.epair.netisr_maxqlen", &qlimit)) 986 epair_nh.nh_qlimit = qlimit; 987 netisr_register(&epair_nh); 988 if (bootverbose) 989 printf("%s initialized.\n", epairname); 990 break; 991 case MOD_UNLOAD: 992 netisr_unregister(&epair_nh); 993 epair_dpcpu_detach(); 994 if (bootverbose) 995 printf("%s unloaded.\n", epairname); 996 break; 997 default: 998 return (EOPNOTSUPP); 999 } 1000 return (0); 1001 } 1002 1003 static moduledata_t epair_mod = { 1004 "if_epair", 1005 epair_modevent, 1006 0 1007 }; 1008 1009 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1010 MODULE_VERSION(if_epair, 1); 1011