1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008 The FreeBSD Foundation
5 * Copyright (c) 2009-2021 Bjoern A. Zeeb <bz@FreeBSD.org>
6 *
7 * This software was developed by CK Software GmbH under sponsorship
8 * from the FreeBSD Foundation.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * A pair of virtual back-to-back connected ethernet like interfaces
34 * (``two interfaces with a virtual cross-over cable'').
35 *
36 * This is mostly intended to be used to provide connectivity between
37 * different virtual network stack instances.
38 */
39
40 #include <sys/cdefs.h>
41 #include "opt_rss.h"
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44
45 #include <sys/param.h>
46 #include <sys/bus.h>
47 #include <sys/hash.h>
48 #include <sys/interrupt.h>
49 #include <sys/jail.h>
50 #include <sys/kernel.h>
51 #include <sys/libkern.h>
52 #include <sys/malloc.h>
53 #include <sys/mbuf.h>
54 #include <sys/module.h>
55 #include <sys/proc.h>
56 #include <sys/queue.h>
57 #include <sys/sched.h>
58 #include <sys/smp.h>
59 #include <sys/socket.h>
60 #include <sys/sockio.h>
61 #include <sys/sysctl.h>
62 #include <sys/taskqueue.h>
63
64 #include <net/bpf.h>
65 #include <net/ethernet.h>
66 #include <net/if.h>
67 #include <net/if_var.h>
68 #include <net/if_clone.h>
69 #include <net/if_media.h>
70 #include <net/if_private.h>
71 #include <net/if_types.h>
72 #include <net/netisr.h>
73 #ifdef RSS
74 #include <net/rss_config.h>
75 #ifdef INET
76 #include <netinet/in_rss.h>
77 #endif
78 #ifdef INET6
79 #include <netinet6/in6_rss.h>
80 #endif
81 #endif
82 #include <net/vnet.h>
83
/* Base name of the driver; also used as the malloc type short description. */
static const char epairname[] = "epair";
/* Maximum length handed to mbufq_init() for every per-queue mbufq. */
#define RXRSIZE 4096 /* Probably overkill by 4-8x. */

static MALLOC_DEFINE(M_EPAIR, epairname,
    "Pair of virtual cross-over connected Ethernet-like interfaces");

/* Per-VNET cloner handle, set in vnet_epair_init(). */
VNET_DEFINE_STATIC(struct if_clone *, epair_cloner);
#define V_epair_cloner VNET(epair_cloner)

/*
 * Counter feeding the MAC address hash in epair_generate_mac().
 * It is read and updated under EPAIR_LOCK().
 */
static unsigned int next_index = 0;
#define EPAIR_LOCK_INIT() mtx_init(&epair_n_index_mtx, "epairidx", \
    NULL, MTX_DEF)
#define EPAIR_LOCK_DESTROY() mtx_destroy(&epair_n_index_mtx)
#define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx)
#define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx)

SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, OID_AUTO, epair, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Pair of virtual cross-over connected Ethernet-like interfaces");

/*
 * When true, MAC addresses come from ether_gen_addr_byname(); when false,
 * the "a" side uses epair_generate_mac() and the "b" side copies the "a"
 * address with the last byte replaced.
 */
static bool use_ether_gen_addr = true;
SYSCTL_BOOL(_net_link_epair, OID_AUTO, ether_gen_addr, CTLFLAG_RWTUN,
    &use_ether_gen_addr, false,
    "Generate MAC with FreeBSD OUI using ether_gen_addr(9)");
108
struct epair_softc;

/*
 * One packet queue of an epair interface.  epair_menq() appends packets and
 * epair_tx_start_deferred() flushes them into the interface's input path.
 */
struct epair_queue {
	struct mtx mtx;			/* Protects q and state. */
	struct mbufq q;			/* Packets waiting to be input. */
	int id;				/* Index into epair_tasks.tq[]. */
	enum {
		EPAIR_QUEUE_IDLE,	/* No task scheduled. */
		EPAIR_QUEUE_WAKING,	/* Task enqueued, not yet flushing. */
		EPAIR_QUEUE_RUNNING,	/* Task flushed q and is delivering. */
	} state;
	struct task tx_task;		/* Runs epair_tx_start_deferred(). */
	struct epair_softc *sc;		/* Softc owning this queue. */
};
122
/* Mutex behind the EPAIR_LOCK*() macros; guards next_index. */
static struct mtx epair_n_index_mtx;

/* Per-interface state; one instance for each half of a pair. */
struct epair_softc {
	struct ifnet *ifp; /* This ifp. */
	struct ifnet *oifp; /* other ifp of pair. */
	int num_queues;			/* Number of entries in queues[]. */
	struct epair_queue *queues;	/* Array allocated in epair_alloc_sc(). */
	struct ifmedia media; /* Media config (fake). */
	/* NOTE(review): entry is not referenced by the code visible here. */
	STAILQ_ENTRY(epair_softc) entry;
};
132
/*
 * Module-wide set of taskqueues created in epair_mod_init() and released in
 * epair_mod_cleanup().
 */
struct epair_tasks_t {
	int tasks;			/* Number of taskqueues created. */
	struct taskqueue *tq[MAXCPU];	/* Indexed by epair_queue.id. */
};

static struct epair_tasks_t epair_tasks;
139
140 static void
epair_clear_mbuf(struct mbuf * m)141 epair_clear_mbuf(struct mbuf *m)
142 {
143 M_ASSERTPKTHDR(m);
144
145 /* Remove any CSUM_SND_TAG as ether_input will barf. */
146 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
147 m_snd_tag_rele(m->m_pkthdr.snd_tag);
148 m->m_pkthdr.snd_tag = NULL;
149 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
150 }
151
152 /* Clear vlan information. */
153 m->m_flags &= ~M_VLANTAG;
154 m->m_pkthdr.ether_vtag = 0;
155
156 m_tag_delete_nonpersistent(m);
157 }
158
159 static void
epair_tx_start_deferred(void * arg,int pending)160 epair_tx_start_deferred(void *arg, int pending)
161 {
162 struct epair_queue *q = (struct epair_queue *)arg;
163 if_t ifp;
164 struct mbuf *m, *n;
165 bool resched;
166
167 ifp = q->sc->ifp;
168
169 if_ref(ifp);
170 CURVNET_SET(ifp->if_vnet);
171
172 mtx_lock(&q->mtx);
173 m = mbufq_flush(&q->q);
174 q->state = EPAIR_QUEUE_RUNNING;
175 mtx_unlock(&q->mtx);
176
177 while (m != NULL) {
178 n = STAILQ_NEXT(m, m_stailqpkt);
179 m->m_nextpkt = NULL;
180 if_input(ifp, m);
181 m = n;
182 }
183
184 /*
185 * Avoid flushing the queue more than once per task. We can otherwise
186 * end up starving ourselves in a multi-epair routing configuration.
187 */
188 mtx_lock(&q->mtx);
189 if (!mbufq_empty(&q->q)) {
190 resched = true;
191 q->state = EPAIR_QUEUE_WAKING;
192 } else {
193 resched = false;
194 q->state = EPAIR_QUEUE_IDLE;
195 }
196 mtx_unlock(&q->mtx);
197
198 if (resched)
199 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task);
200
201 CURVNET_RESTORE();
202 if_rele(ifp);
203 }
204
205 static struct epair_queue *
epair_select_queue(struct epair_softc * sc,struct mbuf * m)206 epair_select_queue(struct epair_softc *sc, struct mbuf *m)
207 {
208 uint32_t bucket;
209 #ifdef RSS
210 struct ether_header *eh;
211 int ret;
212
213 ret = rss_m2bucket(m, &bucket);
214 if (ret) {
215 /* Actually hash the packet. */
216 eh = mtod(m, struct ether_header *);
217
218 switch (ntohs(eh->ether_type)) {
219 #ifdef INET
220 case ETHERTYPE_IP:
221 rss_soft_m2cpuid_v4(m, 0, &bucket);
222 break;
223 #endif
224 #ifdef INET6
225 case ETHERTYPE_IPV6:
226 rss_soft_m2cpuid_v6(m, 0, &bucket);
227 break;
228 #endif
229 default:
230 bucket = 0;
231 break;
232 }
233 }
234 bucket %= sc->num_queues;
235 #else
236 bucket = 0;
237 #endif
238 return (&sc->queues[bucket]);
239 }
240
241 static void
epair_prepare_mbuf(struct mbuf * m,struct ifnet * src_ifp)242 epair_prepare_mbuf(struct mbuf *m, struct ifnet *src_ifp)
243 {
244 M_ASSERTPKTHDR(m);
245 epair_clear_mbuf(m);
246 if_setrcvif(m, src_ifp);
247 M_SETFIB(m, src_ifp->if_fib);
248
249 MPASS(m->m_nextpkt == NULL);
250 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
251 }
252
253 static void
epair_menq(struct mbuf * m,struct epair_softc * osc)254 epair_menq(struct mbuf *m, struct epair_softc *osc)
255 {
256 struct epair_queue *q;
257 struct ifnet *ifp, *oifp;
258 int error, len;
259 bool mcast;
260
261 /*
262 * I know this looks weird. We pass the "other sc" as we need that one
263 * and can get both ifps from it as well.
264 */
265 oifp = osc->ifp;
266 ifp = osc->oifp;
267
268 epair_prepare_mbuf(m, oifp);
269
270 /* Save values as once the mbuf is queued, it's not ours anymore. */
271 len = m->m_pkthdr.len;
272 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
273
274 q = epair_select_queue(osc, m);
275
276 mtx_lock(&q->mtx);
277 if (q->state == EPAIR_QUEUE_IDLE) {
278 q->state = EPAIR_QUEUE_WAKING;
279 taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task);
280 }
281 error = mbufq_enqueue(&q->q, m);
282 mtx_unlock(&q->mtx);
283
284 if (error != 0) {
285 m_freem(m);
286 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
287 } else {
288 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
289 if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
290 if (mcast)
291 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
292 if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
293 }
294 }
295
296 static void
epair_start(struct ifnet * ifp)297 epair_start(struct ifnet *ifp)
298 {
299 struct mbuf *m;
300 struct epair_softc *sc;
301 struct ifnet *oifp;
302
303 /*
304 * We get packets here from ether_output via if_handoff()
305 * and need to put them into the input queue of the oifp
306 * and will put the packet into the receive-queue (rxq) of the
307 * other interface (oifp) of our pair.
308 */
309 sc = ifp->if_softc;
310 oifp = sc->oifp;
311 sc = oifp->if_softc;
312 for (;;) {
313 IFQ_DEQUEUE(&ifp->if_snd, m);
314 if (m == NULL)
315 break;
316 M_ASSERTPKTHDR(m);
317 BPF_MTAP(ifp, m);
318
319 /* In case either interface is not usable drop the packet. */
320 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
321 (ifp->if_flags & IFF_UP) == 0 ||
322 (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
323 (oifp->if_flags & IFF_UP) == 0) {
324 m_freem(m);
325 continue;
326 }
327
328 epair_menq(m, sc);
329 }
330 }
331
332 static int
epair_transmit(struct ifnet * ifp,struct mbuf * m)333 epair_transmit(struct ifnet *ifp, struct mbuf *m)
334 {
335 struct epair_softc *sc;
336 struct ifnet *oifp;
337 #ifdef ALTQ
338 int len;
339 bool mcast;
340 #endif
341
342 if (m == NULL)
343 return (0);
344 M_ASSERTPKTHDR(m);
345
346 /*
347 * We could just transmit this, but it makes testing easier if we're a
348 * little bit more like real hardware.
349 * Allow just that little bit extra for ethernet (and vlan) headers.
350 */
351 if (m->m_pkthdr.len > (ifp->if_mtu + sizeof(struct ether_vlan_header))) {
352 m_freem(m);
353 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
354 return (E2BIG);
355 }
356
357 /*
358 * We are not going to use the interface en/dequeue mechanism
359 * on the TX side. We are called from ether_output_frame()
360 * and will put the packet into the receive-queue (rxq) of the
361 * other interface (oifp) of our pair.
362 */
363 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
364 m_freem(m);
365 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
366 return (ENXIO);
367 }
368 if ((ifp->if_flags & IFF_UP) == 0) {
369 m_freem(m);
370 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
371 return (ENETDOWN);
372 }
373
374 BPF_MTAP(ifp, m);
375
376 /*
377 * In case the outgoing interface is not usable,
378 * drop the packet.
379 */
380 sc = ifp->if_softc;
381 oifp = sc->oifp;
382 if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
383 (oifp->if_flags & IFF_UP) == 0) {
384 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
385 m_freem(m);
386 return (0);
387 }
388
389 #ifdef ALTQ
390 len = m->m_pkthdr.len;
391 mcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
392 int error = 0;
393
394 /* Support ALTQ via the classic if_start() path. */
395 IF_LOCK(&ifp->if_snd);
396 if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
397 ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
398 if (error)
399 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
400 IF_UNLOCK(&ifp->if_snd);
401 if (!error) {
402 if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
403 if (mcast)
404 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
405 epair_start(ifp);
406 }
407 return (error);
408 }
409 IF_UNLOCK(&ifp->if_snd);
410 #endif
411
412 epair_menq(m, oifp->if_softc);
413 return (0);
414 }
415
416 static void
epair_qflush(struct ifnet * ifp __unused)417 epair_qflush(struct ifnet *ifp __unused)
418 {
419 }
420
421 static int
epair_media_change(struct ifnet * ifp __unused)422 epair_media_change(struct ifnet *ifp __unused)
423 {
424
425 /* Do nothing. */
426 return (0);
427 }
428
429 static void
epair_media_status(struct ifnet * ifp __unused,struct ifmediareq * imr)430 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr)
431 {
432
433 imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
434 imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
435 }
436
437 static int
epair_ioctl(struct ifnet * ifp,u_long cmd,caddr_t data)438 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
439 {
440 struct epair_softc *sc;
441 struct ifreq *ifr;
442 int error;
443
444 ifr = (struct ifreq *)data;
445 switch (cmd) {
446 case SIOCSIFFLAGS:
447 case SIOCADDMULTI:
448 case SIOCDELMULTI:
449 error = 0;
450 break;
451
452 case SIOCSIFMEDIA:
453 case SIOCGIFMEDIA:
454 sc = ifp->if_softc;
455 error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd);
456 break;
457
458 case SIOCSIFMTU:
459 /* We basically allow all kinds of MTUs. */
460 ifp->if_mtu = ifr->ifr_mtu;
461 error = 0;
462 break;
463
464 default:
465 /* Let the common ethernet handler process this. */
466 error = ether_ioctl(ifp, cmd, data);
467 break;
468 }
469
470 return (error);
471 }
472
473 static void
epair_init(void * dummy __unused)474 epair_init(void *dummy __unused)
475 {
476 }
477
478 /*
479 * Interface cloning functions.
480 * We use our private ones so that we can create/destroy our secondary
481 * device along with the primary one.
482 */
483 static int
epair_clone_match(struct if_clone * ifc,const char * name)484 epair_clone_match(struct if_clone *ifc, const char *name)
485 {
486 const char *cp;
487
488 /*
489 * Our base name is epair.
490 * Our interfaces will be named epair<n>[ab].
491 * So accept anything of the following list:
492 * - epair
493 * - epair<n>
494 * but not the epair<n>[ab] versions.
495 */
496 if (strncmp(epairname, name, sizeof(epairname)-1) != 0)
497 return (0);
498
499 for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) {
500 if (*cp < '0' || *cp > '9')
501 return (0);
502 }
503
504 return (1);
505 }
506
507 static void
epair_generate_mac_byname(struct epair_softc * sc,uint8_t eaddr[])508 epair_generate_mac_byname(struct epair_softc *sc, uint8_t eaddr[])
509 {
510 struct ether_addr gen_eaddr;
511 int i;
512
513 ether_gen_addr_byname(if_name(sc->ifp), &gen_eaddr);
514 for (i = 0; i < ETHER_ADDR_LEN; i++)
515 eaddr[i] = gen_eaddr.octet[i];
516 }
517
518 static void
epair_clone_add(struct if_clone * ifc,struct epair_softc * scb)519 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb)
520 {
521 struct ifnet *ifp;
522 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
523
524 ifp = scb->ifp;
525 if (!use_ether_gen_addr) {
526 /* Copy epairNa etheraddr and change the last byte. */
527 memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
528 eaddr[5] = 0x0b;
529 } else
530 epair_generate_mac_byname(scb, eaddr);
531 ether_ifattach(ifp, eaddr);
532
533 if_clone_addif(ifc, ifp);
534 }
535
536 static struct epair_softc *
epair_alloc_sc(struct if_clone * ifc)537 epair_alloc_sc(struct if_clone *ifc)
538 {
539 struct epair_softc *sc;
540
541 struct ifnet *ifp = if_alloc(IFT_ETHER);
542 sc = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
543 sc->ifp = ifp;
544 sc->num_queues = epair_tasks.tasks;
545 sc->queues = mallocarray(sc->num_queues, sizeof(struct epair_queue),
546 M_EPAIR, M_WAITOK);
547 for (int i = 0; i < sc->num_queues; i++) {
548 struct epair_queue *q = &sc->queues[i];
549 q->id = i;
550 q->state = EPAIR_QUEUE_IDLE;
551 mtx_init(&q->mtx, "epairq", NULL, MTX_DEF | MTX_NEW);
552 mbufq_init(&q->q, RXRSIZE);
553 q->sc = sc;
554 NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q);
555 }
556
557 /* Initialise pseudo media types. */
558 ifmedia_init(&sc->media, 0, epair_media_change, epair_media_status);
559 ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_T, 0, NULL);
560 ifmedia_set(&sc->media, IFM_ETHER | IFM_10G_T);
561
562 return (sc);
563 }
564
565 static void
epair_setup_ifp(struct epair_softc * sc,char * name,int unit)566 epair_setup_ifp(struct epair_softc *sc, char *name, int unit)
567 {
568 struct ifnet *ifp = sc->ifp;
569
570 ifp->if_softc = sc;
571 strlcpy(ifp->if_xname, name, IFNAMSIZ);
572 ifp->if_dname = epairname;
573 ifp->if_dunit = unit;
574 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
575 ifp->if_capabilities = IFCAP_VLAN_MTU;
576 ifp->if_capenable = IFCAP_VLAN_MTU;
577 ifp->if_transmit = epair_transmit;
578 ifp->if_qflush = epair_qflush;
579 ifp->if_start = epair_start;
580 ifp->if_ioctl = epair_ioctl;
581 ifp->if_init = epair_init;
582 if_setsendqlen(ifp, ifqmaxlen);
583 if_setsendqready(ifp);
584
585 ifp->if_baudrate = IF_Gbps(10); /* arbitrary maximum */
586 }
587
588 static void
epair_generate_mac(struct epair_softc * sc,uint8_t * eaddr)589 epair_generate_mac(struct epair_softc *sc, uint8_t *eaddr)
590 {
591 uint32_t key[3];
592 uint32_t hash;
593 uint64_t hostid;
594
595 EPAIR_LOCK();
596 #ifdef SMP
597 /* Get an approximate distribution. */
598 hash = next_index % mp_ncpus;
599 #else
600 hash = 0;
601 #endif
602 EPAIR_UNLOCK();
603
604 /*
605 * Calculate the etheraddr hashing the hostid and the
606 * interface index. The result would be hopefully unique.
607 * Note that the "a" component of an epair instance may get moved
608 * to a different VNET after creation. In that case its index
609 * will be freed and the index can get reused by new epair instance.
610 * Make sure we do not create same etheraddr again.
611 */
612 getcredhostid(curthread->td_ucred, (unsigned long *)&hostid);
613 if (hostid == 0)
614 arc4rand(&hostid, sizeof(hostid), 0);
615
616 struct ifnet *ifp = sc->ifp;
617 EPAIR_LOCK();
618 if (ifp->if_index > next_index)
619 next_index = ifp->if_index;
620 else
621 next_index++;
622
623 key[0] = (uint32_t)next_index;
624 EPAIR_UNLOCK();
625 key[1] = (uint32_t)(hostid & 0xffffffff);
626 key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff);
627 hash = jenkins_hash32(key, 3, 0);
628
629 eaddr[0] = 0x02;
630 memcpy(&eaddr[1], &hash, 4);
631 eaddr[5] = 0x0a;
632 }
633
634 static void
epair_free_sc(struct epair_softc * sc)635 epair_free_sc(struct epair_softc *sc)
636 {
637
638 if_free(sc->ifp);
639 ifmedia_removeall(&sc->media);
640 for (int i = 0; i < sc->num_queues; i++) {
641 struct epair_queue *q = &sc->queues[i];
642 mtx_destroy(&q->mtx);
643 }
644 free(sc->queues, M_EPAIR);
645 free(sc, M_EPAIR);
646 }
647
648 static void
epair_set_state(struct ifnet * ifp,bool running)649 epair_set_state(struct ifnet *ifp, bool running)
650 {
651 if (running) {
652 ifp->if_drv_flags |= IFF_DRV_RUNNING;
653 if_link_state_change(ifp, LINK_STATE_UP);
654 } else {
655 if_link_state_change(ifp, LINK_STATE_DOWN);
656 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
657 }
658 }
659
660 static int
epair_handle_unit(struct if_clone * ifc,char * name,size_t len,int * punit)661 epair_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit)
662 {
663 int error = 0, unit, wildcard;
664 char *dp;
665
666 /* Try to see if a special unit was requested. */
667 error = ifc_name2unit(name, &unit);
668 if (error != 0)
669 return (error);
670 wildcard = (unit < 0);
671
672 error = ifc_alloc_unit(ifc, &unit);
673 if (error != 0)
674 return (error);
675
676 /*
677 * If no unit had been given, we need to adjust the ifName.
678 * Also make sure there is space for our extra [ab] suffix.
679 */
680 for (dp = name; *dp != '\0'; dp++);
681 if (wildcard) {
682 int slen = snprintf(dp, len - (dp - name), "%d", unit);
683 if (slen > len - (dp - name) - 1) {
684 /* ifName too long. */
685 error = ENOSPC;
686 goto done;
687 }
688 dp += slen;
689 }
690 if (len - (dp - name) - 1 < 1) {
691 /* No space left for our [ab] suffix. */
692 error = ENOSPC;
693 goto done;
694 }
695 *dp = 'b';
696 /* Must not change dp so we can replace 'a' by 'b' later. */
697 *(dp+1) = '\0';
698
699 /* Check if 'a' and 'b' interfaces already exist. */
700 if (ifunit(name) != NULL) {
701 error = EEXIST;
702 goto done;
703 }
704
705 *dp = 'a';
706 if (ifunit(name) != NULL) {
707 error = EEXIST;
708 goto done;
709 }
710 *punit = unit;
711 done:
712 if (error != 0)
713 ifc_free_unit(ifc, unit);
714
715 return (error);
716 }
717
718 static int
epair_clone_create(struct if_clone * ifc,char * name,size_t len,struct ifc_data * ifd,struct ifnet ** ifpp)719 epair_clone_create(struct if_clone *ifc, char *name, size_t len,
720 struct ifc_data *ifd, struct ifnet **ifpp)
721 {
722 struct epair_softc *sca, *scb;
723 struct ifnet *ifp;
724 char *dp;
725 int error, unit;
726 uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
727
728 error = epair_handle_unit(ifc, name, len, &unit);
729 if (error != 0)
730 return (error);
731
732 /* Allocate memory for both [ab] interfaces */
733 sca = epair_alloc_sc(ifc);
734 scb = epair_alloc_sc(ifc);
735
736 /*
737 * Cross-reference the interfaces so we will be able to free both.
738 */
739 sca->oifp = scb->ifp;
740 scb->oifp = sca->ifp;
741
742 /* Finish initialization of interface <n>a. */
743 ifp = sca->ifp;
744 epair_setup_ifp(sca, name, unit);
745 if (!use_ether_gen_addr)
746 epair_generate_mac(sca, eaddr);
747 else
748 epair_generate_mac_byname(sca, eaddr);
749
750 ether_ifattach(ifp, eaddr);
751
752 /* Swap the name and finish initialization of interface <n>b. */
753 dp = name + strlen(name) - 1;
754 *dp = 'b';
755
756 epair_setup_ifp(scb, name, unit);
757
758 ifp = scb->ifp;
759 /* We need to play some tricks here for the second interface. */
760 strlcpy(name, epairname, len);
761 /* Correctly set the name for the cloner list. */
762 strlcpy(name, scb->ifp->if_xname, len);
763
764 epair_clone_add(ifc, scb);
765
766 /*
767 * Restore name to <n>a as the ifp for this will go into the
768 * cloner list for the initial call.
769 */
770 strlcpy(name, sca->ifp->if_xname, len);
771
772 /* Tell the world, that we are ready to rock. */
773 epair_set_state(sca->ifp, true);
774 epair_set_state(scb->ifp, true);
775
776 *ifpp = sca->ifp;
777
778 return (0);
779 }
780
781 static void
epair_drain_rings(struct epair_softc * sc)782 epair_drain_rings(struct epair_softc *sc)
783 {
784 for (int i = 0; i < sc->num_queues; i++) {
785 struct epair_queue *q;
786 struct mbuf *m, *n;
787
788 q = &sc->queues[i];
789 mtx_lock(&q->mtx);
790 m = mbufq_flush(&q->q);
791 mtx_unlock(&q->mtx);
792
793 for (; m != NULL; m = n) {
794 n = m->m_nextpkt;
795 m_freem(m);
796 }
797 }
798 }
799
800 static int
epair_clone_destroy(struct if_clone * ifc,struct ifnet * ifp,uint32_t flags)801 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
802 {
803 struct ifnet *oifp;
804 struct epair_softc *sca, *scb;
805 int unit, error;
806
807 /*
808 * In case we called into if_clone_destroyif() ourselves
809 * again to remove the second interface, the softc will be
810 * NULL. In that case so not do anything but return success.
811 */
812 if (ifp->if_softc == NULL)
813 return (0);
814
815 unit = ifp->if_dunit;
816 sca = ifp->if_softc;
817 oifp = sca->oifp;
818 scb = oifp->if_softc;
819
820 /* Frist get the interfaces down and detached. */
821 epair_set_state(ifp, false);
822 epair_set_state(oifp, false);
823
824 ether_ifdetach(ifp);
825 ether_ifdetach(oifp);
826
827 /* Third free any queued packets and all the resources. */
828 CURVNET_SET_QUIET(oifp->if_vnet);
829 epair_drain_rings(scb);
830 oifp->if_softc = NULL;
831 error = if_clone_destroyif(ifc, oifp);
832 if (error)
833 panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
834 __func__, error);
835 epair_free_sc(scb);
836 CURVNET_RESTORE();
837
838 epair_drain_rings(sca);
839 epair_free_sc(sca);
840
841 /* Last free the cloner unit. */
842 ifc_free_unit(ifc, unit);
843
844 return (0);
845 }
846
847 static void
vnet_epair_init(const void * unused __unused)848 vnet_epair_init(const void *unused __unused)
849 {
850 struct if_clone_addreq req = {
851 .match_f = epair_clone_match,
852 .create_f = epair_clone_create,
853 .destroy_f = epair_clone_destroy,
854 };
855 V_epair_cloner = ifc_attach_cloner(epairname, &req);
856 }
857 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
858 vnet_epair_init, NULL);
859
860 static void
vnet_epair_uninit(const void * unused __unused)861 vnet_epair_uninit(const void *unused __unused)
862 {
863
864 ifc_detach_cloner(V_epair_cloner);
865 }
866 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
867 vnet_epair_uninit, NULL);
868
869 static int
epair_mod_init(void)870 epair_mod_init(void)
871 {
872 char name[32];
873 epair_tasks.tasks = 0;
874
875 #ifdef RSS
876 int cpu;
877
878 CPU_FOREACH(cpu) {
879 cpuset_t cpu_mask;
880
881 /* Pin to this CPU so we get appropriate NUMA allocations. */
882 thread_lock(curthread);
883 sched_bind(curthread, cpu);
884 thread_unlock(curthread);
885
886 snprintf(name, sizeof(name), "epair_task_%d", cpu);
887
888 epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK,
889 taskqueue_thread_enqueue,
890 &epair_tasks.tq[cpu]);
891 CPU_SETOF(cpu, &cpu_mask);
892 taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET,
893 &cpu_mask, "%s", name);
894
895 epair_tasks.tasks++;
896 }
897 thread_lock(curthread);
898 sched_unbind(curthread);
899 thread_unlock(curthread);
900 #else
901 snprintf(name, sizeof(name), "epair_task");
902
903 epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK,
904 taskqueue_thread_enqueue,
905 &epair_tasks.tq[0]);
906 taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name);
907
908 epair_tasks.tasks = 1;
909 #endif
910
911 return (0);
912 }
913
914 static void
epair_mod_cleanup(void)915 epair_mod_cleanup(void)
916 {
917
918 for (int i = 0; i < epair_tasks.tasks; i++) {
919 taskqueue_drain_all(epair_tasks.tq[i]);
920 taskqueue_free(epair_tasks.tq[i]);
921 }
922 }
923
924 static int
epair_modevent(module_t mod,int type,void * data)925 epair_modevent(module_t mod, int type, void *data)
926 {
927 int ret;
928
929 switch (type) {
930 case MOD_LOAD:
931 EPAIR_LOCK_INIT();
932 ret = epair_mod_init();
933 if (ret != 0)
934 return (ret);
935 if (bootverbose)
936 printf("%s: %s initialized.\n", __func__, epairname);
937 break;
938 case MOD_UNLOAD:
939 epair_mod_cleanup();
940 EPAIR_LOCK_DESTROY();
941 if (bootverbose)
942 printf("%s: %s unloaded.\n", __func__, epairname);
943 break;
944 default:
945 return (EOPNOTSUPP);
946 }
947 return (0);
948 }
949
/* Module description: name, event handler (epair_modevent) and no extra data. */
static moduledata_t epair_mod = {
	"if_epair",
	epair_modevent,
	0
};

DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
MODULE_VERSION(if_epair, 3);
958