xref: /freebsd/sys/net/if_lagg.c (revision ff0ba87247820afbdfdc1b307c803f7923d0e4d3)
1 /*	$OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $	*/
2 
3 /*
4  * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
5  * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
6  * Copyright (c) 2014 Marcelo Araujo <araujo@FreeBSD.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 #include <sys/cdefs.h>
22 __FBSDID("$FreeBSD$");
23 
24 #include "opt_inet.h"
25 #include "opt_inet6.h"
26 
27 #include <sys/param.h>
28 #include <sys/kernel.h>
29 #include <sys/malloc.h>
30 #include <sys/mbuf.h>
31 #include <sys/queue.h>
32 #include <sys/socket.h>
33 #include <sys/sockio.h>
34 #include <sys/sysctl.h>
35 #include <sys/module.h>
36 #include <sys/priv.h>
37 #include <sys/systm.h>
38 #include <sys/proc.h>
39 #include <sys/hash.h>
40 #include <sys/lock.h>
41 #include <sys/rmlock.h>
42 #include <sys/taskqueue.h>
43 #include <sys/eventhandler.h>
44 
45 #include <net/ethernet.h>
46 #include <net/if.h>
47 #include <net/if_clone.h>
48 #include <net/if_arp.h>
49 #include <net/if_dl.h>
50 #include <net/if_media.h>
51 #include <net/if_types.h>
52 #include <net/if_var.h>
53 #include <net/bpf.h>
54 #include <net/vnet.h>
55 
56 #if defined(INET) || defined(INET6)
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #endif
60 #ifdef INET
61 #include <netinet/in_systm.h>
62 #include <netinet/if_ether.h>
63 #endif
64 
65 #ifdef INET6
66 #include <netinet/ip6.h>
67 #include <netinet6/in6_var.h>
68 #include <netinet6/in6_ifattach.h>
69 #endif
70 
71 #include <net/if_vlan_var.h>
72 #include <net/if_lagg.h>
73 #include <net/ieee8023ad_lacp.h>
74 
75 /* Special flags we should propagate to the lagg ports. */
76 static struct {
77 	int flag;
78 	int (*func)(struct ifnet *, int);
79 } lagg_pflags[] = {
80 	{IFF_PROMISC, ifpromisc},
81 	{IFF_ALLMULTI, if_allmulti},
82 	{0, NULL}
83 };
84 
85 VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
86 #define	V_lagg_list	VNET(lagg_list)
87 static VNET_DEFINE(struct mtx, lagg_list_mtx);
88 #define	V_lagg_list_mtx	VNET(lagg_list_mtx)
89 #define	LAGG_LIST_LOCK_INIT(x)		mtx_init(&V_lagg_list_mtx, \
90 					"if_lagg list", NULL, MTX_DEF)
91 #define	LAGG_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_lagg_list_mtx)
92 #define	LAGG_LIST_LOCK(x)		mtx_lock(&V_lagg_list_mtx)
93 #define	LAGG_LIST_UNLOCK(x)		mtx_unlock(&V_lagg_list_mtx)
94 eventhandler_tag	lagg_detach_cookie = NULL;
95 
96 static int	lagg_clone_create(struct if_clone *, int, caddr_t);
97 static void	lagg_clone_destroy(struct ifnet *);
98 static VNET_DEFINE(struct if_clone *, lagg_cloner);
99 #define	V_lagg_cloner	VNET(lagg_cloner)
100 static const char laggname[] = "lagg";
101 
102 static void	lagg_lladdr(struct lagg_softc *, uint8_t *);
103 static void	lagg_capabilities(struct lagg_softc *);
104 static void	lagg_port_lladdr(struct lagg_port *, uint8_t *);
105 static void	lagg_port_setlladdr(void *, int);
106 static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
107 static int	lagg_port_destroy(struct lagg_port *, int);
108 static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
109 static void	lagg_linkstate(struct lagg_softc *);
110 static void	lagg_port_state(struct ifnet *, int);
111 static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
112 static int	lagg_port_output(struct ifnet *, struct mbuf *,
113 		    const struct sockaddr *, struct route *);
114 static void	lagg_port_ifdetach(void *arg __unused, struct ifnet *);
115 #ifdef LAGG_PORT_STACKING
116 static int	lagg_port_checkstacking(struct lagg_softc *);
117 #endif
118 static void	lagg_port2req(struct lagg_port *, struct lagg_reqport *);
119 static void	lagg_init(void *);
120 static void	lagg_stop(struct lagg_softc *);
121 static int	lagg_ioctl(struct ifnet *, u_long, caddr_t);
122 static int	lagg_ether_setmulti(struct lagg_softc *);
123 static int	lagg_ether_cmdmulti(struct lagg_port *, int);
124 static	int	lagg_setflag(struct lagg_port *, int, int,
125 		    int (*func)(struct ifnet *, int));
126 static	int	lagg_setflags(struct lagg_port *, int status);
127 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
128 static int	lagg_transmit(struct ifnet *, struct mbuf *);
129 static void	lagg_qflush(struct ifnet *);
130 static int	lagg_media_change(struct ifnet *);
131 static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
132 static struct lagg_port *lagg_link_active(struct lagg_softc *,
133 	    struct lagg_port *);
134 static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);
135 
136 /* Simple round robin */
137 static void	lagg_rr_attach(struct lagg_softc *);
138 static int	lagg_rr_start(struct lagg_softc *, struct mbuf *);
139 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
140 		    struct mbuf *);
141 
142 /* Active failover */
143 static int	lagg_fail_start(struct lagg_softc *, struct mbuf *);
144 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
145 		    struct mbuf *);
146 
147 /* Loadbalancing */
148 static void	lagg_lb_attach(struct lagg_softc *);
149 static void	lagg_lb_detach(struct lagg_softc *);
150 static int	lagg_lb_port_create(struct lagg_port *);
151 static void	lagg_lb_port_destroy(struct lagg_port *);
152 static int	lagg_lb_start(struct lagg_softc *, struct mbuf *);
153 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
154 		    struct mbuf *);
155 static int	lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);
156 
157 /* Broadcast */
158 static int    lagg_bcast_start(struct lagg_softc *, struct mbuf *);
159 static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *,
160 		    struct mbuf *);
161 
162 /* 802.3ad LACP */
163 static void	lagg_lacp_attach(struct lagg_softc *);
164 static void	lagg_lacp_detach(struct lagg_softc *);
165 static int	lagg_lacp_start(struct lagg_softc *, struct mbuf *);
166 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
167 		    struct mbuf *);
168 static void	lagg_lacp_lladdr(struct lagg_softc *);
169 
170 /* lagg protocol table */
171 static const struct lagg_proto {
172 	lagg_proto	pr_num;
173 	void		(*pr_attach)(struct lagg_softc *);
174 	void		(*pr_detach)(struct lagg_softc *);
175 	int		(*pr_start)(struct lagg_softc *, struct mbuf *);
176 	struct mbuf *	(*pr_input)(struct lagg_softc *, struct lagg_port *,
177 			    struct mbuf *);
178 	int		(*pr_addport)(struct lagg_port *);
179 	void		(*pr_delport)(struct lagg_port *);
180 	void		(*pr_linkstate)(struct lagg_port *);
181 	void 		(*pr_init)(struct lagg_softc *);
182 	void 		(*pr_stop)(struct lagg_softc *);
183 	void 		(*pr_lladdr)(struct lagg_softc *);
184 	void		(*pr_request)(struct lagg_softc *, void *);
185 	void		(*pr_portreq)(struct lagg_port *, void *);
186 } lagg_protos[] = {
187     {
188 	.pr_num = LAGG_PROTO_NONE
189     },
190     {
191 	.pr_num = LAGG_PROTO_ROUNDROBIN,
192 	.pr_attach = lagg_rr_attach,
193 	.pr_start = lagg_rr_start,
194 	.pr_input = lagg_rr_input,
195     },
196     {
197 	.pr_num = LAGG_PROTO_FAILOVER,
198 	.pr_start = lagg_fail_start,
199 	.pr_input = lagg_fail_input,
200     },
201     {
202 	.pr_num = LAGG_PROTO_LOADBALANCE,
203 	.pr_attach = lagg_lb_attach,
204 	.pr_detach = lagg_lb_detach,
205 	.pr_start = lagg_lb_start,
206 	.pr_input = lagg_lb_input,
207 	.pr_addport = lagg_lb_port_create,
208 	.pr_delport = lagg_lb_port_destroy,
209     },
210     {
211 	.pr_num = LAGG_PROTO_LACP,
212 	.pr_attach = lagg_lacp_attach,
213 	.pr_detach = lagg_lacp_detach,
214 	.pr_start = lagg_lacp_start,
215 	.pr_input = lagg_lacp_input,
216 	.pr_addport = lacp_port_create,
217 	.pr_delport = lacp_port_destroy,
218 	.pr_linkstate = lacp_linkstate,
219 	.pr_init = lacp_init,
220 	.pr_stop = lacp_stop,
221 	.pr_lladdr = lagg_lacp_lladdr,
222 	.pr_request = lacp_req,
223 	.pr_portreq = lacp_portreq,
224     },
225     {
226 	.pr_num = LAGG_PROTO_ETHERCHANNEL,
227 	.pr_attach = lagg_lb_attach,
228 	.pr_detach = lagg_lb_detach,
229 	.pr_start = lagg_lb_start,
230 	.pr_input = lagg_lb_input,
231     },
232     {
233 	.pr_num = LAGG_PROTO_BROADCAST,
234 	.pr_start = lagg_bcast_start,
235 	.pr_input = lagg_bcast_input,
236     },
237 };
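/*
 * Note: the lagg_proto_*() wrappers below index lagg_protos[] directly
 * by protocol number, so the entries above must stay in lagg_proto
 * enum order.
 */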
238 
239 SYSCTL_DECL(_net_link);
240 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0,
241     "Link Aggregation");
242 
243 /* Allow input on any failover links */
244 static VNET_DEFINE(int, lagg_failover_rx_all);
245 #define	V_lagg_failover_rx_all	VNET(lagg_failover_rx_all)
246 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET,
247     &VNET_NAME(lagg_failover_rx_all), 0,
248     "Accept input from any interface in a failover lagg");
249 
250 /* Default value for using M_FLOWID */
251 static VNET_DEFINE(int, def_use_flowid) = 1;
252 #define	V_def_use_flowid	VNET(def_use_flowid)
253 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
254     &VNET_NAME(def_use_flowid), 0,
255     "Default setting for using flow id for load sharing");
256 
257 /* Default value for flowid shift */
258 static VNET_DEFINE(int, def_flowid_shift) = 16;
259 #define	V_def_flowid_shift	VNET(def_flowid_shift)
260 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN,
261     &VNET_NAME(def_flowid_shift), 0,
262     "Default setting for flowid shift for load sharing");
263 
264 static void
265 vnet_lagg_init(const void *unused __unused)
266 {
267 
268 	LAGG_LIST_LOCK_INIT();
269 	SLIST_INIT(&V_lagg_list);
270 	V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create,
271 	    lagg_clone_destroy, 0);
272 }
273 VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
274     vnet_lagg_init, NULL);
275 
276 static void
277 vnet_lagg_uninit(const void *unused __unused)
278 {
279 
280 	if_clone_detach(V_lagg_cloner);
281 	LAGG_LIST_LOCK_DESTROY();
282 }
283 VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
284     vnet_lagg_uninit, NULL);
285 
286 static int
287 lagg_modevent(module_t mod, int type, void *data)
288 {
289 
290 	switch (type) {
291 	case MOD_LOAD:
292 		lagg_input_p = lagg_input;
293 		lagg_linkstate_p = lagg_port_state;
294 		lagg_detach_cookie = EVENTHANDLER_REGISTER(
295 		    ifnet_departure_event, lagg_port_ifdetach, NULL,
296 		    EVENTHANDLER_PRI_ANY);
297 		break;
298 	case MOD_UNLOAD:
299 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
300 		    lagg_detach_cookie);
301 		lagg_input_p = NULL;
302 		lagg_linkstate_p = NULL;
303 		break;
304 	default:
305 		return (EOPNOTSUPP);
306 	}
307 	return (0);
308 }
309 
310 static moduledata_t lagg_mod = {
311 	"if_lagg",
312 	lagg_modevent,
313 	0
314 };
315 
316 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
317 MODULE_VERSION(if_lagg, 1);
318 
319 static void
320 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
321 {
322 
323 	KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto",
324 	    __func__, sc));
325 
326 	if (sc->sc_ifflags & IFF_DEBUG)
327 		if_printf(sc->sc_ifp, "using proto %u\n", pr);
328 
329 	if (lagg_protos[pr].pr_attach != NULL)
330 		lagg_protos[pr].pr_attach(sc);
331 	sc->sc_proto = pr;
332 }
333 
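/*
 * Detach the currently attached protocol.  The lagg write lock is held
 * on entry and is released before returning: either the protocol's
 * pr_detach routine is expected to drop it, or the else branch below
 * drops it when no pr_detach is provided.
 */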
334 static void
335 lagg_proto_detach(struct lagg_softc *sc)
336 {
337 	lagg_proto pr;
338 
339 	LAGG_WLOCK_ASSERT(sc);
340 
341 	pr = sc->sc_proto;
342 	sc->sc_proto = LAGG_PROTO_NONE;
343 
344 	if (lagg_protos[pr].pr_detach != NULL)
345 		lagg_protos[pr].pr_detach(sc);
346 	else
347 		LAGG_WUNLOCK(sc);
348 }
349 
350 static int
351 lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
352 {
353 
354 	return (lagg_protos[sc->sc_proto].pr_start(sc, m));
355 }
356 
357 static struct mbuf *
358 lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
359 {
360 
361 	return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
362 }
363 
364 static int
365 lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
366 {
367 
368 	if (lagg_protos[sc->sc_proto].pr_addport == NULL)
369 		return (0);
370 	else
371 		return (lagg_protos[sc->sc_proto].pr_addport(lp));
372 }
373 
374 static void
375 lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
376 {
377 
378 	if (lagg_protos[sc->sc_proto].pr_delport != NULL)
379 		lagg_protos[sc->sc_proto].pr_delport(lp);
380 }
381 
382 static void
383 lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
384 {
385 
386 	if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
387 		lagg_protos[sc->sc_proto].pr_linkstate(lp);
388 }
389 
390 static void
391 lagg_proto_init(struct lagg_softc *sc)
392 {
393 
394 	if (lagg_protos[sc->sc_proto].pr_init != NULL)
395 		lagg_protos[sc->sc_proto].pr_init(sc);
396 }
397 
398 static void
399 lagg_proto_stop(struct lagg_softc *sc)
400 {
401 
402 	if (lagg_protos[sc->sc_proto].pr_stop != NULL)
403 		lagg_protos[sc->sc_proto].pr_stop(sc);
404 }
405 
406 static void
407 lagg_proto_lladdr(struct lagg_softc *sc)
408 {
409 
410 	if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
411 		lagg_protos[sc->sc_proto].pr_lladdr(sc);
412 }
413 
414 static void
415 lagg_proto_request(struct lagg_softc *sc, void *v)
416 {
417 
418 	if (lagg_protos[sc->sc_proto].pr_request != NULL)
419 		lagg_protos[sc->sc_proto].pr_request(sc, v);
420 }
421 
422 static void
423 lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
424 {
425 
426 	if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
427 		lagg_protos[sc->sc_proto].pr_portreq(lp, v);
428 }
429 
430 /*
431  * This routine is run via a vlan
432  * config EVENT
433  */
434 static void
435 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
436 {
437 	struct lagg_softc *sc = ifp->if_softc;
438 	struct lagg_port *lp;
439 	struct rm_priotracker tracker;
440 
441 	if (ifp->if_softc !=  arg)   /* Not our event */
442 		return;
443 
444 	LAGG_RLOCK(sc, &tracker);
445 	if (!SLIST_EMPTY(&sc->sc_ports)) {
446 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
447 			EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
448 	}
449 	LAGG_RUNLOCK(sc, &tracker);
450 }
451 
452 /*
453  * This routine is run via a vlan
454  * unconfig EVENT
455  */
456 static void
457 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
458 {
459 	struct lagg_softc *sc = ifp->if_softc;
460 	struct lagg_port *lp;
461 	struct rm_priotracker tracker;
462 
463 	if (ifp->if_softc !=  arg)   /* Not our event */
464 		return;
465 
466 	LAGG_RLOCK(sc, &tracker);
467 	if (!SLIST_EMPTY(&sc->sc_ports)) {
468 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
469 			EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
470 	}
471 	LAGG_RUNLOCK(sc, &tracker);
472 }
473 
474 static int
475 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
476 {
477 	struct lagg_softc *sc;
478 	struct ifnet *ifp;
479 	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
480 
481 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
482 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
483 	if (ifp == NULL) {
484 		free(sc, M_DEVBUF);
485 		return (ENOSPC);
486 	}
487 
488 	if (V_def_use_flowid)
489 		sc->sc_opts |= LAGG_OPT_USE_FLOWID;
490 	sc->flowid_shift = V_def_flowid_shift;
491 
492 	/* Hash all layers by default */
493 	sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4;
494 
495 	lagg_proto_attach(sc, LAGG_PROTO_DEFAULT);
496 
497 	LAGG_LOCK_INIT(sc);
498 	SLIST_INIT(&sc->sc_ports);
499 	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
500 
501 	/* Initialise pseudo media types */
502 	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
503 	    lagg_media_status);
504 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
505 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
506 
507 	if_initname(ifp, laggname, unit);
508 	ifp->if_softc = sc;
509 	ifp->if_transmit = lagg_transmit;
510 	ifp->if_qflush = lagg_qflush;
511 	ifp->if_init = lagg_init;
512 	ifp->if_ioctl = lagg_ioctl;
513 	ifp->if_get_counter = lagg_get_counter;
514 	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
515 	ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
516 
517 	/*
518 	 * Attach as an ordinary ethernet device, children will be attached
519 	 * as special device IFT_IEEE8023ADLAG.
520 	 */
521 	ether_ifattach(ifp, eaddr);
522 
523 	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
524 		lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
525 	sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
526 		lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
527 
528 	/* Insert into the global list of laggs */
529 	LAGG_LIST_LOCK();
530 	SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries);
531 	LAGG_LIST_UNLOCK();
532 
533 	return (0);
534 }
535 
536 static void
537 lagg_clone_destroy(struct ifnet *ifp)
538 {
539 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
540 	struct lagg_port *lp;
541 
542 	LAGG_WLOCK(sc);
543 
544 	lagg_stop(sc);
545 	ifp->if_flags &= ~IFF_UP;
546 
547 	EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
548 	EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
549 
550 	/* Shutdown and remove lagg ports */
551 	while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
552 		lagg_port_destroy(lp, 1);
553 	/* Unhook the aggregation protocol */
554 	lagg_proto_detach(sc);
555 
556 	ifmedia_removeall(&sc->sc_media);
557 	ether_ifdetach(ifp);
558 	if_free(ifp);
559 
560 	LAGG_LIST_LOCK();
561 	SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
562 	LAGG_LIST_UNLOCK();
563 
564 	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
565 	LAGG_LOCK_DESTROY(sc);
566 	free(sc, M_DEVBUF);
567 }
568 
569 static void
570 lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
571 {
572 	struct ifnet *ifp = sc->sc_ifp;
573 	struct lagg_port lp;
574 
575 	if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
576 		return;
577 
578 	LAGG_WLOCK_ASSERT(sc);
579 	/*
580 	 * Set the link layer address on the lagg interface.
581 	 * lagg_proto_lladdr() notifies the MAC change to
582 	 * the aggregation protocol.  iflladdr_event handler which
583 	 * may trigger gratuitous ARPs for INET will be handled in
584 	 * a taskqueue.
585 	 */
586 	bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
587 	lagg_proto_lladdr(sc);
588 
589 	bzero(&lp, sizeof(lp));
590 	lp.lp_ifp = sc->sc_ifp;
591 	lp.lp_softc = sc;
592 
593 	lagg_port_lladdr(&lp, lladdr);
594 }
595 
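/*
 * Recompute the lagg interface capabilities as the intersection of the
 * capabilities, enabled capabilities and hardware assist flags of all
 * member ports, along with their common TSO limits.
 */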
596 static void
597 lagg_capabilities(struct lagg_softc *sc)
598 {
599 	struct lagg_port *lp;
600 	int cap = ~0, ena = ~0;
601 	u_long hwa = ~0UL;
602 	struct ifnet_hw_tsomax hw_tsomax;
603 
604 	LAGG_WLOCK_ASSERT(sc);
605 
606 	memset(&hw_tsomax, 0, sizeof(hw_tsomax));
607 
608 	/* Get capabilities from the lagg ports */
609 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
610 		cap &= lp->lp_ifp->if_capabilities;
611 		ena &= lp->lp_ifp->if_capenable;
612 		hwa &= lp->lp_ifp->if_hwassist;
613 		if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax);
614 	}
615 	cap = (cap == ~0 ? 0 : cap);
616 	ena = (ena == ~0 ? 0 : ena);
617 	hwa = (hwa == ~0 ? 0 : hwa);
618 
619 	if (sc->sc_ifp->if_capabilities != cap ||
620 	    sc->sc_ifp->if_capenable != ena ||
621 	    sc->sc_ifp->if_hwassist != hwa ||
622 	    if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) {
623 		sc->sc_ifp->if_capabilities = cap;
624 		sc->sc_ifp->if_capenable = ena;
625 		sc->sc_ifp->if_hwassist = hwa;
626 		getmicrotime(&sc->sc_ifp->if_lastchange);
627 
628 		if (sc->sc_ifflags & IFF_DEBUG)
629 			if_printf(sc->sc_ifp,
630 			    "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
631 	}
632 }
633 
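/*
 * Queue a link layer address change for a port.  The actual update is
 * done later by lagg_port_setlladdr() from a taskqueue in order to
 * avoid a lock order reversal.
 */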
634 static void
635 lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
636 {
637 	struct lagg_softc *sc = lp->lp_softc;
638 	struct ifnet *ifp = lp->lp_ifp;
639 	struct lagg_llq *llq;
640 	int pending = 0;
641 	int primary;
642 
643 	LAGG_WLOCK_ASSERT(sc);
644 
645 	primary = (sc->sc_primary->lp_ifp == ifp) ? 1 : 0;
646 	if (primary == 0 && (lp->lp_detaching ||
647 	    memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0))
648 		return;
649 
650 	/* Check to make sure it's not already queued to be changed */
651 	SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
652 		if (llq->llq_ifp == ifp) {
653 			pending = 1;
654 			break;
655 		}
656 	}
657 
658 	if (!pending) {
659 		llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
660 		if (llq == NULL)	/* XXX what to do */
661 			return;
662 	}
663 
664 	/* Update the lladdr even if pending, it may have changed */
665 	llq->llq_ifp = ifp;
666 	llq->llq_primary = primary;
667 	bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);
668 
669 	if (!pending)
670 		SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);
671 
672 	taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
673 }
674 
675 /*
676  * Set the interface MAC address from a taskqueue to avoid a LOR.
677  */
678 static void
679 lagg_port_setlladdr(void *arg, int pending)
680 {
681 	struct lagg_softc *sc = (struct lagg_softc *)arg;
682 	struct lagg_llq *llq, *head;
683 	struct ifnet *ifp;
684 	int error;
685 
686 	/* Grab a local reference of the queue and remove it from the softc */
687 	LAGG_WLOCK(sc);
688 	head = SLIST_FIRST(&sc->sc_llq_head);
689 	SLIST_FIRST(&sc->sc_llq_head) = NULL;
690 	LAGG_WUNLOCK(sc);
691 
692 	/*
693 	 * Traverse the queue and set the lladdr on each ifp. It is safe to do
694 	 * unlocked as we have the only reference to it.
695 	 */
696 	for (llq = head; llq != NULL; llq = head) {
697 		ifp = llq->llq_ifp;
698 
699 		CURVNET_SET(ifp->if_vnet);
700 		if (llq->llq_primary == 0) {
701 			/*
702 			 * Set the link layer address on the laggport interface.
703 			 * if_setlladdr() triggers gratuitous ARPs for INET.
704 			 */
705 			error = if_setlladdr(ifp, llq->llq_lladdr,
706 			    ETHER_ADDR_LEN);
707 			if (error)
708 				printf("%s: setlladdr failed on %s\n", __func__,
709 				    ifp->if_xname);
710 		} else
711 			EVENTHANDLER_INVOKE(iflladdr_event, ifp);
712 		CURVNET_RESTORE();
713 		head = SLIST_NEXT(llq, llq_entries);
714 		free(llq, M_DEVBUF);
715 	}
716 }
717 
718 static int
719 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
720 {
721 	struct lagg_softc *sc_ptr;
722 	struct lagg_port *lp, *tlp;
723 	int error, i;
724 	uint64_t *pval;
725 
726 	LAGG_WLOCK_ASSERT(sc);
727 
728 	/* Limit the maximal number of lagg ports */
729 	if (sc->sc_count >= LAGG_MAX_PORTS)
730 		return (ENOSPC);
731 
732 	/* Check if port has already been associated to a lagg */
733 	if (ifp->if_lagg != NULL) {
734 		/* Port is already in the current lagg? */
735 		lp = (struct lagg_port *)ifp->if_lagg;
736 		if (lp->lp_softc == sc)
737 			return (EEXIST);
738 		return (EBUSY);
739 	}
740 
741 	/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
742 	if (ifp->if_type != IFT_ETHER)
743 		return (EPROTONOSUPPORT);
744 
745 	/* Allow the first Ethernet member to define the MTU */
746 	if (SLIST_EMPTY(&sc->sc_ports))
747 		sc->sc_ifp->if_mtu = ifp->if_mtu;
748 	else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
749 		if_printf(sc->sc_ifp, "invalid MTU for %s\n",
750 		    ifp->if_xname);
751 		return (EINVAL);
752 	}
753 
754 	if ((lp = malloc(sizeof(struct lagg_port),
755 	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
756 		return (ENOMEM);
757 
758 	/* Check if port is a stacked lagg */
759 	LAGG_LIST_LOCK();
760 	SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
761 		if (ifp == sc_ptr->sc_ifp) {
762 			LAGG_LIST_UNLOCK();
763 			free(lp, M_DEVBUF);
764 			return (EINVAL);
765 			/* XXX disable stacking for the moment, it's untested */
766 #ifdef LAGG_PORT_STACKING
767 			lp->lp_flags |= LAGG_PORT_STACK;
768 			if (lagg_port_checkstacking(sc_ptr) >=
769 			    LAGG_MAX_STACKING) {
770 				LAGG_LIST_UNLOCK();
771 				free(lp, M_DEVBUF);
772 				return (E2BIG);
773 			}
774 #endif
775 		}
776 	}
777 	LAGG_LIST_UNLOCK();
778 
779 	/* Change the interface type */
780 	lp->lp_iftype = ifp->if_type;
781 	ifp->if_type = IFT_IEEE8023ADLAG;
782 	ifp->if_lagg = lp;
783 	lp->lp_ioctl = ifp->if_ioctl;
784 	ifp->if_ioctl = lagg_port_ioctl;
785 	lp->lp_output = ifp->if_output;
786 	ifp->if_output = lagg_port_output;
787 
788 	lp->lp_ifp = ifp;
789 	lp->lp_softc = sc;
790 
791 	/* Save port link layer address */
792 	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
793 
794 	if (SLIST_EMPTY(&sc->sc_ports)) {
795 		sc->sc_primary = lp;
796 		lagg_lladdr(sc, IF_LLADDR(ifp));
797 	} else {
798 		/* Update link layer address for this port */
799 		lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
800 	}
801 
802 	/* Insert into the list of ports. Keep ports sorted by if_index. */
803 	SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
804 		if (tlp->lp_ifp->if_index < ifp->if_index && (
805 		    SLIST_NEXT(tlp, lp_entries) == NULL ||
806 		    SLIST_NEXT(tlp, lp_entries)->lp_ifp->if_index <
807 		    ifp->if_index))
808 			break;
809 	}
810 	if (tlp != NULL)
811 		SLIST_INSERT_AFTER(tlp, lp, lp_entries);
812 	else
813 		SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
814 	sc->sc_count++;
815 
816 	/* Update lagg capabilities */
817 	lagg_capabilities(sc);
818 	lagg_linkstate(sc);
819 
820 	/* Read port counters */
821 	pval = lp->port_counters.val;
822 	for (i = 0; i < IFCOUNTERS; i++, pval++)
823 		*pval = ifp->if_get_counter(ifp, i);
824 	/* Add multicast addresses and interface flags to this port */
825 	lagg_ether_cmdmulti(lp, 1);
826 	lagg_setflags(lp, 1);
827 
828 	if ((error = lagg_proto_addport(sc, lp)) != 0) {
829 		/* Remove the port, without calling pr_delport. */
830 		lagg_port_destroy(lp, 0);
831 		return (error);
832 	}
833 
834 	return (0);
835 }
836 
837 #ifdef LAGG_PORT_STACKING
838 static int
839 lagg_port_checkstacking(struct lagg_softc *sc)
840 {
841 	struct lagg_softc *sc_ptr;
842 	struct lagg_port *lp;
843 	int m = 0;
844 
845 	LAGG_WLOCK_ASSERT(sc);
846 
847 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
848 		if (lp->lp_flags & LAGG_PORT_STACK) {
849 			sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
850 			m = MAX(m, lagg_port_checkstacking(sc_ptr));
851 		}
852 	}
853 
854 	return (m + 1);
855 }
856 #endif
857 
858 static int
859 lagg_port_destroy(struct lagg_port *lp, int rundelport)
860 {
861 	struct lagg_softc *sc = lp->lp_softc;
862 	struct lagg_port *lp_ptr;
863 	struct lagg_llq *llq;
864 	struct ifnet *ifp = lp->lp_ifp;
865 	uint64_t *pval, vdiff;
866 	int i;
867 
868 	LAGG_WLOCK_ASSERT(sc);
869 
870 	if (rundelport)
871 		lagg_proto_delport(sc, lp);
872 
873 	/*
874 	 * Remove multicast addresses and interface flags from this port and
875 	 * reset the MAC address, skip if the interface is being detached.
876 	 */
877 	if (!lp->lp_detaching) {
878 		lagg_ether_cmdmulti(lp, 0);
879 		lagg_setflags(lp, 0);
880 		lagg_port_lladdr(lp, lp->lp_lladdr);
881 	}
882 
883 	/* Restore interface */
884 	ifp->if_type = lp->lp_iftype;
885 	ifp->if_ioctl = lp->lp_ioctl;
886 	ifp->if_output = lp->lp_output;
887 	ifp->if_lagg = NULL;
888 
889 	/* Update detached port counters */
890 	pval = lp->port_counters.val;
891 	for (i = 0; i < IFCOUNTERS; i++, pval++) {
892 		vdiff = ifp->if_get_counter(ifp, i) - *pval;
893 		sc->detached_counters.val[i] += vdiff;
894 	}
895 
896 	/* Finally, remove the port from the lagg */
897 	SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
898 	sc->sc_count--;
899 
900 	/* Update the primary interface */
901 	if (lp == sc->sc_primary) {
902 		uint8_t lladdr[ETHER_ADDR_LEN];
903 
904 		if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
905 			bzero(&lladdr, ETHER_ADDR_LEN);
906 		} else {
907 			bcopy(lp_ptr->lp_lladdr,
908 			    lladdr, ETHER_ADDR_LEN);
909 		}
910 		lagg_lladdr(sc, lladdr);
911 		sc->sc_primary = lp_ptr;
912 
913 		/* Update link layer address for each port */
914 		SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
915 			lagg_port_lladdr(lp_ptr, lladdr);
916 	}
917 
918 	/* Remove any pending lladdr changes from the queue */
919 	if (lp->lp_detaching) {
920 		SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
921 			if (llq->llq_ifp == ifp) {
922 				SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
923 				    llq_entries);
924 				free(llq, M_DEVBUF);
925 				break;	/* Only appears once */
926 			}
927 		}
928 	}
929 
930 	if (lp->lp_ifflags)
931 		if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);
932 
933 	free(lp, M_DEVBUF);
934 
935 	/* Update lagg capabilities */
936 	lagg_capabilities(sc);
937 	lagg_linkstate(sc);
938 
939 	return (0);
940 }
941 
942 static int
943 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
944 {
945 	struct lagg_reqport *rp = (struct lagg_reqport *)data;
946 	struct lagg_softc *sc;
947 	struct lagg_port *lp = NULL;
948 	int error = 0;
949 	struct rm_priotracker tracker;
950 
951 	/* Should be checked by the caller */
952 	if (ifp->if_type != IFT_IEEE8023ADLAG ||
953 	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
954 		goto fallback;
955 
956 	switch (cmd) {
957 	case SIOCGLAGGPORT:
958 		if (rp->rp_portname[0] == '\0' ||
959 		    ifunit(rp->rp_portname) != ifp) {
960 			error = EINVAL;
961 			break;
962 		}
963 
964 		LAGG_RLOCK(sc, &tracker);
965 		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
966 			error = ENOENT;
967 			LAGG_RUNLOCK(sc, &tracker);
968 			break;
969 		}
970 
971 		lagg_port2req(lp, rp);
972 		LAGG_RUNLOCK(sc, &tracker);
973 		break;
974 
975 	case SIOCSIFCAP:
976 		if (lp->lp_ioctl == NULL) {
977 			error = EINVAL;
978 			break;
979 		}
980 		error = (*lp->lp_ioctl)(ifp, cmd, data);
981 		if (error)
982 			break;
983 
984 		/* Update lagg interface capabilities */
985 		LAGG_WLOCK(sc);
986 		lagg_capabilities(sc);
987 		LAGG_WUNLOCK(sc);
988 		break;
989 
990 	case SIOCSIFMTU:
991 		/* Do not allow the MTU to be changed once joined */
992 		error = EINVAL;
993 		break;
994 
995 	default:
996 		goto fallback;
997 	}
998 
999 	return (error);
1000 
1001 fallback:
1002 	if (lp != NULL && lp->lp_ioctl != NULL)
1003 		return ((*lp->lp_ioctl)(ifp, cmd, data));
1004 
1005 	return (EINVAL);
1006 }
1007 
1008 /*
1009  * Requests counter @cnt data.
1010  *
1011  * Counter value is calculated the following way:
1012  * 1) for each port, sum the difference between current and "initial" measurements.
1013  * 2) add lagg logical interface counters.
1014  * 3) add data from detached_counters array.
1015  *
1016  * We also do the following things on ports attach/detach:
1017  * 1) On port attach we store all of its counters into the port_counters array.
1018  * 2) On port detach we add the difference between the "initial" and
1019  *   current counter data to the detached_counters array.
1020  */
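/*
 * For example (hypothetical numbers): a port whose input packet counter
 * read 1000 when it was attached contributes (current - 1000) to the sum
 * while it is a member; if it detaches when the counter reads 1500, the
 * 500 packets it received as a member are folded into detached_counters
 * and remain part of the reported total.
 */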
1021 static uint64_t
1022 lagg_get_counter(struct ifnet *ifp, ift_counter cnt)
1023 {
1024 	struct lagg_softc *sc;
1025 	struct lagg_port *lp;
1026 	struct ifnet *lpifp;
1027 	struct rm_priotracker tracker;
1028 	uint64_t newval, oldval, vsum;
1029 
1030 	/* Revise this when we've got non-generic counters. */
1031 	KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt));
1032 
1033 	sc = (struct lagg_softc *)ifp->if_softc;
1034 	LAGG_RLOCK(sc, &tracker);
1035 
1036 	vsum = 0;
1037 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1038 		/* Saved attached value */
1039 		oldval = lp->port_counters.val[cnt];
1040 		/* current value */
1041 		lpifp = lp->lp_ifp;
1042 		newval = lpifp->if_get_counter(lpifp, cnt);
1043 		/* Calculate diff and save new */
1044 		vsum += newval - oldval;
1045 	}
1046 
1047 	/*
1048 	 * Add counter data which might be added by upper
1049 	 * layer protocols operating on logical interface.
1050 	 */
1051 	vsum += if_get_counter_default(ifp, cnt);
1052 
1053 	/*
1054 	 * Add counter data from detached ports counters
1055 	 */
1056 	vsum += sc->detached_counters.val[cnt];
1057 
1058 	LAGG_RUNLOCK(sc, &tracker);
1059 
1060 	return (vsum);
1061 }
1062 
1063 /*
1064  * For direct output to child ports.
1065  */
1066 static int
1067 lagg_port_output(struct ifnet *ifp, struct mbuf *m,
1068 	const struct sockaddr *dst, struct route *ro)
1069 {
1070 	struct lagg_port *lp = ifp->if_lagg;
1071 
1072 	switch (dst->sa_family) {
1073 		case pseudo_AF_HDRCMPLT:
1074 		case AF_UNSPEC:
1075 			return ((*lp->lp_output)(ifp, m, dst, ro));
1076 	}
1077 
1078 	/* drop any other frames */
1079 	m_freem(m);
1080 	return (ENETDOWN);
1081 }
1082 
1083 static void
1084 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
1085 {
1086 	struct lagg_port *lp;
1087 	struct lagg_softc *sc;
1088 
1089 	if ((lp = ifp->if_lagg) == NULL)
1090 		return;
1091 	/* If the ifnet is just being renamed, don't do anything. */
1092 	if (ifp->if_flags & IFF_RENAMING)
1093 		return;
1094 
1095 	sc = lp->lp_softc;
1096 
1097 	LAGG_WLOCK(sc);
1098 	lp->lp_detaching = 1;
1099 	lagg_port_destroy(lp, 1);
1100 	LAGG_WUNLOCK(sc);
1101 }
1102 
1103 static void
1104 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
1105 {
1106 	struct lagg_softc *sc = lp->lp_softc;
1107 
1108 	strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
1109 	strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
1110 	rp->rp_prio = lp->lp_prio;
1111 	rp->rp_flags = lp->lp_flags;
1112 	lagg_proto_portreq(sc, lp, &rp->rp_psc);
1113 
1114 	/* Add protocol specific flags */
1115 	switch (sc->sc_proto) {
1116 		case LAGG_PROTO_FAILOVER:
1117 			if (lp == sc->sc_primary)
1118 				rp->rp_flags |= LAGG_PORT_MASTER;
1119 			if (lp == lagg_link_active(sc, sc->sc_primary))
1120 				rp->rp_flags |= LAGG_PORT_ACTIVE;
1121 			break;
1122 
1123 		case LAGG_PROTO_ROUNDROBIN:
1124 		case LAGG_PROTO_LOADBALANCE:
1125 		case LAGG_PROTO_ETHERCHANNEL:
1126 		case LAGG_PROTO_BROADCAST:
1127 			if (LAGG_PORTACTIVE(lp))
1128 				rp->rp_flags |= LAGG_PORT_ACTIVE;
1129 			break;
1130 
1131 		case LAGG_PROTO_LACP:
1132 			/* LACP has a different definition of active */
1133 			if (lacp_isactive(lp))
1134 				rp->rp_flags |= LAGG_PORT_ACTIVE;
1135 			if (lacp_iscollecting(lp))
1136 				rp->rp_flags |= LAGG_PORT_COLLECTING;
1137 			if (lacp_isdistributing(lp))
1138 				rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
1139 			break;
1140 	}
1141 
1142 }
1143 
1144 static void
1145 lagg_init(void *xsc)
1146 {
1147 	struct lagg_softc *sc = (struct lagg_softc *)xsc;
1148 	struct lagg_port *lp;
1149 	struct ifnet *ifp = sc->sc_ifp;
1150 
1151 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1152 		return;
1153 
1154 	LAGG_WLOCK(sc);
1155 
1156 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1157 	/* Update the port lladdrs */
1158 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1159 		lagg_port_lladdr(lp, IF_LLADDR(ifp));
1160 
1161 	lagg_proto_init(sc);
1162 
1163 	LAGG_WUNLOCK(sc);
1164 }
1165 
1166 static void
1167 lagg_stop(struct lagg_softc *sc)
1168 {
1169 	struct ifnet *ifp = sc->sc_ifp;
1170 
1171 	LAGG_WLOCK_ASSERT(sc);
1172 
1173 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1174 		return;
1175 
1176 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1177 
1178 	lagg_proto_stop(sc);
1179 }
1180 
1181 static int
1182 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1183 {
1184 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1185 	struct lagg_reqall *ra = (struct lagg_reqall *)data;
1186 	struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
1187 	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
1188 	struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
1189 	struct ifreq *ifr = (struct ifreq *)data;
1190 	struct lagg_port *lp;
1191 	struct ifnet *tpif;
1192 	struct thread *td = curthread;
1193 	char *buf, *outbuf;
1194 	int count, buflen, len, error = 0;
1195 	struct rm_priotracker tracker;
1196 
1197 	bzero(&rpbuf, sizeof(rpbuf));
1198 
1199 	switch (cmd) {
1200 	case SIOCGLAGG:
1201 		LAGG_RLOCK(sc, &tracker);
1202 		count = 0;
1203 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1204 			count++;
1205 		buflen = count * sizeof(struct lagg_reqport);
1206 		LAGG_RUNLOCK(sc, &tracker);
1207 
1208 		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
1209 
1210 		LAGG_RLOCK(sc, &tracker);
1211 		ra->ra_proto = sc->sc_proto;
1212 		lagg_proto_request(sc, &ra->ra_psc);
1213 		count = 0;
1214 		buf = outbuf;
1215 		len = min(ra->ra_size, buflen);
1216 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1217 			if (len < sizeof(rpbuf))
1218 				break;
1219 
1220 			lagg_port2req(lp, &rpbuf);
1221 			memcpy(buf, &rpbuf, sizeof(rpbuf));
1222 			count++;
1223 			buf += sizeof(rpbuf);
1224 			len -= sizeof(rpbuf);
1225 		}
1226 		LAGG_RUNLOCK(sc, &tracker);
1227 		ra->ra_ports = count;
1228 		ra->ra_size = count * sizeof(rpbuf);
1229 		error = copyout(outbuf, ra->ra_port, ra->ra_size);
1230 		free(outbuf, M_TEMP);
1231 		break;
1232 	case SIOCSLAGG:
1233 		error = priv_check(td, PRIV_NET_LAGG);
1234 		if (error)
1235 			break;
1236 		if (ra->ra_proto < 1 || ra->ra_proto >= LAGG_PROTO_MAX) {
1237 			error = EPROTONOSUPPORT;
1238 			break;
1239 		}
1240 
1241 		LAGG_WLOCK(sc);
1242 		lagg_proto_detach(sc);
1243 		lagg_proto_attach(sc, ra->ra_proto);
1244 		break;
1245 	case SIOCGLAGGOPTS:
1246 		ro->ro_opts = sc->sc_opts;
1247 		if (sc->sc_proto == LAGG_PROTO_LACP) {
1248 			struct lacp_softc *lsc;
1249 
1250 			lsc = (struct lacp_softc *)sc->sc_psc;
1251 			if (lsc->lsc_debug.lsc_tx_test != 0)
1252 				ro->ro_opts |= LAGG_OPT_LACP_TXTEST;
1253 			if (lsc->lsc_debug.lsc_rx_test != 0)
1254 				ro->ro_opts |= LAGG_OPT_LACP_RXTEST;
1255 			if (lsc->lsc_strict_mode != 0)
1256 				ro->ro_opts |= LAGG_OPT_LACP_STRICT;
1257 
1258 			ro->ro_active = sc->sc_active;
1259 		} else {
1260 			ro->ro_active = 0;
1261 			SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1262 				ro->ro_active += LAGG_PORTACTIVE(lp);
1263 		}
1264 		ro->ro_flapping = sc->sc_flapping;
1265 		ro->ro_flowid_shift = sc->flowid_shift;
1266 		break;
1267 	case SIOCSLAGGOPTS:
1268 		error = priv_check(td, PRIV_NET_LAGG);
1269 		if (error)
1270 			break;
1271 		if (ro->ro_opts == 0)
1272 			break;
1273 		/*
1274 		 * Set options.  LACP options are stored in sc->sc_psc,
1275 		 * not in sc_opts.
1276 		 */
1277 		int valid, lacp;
1278 
1279 		switch (ro->ro_opts) {
1280 		case LAGG_OPT_USE_FLOWID:
1281 		case -LAGG_OPT_USE_FLOWID:
1282 		case LAGG_OPT_FLOWIDSHIFT:
1283 			valid = 1;
1284 			lacp = 0;
1285 			break;
1286 		case LAGG_OPT_LACP_TXTEST:
1287 		case -LAGG_OPT_LACP_TXTEST:
1288 		case LAGG_OPT_LACP_RXTEST:
1289 		case -LAGG_OPT_LACP_RXTEST:
1290 		case LAGG_OPT_LACP_STRICT:
1291 		case -LAGG_OPT_LACP_STRICT:
1292 			valid = lacp = 1;
1293 			break;
1294 		default:
1295 			valid = lacp = 0;
1296 			break;
1297 		}
1298 
1299 		LAGG_WLOCK(sc);
1300 		if (valid == 0 ||
1301 		    (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
1302 			/* Invalid combination of options specified. */
1303 			error = EINVAL;
1304 			LAGG_WUNLOCK(sc);
1305 			break;	/* Return from SIOCSLAGGOPTS. */
1306 		}
1307 		/*
1308 		 * Store new options into sc->sc_opts except for
1309 		 * FLOWIDSHIFT and LACP options.
1310 		 */
1311 		if (lacp == 0) {
1312 			if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
1313 				sc->flowid_shift = ro->ro_flowid_shift;
1314 			else if (ro->ro_opts > 0)
1315 				sc->sc_opts |= ro->ro_opts;
1316 			else
1317 				sc->sc_opts &= ~ro->ro_opts;
1318 		} else {
1319 			struct lacp_softc *lsc;
1320 
1321 			lsc = (struct lacp_softc *)sc->sc_psc;
1322 
1323 			switch (ro->ro_opts) {
1324 			case LAGG_OPT_LACP_TXTEST:
1325 				lsc->lsc_debug.lsc_tx_test = 1;
1326 				break;
1327 			case -LAGG_OPT_LACP_TXTEST:
1328 				lsc->lsc_debug.lsc_tx_test = 0;
1329 				break;
1330 			case LAGG_OPT_LACP_RXTEST:
1331 				lsc->lsc_debug.lsc_rx_test = 1;
1332 				break;
1333 			case -LAGG_OPT_LACP_RXTEST:
1334 				lsc->lsc_debug.lsc_rx_test = 0;
1335 				break;
1336 			case LAGG_OPT_LACP_STRICT:
1337 				lsc->lsc_strict_mode = 1;
1338 				break;
1339 			case -LAGG_OPT_LACP_STRICT:
1340 				lsc->lsc_strict_mode = 0;
1341 				break;
1342 			}
1343 		}
1344 		LAGG_WUNLOCK(sc);
1345 		break;
1346 	case SIOCGLAGGFLAGS:
1347 		rf->rf_flags = sc->sc_flags;
1348 		break;
1349 	case SIOCSLAGGHASH:
1350 		error = priv_check(td, PRIV_NET_LAGG);
1351 		if (error)
1352 			break;
1353 		if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) {
1354 			error = EINVAL;
1355 			break;
1356 		}
1357 		LAGG_WLOCK(sc);
1358 		sc->sc_flags &= ~LAGG_F_HASHMASK;
1359 		sc->sc_flags |= rf->rf_flags & LAGG_F_HASHMASK;
1360 		LAGG_WUNLOCK(sc);
1361 		break;
1362 	case SIOCGLAGGPORT:
1363 		if (rp->rp_portname[0] == '\0' ||
1364 		    (tpif = ifunit(rp->rp_portname)) == NULL) {
1365 			error = EINVAL;
1366 			break;
1367 		}
1368 
1369 		LAGG_RLOCK(sc, &tracker);
1370 		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1371 		    lp->lp_softc != sc) {
1372 			error = ENOENT;
1373 			LAGG_RUNLOCK(sc, &tracker);
1374 			break;
1375 		}
1376 
1377 		lagg_port2req(lp, rp);
1378 		LAGG_RUNLOCK(sc, &tracker);
1379 		break;
1380 	case SIOCSLAGGPORT:
1381 		error = priv_check(td, PRIV_NET_LAGG);
1382 		if (error)
1383 			break;
1384 		if (rp->rp_portname[0] == '\0' ||
1385 		    (tpif = ifunit(rp->rp_portname)) == NULL) {
1386 			error = EINVAL;
1387 			break;
1388 		}
1389 #ifdef INET6
1390 		/*
1391 		 * A laggport interface should not have inet6 address
1392 		 * because two interfaces with a valid link-local
1393 		 * scope zone must not be merged in any form.  This
1394 		 * restriction is needed to prevent violation of
1395 		 * link-local scope zone.  Attempts to add a laggport
1396 		 * interface which has inet6 addresses triggers
1397 		 * removal of all inet6 addresses on the member
1398 		 * interface.
1399 		 */
1400 		if (in6ifa_llaonifp(tpif)) {
1401 			in6_ifdetach(tpif);
1402 			if_printf(sc->sc_ifp,
1403 			    "IPv6 addresses on %s have been removed "
1404 			    "before adding it as a member to prevent "
1405 			    "IPv6 address scope violation.\n",
1406 			    tpif->if_xname);
1407 		}
1408 #endif
1409 		LAGG_WLOCK(sc);
1410 		error = lagg_port_create(sc, tpif);
1411 		LAGG_WUNLOCK(sc);
1412 		break;
1413 	case SIOCSLAGGDELPORT:
1414 		error = priv_check(td, PRIV_NET_LAGG);
1415 		if (error)
1416 			break;
1417 		if (rp->rp_portname[0] == '\0' ||
1418 		    (tpif = ifunit(rp->rp_portname)) == NULL) {
1419 			error = EINVAL;
1420 			break;
1421 		}
1422 
1423 		LAGG_WLOCK(sc);
1424 		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
1425 		    lp->lp_softc != sc) {
1426 			error = ENOENT;
1427 			LAGG_WUNLOCK(sc);
1428 			break;
1429 		}
1430 
1431 		error = lagg_port_destroy(lp, 1);
1432 		LAGG_WUNLOCK(sc);
1433 		break;
1434 	case SIOCSIFFLAGS:
1435 		/* Set flags on ports too */
1436 		LAGG_WLOCK(sc);
1437 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1438 			lagg_setflags(lp, 1);
1439 		}
1440 		LAGG_WUNLOCK(sc);
1441 
1442 		if (!(ifp->if_flags & IFF_UP) &&
1443 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1444 			/*
1445 			 * If interface is marked down and it is running,
1446 			 * then stop and disable it.
1447 			 */
1448 			LAGG_WLOCK(sc);
1449 			lagg_stop(sc);
1450 			LAGG_WUNLOCK(sc);
1451 		} else if ((ifp->if_flags & IFF_UP) &&
1452 		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1453 			/*
1454 			 * If interface is marked up and it is stopped, then
1455 			 * start it.
1456 			 */
1457 			(*ifp->if_init)(sc);
1458 		}
1459 		break;
1460 	case SIOCADDMULTI:
1461 	case SIOCDELMULTI:
1462 		LAGG_WLOCK(sc);
1463 		error = lagg_ether_setmulti(sc);
1464 		LAGG_WUNLOCK(sc);
1465 		break;
1466 	case SIOCSIFMEDIA:
1467 	case SIOCGIFMEDIA:
1468 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1469 		break;
1470 
1471 	case SIOCSIFCAP:
1472 	case SIOCSIFMTU:
1473 		/* Do not allow the MTU or caps to be directly changed */
1474 		error = EINVAL;
1475 		break;
1476 
1477 	default:
1478 		error = ether_ioctl(ifp, cmd, data);
1479 		break;
1480 	}
1481 	return (error);
1482 }
1483 
1484 static int
1485 lagg_ether_setmulti(struct lagg_softc *sc)
1486 {
1487 	struct lagg_port *lp;
1488 
1489 	LAGG_WLOCK_ASSERT(sc);
1490 
1491 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1492 		/* First, remove any existing filter entries. */
1493 		lagg_ether_cmdmulti(lp, 0);
1494 		/* copy all addresses from the lagg interface to the port */
1495 		lagg_ether_cmdmulti(lp, 1);
1496 	}
1497 	return (0);
1498 }
1499 
1500 static int
1501 lagg_ether_cmdmulti(struct lagg_port *lp, int set)
1502 {
1503 	struct lagg_softc *sc = lp->lp_softc;
1504 	struct ifnet *ifp = lp->lp_ifp;
1505 	struct ifnet *scifp = sc->sc_ifp;
1506 	struct lagg_mc *mc;
1507 	struct ifmultiaddr *ifma;
1508 	int error;
1509 
1510 	LAGG_WLOCK_ASSERT(sc);
1511 
1512 	if (set) {
1513 		IF_ADDR_WLOCK(scifp);
1514 		TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
1515 			if (ifma->ifma_addr->sa_family != AF_LINK)
1516 				continue;
1517 			mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
1518 			if (mc == NULL) {
1519 				IF_ADDR_WUNLOCK(scifp);
1520 				return (ENOMEM);
1521 			}
1522 			bcopy(ifma->ifma_addr, &mc->mc_addr,
1523 			    ifma->ifma_addr->sa_len);
1524 			mc->mc_addr.sdl_index = ifp->if_index;
1525 			mc->mc_ifma = NULL;
1526 			SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
1527 		}
1528 		IF_ADDR_WUNLOCK(scifp);
1529 		SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) {
1530 			error = if_addmulti(ifp,
1531 			    (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma);
1532 			if (error)
1533 				return (error);
1534 		}
1535 	} else {
1536 		while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
1537 			SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
1538 			if (mc->mc_ifma && !lp->lp_detaching)
1539 				if_delmulti_ifma(mc->mc_ifma);
1540 			free(mc, M_DEVBUF);
1541 		}
1542 	}
1543 	return (0);
1544 }
1545 
1546 /* Handle a ref counted flag that should be set on the lagg port as well */
1547 static int
1548 lagg_setflag(struct lagg_port *lp, int flag, int status,
1549     int (*func)(struct ifnet *, int))
1550 {
1551 	struct lagg_softc *sc = lp->lp_softc;
1552 	struct ifnet *scifp = sc->sc_ifp;
1553 	struct ifnet *ifp = lp->lp_ifp;
1554 	int error;
1555 
1556 	LAGG_WLOCK_ASSERT(sc);
1557 
1558 	status = status ? (scifp->if_flags & flag) : 0;
1559 	/* Now "status" contains the flag value or 0 */
1560 
1561 	/*
1562 	 * See if the recorded port status differs from what we want
1563 	 * it to be.  If it does, flip it.  We record the port status
1564 	 * in lp_ifflags so that we won't clear a port flag we haven't
1565 	 * set.  In fact, we don't clear or set port flags directly,
1566 	 * but get or release references to them.  That's why we can
1567 	 * be sure that the recorded flags are still in accord with
1568 	 * the actual port flags.
1569 	 */
1570 	if (status != (lp->lp_ifflags & flag)) {
1571 		error = (*func)(ifp, status);
1572 		if (error)
1573 			return (error);
1574 		lp->lp_ifflags &= ~flag;
1575 		lp->lp_ifflags |= status;
1576 	}
1577 	return (0);
1578 }
1579 
1580 /*
1581  * Handle IFF_* flags that require certain changes on the lagg port:
1582  * if "status" is true, update the port flags to match the lagg;
1583  * if "status" is false, forcibly clear the flags set on the port.
1584  */
1585 static int
1586 lagg_setflags(struct lagg_port *lp, int status)
1587 {
1588 	int error, i;
1589 
1590 	for (i = 0; lagg_pflags[i].flag; i++) {
1591 		error = lagg_setflag(lp, lagg_pflags[i].flag,
1592 		    status, lagg_pflags[i].func);
1593 		if (error)
1594 			return (error);
1595 	}
1596 	return (0);
1597 }
1598 
1599 static int
1600 lagg_transmit(struct ifnet *ifp, struct mbuf *m)
1601 {
1602 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1603 	int error, len, mcast;
1604 	struct rm_priotracker tracker;
1605 
1606 	len = m->m_pkthdr.len;
1607 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
1608 
1609 	LAGG_RLOCK(sc, &tracker);
1610 	/* We need a Tx algorithm and at least one port */
1611 	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
1612 		LAGG_RUNLOCK(sc, &tracker);
1613 		m_freem(m);
1614 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1615 		return (ENXIO);
1616 	}
1617 
1618 	ETHER_BPF_MTAP(ifp, m);
1619 
1620 	error = lagg_proto_start(sc, m);
1621 	LAGG_RUNLOCK(sc, &tracker);
1622 
1623 	if (error != 0)
1624 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1625 
1626 	return (error);
1627 }
1628 
1629 /*
1630  * The ifp->if_qflush entry point for lagg(4) is a no-op.
1631  */
1632 static void
1633 lagg_qflush(struct ifnet *ifp __unused)
1634 {
1635 }
1636 
1637 static struct mbuf *
1638 lagg_input(struct ifnet *ifp, struct mbuf *m)
1639 {
1640 	struct lagg_port *lp = ifp->if_lagg;
1641 	struct lagg_softc *sc = lp->lp_softc;
1642 	struct ifnet *scifp = sc->sc_ifp;
1643 	struct rm_priotracker tracker;
1644 
1645 	LAGG_RLOCK(sc, &tracker);
1646 	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1647 	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
1648 	    sc->sc_proto == LAGG_PROTO_NONE) {
1649 		LAGG_RUNLOCK(sc, &tracker);
1650 		m_freem(m);
1651 		return (NULL);
1652 	}
1653 
1654 	ETHER_BPF_MTAP(scifp, m);
1655 
1656 	m = (lp->lp_detaching == 0) ? lagg_proto_input(sc, lp, m) : NULL;
1657 
1658 	if (m != NULL) {
1659 		if (scifp->if_flags & IFF_MONITOR) {
1660 			m_freem(m);
1661 			m = NULL;
1662 		}
1663 	}
1664 
1665 	LAGG_RUNLOCK(sc, &tracker);
1666 	return (m);
1667 }
1668 
1669 static int
1670 lagg_media_change(struct ifnet *ifp)
1671 {
1672 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1673 
1674 	if (sc->sc_ifflags & IFF_DEBUG)
1675 		printf("%s\n", __func__);
1676 
1677 	/* Ignore */
1678 	return (0);
1679 }
1680 
1681 static void
1682 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
1683 {
1684 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1685 	struct lagg_port *lp;
1686 	struct rm_priotracker tracker;
1687 
1688 	imr->ifm_status = IFM_AVALID;
1689 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
1690 
1691 	LAGG_RLOCK(sc, &tracker);
1692 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1693 		if (LAGG_PORTACTIVE(lp))
1694 			imr->ifm_status |= IFM_ACTIVE;
1695 	}
1696 	LAGG_RUNLOCK(sc, &tracker);
1697 }
1698 
1699 static void
1700 lagg_linkstate(struct lagg_softc *sc)
1701 {
1702 	struct lagg_port *lp;
1703 	int new_link = LINK_STATE_DOWN;
1704 	uint64_t speed;
1705 
1706 	/* Our link is considered up if at least one of our ports is active */
1707 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1708 		if (lp->lp_ifp->if_link_state == LINK_STATE_UP) {
1709 			new_link = LINK_STATE_UP;
1710 			break;
1711 		}
1712 	}
1713 	if_link_state_change(sc->sc_ifp, new_link);
1714 
1715 	/* Update if_baudrate to reflect the max possible speed */
1716 	switch (sc->sc_proto) {
1717 		case LAGG_PROTO_FAILOVER:
1718 			sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
1719 			    sc->sc_primary->lp_ifp->if_baudrate : 0;
1720 			break;
1721 		case LAGG_PROTO_ROUNDROBIN:
1722 		case LAGG_PROTO_LOADBALANCE:
1723 		case LAGG_PROTO_ETHERCHANNEL:
1724 		case LAGG_PROTO_BROADCAST:
1725 			speed = 0;
1726 			SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
1727 				speed += lp->lp_ifp->if_baudrate;
1728 			sc->sc_ifp->if_baudrate = speed;
1729 			break;
1730 		case LAGG_PROTO_LACP:
1731 			/* LACP updates if_baudrate itself */
1732 			break;
1733 	}
1734 }
1735 
1736 static void
1737 lagg_port_state(struct ifnet *ifp, int state)
1738 {
1739 	struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
1740 	struct lagg_softc *sc = NULL;
1741 
1742 	if (lp != NULL)
1743 		sc = lp->lp_softc;
1744 	if (sc == NULL)
1745 		return;
1746 
1747 	LAGG_WLOCK(sc);
1748 	lagg_linkstate(sc);
1749 	lagg_proto_linkstate(sc, lp);
1750 	LAGG_WUNLOCK(sc);
1751 }
1752 
1753 struct lagg_port *
1754 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
1755 {
1756 	struct lagg_port *lp_next, *rval = NULL;
1757 	// int new_link = LINK_STATE_DOWN;
1758 
1759 	LAGG_RLOCK_ASSERT(sc);
1760 	/*
1761 	 * Search for a port which reports an active link state.
1762 	 */
1763 
1764 	if (lp == NULL)
1765 		goto search;
1766 	if (LAGG_PORTACTIVE(lp)) {
1767 		rval = lp;
1768 		goto found;
1769 	}
1770 	if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
1771 	    LAGG_PORTACTIVE(lp_next)) {
1772 		rval = lp_next;
1773 		goto found;
1774 	}
1775 
1776 search:
1777 	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
1778 		if (LAGG_PORTACTIVE(lp_next)) {
1779 			rval = lp_next;
1780 			goto found;
1781 		}
1782 	}
1783 
1784 found:
1785 	if (rval != NULL) {
1786 		/*
1787 		 * The IEEE 802.1D standard assumes that a lagg with
1788 		 * multiple ports is always full duplex. This is valid
1789 		 * for load sharing laggs and if at least two links
1790 		 * are active. Unfortunately, checking the latter would
1791 		 * be too expensive at this point.
1792 		 XXX
1793 		if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
1794 		    (sc->sc_count > 1))
1795 			new_link = LINK_STATE_FULL_DUPLEX;
1796 		else
1797 			new_link = rval->lp_link_state;
1798 		 */
1799 	}
1800 
1801 	return (rval);
1802 }
1803 
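/*
 * Return a pointer to "len" bytes at offset "off" in the mbuf chain,
 * copying them into "buf" when they are not contiguous in the first
 * mbuf, or NULL if the packet is too short.
 */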
1804 static const void *
1805 lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
1806 {
1807 	if (m->m_pkthdr.len < (off + len)) {
1808 		return (NULL);
1809 	} else if (m->m_len < (off + len)) {
1810 		m_copydata(m, off, len, buf);
1811 		return (buf);
1812 	}
1813 	return (mtod(m, char *) + off);
1814 }
1815 
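/*
 * Compute a flow hash over the packet, seeded with "key".  Depending on
 * the LAGG_F_HASHL2/L3/L4 flags this folds in the Ethernet addresses and
 * VLAN tag, the IPv4 or IPv6 addresses (plus the IPv6 flow label) and,
 * for IPv4, the TCP/UDP/SCTP port pair.
 */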
1816 uint32_t
1817 lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key)
1818 {
1819 	uint16_t etype;
1820 	uint32_t p = key;
1821 	int off;
1822 	struct ether_header *eh;
1823 	const struct ether_vlan_header *vlan;
1824 #ifdef INET
1825 	const struct ip *ip;
1826 	const uint32_t *ports;
1827 	int iphlen;
1828 #endif
1829 #ifdef INET6
1830 	const struct ip6_hdr *ip6;
1831 	uint32_t flow;
1832 #endif
1833 	union {
1834 #ifdef INET
1835 		struct ip ip;
1836 #endif
1837 #ifdef INET6
1838 		struct ip6_hdr ip6;
1839 #endif
1840 		struct ether_vlan_header vlan;
1841 		uint32_t port;
1842 	} buf;
1843 
1844 
1845 	off = sizeof(*eh);
1846 	if (m->m_len < off)
1847 		goto out;
1848 	eh = mtod(m, struct ether_header *);
1849 	etype = ntohs(eh->ether_type);
1850 	if (sc->sc_flags & LAGG_F_HASHL2) {
1851 		p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
1852 		p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
1853 	}
1854 
1855 	/* Special handling for encapsulating VLAN frames */
1856 	if ((m->m_flags & M_VLANTAG) && (sc->sc_flags & LAGG_F_HASHL2)) {
1857 		p = hash32_buf(&m->m_pkthdr.ether_vtag,
1858 		    sizeof(m->m_pkthdr.ether_vtag), p);
1859 	} else if (etype == ETHERTYPE_VLAN) {
1860 		vlan = lagg_gethdr(m, off,  sizeof(*vlan), &buf);
1861 		if (vlan == NULL)
1862 			goto out;
1863 
1864 		if (sc->sc_flags & LAGG_F_HASHL2)
1865 			p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
1866 		etype = ntohs(vlan->evl_proto);
1867 		off += sizeof(*vlan) - sizeof(*eh);
1868 	}
1869 
1870 	switch (etype) {
1871 #ifdef INET
1872 	case ETHERTYPE_IP:
1873 		ip = lagg_gethdr(m, off, sizeof(*ip), &buf);
1874 		if (ip == NULL)
1875 			goto out;
1876 
1877 		if (sc->sc_flags & LAGG_F_HASHL3) {
1878 			p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
1879 			p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
1880 		}
1881 		if (!(sc->sc_flags & LAGG_F_HASHL4))
1882 			break;
1883 		switch (ip->ip_p) {
1884 			case IPPROTO_TCP:
1885 			case IPPROTO_UDP:
1886 			case IPPROTO_SCTP:
1887 				iphlen = ip->ip_hl << 2;
1888 				if (iphlen < sizeof(*ip))
1889 					break;
1890 				off += iphlen;
1891 				ports = lagg_gethdr(m, off, sizeof(*ports), &buf);
1892 				if (ports == NULL)
1893 					break;
1894 				p = hash32_buf(ports, sizeof(*ports), p);
1895 				break;
1896 		}
1897 		break;
1898 #endif
1899 #ifdef INET6
1900 	case ETHERTYPE_IPV6:
1901 		if (!(sc->sc_flags & LAGG_F_HASHL3))
1902 			break;
1903 		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &buf);
1904 		if (ip6 == NULL)
1905 			goto out;
1906 
1907 		p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
1908 		p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
1909 		flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
1910 		p = hash32_buf(&flow, sizeof(flow), p);	/* IPv6 flow label */
1911 		break;
1912 #endif
1913 	}
1914 out:
1915 	return (p);
1916 }
1917 
1918 int
1919 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
1920 {
1921 
1922 	return (ifp->if_transmit)(ifp, m);
1923 }
1924 
1925 /*
1926  * Simple round robin aggregation
1927  */
1928 static void
1929 lagg_rr_attach(struct lagg_softc *sc)
1930 {
1931 	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
1932 	sc->sc_seq = 0;
1933 }
1934 
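/*
 * Select ports in strict rotation: the atomic sequence counter modulo
 * the number of ports picks the list entry to transmit on.
 */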
1935 static int
1936 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
1937 {
1938 	struct lagg_port *lp;
1939 	uint32_t p;
1940 
1941 	p = atomic_fetchadd_32(&sc->sc_seq, 1);
1942 	p %= sc->sc_count;
1943 	lp = SLIST_FIRST(&sc->sc_ports);
1944 	while (p--)
1945 		lp = SLIST_NEXT(lp, lp_entries);
1946 
1947 	/*
1948 	 * Check the port's link state. This will return the next active
1949 	 * port if the link is down or the port is NULL.
1950 	 */
1951 	if ((lp = lagg_link_active(sc, lp)) == NULL) {
1952 		m_freem(m);
1953 		return (ENETDOWN);
1954 	}
1955 
1956 	/* Send mbuf */
1957 	return (lagg_enqueue(lp->lp_ifp, m));
1958 }
1959 
1960 static struct mbuf *
1961 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
1962 {
1963 	struct ifnet *ifp = sc->sc_ifp;
1964 
1965 	/* Just pass in the packet to our lagg device */
1966 	m->m_pkthdr.rcvif = ifp;
1967 
1968 	return (m);
1969 }
1970 
1971 /*
1972  * Broadcast mode
1973  */
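/*
 * Every active port gets its own copy of the frame; the last active
 * port transmits the original mbuf, so only n-1 copies are made.
 */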
1974 static int
1975 lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
1976 {
1977 	int active_ports = 0;
1978 	int errors = 0;
1979 	int ret;
1980 	struct lagg_port *lp, *last = NULL;
1981 	struct mbuf *m0;
1982 
1983 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1984 		if (!LAGG_PORTACTIVE(lp))
1985 			continue;
1986 
1987 		active_ports++;
1988 
1989 		if (last != NULL) {
1990 			m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT);
1991 			if (m0 == NULL) {
1992 				ret = ENOBUFS;
1993 				errors++;
1994 				break;
1995 			}
1996 
1997 			ret = lagg_enqueue(last->lp_ifp, m0);
1998 			if (ret != 0)
1999 				errors++;
2000 		}
2001 		last = lp;
2002 	}
2003 	if (last == NULL) {
2004 		m_freem(m);
2005 		return (ENOENT);
2006 	}
2007 	if ((last = lagg_link_active(sc, last)) == NULL) {
2008 		m_freem(m);
2009 		return (ENETDOWN);
2010 	}
2011 
2012 	ret = lagg_enqueue(last->lp_ifp, m);
2013 	if (ret != 0)
2014 		errors++;
2015 
2016 	if (errors == 0)
2017 		return (ret);
2018 
2019 	return (0);
2020 }
2021 
2022 static struct mbuf *
2023 lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
2024 {
2025 	struct ifnet *ifp = sc->sc_ifp;
2026 
2027 	/* Just pass in the packet to our lagg device */
2028 	m->m_pkthdr.rcvif = ifp;
2029 	return (m);
2030 }
2031 
2032 /*
2033  * Active failover
2034  */
2035 static int
2036 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
2037 {
2038 	struct lagg_port *lp;
2039 
2040 	/* Use the master port if active or the next available port */
2041 	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
2042 		m_freem(m);
2043 		return (ENETDOWN);
2044 	}
2045 
2046 	/* Send mbuf */
2047 	return (lagg_enqueue(lp->lp_ifp, m));
2048 }
2049 
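/*
 * Only frames received on the primary port are passed up, unless
 * failover_rx_all is enabled or the primary link is down, in which
 * case frames from the currently active backup port are accepted.
 */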
2050 static struct mbuf *
2051 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
2052 {
2053 	struct ifnet *ifp = sc->sc_ifp;
2054 	struct lagg_port *tmp_tp;
2055 
2056 	if (lp == sc->sc_primary || V_lagg_failover_rx_all) {
2057 		m->m_pkthdr.rcvif = ifp;
2058 		return (m);
2059 	}
2060 
2061 	if (!LAGG_PORTACTIVE(sc->sc_primary)) {
2062 		tmp_tp = lagg_link_active(sc, sc->sc_primary);
2063 		/*
2064 		 * If tmp_tp is NULL, we've received a packet when all
2065 		 * our links are down. Weird, but process it anyway.
2066 		 */
2067 		if ((tmp_tp == NULL || tmp_tp == lp)) {
2068 			m->m_pkthdr.rcvif = ifp;
2069 			return (m);
2070 		}
2071 	}
2072 
2073 	m_freem(m);
2074 	return (NULL);
2075 }
2076 
2077 /*
2078  * Loadbalancing
2079  */
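/*
 * Frames are distributed over a static table of ports; the outgoing
 * port is chosen by hashing the frame with a randomly generated key,
 * or by the frame's hardware flowid when that is enabled.
 */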
2080 static void
2081 lagg_lb_attach(struct lagg_softc *sc)
2082 {
2083 	struct lagg_port *lp;
2084 	struct lagg_lb *lb;
2085 
2086 	lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
2087 
2088 	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
2089 
2090 	lb->lb_key = arc4random();
2091 	sc->sc_psc = lb;
2092 
2093 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
2094 		lagg_lb_port_create(lp);
2095 }
2096 
2097 static void
2098 lagg_lb_detach(struct lagg_softc *sc)
2099 {
2100 	struct lagg_lb *lb;
2101 
2102 	lb = (struct lagg_lb *)sc->sc_psc;
2103 	LAGG_WUNLOCK(sc);
2104 	if (lb != NULL)
2105 		free(lb, M_DEVBUF);
2106 }
2107 
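/*
 * Rebuild the port table from the current port list, skipping "lp"
 * so that a port being removed is left out of the table.
 */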
2108 static int
2109 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
2110 {
2111 	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
2112 	struct lagg_port *lp_next;
2113 	int i = 0;
2114 
2115 	bzero(&lb->lb_ports, sizeof(lb->lb_ports));
2116 	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
2117 		if (lp_next == lp)
2118 			continue;
2119 		if (i >= LAGG_MAX_PORTS)
2120 			return (EINVAL);
2121 		if (sc->sc_ifflags & IFF_DEBUG)
2122 			printf("%s: port %s at index %d\n",
2123 			    sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
2124 		lb->lb_ports[i++] = lp_next;
2125 	}
2126 
2127 	return (0);
2128 }
2129 
2130 static int
2131 lagg_lb_port_create(struct lagg_port *lp)
2132 {
2133 	struct lagg_softc *sc = lp->lp_softc;
2134 	return (lagg_lb_porttable(sc, NULL));
2135 }
2136 
2137 static void
2138 lagg_lb_port_destroy(struct lagg_port *lp)
2139 {
2140 	struct lagg_softc *sc = lp->lp_softc;
2141 	lagg_lb_porttable(sc, lp);
2142 }
2143 
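/*
 * Map the frame's hash (or hardware flowid) onto the port table and
 * transmit on the selected port.
 */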
2144 static int
2145 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
2146 {
2147 	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
2148 	struct lagg_port *lp = NULL;
2149 	uint32_t p = 0;
2150 
2151 	if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && (m->m_flags & M_FLOWID))
2152 		p = m->m_pkthdr.flowid >> sc->flowid_shift;
2153 	else
2154 		p = lagg_hashmbuf(sc, m, lb->lb_key);
2155 	p %= sc->sc_count;
2156 	lp = lb->lb_ports[p];
2157 
2158 	/*
2159 	 * Check the port's link state. This will return the next active
2160 	 * port if the link is down or the port is NULL.
2161 	 */
2162 	if ((lp = lagg_link_active(sc, lp)) == NULL) {
2163 		m_freem(m);
2164 		return (ENETDOWN);
2165 	}
2166 
2167 	/* Send mbuf */
2168 	return (lagg_enqueue(lp->lp_ifp, m));
2169 }
2170 
2171 static struct mbuf *
2172 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
2173 {
2174 	struct ifnet *ifp = sc->sc_ifp;
2175 
2176 	/* Just pass in the packet to our lagg device */
2177 	m->m_pkthdr.rcvif = ifp;
2178 
2179 	return (m);
2180 }
2181 
2182 /*
2183  * 802.3ad LACP
2184  */
2185 static void
2186 lagg_lacp_attach(struct lagg_softc *sc)
2187 {
2188 	struct lagg_port *lp;
2189 
2190 	lacp_attach(sc);
2191 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
2192 		lacp_port_create(lp);
2193 }
2194 
2195 static void
2196 lagg_lacp_detach(struct lagg_softc *sc)
2197 {
2198 	struct lagg_port *lp;
2199 	void *psc;
2200 
2201 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
2202 		lacp_port_destroy(lp);
2203 
2204 	psc = sc->sc_psc;
2205 	sc->sc_psc = NULL;
2206 	LAGG_WUNLOCK(sc);
2207 
2208 	lacp_detach(psc);
2209 }
2210 
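/*
 * The lagg link-level address has changed; destroy and recreate the
 * LACP ports so that they pick up the new address.
 */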
2211 static void
2212 lagg_lacp_lladdr(struct lagg_softc *sc)
2213 {
2214 	struct lagg_port *lp;
2215 
2216 	/* purge all the lacp ports */
2217 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
2218 		lacp_port_destroy(lp);
2219 
2220 	/* add them back in */
2221 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
2222 		lacp_port_create(lp);
2223 }
2224 
2225 static int
2226 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
2227 {
2228 	struct lagg_port *lp;
2229 
2230 	lp = lacp_select_tx_port(sc, m);
2231 	if (lp == NULL) {
2232 		m_freem(m);
2233 		return (ENETDOWN);
2234 	}
2235 
2236 	/* Send mbuf */
2237 	return (lagg_enqueue(lp->lp_ifp, m));
2238 }
2239 
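/*
 * Pass LACP control frames (slow protocols ethertype) to lacp_input()
 * and only accept data frames from ports that are collecting members
 * of the active aggregator.
 */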
2240 static struct mbuf *
2241 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
2242 {
2243 	struct ifnet *ifp = sc->sc_ifp;
2244 	struct ether_header *eh;
2245 	u_short etype;
2246 
2247 	eh = mtod(m, struct ether_header *);
2248 	etype = ntohs(eh->ether_type);
2249 
2250 	/* Tap off LACP control messages */
2251 	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
2252 		m = lacp_input(lp, m);
2253 		if (m == NULL)
2254 			return (NULL);
2255 	}
2256 
2257 	/*
2258 	 * If the port is not collecting or not in the active aggregator then
2259 	 * free and return.
2260 	 */
2261 	if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
2262 		m_freem(m);
2263 		return (NULL);
2264 	}
2265 
2266 	m->m_pkthdr.rcvif = ifp;
2267 	return (m);
2268 }
2269 
2270