xref: /freebsd/sys/net/ieee8023ad_lacp.c (revision 94942af266ac119ede0ca836f9aa5a5ac0582938)
1 /*	$NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $	*/
2 
3 /*-
4  * Copyright (c)2005 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/callout.h>
34 #include <sys/mbuf.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/kernel.h> /* hz */
38 #include <sys/socket.h> /* for net/if.h */
39 #include <sys/sockio.h>
40 #include <machine/stdarg.h>
41 #include <sys/lock.h>
42 #include <sys/rwlock.h>
43 #include <sys/taskqueue.h>
44 
45 #include <net/if.h>
46 #include <net/if_dl.h>
47 #include <net/ethernet.h>
48 #include <net/if_media.h>
49 #include <net/if_types.h>
50 
51 #include <net/if_lagg.h>
52 #include <net/ieee8023ad_lacp.h>
53 
54 /*
55  * actor system priority and port priority.
56  * XXX should be configurable.
57  */
58 
/* Stored (in network byte order) as lsi_prio by lacp_fill_actorinfo(). */
#define	LACP_SYSTEM_PRIO	0x8000
/* Stored (in network byte order) as lpi_prio by lacp_fill_actorinfo(). */
#define	LACP_PORT_PRIO		0x8000

/*
 * Destination MAC address of slow-protocol frames: lacp_xmit_lacpdu()
 * sends LACPDUs to it, lacp_pdu_input() only accepts frames addressed to
 * it, and lacp_port_create() joins it as a multicast group on each port.
 */
const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
    { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
64 
/*
 * TLV layout expected in a LACPDU: actor info, partner info and collector
 * info, each listed with its total length (TLV header included).
 * lacp_pdu_input() hands this table to tlv_check(); the { 0, 0 } entry
 * closes the table (presumably the terminator tlv_check() looks for).
 */
static const struct tlv_template lacp_info_tlv_template[] = {
	{ LACP_TYPE_ACTORINFO,
	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
	{ LACP_TYPE_PARTNERINFO,
	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
	{ LACP_TYPE_COLLECTORINFO,
	    sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
	{ 0, 0 },
};
74 
/* Signature of a per-port timer handler; see lacp_timer_funcs[]. */
typedef void (*lacp_timer_func_t)(struct lacp_port *);

/* TLV layout of a marker information PDU: a single 16-byte TLV. */
static const struct tlv_template marker_info_tlv_template[] = {
	{ MARKER_TYPE_INFO, 16 },
	{ 0, 0 },
};

/* TLV layout of a marker response PDU: a single 16-byte TLV. */
static const struct tlv_template marker_response_tlv_template[] = {
	{ MARKER_TYPE_RESPONSE, 16 },
	{ 0, 0 },
};
86 
87 static void	lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
88 
89 static uint64_t	lacp_aggregator_bandwidth(struct lacp_aggregator *);
90 static void	lacp_suppress_distributing(struct lacp_softc *,
91 		    struct lacp_aggregator *);
92 static void	lacp_transit_expire(void *);
93 static void	lacp_select_active_aggregator(struct lacp_softc *);
94 static uint16_t	lacp_compose_key(struct lacp_port *);
95 static int	tlv_check(const void *, size_t, const struct tlvhdr *,
96 		    const struct tlv_template *, boolean_t);
97 static void	lacp_tick(void *);
98 
99 static void	lacp_fill_aggregator_id(struct lacp_aggregator *,
100 		    const struct lacp_port *);
101 static void	lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
102 		    const struct lacp_peerinfo *);
103 static int	lacp_aggregator_is_compatible(const struct lacp_aggregator *,
104 		    const struct lacp_port *);
105 static int	lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
106 		    const struct lacp_peerinfo *);
107 
108 static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
109 		    struct lacp_port *);
110 static void	lacp_aggregator_addref(struct lacp_softc *,
111 		    struct lacp_aggregator *);
112 static void	lacp_aggregator_delref(struct lacp_softc *,
113 		    struct lacp_aggregator *);
114 
115 /* receive machine */
116 
117 static void	lacp_dequeue(void *, int);
118 static int	lacp_pdu_input(struct lagg_port *, struct mbuf *);
119 static int	lacp_marker_input(struct lagg_port *, struct mbuf *);
120 static void	lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
121 static void	lacp_sm_rx_timer(struct lacp_port *);
122 static void	lacp_sm_rx_set_expired(struct lacp_port *);
123 static void	lacp_sm_rx_update_ntt(struct lacp_port *,
124 		    const struct lacpdu *);
125 static void	lacp_sm_rx_record_pdu(struct lacp_port *,
126 		    const struct lacpdu *);
127 static void	lacp_sm_rx_update_selected(struct lacp_port *,
128 		    const struct lacpdu *);
129 static void	lacp_sm_rx_record_default(struct lacp_port *);
130 static void	lacp_sm_rx_update_default_selected(struct lacp_port *);
131 static void	lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
132 		    const struct lacp_peerinfo *);
133 
134 /* mux machine */
135 
136 static void	lacp_sm_mux(struct lacp_port *);
137 static void	lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
138 static void	lacp_sm_mux_timer(struct lacp_port *);
139 
140 /* periodic transmit machine */
141 
142 static void	lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
143 static void	lacp_sm_ptx_tx_schedule(struct lacp_port *);
144 static void	lacp_sm_ptx_timer(struct lacp_port *);
145 
146 /* transmit machine */
147 
148 static void	lacp_sm_tx(struct lacp_port *);
149 static void	lacp_sm_assert_ntt(struct lacp_port *);
150 
151 static void	lacp_run_timers(struct lacp_port *);
152 static int	lacp_compare_peerinfo(const struct lacp_peerinfo *,
153 		    const struct lacp_peerinfo *);
154 static int	lacp_compare_systemid(const struct lacp_systemid *,
155 		    const struct lacp_systemid *);
156 static void	lacp_port_enable(struct lacp_port *);
157 static void	lacp_port_disable(struct lacp_port *);
158 static void	lacp_select(struct lacp_port *);
159 static void	lacp_unselect(struct lacp_port *);
160 static void	lacp_disable_collecting(struct lacp_port *);
161 static void	lacp_enable_collecting(struct lacp_port *);
162 static void	lacp_disable_distributing(struct lacp_port *);
163 static void	lacp_enable_distributing(struct lacp_port *);
164 static int	lacp_xmit_lacpdu(struct lacp_port *);
165 
166 #if defined(LACP_DEBUG)
167 static void	lacp_dump_lacpdu(const struct lacpdu *);
168 static const char *lacp_format_partner(const struct lacp_peerinfo *, char *,
169 		    size_t);
170 static const char *lacp_format_lagid(const struct lacp_peerinfo *,
171 		    const struct lacp_peerinfo *, char *, size_t);
172 static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *,
173 		    char *, size_t);
174 static const char *lacp_format_state(uint8_t, char *, size_t);
175 static const char *lacp_format_mac(const uint8_t *, char *, size_t);
176 static const char *lacp_format_systemid(const struct lacp_systemid *, char *,
177 		    size_t);
178 static const char *lacp_format_portid(const struct lacp_portid *, char *,
179 		    size_t);
180 static void	lacp_dprintf(const struct lacp_port *, const char *, ...)
181 		    __attribute__((__format__(__printf__, 2, 3)));
182 #define	LACP_DPRINTF(a)	lacp_dprintf a
183 #else
184 #define LACP_DPRINTF(a) /* nothing */
185 #endif
186 
187 /*
188  * partner administration variables.
189  * XXX should be configurable.
190  */
191 
/*
 * Administrative partner description.  Both priorities are 0xffff and,
 * with the "optimistic" branch compiled in, the partner is described as
 * in sync, aggregatable, collecting and distributing.
 * NOTE(review): presumably copied into a port's partner info when no
 * LACPDU has been recorded; the consumer is not visible in this part of
 * the file.
 */
static const struct lacp_peerinfo lacp_partner_admin = {
	.lip_systemid = { .lsi_prio = 0xffff },
	.lip_portid = { .lpi_prio = 0xffff },
#if 1
	/* optimistic */
	.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
	    LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
#else
	/* pessimistic */
	.lip_state = 0,
#endif
};
204 
/*
 * Handler for each LACP timer, indexed by timer id: the receive machine
 * owns CURRENT_WHILE, the periodic transmit machine owns PERIODIC and the
 * mux machine owns WAIT_WHILE.
 * NOTE(review): presumably invoked from lacp_run_timers() when a timer
 * fires; that function is not visible in this part of the file.
 */
static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
	[LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
	[LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
	[LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
};
210 
211 void
212 lacp_input(struct lagg_port *lgp, struct mbuf *m)
213 {
214 	struct lagg_softc *lgs = lgp->lp_lagg;
215 	struct lacp_softc *lsc = LACP_SOFTC(lgs);
216 	uint8_t subtype;
217 
218 	if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) {
219 		m_freem(m);
220 		return;
221 	}
222 
223 	m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype);
224 	switch (subtype) {
225 		case SLOWPROTOCOLS_SUBTYPE_LACP:
226 			IF_HANDOFF(&lsc->lsc_queue, m, NULL);
227 			taskqueue_enqueue(taskqueue_swi, &lsc->lsc_qtask);
228 			break;
229 
230 		case SLOWPROTOCOLS_SUBTYPE_MARKER:
231 			lacp_marker_input(lgp, m);
232 			break;
233 
234 		default:
235 			/* Unknown LACP packet type */
236 			m_freem(m);
237 			break;
238 	}
239 }
240 
241 static void
242 lacp_dequeue(void *arg, int pending)
243 {
244 	struct lacp_softc *lsc = (struct lacp_softc *)arg;
245 	struct lagg_softc *sc = lsc->lsc_lagg;
246 	struct lagg_port *lgp;
247 	struct mbuf *m;
248 
249 	LAGG_WLOCK(sc);
250 	for (;;) {
251 		IF_DEQUEUE(&lsc->lsc_queue, m);
252 		if (m == NULL)
253 			break;
254 		lgp = m->m_pkthdr.rcvif->if_lagg;
255 		lacp_pdu_input(lgp, m);
256 	}
257 	LAGG_WUNLOCK(sc);
258 }
259 
260 /*
261  * lacp_pdu_input: process lacpdu
262  */
263 static int
264 lacp_pdu_input(struct lagg_port *lgp, struct mbuf *m)
265 {
266 	struct lacp_port *lp = LACP_PORT(lgp);
267 	struct lacpdu *du;
268 	int error = 0;
269 
270 	LAGG_WLOCK_ASSERT(lgp->lp_lagg);
271 
272 	if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
273 		goto bad;
274 	}
275 
276 	if (m->m_pkthdr.len != sizeof(*du)) {
277 		goto bad;
278 	}
279 
280 	if ((m->m_flags & M_MCAST) == 0) {
281 		goto bad;
282 	}
283 
284 	if (m->m_len < sizeof(*du)) {
285 		m = m_pullup(m, sizeof(*du));
286 		if (m == NULL) {
287 			return (ENOMEM);
288 		}
289 	}
290 
291 	du = mtod(m, struct lacpdu *);
292 
293 	if (memcmp(&du->ldu_eh.ether_dhost,
294 	    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
295 		goto bad;
296 	}
297 
298 	/* XXX
299 	KASSERT(du->ldu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_LACP,
300 	    ("a very bad kassert!"));
301 	*/
302 
303 	/*
304 	 * ignore the version for compatibility with
305 	 * the future protocol revisions.
306 	 */
307 
308 #if 0
309 	if (du->ldu_sph.sph_version != 1) {
310 		goto bad;
311 	}
312 #endif
313 
314 	/*
315 	 * ignore tlv types for compatibility with
316 	 * the future protocol revisions.
317 	 */
318 
319 	if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
320 	    lacp_info_tlv_template, FALSE)) {
321 		goto bad;
322 	}
323 
324 #if defined(LACP_DEBUG)
325 	LACP_DPRINTF((lp, "lacpdu receive\n"));
326 	lacp_dump_lacpdu(du);
327 #endif /* defined(LACP_DEBUG) */
328 	lacp_sm_rx(lp, du);
329 
330 	m_freem(m);
331 
332 	return (error);
333 
334 bad:
335 	m_freem(m);
336 	return (EINVAL);
337 }
338 
339 static void
340 lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
341 {
342 	struct lagg_port *lgp = lp->lp_lagg;
343 	struct lagg_softc *lgs = lgp->lp_lagg;
344 
345 	info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
346 	memcpy(&info->lip_systemid.lsi_mac,
347 	    IF_LLADDR(lgs->sc_ifp), ETHER_ADDR_LEN);
348 	info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
349 	info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
350 	info->lip_state = lp->lp_state;
351 }
352 
353 static int
354 lacp_xmit_lacpdu(struct lacp_port *lp)
355 {
356 	struct lagg_port *lgp = lp->lp_lagg;
357 	struct mbuf *m;
358 	struct lacpdu *du;
359 	int error;
360 
361 	LAGG_WLOCK_ASSERT(lgp->lp_lagg);
362 
363 	m = m_gethdr(M_DONTWAIT, MT_DATA);
364 	if (m == NULL) {
365 		return (ENOMEM);
366 	}
367 	m->m_len = m->m_pkthdr.len = sizeof(*du);
368 
369 	du = mtod(m, struct lacpdu *);
370 	memset(du, 0, sizeof(*du));
371 
372 	memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
373 	    ETHER_ADDR_LEN);
374 	memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
375 	du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
376 
377 	du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
378 	du->ldu_sph.sph_version = 1;
379 
380 	TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
381 	du->ldu_actor = lp->lp_actor;
382 
383 	TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
384 	    sizeof(du->ldu_partner));
385 	du->ldu_partner = lp->lp_partner;
386 
387 	TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
388 	    sizeof(du->ldu_collector));
389 	du->ldu_collector.lci_maxdelay = 0;
390 
391 #if defined(LACP_DEBUG)
392 	LACP_DPRINTF((lp, "lacpdu transmit\n"));
393 	lacp_dump_lacpdu(du);
394 #endif /* defined(LACP_DEBUG) */
395 
396 	m->m_flags |= M_MCAST;
397 
398 	/*
399 	 * XXX should use higher priority queue.
400 	 * otherwise network congestion can break aggregation.
401 	 */
402 
403 	error = lagg_enqueue(lp->lp_ifp, m);
404 	return (error);
405 }
406 
407 void
408 lacp_linkstate(struct lagg_port *lgp)
409 {
410 	struct lacp_port *lp = LACP_PORT(lgp);
411 	struct ifnet *ifp = lgp->lp_ifp;
412 	struct ifmediareq ifmr;
413 	int error = 0;
414 	u_int media;
415 	uint8_t old_state;
416 	uint16_t old_key;
417 
418 	LAGG_WLOCK_ASSERT(lgp->lp_lagg);
419 
420 	bzero((char *)&ifmr, sizeof(ifmr));
421 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
422 	if (error != 0)
423 		return;
424 
425 	media = ifmr.ifm_active;
426 	LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, "
427 	    "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER,
428 	    (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP));
429 	old_state = lp->lp_state;
430 	old_key = lp->lp_key;
431 
432 	lp->lp_media = media;
433 	/*
434 	 * If the port is not an active full duplex Ethernet link then it can
435 	 * not be aggregated.
436 	 */
437 	if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 ||
438 	    ifp->if_link_state != LINK_STATE_UP) {
439 		lacp_port_disable(lp);
440 	} else {
441 		lacp_port_enable(lp);
442 	}
443 	lp->lp_key = lacp_compose_key(lp);
444 
445 	if (old_state != lp->lp_state || old_key != lp->lp_key) {
446 		LACP_DPRINTF((lp, "-> UNSELECTED\n"));
447 		lp->lp_selected = LACP_UNSELECTED;
448 	}
449 }
450 
451 static void
452 lacp_tick(void *arg)
453 {
454 	struct lacp_softc *lsc = arg;
455 	struct lagg_softc *sc = lsc->lsc_lagg;
456 	struct lacp_port *lp;
457 
458 	LAGG_WLOCK(sc);
459 	LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
460 		if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
461 			continue;
462 
463 		lacp_run_timers(lp);
464 
465 		lacp_select(lp);
466 		lacp_sm_mux(lp);
467 		lacp_sm_tx(lp);
468 		lacp_sm_ptx_tx_schedule(lp);
469 	}
470 	LAGG_WUNLOCK(sc);
471 	callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
472 }
473 
474 int
475 lacp_port_create(struct lagg_port *lgp)
476 {
477 	struct lagg_softc *lgs = lgp->lp_lagg;
478 	struct lacp_softc *lsc = LACP_SOFTC(lgs);
479 	struct lacp_port *lp;
480 	struct ifnet *ifp = lgp->lp_ifp;
481 	struct sockaddr_dl sdl;
482 	struct ifmultiaddr *rifma = NULL;
483 	int error;
484 
485 	boolean_t active = TRUE; /* XXX should be configurable */
486 	boolean_t fast = FALSE; /* XXX should be configurable */
487 
488 	LAGG_WLOCK_ASSERT(lgs);
489 
490 	bzero((char *)&sdl, sizeof(sdl));
491 	sdl.sdl_len = sizeof(sdl);
492 	sdl.sdl_family = AF_LINK;
493 	sdl.sdl_index = ifp->if_index;
494 	sdl.sdl_type = IFT_ETHER;
495 	sdl.sdl_alen = ETHER_ADDR_LEN;
496 
497 	bcopy(&ethermulticastaddr_slowprotocols,
498 	    LLADDR(&sdl), ETHER_ADDR_LEN);
499 	error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
500 	if (error) {
501 		printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
502 		return (error);
503 	}
504 
505 	lp = malloc(sizeof(struct lacp_port),
506 	    M_DEVBUF, M_NOWAIT|M_ZERO);
507 	if (lp == NULL)
508 		return (ENOMEM);
509 
510 	lgp->lp_psc = (caddr_t)lp;
511 	lp->lp_ifp = ifp;
512 	lp->lp_lagg = lgp;
513 	lp->lp_lsc = lsc;
514 	lp->lp_ifma = rifma;
515 
516 	LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
517 
518 	lacp_fill_actorinfo(lp, &lp->lp_actor);
519 	lp->lp_state =
520 	    (active ? LACP_STATE_ACTIVITY : 0) |
521 	    (fast ? LACP_STATE_TIMEOUT : 0);
522 	lp->lp_aggregator = NULL;
523 	lacp_linkstate(lgp);
524 	lacp_sm_rx_set_expired(lp);
525 
526 	return (0);
527 }
528 
529 void
530 lacp_port_destroy(struct lagg_port *lgp)
531 {
532 	struct lacp_port *lp = LACP_PORT(lgp);
533 	int i;
534 
535 	LAGG_WLOCK_ASSERT(lgp->lp_lagg);
536 
537 	for (i = 0; i < LACP_NTIMER; i++) {
538 		LACP_TIMER_DISARM(lp, i);
539 	}
540 
541 	lacp_disable_collecting(lp);
542 	lacp_disable_distributing(lp);
543 	lacp_unselect(lp);
544 	lgp->lp_flags &= ~LAGG_PORT_DISABLED;
545 
546 	/* The address may have already been removed by if_purgemaddrs() */
547 	if (!lgp->lp_detaching)
548 		if_delmulti_ifma(lp->lp_ifma);
549 
550 	LIST_REMOVE(lp, lp_next);
551 	free(lp, M_DEVBUF);
552 }
553 
554 int
555 lacp_port_isactive(struct lagg_port *lgp)
556 {
557 	struct lacp_port *lp = LACP_PORT(lgp);
558 	struct lacp_softc *lsc = lp->lp_lsc;
559 	struct lacp_aggregator *la = lp->lp_aggregator;
560 
561 	/* This port is joined to the active aggregator */
562 	if (la != NULL && la == lsc->lsc_active_aggregator)
563 		return (1);
564 
565 	return (0);
566 }
567 
568 static void
569 lacp_disable_collecting(struct lacp_port *lp)
570 {
571 	struct lagg_port *lgp = lp->lp_lagg;
572 
573 	LACP_DPRINTF((lp, "collecting disabled\n"));
574 
575 	lp->lp_state &= ~LACP_STATE_COLLECTING;
576 	lgp->lp_flags &= ~LAGG_PORT_COLLECTING;
577 }
578 
579 static void
580 lacp_enable_collecting(struct lacp_port *lp)
581 {
582 	struct lagg_port *lgp = lp->lp_lagg;
583 
584 	LACP_DPRINTF((lp, "collecting enabled\n"));
585 
586 	lp->lp_state |= LACP_STATE_COLLECTING;
587 	lgp->lp_flags |= LAGG_PORT_COLLECTING;
588 }
589 
590 static void
591 lacp_disable_distributing(struct lacp_port *lp)
592 {
593 	struct lacp_aggregator *la = lp->lp_aggregator;
594 	struct lacp_softc *lsc = lp->lp_lsc;
595 	struct lagg_port *lgp = lp->lp_lagg;
596 #if defined(LACP_DEBUG)
597 	char buf[LACP_LAGIDSTR_MAX+1];
598 #endif /* defined(LACP_DEBUG) */
599 
600 	LAGG_WLOCK_ASSERT(lgp->lp_lagg);
601 
602 	if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
603 		return;
604 	}
605 
606 	KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
607 	KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
608 	KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
609 
610 	LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
611 	    "nports %d -> %d\n",
612 	    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
613 	    la->la_nports, la->la_nports - 1));
614 
615 	TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
616 	la->la_nports--;
617 
618 	lacp_suppress_distributing(lsc, la);
619 
620 	lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
621 	lgp->lp_flags &= ~LAGG_PORT_DISTRIBUTING;
622 
623 	if (lsc->lsc_active_aggregator == la) {
624 		lacp_select_active_aggregator(lsc);
625 	}
626 }
627 
628 static void
629 lacp_enable_distributing(struct lacp_port *lp)
630 {
631 	struct lacp_aggregator *la = lp->lp_aggregator;
632 	struct lacp_softc *lsc = lp->lp_lsc;
633 	struct lagg_port *lgp = lp->lp_lagg;
634 #if defined(LACP_DEBUG)
635 	char buf[LACP_LAGIDSTR_MAX+1];
636 #endif /* defined(LACP_DEBUG) */
637 
638 	LAGG_WLOCK_ASSERT(lgp->lp_lagg);
639 
640 	if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
641 		return;
642 	}
643 
644 	LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
645 	    "nports %d -> %d\n",
646 	    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
647 	    la->la_nports, la->la_nports + 1));
648 
649 	KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
650 	TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
651 	la->la_nports++;
652 
653 	lacp_suppress_distributing(lsc, la);
654 
655 	lp->lp_state |= LACP_STATE_DISTRIBUTING;
656 	lgp->lp_flags |= LAGG_PORT_DISTRIBUTING;
657 
658 	if (lsc->lsc_active_aggregator != la) {
659 		lacp_select_active_aggregator(lsc);
660 	}
661 }
662 
663 static void
664 lacp_transit_expire(void *vp)
665 {
666 	struct lacp_softc *lsc = vp;
667 
668 	LACP_DPRINTF((NULL, "%s\n", __func__));
669 	lsc->lsc_suppress_distributing = FALSE;
670 }
671 
672 int
673 lacp_attach(struct lagg_softc *lgs)
674 {
675 	struct lacp_softc *lsc;
676 
677 	LAGG_WLOCK_ASSERT(lgs);
678 
679 	lsc = malloc(sizeof(struct lacp_softc),
680 	    M_DEVBUF, M_NOWAIT|M_ZERO);
681 	if (lsc == NULL)
682 		return (ENOMEM);
683 
684 	lgs->sc_psc = (caddr_t)lsc;
685 	lsc->lsc_lagg = lgs;
686 
687 	lsc->lsc_hashkey = arc4random();
688 	lsc->lsc_active_aggregator = NULL;
689 	TAILQ_INIT(&lsc->lsc_aggregators);
690 	LIST_INIT(&lsc->lsc_ports);
691 
692 	TASK_INIT(&lsc->lsc_qtask, 0, lacp_dequeue, lsc);
693 	mtx_init(&lsc->lsc_queue.ifq_mtx, "lacp queue", NULL, MTX_DEF);
694 	lsc->lsc_queue.ifq_maxlen = ifqmaxlen;
695 
696 	callout_init(&lsc->lsc_transit_callout, CALLOUT_MPSAFE);
697 	callout_init(&lsc->lsc_callout, CALLOUT_MPSAFE);
698 
699 	/* if the lagg is already up then do the same */
700 	if (lgs->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
701 		lacp_init(lgs);
702 
703 	return (0);
704 }
705 
/*
 * lacp_detach: release the LACP state of a lagg interface.
 *
 * Expects that no aggregator exists any more (asserted).  Unhooks the
 * softc from the lagg, waits for both callouts and the input task to
 * finish, discards any LACPDUs still queued, destroys the queue mutex and
 * frees the softc.  Always returns 0.
 *
 * NOTE(review): lacp_tick() and lacp_dequeue() both take the lagg write
 * lock.  If a caller holds that lock across this function the
 * callout_drain()/taskqueue_drain() calls below could wait on a handler
 * that is itself waiting for the lock -- confirm how detach is invoked.
 */
int
lacp_detach(struct lagg_softc *lgs)
{
	struct lacp_softc *lsc = LACP_SOFTC(lgs);

	KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
	    ("aggregators still active"));
	KASSERT(lsc->lsc_active_aggregator == NULL,
	    ("aggregator still attached"));

	/* Detach from the lagg first; the handlers reach us via lsc only. */
	lgs->sc_psc = NULL;
	callout_drain(&lsc->lsc_transit_callout);
	callout_drain(&lsc->lsc_callout);
	taskqueue_drain(taskqueue_swi, &lsc->lsc_qtask);
	/* Free whatever the task did not get to process. */
	IF_DRAIN(&lsc->lsc_queue);
	mtx_destroy(&lsc->lsc_queue.ifq_mtx);

	free(lsc, M_DEVBUF);
	return (0);
}
726 
727 void
728 lacp_init(struct lagg_softc *lgs)
729 {
730 	struct lacp_softc *lsc = LACP_SOFTC(lgs);
731 
732 	callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
733 }
734 
735 void
736 lacp_stop(struct lagg_softc *lgs)
737 {
738 	struct lacp_softc *lsc = LACP_SOFTC(lgs);
739 
740 	callout_stop(&lsc->lsc_transit_callout);
741 	callout_stop(&lsc->lsc_callout);
742 }
743 
744 struct lagg_port *
745 lacp_select_tx_port(struct lagg_softc *lgs, struct mbuf *m)
746 {
747 	struct lacp_softc *lsc = LACP_SOFTC(lgs);
748 	struct lacp_aggregator *la;
749 	struct lacp_port *lp;
750 	uint32_t hash;
751 	int nports;
752 
753 	LAGG_RLOCK_ASSERT(lgs);
754 
755 	if (__predict_false(lsc->lsc_suppress_distributing)) {
756 		LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
757 		return (NULL);
758 	}
759 
760 	la = lsc->lsc_active_aggregator;
761 	if (__predict_false(la == NULL)) {
762 		LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
763 		return (NULL);
764 	}
765 
766 	nports = la->la_nports;
767 	KASSERT(nports > 0, ("no ports available"));
768 
769 	hash = lagg_hashmbuf(m, lsc->lsc_hashkey);
770 	hash %= nports;
771 	lp = TAILQ_FIRST(&la->la_ports);
772 	while (hash--) {
773 		lp = TAILQ_NEXT(lp, lp_dist_q);
774 	}
775 
776 	KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
777 	    ("aggregated port is not distributing"));
778 
779 	return (lp->lp_lagg);
780 }
781 /*
782  * lacp_suppress_distributing: drop transmit packets for a while
783  * to preserve packet ordering.
784  */
785 
786 static void
787 lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
788 {
789 	if (lsc->lsc_active_aggregator != la) {
790 		return;
791 	}
792 
793 	LACP_DPRINTF((NULL, "%s\n", __func__));
794 	lsc->lsc_suppress_distributing = TRUE;
795 	/* XXX should consider collector max delay */
796 	callout_reset(&lsc->lsc_transit_callout,
797 	    LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
798 }
799 
800 static int
801 lacp_compare_peerinfo(const struct lacp_peerinfo *a,
802     const struct lacp_peerinfo *b)
803 {
804 	return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state)));
805 }
806 
807 static int
808 lacp_compare_systemid(const struct lacp_systemid *a,
809     const struct lacp_systemid *b)
810 {
811 	return (memcmp(a, b, sizeof(*a)));
812 }
813 
#if 0	/* unused */
/*
 * lacp_compare_portid: byte-wise comparison of two port ids.
 * Returns the memcmp() result (0 when equal).  Currently compiled out.
 */
static int
lacp_compare_portid(const struct lacp_portid *a,
    const struct lacp_portid *b)
{
	const size_t cmplen = sizeof(*a);

	return (memcmp(a, b, cmplen));
}
#endif
822 
823 static uint64_t
824 lacp_aggregator_bandwidth(struct lacp_aggregator *la)
825 {
826 	struct lacp_port *lp;
827 	uint64_t speed;
828 
829 	lp = TAILQ_FIRST(&la->la_ports);
830 	if (lp == NULL) {
831 		return (0);
832 	}
833 
834 	speed = ifmedia_baudrate(lp->lp_media);
835 	speed *= la->la_nports;
836 	if (speed == 0) {
837 		LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
838 		    lp->lp_media, la->la_nports));
839 	}
840 
841 	return (speed);
842 }
843 
844 /*
845  * lacp_select_active_aggregator: select an aggregator to be used to transmit
846  * packets from lagg(4) interface.
847  */
848 
849 static void
850 lacp_select_active_aggregator(struct lacp_softc *lsc)
851 {
852 	struct lacp_aggregator *la;
853 	struct lacp_aggregator *best_la = NULL;
854 	uint64_t best_speed = 0;
855 #if defined(LACP_DEBUG)
856 	char buf[LACP_LAGIDSTR_MAX+1];
857 #endif /* defined(LACP_DEBUG) */
858 
859 	LACP_DPRINTF((NULL, "%s:\n", __func__));
860 
861 	TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
862 		uint64_t speed;
863 
864 		if (la->la_nports == 0) {
865 			continue;
866 		}
867 
868 		speed = lacp_aggregator_bandwidth(la);
869 		LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
870 		    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
871 		    speed, la->la_nports));
872 		if (speed > best_speed ||
873 		    (speed == best_speed &&
874 		    la == lsc->lsc_active_aggregator)) {
875 			best_la = la;
876 			best_speed = speed;
877 		}
878 	}
879 
880 	KASSERT(best_la == NULL || best_la->la_nports > 0,
881 	    ("invalid aggregator refcnt"));
882 	KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
883 	    ("invalid aggregator list"));
884 
885 #if defined(LACP_DEBUG)
886 	if (lsc->lsc_active_aggregator != best_la) {
887 		LACP_DPRINTF((NULL, "active aggregator changed\n"));
888 		LACP_DPRINTF((NULL, "old %s\n",
889 		    lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
890 		    buf, sizeof(buf))));
891 	} else {
892 		LACP_DPRINTF((NULL, "active aggregator not changed\n"));
893 	}
894 	LACP_DPRINTF((NULL, "new %s\n",
895 	    lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
896 #endif /* defined(LACP_DEBUG) */
897 
898 	if (lsc->lsc_active_aggregator != best_la) {
899 		lsc->lsc_active_aggregator = best_la;
900 		if (best_la) {
901 			lacp_suppress_distributing(lsc, best_la);
902 		}
903 	}
904 }
905 
906 static uint16_t
907 lacp_compose_key(struct lacp_port *lp)
908 {
909 	struct lagg_port *lgp = lp->lp_lagg;
910 	struct lagg_softc *lgs = lgp->lp_lagg;
911 	u_int media = lp->lp_media;
912 	uint16_t key;
913 
914 	if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) {
915 
916 		/*
917 		 * non-aggregatable links should have unique keys.
918 		 *
919 		 * XXX this isn't really unique as if_index is 16 bit.
920 		 */
921 
922 		/* bit 0..14:	(some bits of) if_index of this port */
923 		key = lp->lp_ifp->if_index;
924 		/* bit 15:	1 */
925 		key |= 0x8000;
926 	} else {
927 		u_int subtype = IFM_SUBTYPE(media);
928 
929 		KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type"));
930 		KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface"));
931 
932 		/* bit 0..4:	IFM_SUBTYPE */
933 		key = subtype;
934 		/* bit 5..14:	(some bits of) if_index of lagg device */
935 		key |= 0x7fe0 & ((lgs->sc_ifp->if_index) << 5);
936 		/* bit 15:	0 */
937 	}
938 	return (htons(key));
939 }
940 
941 static void
942 lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
943 {
944 #if defined(LACP_DEBUG)
945 	char buf[LACP_LAGIDSTR_MAX+1];
946 #endif
947 
948 	LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
949 	    __func__,
950 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
951 	    buf, sizeof(buf)),
952 	    la->la_refcnt, la->la_refcnt + 1));
953 
954 	KASSERT(la->la_refcnt > 0, ("refcount <= 0"));
955 	la->la_refcnt++;
956 	KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount"));
957 }
958 
959 static void
960 lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
961 {
962 #if defined(LACP_DEBUG)
963 	char buf[LACP_LAGIDSTR_MAX+1];
964 #endif
965 
966 	LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
967 	    __func__,
968 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
969 	    buf, sizeof(buf)),
970 	    la->la_refcnt, la->la_refcnt - 1));
971 
972 	KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt"));
973 	la->la_refcnt--;
974 	if (la->la_refcnt > 0) {
975 		return;
976 	}
977 
978 	KASSERT(la->la_refcnt == 0, ("refcount not zero"));
979 	KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active"));
980 
981 	TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q);
982 
983 	free(la, M_DEVBUF);
984 }
985 
986 /*
987  * lacp_aggregator_get: allocate an aggregator.
988  */
989 
990 static struct lacp_aggregator *
991 lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
992 {
993 	struct lacp_aggregator *la;
994 
995 	la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT);
996 	if (la) {
997 		la->la_refcnt = 1;
998 		la->la_nports = 0;
999 		TAILQ_INIT(&la->la_ports);
1000 		la->la_pending = 0;
1001 		TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
1002 	}
1003 
1004 	return (la);
1005 }
1006 
1007 /*
1008  * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
1009  */
1010 
1011 static void
1012 lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
1013 {
1014 	lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner);
1015 	lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor);
1016 
1017 	la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
1018 }
1019 
1020 static void
1021 lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
1022     const struct lacp_peerinfo *lpi_port)
1023 {
1024 	memset(lpi_aggr, 0, sizeof(*lpi_aggr));
1025 	lpi_aggr->lip_systemid = lpi_port->lip_systemid;
1026 	lpi_aggr->lip_key = lpi_port->lip_key;
1027 }
1028 
1029 /*
1030  * lacp_aggregator_is_compatible: check if a port can join to an aggregator.
1031  */
1032 
1033 static int
1034 lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
1035     const struct lacp_port *lp)
1036 {
1037 	if (!(lp->lp_state & LACP_STATE_AGGREGATION) ||
1038 	    !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) {
1039 		return (0);
1040 	}
1041 
1042 	if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) {
1043 		return (0);
1044 	}
1045 
1046 	if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) {
1047 		return (0);
1048 	}
1049 
1050 	if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) {
1051 		return (0);
1052 	}
1053 
1054 	return (1);
1055 }
1056 
1057 static int
1058 lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
1059     const struct lacp_peerinfo *b)
1060 {
1061 	if (memcmp(&a->lip_systemid, &b->lip_systemid,
1062 	    sizeof(a->lip_systemid))) {
1063 		return (0);
1064 	}
1065 
1066 	if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) {
1067 		return (0);
1068 	}
1069 
1070 	return (1);
1071 }
1072 
1073 static void
1074 lacp_port_enable(struct lacp_port *lp)
1075 {
1076 	struct lagg_port *lgp = lp->lp_lagg;
1077 
1078 	lp->lp_state |= LACP_STATE_AGGREGATION;
1079 	lgp->lp_flags &= ~LAGG_PORT_DISABLED;
1080 }
1081 
1082 static void
1083 lacp_port_disable(struct lacp_port *lp)
1084 {
1085 	struct lagg_port *lgp = lp->lp_lagg;
1086 
1087 	lacp_set_mux(lp, LACP_MUX_DETACHED);
1088 
1089 	lp->lp_state &= ~LACP_STATE_AGGREGATION;
1090 	lp->lp_selected = LACP_UNSELECTED;
1091 	lacp_sm_rx_record_default(lp);
1092 	lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
1093 	lp->lp_state &= ~LACP_STATE_EXPIRED;
1094 	lgp->lp_flags |= LAGG_PORT_DISABLED;
1095 }
1096 
1097 /*
1098  * lacp_select: select an aggregator.  create one if necessary.
1099  */
1100 static void
1101 lacp_select(struct lacp_port *lp)
1102 {
1103 	struct lacp_softc *lsc = lp->lp_lsc;
1104 	struct lacp_aggregator *la;
1105 #if defined(LACP_DEBUG)
1106 	char buf[LACP_LAGIDSTR_MAX+1];
1107 #endif
1108 
1109 	if (lp->lp_aggregator) {
1110 		return;
1111 	}
1112 
1113 	KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1114 	    ("timer_wait_while still active"));
1115 
1116 	LACP_DPRINTF((lp, "port lagid=%s\n",
1117 	    lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
1118 	    buf, sizeof(buf))));
1119 
1120 	TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
1121 		if (lacp_aggregator_is_compatible(la, lp)) {
1122 			break;
1123 		}
1124 	}
1125 
1126 	if (la == NULL) {
1127 		la = lacp_aggregator_get(lsc, lp);
1128 		if (la == NULL) {
1129 			LACP_DPRINTF((lp, "aggregator creation failed\n"));
1130 
1131 			/*
1132 			 * will retry on the next tick.
1133 			 */
1134 
1135 			return;
1136 		}
1137 		lacp_fill_aggregator_id(la, lp);
1138 		LACP_DPRINTF((lp, "aggregator created\n"));
1139 	} else {
1140 		LACP_DPRINTF((lp, "compatible aggregator found\n"));
1141 		lacp_aggregator_addref(lsc, la);
1142 	}
1143 
1144 	LACP_DPRINTF((lp, "aggregator lagid=%s\n",
1145 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
1146 	    buf, sizeof(buf))));
1147 
1148 	lp->lp_aggregator = la;
1149 	lp->lp_selected = LACP_SELECTED;
1150 }
1151 
1152 /*
1153  * lacp_unselect: finish unselect/detach process.
1154  */
1155 
1156 static void
1157 lacp_unselect(struct lacp_port *lp)
1158 {
1159 	struct lacp_softc *lsc = lp->lp_lsc;
1160 	struct lacp_aggregator *la = lp->lp_aggregator;
1161 
1162 	KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1163 	    ("timer_wait_while still active"));
1164 
1165 	if (la == NULL) {
1166 		return;
1167 	}
1168 
1169 	lp->lp_aggregator = NULL;
1170 	lacp_aggregator_delref(lsc, la);
1171 }
1172 
1173 /* mux machine */
1174 
1175 static void
1176 lacp_sm_mux(struct lacp_port *lp)
1177 {
1178 	enum lacp_mux_state new_state;
1179 	boolean_t p_sync =
1180 		    (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
1181 	boolean_t p_collecting =
1182 	    (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0;
1183 	enum lacp_selected selected = lp->lp_selected;
1184 	struct lacp_aggregator *la;
1185 
1186 	/* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */
1187 
1188 re_eval:
1189 	la = lp->lp_aggregator;
1190 	KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL,
1191 	    ("MUX not detached"));
1192 	new_state = lp->lp_mux_state;
1193 	switch (lp->lp_mux_state) {
1194 	case LACP_MUX_DETACHED:
1195 		if (selected != LACP_UNSELECTED) {
1196 			new_state = LACP_MUX_WAITING;
1197 		}
1198 		break;
1199 	case LACP_MUX_WAITING:
1200 		KASSERT(la->la_pending > 0 ||
1201 		    !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1202 		    ("timer_wait_while still active"));
1203 		if (selected == LACP_SELECTED && la->la_pending == 0) {
1204 			new_state = LACP_MUX_ATTACHED;
1205 		} else if (selected == LACP_UNSELECTED) {
1206 			new_state = LACP_MUX_DETACHED;
1207 		}
1208 		break;
1209 	case LACP_MUX_ATTACHED:
1210 		if (selected == LACP_SELECTED && p_sync) {
1211 			new_state = LACP_MUX_COLLECTING;
1212 		} else if (selected != LACP_SELECTED) {
1213 			new_state = LACP_MUX_DETACHED;
1214 		}
1215 		break;
1216 	case LACP_MUX_COLLECTING:
1217 		if (selected == LACP_SELECTED && p_sync && p_collecting) {
1218 			new_state = LACP_MUX_DISTRIBUTING;
1219 		} else if (selected != LACP_SELECTED || !p_sync) {
1220 			new_state = LACP_MUX_ATTACHED;
1221 		}
1222 		break;
1223 	case LACP_MUX_DISTRIBUTING:
1224 		if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
1225 			new_state = LACP_MUX_COLLECTING;
1226 		}
1227 		break;
1228 	default:
1229 		panic("%s: unknown state", __func__);
1230 	}
1231 
1232 	if (lp->lp_mux_state == new_state) {
1233 		return;
1234 	}
1235 
1236 	lacp_set_mux(lp, new_state);
1237 	goto re_eval;
1238 }
1239 
1240 static void
1241 lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state)
1242 {
1243 	struct lacp_aggregator *la = lp->lp_aggregator;
1244 
1245 	if (lp->lp_mux_state == new_state) {
1246 		return;
1247 	}
1248 
1249 	switch (new_state) {
1250 	case LACP_MUX_DETACHED:
1251 		lp->lp_state &= ~LACP_STATE_SYNC;
1252 		lacp_disable_distributing(lp);
1253 		lacp_disable_collecting(lp);
1254 		lacp_sm_assert_ntt(lp);
1255 		/* cancel timer */
1256 		if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) {
1257 			KASSERT(la->la_pending > 0,
1258 			    ("timer_wait_while not active"));
1259 			la->la_pending--;
1260 		}
1261 		LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE);
1262 		lacp_unselect(lp);
1263 		break;
1264 	case LACP_MUX_WAITING:
1265 		LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE,
1266 		    LACP_AGGREGATE_WAIT_TIME);
1267 		la->la_pending++;
1268 		break;
1269 	case LACP_MUX_ATTACHED:
1270 		lp->lp_state |= LACP_STATE_SYNC;
1271 		lacp_disable_collecting(lp);
1272 		lacp_sm_assert_ntt(lp);
1273 		break;
1274 	case LACP_MUX_COLLECTING:
1275 		lacp_enable_collecting(lp);
1276 		lacp_disable_distributing(lp);
1277 		lacp_sm_assert_ntt(lp);
1278 		break;
1279 	case LACP_MUX_DISTRIBUTING:
1280 		lacp_enable_distributing(lp);
1281 		break;
1282 	default:
1283 		panic("%s: unknown state", __func__);
1284 	}
1285 
1286 	LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state));
1287 
1288 	lp->lp_mux_state = new_state;
1289 }
1290 
1291 static void
1292 lacp_sm_mux_timer(struct lacp_port *lp)
1293 {
1294 	struct lacp_aggregator *la = lp->lp_aggregator;
1295 #if defined(LACP_DEBUG)
1296 	char buf[LACP_LAGIDSTR_MAX+1];
1297 #endif
1298 
1299 	KASSERT(la->la_pending > 0, ("no pending event"));
1300 
1301 	LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__,
1302 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
1303 	    buf, sizeof(buf)),
1304 	    la->la_pending, la->la_pending - 1));
1305 
1306 	la->la_pending--;
1307 }
1308 
1309 /* periodic transmit machine */
1310 
1311 static void
1312 lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate)
1313 {
1314 	if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state,
1315 	    LACP_STATE_TIMEOUT)) {
1316 		return;
1317 	}
1318 
1319 	LACP_DPRINTF((lp, "partner timeout changed\n"));
1320 
1321 	/*
1322 	 * FAST_PERIODIC -> SLOW_PERIODIC
1323 	 * or
1324 	 * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC
1325 	 *
1326 	 * let lacp_sm_ptx_tx_schedule to update timeout.
1327 	 */
1328 
1329 	LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
1330 
1331 	/*
1332 	 * if timeout has been shortened, assert NTT.
1333 	 */
1334 
1335 	if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) {
1336 		lacp_sm_assert_ntt(lp);
1337 	}
1338 }
1339 
1340 static void
1341 lacp_sm_ptx_tx_schedule(struct lacp_port *lp)
1342 {
1343 	int timeout;
1344 
1345 	if (!(lp->lp_state & LACP_STATE_ACTIVITY) &&
1346 	    !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) {
1347 
1348 		/*
1349 		 * NO_PERIODIC
1350 		 */
1351 
1352 		LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
1353 		return;
1354 	}
1355 
1356 	if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) {
1357 		return;
1358 	}
1359 
1360 	timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ?
1361 	    LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME;
1362 
1363 	LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout);
1364 }
1365 
/*
 * lacp_sm_ptx_timer: periodic transmit timer handler; all it does is
 * assert NTT for the port.
 */
static void
lacp_sm_ptx_timer(struct lacp_port *lp)
{

	lacp_sm_assert_ntt(lp);
}
1371 
1372 static void
1373 lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du)
1374 {
1375 	int timeout;
1376 
1377 	/*
1378 	 * check LACP_DISABLED first
1379 	 */
1380 
1381 	if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
1382 		return;
1383 	}
1384 
1385 	/*
1386 	 * check loopback condition.
1387 	 */
1388 
1389 	if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid,
1390 	    &lp->lp_actor.lip_systemid)) {
1391 		return;
1392 	}
1393 
1394 	/*
1395 	 * EXPIRED, DEFAULTED, CURRENT -> CURRENT
1396 	 */
1397 
1398 	lacp_sm_rx_update_selected(lp, du);
1399 	lacp_sm_rx_update_ntt(lp, du);
1400 	lacp_sm_rx_record_pdu(lp, du);
1401 
1402 	timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ?
1403 	    LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;
1404 	LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout);
1405 
1406 	lp->lp_state &= ~LACP_STATE_EXPIRED;
1407 
1408 	/*
1409 	 * kick transmit machine without waiting the next tick.
1410 	 */
1411 
1412 	lacp_sm_tx(lp);
1413 }
1414 
1415 static void
1416 lacp_sm_rx_set_expired(struct lacp_port *lp)
1417 {
1418 	lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
1419 	lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT;
1420 	LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME);
1421 	lp->lp_state |= LACP_STATE_EXPIRED;
1422 }
1423 
1424 static void
1425 lacp_sm_rx_timer(struct lacp_port *lp)
1426 {
1427 	if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) {
1428 		/* CURRENT -> EXPIRED */
1429 		LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__));
1430 		lacp_sm_rx_set_expired(lp);
1431 	} else {
1432 		/* EXPIRED -> DEFAULTED */
1433 		LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__));
1434 		lacp_sm_rx_update_default_selected(lp);
1435 		lacp_sm_rx_record_default(lp);
1436 		lp->lp_state &= ~LACP_STATE_EXPIRED;
1437 	}
1438 }
1439 
1440 static void
1441 lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
1442 {
1443 	boolean_t active;
1444 	uint8_t oldpstate;
1445 #if defined(LACP_DEBUG)
1446 	char buf[LACP_STATESTR_MAX+1];
1447 #endif
1448 
1449 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1450 
1451 	oldpstate = lp->lp_partner.lip_state;
1452 
1453 	active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY)
1454 	    || ((lp->lp_state & LACP_STATE_ACTIVITY) &&
1455 	    (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY));
1456 
1457 	lp->lp_partner = du->ldu_actor;
1458 	if (active &&
1459 	    ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
1460 	    LACP_STATE_AGGREGATION) &&
1461 	    !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner))
1462 	    || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) {
1463 		/* XXX nothing? */
1464 	} else {
1465 		lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
1466 	}
1467 
1468 	lp->lp_state &= ~LACP_STATE_DEFAULTED;
1469 
1470 	if (oldpstate != lp->lp_partner.lip_state) {
1471 		LACP_DPRINTF((lp, "old pstate %s\n",
1472 		    lacp_format_state(oldpstate, buf, sizeof(buf))));
1473 		LACP_DPRINTF((lp, "new pstate %s\n",
1474 		    lacp_format_state(lp->lp_partner.lip_state, buf,
1475 		    sizeof(buf))));
1476 	}
1477 
1478 	lacp_sm_ptx_update_timeout(lp, oldpstate);
1479 }
1480 
1481 static void
1482 lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
1483 {
1484 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1485 
1486 	if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) ||
1487 	    !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
1488 	    LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) {
1489 		LACP_DPRINTF((lp, "%s: assert ntt\n", __func__));
1490 		lacp_sm_assert_ntt(lp);
1491 	}
1492 }
1493 
1494 static void
1495 lacp_sm_rx_record_default(struct lacp_port *lp)
1496 {
1497 	uint8_t oldpstate;
1498 
1499 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1500 
1501 	oldpstate = lp->lp_partner.lip_state;
1502 	lp->lp_partner = lacp_partner_admin;
1503 	lp->lp_state |= LACP_STATE_DEFAULTED;
1504 	lacp_sm_ptx_update_timeout(lp, oldpstate);
1505 }
1506 
1507 static void
1508 lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
1509     const struct lacp_peerinfo *info)
1510 {
1511 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1512 
1513 	if (lacp_compare_peerinfo(&lp->lp_partner, info) ||
1514 	    !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
1515 	    LACP_STATE_AGGREGATION)) {
1516 		lp->lp_selected = LACP_UNSELECTED;
1517 		/* mux machine will clean up lp->lp_aggregator */
1518 	}
1519 }
1520 
1521 static void
1522 lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
1523 {
1524 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1525 
1526 	lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
1527 }
1528 
1529 static void
1530 lacp_sm_rx_update_default_selected(struct lacp_port *lp)
1531 {
1532 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1533 
1534 	lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
1535 }
1536 
1537 /* transmit machine */
1538 
1539 static void
1540 lacp_sm_tx(struct lacp_port *lp)
1541 {
1542 	int error;
1543 
1544 	if (!(lp->lp_state & LACP_STATE_AGGREGATION)
1545 #if 1
1546 	    || (!(lp->lp_state & LACP_STATE_ACTIVITY)
1547 	    && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY))
1548 #endif
1549 	    ) {
1550 		lp->lp_flags &= ~LACP_PORT_NTT;
1551 	}
1552 
1553 	if (!(lp->lp_flags & LACP_PORT_NTT)) {
1554 		return;
1555 	}
1556 
1557 	/* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */
1558 	if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent,
1559 		    (3 / LACP_FAST_PERIODIC_TIME)) == 0) {
1560 		LACP_DPRINTF((lp, "rate limited pdu\n"));
1561 		return;
1562 	}
1563 
1564 	error = lacp_xmit_lacpdu(lp);
1565 
1566 	if (error == 0) {
1567 		lp->lp_flags &= ~LACP_PORT_NTT;
1568 	} else {
1569 		LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n",
1570 		    error));
1571 	}
1572 }
1573 
1574 static void
1575 lacp_sm_assert_ntt(struct lacp_port *lp)
1576 {
1577 
1578 	lp->lp_flags |= LACP_PORT_NTT;
1579 }
1580 
1581 static void
1582 lacp_run_timers(struct lacp_port *lp)
1583 {
1584 	int i;
1585 
1586 	for (i = 0; i < LACP_NTIMER; i++) {
1587 		KASSERT(lp->lp_timer[i] >= 0,
1588 		    ("invalid timer value %d", lp->lp_timer[i]));
1589 		if (lp->lp_timer[i] == 0) {
1590 			continue;
1591 		} else if (--lp->lp_timer[i] <= 0) {
1592 			if (lacp_timer_funcs[i]) {
1593 				(*lacp_timer_funcs[i])(lp);
1594 			}
1595 		}
1596 	}
1597 }
1598 
1599 int
1600 lacp_marker_input(struct lagg_port *lgp, struct mbuf *m)
1601 {
1602 	struct lacp_port *lp = LACP_PORT(lgp);
1603 	struct markerdu *mdu;
1604 	int error = 0;
1605 
1606 	LAGG_RLOCK_ASSERT(lgp->lp_lagg);
1607 
1608 	if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
1609 		goto bad;
1610 	}
1611 
1612 	if (m->m_pkthdr.len != sizeof(*mdu)) {
1613 		goto bad;
1614 	}
1615 
1616 	if ((m->m_flags & M_MCAST) == 0) {
1617 		goto bad;
1618 	}
1619 
1620 	if (m->m_len < sizeof(*mdu)) {
1621 		m = m_pullup(m, sizeof(*mdu));
1622 		if (m == NULL) {
1623 			return (ENOMEM);
1624 		}
1625 	}
1626 
1627 	mdu = mtod(m, struct markerdu *);
1628 
1629 	if (memcmp(&mdu->mdu_eh.ether_dhost,
1630 	    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
1631 		goto bad;
1632 	}
1633 
1634 	/* XXX
1635 	KASSERT(mdu->mdu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_MARKER,
1636 	    ("a very bad kassert!"));
1637 	*/
1638 
1639 	if (mdu->mdu_sph.sph_version != 1) {
1640 		goto bad;
1641 	}
1642 
1643 	switch (mdu->mdu_tlv.tlv_type) {
1644 	case MARKER_TYPE_INFO:
1645 		if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
1646 		    marker_info_tlv_template, TRUE)) {
1647 			goto bad;
1648 		}
1649 		mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE;
1650 		memcpy(&mdu->mdu_eh.ether_dhost,
1651 		    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN);
1652 		memcpy(&mdu->mdu_eh.ether_shost,
1653 		    lgp->lp_lladdr, ETHER_ADDR_LEN);
1654 		error = lagg_enqueue(lp->lp_ifp, m);
1655 		break;
1656 
1657 	case MARKER_TYPE_RESPONSE:
1658 		if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
1659 		    marker_response_tlv_template, TRUE)) {
1660 			goto bad;
1661 		}
1662 		/*
1663 		 * we are not interested in responses as
1664 		 * we don't have a marker sender.
1665 		 */
1666 		/* FALLTHROUGH */
1667 	default:
1668 		goto bad;
1669 	}
1670 
1671 	return (error);
1672 
1673 bad:
1674 	m_freem(m);
1675 	return (EINVAL);
1676 }
1677 
1678 static int
1679 tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
1680     const struct tlv_template *tmpl, boolean_t check_type)
1681 {
1682 	while (/* CONSTCOND */ 1) {
1683 		if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) {
1684 			return (EINVAL);
1685 		}
1686 		if ((check_type && tlv->tlv_type != tmpl->tmpl_type) ||
1687 		    tlv->tlv_length != tmpl->tmpl_length) {
1688 			return (EINVAL);
1689 		}
1690 		if (tmpl->tmpl_type == 0) {
1691 			break;
1692 		}
1693 		tlv = (const struct tlvhdr *)
1694 		    ((const char *)tlv + tlv->tlv_length);
1695 		tmpl++;
1696 	}
1697 
1698 	return (0);
1699 }
1700 
1701 #if defined(LACP_DEBUG)
/*
 * lacp_format_mac: render the six bytes at mac as upper-case hexadecimal
 * pairs separated by dashes.  At most buflen bytes are written to buf,
 * which is also the return value.
 */
const char *
lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
{
	const unsigned int b0 = mac[0], b1 = mac[1], b2 = mac[2];
	const unsigned int b3 = mac[3], b4 = mac[4], b5 = mac[5];

	snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
	    b0, b1, b2, b3, b4, b5);

	return (buf);
}
1715 
1716 const char *
1717 lacp_format_systemid(const struct lacp_systemid *sysid,
1718     char *buf, size_t buflen)
1719 {
1720 	char macbuf[LACP_MACSTR_MAX+1];
1721 
1722 	snprintf(buf, buflen, "%04X,%s",
1723 	    ntohs(sysid->lsi_prio),
1724 	    lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf)));
1725 
1726 	return (buf);
1727 }
1728 
1729 const char *
1730 lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen)
1731 {
1732 	snprintf(buf, buflen, "%04X,%04X",
1733 	    ntohs(portid->lpi_prio),
1734 	    ntohs(portid->lpi_portno));
1735 
1736 	return (buf);
1737 }
1738 
1739 const char *
1740 lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen)
1741 {
1742 	char sysid[LACP_SYSTEMIDSTR_MAX+1];
1743 	char portid[LACP_PORTIDSTR_MAX+1];
1744 
1745 	snprintf(buf, buflen, "(%s,%04X,%s)",
1746 	    lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)),
1747 	    ntohs(peer->lip_key),
1748 	    lacp_format_portid(&peer->lip_portid, portid, sizeof(portid)));
1749 
1750 	return (buf);
1751 }
1752 
1753 const char *
1754 lacp_format_lagid(const struct lacp_peerinfo *a,
1755     const struct lacp_peerinfo *b, char *buf, size_t buflen)
1756 {
1757 	char astr[LACP_PARTNERSTR_MAX+1];
1758 	char bstr[LACP_PARTNERSTR_MAX+1];
1759 
1760 #if 0
1761 	/*
1762 	 * there's a convention to display small numbered peer
1763 	 * in the left.
1764 	 */
1765 
1766 	if (lacp_compare_peerinfo(a, b) > 0) {
1767 		const struct lacp_peerinfo *t;
1768 
1769 		t = a;
1770 		a = b;
1771 		b = t;
1772 	}
1773 #endif
1774 
1775 	snprintf(buf, buflen, "[%s,%s]",
1776 	    lacp_format_partner(a, astr, sizeof(astr)),
1777 	    lacp_format_partner(b, bstr, sizeof(bstr)));
1778 
1779 	return (buf);
1780 }
1781 
1782 const char *
1783 lacp_format_lagid_aggregator(const struct lacp_aggregator *la,
1784     char *buf, size_t buflen)
1785 {
1786 	if (la == NULL) {
1787 		return ("(none)");
1788 	}
1789 
1790 	return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen));
1791 }
1792 
1793 const char *
1794 lacp_format_state(uint8_t state, char *buf, size_t buflen)
1795 {
1796 	snprintf(buf, buflen, "%b", state, LACP_STATE_BITS);
1797 	return (buf);
1798 }
1799 
1800 static void
1801 lacp_dump_lacpdu(const struct lacpdu *du)
1802 {
1803 	char buf[LACP_PARTNERSTR_MAX+1];
1804 	char buf2[LACP_STATESTR_MAX+1];
1805 
1806 	printf("actor=%s\n",
1807 	    lacp_format_partner(&du->ldu_actor, buf, sizeof(buf)));
1808 	printf("actor.state=%s\n",
1809 	    lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2)));
1810 	printf("partner=%s\n",
1811 	    lacp_format_partner(&du->ldu_partner, buf, sizeof(buf)));
1812 	printf("partner.state=%s\n",
1813 	    lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2)));
1814 
1815 	printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay));
1816 }
1817 
1818 static void
1819 lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
1820 {
1821 	va_list va;
1822 
1823 	if (lp) {
1824 		printf("%s: ", lp->lp_ifp->if_xname);
1825 	}
1826 
1827 	va_start(va, fmt);
1828 	vprintf(fmt, va);
1829 	va_end(va);
1830 }
1831 #endif
1832