xref: /freebsd/sys/net/ieee8023ad_lacp.c (revision 78007886c995898a9494648343e5236bca1cbba3)
1 /*	$NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $	*/
2 
3 /*-
4  * Copyright (c)2005 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/callout.h>
34 #include <sys/mbuf.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/kernel.h> /* hz */
38 #include <sys/socket.h> /* for net/if.h */
39 #include <sys/sockio.h>
40 #include <machine/stdarg.h>
41 
42 #include <net/if.h>
43 #include <net/if_dl.h>
44 #include <net/ethernet.h>
45 #include <net/if_media.h>
46 #include <net/if_types.h>
47 
48 #include <net/if_lagg.h>
49 #include <net/ieee8023ad_lacp.h>
50 
51 /*
52  * actor system priority and port priority.
53  * XXX should be configurable.
54  */
55 
56 #define	LACP_SYSTEM_PRIO	0x8000
57 #define	LACP_PORT_PRIO		0x8000
58 
59 const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
60     { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
61 
62 static const struct tlv_template lacp_info_tlv_template[] = {
63 	{ LACP_TYPE_ACTORINFO,
64 	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
65 	{ LACP_TYPE_PARTNERINFO,
66 	    sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) },
67 	{ LACP_TYPE_COLLECTORINFO,
68 	    sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) },
69 	{ 0, 0 },
70 };
71 
/* Signature of a per-port timer handler (see lacp_timer_funcs[]). */
typedef void (*lacp_timer_func_t)(struct lacp_port *lp);
73 
74 static const struct tlv_template marker_info_tlv_template[] = {
75 	{ MARKER_TYPE_INFO, 16 },
76 	{ 0, 0 },
77 };
78 
79 static const struct tlv_template marker_response_tlv_template[] = {
80 	{ MARKER_TYPE_RESPONSE, 16 },
81 	{ 0, 0 },
82 };
83 
84 static void	lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *);
85 
86 static uint64_t	lacp_aggregator_bandwidth(struct lacp_aggregator *);
87 static void	lacp_suppress_distributing(struct lacp_softc *,
88 		    struct lacp_aggregator *);
89 static void	lacp_transit_expire(void *);
90 static void	lacp_select_active_aggregator(struct lacp_softc *);
91 static uint16_t	lacp_compose_key(struct lacp_port *);
92 static int	tlv_check(const void *, size_t, const struct tlvhdr *,
93 		    const struct tlv_template *, boolean_t);
94 static void	lacp_tick(void *);
95 
96 static void	lacp_fill_aggregator_id(struct lacp_aggregator *,
97 		    const struct lacp_port *);
98 static void	lacp_fill_aggregator_id_peer(struct lacp_peerinfo *,
99 		    const struct lacp_peerinfo *);
100 static int	lacp_aggregator_is_compatible(const struct lacp_aggregator *,
101 		    const struct lacp_port *);
102 static int	lacp_peerinfo_is_compatible(const struct lacp_peerinfo *,
103 		    const struct lacp_peerinfo *);
104 
105 static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *,
106 		    struct lacp_port *);
107 static void	lacp_aggregator_addref(struct lacp_softc *,
108 		    struct lacp_aggregator *);
109 static void	lacp_aggregator_delref(struct lacp_softc *,
110 		    struct lacp_aggregator *);
111 
112 /* receive machine */
113 
114 static void	lacp_sm_rx(struct lacp_port *, const struct lacpdu *);
115 static void	lacp_sm_rx_timer(struct lacp_port *);
116 static void	lacp_sm_rx_set_expired(struct lacp_port *);
117 static void	lacp_sm_rx_update_ntt(struct lacp_port *,
118 		    const struct lacpdu *);
119 static void	lacp_sm_rx_record_pdu(struct lacp_port *,
120 		    const struct lacpdu *);
121 static void	lacp_sm_rx_update_selected(struct lacp_port *,
122 		    const struct lacpdu *);
123 static void	lacp_sm_rx_record_default(struct lacp_port *);
124 static void	lacp_sm_rx_update_default_selected(struct lacp_port *);
125 static void	lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *,
126 		    const struct lacp_peerinfo *);
127 
128 /* mux machine */
129 
130 static void	lacp_sm_mux(struct lacp_port *);
131 static void	lacp_set_mux(struct lacp_port *, enum lacp_mux_state);
132 static void	lacp_sm_mux_timer(struct lacp_port *);
133 
134 /* periodic transmit machine */
135 
136 static void	lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t);
137 static void	lacp_sm_ptx_tx_schedule(struct lacp_port *);
138 static void	lacp_sm_ptx_timer(struct lacp_port *);
139 
140 /* transmit machine */
141 
142 static void	lacp_sm_tx(struct lacp_port *);
143 static void	lacp_sm_assert_ntt(struct lacp_port *);
144 
145 static void	lacp_run_timers(struct lacp_port *);
146 static int	lacp_compare_peerinfo(const struct lacp_peerinfo *,
147 		    const struct lacp_peerinfo *);
148 static int	lacp_compare_systemid(const struct lacp_systemid *,
149 		    const struct lacp_systemid *);
150 static void	lacp_port_enable(struct lacp_port *);
151 static void	lacp_port_disable(struct lacp_port *);
152 static void	lacp_select(struct lacp_port *);
153 static void	lacp_unselect(struct lacp_port *);
154 static void	lacp_disable_collecting(struct lacp_port *);
155 static void	lacp_enable_collecting(struct lacp_port *);
156 static void	lacp_disable_distributing(struct lacp_port *);
157 static void	lacp_enable_distributing(struct lacp_port *);
158 static int	lacp_xmit_lacpdu(struct lacp_port *);
159 
/*
 * Debug helpers.  They exist only when LACP_DEBUG is defined; otherwise
 * LACP_DPRINTF() expands to nothing.  LACP_DPRINTF takes a single
 * parenthesized argument list: LACP_DPRINTF((lp, "fmt", ...)).
 */
#if defined(LACP_DEBUG)
static void	lacp_dump_lacpdu(const struct lacpdu *du);
static const char *lacp_format_partner(const struct lacp_peerinfo *peer,
		    char *buf, size_t buflen);
static const char *lacp_format_lagid(const struct lacp_peerinfo *a,
		    const struct lacp_peerinfo *b, char *buf, size_t buflen);
static const char *lacp_format_lagid_aggregator(
		    const struct lacp_aggregator *la, char *buf,
		    size_t buflen);
static const char *lacp_format_state(uint8_t state, char *buf,
		    size_t buflen);
static const char *lacp_format_mac(const uint8_t *mac, char *buf,
		    size_t buflen);
static const char *lacp_format_systemid(const struct lacp_systemid *sysid,
		    char *buf, size_t buflen);
static const char *lacp_format_portid(const struct lacp_portid *portid,
		    char *buf, size_t buflen);
static void	lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
		    __attribute__((__format__(__printf__, 2, 3)));
#define	LACP_DPRINTF(a)	lacp_dprintf a
#else
#define LACP_DPRINTF(a) /* nothing */
#endif
180 
181 /*
182  * partner administration variables.
183  * XXX should be configurable.
184  */
185 
186 static const struct lacp_peerinfo lacp_partner_admin = {
187 	.lip_systemid = { .lsi_prio = 0xffff },
188 	.lip_portid = { .lpi_prio = 0xffff },
189 #if 1
190 	/* optimistic */
191 	.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
192 	    LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
193 #else
194 	/* pessimistic */
195 	.lip_state = 0,
196 #endif
197 };
198 
199 static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
200 	[LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer,
201 	[LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer,
202 	[LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer,
203 };
204 
205 /*
206  * lacp_input: process lacpdu
207  */
208 int
209 lacp_input(struct lagg_port *lgp, struct mbuf *m)
210 {
211 	struct lacp_port *lp = LACP_PORT(lgp);
212 	struct lacpdu *du;
213 	int error = 0;
214 
215 	LAGG_LOCK_ASSERT(lgp->lp_lagg);
216 
217 	if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
218 		goto bad;
219 	}
220 
221 	if (m->m_pkthdr.len != sizeof(*du)) {
222 		goto bad;
223 	}
224 
225 	if ((m->m_flags & M_MCAST) == 0) {
226 		goto bad;
227 	}
228 
229 	if (m->m_len < sizeof(*du)) {
230 		m = m_pullup(m, sizeof(*du));
231 		if (m == NULL) {
232 			return (ENOMEM);
233 		}
234 	}
235 
236 	du = mtod(m, struct lacpdu *);
237 
238 	if (memcmp(&du->ldu_eh.ether_dhost,
239 	    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
240 		goto bad;
241 	}
242 
243 	/* XXX
244 	KASSERT(du->ldu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_LACP,
245 	    ("a very bad kassert!"));
246 	*/
247 
248 	/*
249 	 * ignore the version for compatibility with
250 	 * the future protocol revisions.
251 	 */
252 
253 #if 0
254 	if (du->ldu_sph.sph_version != 1) {
255 		goto bad;
256 	}
257 #endif
258 
259 	/*
260 	 * ignore tlv types for compatibility with
261 	 * the future protocol revisions.
262 	 */
263 
264 	if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor,
265 	    lacp_info_tlv_template, FALSE)) {
266 		goto bad;
267 	}
268 
269 #if defined(LACP_DEBUG)
270 	LACP_DPRINTF((lp, "lacpdu receive\n"));
271 	lacp_dump_lacpdu(du);
272 #endif /* defined(LACP_DEBUG) */
273 	lacp_sm_rx(lp, du);
274 
275 	m_freem(m);
276 
277 	return (error);
278 
279 bad:
280 	m_freem(m);
281 	return (EINVAL);
282 }
283 
284 static void
285 lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info)
286 {
287 	struct lagg_port *lgp = lp->lp_lagg;
288 	struct lagg_softc *lgs = lgp->lp_lagg;
289 
290 	info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO);
291 	memcpy(&info->lip_systemid.lsi_mac,
292 	    IF_LLADDR(lgs->sc_ifp), ETHER_ADDR_LEN);
293 	info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO);
294 	info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index);
295 	info->lip_state = lp->lp_state;
296 }
297 
298 static int
299 lacp_xmit_lacpdu(struct lacp_port *lp)
300 {
301 	struct lagg_port *lgp = lp->lp_lagg;
302 	struct mbuf *m;
303 	struct lacpdu *du;
304 	int error;
305 
306 	LAGG_LOCK_ASSERT(lgp->lp_lagg);
307 
308 	m = m_gethdr(M_DONTWAIT, MT_DATA);
309 	if (m == NULL) {
310 		return (ENOMEM);
311 	}
312 	m->m_len = m->m_pkthdr.len = sizeof(*du);
313 
314 	du = mtod(m, struct lacpdu *);
315 	memset(du, 0, sizeof(*du));
316 
317 	memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols,
318 	    ETHER_ADDR_LEN);
319 	memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN);
320 	du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW);
321 
322 	du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
323 	du->ldu_sph.sph_version = 1;
324 
325 	TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor));
326 	du->ldu_actor = lp->lp_actor;
327 
328 	TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO,
329 	    sizeof(du->ldu_partner));
330 	du->ldu_partner = lp->lp_partner;
331 
332 	TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO,
333 	    sizeof(du->ldu_collector));
334 	du->ldu_collector.lci_maxdelay = 0;
335 
336 #if defined(LACP_DEBUG)
337 	LACP_DPRINTF((lp, "lacpdu transmit\n"));
338 	lacp_dump_lacpdu(du);
339 #endif /* defined(LACP_DEBUG) */
340 
341 	m->m_flags |= M_MCAST;
342 
343 	/*
344 	 * XXX should use higher priority queue.
345 	 * otherwise network congestion can break aggregation.
346 	 */
347 
348 	error = lagg_enqueue(lp->lp_ifp, m);
349 	return (error);
350 }
351 
352 void
353 lacp_linkstate(struct lagg_port *lgp)
354 {
355 	struct lacp_port *lp = LACP_PORT(lgp);
356 	struct ifnet *ifp = lgp->lp_ifp;
357 	struct ifmediareq ifmr;
358 	int error = 0;
359 	u_int media;
360 	uint8_t old_state;
361 	uint16_t old_key;
362 
363 	LAGG_LOCK_ASSERT(lgp->lp_lagg);
364 
365 	bzero((char *)&ifmr, sizeof(ifmr));
366 	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
367 	if (error != 0)
368 		return;
369 
370 	media = ifmr.ifm_active;
371 	LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x\n", lp->lp_media, media));
372 	old_state = lp->lp_state;
373 	old_key = lp->lp_key;
374 
375 	lp->lp_media = media;
376 	if ((media & IFM_HDX) != 0 || ifp->if_link_state == LINK_STATE_DOWN) {
377 		lacp_port_disable(lp);
378 	} else {
379 		lacp_port_enable(lp);
380 	}
381 	lp->lp_key = lacp_compose_key(lp);
382 
383 	if (old_state != lp->lp_state || old_key != lp->lp_key) {
384 		LACP_DPRINTF((lp, "-> UNSELECTED\n"));
385 		lp->lp_selected = LACP_UNSELECTED;
386 	}
387 }
388 
389 static void
390 lacp_tick(void *arg)
391 {
392 	struct lacp_softc *lsc = arg;
393 	struct lacp_port *lp;
394 
395 	LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) {
396 		if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
397 			continue;
398 
399 		lacp_run_timers(lp);
400 
401 		lacp_select(lp);
402 		lacp_sm_mux(lp);
403 		lacp_sm_tx(lp);
404 		lacp_sm_ptx_tx_schedule(lp);
405 	}
406 	callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
407 }
408 
409 int
410 lacp_port_create(struct lagg_port *lgp)
411 {
412 	struct lagg_softc *lgs = lgp->lp_lagg;
413 	struct lacp_softc *lsc = LACP_SOFTC(lgs);
414 	struct lacp_port *lp;
415 	struct ifnet *ifp = lgp->lp_ifp;
416 	struct sockaddr_dl sdl;
417 	struct ifmultiaddr *rifma = NULL;
418 	int error;
419 
420 	boolean_t active = TRUE; /* XXX should be configurable */
421 	boolean_t fast = FALSE; /* XXX should be configurable */
422 
423 	LAGG_LOCK_ASSERT(lgs);
424 
425 	bzero((char *)&sdl, sizeof(sdl));
426 	sdl.sdl_len = sizeof(sdl);
427 	sdl.sdl_family = AF_LINK;
428 	sdl.sdl_index = ifp->if_index;
429 	sdl.sdl_type = IFT_ETHER;
430 	sdl.sdl_alen = ETHER_ADDR_LEN;
431 
432 	bcopy(&ethermulticastaddr_slowprotocols,
433 	    LLADDR(&sdl), ETHER_ADDR_LEN);
434 	error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
435 	if (error) {
436 		printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
437 		return (error);
438 	}
439 
440 	lp = malloc(sizeof(struct lacp_port),
441 	    M_DEVBUF, M_NOWAIT|M_ZERO);
442 	if (lp == NULL)
443 		return (ENOMEM);
444 
445 	lgp->lp_psc = (caddr_t)lp;
446 	lp->lp_ifp = ifp;
447 	lp->lp_lagg = lgp;
448 	lp->lp_lsc = lsc;
449 
450 	LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next);
451 
452 	lacp_fill_actorinfo(lp, &lp->lp_actor);
453 	lp->lp_state =
454 	    (active ? LACP_STATE_ACTIVITY : 0) |
455 	    (fast ? LACP_STATE_TIMEOUT : 0);
456 	lp->lp_aggregator = NULL;
457 	lacp_linkstate(lgp);
458 	lacp_sm_rx_set_expired(lp);
459 
460 	return (0);
461 }
462 
463 void
464 lacp_port_destroy(struct lagg_port *lgp)
465 {
466 	struct lacp_port *lp = LACP_PORT(lgp);
467 	struct ifnet *ifp = lgp->lp_ifp;
468 	struct sockaddr_dl sdl;
469 	int i, error;
470 
471 	LAGG_LOCK_ASSERT(lgp->lp_lagg);
472 
473 	for (i = 0; i < LACP_NTIMER; i++) {
474 		LACP_TIMER_DISARM(lp, i);
475 	}
476 
477 	lacp_disable_collecting(lp);
478 	lacp_disable_distributing(lp);
479 	lacp_unselect(lp);
480 
481 	bzero((char *)&sdl, sizeof(sdl));
482 	sdl.sdl_len = sizeof(sdl);
483 	sdl.sdl_family = AF_LINK;
484 	sdl.sdl_index = ifp->if_index;
485 	sdl.sdl_type = IFT_ETHER;
486 	sdl.sdl_alen = ETHER_ADDR_LEN;
487 
488 	bcopy(&ethermulticastaddr_slowprotocols,
489 	    LLADDR(&sdl), ETHER_ADDR_LEN);
490 	error = if_delmulti(ifp, (struct sockaddr *)&sdl);
491 	if (error)
492 		printf("%s: DELMULTI failed on %s\n", __func__, lgp->lp_ifname);
493 
494 	LIST_REMOVE(lp, lp_next);
495 	free(lp, M_DEVBUF);
496 }
497 
498 int
499 lacp_port_isactive(struct lagg_port *lgp)
500 {
501 	struct lacp_port *lp = LACP_PORT(lgp);
502 	struct lacp_softc *lsc = lp->lp_lsc;
503 	struct lacp_aggregator *la = lp->lp_aggregator;
504 
505 	/* This port is joined to the active aggregator */
506 	if (la != NULL && la == lsc->lsc_active_aggregator)
507 		return (1);
508 
509 	return (0);
510 }
511 
512 static void
513 lacp_disable_collecting(struct lacp_port *lp)
514 {
515 	struct lagg_port *lgp = lp->lp_lagg;
516 
517 	LACP_DPRINTF((lp, "collecting disabled\n"));
518 
519 	lp->lp_state &= ~LACP_STATE_COLLECTING;
520 	lgp->lp_flags &= ~LAGG_PORT_COLLECTING;
521 }
522 
523 static void
524 lacp_enable_collecting(struct lacp_port *lp)
525 {
526 	struct lagg_port *lgp = lp->lp_lagg;
527 
528 	LACP_DPRINTF((lp, "collecting enabled\n"));
529 
530 	lp->lp_state |= LACP_STATE_COLLECTING;
531 	lgp->lp_flags |= LAGG_PORT_COLLECTING;
532 }
533 
534 static void
535 lacp_disable_distributing(struct lacp_port *lp)
536 {
537 	struct lacp_aggregator *la = lp->lp_aggregator;
538 	struct lacp_softc *lsc = lp->lp_lsc;
539 	struct lagg_port *lgp = lp->lp_lagg;
540 #if defined(LACP_DEBUG)
541 	char buf[LACP_LAGIDSTR_MAX+1];
542 #endif /* defined(LACP_DEBUG) */
543 
544 	LAGG_LOCK_ASSERT(lgp->lp_lagg);
545 
546 	if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) {
547 		return;
548 	}
549 
550 	KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports"));
551 	KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports));
552 	KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid"));
553 
554 	LACP_DPRINTF((lp, "disable distributing on aggregator %s, "
555 	    "nports %d -> %d\n",
556 	    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
557 	    la->la_nports, la->la_nports - 1));
558 
559 	TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
560 	la->la_nports--;
561 
562 	lacp_suppress_distributing(lsc, la);
563 
564 	lp->lp_state &= ~LACP_STATE_DISTRIBUTING;
565 	lgp->lp_flags &= ~LAGG_PORT_DISTRIBUTING;
566 
567 	if (lsc->lsc_active_aggregator == la) {
568 		lacp_select_active_aggregator(lsc);
569 	}
570 }
571 
572 static void
573 lacp_enable_distributing(struct lacp_port *lp)
574 {
575 	struct lacp_aggregator *la = lp->lp_aggregator;
576 	struct lacp_softc *lsc = lp->lp_lsc;
577 	struct lagg_port *lgp = lp->lp_lagg;
578 #if defined(LACP_DEBUG)
579 	char buf[LACP_LAGIDSTR_MAX+1];
580 #endif /* defined(LACP_DEBUG) */
581 
582 	LAGG_LOCK_ASSERT(lgp->lp_lagg);
583 
584 	if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) {
585 		return;
586 	}
587 
588 	LACP_DPRINTF((lp, "enable distributing on aggregator %s, "
589 	    "nports %d -> %d\n",
590 	    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
591 	    la->la_nports, la->la_nports + 1));
592 
593 	KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
594 	TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
595 	la->la_nports++;
596 
597 	lacp_suppress_distributing(lsc, la);
598 
599 	lp->lp_state |= LACP_STATE_DISTRIBUTING;
600 	lgp->lp_flags |= LAGG_PORT_DISTRIBUTING;
601 
602 	if (lsc->lsc_active_aggregator != la) {
603 		lacp_select_active_aggregator(lsc);
604 	}
605 }
606 
607 static void
608 lacp_transit_expire(void *vp)
609 {
610 	struct lacp_softc *lsc = vp;
611 
612 	LACP_DPRINTF((NULL, "%s\n", __func__));
613 	lsc->lsc_suppress_distributing = FALSE;
614 }
615 
616 int
617 lacp_attach(struct lagg_softc *lgs)
618 {
619 	struct lacp_softc *lsc;
620 
621 	LAGG_LOCK_ASSERT(lgs);
622 
623 	lsc = malloc(sizeof(struct lacp_softc),
624 	    M_DEVBUF, M_NOWAIT|M_ZERO);
625 	if (lsc == NULL)
626 		return (ENOMEM);
627 
628 	lgs->sc_psc = (caddr_t)lsc;
629 	lsc->lsc_lagg = lgs;
630 
631 	lsc->lsc_hashkey = arc4random();
632 	lsc->lsc_active_aggregator = NULL;
633 	TAILQ_INIT(&lsc->lsc_aggregators);
634 	LIST_INIT(&lsc->lsc_ports);
635 
636 	callout_init_mtx(&lsc->lsc_transit_callout, &lgs->sc_mtx, 0);
637 	callout_init_mtx(&lsc->lsc_callout, &lgs->sc_mtx, 0);
638 
639 	/* if the lagg is already up then do the same */
640 	if (lgs->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
641 		lacp_init(lgs);
642 
643 	return (0);
644 }
645 
/*
 * lacp_detach: release the LACP softc of a lagg interface.
 * Asserts that no aggregators remain, clears the lagg softc's
 * back-pointer, drains both callouts and frees the softc.
 * Always returns 0.
 */
int
lacp_detach(struct lagg_softc *lgs)
{
	struct lacp_softc *lsc = LACP_SOFTC(lgs);

	KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
	    ("aggregators still active"));
	KASSERT(lsc->lsc_active_aggregator == NULL,
	    ("aggregator still attached"));

	/* Detach from the lagg softc before waiting for the callouts. */
	lgs->sc_psc = NULL;
	callout_drain(&lsc->lsc_transit_callout);
	callout_drain(&lsc->lsc_callout);

	free(lsc, M_DEVBUF);
	return (0);
}
663 
664 void
665 lacp_init(struct lagg_softc *lgs)
666 {
667 	struct lacp_softc *lsc = LACP_SOFTC(lgs);
668 
669 	callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
670 }
671 
672 void
673 lacp_stop(struct lagg_softc *lgs)
674 {
675 	struct lacp_softc *lsc = LACP_SOFTC(lgs);
676 
677 	callout_stop(&lsc->lsc_transit_callout);
678 	callout_stop(&lsc->lsc_callout);
679 }
680 
681 struct lagg_port *
682 lacp_select_tx_port(struct lagg_softc *lgs, struct mbuf *m)
683 {
684 	struct lacp_softc *lsc = LACP_SOFTC(lgs);
685 	struct lacp_aggregator *la;
686 	struct lacp_port *lp;
687 	uint32_t hash;
688 	int nports;
689 
690 	LAGG_LOCK_ASSERT(lgs);
691 
692 	if (__predict_false(lsc->lsc_suppress_distributing)) {
693 		LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
694 		return (NULL);
695 	}
696 
697 	la = lsc->lsc_active_aggregator;
698 	if (__predict_false(la == NULL)) {
699 		LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
700 		return (NULL);
701 	}
702 
703 	nports = la->la_nports;
704 	KASSERT(nports > 0, ("no ports available"));
705 
706 	hash = lagg_hashmbuf(m, lsc->lsc_hashkey);
707 	hash %= nports;
708 	lp = TAILQ_FIRST(&la->la_ports);
709 	while (hash--) {
710 		lp = TAILQ_NEXT(lp, lp_dist_q);
711 	}
712 
713 	KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0,
714 	    ("aggregated port is not distributing"));
715 
716 	return (lp->lp_lagg);
717 }
718 /*
719  * lacp_suppress_distributing: drop transmit packets for a while
720  * to preserve packet ordering.
721  */
722 
723 static void
724 lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la)
725 {
726 	if (lsc->lsc_active_aggregator != la) {
727 		return;
728 	}
729 
730 	LACP_DPRINTF((NULL, "%s\n", __func__));
731 	lsc->lsc_suppress_distributing = TRUE;
732 	/* XXX should consider collector max delay */
733 	callout_reset(&lsc->lsc_transit_callout,
734 	    LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc);
735 }
736 
737 static int
738 lacp_compare_peerinfo(const struct lacp_peerinfo *a,
739     const struct lacp_peerinfo *b)
740 {
741 	return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state)));
742 }
743 
744 static int
745 lacp_compare_systemid(const struct lacp_systemid *a,
746     const struct lacp_systemid *b)
747 {
748 	return (memcmp(a, b, sizeof(*a)));
749 }
750 
#if 0	/* unused */
/*
 * lacp_compare_portid: memcmp()-style bytewise comparison of two
 * port ids.  Compiled out.
 */
static int
lacp_compare_portid(const struct lacp_portid *a,
    const struct lacp_portid *b)
{
	const size_t cmplen = sizeof(*a);

	return (memcmp(a, b, cmplen));
}
#endif
759 
760 static uint64_t
761 lacp_aggregator_bandwidth(struct lacp_aggregator *la)
762 {
763 	struct lacp_port *lp;
764 	uint64_t speed;
765 
766 	lp = TAILQ_FIRST(&la->la_ports);
767 	if (lp == NULL) {
768 		return (0);
769 	}
770 
771 	speed = ifmedia_baudrate(lp->lp_media);
772 	speed *= la->la_nports;
773 	if (speed == 0) {
774 		LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n",
775 		    lp->lp_media, la->la_nports));
776 	}
777 
778 	return (speed);
779 }
780 
781 /*
782  * lacp_select_active_aggregator: select an aggregator to be used to transmit
783  * packets from lagg(4) interface.
784  */
785 
786 static void
787 lacp_select_active_aggregator(struct lacp_softc *lsc)
788 {
789 	struct lacp_aggregator *la;
790 	struct lacp_aggregator *best_la = NULL;
791 	uint64_t best_speed = 0;
792 #if defined(LACP_DEBUG)
793 	char buf[LACP_LAGIDSTR_MAX+1];
794 #endif /* defined(LACP_DEBUG) */
795 
796 	LACP_DPRINTF((NULL, "%s:\n", __func__));
797 
798 	TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
799 		uint64_t speed;
800 
801 		if (la->la_nports == 0) {
802 			continue;
803 		}
804 
805 		speed = lacp_aggregator_bandwidth(la);
806 		LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n",
807 		    lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
808 		    speed, la->la_nports));
809 		if (speed > best_speed ||
810 		    (speed == best_speed &&
811 		    la == lsc->lsc_active_aggregator)) {
812 			best_la = la;
813 			best_speed = speed;
814 		}
815 	}
816 
817 	KASSERT(best_la == NULL || best_la->la_nports > 0,
818 	    ("invalid aggregator refcnt"));
819 	KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports),
820 	    ("invalid aggregator list"));
821 
822 #if defined(LACP_DEBUG)
823 	if (lsc->lsc_active_aggregator != best_la) {
824 		LACP_DPRINTF((NULL, "active aggregator changed\n"));
825 		LACP_DPRINTF((NULL, "old %s\n",
826 		    lacp_format_lagid_aggregator(lsc->lsc_active_aggregator,
827 		    buf, sizeof(buf))));
828 	} else {
829 		LACP_DPRINTF((NULL, "active aggregator not changed\n"));
830 	}
831 	LACP_DPRINTF((NULL, "new %s\n",
832 	    lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
833 #endif /* defined(LACP_DEBUG) */
834 
835 	if (lsc->lsc_active_aggregator != best_la) {
836 		lsc->lsc_active_aggregator = best_la;
837 		if (best_la) {
838 			lacp_suppress_distributing(lsc, best_la);
839 		}
840 	}
841 }
842 
843 static uint16_t
844 lacp_compose_key(struct lacp_port *lp)
845 {
846 	struct lagg_port *lgp = lp->lp_lagg;
847 	struct lagg_softc *lgs = lgp->lp_lagg;
848 	u_int media = lp->lp_media;
849 	uint16_t key;
850 
851 	KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid interface type"));
852 
853 	if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) {
854 
855 		/*
856 		 * non-aggregatable links should have unique keys.
857 		 *
858 		 * XXX this isn't really unique as if_index is 16 bit.
859 		 */
860 
861 		/* bit 0..14:	(some bits of) if_index of this port */
862 		key = lp->lp_ifp->if_index;
863 		/* bit 15:	1 */
864 		key |= 0x8000;
865 	} else {
866 		u_int subtype = IFM_SUBTYPE(media);
867 
868 		KASSERT((media & IFM_HDX) == 0, ("aggregating HDX interface"));
869 
870 		/* bit 0..4:	IFM_SUBTYPE */
871 		key = subtype;
872 		/* bit 5..14:	(some bits of) if_index of lagg device */
873 		key |= 0x7fe0 & ((lgs->sc_ifp->if_index) << 5);
874 		/* bit 15:	0 */
875 	}
876 	return (htons(key));
877 }
878 
879 static void
880 lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la)
881 {
882 #if defined(LACP_DEBUG)
883 	char buf[LACP_LAGIDSTR_MAX+1];
884 #endif
885 
886 	LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
887 	    __func__,
888 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
889 	    buf, sizeof(buf)),
890 	    la->la_refcnt, la->la_refcnt + 1));
891 
892 	KASSERT(la->la_refcnt > 0, ("refcount <= 0"));
893 	la->la_refcnt++;
894 	KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount"));
895 }
896 
897 static void
898 lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la)
899 {
900 #if defined(LACP_DEBUG)
901 	char buf[LACP_LAGIDSTR_MAX+1];
902 #endif
903 
904 	LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n",
905 	    __func__,
906 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
907 	    buf, sizeof(buf)),
908 	    la->la_refcnt, la->la_refcnt - 1));
909 
910 	KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt"));
911 	la->la_refcnt--;
912 	if (la->la_refcnt > 0) {
913 		return;
914 	}
915 
916 	KASSERT(la->la_refcnt == 0, ("refcount not zero"));
917 	KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active"));
918 
919 	TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q);
920 
921 	free(la, M_DEVBUF);
922 }
923 
924 /*
925  * lacp_aggregator_get: allocate an aggregator.
926  */
927 
928 static struct lacp_aggregator *
929 lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp)
930 {
931 	struct lacp_aggregator *la;
932 
933 	la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT);
934 	if (la) {
935 		la->la_refcnt = 1;
936 		la->la_nports = 0;
937 		TAILQ_INIT(&la->la_ports);
938 		la->la_pending = 0;
939 		TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q);
940 	}
941 
942 	return (la);
943 }
944 
945 /*
946  * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port.
947  */
948 
949 static void
950 lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp)
951 {
952 	lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner);
953 	lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor);
954 
955 	la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION;
956 }
957 
958 static void
959 lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr,
960     const struct lacp_peerinfo *lpi_port)
961 {
962 	memset(lpi_aggr, 0, sizeof(*lpi_aggr));
963 	lpi_aggr->lip_systemid = lpi_port->lip_systemid;
964 	lpi_aggr->lip_key = lpi_port->lip_key;
965 }
966 
967 /*
 * lacp_aggregator_is_compatible: check if a port can join an aggregator.
969  */
970 
971 static int
972 lacp_aggregator_is_compatible(const struct lacp_aggregator *la,
973     const struct lacp_port *lp)
974 {
975 	if (!(lp->lp_state & LACP_STATE_AGGREGATION) ||
976 	    !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) {
977 		return (0);
978 	}
979 
980 	if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) {
981 		return (0);
982 	}
983 
984 	if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) {
985 		return (0);
986 	}
987 
988 	if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) {
989 		return (0);
990 	}
991 
992 	return (1);
993 }
994 
995 static int
996 lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a,
997     const struct lacp_peerinfo *b)
998 {
999 	if (memcmp(&a->lip_systemid, &b->lip_systemid,
1000 	    sizeof(a->lip_systemid))) {
1001 		return (0);
1002 	}
1003 
1004 	if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) {
1005 		return (0);
1006 	}
1007 
1008 	return (1);
1009 }
1010 
1011 static void
1012 lacp_port_enable(struct lacp_port *lp)
1013 {
1014 	lp->lp_state |= LACP_STATE_AGGREGATION;
1015 }
1016 
/*
 * lacp_port_disable: take the port out of aggregation.  Detaches the
 * mux, marks the port non-aggregatable and UNSELECTED, reloads the
 * partner information via lacp_sm_rx_record_default() and then clears
 * the partner's AGGREGATION bit and the port's EXPIRED bit.
 */
static void
lacp_port_disable(struct lacp_port *lp)
{
	lacp_set_mux(lp, LACP_MUX_DETACHED);

	lp->lp_state &= ~LACP_STATE_AGGREGATION;
	lp->lp_selected = LACP_UNSELECTED;
	lacp_sm_rx_record_default(lp);
	/* Cleared after record_default, presumably because it rewrites them. */
	lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION;
	lp->lp_state &= ~LACP_STATE_EXPIRED;
}
1028 
1029 /*
1030  * lacp_select: select an aggregator.  create one if necessary.
1031  */
1032 static void
1033 lacp_select(struct lacp_port *lp)
1034 {
1035 	struct lacp_softc *lsc = lp->lp_lsc;
1036 	struct lacp_aggregator *la;
1037 #if defined(LACP_DEBUG)
1038 	char buf[LACP_LAGIDSTR_MAX+1];
1039 #endif
1040 
1041 	if (lp->lp_aggregator) {
1042 		return;
1043 	}
1044 
1045 	KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1046 	    ("timer_wait_while still active"));
1047 
1048 	LACP_DPRINTF((lp, "port lagid=%s\n",
1049 	    lacp_format_lagid(&lp->lp_actor, &lp->lp_partner,
1050 	    buf, sizeof(buf))));
1051 
1052 	TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) {
1053 		if (lacp_aggregator_is_compatible(la, lp)) {
1054 			break;
1055 		}
1056 	}
1057 
1058 	if (la == NULL) {
1059 		la = lacp_aggregator_get(lsc, lp);
1060 		if (la == NULL) {
1061 			LACP_DPRINTF((lp, "aggregator creation failed\n"));
1062 
1063 			/*
1064 			 * will retry on the next tick.
1065 			 */
1066 
1067 			return;
1068 		}
1069 		lacp_fill_aggregator_id(la, lp);
1070 		LACP_DPRINTF((lp, "aggregator created\n"));
1071 	} else {
1072 		LACP_DPRINTF((lp, "compatible aggregator found\n"));
1073 		lacp_aggregator_addref(lsc, la);
1074 	}
1075 
1076 	LACP_DPRINTF((lp, "aggregator lagid=%s\n",
1077 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
1078 	    buf, sizeof(buf))));
1079 
1080 	lp->lp_aggregator = la;
1081 	lp->lp_selected = LACP_SELECTED;
1082 }
1083 
1084 /*
1085  * lacp_unselect: finish unselect/detach process.
1086  */
1087 
1088 static void
1089 lacp_unselect(struct lacp_port *lp)
1090 {
1091 	struct lacp_softc *lsc = lp->lp_lsc;
1092 	struct lacp_aggregator *la = lp->lp_aggregator;
1093 
1094 	KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1095 	    ("timer_wait_while still active"));
1096 
1097 	if (la == NULL) {
1098 		return;
1099 	}
1100 
1101 	lp->lp_aggregator = NULL;
1102 	lacp_aggregator_delref(lsc, la);
1103 }
1104 
1105 /* mux machine */
1106 
1107 static void
1108 lacp_sm_mux(struct lacp_port *lp)
1109 {
1110 	enum lacp_mux_state new_state;
1111 	boolean_t p_sync =
1112 		    (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
1113 	boolean_t p_collecting =
1114 	    (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0;
1115 	enum lacp_selected selected = lp->lp_selected;
1116 	struct lacp_aggregator *la;
1117 
1118 	/* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */
1119 
1120 re_eval:
1121 	la = lp->lp_aggregator;
1122 	KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL,
1123 	    ("MUX not detached"));
1124 	new_state = lp->lp_mux_state;
1125 	switch (lp->lp_mux_state) {
1126 	case LACP_MUX_DETACHED:
1127 		if (selected != LACP_UNSELECTED) {
1128 			new_state = LACP_MUX_WAITING;
1129 		}
1130 		break;
1131 	case LACP_MUX_WAITING:
1132 		KASSERT(la->la_pending > 0 ||
1133 		    !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE),
1134 		    ("timer_wait_while still active"));
1135 		if (selected == LACP_SELECTED && la->la_pending == 0) {
1136 			new_state = LACP_MUX_ATTACHED;
1137 		} else if (selected == LACP_UNSELECTED) {
1138 			new_state = LACP_MUX_DETACHED;
1139 		}
1140 		break;
1141 	case LACP_MUX_ATTACHED:
1142 		if (selected == LACP_SELECTED && p_sync) {
1143 			new_state = LACP_MUX_COLLECTING;
1144 		} else if (selected != LACP_SELECTED) {
1145 			new_state = LACP_MUX_DETACHED;
1146 		}
1147 		break;
1148 	case LACP_MUX_COLLECTING:
1149 		if (selected == LACP_SELECTED && p_sync && p_collecting) {
1150 			new_state = LACP_MUX_DISTRIBUTING;
1151 		} else if (selected != LACP_SELECTED || !p_sync) {
1152 			new_state = LACP_MUX_ATTACHED;
1153 		}
1154 		break;
1155 	case LACP_MUX_DISTRIBUTING:
1156 		if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
1157 			new_state = LACP_MUX_COLLECTING;
1158 		}
1159 		break;
1160 	default:
1161 		panic("%s: unknown state", __func__);
1162 	}
1163 
1164 	if (lp->lp_mux_state == new_state) {
1165 		return;
1166 	}
1167 
1168 	lacp_set_mux(lp, new_state);
1169 	goto re_eval;
1170 }
1171 
1172 static void
1173 lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state)
1174 {
1175 	struct lacp_aggregator *la = lp->lp_aggregator;
1176 
1177 	if (lp->lp_mux_state == new_state) {
1178 		return;
1179 	}
1180 
1181 	switch (new_state) {
1182 	case LACP_MUX_DETACHED:
1183 		lp->lp_state &= ~LACP_STATE_SYNC;
1184 		lacp_disable_distributing(lp);
1185 		lacp_disable_collecting(lp);
1186 		lacp_sm_assert_ntt(lp);
1187 		/* cancel timer */
1188 		if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) {
1189 			KASSERT(la->la_pending > 0,
1190 			    ("timer_wait_while not active"));
1191 			la->la_pending--;
1192 		}
1193 		LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE);
1194 		lacp_unselect(lp);
1195 		break;
1196 	case LACP_MUX_WAITING:
1197 		LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE,
1198 		    LACP_AGGREGATE_WAIT_TIME);
1199 		la->la_pending++;
1200 		break;
1201 	case LACP_MUX_ATTACHED:
1202 		lp->lp_state |= LACP_STATE_SYNC;
1203 		lacp_disable_collecting(lp);
1204 		lacp_sm_assert_ntt(lp);
1205 		break;
1206 	case LACP_MUX_COLLECTING:
1207 		lacp_enable_collecting(lp);
1208 		lacp_disable_distributing(lp);
1209 		lacp_sm_assert_ntt(lp);
1210 		break;
1211 	case LACP_MUX_DISTRIBUTING:
1212 		lacp_enable_distributing(lp);
1213 		break;
1214 	default:
1215 		panic("%s: unknown state", __func__);
1216 	}
1217 
1218 	LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state));
1219 
1220 	lp->lp_mux_state = new_state;
1221 }
1222 
1223 static void
1224 lacp_sm_mux_timer(struct lacp_port *lp)
1225 {
1226 	struct lacp_aggregator *la = lp->lp_aggregator;
1227 #if defined(LACP_DEBUG)
1228 	char buf[LACP_LAGIDSTR_MAX+1];
1229 #endif
1230 
1231 	KASSERT(la->la_pending > 0, ("no pending event"));
1232 
1233 	LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__,
1234 	    lacp_format_lagid(&la->la_actor, &la->la_partner,
1235 	    buf, sizeof(buf)),
1236 	    la->la_pending, la->la_pending - 1));
1237 
1238 	la->la_pending--;
1239 }
1240 
1241 /* periodic transmit machine */
1242 
1243 static void
1244 lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate)
1245 {
1246 	if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state,
1247 	    LACP_STATE_TIMEOUT)) {
1248 		return;
1249 	}
1250 
1251 	LACP_DPRINTF((lp, "partner timeout changed\n"));
1252 
1253 	/*
1254 	 * FAST_PERIODIC -> SLOW_PERIODIC
1255 	 * or
1256 	 * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC
1257 	 *
1258 	 * let lacp_sm_ptx_tx_schedule to update timeout.
1259 	 */
1260 
1261 	LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
1262 
1263 	/*
1264 	 * if timeout has been shortened, assert NTT.
1265 	 */
1266 
1267 	if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) {
1268 		lacp_sm_assert_ntt(lp);
1269 	}
1270 }
1271 
1272 static void
1273 lacp_sm_ptx_tx_schedule(struct lacp_port *lp)
1274 {
1275 	int timeout;
1276 
1277 	if (!(lp->lp_state & LACP_STATE_ACTIVITY) &&
1278 	    !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) {
1279 
1280 		/*
1281 		 * NO_PERIODIC
1282 		 */
1283 
1284 		LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC);
1285 		return;
1286 	}
1287 
1288 	if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) {
1289 		return;
1290 	}
1291 
1292 	timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ?
1293 	    LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME;
1294 
1295 	LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout);
1296 }
1297 
/*
 * lacp_sm_ptx_timer: periodic transmit machine timer handler.
 *
 * Only flags the port as needing to transmit (NTT); this function
 * itself sends nothing.
 */
static void
lacp_sm_ptx_timer(struct lacp_port *lp)
{

	lacp_sm_assert_ntt(lp);
}
1303 
1304 static void
1305 lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du)
1306 {
1307 	int timeout;
1308 
1309 	/*
1310 	 * check LACP_DISABLED first
1311 	 */
1312 
1313 	if (!(lp->lp_state & LACP_STATE_AGGREGATION)) {
1314 		return;
1315 	}
1316 
1317 	/*
1318 	 * check loopback condition.
1319 	 */
1320 
1321 	if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid,
1322 	    &lp->lp_actor.lip_systemid)) {
1323 		return;
1324 	}
1325 
1326 	/*
1327 	 * EXPIRED, DEFAULTED, CURRENT -> CURRENT
1328 	 */
1329 
1330 	lacp_sm_rx_update_selected(lp, du);
1331 	lacp_sm_rx_update_ntt(lp, du);
1332 	lacp_sm_rx_record_pdu(lp, du);
1333 
1334 	timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ?
1335 	    LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME;
1336 	LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout);
1337 
1338 	lp->lp_state &= ~LACP_STATE_EXPIRED;
1339 
1340 	/*
1341 	 * kick transmit machine without waiting the next tick.
1342 	 */
1343 
1344 	lacp_sm_tx(lp);
1345 }
1346 
1347 static void
1348 lacp_sm_rx_set_expired(struct lacp_port *lp)
1349 {
1350 	lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
1351 	lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT;
1352 	LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME);
1353 	lp->lp_state |= LACP_STATE_EXPIRED;
1354 }
1355 
1356 static void
1357 lacp_sm_rx_timer(struct lacp_port *lp)
1358 {
1359 	if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) {
1360 		/* CURRENT -> EXPIRED */
1361 		LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__));
1362 		lacp_sm_rx_set_expired(lp);
1363 	} else {
1364 		/* EXPIRED -> DEFAULTED */
1365 		LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__));
1366 		lacp_sm_rx_update_default_selected(lp);
1367 		lacp_sm_rx_record_default(lp);
1368 		lp->lp_state &= ~LACP_STATE_EXPIRED;
1369 	}
1370 }
1371 
1372 static void
1373 lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
1374 {
1375 	boolean_t active;
1376 	uint8_t oldpstate;
1377 #if defined(LACP_DEBUG)
1378 	char buf[LACP_STATESTR_MAX+1];
1379 #endif
1380 
1381 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1382 
1383 	oldpstate = lp->lp_partner.lip_state;
1384 
1385 	active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY)
1386 	    || ((lp->lp_state & LACP_STATE_ACTIVITY) &&
1387 	    (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY));
1388 
1389 	lp->lp_partner = du->ldu_actor;
1390 	if (active &&
1391 	    ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
1392 	    LACP_STATE_AGGREGATION) &&
1393 	    !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner))
1394 	    || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) {
1395 		/* XXX nothing? */
1396 	} else {
1397 		lp->lp_partner.lip_state &= ~LACP_STATE_SYNC;
1398 	}
1399 
1400 	lp->lp_state &= ~LACP_STATE_DEFAULTED;
1401 
1402 	if (oldpstate != lp->lp_partner.lip_state) {
1403 		LACP_DPRINTF((lp, "old pstate %s\n",
1404 		    lacp_format_state(oldpstate, buf, sizeof(buf))));
1405 		LACP_DPRINTF((lp, "new pstate %s\n",
1406 		    lacp_format_state(lp->lp_partner.lip_state, buf,
1407 		    sizeof(buf))));
1408 	}
1409 
1410 	lacp_sm_ptx_update_timeout(lp, oldpstate);
1411 }
1412 
1413 static void
1414 lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du)
1415 {
1416 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1417 
1418 	if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) ||
1419 	    !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state,
1420 	    LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) {
1421 		LACP_DPRINTF((lp, "%s: assert ntt\n", __func__));
1422 		lacp_sm_assert_ntt(lp);
1423 	}
1424 }
1425 
1426 static void
1427 lacp_sm_rx_record_default(struct lacp_port *lp)
1428 {
1429 	uint8_t oldpstate;
1430 
1431 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1432 
1433 	oldpstate = lp->lp_partner.lip_state;
1434 	lp->lp_partner = lacp_partner_admin;
1435 	lp->lp_state |= LACP_STATE_DEFAULTED;
1436 	lacp_sm_ptx_update_timeout(lp, oldpstate);
1437 }
1438 
1439 static void
1440 lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp,
1441     const struct lacp_peerinfo *info)
1442 {
1443 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1444 
1445 	if (lacp_compare_peerinfo(&lp->lp_partner, info) ||
1446 	    !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state,
1447 	    LACP_STATE_AGGREGATION)) {
1448 		lp->lp_selected = LACP_UNSELECTED;
1449 		/* mux machine will clean up lp->lp_aggregator */
1450 	}
1451 }
1452 
1453 static void
1454 lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du)
1455 {
1456 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1457 
1458 	lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor);
1459 }
1460 
1461 static void
1462 lacp_sm_rx_update_default_selected(struct lacp_port *lp)
1463 {
1464 	/* LACP_DPRINTF((lp, "%s\n", __func__)); */
1465 
1466 	lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
1467 }
1468 
1469 /* transmit machine */
1470 
1471 static void
1472 lacp_sm_tx(struct lacp_port *lp)
1473 {
1474 	int error;
1475 
1476 	if (!(lp->lp_state & LACP_STATE_AGGREGATION)
1477 #if 1
1478 	    || (!(lp->lp_state & LACP_STATE_ACTIVITY)
1479 	    && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY))
1480 #endif
1481 	    ) {
1482 		lp->lp_flags &= ~LACP_PORT_NTT;
1483 	}
1484 
1485 	if (!(lp->lp_flags & LACP_PORT_NTT)) {
1486 		return;
1487 	}
1488 
1489 	/* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */
1490 	if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent,
1491 		    (3 / LACP_FAST_PERIODIC_TIME)) == 0) {
1492 		LACP_DPRINTF((lp, "rate limited pdu\n"));
1493 		return;
1494 	}
1495 
1496 	error = lacp_xmit_lacpdu(lp);
1497 
1498 	if (error == 0) {
1499 		lp->lp_flags &= ~LACP_PORT_NTT;
1500 	} else {
1501 		LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n",
1502 		    error));
1503 	}
1504 }
1505 
1506 static void
1507 lacp_sm_assert_ntt(struct lacp_port *lp)
1508 {
1509 
1510 	lp->lp_flags |= LACP_PORT_NTT;
1511 }
1512 
1513 static void
1514 lacp_run_timers(struct lacp_port *lp)
1515 {
1516 	int i;
1517 
1518 	for (i = 0; i < LACP_NTIMER; i++) {
1519 		KASSERT(lp->lp_timer[i] >= 0,
1520 		    ("invalid timer value %d", lp->lp_timer[i]));
1521 		if (lp->lp_timer[i] == 0) {
1522 			continue;
1523 		} else if (--lp->lp_timer[i] <= 0) {
1524 			if (lacp_timer_funcs[i]) {
1525 				(*lacp_timer_funcs[i])(lp);
1526 			}
1527 		}
1528 	}
1529 }
1530 
1531 int
1532 lacp_marker_input(struct lagg_port *lgp, struct mbuf *m)
1533 {
1534 	struct lacp_port *lp = LACP_PORT(lgp);
1535 	struct markerdu *mdu;
1536 	int error = 0;
1537 
1538 	LAGG_LOCK_ASSERT(lgp->lp_lagg);
1539 
1540 	if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) {
1541 		goto bad;
1542 	}
1543 
1544 	if (m->m_pkthdr.len != sizeof(*mdu)) {
1545 		goto bad;
1546 	}
1547 
1548 	if ((m->m_flags & M_MCAST) == 0) {
1549 		goto bad;
1550 	}
1551 
1552 	if (m->m_len < sizeof(*mdu)) {
1553 		m = m_pullup(m, sizeof(*mdu));
1554 		if (m == NULL) {
1555 			return (ENOMEM);
1556 		}
1557 	}
1558 
1559 	mdu = mtod(m, struct markerdu *);
1560 
1561 	if (memcmp(&mdu->mdu_eh.ether_dhost,
1562 	    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) {
1563 		goto bad;
1564 	}
1565 
1566 	/* XXX
1567 	KASSERT(mdu->mdu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_MARKER,
1568 	    ("a very bad kassert!"));
1569 	*/
1570 
1571 	if (mdu->mdu_sph.sph_version != 1) {
1572 		goto bad;
1573 	}
1574 
1575 	switch (mdu->mdu_tlv.tlv_type) {
1576 	case MARKER_TYPE_INFO:
1577 		if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
1578 		    marker_info_tlv_template, TRUE)) {
1579 			goto bad;
1580 		}
1581 		mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE;
1582 		memcpy(&mdu->mdu_eh.ether_dhost,
1583 		    &ethermulticastaddr_slowprotocols, ETHER_ADDR_LEN);
1584 		memcpy(&mdu->mdu_eh.ether_shost,
1585 		    lgp->lp_lladdr, ETHER_ADDR_LEN);
1586 		error = lagg_enqueue(lp->lp_ifp, m);
1587 		break;
1588 
1589 	case MARKER_TYPE_RESPONSE:
1590 		if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv,
1591 		    marker_response_tlv_template, TRUE)) {
1592 			goto bad;
1593 		}
1594 		/*
1595 		 * we are not interested in responses as
1596 		 * we don't have a marker sender.
1597 		 */
1598 		/* FALLTHROUGH */
1599 	default:
1600 		goto bad;
1601 	}
1602 
1603 	return (error);
1604 
1605 bad:
1606 	m_freem(m);
1607 	return (EINVAL);
1608 }
1609 
1610 static int
1611 tlv_check(const void *p, size_t size, const struct tlvhdr *tlv,
1612     const struct tlv_template *tmpl, boolean_t check_type)
1613 {
1614 	while (/* CONSTCOND */ 1) {
1615 		if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) {
1616 			return (EINVAL);
1617 		}
1618 		if ((check_type && tlv->tlv_type != tmpl->tmpl_type) ||
1619 		    tlv->tlv_length != tmpl->tmpl_length) {
1620 			return (EINVAL);
1621 		}
1622 		if (tmpl->tmpl_type == 0) {
1623 			break;
1624 		}
1625 		tlv = (const struct tlvhdr *)
1626 		    ((const char *)tlv + tlv->tlv_length);
1627 		tmpl++;
1628 	}
1629 
1630 	return (0);
1631 }
1632 
1633 #if defined(LACP_DEBUG)
/*
 * lacp_format_mac: format a 6-byte MAC address as upper-case hex
 * octets separated by dashes ("XX-XX-XX-XX-XX-XX").
 *
 * The result is written to buf (at most buflen bytes, truncated as
 * snprintf does) and buf is returned.
 */
const char *
lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen)
{
	snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X",
	    (unsigned int)mac[0], (unsigned int)mac[1],
	    (unsigned int)mac[2], (unsigned int)mac[3],
	    (unsigned int)mac[4], (unsigned int)mac[5]);

	return (buf);
}
1647 
1648 const char *
1649 lacp_format_systemid(const struct lacp_systemid *sysid,
1650     char *buf, size_t buflen)
1651 {
1652 	char macbuf[LACP_MACSTR_MAX+1];
1653 
1654 	snprintf(buf, buflen, "%04X,%s",
1655 	    ntohs(sysid->lsi_prio),
1656 	    lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf)));
1657 
1658 	return (buf);
1659 }
1660 
1661 const char *
1662 lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen)
1663 {
1664 	snprintf(buf, buflen, "%04X,%04X",
1665 	    ntohs(portid->lpi_prio),
1666 	    ntohs(portid->lpi_portno));
1667 
1668 	return (buf);
1669 }
1670 
1671 const char *
1672 lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen)
1673 {
1674 	char sysid[LACP_SYSTEMIDSTR_MAX+1];
1675 	char portid[LACP_PORTIDSTR_MAX+1];
1676 
1677 	snprintf(buf, buflen, "(%s,%04X,%s)",
1678 	    lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)),
1679 	    ntohs(peer->lip_key),
1680 	    lacp_format_portid(&peer->lip_portid, portid, sizeof(portid)));
1681 
1682 	return (buf);
1683 }
1684 
1685 const char *
1686 lacp_format_lagid(const struct lacp_peerinfo *a,
1687     const struct lacp_peerinfo *b, char *buf, size_t buflen)
1688 {
1689 	char astr[LACP_PARTNERSTR_MAX+1];
1690 	char bstr[LACP_PARTNERSTR_MAX+1];
1691 
1692 #if 0
1693 	/*
1694 	 * there's a convention to display small numbered peer
1695 	 * in the left.
1696 	 */
1697 
1698 	if (lacp_compare_peerinfo(a, b) > 0) {
1699 		const struct lacp_peerinfo *t;
1700 
1701 		t = a;
1702 		a = b;
1703 		b = t;
1704 	}
1705 #endif
1706 
1707 	snprintf(buf, buflen, "[%s,%s]",
1708 	    lacp_format_partner(a, astr, sizeof(astr)),
1709 	    lacp_format_partner(b, bstr, sizeof(bstr)));
1710 
1711 	return (buf);
1712 }
1713 
1714 const char *
1715 lacp_format_lagid_aggregator(const struct lacp_aggregator *la,
1716     char *buf, size_t buflen)
1717 {
1718 	if (la == NULL) {
1719 		return ("(none)");
1720 	}
1721 
1722 	return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen));
1723 }
1724 
1725 const char *
1726 lacp_format_state(uint8_t state, char *buf, size_t buflen)
1727 {
1728 	snprintf(buf, buflen, "%b", state, LACP_STATE_BITS);
1729 	return (buf);
1730 }
1731 
1732 static void
1733 lacp_dump_lacpdu(const struct lacpdu *du)
1734 {
1735 	char buf[LACP_PARTNERSTR_MAX+1];
1736 	char buf2[LACP_STATESTR_MAX+1];
1737 
1738 	printf("actor=%s\n",
1739 	    lacp_format_partner(&du->ldu_actor, buf, sizeof(buf)));
1740 	printf("actor.state=%s\n",
1741 	    lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2)));
1742 	printf("partner=%s\n",
1743 	    lacp_format_partner(&du->ldu_partner, buf, sizeof(buf)));
1744 	printf("partner.state=%s\n",
1745 	    lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2)));
1746 
1747 	printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay));
1748 }
1749 
1750 static void
1751 lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...)
1752 {
1753 	va_list va;
1754 
1755 	if (lp) {
1756 		printf("%s: ", lp->lp_ifp->if_xname);
1757 	}
1758 
1759 	va_start(va, fmt);
1760 	vprintf(fmt, va);
1761 	va_end(va);
1762 }
1763 #endif
1764