xref: /titanic_44/usr/src/uts/common/io/aggr/aggr_lacp.c (revision b9bc7f7832704fda46b4d6b04f3f7be1227dc644)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/list.h>
37 #include <sys/ksynch.h>
38 #include <sys/kmem.h>
39 #include <sys/stream.h>
40 #include <sys/modctl.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/atomic.h>
44 #include <sys/stat.h>
45 #include <sys/byteorder.h>
46 #include <sys/strsun.h>
47 #include <sys/isa_defs.h>
48 
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
51 
/*
 * The all-zero Ethernet address.
 */
static struct ether_addr	etherzeroaddr = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
 * fill_lacp_ether() uses it as the destination of every frame built by
 * this file, and aggr_lacp_mcast_on()/aggr_lacp_mcast_off() pass it to
 * aggr_port_multicst() for attached ports.
 */
static struct ether_addr   slow_multicast_addr = {
	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
};
62 
#ifdef DEBUG
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG() takes a parenthesized printf() argument list, e.g.
 * AGGR_LACP_DBG(("port %s\n", name)), and prints it only when
 * aggr_lacp_debug is non-zero.
 *
 * Both variants expand to a single void expression, so the macro can be
 * used anywhere a statement is expected -- including as the unbraced
 * body of an if that has an else -- without capturing the else or
 * leaving a stray empty statement behind.
 */
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	\
	((void)(aggr_lacp_debug ? (printf x, 0) : 0))
#else
#define	AGGR_LACP_DBG(x)	((void)0)
#endif /* DEBUG */
70 
/* Nanoseconds in one second; lacp_xmit_sm() compares hrtime deltas to it. */
#define	NSECS_PER_SEC   1000000000ll

/*
 * used by lacp_misconfig_walker()
 *
 * NOTE(review): no lacp_misconfig_walker() is defined in the part of the
 * file reviewed here -- confirm that this type is still referenced.
 */
typedef struct lacp_misconfig_check_state_s {
	aggr_port_t *cs_portp;	/* presumably the port being checked */
	boolean_t cs_found;	/* presumably set once a match is found */
} lacp_misconfig_check_state_t;

/*
 * State-name tables. lacp_periodic_str[] and lacp_mux_str[] are indexed
 * by state value in the AGGR_LACP_DBG() state transition messages.
 */
static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
static const char *lacp_mux_str[] = LACP_MUX_STRINGS;

/*
 * Default priorities: copied into ActorPortPriority by
 * aggr_lacp_init_port() and into ActorSystemPriority by
 * aggr_lacp_init_grp().
 */
static uint16_t lacp_port_priority = 0x1000;
static uint16_t lacp_system_priority = 0x1000;
85 
/*
 * Maintains a list of all ports in ATTACHED state. This information
 * is used to detect misconfiguration.
 *
 * Entries are added through lacp_port_select() and removed through
 * lacp_port_unselect(); lacp_misconfig_check() walks the list.
 */
typedef struct lacp_sel_ports {
	uint16_t sp_key;	/* compared against the group's lg_key */
	/* Note: sp_partner_system must be 2-byte aligned */
	struct ether_addr sp_partner_system;	/* partner system MAC */
	char sp_devname[MAXNAMELEN + 1];	/* matched against lp_devname */
	uint32_t sp_partner_key;	/* compared with PartnerOperAggrKey */
	struct lacp_sel_ports *sp_next;	/* next entry, NULL at the tail */
} lacp_sel_ports_t;

/* Head of the singly linked list of lacp_sel_ports_t entries. */
static lacp_sel_ports_t *sel_ports = NULL;
/* Held around every walk or modification of the sel_ports list. */
static kmutex_t lacp_sel_lock;
101 
/*
 * Forward declarations of file-local routines that are referenced
 * before their definition.
 */
static void periodic_timer_pop_locked(aggr_port_t *);
static void periodic_timer_pop(void *);
static void lacp_xmit_sm(aggr_port_t *);
static void lacp_periodic_sm(aggr_port_t *);
static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
static void lacp_on(aggr_port_t *);
static void lacp_off(aggr_port_t *);
static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
static void lacp_receive_sm(aggr_port_t *, lacp_t *);
static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
static void aggr_set_coll_dist_locked(aggr_port_t *, boolean_t);
static void start_wait_while_timer(aggr_port_t *);
static void stop_wait_while_timer(aggr_port_t *);
static void lacp_reset_port(aggr_port_t *);
static void stop_current_while_timer(aggr_port_t *);
static void current_while_timer_pop(void *);
static void update_default_selected(aggr_port_t *);
static boolean_t update_selected(aggr_port_t *, lacp_t *);
static boolean_t lacp_sel_ports_add(aggr_port_t *);
static void lacp_sel_ports_del(aggr_port_t *);
123 
124 void
125 aggr_lacp_init(void)
126 {
127 	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
128 }
129 
130 void
131 aggr_lacp_fini(void)
132 {
133 	mutex_destroy(&lacp_sel_lock);
134 }
135 
136 /*
137  * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
138  * could not be performed due to a memory allocation error, B_TRUE otherwise.
139  */
140 static boolean_t
141 lacp_port_select(aggr_port_t *portp)
142 {
143 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
144 
145 	if (!lacp_sel_ports_add(portp))
146 		return (B_FALSE);
147 	portp->lp_lacp.sm.selected = AGGR_SELECTED;
148 	return (B_TRUE);
149 }
150 
151 /*
152  * Set the port LACP state to UNSELECTED.
153  */
154 static void
155 lacp_port_unselect(aggr_port_t *portp)
156 {
157 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
158 
159 	lacp_sel_ports_del(portp);
160 	portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
161 }
162 
163 /*
164  * Initialize group specific LACP state and parameters.
165  */
166 void
167 aggr_lacp_init_grp(aggr_grp_t *aggrp)
168 {
169 	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
170 	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
171 	aggrp->aggr.CollectorMaxDelay = 10;
172 	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
173 	aggrp->aggr.ready = B_FALSE;
174 }
175 
176 /*
177  * Complete LACP info initialization at port creation time.
178  */
179 void
180 aggr_lacp_init_port(aggr_port_t *portp)
181 {
182 	aggr_grp_t *aggrp = portp->lp_grp;
183 	aggr_lacp_port_t *pl = &portp->lp_lacp;
184 
185 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
186 	ASSERT(RW_LOCK_HELD(&aggrp->lg_lock));
187 	ASSERT(RW_LOCK_HELD(&portp->lp_lock));
188 
189 	/* actor port # */
190 	pl->ActorPortNumber = portp->lp_portid;
191 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s): "
192 	    "ActorPortNumber = 0x%x\n", portp->lp_devname,
193 	    pl->ActorPortNumber));
194 
195 	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
196 	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
197 	pl->NTT = B_FALSE;			/* need to transmit */
198 
199 	pl->ActorAdminPortKey = aggrp->lg_key;
200 	pl->ActorOperPortKey = pl->ActorAdminPortKey;
201 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s) "
202 	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
203 	    portp->lp_devname, pl->ActorAdminPortKey, pl->ActorOperPortKey));
204 
205 	/* Actor admin. port state */
206 	pl->ActorAdminPortState.bit.activity = B_FALSE;
207 	pl->ActorAdminPortState.bit.timeout = B_TRUE;
208 	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
209 	pl->ActorAdminPortState.bit.sync = B_FALSE;
210 	pl->ActorAdminPortState.bit.collecting = B_FALSE;
211 	pl->ActorAdminPortState.bit.distributing = B_FALSE;
212 	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
213 	pl->ActorAdminPortState.bit.expired = B_FALSE;
214 	pl->ActorOperPortState = pl->ActorAdminPortState;
215 
216 	/*
217 	 * Partner Administrative Information
218 	 * (All initialized to zero except for the following)
219 	 * Fast Timeouts.
220 	 */
221 	pl->PartnerAdminPortState.bit.timeout =
222 	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
223 
224 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
225 
226 	/*
227 	 * State machine information.
228 	 */
229 	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
230 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
231 	pl->sm.lacp_enabled = B_FALSE;
232 	pl->sm.port_enabled = B_FALSE;		/* Link Down */
233 	pl->sm.actor_churn = B_FALSE;
234 	pl->sm.partner_churn = B_FALSE;
235 	pl->sm.ready_n = B_FALSE;
236 	pl->sm.port_moved = B_FALSE;
237 
238 	lacp_port_unselect(portp);
239 
240 	pl->sm.periodic_state = LACP_NO_PERIODIC;
241 	pl->sm.receive_state = LACP_INITIALIZE;
242 	pl->sm.mux_state = LACP_DETACHED;
243 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
244 
245 	/*
246 	 * Timer information.
247 	 */
248 	pl->current_while_timer.id = 0;
249 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
250 
251 	pl->periodic_timer.id = 0;
252 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
253 
254 	pl->wait_while_timer.id = 0;
255 	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
256 }
257 
258 /*
259  * Port initialization when we need to
260  * turn LACP on/off, etc. Not everything is
261  * reset like in the above routine.
262  *		Do NOT modify things like link status.
263  */
264 static void
265 lacp_reset_port(aggr_port_t *portp)
266 {
267 	aggr_lacp_port_t *pl = &portp->lp_lacp;
268 
269 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
270 
271 	pl->NTT = B_FALSE;			/* need to transmit */
272 
273 	/* reset operational port state */
274 	pl->ActorOperPortState.bit.timeout =
275 	    pl->ActorAdminPortState.bit.timeout;
276 
277 	pl->ActorOperPortState.bit.sync = B_FALSE;
278 	pl->ActorOperPortState.bit.collecting = B_FALSE;
279 	pl->ActorOperPortState.bit.distributing = B_FALSE;
280 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
281 	pl->ActorOperPortState.bit.expired = B_FALSE;
282 
283 	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
284 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
285 
286 	/*
287 	 * State machine information.
288 	 */
289 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
290 	pl->sm.actor_churn = B_FALSE;
291 	pl->sm.partner_churn = B_FALSE;
292 	pl->sm.ready_n = B_FALSE;
293 
294 	lacp_port_unselect(portp);
295 
296 	pl->sm.periodic_state = LACP_NO_PERIODIC;
297 	pl->sm.receive_state = LACP_INITIALIZE;
298 	pl->sm.mux_state = LACP_DETACHED;
299 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
300 
301 	/*
302 	 * Timer information.
303 	 */
304 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
305 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
306 }
307 
308 static void
309 aggr_lacp_mcast_on(aggr_port_t *port)
310 {
311 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
312 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
313 
314 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
315 		return;
316 
317 	(void) aggr_port_multicst(port, B_TRUE,
318 	    (uchar_t *)&slow_multicast_addr);
319 }
320 
321 static void
322 aggr_lacp_mcast_off(aggr_port_t *port)
323 {
324 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
325 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
326 
327 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
328 		return;
329 
330 	(void) aggr_port_multicst(port, B_FALSE,
331 	    (uchar_t *)&slow_multicast_addr);
332 }
333 
334 static void
335 start_periodic_timer(aggr_port_t *portp)
336 {
337 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
338 
339 	if (portp->lp_lacp.periodic_timer.id == 0) {
340 		portp->lp_lacp.periodic_timer.id =
341 		    timeout(periodic_timer_pop, portp,
342 		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
343 	}
344 }
345 
346 static void
347 stop_periodic_timer(aggr_port_t *portp)
348 {
349 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
350 
351 	if (portp->lp_lacp.periodic_timer.id != 0) {
352 		AGGR_LACP_UNLOCK(portp->lp_grp);
353 		(void) untimeout(portp->lp_lacp.periodic_timer.id);
354 		AGGR_LACP_LOCK(portp->lp_grp);
355 		portp->lp_lacp.periodic_timer.id = 0;
356 	}
357 }
358 
359 /*
360  * When the timer pops, we arrive here to
361  * clear out LACPDU count as well as transmit an
362  * LACPDU. We then set the periodic state and let
363  * the periodic state machine restart the timer.
364  */
365 
366 static void
367 periodic_timer_pop_locked(aggr_port_t *portp)
368 {
369 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
370 
371 	portp->lp_lacp.periodic_timer.id = NULL;
372 	portp->lp_lacp_stats.LACPDUsTx = 0;
373 
374 	/* current timestamp */
375 	portp->lp_lacp.time = gethrtime();
376 	portp->lp_lacp.NTT = B_TRUE;
377 	lacp_xmit_sm(portp);
378 
379 	/*
380 	 * Set Periodic State machine state based on the
381 	 * value of the Partner Operation Port State timeout
382 	 * bit.
383 	 */
384 	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
385 		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
386 		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
387 	} else {
388 		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
389 		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
390 	}
391 
392 	lacp_periodic_sm(portp);
393 }
394 
395 static void
396 periodic_timer_pop(void *data)
397 {
398 	aggr_port_t *portp = data;
399 
400 	if (portp->lp_closing)
401 		return;
402 
403 	AGGR_LACP_LOCK(portp->lp_grp);
404 	periodic_timer_pop_locked(portp);
405 	AGGR_LACP_UNLOCK(portp->lp_grp);
406 }
407 
408 /*
409  * Invoked from:
410  *	- startup upon aggregation
411  *	- when the periodic timer pops
412  *	- when the periodic timer value is changed
413  *	- when the port is attached or detached
414  *	- when LACP mode is changed.
415  */
416 static void
417 lacp_periodic_sm(aggr_port_t *portp)
418 {
419 	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
420 	aggr_lacp_port_t *pl = &portp->lp_lacp;
421 
422 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
423 
424 	/* LACP_OFF state not in specification so check here.  */
425 	if (!pl->sm.lacp_on) {
426 		/* Stop timer whether it is running or not */
427 		stop_periodic_timer(portp);
428 		pl->sm.periodic_state = LACP_NO_PERIODIC;
429 		pl->NTT = B_FALSE;
430 		AGGR_LACP_DBG(("lacp_periodic_sm(%s):NO LACP "
431 		    "%s--->%s\n", portp->lp_devname,
432 		    lacp_periodic_str[oldstate],
433 		    lacp_periodic_str[pl->sm.periodic_state]));
434 		return;
435 	}
436 
437 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
438 	    !pl->sm.port_enabled ||
439 	    !pl->ActorOperPortState.bit.activity &&
440 	    !pl->PartnerOperPortState.bit.activity) {
441 
442 		/* Stop timer whether it is running or not */
443 		stop_periodic_timer(portp);
444 		pl->sm.periodic_state = LACP_NO_PERIODIC;
445 		pl->NTT = B_FALSE;
446 		AGGR_LACP_DBG(("lacp_periodic_sm(%s):STOP %s--->%s\n",
447 		    portp->lp_devname, lacp_periodic_str[oldstate],
448 		    lacp_periodic_str[pl->sm.periodic_state]));
449 		return;
450 	}
451 
452 	/*
453 	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
454 	 * has been received. Then after we timeout, then it is
455 	 * possible to go to SLOW_PERIODIC_TIME.
456 	 */
457 	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
458 		pl->periodic_timer.val = FAST_PERIODIC_TIME;
459 		pl->sm.periodic_state = LACP_FAST_PERIODIC;
460 	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
461 	    pl->PartnerOperPortState.bit.timeout) {
462 		/*
463 		 * If we receive a bit indicating we are going to
464 		 * fast periodic from slow periodic, stop the timer
465 		 * and let the periodic_timer_pop routine deal
466 		 * with reseting the periodic state and transmitting
467 		 * a LACPDU.
468 		 */
469 		stop_periodic_timer(portp);
470 		periodic_timer_pop_locked(portp);
471 	}
472 
473 	/* Rearm timer with value provided by partner */
474 	start_periodic_timer(portp);
475 }
476 
477 /*
478  * This routine transmits an LACPDU if lacp_enabled
479  * is TRUE and if NTT is set.
480  */
481 static void
482 lacp_xmit_sm(aggr_port_t *portp)
483 {
484 	aggr_lacp_port_t *pl = &portp->lp_lacp;
485 	size_t	len;
486 	mblk_t  *mp;
487 	hrtime_t now, elapsed;
488 	const mac_txinfo_t *mtp;
489 
490 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
491 
492 	/* LACP_OFF state not in specification so check here.  */
493 	if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
494 		return;
495 
496 	/*
497 	 * Do nothing if LACP has been turned off or if the
498 	 * periodic state machine is not enabled.
499 	 */
500 	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
501 	    !pl->sm.lacp_enabled || pl->sm.begin) {
502 		pl->NTT = B_FALSE;
503 		return;
504 	}
505 
506 	/*
507 	 * If we have sent 5 Slow packets in the last second, avoid
508 	 * sending any more here. No more than three LACPDUs may be transmitted
509 	 * in any Fast_Periodic_Time interval.
510 	 */
511 	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
512 		/*
513 		 * Grab the current time value and see if
514 		 * more than 1 second has passed. If so,
515 		 * reset the timestamp and clear the count.
516 		 */
517 		now = gethrtime();
518 		elapsed = now - pl->time;
519 		if (elapsed > NSECS_PER_SEC) {
520 			portp->lp_lacp_stats.LACPDUsTx = 0;
521 			pl->time = now;
522 		} else {
523 			return;
524 		}
525 	}
526 
527 	len = sizeof (lacp_t) + sizeof (struct ether_header);
528 	mp = allocb(len, BPRI_MED);
529 	if (mp == NULL)
530 		return;
531 
532 	mp->b_wptr = mp->b_rptr + len;
533 	bzero(mp->b_rptr, len);
534 
535 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
536 	fill_lacp_pdu(portp,
537 	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
538 
539 	/*
540 	 * Store the transmit info pointer locally in case it changes between
541 	 * loading mt_fn and mt_arg.
542 	 */
543 	mtp = portp->lp_txinfo;
544 	mtp->mt_fn(mtp->mt_arg, mp);
545 
546 	pl->NTT = B_FALSE;
547 	portp->lp_lacp_stats.LACPDUsTx++;
548 }
549 
550 /*
551  * Initialize the ethernet header of a LACP packet sent from the specified
552  * port.
553  */
554 static void
555 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
556 {
557 	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
558 	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
559 	    ETHERADDRL);
560 	ether->ether_type = htons(ETHERTYPE_SLOW);
561 }
562 
563 static void
564 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
565 {
566 	aggr_lacp_port_t *pl = &portp->lp_lacp;
567 	aggr_grp_t *aggrp = portp->lp_grp;
568 
569 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
570 
571 	lacp->subtype = LACP_SUBTYPE;
572 	lacp->version = LACP_VERSION;
573 
574 	rw_enter(&aggrp->lg_lock, RW_READER);
575 	rw_enter(&portp->lp_lock, RW_READER);
576 
577 	/*
578 	 * Actor Information
579 	 */
580 	lacp->actor_info.tlv_type = ACTOR_TLV;
581 	lacp->actor_info.information_len = sizeof (link_info_t);
582 	lacp->actor_info.system_priority =
583 	    htons(aggrp->aggr.ActorSystemPriority);
584 	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
585 	    ETHERADDRL);
586 	lacp->actor_info.key = htons(pl->ActorOperPortKey);
587 	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
588 	lacp->actor_info.port = htons(pl->ActorPortNumber);
589 	lacp->actor_info.state.state = pl->ActorOperPortState.state;
590 
591 	/*
592 	 * Partner Information
593 	 */
594 	lacp->partner_info.tlv_type = PARTNER_TLV;
595 	lacp->partner_info.information_len = sizeof (link_info_t);
596 	lacp->partner_info.system_priority =
597 	    htons(pl->PartnerOperSysPriority);
598 	lacp->partner_info.system_id = pl->PartnerOperSystem;
599 	lacp->partner_info.key = htons(pl->PartnerOperKey);
600 	lacp->partner_info.port_priority =
601 	    htons(pl->PartnerOperPortPriority);
602 	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
603 	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
604 
605 	/* Collector Information */
606 	lacp->tlv_collector = COLLECTOR_TLV;
607 	lacp->collector_len = 0x10;
608 	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
609 
610 	/* Termination Information */
611 	lacp->tlv_terminator = TERMINATOR_TLV;
612 	lacp->terminator_len = 0x0;
613 
614 	rw_exit(&portp->lp_lock);
615 	rw_exit(&aggrp->lg_lock);
616 }
617 
618 /*
619  * lacp_mux_sm - LACP mux state machine
620  *		This state machine is invoked from:
621  *			- startup upon aggregation
622  *			- from the Selection logic
623  *			- when the wait_while_timer pops
624  *			- when the aggregation MAC address is changed
625  *			- when receiving DL_NOTE_LINK_UP/DOWN
626  *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
627  *			- when LACP mode is changed.
628  *			- when a DL_NOTE_SPEED is received
629  */
630 static void
631 lacp_mux_sm(aggr_port_t *portp)
632 {
633 	aggr_grp_t *aggrp = portp->lp_grp;
634 	boolean_t NTT_updated = B_FALSE;
635 	aggr_lacp_port_t *pl = &portp->lp_lacp;
636 	lacp_mux_state_t oldstate = pl->sm.mux_state;
637 
638 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
639 
640 	/* LACP_OFF state not in specification so check here.  */
641 	if (!pl->sm.lacp_on) {
642 		pl->sm.mux_state = LACP_DETACHED;
643 		pl->ActorOperPortState.bit.sync = B_FALSE;
644 
645 		if (pl->ActorOperPortState.bit.collecting ||
646 		    pl->ActorOperPortState.bit.distributing) {
647 			AGGR_LACP_DBG(("trunk link: (%s): "
648 			    "Collector_Distributor Disabled.\n",
649 			    portp->lp_devname));
650 		}
651 
652 		pl->ActorOperPortState.bit.collecting =
653 		    pl->ActorOperPortState.bit.distributing = B_FALSE;
654 		return;
655 	}
656 
657 	if (pl->sm.begin || !pl->sm.lacp_enabled)
658 		pl->sm.mux_state = LACP_DETACHED;
659 
660 again:
661 	/* determine next state, or return if state unchanged */
662 	switch (pl->sm.mux_state) {
663 	case LACP_DETACHED:
664 		if (pl->sm.begin) {
665 			break;
666 		}
667 
668 		if ((pl->sm.selected == AGGR_SELECTED) ||
669 		    (pl->sm.selected == AGGR_STANDBY)) {
670 			pl->sm.mux_state = LACP_WAITING;
671 			break;
672 		}
673 		return;
674 
675 	case LACP_WAITING:
676 		if (pl->sm.selected == AGGR_UNSELECTED) {
677 			pl->sm.mux_state = LACP_DETACHED;
678 			break;
679 		}
680 
681 		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
682 			pl->sm.mux_state = LACP_ATTACHED;
683 			break;
684 		}
685 		return;
686 
687 	case LACP_ATTACHED:
688 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
689 		    (pl->sm.selected == AGGR_STANDBY)) {
690 			pl->sm.mux_state = LACP_DETACHED;
691 			break;
692 		}
693 
694 		if ((pl->sm.selected == AGGR_SELECTED) &&
695 		    pl->PartnerOperPortState.bit.sync) {
696 			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
697 			break;
698 		}
699 		return;
700 
701 	case LACP_COLLECTING_DISTRIBUTING:
702 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
703 		    (pl->sm.selected == AGGR_STANDBY) ||
704 		    !pl->PartnerOperPortState.bit.sync) {
705 			pl->sm.mux_state = LACP_ATTACHED;
706 			break;
707 		}
708 		return;
709 	}
710 
711 	AGGR_LACP_DBG(("lacp_mux_sm(%s):%s--->%s\n",
712 	    portp->lp_devname, lacp_mux_str[oldstate],
713 	    lacp_mux_str[pl->sm.mux_state]));
714 
715 	/* perform actions on entering a new state */
716 	switch (pl->sm.mux_state) {
717 	case LACP_DETACHED:
718 		if (pl->ActorOperPortState.bit.collecting ||
719 		    pl->ActorOperPortState.bit.distributing) {
720 			AGGR_LACP_DBG(("trunk link: (%s): "
721 			    "Collector_Distributor Disabled.\n",
722 			    portp->lp_devname));
723 		}
724 
725 		pl->ActorOperPortState.bit.sync =
726 		    pl->ActorOperPortState.bit.collecting = B_FALSE;
727 
728 		/* Turn OFF Collector_Distributor */
729 		aggr_set_coll_dist(portp, B_FALSE);
730 
731 		pl->ActorOperPortState.bit.distributing = B_FALSE;
732 		NTT_updated = B_TRUE;
733 		break;
734 
735 	case LACP_WAITING:
736 		start_wait_while_timer(portp);
737 		break;
738 
739 	case LACP_ATTACHED:
740 		if (pl->ActorOperPortState.bit.collecting ||
741 		    pl->ActorOperPortState.bit.distributing) {
742 			AGGR_LACP_DBG(("trunk link: (%s): "
743 			    "Collector_Distributor Disabled.\n",
744 			    portp->lp_devname));
745 		}
746 
747 		pl->ActorOperPortState.bit.sync = B_TRUE;
748 		pl->ActorOperPortState.bit.collecting = B_FALSE;
749 
750 		/* Turn OFF Collector_Distributor */
751 		aggr_set_coll_dist(portp, B_FALSE);
752 
753 		pl->ActorOperPortState.bit.distributing = B_FALSE;
754 		NTT_updated = B_TRUE;
755 		if (pl->PartnerOperPortState.bit.sync) {
756 			/*
757 			 * We had already received an updated sync from
758 			 * the partner. Attempt to transition to
759 			 * collecting/distributing now.
760 			 */
761 			goto again;
762 		}
763 		break;
764 
765 	case LACP_COLLECTING_DISTRIBUTING:
766 		if (!pl->ActorOperPortState.bit.collecting &&
767 		    !pl->ActorOperPortState.bit.distributing) {
768 			AGGR_LACP_DBG(("trunk link: (%s): "
769 			    "Collector_Distributor Enabled.\n",
770 			    portp->lp_devname));
771 		}
772 		pl->ActorOperPortState.bit.distributing = B_TRUE;
773 
774 		/* Turn Collector_Distributor back ON */
775 		aggr_set_coll_dist(portp, B_TRUE);
776 
777 		pl->ActorOperPortState.bit.collecting = B_TRUE;
778 		NTT_updated = B_TRUE;
779 		break;
780 	}
781 
782 	/*
783 	 * If we updated the state of the NTT variable, then
784 	 * initiate a LACPDU transmission.
785 	 */
786 	if (NTT_updated) {
787 		pl->NTT = B_TRUE;
788 		lacp_xmit_sm(portp);
789 	}
790 } /* lacp_mux_sm */
791 
792 
793 static void
794 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
795 {
796 	marker_pdu_t		*markerp = (marker_pdu_t *)mp->b_rptr;
797 	const mac_txinfo_t	*mtp;
798 
799 	AGGR_LACP_LOCK(portp->lp_grp);
800 
801 	AGGR_LACP_DBG(("trunk link: (%s): MARKER PDU received:\n",
802 	    portp->lp_devname));
803 
804 	/* LACP_OFF state not in specification so check here.  */
805 	if (!portp->lp_lacp.sm.lacp_on)
806 		goto bail;
807 
808 	if (MBLKL(mp) < sizeof (marker_pdu_t))
809 		goto bail;
810 
811 	if (markerp->version != MARKER_VERSION) {
812 		AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: "
813 		    "version = %d does not match s/w version %d\n",
814 		    portp->lp_devname, markerp->version, MARKER_VERSION));
815 		goto bail;
816 	}
817 
818 	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
819 		/* We do not yet send out MARKER info PDUs */
820 		AGGR_LACP_DBG(("trunk link (%s): MARKER RESPONSE PDU: "
821 		    " MARKER TLV = %d - We don't send out info type!\n",
822 		    portp->lp_devname, markerp->tlv_marker));
823 		goto bail;
824 	}
825 
826 	if (markerp->tlv_marker != MARKER_INFO_TLV) {
827 		AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: "
828 		    " MARKER TLV = %d \n", portp->lp_devname,
829 		    markerp->tlv_marker));
830 		goto bail;
831 	}
832 
833 	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
834 		AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: "
835 		    " MARKER length = %d \n", portp->lp_devname,
836 		    markerp->marker_len));
837 		goto bail;
838 	}
839 
840 	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
841 		AGGR_LACP_DBG(("trunk link (%s): MARKER PDU: "
842 		    " MARKER Port %d not equal to Partner port %d\n",
843 		    portp->lp_devname, markerp->requestor_port,
844 		    portp->lp_lacp.PartnerOperPortNum));
845 		goto bail;
846 	}
847 
848 	if (ether_cmp(&markerp->system_id,
849 	    &portp->lp_lacp.PartnerOperSystem) != 0) {
850 		AGGR_LACP_DBG(("trunk link (%s): MARKER PDU: "
851 		    " MARKER MAC not equal to Partner MAC\n",
852 		    portp->lp_devname));
853 		goto bail;
854 	}
855 
856 	/*
857 	 * Turn into Marker Response PDU
858 	 * and return mblk to sending system
859 	 */
860 	markerp->tlv_marker = MARKER_RESPONSE_TLV;
861 
862 	/* reuse the space that was used by received ethernet header */
863 	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
864 	mp->b_rptr -= sizeof (struct ether_header);
865 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
866 	AGGR_LACP_UNLOCK(portp->lp_grp);
867 
868 	/*
869 	 * Store the transmit info pointer locally in case it changes between
870 	 * loading mt_fn and mt_arg.
871 	 */
872 	mtp = portp->lp_txinfo;
873 	mtp->mt_fn(mtp->mt_arg, mp);
874 	return;
875 
876 bail:
877 	AGGR_LACP_UNLOCK(portp->lp_grp);
878 	freemsg(mp);
879 }
880 
881 
882 /*
883  * Update the LACP mode (off, active, or passive) of the specified group.
884  */
885 void
886 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
887 {
888 	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
889 	aggr_port_t *port;
890 
891 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
892 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
893 
894 	if (mode == old_mode)
895 		return;
896 
897 	grp->lg_lacp_mode = mode;
898 
899 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
900 		port->lp_lacp.ActorAdminPortState.bit.activity =
901 		    port->lp_lacp.ActorOperPortState.bit.activity =
902 		    (mode == AGGR_LACP_ACTIVE);
903 
904 		if (old_mode == AGGR_LACP_OFF) {
905 			/* OFF -> {PASSIVE,ACTIVE} */
906 			/* turn OFF Collector_Distributor */
907 			aggr_set_coll_dist(port, B_FALSE);
908 			rw_enter(&port->lp_lock, RW_WRITER);
909 			lacp_on(port);
910 			if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
911 				aggr_lacp_port_attached(port);
912 			rw_exit(&port->lp_lock);
913 		} else if (mode == AGGR_LACP_OFF) {
914 			/* {PASSIVE,ACTIVE} -> OFF */
915 			rw_enter(&port->lp_lock, RW_WRITER);
916 			lacp_off(port);
917 			rw_exit(&port->lp_lock);
918 			if (!grp->lg_closing) {
919 				/* Turn ON Collector_Distributor */
920 				aggr_set_coll_dist(port, B_TRUE);
921 			}
922 		} else {
923 			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
924 			port->lp_lacp.sm.begin = B_TRUE;
925 			lacp_mux_sm(port);
926 			lacp_periodic_sm(port);
927 
928 			/* kick off state machines */
929 			lacp_receive_sm(port, NULL);
930 			lacp_mux_sm(port);
931 		}
932 
933 		if (grp->lg_closing)
934 			break;
935 	}
936 }
937 
938 
939 /*
940  * Update the LACP timer (short or long) of the specified group.
941  */
942 void
943 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
944 {
945 	aggr_port_t *port;
946 
947 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
948 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
949 
950 	if (timer == grp->aggr.PeriodicTimer)
951 		return;
952 
953 	grp->aggr.PeriodicTimer = timer;
954 
955 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
956 		port->lp_lacp.ActorAdminPortState.bit.timeout =
957 		    port->lp_lacp.ActorOperPortState.bit.timeout =
958 		    (timer == AGGR_LACP_TIMER_SHORT);
959 	}
960 }
961 
962 
963 /*
964  * Sets the initial LACP mode (off, active, passive) and LACP timer
965  * (short, long) of the specified group.
966  */
967 void
968 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
969     aggr_lacp_timer_t timer)
970 {
971 	aggr_port_t *port;
972 
973 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
974 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
975 
976 	grp->lg_lacp_mode = mode;
977 	grp->aggr.PeriodicTimer = timer;
978 
979 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
980 		port->lp_lacp.ActorAdminPortState.bit.activity =
981 		    port->lp_lacp.ActorOperPortState.bit.activity =
982 		    (mode == AGGR_LACP_ACTIVE);
983 
984 		port->lp_lacp.ActorAdminPortState.bit.timeout =
985 		    port->lp_lacp.ActorOperPortState.bit.timeout =
986 		    (timer == AGGR_LACP_TIMER_SHORT);
987 
988 		if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
989 			/* Turn ON Collector_Distributor */
990 			aggr_set_coll_dist(port, B_TRUE);
991 		} else { /* LACP_ACTIVE/PASSIVE */
992 			rw_enter(&port->lp_lock, RW_WRITER);
993 			lacp_on(port);
994 			rw_exit(&port->lp_lock);
995 		}
996 	}
997 }
998 
999 /*
1000  * Verify that the Partner MAC and Key recorded by the specified
1001  * port are not found in other ports that are not part of our
1002  * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1003  * otherwise.
1004  */
1005 static boolean_t
1006 lacp_misconfig_check(aggr_port_t *portp)
1007 {
1008 	aggr_grp_t *grp = portp->lp_grp;
1009 	lacp_sel_ports_t *cport;
1010 
1011 	mutex_enter(&lacp_sel_lock);
1012 
1013 	for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1014 
1015 		/* skip entries of the group of the port being checked */
1016 		if (cport->sp_key == grp->lg_key)
1017 			continue;
1018 
1019 		if ((ether_cmp(&cport->sp_partner_system,
1020 		    &grp->aggr.PartnerSystem) == 0) &&
1021 		    (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1022 			char mac_str[ETHERADDRL*3];
1023 			struct ether_addr *mac = &cport->sp_partner_system;
1024 
1025 			/*
1026 			 * The Partner port information is already in use
1027 			 * by ports in another aggregation so disable this
1028 			 * port.
1029 			 */
1030 
1031 			(void) snprintf(mac_str, sizeof (mac_str),
1032 			    "%x:%x:%x:%x:%x:%x",
1033 			    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1034 			    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1035 			    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1036 
1037 			portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1038 			cmn_err(CE_NOTE, "aggr key %d port %s: Port Partner "
1039 			    "MAC %s and key %d in use on aggregation "
1040 			    "key %d port %s\n", grp->lg_key,
1041 			    portp->lp_devname, mac_str,
1042 			    portp->lp_lacp.PartnerOperKey, cport->sp_key,
1043 			    cport->sp_devname);
1044 			break;
1045 		}
1046 	}
1047 
1048 	mutex_exit(&lacp_sel_lock);
1049 	return (cport != NULL);
1050 }
1051 
1052 /*
1053  * Remove the specified port from the list of selected ports.
1054  */
1055 static void
1056 lacp_sel_ports_del(aggr_port_t *portp)
1057 {
1058 	lacp_sel_ports_t *cport, **prev = NULL;
1059 
1060 	mutex_enter(&lacp_sel_lock);
1061 
1062 	prev = &sel_ports;
1063 	for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1064 	    cport = cport->sp_next) {
1065 		if (bcmp(portp->lp_devname, cport->sp_devname,
1066 		    MAXNAMELEN + 1) == 0) {
1067 			break;
1068 		}
1069 	}
1070 
1071 	if (cport == NULL) {
1072 		mutex_exit(&lacp_sel_lock);
1073 		return;
1074 	}
1075 
1076 	*prev = cport->sp_next;
1077 	kmem_free(cport, sizeof (*cport));
1078 
1079 	mutex_exit(&lacp_sel_lock);
1080 }
1081 
1082 /*
1083  * Add the specified port to the list of selected ports. Returns B_FALSE
1084  * if the operation could not be performed due to an memory allocation
1085  * error.
1086  */
1087 static boolean_t
1088 lacp_sel_ports_add(aggr_port_t *portp)
1089 {
1090 	lacp_sel_ports_t *new_port;
1091 	lacp_sel_ports_t *cport, **last;
1092 
1093 	mutex_enter(&lacp_sel_lock);
1094 
1095 	/* check if port is already in the list */
1096 	last = &sel_ports;
1097 	for (cport = sel_ports; cport != NULL;
1098 	    last = &cport->sp_next, cport = cport->sp_next) {
1099 		if (bcmp(portp->lp_devname, cport->sp_devname,
1100 		    MAXNAMELEN + 1) == 0) {
1101 			ASSERT(cport->sp_partner_key ==
1102 			    portp->lp_lacp.PartnerOperKey);
1103 			ASSERT(ether_cmp(&cport->sp_partner_system,
1104 			    &portp->lp_lacp.PartnerOperSystem) == 0);
1105 
1106 			mutex_exit(&lacp_sel_lock);
1107 			return (B_TRUE);
1108 		}
1109 	}
1110 
1111 	/* create and initialize new entry */
1112 	new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1113 	if (new_port == NULL) {
1114 		mutex_exit(&lacp_sel_lock);
1115 		return (B_FALSE);
1116 	}
1117 
1118 	new_port->sp_key = portp->lp_grp->lg_key;
1119 	bcopy(&portp->lp_lacp.PartnerOperSystem,
1120 	    &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1121 	new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1122 	bcopy(portp->lp_devname, new_port->sp_devname, MAXNAMELEN + 1);
1123 
1124 	*last = new_port;
1125 
1126 	mutex_exit(&lacp_sel_lock);
1127 	return (B_TRUE);
1128 }
1129 
1130 /*
1131  * lacp_selection_logic - LACP selection logic
1132  *		Sets the selected variable on a per port basis
1133  *		and sets Ready when all waiting ports are ready
1134  *		to go online.
1135  *
1136  * parameters:
1137  *      - portp - instance this applies to.
1138  *
1139  * invoked:
1140  *    - when initialization is needed
1141  *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1142  *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1143  *    - every time the wait_while_timer pops
1144  *    - everytime we turn LACP on/off
1145  */
1146 static void
1147 lacp_selection_logic(aggr_port_t *portp)
1148 {
1149 	aggr_port_t *tpp;
1150 	aggr_grp_t *aggrp = portp->lp_grp;
1151 	int ports_waiting;
1152 	boolean_t reset_mac = B_FALSE;
1153 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1154 
1155 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
1156 
1157 	/* LACP_OFF state not in specification so check here.  */
1158 	if (!pl->sm.lacp_on) {
1159 		lacp_port_unselect(portp);
1160 		aggrp->aggr.ready = B_FALSE;
1161 		lacp_mux_sm(portp);
1162 		return;
1163 	}
1164 
1165 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1166 	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1167 
1168 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): "
1169 		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1170 		    "lp_state=%d)\n", portp->lp_devname, pl->sm.selected,
1171 		    AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1172 		    portp->lp_state));
1173 
1174 		lacp_port_unselect(portp);
1175 		aggrp->aggr.ready = B_FALSE;
1176 		lacp_mux_sm(portp);
1177 		return;
1178 	}
1179 
1180 	/*
1181 	 * If LACP is not enabled then selected is never set.
1182 	 */
1183 	if (!pl->sm.lacp_enabled) {
1184 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): selected %d-->%d\n",
1185 		    portp->lp_devname, pl->sm.selected, AGGR_UNSELECTED));
1186 
1187 		lacp_port_unselect(portp);
1188 		lacp_mux_sm(portp);
1189 		return;
1190 	}
1191 
1192 	/*
1193 	 * Check if the Partner MAC or Key are zero. If so, we have
1194 	 * not received any LACP info or it has expired and the
1195 	 * receive machine is in the LACP_DEFAULTED state.
1196 	 */
1197 	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1198 	    (pl->PartnerOperKey == 0)) {
1199 
1200 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1201 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1202 			    &etherzeroaddr) != 0 &&
1203 			    (tpp->lp_lacp.PartnerOperKey != 0))
1204 				break;
1205 		}
1206 
1207 		/*
1208 		 * If all ports have no key or aggregation address,
1209 		 * then clear the negotiated Partner MAC and key.
1210 		 */
1211 		if (tpp == NULL) {
1212 			/* Clear the aggregation Partner MAC and key */
1213 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1214 			aggrp->aggr.PartnerOperAggrKey = 0;
1215 		}
1216 
1217 		return;
1218 	}
1219 
1220 	/*
1221 	 * Insure that at least one port in the aggregation
1222 	 * matches the Partner aggregation MAC and key. If not,
1223 	 * then clear the aggregation MAC and key. Later we will
1224 	 * set the Partner aggregation MAC and key to that of the
1225 	 * current port's Partner MAC and key.
1226 	 */
1227 	if (ether_cmp(&pl->PartnerOperSystem,
1228 	    &aggrp->aggr.PartnerSystem) != 0 ||
1229 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1230 
1231 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1232 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1233 			    &aggrp->aggr.PartnerSystem) == 0 &&
1234 			    (tpp->lp_lacp.PartnerOperKey ==
1235 			    aggrp->aggr.PartnerOperAggrKey))
1236 				break;
1237 		}
1238 
1239 		if (tpp == NULL) {
1240 			/* Clear the aggregation Partner MAC and key */
1241 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1242 			aggrp->aggr.PartnerOperAggrKey = 0;
1243 			reset_mac = B_TRUE;
1244 		}
1245 	}
1246 
1247 	/*
1248 	 * If our Actor MAC is found in the Partner MAC
1249 	 * on this port then we have a loopback misconfiguration.
1250 	 */
1251 	if (ether_cmp(&pl->PartnerOperSystem,
1252 	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1253 		cmn_err(CE_NOTE, "trunk link: (%s): Loopback condition.\n",
1254 		    portp->lp_devname);
1255 
1256 		lacp_port_unselect(portp);
1257 		lacp_mux_sm(portp);
1258 		return;
1259 	}
1260 
1261 	/*
1262 	 * If our Partner MAC and Key are found on any other
1263 	 * ports that are not in our aggregation, we have
1264 	 * a misconfiguration.
1265 	 */
1266 	if (lacp_misconfig_check(portp)) {
1267 		lacp_mux_sm(portp);
1268 		return;
1269 	}
1270 
1271 	/*
1272 	 * If the Aggregation Partner MAC and Key have not been
1273 	 * set, then this is either the first port or the aggregation
1274 	 * MAC and key have been reset. In either case we must set
1275 	 * the values of the Partner MAC and key.
1276 	 */
1277 	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1278 	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1279 		/* Set aggregation Partner MAC and key */
1280 		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1281 		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1282 
1283 		/*
1284 		 * If we reset Partner aggregation MAC, then restart
1285 		 * selection_logic on ports that match new MAC address.
1286 		 */
1287 		if (reset_mac) {
1288 			for (tpp = aggrp->lg_ports; tpp; tpp =
1289 			    tpp->lp_next) {
1290 				if (tpp == portp)
1291 					continue;
1292 				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1293 				    &aggrp->aggr.PartnerSystem) == 0 &&
1294 				    (tpp->lp_lacp.PartnerOperKey ==
1295 				    aggrp->aggr.PartnerOperAggrKey))
1296 					lacp_selection_logic(tpp);
1297 			}
1298 		}
1299 	} else if (ether_cmp(&pl->PartnerOperSystem,
1300 	    &aggrp->aggr.PartnerSystem) != 0 ||
1301 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1302 		/*
1303 		 * The Partner port information does not match
1304 		 * that of the other ports in the aggregation
1305 		 * so disable this port.
1306 		 */
1307 		lacp_port_unselect(portp);
1308 
1309 		cmn_err(CE_NOTE, "trunk link: (%s): Port Partner MAC or"
1310 		    " key (%d) incompatible with Aggregation Partner "
1311 		    "MAC or key (%d)\n",
1312 		    portp->lp_devname, pl->PartnerOperKey,
1313 		    aggrp->aggr.PartnerOperAggrKey);
1314 
1315 		lacp_mux_sm(portp);
1316 		return;
1317 	}
1318 
1319 	/* If we get to here, automatically set selected */
1320 	if (pl->sm.selected != AGGR_SELECTED) {
1321 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): "
1322 		    "selected %d-->%d\n", portp->lp_devname,
1323 		    pl->sm.selected, AGGR_SELECTED));
1324 		if (!lacp_port_select(portp))
1325 			return;
1326 		lacp_mux_sm(portp);
1327 	}
1328 
1329 	/*
1330 	 * From this point onward we have selected the port
1331 	 * and are simply checking if the Ready flag should
1332 	 * be set.
1333 	 */
1334 
1335 	/*
1336 	 * If at least two ports are waiting to aggregate
1337 	 * and ready_n is set on all ports waiting to aggregate
1338 	 * then set READY for the aggregation.
1339 	 */
1340 
1341 	ports_waiting = 0;
1342 
1343 	if (!aggrp->aggr.ready) {
1344 		/*
1345 		 * If all ports in the aggregation have received compatible
1346 		 * partner information and they match up correctly with the
1347 		 * switch, there is no need to wait for all the
1348 		 * wait_while_timers to pop.
1349 		 */
1350 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1351 			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1352 			    tpp->lp_lacp.sm.begin) &&
1353 			    !pl->PartnerOperPortState.bit.sync) {
1354 				/* Add up ports uninitialized or waiting */
1355 				ports_waiting++;
1356 				if (!tpp->lp_lacp.sm.ready_n)
1357 					return;
1358 			}
1359 		}
1360 	}
1361 
1362 	if (aggrp->aggr.ready) {
1363 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): "
1364 		    "aggr.ready already set\n", portp->lp_devname));
1365 		lacp_mux_sm(portp);
1366 	} else {
1367 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): Ready %d-->%d\n",
1368 		    portp->lp_devname, aggrp->aggr.ready, B_TRUE));
1369 		aggrp->aggr.ready = B_TRUE;
1370 
1371 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1372 			lacp_mux_sm(tpp);
1373 	}
1374 
1375 }
1376 
1377 /*
1378  * wait_while_timer_pop - When the timer pops, we arrive here to
1379  *			set ready_n and trigger the selection logic.
1380  */
1381 static void
1382 wait_while_timer_pop(void *data)
1383 {
1384 	aggr_port_t *portp = data;
1385 
1386 	if (portp->lp_closing)
1387 		return;
1388 
1389 	AGGR_LACP_LOCK(portp->lp_grp);
1390 
1391 	AGGR_LACP_DBG(("trunk link:(%s): wait_while_timer pop \n",
1392 	    portp->lp_devname));
1393 	portp->lp_lacp.wait_while_timer.id = 0;
1394 	portp->lp_lacp.sm.ready_n = B_TRUE;
1395 
1396 	lacp_selection_logic(portp);
1397 	AGGR_LACP_UNLOCK(portp->lp_grp);
1398 }
1399 
1400 static void
1401 start_wait_while_timer(aggr_port_t *portp)
1402 {
1403 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1404 
1405 	if (portp->lp_lacp.wait_while_timer.id == 0) {
1406 		portp->lp_lacp.wait_while_timer.id =
1407 		    timeout(wait_while_timer_pop, portp,
1408 		    drv_usectohz(1000000 *
1409 		    portp->lp_lacp.wait_while_timer.val));
1410 	}
1411 }
1412 
1413 
1414 static void
1415 stop_wait_while_timer(portp)
1416 aggr_port_t *portp;
1417 {
1418 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1419 
1420 	if (portp->lp_lacp.wait_while_timer.id != 0) {
1421 		AGGR_LACP_UNLOCK(portp->lp_grp);
1422 		(void) untimeout(portp->lp_lacp.wait_while_timer.id);
1423 		AGGR_LACP_LOCK(portp->lp_grp);
1424 		portp->lp_lacp.wait_while_timer.id = 0;
1425 	}
1426 }
1427 
1428 /*
1429  * Invoked when a port has been attached to a group.
1430  * Complete the processing that couldn't be finished from lacp_on()
1431  * because the port was not started. We know that the link is full
1432  * duplex and ON, otherwise it wouldn't be attached.
1433  */
1434 void
1435 aggr_lacp_port_attached(aggr_port_t *portp)
1436 {
1437 	aggr_grp_t *grp = portp->lp_grp;
1438 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1439 
1440 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1441 	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1442 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1443 
1444 	AGGR_LACP_DBG(("aggr_lacp_port_attached: port %s\n",
1445 	    portp->lp_devname));
1446 
1447 	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1448 
1449 	if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
1450 		pl->ActorAdminPortState.bit.activity =
1451 		    pl->ActorOperPortState.bit.activity = B_FALSE;
1452 
1453 		/* Turn ON Collector_Distributor */
1454 		aggr_set_coll_dist_locked(portp, B_TRUE);
1455 
1456 		return;
1457 	}
1458 
1459 	pl->ActorAdminPortState.bit.activity =
1460 	    pl->ActorOperPortState.bit.activity =
1461 	    (grp->lg_lacp_mode == AGGR_LACP_ACTIVE);
1462 
1463 	pl->ActorAdminPortState.bit.timeout =
1464 	    pl->ActorOperPortState.bit.timeout =
1465 	    (grp->aggr.PeriodicTimer == AGGR_LACP_TIMER_SHORT);
1466 
1467 	pl->sm.lacp_enabled = B_TRUE;
1468 	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1469 	pl->sm.begin = B_TRUE;
1470 
1471 	if (!pl->sm.lacp_on) {
1472 		/* Turn OFF Collector_Distributor */
1473 		aggr_set_coll_dist_locked(portp, B_FALSE);
1474 
1475 		lacp_on(portp);
1476 	} else {
1477 		lacp_receive_sm(portp, NULL);
1478 		lacp_mux_sm(portp);
1479 
1480 		/* Enable Multicast Slow Protocol address */
1481 		aggr_lacp_mcast_on(portp);
1482 
1483 		/* periodic_sm is started up from the receive machine */
1484 		lacp_selection_logic(portp);
1485 	}
1486 }
1487 
1488 /*
1489  * Invoked when a port has been detached from a group. Turn off
1490  * LACP processing if it was enabled.
1491  */
1492 void
1493 aggr_lacp_port_detached(aggr_port_t *portp)
1494 {
1495 	aggr_grp_t *grp = portp->lp_grp;
1496 
1497 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1498 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1499 
1500 	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %s\n",
1501 	    portp->lp_devname));
1502 
1503 	portp->lp_lacp.sm.port_enabled = B_FALSE;
1504 
1505 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1506 		return;
1507 
1508 	/* Disable Slow Protocol PDUs */
1509 	lacp_off(portp);
1510 }
1511 
1512 
1513 /*
1514  * Invoked after the outbound port selection policy has been changed.
1515  */
1516 void
1517 aggr_lacp_policy_changed(aggr_grp_t *grp)
1518 {
1519 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1520 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1521 
1522 	/* suspend transmission for CollectorMaxDelay time */
1523 	delay(grp->aggr.CollectorMaxDelay * 10);
1524 }
1525 
1526 
1527 /*
1528  * Enable Slow Protocol LACP and Marker PDUs.
1529  */
1530 static void
1531 lacp_on(aggr_port_t *portp)
1532 {
1533 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1534 	ASSERT(RW_WRITE_HELD(&portp->lp_grp->lg_lock));
1535 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1536 
1537 	/*
1538 	 * Reset the state machines and Partner operational
1539 	 * information. Careful to not reset things like
1540 	 * our link state.
1541 	 */
1542 	lacp_reset_port(portp);
1543 	portp->lp_lacp.sm.lacp_on = B_TRUE;
1544 
1545 	AGGR_LACP_DBG(("lacp_on:(%s): \n", portp->lp_devname));
1546 
1547 	lacp_receive_sm(portp, NULL);
1548 	lacp_mux_sm(portp);
1549 
1550 	if (portp->lp_state != AGGR_PORT_STATE_ATTACHED)
1551 		return;
1552 
1553 	/* Enable Multicast Slow Protocol address */
1554 	aggr_lacp_mcast_on(portp);
1555 
1556 	/* periodic_sm is started up from the receive machine */
1557 	lacp_selection_logic(portp);
1558 } /* lacp_on */
1559 
1560 
1561 /* Disable Slow Protocol LACP and Marker PDUs */
1562 static void
1563 lacp_off(aggr_port_t *portp)
1564 {
1565 	aggr_grp_t *grp = portp->lp_grp;
1566 
1567 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1568 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1569 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1570 
1571 	portp->lp_lacp.sm.lacp_on = B_FALSE;
1572 
1573 	AGGR_LACP_DBG(("lacp_off:(%s): \n", portp->lp_devname));
1574 
1575 	/*
1576 	 * Disable Slow Protocol Timers. We must temporarely release
1577 	 * the group and port locks in order to avod deadlocks. Make
1578 	 * sure that the port nor the group are closing after re-acquiring
1579 	 * their locks.
1580 	 */
1581 	rw_exit(&portp->lp_lock);
1582 	rw_exit(&grp->lg_lock);
1583 
1584 	stop_periodic_timer(portp);
1585 	stop_current_while_timer(portp);
1586 	stop_wait_while_timer(portp);
1587 
1588 	rw_enter(&grp->lg_lock, RW_WRITER);
1589 	rw_enter(&portp->lp_lock, RW_WRITER);
1590 
1591 	if (!portp->lp_closing && !grp->lg_closing) {
1592 		lacp_mux_sm(portp);
1593 		lacp_periodic_sm(portp);
1594 		lacp_selection_logic(portp);
1595 	}
1596 
1597 	/* Turn OFF Collector_Distributor */
1598 	aggr_set_coll_dist_locked(portp, B_FALSE);
1599 
1600 	/* Disable Multicast Slow Protocol address */
1601 	aggr_lacp_mcast_off(portp);
1602 
1603 	lacp_reset_port(portp);
1604 }
1605 
1606 
1607 static boolean_t
1608 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1609 {
1610 	/*
1611 	 * 43.4.12 - "a Receive machine shall not validate
1612 	 * the Version Number, TLV_type, or Reserved fields in received
1613 	 * LACPDUs."
1614 	 * ... "a Receive machine may validate the Actor_Information_Length,
1615 	 * Partner_Information_Length, Collector_Information_Length,
1616 	 * or Terminator_Length fields."
1617 	 */
1618 	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1619 	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1620 	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1621 	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1622 		AGGR_LACP_DBG(("trunk link (%s): Malformed LACPDU: "
1623 		    " Terminator Length = %d \n", portp->lp_devname,
1624 		    lacp->terminator_len));
1625 		return (B_FALSE);
1626 	}
1627 
1628 	return (B_TRUE);
1629 }
1630 
1631 
1632 static void
1633 start_current_while_timer(aggr_port_t *portp, uint_t time)
1634 {
1635 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1636 
1637 	if (portp->lp_lacp.current_while_timer.id == 0) {
1638 		if (time > 0) {
1639 			portp->lp_lacp.current_while_timer.val = time;
1640 		} else if (portp->lp_lacp.ActorOperPortState.bit.timeout) {
1641 			portp->lp_lacp.current_while_timer.val =
1642 			    SHORT_TIMEOUT_TIME;
1643 		} else {
1644 			portp->lp_lacp.current_while_timer.val =
1645 			    LONG_TIMEOUT_TIME;
1646 		}
1647 
1648 		portp->lp_lacp.current_while_timer.id =
1649 		    timeout(current_while_timer_pop, portp,
1650 		    drv_usectohz((clock_t)1000000 *
1651 		    (clock_t)portp->lp_lacp.current_while_timer.val));
1652 	}
1653 }
1654 
1655 
1656 static void
1657 stop_current_while_timer(aggr_port_t *portp)
1658 {
1659 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1660 
1661 	if (portp->lp_lacp.current_while_timer.id != 0) {
1662 		AGGR_LACP_UNLOCK(portp->lp_grp);
1663 		(void) untimeout(portp->lp_lacp.current_while_timer.id);
1664 		AGGR_LACP_LOCK(portp->lp_grp);
1665 		portp->lp_lacp.current_while_timer.id = 0;
1666 	}
1667 }
1668 
1669 
1670 static void
1671 current_while_timer_pop(void *data)
1672 {
1673 	aggr_port_t *portp = (aggr_port_t *)data;
1674 
1675 	if (portp->lp_closing)
1676 		return;
1677 
1678 	AGGR_LACP_LOCK(portp->lp_grp);
1679 
1680 	AGGR_LACP_DBG(("trunk link:(%s): current_while_timer "
1681 	    "pop id=%p\n", portp->lp_devname,
1682 	    portp->lp_lacp.current_while_timer.id));
1683 
1684 	portp->lp_lacp.current_while_timer.id = 0;
1685 	lacp_receive_sm(portp, NULL);
1686 	AGGR_LACP_UNLOCK(portp->lp_grp);
1687 }
1688 
1689 
1690 /*
1691  * record_Default - Simply copies over administrative values
1692  * to the partner operational values, and sets our state to indicate we
1693  * are using defaulted values.
1694  */
1695 static void
1696 record_Default(aggr_port_t *portp)
1697 {
1698 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1699 
1700 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1701 
1702 	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1703 	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1704 	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1705 	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1706 	pl->PartnerOperKey = pl->PartnerAdminKey;
1707 	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1708 
1709 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1710 }
1711 
1712 
1713 /* Returns B_TRUE on sync value changing */
1714 static boolean_t
1715 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1716 {
1717 	aggr_grp_t *aggrp = portp->lp_grp;
1718 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1719 	uint8_t save_sync;
1720 
1721 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1722 
1723 	/*
1724 	 * Partner Information
1725 	 */
1726 	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1727 	pl->PartnerOperPortPriority =
1728 	    ntohs(lacp->actor_info.port_priority);
1729 	pl->PartnerOperSystem = lacp->actor_info.system_id;
1730 	pl->PartnerOperSysPriority =
1731 	    htons(lacp->actor_info.system_priority);
1732 	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1733 
1734 	/* All state info except for Synchronization */
1735 	save_sync = pl->PartnerOperPortState.bit.sync;
1736 	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1737 
1738 	/* Defaulted set to FALSE */
1739 	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1740 
1741 	/*
1742 	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1743 	 *		Partner_System_Priority, Partner_Key, and
1744 	 *		Partner_State.Aggregation) are compared to the
1745 	 *		corresponding operations paramters values for
1746 	 *		the Actor. If these are equal, or if this is
1747 	 *		an individual link, we are synchronized.
1748 	 */
1749 	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1750 	    (ntohs(lacp->partner_info.port_priority) ==
1751 	    pl->ActorPortPriority) &&
1752 	    (ether_cmp(&lacp->partner_info.system_id,
1753 	    (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1754 	    (ntohs(lacp->partner_info.system_priority) ==
1755 	    aggrp->aggr.ActorSystemPriority) &&
1756 	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1757 	    (lacp->partner_info.state.bit.aggregation ==
1758 	    pl->ActorOperPortState.bit.aggregation)) ||
1759 	    (!lacp->actor_info.state.bit.aggregation)) {
1760 
1761 		pl->PartnerOperPortState.bit.sync =
1762 		    lacp->actor_info.state.bit.sync;
1763 	} else {
1764 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1765 	}
1766 
1767 	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1768 		AGGR_LACP_DBG(("record_PDU:(%s): partner sync "
1769 		    "%d -->%d\n", portp->lp_devname, save_sync,
1770 		    pl->PartnerOperPortState.bit.sync));
1771 		return (B_TRUE);
1772 	} else {
1773 		return (B_FALSE);
1774 	}
1775 }
1776 
1777 
1778 /*
1779  * update_selected - If any of the Partner parameters has
1780  *			changed from a previous value, then
1781  *			unselect the link from the aggregator.
1782  */
1783 static boolean_t
1784 update_selected(aggr_port_t *portp, lacp_t *lacp)
1785 {
1786 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1787 
1788 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1789 
1790 	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1791 	    (pl->PartnerOperPortPriority !=
1792 	    ntohs(lacp->actor_info.port_priority)) ||
1793 	    (ether_cmp(&pl->PartnerOperSystem,
1794 	    &lacp->actor_info.system_id) != 0) ||
1795 	    (pl->PartnerOperSysPriority !=
1796 	    ntohs(lacp->actor_info.system_priority)) ||
1797 	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1798 	    (pl->PartnerOperPortState.bit.aggregation !=
1799 	    lacp->actor_info.state.bit.aggregation)) {
1800 		AGGR_LACP_DBG(("update_selected:(%s): "
1801 		    "selected  %d-->%d\n", portp->lp_devname, pl->sm.selected,
1802 		    AGGR_UNSELECTED));
1803 
1804 		lacp_port_unselect(portp);
1805 		return (B_TRUE);
1806 	} else {
1807 		return (B_FALSE);
1808 	}
1809 }
1810 
1811 
1812 /*
1813  * update_default_selected - If any of the operational Partner parameters
1814  *			is different than that of the administrative values
1815  *			then unselect the link from the aggregator.
1816  */
1817 static void
1818 update_default_selected(aggr_port_t *portp)
1819 {
1820 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1821 
1822 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1823 
1824 	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1825 	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1826 	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1827 	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1828 	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1829 	    (pl->PartnerOperPortState.bit.aggregation !=
1830 	    pl->PartnerAdminPortState.bit.aggregation)) {
1831 
1832 		AGGR_LACP_DBG(("update_default_selected:(%s): "
1833 		    "selected  %d-->%d\n", portp->lp_devname,
1834 		    pl->sm.selected, AGGR_UNSELECTED));
1835 
1836 		lacp_port_unselect(portp);
1837 	}
1838 }
1839 
1840 
1841 /*
1842  * update_NTT - If any of the Partner values in the received LACPDU
1843  *			are different than that of the Actor operational
1844  *			values then set NTT to true.
1845  */
1846 static void
1847 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1848 {
1849 	aggr_grp_t *aggrp = portp->lp_grp;
1850 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1851 
1852 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1853 
1854 	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1855 	    (pl->ActorPortPriority !=
1856 	    ntohs(lacp->partner_info.port_priority)) ||
1857 	    (ether_cmp(&aggrp->lg_addr,
1858 	    &lacp->partner_info.system_id) != 0) ||
1859 	    (aggrp->aggr.ActorSystemPriority !=
1860 	    ntohs(lacp->partner_info.system_priority)) ||
1861 	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1862 	    (pl->ActorOperPortState.bit.activity !=
1863 	    lacp->partner_info.state.bit.activity) ||
1864 	    (pl->ActorOperPortState.bit.timeout !=
1865 	    lacp->partner_info.state.bit.timeout) ||
1866 	    (pl->ActorOperPortState.bit.sync !=
1867 	    lacp->partner_info.state.bit.sync) ||
1868 	    (pl->ActorOperPortState.bit.aggregation !=
1869 	    lacp->partner_info.state.bit.aggregation)) {
1870 
1871 		AGGR_LACP_DBG(("update_NTT:(%s): NTT  %d-->%d\n",
1872 		    portp->lp_devname, pl->NTT, B_TRUE));
1873 
1874 		pl->NTT = B_TRUE;
1875 	}
1876 }
1877 
1878 /*
1879  * lacp_receive_sm - LACP receive state machine
1880  *
1881  * parameters:
1882  *      - portp - instance this applies to.
1883  *      - lacp - pointer in the case of a received LACPDU.
1884  *                This value is NULL if there is no LACPDU.
1885  *
1886  * invoked:
1887  *    - when initialization is needed
1888  *    - upon reception of an LACPDU. This is the common case.
1889  *    - every time the current_while_timer pops
1890  */
1891 static void
1892 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
1893 {
1894 	boolean_t sync_updated, selected_updated, save_activity;
1895 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1896 	lacp_receive_state_t oldstate = pl->sm.receive_state;
1897 
1898 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1899 
1900 	/* LACP_OFF state not in specification so check here.  */
1901 	if (!pl->sm.lacp_on)
1902 		return;
1903 
1904 	/* figure next state */
1905 	if (pl->sm.begin || pl->sm.port_moved) {
1906 		pl->sm.receive_state = LACP_INITIALIZE;
1907 	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
1908 		pl->sm.receive_state = LACP_PORT_DISABLED;
1909 	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
1910 		pl->sm.receive_state =
1911 		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
1912 		    LACP_DISABLED : LACP_PORT_DISABLED;
1913 	} else if (lacp != NULL) {
1914 		if ((pl->sm.receive_state == LACP_EXPIRED) ||
1915 		    (pl->sm.receive_state == LACP_DEFAULTED)) {
1916 			pl->sm.receive_state = LACP_CURRENT;
1917 		}
1918 	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
1919 	    (pl->current_while_timer.id == 0)) {
1920 		pl->sm.receive_state = LACP_EXPIRED;
1921 	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
1922 	    (pl->current_while_timer.id == 0)) {
1923 		pl->sm.receive_state = LACP_DEFAULTED;
1924 	}
1925 
1926 
1927 	if (!((lacp && (oldstate == LACP_CURRENT) &&
1928 	    (pl->sm.receive_state == LACP_CURRENT)))) {
1929 		AGGR_LACP_DBG(("lacp_receive_sm(%s):%s--->%s\n",
1930 		    portp->lp_devname, lacp_receive_str[oldstate],
1931 		    lacp_receive_str[pl->sm.receive_state]));
1932 	}
1933 
1934 	switch (pl->sm.receive_state) {
1935 	case LACP_INITIALIZE:
1936 		lacp_port_unselect(portp);
1937 		record_Default(portp);
1938 		pl->ActorOperPortState.bit.expired = B_FALSE;
1939 		pl->sm.port_moved = B_FALSE;
1940 		pl->sm.receive_state = LACP_PORT_DISABLED;
1941 		pl->sm.begin = B_FALSE;
1942 		lacp_receive_sm(portp, NULL);
1943 		break;
1944 
1945 	case LACP_PORT_DISABLED:
1946 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1947 		/*
1948 		 * Stop current_while_timer in case
1949 		 * we got here from link down
1950 		 */
1951 		stop_current_while_timer(portp);
1952 
1953 		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
1954 			pl->sm.receive_state = LACP_DISABLED;
1955 			lacp_receive_sm(portp, lacp);
1956 			/* We goto LACP_DISABLED state */
1957 			break;
1958 		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
1959 			pl->sm.receive_state = LACP_EXPIRED;
1960 			/*
1961 			 * FALL THROUGH TO LACP_EXPIRED CASE:
1962 			 * We have no way of knowing if we get into
1963 			 * lacp_receive_sm() from a  current_while_timer
1964 			 * expiring as it has never been kicked off yet!
1965 			 */
1966 		} else {
1967 			/* We stay in LACP_PORT_DISABLED state */
1968 			break;
1969 		}
1970 		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
1971 		/* FALLTHROUGH */
1972 
1973 	case LACP_EXPIRED:
1974 		/*
1975 		 * Arrives here from LACP_PORT_DISABLED state as well as
1976 		 * as well as current_while_timer expiring.
1977 		 */
1978 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1979 		pl->PartnerOperPortState.bit.timeout = B_TRUE;
1980 
1981 		pl->ActorOperPortState.bit.expired = B_TRUE;
1982 		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
1983 		lacp_periodic_sm(portp);
1984 		break;
1985 
1986 	case LACP_DISABLED:
1987 		/*
1988 		 * This is the normal state for recv_sm when LACP_OFF
1989 		 * is set or the NIC is in half duplex mode.
1990 		 */
1991 		lacp_port_unselect(portp);
1992 		record_Default(portp);
1993 		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
1994 		pl->ActorOperPortState.bit.expired = B_FALSE;
1995 		break;
1996 
1997 	case LACP_DEFAULTED:
1998 		/*
1999 		 * Current_while_timer expired a second time.
2000 		 */
2001 		update_default_selected(portp);
2002 		record_Default(portp);	/* overwrite Partner Oper val */
2003 		pl->ActorOperPortState.bit.expired = B_FALSE;
2004 		pl->PartnerOperPortState.bit.sync = B_TRUE;
2005 
2006 		lacp_selection_logic(portp);
2007 		lacp_mux_sm(portp);
2008 		break;
2009 
2010 	case LACP_CURRENT:
2011 		/*
2012 		 * Reception of LACPDU
2013 		 */
2014 
2015 		if (!lacp) /* no LACPDU so current_while_timer popped */
2016 			break;
2017 
2018 		AGGR_LACP_DBG(("lacp_receive_sm: (%s): LACPDU received:\n",
2019 		    portp->lp_devname));
2020 
2021 		/*
2022 		 * Validate Actor_Information_Length,
2023 		 * Partner_Information_Length, Collector_Information_Length,
2024 		 * and Terminator_Length fields.
2025 		 */
2026 		if (!valid_lacp_pdu(portp, lacp)) {
2027 			AGGR_LACP_DBG(("lacp_receive_sm (%s): "
2028 			    "Invalid LACPDU received\n",
2029 			    portp->lp_devname));
2030 			break;
2031 		}
2032 
2033 		save_activity = pl->PartnerOperPortState.bit.activity;
2034 		selected_updated = update_selected(portp, lacp);
2035 		update_NTT(portp, lacp);
2036 		sync_updated = record_PDU(portp, lacp);
2037 
2038 		pl->ActorOperPortState.bit.expired = B_FALSE;
2039 
2040 		if (selected_updated) {
2041 			lacp_selection_logic(portp);
2042 			lacp_mux_sm(portp);
2043 		} else if (sync_updated) {
2044 			lacp_mux_sm(portp);
2045 		}
2046 
2047 		/*
2048 		 * If the periodic timer value bit has been modified
2049 		 * or the partner activity bit has been changed then
2050 		 * we need to respectively:
2051 		 *  - restart the timer with the proper timeout value.
2052 		 *  - possibly enable/disable transmission of LACPDUs.
2053 		 */
2054 		if ((pl->PartnerOperPortState.bit.timeout &&
2055 		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2056 		    (!pl->PartnerOperPortState.bit.timeout &&
2057 		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2058 		    (pl->PartnerOperPortState.bit.activity !=
2059 		    save_activity)) {
2060 			lacp_periodic_sm(portp);
2061 		}
2062 
2063 		stop_current_while_timer(portp);
2064 		/* Check if we need to transmit an LACPDU */
2065 		if (pl->NTT)
2066 			lacp_xmit_sm(portp);
2067 		start_current_while_timer(portp, 0);
2068 
2069 		break;
2070 	}
2071 }
2072 
/*
 * Enable or disable the collector/distributor of a port.
 *
 * Convenience wrapper for callers that do not already hold the port
 * lock: it takes portp->lp_lock as writer, hands the request to
 * aggr_set_coll_dist_locked(), and drops the lock again.
 */
2073 static void
2074 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2075 {
2076 	rw_enter(&portp->lp_lock, RW_WRITER);
2077 	aggr_set_coll_dist_locked(portp, enable);
2078 	rw_exit(&portp->lp_lock);
2079 }
2080 
2081 static void
2082 aggr_set_coll_dist_locked(aggr_port_t *portp, boolean_t enable)
2083 {
2084 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
2085 
2086 	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%s) %s\n",
2087 	    portp->lp_devname, enable ? "ENABLED" : "DISABLED"));
2088 
2089 	if (!enable) {
2090 		/*
2091 		 * Turn OFF Collector_Distributor.
2092 		 */
2093 		portp->lp_collector_enabled = B_FALSE;
2094 		aggr_send_port_disable(portp);
2095 		return;
2096 	}
2097 
2098 	/*
2099 	 * Turn ON Collector_Distributor.
2100 	 */
2101 
2102 	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2103 	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2104 		/* Port is compatible and can be aggregated */
2105 		portp->lp_collector_enabled = B_TRUE;
2106 		aggr_send_port_enable(portp);
2107 	}
2108 }
2109 
2110 /*
2111  * Process a received Marker or LACPDU.
2112  */
2113 void
2114 aggr_lacp_rx(aggr_port_t *portp, mblk_t *dmp)
2115 {
2116 	lacp_t	*lacp;
2117 
2118 	dmp->b_rptr += sizeof (struct ether_header);
2119 
2120 	if (MBLKL(dmp) < sizeof (lacp_t)) {
2121 		freemsg(dmp);
2122 		return;
2123 	}
2124 
2125 	lacp = (lacp_t *)dmp->b_rptr;
2126 
2127 	switch (lacp->subtype) {
2128 	case LACP_SUBTYPE:
2129 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s): LACPDU received.\n",
2130 		    portp->lp_devname));
2131 
2132 		AGGR_LACP_LOCK(portp->lp_grp);
2133 		if (!portp->lp_lacp.sm.lacp_on) {
2134 			AGGR_LACP_UNLOCK(portp->lp_grp);
2135 			break;
2136 		}
2137 		lacp_receive_sm(portp, lacp);
2138 		AGGR_LACP_UNLOCK(portp->lp_grp);
2139 		break;
2140 
2141 	case MARKER_SUBTYPE:
2142 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s): Marker Packet received.\n",
2143 		    portp->lp_devname));
2144 
2145 		(void) receive_marker_pdu(portp, dmp);
2146 		break;
2147 
2148 	default:
2149 		AGGR_LACP_DBG(("aggr_lacp_rx: (%s): "
2150 		    "Unknown Slow Protocol type %d\n",
2151 		    portp->lp_devname, lacp->subtype));
2152 		break;
2153 	}
2154 
2155 	freemsg(dmp);
2156 }
2157