xref: /titanic_50/usr/src/uts/common/io/aggr/aggr_lacp.c (revision 7c64d3750da7fda7e450b8f9b0b963905ded6379)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/list.h>
37 #include <sys/ksynch.h>
38 #include <sys/kmem.h>
39 #include <sys/stream.h>
40 #include <sys/modctl.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/atomic.h>
44 #include <sys/stat.h>
45 #include <sys/byteorder.h>
46 #include <sys/strsun.h>
47 #include <sys/isa_defs.h>
48 
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
51 
52 static struct ether_addr	etherzeroaddr = {
53 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
54 };
55 
56 /*
57  * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
58  */
59 static struct ether_addr   slow_multicast_addr = {
60 	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
61 };
62 
#ifdef DEBUG
/* LACP state machine debugging support */
static uint32_t aggr_lacp_debug = 0;
/*
 * Print a debug message when aggr_lacp_debug is non-zero. The argument is
 * a complete, parenthesized printf() argument list, for example:
 *	AGGR_LACP_DBG(("port %d\n", id));
 * Both variants expand to a single do/while (0) statement so that the
 * macro followed by ';' is safe as the body of an unbraced if/else.
 */
#define	AGGR_LACP_DBG(x)	\
	do { if (aggr_lacp_debug) { (void) printf x; } } while (0)
#else
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif /* DEBUG */
70 
71 #define	NSECS_PER_SEC   1000000000ll
72 
73 /* used by lacp_misconfig_walker() */
74 typedef struct lacp_misconfig_check_state_s {
75 	aggr_port_t *cs_portp;
76 	boolean_t cs_found;
77 } lacp_misconfig_check_state_t;
78 
79 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
80 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
81 static const char *lacp_mux_str[] = LACP_MUX_STRINGS;
82 
83 static uint16_t lacp_port_priority = 0x1000;
84 static uint16_t lacp_system_priority = 0x1000;
85 
86 /*
87  * Maintains a list of all ports in ATTACHED state. This information
88  * is used to detect misconfiguration.
89  */
90 typedef struct lacp_sel_ports {
91 	datalink_id_t sp_grp_linkid;
92 	datalink_id_t sp_linkid;
93 	/* Note: sp_partner_system must be 2-byte aligned */
94 	struct ether_addr sp_partner_system;
95 	uint32_t sp_partner_key;
96 	struct lacp_sel_ports *sp_next;
97 } lacp_sel_ports_t;
98 
99 static lacp_sel_ports_t *sel_ports = NULL;
100 static kmutex_t lacp_sel_lock;
101 
102 static void periodic_timer_pop_locked(aggr_port_t *);
103 static void periodic_timer_pop(void *);
104 static void lacp_xmit_sm(aggr_port_t *);
105 static void lacp_periodic_sm(aggr_port_t *);
106 static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
107 static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
108 static void lacp_on(aggr_port_t *);
109 static void lacp_off(aggr_port_t *);
110 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
111 static void lacp_receive_sm(aggr_port_t *, lacp_t *);
112 static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
113 static void aggr_set_coll_dist_locked(aggr_port_t *, boolean_t);
114 static void start_wait_while_timer(aggr_port_t *);
115 static void stop_wait_while_timer(aggr_port_t *);
116 static void lacp_reset_port(aggr_port_t *);
117 static void stop_current_while_timer(aggr_port_t *);
118 static void current_while_timer_pop(void *);
119 static void update_default_selected(aggr_port_t *);
120 static boolean_t update_selected(aggr_port_t *, lacp_t *);
121 static boolean_t lacp_sel_ports_add(aggr_port_t *);
122 static void lacp_sel_ports_del(aggr_port_t *);
123 
124 void
125 aggr_lacp_init(void)
126 {
127 	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
128 }
129 
130 void
131 aggr_lacp_fini(void)
132 {
133 	mutex_destroy(&lacp_sel_lock);
134 }
135 
136 /*
137  * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
138  * could not be performed due to a memory allocation error, B_TRUE otherwise.
139  */
140 static boolean_t
141 lacp_port_select(aggr_port_t *portp)
142 {
143 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
144 
145 	if (!lacp_sel_ports_add(portp))
146 		return (B_FALSE);
147 	portp->lp_lacp.sm.selected = AGGR_SELECTED;
148 	return (B_TRUE);
149 }
150 
151 /*
152  * Set the port LACP state to UNSELECTED.
153  */
154 static void
155 lacp_port_unselect(aggr_port_t *portp)
156 {
157 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
158 
159 	lacp_sel_ports_del(portp);
160 	portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
161 }
162 
163 /*
164  * Initialize group specific LACP state and parameters.
165  */
166 void
167 aggr_lacp_init_grp(aggr_grp_t *aggrp)
168 {
169 	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
170 	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
171 	aggrp->aggr.CollectorMaxDelay = 10;
172 	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
173 	aggrp->aggr.ready = B_FALSE;
174 }
175 
176 /*
177  * Complete LACP info initialization at port creation time.
178  */
179 void
180 aggr_lacp_init_port(aggr_port_t *portp)
181 {
182 	aggr_grp_t *aggrp = portp->lp_grp;
183 	aggr_lacp_port_t *pl = &portp->lp_lacp;
184 
185 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
186 	ASSERT(RW_LOCK_HELD(&aggrp->lg_lock));
187 	ASSERT(RW_LOCK_HELD(&portp->lp_lock));
188 
189 	/* actor port # */
190 	pl->ActorPortNumber = portp->lp_portid;
191 	AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
192 	    "ActorPortNumber = 0x%x\n", portp->lp_linkid,
193 	    pl->ActorPortNumber));
194 
195 	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
196 	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
197 	pl->NTT = B_FALSE;			/* need to transmit */
198 
199 	pl->ActorAdminPortKey = aggrp->lg_key;
200 	pl->ActorOperPortKey = pl->ActorAdminPortKey;
201 	AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
202 	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
203 	    portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
204 
205 	/* Actor admin. port state */
206 	pl->ActorAdminPortState.bit.activity = B_FALSE;
207 	pl->ActorAdminPortState.bit.timeout = B_TRUE;
208 	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
209 	pl->ActorAdminPortState.bit.sync = B_FALSE;
210 	pl->ActorAdminPortState.bit.collecting = B_FALSE;
211 	pl->ActorAdminPortState.bit.distributing = B_FALSE;
212 	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
213 	pl->ActorAdminPortState.bit.expired = B_FALSE;
214 	pl->ActorOperPortState = pl->ActorAdminPortState;
215 
216 	/*
217 	 * Partner Administrative Information
218 	 * (All initialized to zero except for the following)
219 	 * Fast Timeouts.
220 	 */
221 	pl->PartnerAdminPortState.bit.timeout =
222 	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
223 
224 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
225 
226 	/*
227 	 * State machine information.
228 	 */
229 	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
230 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
231 	pl->sm.lacp_enabled = B_FALSE;
232 	pl->sm.port_enabled = B_FALSE;		/* Link Down */
233 	pl->sm.actor_churn = B_FALSE;
234 	pl->sm.partner_churn = B_FALSE;
235 	pl->sm.ready_n = B_FALSE;
236 	pl->sm.port_moved = B_FALSE;
237 
238 	lacp_port_unselect(portp);
239 
240 	pl->sm.periodic_state = LACP_NO_PERIODIC;
241 	pl->sm.receive_state = LACP_INITIALIZE;
242 	pl->sm.mux_state = LACP_DETACHED;
243 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
244 
245 	/*
246 	 * Timer information.
247 	 */
248 	pl->current_while_timer.id = 0;
249 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
250 
251 	pl->periodic_timer.id = 0;
252 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
253 
254 	pl->wait_while_timer.id = 0;
255 	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
256 }
257 
258 /*
259  * Port initialization when we need to
260  * turn LACP on/off, etc. Not everything is
261  * reset like in the above routine.
262  *		Do NOT modify things like link status.
263  */
264 static void
265 lacp_reset_port(aggr_port_t *portp)
266 {
267 	aggr_lacp_port_t *pl = &portp->lp_lacp;
268 
269 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
270 
271 	pl->NTT = B_FALSE;			/* need to transmit */
272 
273 	/* reset operational port state */
274 	pl->ActorOperPortState.bit.timeout =
275 	    pl->ActorAdminPortState.bit.timeout;
276 
277 	pl->ActorOperPortState.bit.sync = B_FALSE;
278 	pl->ActorOperPortState.bit.collecting = B_FALSE;
279 	pl->ActorOperPortState.bit.distributing = B_FALSE;
280 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
281 	pl->ActorOperPortState.bit.expired = B_FALSE;
282 
283 	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
284 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
285 
286 	/*
287 	 * State machine information.
288 	 */
289 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
290 	pl->sm.actor_churn = B_FALSE;
291 	pl->sm.partner_churn = B_FALSE;
292 	pl->sm.ready_n = B_FALSE;
293 
294 	lacp_port_unselect(portp);
295 
296 	pl->sm.periodic_state = LACP_NO_PERIODIC;
297 	pl->sm.receive_state = LACP_INITIALIZE;
298 	pl->sm.mux_state = LACP_DETACHED;
299 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
300 
301 	/*
302 	 * Timer information.
303 	 */
304 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
305 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
306 }
307 
308 static void
309 aggr_lacp_mcast_on(aggr_port_t *port)
310 {
311 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
312 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
313 
314 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
315 		return;
316 
317 	(void) aggr_port_multicst(port, B_TRUE,
318 	    (uchar_t *)&slow_multicast_addr);
319 }
320 
321 static void
322 aggr_lacp_mcast_off(aggr_port_t *port)
323 {
324 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
325 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
326 
327 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
328 		return;
329 
330 	(void) aggr_port_multicst(port, B_FALSE,
331 	    (uchar_t *)&slow_multicast_addr);
332 }
333 
334 static void
335 start_periodic_timer(aggr_port_t *portp)
336 {
337 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
338 
339 	if (portp->lp_lacp.periodic_timer.id == 0) {
340 		portp->lp_lacp.periodic_timer.id =
341 		    timeout(periodic_timer_pop, portp,
342 		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
343 	}
344 }
345 
346 static void
347 stop_periodic_timer(aggr_port_t *portp)
348 {
349 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
350 
351 	if (portp->lp_lacp.periodic_timer.id != 0) {
352 		AGGR_LACP_UNLOCK(portp->lp_grp);
353 		(void) untimeout(portp->lp_lacp.periodic_timer.id);
354 		AGGR_LACP_LOCK(portp->lp_grp);
355 		portp->lp_lacp.periodic_timer.id = 0;
356 	}
357 }
358 
359 /*
360  * When the timer pops, we arrive here to
361  * clear out LACPDU count as well as transmit an
362  * LACPDU. We then set the periodic state and let
363  * the periodic state machine restart the timer.
364  */
365 
366 static void
367 periodic_timer_pop_locked(aggr_port_t *portp)
368 {
369 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
370 
371 	portp->lp_lacp.periodic_timer.id = NULL;
372 	portp->lp_lacp_stats.LACPDUsTx = 0;
373 
374 	/* current timestamp */
375 	portp->lp_lacp.time = gethrtime();
376 	portp->lp_lacp.NTT = B_TRUE;
377 	lacp_xmit_sm(portp);
378 
379 	/*
380 	 * Set Periodic State machine state based on the
381 	 * value of the Partner Operation Port State timeout
382 	 * bit.
383 	 */
384 	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
385 		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
386 		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
387 	} else {
388 		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
389 		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
390 	}
391 
392 	lacp_periodic_sm(portp);
393 }
394 
395 static void
396 periodic_timer_pop(void *data)
397 {
398 	aggr_port_t *portp = data;
399 
400 	if (portp->lp_closing)
401 		return;
402 
403 	AGGR_LACP_LOCK(portp->lp_grp);
404 	periodic_timer_pop_locked(portp);
405 	AGGR_LACP_UNLOCK(portp->lp_grp);
406 }
407 
408 /*
409  * Invoked from:
410  *	- startup upon aggregation
411  *	- when the periodic timer pops
412  *	- when the periodic timer value is changed
413  *	- when the port is attached or detached
414  *	- when LACP mode is changed.
415  */
416 static void
417 lacp_periodic_sm(aggr_port_t *portp)
418 {
419 	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
420 	aggr_lacp_port_t *pl = &portp->lp_lacp;
421 
422 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
423 
424 	/* LACP_OFF state not in specification so check here.  */
425 	if (!pl->sm.lacp_on) {
426 		/* Stop timer whether it is running or not */
427 		stop_periodic_timer(portp);
428 		pl->sm.periodic_state = LACP_NO_PERIODIC;
429 		pl->NTT = B_FALSE;
430 		AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
431 		    "%s--->%s\n", portp->lp_linkid,
432 		    lacp_periodic_str[oldstate],
433 		    lacp_periodic_str[pl->sm.periodic_state]));
434 		return;
435 	}
436 
437 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
438 	    !pl->sm.port_enabled ||
439 	    !pl->ActorOperPortState.bit.activity &&
440 	    !pl->PartnerOperPortState.bit.activity) {
441 
442 		/* Stop timer whether it is running or not */
443 		stop_periodic_timer(portp);
444 		pl->sm.periodic_state = LACP_NO_PERIODIC;
445 		pl->NTT = B_FALSE;
446 		AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
447 		    portp->lp_linkid, lacp_periodic_str[oldstate],
448 		    lacp_periodic_str[pl->sm.periodic_state]));
449 		return;
450 	}
451 
452 	/*
453 	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
454 	 * has been received. Then after we timeout, then it is
455 	 * possible to go to SLOW_PERIODIC_TIME.
456 	 */
457 	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
458 		pl->periodic_timer.val = FAST_PERIODIC_TIME;
459 		pl->sm.periodic_state = LACP_FAST_PERIODIC;
460 	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
461 	    pl->PartnerOperPortState.bit.timeout) {
462 		/*
463 		 * If we receive a bit indicating we are going to
464 		 * fast periodic from slow periodic, stop the timer
465 		 * and let the periodic_timer_pop routine deal
466 		 * with reseting the periodic state and transmitting
467 		 * a LACPDU.
468 		 */
469 		stop_periodic_timer(portp);
470 		periodic_timer_pop_locked(portp);
471 	}
472 
473 	/* Rearm timer with value provided by partner */
474 	start_periodic_timer(portp);
475 }
476 
477 /*
478  * This routine transmits an LACPDU if lacp_enabled
479  * is TRUE and if NTT is set.
480  */
481 static void
482 lacp_xmit_sm(aggr_port_t *portp)
483 {
484 	aggr_lacp_port_t *pl = &portp->lp_lacp;
485 	size_t	len;
486 	mblk_t  *mp;
487 	hrtime_t now, elapsed;
488 	const mac_txinfo_t *mtp;
489 
490 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
491 
492 	/* LACP_OFF state not in specification so check here.  */
493 	if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
494 		return;
495 
496 	/*
497 	 * Do nothing if LACP has been turned off or if the
498 	 * periodic state machine is not enabled.
499 	 */
500 	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
501 	    !pl->sm.lacp_enabled || pl->sm.begin) {
502 		pl->NTT = B_FALSE;
503 		return;
504 	}
505 
506 	/*
507 	 * If we have sent 5 Slow packets in the last second, avoid
508 	 * sending any more here. No more than three LACPDUs may be transmitted
509 	 * in any Fast_Periodic_Time interval.
510 	 */
511 	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
512 		/*
513 		 * Grab the current time value and see if
514 		 * more than 1 second has passed. If so,
515 		 * reset the timestamp and clear the count.
516 		 */
517 		now = gethrtime();
518 		elapsed = now - pl->time;
519 		if (elapsed > NSECS_PER_SEC) {
520 			portp->lp_lacp_stats.LACPDUsTx = 0;
521 			pl->time = now;
522 		} else {
523 			return;
524 		}
525 	}
526 
527 	len = sizeof (lacp_t) + sizeof (struct ether_header);
528 	mp = allocb(len, BPRI_MED);
529 	if (mp == NULL)
530 		return;
531 
532 	mp->b_wptr = mp->b_rptr + len;
533 	bzero(mp->b_rptr, len);
534 
535 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
536 	fill_lacp_pdu(portp,
537 	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
538 
539 	/*
540 	 * Store the transmit info pointer locally in case it changes between
541 	 * loading mt_fn and mt_arg.
542 	 */
543 	mtp = portp->lp_txinfo;
544 	mtp->mt_fn(mtp->mt_arg, mp);
545 
546 	pl->NTT = B_FALSE;
547 	portp->lp_lacp_stats.LACPDUsTx++;
548 }
549 
550 /*
551  * Initialize the ethernet header of a LACP packet sent from the specified
552  * port.
553  */
554 static void
555 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
556 {
557 	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
558 	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
559 	    ETHERADDRL);
560 	ether->ether_type = htons(ETHERTYPE_SLOW);
561 }
562 
563 static void
564 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
565 {
566 	aggr_lacp_port_t *pl = &portp->lp_lacp;
567 	aggr_grp_t *aggrp = portp->lp_grp;
568 
569 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
570 
571 	lacp->subtype = LACP_SUBTYPE;
572 	lacp->version = LACP_VERSION;
573 
574 	rw_enter(&aggrp->lg_lock, RW_READER);
575 	rw_enter(&portp->lp_lock, RW_READER);
576 
577 	/*
578 	 * Actor Information
579 	 */
580 	lacp->actor_info.tlv_type = ACTOR_TLV;
581 	lacp->actor_info.information_len = sizeof (link_info_t);
582 	lacp->actor_info.system_priority =
583 	    htons(aggrp->aggr.ActorSystemPriority);
584 	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
585 	    ETHERADDRL);
586 	lacp->actor_info.key = htons(pl->ActorOperPortKey);
587 	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
588 	lacp->actor_info.port = htons(pl->ActorPortNumber);
589 	lacp->actor_info.state.state = pl->ActorOperPortState.state;
590 
591 	/*
592 	 * Partner Information
593 	 */
594 	lacp->partner_info.tlv_type = PARTNER_TLV;
595 	lacp->partner_info.information_len = sizeof (link_info_t);
596 	lacp->partner_info.system_priority =
597 	    htons(pl->PartnerOperSysPriority);
598 	lacp->partner_info.system_id = pl->PartnerOperSystem;
599 	lacp->partner_info.key = htons(pl->PartnerOperKey);
600 	lacp->partner_info.port_priority =
601 	    htons(pl->PartnerOperPortPriority);
602 	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
603 	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
604 
605 	/* Collector Information */
606 	lacp->tlv_collector = COLLECTOR_TLV;
607 	lacp->collector_len = 0x10;
608 	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
609 
610 	/* Termination Information */
611 	lacp->tlv_terminator = TERMINATOR_TLV;
612 	lacp->terminator_len = 0x0;
613 
614 	rw_exit(&portp->lp_lock);
615 	rw_exit(&aggrp->lg_lock);
616 }
617 
618 /*
619  * lacp_mux_sm - LACP mux state machine
620  *		This state machine is invoked from:
621  *			- startup upon aggregation
622  *			- from the Selection logic
623  *			- when the wait_while_timer pops
624  *			- when the aggregation MAC address is changed
625  *			- when receiving DL_NOTE_LINK_UP/DOWN
626  *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
627  *			- when LACP mode is changed.
628  *			- when a DL_NOTE_SPEED is received
629  */
630 static void
631 lacp_mux_sm(aggr_port_t *portp)
632 {
633 	aggr_grp_t *aggrp = portp->lp_grp;
634 	boolean_t NTT_updated = B_FALSE;
635 	aggr_lacp_port_t *pl = &portp->lp_lacp;
636 	lacp_mux_state_t oldstate = pl->sm.mux_state;
637 
638 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
639 
640 	/* LACP_OFF state not in specification so check here.  */
641 	if (!pl->sm.lacp_on) {
642 		pl->sm.mux_state = LACP_DETACHED;
643 		pl->ActorOperPortState.bit.sync = B_FALSE;
644 
645 		if (pl->ActorOperPortState.bit.collecting ||
646 		    pl->ActorOperPortState.bit.distributing) {
647 			AGGR_LACP_DBG(("trunk link: (%d): "
648 			    "Collector_Distributor Disabled.\n",
649 			    portp->lp_linkid));
650 		}
651 
652 		pl->ActorOperPortState.bit.collecting =
653 		    pl->ActorOperPortState.bit.distributing = B_FALSE;
654 		return;
655 	}
656 
657 	if (pl->sm.begin || !pl->sm.lacp_enabled)
658 		pl->sm.mux_state = LACP_DETACHED;
659 
660 again:
661 	/* determine next state, or return if state unchanged */
662 	switch (pl->sm.mux_state) {
663 	case LACP_DETACHED:
664 		if (pl->sm.begin) {
665 			break;
666 		}
667 
668 		if ((pl->sm.selected == AGGR_SELECTED) ||
669 		    (pl->sm.selected == AGGR_STANDBY)) {
670 			pl->sm.mux_state = LACP_WAITING;
671 			break;
672 		}
673 		return;
674 
675 	case LACP_WAITING:
676 		if (pl->sm.selected == AGGR_UNSELECTED) {
677 			pl->sm.mux_state = LACP_DETACHED;
678 			break;
679 		}
680 
681 		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
682 			pl->sm.mux_state = LACP_ATTACHED;
683 			break;
684 		}
685 		return;
686 
687 	case LACP_ATTACHED:
688 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
689 		    (pl->sm.selected == AGGR_STANDBY)) {
690 			pl->sm.mux_state = LACP_DETACHED;
691 			break;
692 		}
693 
694 		if ((pl->sm.selected == AGGR_SELECTED) &&
695 		    pl->PartnerOperPortState.bit.sync) {
696 			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
697 			break;
698 		}
699 		return;
700 
701 	case LACP_COLLECTING_DISTRIBUTING:
702 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
703 		    (pl->sm.selected == AGGR_STANDBY) ||
704 		    !pl->PartnerOperPortState.bit.sync) {
705 			pl->sm.mux_state = LACP_ATTACHED;
706 			break;
707 		}
708 		return;
709 	}
710 
711 	AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
712 	    portp->lp_linkid, lacp_mux_str[oldstate],
713 	    lacp_mux_str[pl->sm.mux_state]));
714 
715 	/* perform actions on entering a new state */
716 	switch (pl->sm.mux_state) {
717 	case LACP_DETACHED:
718 		if (pl->ActorOperPortState.bit.collecting ||
719 		    pl->ActorOperPortState.bit.distributing) {
720 			AGGR_LACP_DBG(("trunk link: (%d): "
721 			    "Collector_Distributor Disabled.\n",
722 			    portp->lp_linkid));
723 		}
724 
725 		pl->ActorOperPortState.bit.sync =
726 		    pl->ActorOperPortState.bit.collecting = B_FALSE;
727 
728 		/* Turn OFF Collector_Distributor */
729 		aggr_set_coll_dist(portp, B_FALSE);
730 
731 		pl->ActorOperPortState.bit.distributing = B_FALSE;
732 		NTT_updated = B_TRUE;
733 		break;
734 
735 	case LACP_WAITING:
736 		start_wait_while_timer(portp);
737 		break;
738 
739 	case LACP_ATTACHED:
740 		if (pl->ActorOperPortState.bit.collecting ||
741 		    pl->ActorOperPortState.bit.distributing) {
742 			AGGR_LACP_DBG(("trunk link: (%d): "
743 			    "Collector_Distributor Disabled.\n",
744 			    portp->lp_linkid));
745 		}
746 
747 		pl->ActorOperPortState.bit.sync = B_TRUE;
748 		pl->ActorOperPortState.bit.collecting = B_FALSE;
749 
750 		/* Turn OFF Collector_Distributor */
751 		aggr_set_coll_dist(portp, B_FALSE);
752 
753 		pl->ActorOperPortState.bit.distributing = B_FALSE;
754 		NTT_updated = B_TRUE;
755 		if (pl->PartnerOperPortState.bit.sync) {
756 			/*
757 			 * We had already received an updated sync from
758 			 * the partner. Attempt to transition to
759 			 * collecting/distributing now.
760 			 */
761 			goto again;
762 		}
763 		break;
764 
765 	case LACP_COLLECTING_DISTRIBUTING:
766 		if (!pl->ActorOperPortState.bit.collecting &&
767 		    !pl->ActorOperPortState.bit.distributing) {
768 			AGGR_LACP_DBG(("trunk link: (%d): "
769 			    "Collector_Distributor Enabled.\n",
770 			    portp->lp_linkid));
771 		}
772 		pl->ActorOperPortState.bit.distributing = B_TRUE;
773 
774 		/* Turn Collector_Distributor back ON */
775 		aggr_set_coll_dist(portp, B_TRUE);
776 
777 		pl->ActorOperPortState.bit.collecting = B_TRUE;
778 		NTT_updated = B_TRUE;
779 		break;
780 	}
781 
782 	/*
783 	 * If we updated the state of the NTT variable, then
784 	 * initiate a LACPDU transmission.
785 	 */
786 	if (NTT_updated) {
787 		pl->NTT = B_TRUE;
788 		lacp_xmit_sm(portp);
789 	}
790 } /* lacp_mux_sm */
791 
792 
793 static void
794 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
795 {
796 	marker_pdu_t		*markerp = (marker_pdu_t *)mp->b_rptr;
797 	const mac_txinfo_t	*mtp;
798 
799 	AGGR_LACP_LOCK(portp->lp_grp);
800 
801 	AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
802 	    portp->lp_linkid));
803 
804 	/* LACP_OFF state not in specification so check here.  */
805 	if (!portp->lp_lacp.sm.lacp_on)
806 		goto bail;
807 
808 	if (MBLKL(mp) < sizeof (marker_pdu_t))
809 		goto bail;
810 
811 	if (markerp->version != MARKER_VERSION) {
812 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
813 		    "version = %d does not match s/w version %d\n",
814 		    portp->lp_linkid, markerp->version, MARKER_VERSION));
815 		goto bail;
816 	}
817 
818 	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
819 		/* We do not yet send out MARKER info PDUs */
820 		AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
821 		    " MARKER TLV = %d - We don't send out info type!\n",
822 		    portp->lp_linkid, markerp->tlv_marker));
823 		goto bail;
824 	}
825 
826 	if (markerp->tlv_marker != MARKER_INFO_TLV) {
827 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
828 		    " MARKER TLV = %d \n", portp->lp_linkid,
829 		    markerp->tlv_marker));
830 		goto bail;
831 	}
832 
833 	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
834 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
835 		    " MARKER length = %d \n", portp->lp_linkid,
836 		    markerp->marker_len));
837 		goto bail;
838 	}
839 
840 	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
841 		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
842 		    " MARKER Port %d not equal to Partner port %d\n",
843 		    portp->lp_linkid, markerp->requestor_port,
844 		    portp->lp_lacp.PartnerOperPortNum));
845 		goto bail;
846 	}
847 
848 	if (ether_cmp(&markerp->system_id,
849 	    &portp->lp_lacp.PartnerOperSystem) != 0) {
850 		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
851 		    " MARKER MAC not equal to Partner MAC\n",
852 		    portp->lp_linkid));
853 		goto bail;
854 	}
855 
856 	/*
857 	 * Turn into Marker Response PDU
858 	 * and return mblk to sending system
859 	 */
860 	markerp->tlv_marker = MARKER_RESPONSE_TLV;
861 
862 	/* reuse the space that was used by received ethernet header */
863 	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
864 	mp->b_rptr -= sizeof (struct ether_header);
865 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
866 	AGGR_LACP_UNLOCK(portp->lp_grp);
867 
868 	/*
869 	 * Store the transmit info pointer locally in case it changes between
870 	 * loading mt_fn and mt_arg.
871 	 */
872 	mtp = portp->lp_txinfo;
873 	mtp->mt_fn(mtp->mt_arg, mp);
874 	return;
875 
876 bail:
877 	AGGR_LACP_UNLOCK(portp->lp_grp);
878 	freemsg(mp);
879 }
880 
881 
882 /*
883  * Update the LACP mode (off, active, or passive) of the specified group.
884  */
885 void
886 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
887 {
888 	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
889 	aggr_port_t *port;
890 
891 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
892 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
893 
894 	if (mode == old_mode)
895 		return;
896 
897 	grp->lg_lacp_mode = mode;
898 
899 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
900 		port->lp_lacp.ActorAdminPortState.bit.activity =
901 		    port->lp_lacp.ActorOperPortState.bit.activity =
902 		    (mode == AGGR_LACP_ACTIVE);
903 
904 		if (old_mode == AGGR_LACP_OFF) {
905 			/* OFF -> {PASSIVE,ACTIVE} */
906 			/* turn OFF Collector_Distributor */
907 			aggr_set_coll_dist(port, B_FALSE);
908 			rw_enter(&port->lp_lock, RW_WRITER);
909 			lacp_on(port);
910 			if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
911 				aggr_lacp_port_attached(port);
912 			rw_exit(&port->lp_lock);
913 		} else if (mode == AGGR_LACP_OFF) {
914 			/* {PASSIVE,ACTIVE} -> OFF */
915 			rw_enter(&port->lp_lock, RW_WRITER);
916 			lacp_off(port);
917 			rw_exit(&port->lp_lock);
918 			if (!grp->lg_closing) {
919 				/* Turn ON Collector_Distributor */
920 				aggr_set_coll_dist(port, B_TRUE);
921 			}
922 		} else {
923 			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
924 			port->lp_lacp.sm.begin = B_TRUE;
925 			lacp_mux_sm(port);
926 			lacp_periodic_sm(port);
927 
928 			/* kick off state machines */
929 			lacp_receive_sm(port, NULL);
930 			lacp_mux_sm(port);
931 		}
932 
933 		if (grp->lg_closing)
934 			break;
935 	}
936 }
937 
938 
939 /*
940  * Update the LACP timer (short or long) of the specified group.
941  */
942 void
943 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
944 {
945 	aggr_port_t *port;
946 
947 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
948 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
949 
950 	if (timer == grp->aggr.PeriodicTimer)
951 		return;
952 
953 	grp->aggr.PeriodicTimer = timer;
954 
955 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
956 		port->lp_lacp.ActorAdminPortState.bit.timeout =
957 		    port->lp_lacp.ActorOperPortState.bit.timeout =
958 		    (timer == AGGR_LACP_TIMER_SHORT);
959 	}
960 }
961 
962 
963 /*
964  * Sets the initial LACP mode (off, active, passive) and LACP timer
965  * (short, long) of the specified group.
966  */
967 void
968 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
969     aggr_lacp_timer_t timer)
970 {
971 	aggr_port_t *port;
972 
973 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
974 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
975 
976 	grp->lg_lacp_mode = mode;
977 	grp->aggr.PeriodicTimer = timer;
978 
979 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
980 		port->lp_lacp.ActorAdminPortState.bit.activity =
981 		    port->lp_lacp.ActorOperPortState.bit.activity =
982 		    (mode == AGGR_LACP_ACTIVE);
983 
984 		port->lp_lacp.ActorAdminPortState.bit.timeout =
985 		    port->lp_lacp.ActorOperPortState.bit.timeout =
986 		    (timer == AGGR_LACP_TIMER_SHORT);
987 
988 		if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
989 			/* Turn ON Collector_Distributor */
990 			aggr_set_coll_dist(port, B_TRUE);
991 		} else { /* LACP_ACTIVE/PASSIVE */
992 			rw_enter(&port->lp_lock, RW_WRITER);
993 			lacp_on(port);
994 			rw_exit(&port->lp_lock);
995 		}
996 	}
997 }
998 
999 /*
1000  * Verify that the Partner MAC and Key recorded by the specified
1001  * port are not found in other ports that are not part of our
1002  * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1003  * otherwise.
1004  */
1005 static boolean_t
1006 lacp_misconfig_check(aggr_port_t *portp)
1007 {
1008 	aggr_grp_t *grp = portp->lp_grp;
1009 	lacp_sel_ports_t *cport;
1010 
1011 	mutex_enter(&lacp_sel_lock);
1012 
1013 	for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1014 
1015 		/* skip entries of the group of the port being checked */
1016 		if (cport->sp_grp_linkid == grp->lg_linkid)
1017 			continue;
1018 
1019 		if ((ether_cmp(&cport->sp_partner_system,
1020 		    &grp->aggr.PartnerSystem) == 0) &&
1021 		    (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1022 			char mac_str[ETHERADDRL*3];
1023 			struct ether_addr *mac = &cport->sp_partner_system;
1024 
1025 			/*
1026 			 * The Partner port information is already in use
1027 			 * by ports in another aggregation so disable this
1028 			 * port.
1029 			 */
1030 
1031 			(void) snprintf(mac_str, sizeof (mac_str),
1032 			    "%x:%x:%x:%x:%x:%x",
1033 			    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1034 			    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1035 			    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1036 
1037 			portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1038 
1039 			cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1040 			    "MAC %s and key %d in use on aggregation %d "
1041 			    "port %d\n", grp->lg_linkid, portp->lp_linkid,
1042 			    mac_str, portp->lp_lacp.PartnerOperKey,
1043 			    cport->sp_grp_linkid, cport->sp_linkid);
1044 			break;
1045 		}
1046 	}
1047 
1048 	mutex_exit(&lacp_sel_lock);
1049 	return (cport != NULL);
1050 }
1051 
1052 /*
1053  * Remove the specified port from the list of selected ports.
1054  */
1055 static void
1056 lacp_sel_ports_del(aggr_port_t *portp)
1057 {
1058 	lacp_sel_ports_t *cport, **prev = NULL;
1059 
1060 	mutex_enter(&lacp_sel_lock);
1061 
1062 	prev = &sel_ports;
1063 	for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1064 	    cport = cport->sp_next) {
1065 		if (portp->lp_linkid == cport->sp_linkid)
1066 			break;
1067 	}
1068 
1069 	if (cport == NULL) {
1070 		mutex_exit(&lacp_sel_lock);
1071 		return;
1072 	}
1073 
1074 	*prev = cport->sp_next;
1075 	kmem_free(cport, sizeof (*cport));
1076 
1077 	mutex_exit(&lacp_sel_lock);
1078 }
1079 
1080 /*
1081  * Add the specified port to the list of selected ports. Returns B_FALSE
1082  * if the operation could not be performed due to an memory allocation
1083  * error.
1084  */
1085 static boolean_t
1086 lacp_sel_ports_add(aggr_port_t *portp)
1087 {
1088 	lacp_sel_ports_t *new_port;
1089 	lacp_sel_ports_t *cport, **last;
1090 
1091 	mutex_enter(&lacp_sel_lock);
1092 
1093 	/* check if port is already in the list */
1094 	last = &sel_ports;
1095 	for (cport = sel_ports; cport != NULL;
1096 	    last = &cport->sp_next, cport = cport->sp_next) {
1097 		if (portp->lp_linkid == cport->sp_linkid) {
1098 			ASSERT(cport->sp_partner_key ==
1099 			    portp->lp_lacp.PartnerOperKey);
1100 			ASSERT(ether_cmp(&cport->sp_partner_system,
1101 			    &portp->lp_lacp.PartnerOperSystem) == 0);
1102 
1103 			mutex_exit(&lacp_sel_lock);
1104 			return (B_TRUE);
1105 		}
1106 	}
1107 
1108 	/* create and initialize new entry */
1109 	new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1110 	if (new_port == NULL) {
1111 		mutex_exit(&lacp_sel_lock);
1112 		return (B_FALSE);
1113 	}
1114 
1115 	new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1116 	bcopy(&portp->lp_lacp.PartnerOperSystem,
1117 	    &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1118 	new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1119 	new_port->sp_linkid = portp->lp_linkid;
1120 
1121 	*last = new_port;
1122 
1123 	mutex_exit(&lacp_sel_lock);
1124 	return (B_TRUE);
1125 }
1126 
1127 /*
1128  * lacp_selection_logic - LACP selection logic
1129  *		Sets the selected variable on a per port basis
1130  *		and sets Ready when all waiting ports are ready
1131  *		to go online.
1132  *
1133  * parameters:
1134  *      - portp - instance this applies to.
1135  *
1136  * invoked:
1137  *    - when initialization is needed
1138  *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1139  *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1140  *    - every time the wait_while_timer pops
1141  *    - everytime we turn LACP on/off
1142  */
1143 static void
1144 lacp_selection_logic(aggr_port_t *portp)
1145 {
1146 	aggr_port_t *tpp;
1147 	aggr_grp_t *aggrp = portp->lp_grp;
1148 	int ports_waiting;
1149 	boolean_t reset_mac = B_FALSE;
1150 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1151 
1152 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
1153 
1154 	/* LACP_OFF state not in specification so check here.  */
1155 	if (!pl->sm.lacp_on) {
1156 		lacp_port_unselect(portp);
1157 		aggrp->aggr.ready = B_FALSE;
1158 		lacp_mux_sm(portp);
1159 		return;
1160 	}
1161 
1162 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1163 	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1164 
1165 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1166 		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1167 		    "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1168 		    AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1169 		    portp->lp_state));
1170 
1171 		lacp_port_unselect(portp);
1172 		aggrp->aggr.ready = B_FALSE;
1173 		lacp_mux_sm(portp);
1174 		return;
1175 	}
1176 
1177 	/*
1178 	 * If LACP is not enabled then selected is never set.
1179 	 */
1180 	if (!pl->sm.lacp_enabled) {
1181 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1182 		    portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1183 
1184 		lacp_port_unselect(portp);
1185 		lacp_mux_sm(portp);
1186 		return;
1187 	}
1188 
1189 	/*
1190 	 * Check if the Partner MAC or Key are zero. If so, we have
1191 	 * not received any LACP info or it has expired and the
1192 	 * receive machine is in the LACP_DEFAULTED state.
1193 	 */
1194 	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1195 	    (pl->PartnerOperKey == 0)) {
1196 
1197 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1198 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1199 			    &etherzeroaddr) != 0 &&
1200 			    (tpp->lp_lacp.PartnerOperKey != 0))
1201 				break;
1202 		}
1203 
1204 		/*
1205 		 * If all ports have no key or aggregation address,
1206 		 * then clear the negotiated Partner MAC and key.
1207 		 */
1208 		if (tpp == NULL) {
1209 			/* Clear the aggregation Partner MAC and key */
1210 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1211 			aggrp->aggr.PartnerOperAggrKey = 0;
1212 		}
1213 
1214 		return;
1215 	}
1216 
1217 	/*
1218 	 * Insure that at least one port in the aggregation
1219 	 * matches the Partner aggregation MAC and key. If not,
1220 	 * then clear the aggregation MAC and key. Later we will
1221 	 * set the Partner aggregation MAC and key to that of the
1222 	 * current port's Partner MAC and key.
1223 	 */
1224 	if (ether_cmp(&pl->PartnerOperSystem,
1225 	    &aggrp->aggr.PartnerSystem) != 0 ||
1226 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1227 
1228 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1229 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1230 			    &aggrp->aggr.PartnerSystem) == 0 &&
1231 			    (tpp->lp_lacp.PartnerOperKey ==
1232 			    aggrp->aggr.PartnerOperAggrKey))
1233 				break;
1234 		}
1235 
1236 		if (tpp == NULL) {
1237 			/* Clear the aggregation Partner MAC and key */
1238 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1239 			aggrp->aggr.PartnerOperAggrKey = 0;
1240 			reset_mac = B_TRUE;
1241 		}
1242 	}
1243 
1244 	/*
1245 	 * If our Actor MAC is found in the Partner MAC
1246 	 * on this port then we have a loopback misconfiguration.
1247 	 */
1248 	if (ether_cmp(&pl->PartnerOperSystem,
1249 	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1250 		cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1251 		    portp->lp_linkid);
1252 
1253 		lacp_port_unselect(portp);
1254 		lacp_mux_sm(portp);
1255 		return;
1256 	}
1257 
1258 	/*
1259 	 * If our Partner MAC and Key are found on any other
1260 	 * ports that are not in our aggregation, we have
1261 	 * a misconfiguration.
1262 	 */
1263 	if (lacp_misconfig_check(portp)) {
1264 		lacp_mux_sm(portp);
1265 		return;
1266 	}
1267 
1268 	/*
1269 	 * If the Aggregation Partner MAC and Key have not been
1270 	 * set, then this is either the first port or the aggregation
1271 	 * MAC and key have been reset. In either case we must set
1272 	 * the values of the Partner MAC and key.
1273 	 */
1274 	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1275 	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1276 		/* Set aggregation Partner MAC and key */
1277 		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1278 		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1279 
1280 		/*
1281 		 * If we reset Partner aggregation MAC, then restart
1282 		 * selection_logic on ports that match new MAC address.
1283 		 */
1284 		if (reset_mac) {
1285 			for (tpp = aggrp->lg_ports; tpp; tpp =
1286 			    tpp->lp_next) {
1287 				if (tpp == portp)
1288 					continue;
1289 				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1290 				    &aggrp->aggr.PartnerSystem) == 0 &&
1291 				    (tpp->lp_lacp.PartnerOperKey ==
1292 				    aggrp->aggr.PartnerOperAggrKey))
1293 					lacp_selection_logic(tpp);
1294 			}
1295 		}
1296 	} else if (ether_cmp(&pl->PartnerOperSystem,
1297 	    &aggrp->aggr.PartnerSystem) != 0 ||
1298 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1299 		/*
1300 		 * The Partner port information does not match
1301 		 * that of the other ports in the aggregation
1302 		 * so disable this port.
1303 		 */
1304 		lacp_port_unselect(portp);
1305 
1306 		cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1307 		    "or key (%d) incompatible with Aggregation Partner "
1308 		    "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1309 		    aggrp->aggr.PartnerOperAggrKey);
1310 
1311 		lacp_mux_sm(portp);
1312 		return;
1313 	}
1314 
1315 	/* If we get to here, automatically set selected */
1316 	if (pl->sm.selected != AGGR_SELECTED) {
1317 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1318 		    "selected %d-->%d\n", portp->lp_linkid,
1319 		    pl->sm.selected, AGGR_SELECTED));
1320 		if (!lacp_port_select(portp))
1321 			return;
1322 		lacp_mux_sm(portp);
1323 	}
1324 
1325 	/*
1326 	 * From this point onward we have selected the port
1327 	 * and are simply checking if the Ready flag should
1328 	 * be set.
1329 	 */
1330 
1331 	/*
1332 	 * If at least two ports are waiting to aggregate
1333 	 * and ready_n is set on all ports waiting to aggregate
1334 	 * then set READY for the aggregation.
1335 	 */
1336 
1337 	ports_waiting = 0;
1338 
1339 	if (!aggrp->aggr.ready) {
1340 		/*
1341 		 * If all ports in the aggregation have received compatible
1342 		 * partner information and they match up correctly with the
1343 		 * switch, there is no need to wait for all the
1344 		 * wait_while_timers to pop.
1345 		 */
1346 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1347 			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1348 			    tpp->lp_lacp.sm.begin) &&
1349 			    !pl->PartnerOperPortState.bit.sync) {
1350 				/* Add up ports uninitialized or waiting */
1351 				ports_waiting++;
1352 				if (!tpp->lp_lacp.sm.ready_n)
1353 					return;
1354 			}
1355 		}
1356 	}
1357 
1358 	if (aggrp->aggr.ready) {
1359 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1360 		    "aggr.ready already set\n", portp->lp_linkid));
1361 		lacp_mux_sm(portp);
1362 	} else {
1363 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1364 		    portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1365 		aggrp->aggr.ready = B_TRUE;
1366 
1367 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1368 			lacp_mux_sm(tpp);
1369 	}
1370 
1371 }
1372 
1373 /*
1374  * wait_while_timer_pop - When the timer pops, we arrive here to
1375  *			set ready_n and trigger the selection logic.
1376  */
1377 static void
1378 wait_while_timer_pop(void *data)
1379 {
1380 	aggr_port_t *portp = data;
1381 
1382 	if (portp->lp_closing)
1383 		return;
1384 
1385 	AGGR_LACP_LOCK(portp->lp_grp);
1386 
1387 	AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1388 	    portp->lp_linkid));
1389 	portp->lp_lacp.wait_while_timer.id = 0;
1390 	portp->lp_lacp.sm.ready_n = B_TRUE;
1391 
1392 	lacp_selection_logic(portp);
1393 	AGGR_LACP_UNLOCK(portp->lp_grp);
1394 }
1395 
1396 static void
1397 start_wait_while_timer(aggr_port_t *portp)
1398 {
1399 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1400 
1401 	if (portp->lp_lacp.wait_while_timer.id == 0) {
1402 		portp->lp_lacp.wait_while_timer.id =
1403 		    timeout(wait_while_timer_pop, portp,
1404 		    drv_usectohz(1000000 *
1405 		    portp->lp_lacp.wait_while_timer.val));
1406 	}
1407 }
1408 
1409 
1410 static void
1411 stop_wait_while_timer(portp)
1412 aggr_port_t *portp;
1413 {
1414 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1415 
1416 	if (portp->lp_lacp.wait_while_timer.id != 0) {
1417 		AGGR_LACP_UNLOCK(portp->lp_grp);
1418 		(void) untimeout(portp->lp_lacp.wait_while_timer.id);
1419 		AGGR_LACP_LOCK(portp->lp_grp);
1420 		portp->lp_lacp.wait_while_timer.id = 0;
1421 	}
1422 }
1423 
1424 /*
1425  * Invoked when a port has been attached to a group.
1426  * Complete the processing that couldn't be finished from lacp_on()
1427  * because the port was not started. We know that the link is full
1428  * duplex and ON, otherwise it wouldn't be attached.
1429  */
1430 void
1431 aggr_lacp_port_attached(aggr_port_t *portp)
1432 {
1433 	aggr_grp_t *grp = portp->lp_grp;
1434 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1435 
1436 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1437 	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1438 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1439 
1440 	AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1441 	    portp->lp_linkid));
1442 
1443 	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1444 
1445 	if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
1446 		pl->ActorAdminPortState.bit.activity =
1447 		    pl->ActorOperPortState.bit.activity = B_FALSE;
1448 
1449 		/* Turn ON Collector_Distributor */
1450 		aggr_set_coll_dist_locked(portp, B_TRUE);
1451 
1452 		return;
1453 	}
1454 
1455 	pl->ActorAdminPortState.bit.activity =
1456 	    pl->ActorOperPortState.bit.activity =
1457 	    (grp->lg_lacp_mode == AGGR_LACP_ACTIVE);
1458 
1459 	pl->ActorAdminPortState.bit.timeout =
1460 	    pl->ActorOperPortState.bit.timeout =
1461 	    (grp->aggr.PeriodicTimer == AGGR_LACP_TIMER_SHORT);
1462 
1463 	pl->sm.lacp_enabled = B_TRUE;
1464 	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1465 	pl->sm.begin = B_TRUE;
1466 
1467 	if (!pl->sm.lacp_on) {
1468 		/* Turn OFF Collector_Distributor */
1469 		aggr_set_coll_dist_locked(portp, B_FALSE);
1470 
1471 		lacp_on(portp);
1472 	} else {
1473 		lacp_receive_sm(portp, NULL);
1474 		lacp_mux_sm(portp);
1475 
1476 		/* Enable Multicast Slow Protocol address */
1477 		aggr_lacp_mcast_on(portp);
1478 
1479 		/* periodic_sm is started up from the receive machine */
1480 		lacp_selection_logic(portp);
1481 	}
1482 }
1483 
1484 /*
1485  * Invoked when a port has been detached from a group. Turn off
1486  * LACP processing if it was enabled.
1487  */
1488 void
1489 aggr_lacp_port_detached(aggr_port_t *portp)
1490 {
1491 	aggr_grp_t *grp = portp->lp_grp;
1492 
1493 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1494 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1495 
1496 	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1497 	    portp->lp_linkid));
1498 
1499 	portp->lp_lacp.sm.port_enabled = B_FALSE;
1500 
1501 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1502 		return;
1503 
1504 	/* Disable Slow Protocol PDUs */
1505 	lacp_off(portp);
1506 }
1507 
1508 
1509 /*
1510  * Invoked after the outbound port selection policy has been changed.
1511  */
1512 void
1513 aggr_lacp_policy_changed(aggr_grp_t *grp)
1514 {
1515 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1516 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1517 
1518 	/* suspend transmission for CollectorMaxDelay time */
1519 	delay(grp->aggr.CollectorMaxDelay * 10);
1520 }
1521 
1522 
1523 /*
1524  * Enable Slow Protocol LACP and Marker PDUs.
1525  */
1526 static void
1527 lacp_on(aggr_port_t *portp)
1528 {
1529 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1530 	ASSERT(RW_WRITE_HELD(&portp->lp_grp->lg_lock));
1531 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1532 
1533 	/*
1534 	 * Reset the state machines and Partner operational
1535 	 * information. Careful to not reset things like
1536 	 * our link state.
1537 	 */
1538 	lacp_reset_port(portp);
1539 	portp->lp_lacp.sm.lacp_on = B_TRUE;
1540 
1541 	AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1542 
1543 	lacp_receive_sm(portp, NULL);
1544 	lacp_mux_sm(portp);
1545 
1546 	if (portp->lp_state != AGGR_PORT_STATE_ATTACHED)
1547 		return;
1548 
1549 	/* Enable Multicast Slow Protocol address */
1550 	aggr_lacp_mcast_on(portp);
1551 
1552 	/* periodic_sm is started up from the receive machine */
1553 	lacp_selection_logic(portp);
1554 } /* lacp_on */
1555 
1556 
1557 /* Disable Slow Protocol LACP and Marker PDUs */
1558 static void
1559 lacp_off(aggr_port_t *portp)
1560 {
1561 	aggr_grp_t *grp = portp->lp_grp;
1562 
1563 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1564 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1565 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1566 
1567 	portp->lp_lacp.sm.lacp_on = B_FALSE;
1568 
1569 	AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1570 
1571 	/*
1572 	 * Disable Slow Protocol Timers.  We must temporarily release
1573 	 * the group and port locks to avoid deadlocks. Make sure that
1574 	 * neither the port nor group are closing after re-acquiring
1575 	 * their locks.
1576 	 */
1577 	rw_exit(&portp->lp_lock);
1578 	rw_exit(&grp->lg_lock);
1579 
1580 	stop_periodic_timer(portp);
1581 	stop_current_while_timer(portp);
1582 	stop_wait_while_timer(portp);
1583 
1584 	rw_enter(&grp->lg_lock, RW_WRITER);
1585 	rw_enter(&portp->lp_lock, RW_WRITER);
1586 
1587 	if (!portp->lp_closing && !grp->lg_closing) {
1588 		lacp_mux_sm(portp);
1589 		lacp_periodic_sm(portp);
1590 		lacp_selection_logic(portp);
1591 	}
1592 
1593 	/* Turn OFF Collector_Distributor */
1594 	aggr_set_coll_dist_locked(portp, B_FALSE);
1595 
1596 	/* Disable Multicast Slow Protocol address */
1597 	aggr_lacp_mcast_off(portp);
1598 
1599 	lacp_reset_port(portp);
1600 }
1601 
1602 
1603 static boolean_t
1604 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1605 {
1606 	/*
1607 	 * 43.4.12 - "a Receive machine shall not validate
1608 	 * the Version Number, TLV_type, or Reserved fields in received
1609 	 * LACPDUs."
1610 	 * ... "a Receive machine may validate the Actor_Information_Length,
1611 	 * Partner_Information_Length, Collector_Information_Length,
1612 	 * or Terminator_Length fields."
1613 	 */
1614 	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1615 	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1616 	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1617 	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1618 		AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1619 		    " Terminator Length = %d \n", portp->lp_linkid,
1620 		    lacp->terminator_len));
1621 		return (B_FALSE);
1622 	}
1623 
1624 	return (B_TRUE);
1625 }
1626 
1627 
1628 static void
1629 start_current_while_timer(aggr_port_t *portp, uint_t time)
1630 {
1631 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1632 
1633 	if (portp->lp_lacp.current_while_timer.id == 0) {
1634 		if (time > 0) {
1635 			portp->lp_lacp.current_while_timer.val = time;
1636 		} else if (portp->lp_lacp.ActorOperPortState.bit.timeout) {
1637 			portp->lp_lacp.current_while_timer.val =
1638 			    SHORT_TIMEOUT_TIME;
1639 		} else {
1640 			portp->lp_lacp.current_while_timer.val =
1641 			    LONG_TIMEOUT_TIME;
1642 		}
1643 
1644 		portp->lp_lacp.current_while_timer.id =
1645 		    timeout(current_while_timer_pop, portp,
1646 		    drv_usectohz((clock_t)1000000 *
1647 		    (clock_t)portp->lp_lacp.current_while_timer.val));
1648 	}
1649 }
1650 
1651 
1652 static void
1653 stop_current_while_timer(aggr_port_t *portp)
1654 {
1655 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1656 
1657 	if (portp->lp_lacp.current_while_timer.id != 0) {
1658 		AGGR_LACP_UNLOCK(portp->lp_grp);
1659 		(void) untimeout(portp->lp_lacp.current_while_timer.id);
1660 		AGGR_LACP_LOCK(portp->lp_grp);
1661 		portp->lp_lacp.current_while_timer.id = 0;
1662 	}
1663 }
1664 
1665 
1666 static void
1667 current_while_timer_pop(void *data)
1668 {
1669 	aggr_port_t *portp = (aggr_port_t *)data;
1670 
1671 	if (portp->lp_closing)
1672 		return;
1673 
1674 	AGGR_LACP_LOCK(portp->lp_grp);
1675 
1676 	AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1677 	    "pop id=%p\n", portp->lp_linkid,
1678 	    portp->lp_lacp.current_while_timer.id));
1679 
1680 	portp->lp_lacp.current_while_timer.id = 0;
1681 	lacp_receive_sm(portp, NULL);
1682 	AGGR_LACP_UNLOCK(portp->lp_grp);
1683 }
1684 
1685 
1686 /*
1687  * record_Default - Simply copies over administrative values
1688  * to the partner operational values, and sets our state to indicate we
1689  * are using defaulted values.
1690  */
1691 static void
1692 record_Default(aggr_port_t *portp)
1693 {
1694 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1695 
1696 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1697 
1698 	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1699 	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1700 	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1701 	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1702 	pl->PartnerOperKey = pl->PartnerAdminKey;
1703 	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1704 
1705 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1706 }
1707 
1708 
1709 /* Returns B_TRUE on sync value changing */
1710 static boolean_t
1711 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1712 {
1713 	aggr_grp_t *aggrp = portp->lp_grp;
1714 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1715 	uint8_t save_sync;
1716 
1717 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1718 
1719 	/*
1720 	 * Partner Information
1721 	 */
1722 	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1723 	pl->PartnerOperPortPriority =
1724 	    ntohs(lacp->actor_info.port_priority);
1725 	pl->PartnerOperSystem = lacp->actor_info.system_id;
1726 	pl->PartnerOperSysPriority =
1727 	    htons(lacp->actor_info.system_priority);
1728 	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1729 
1730 	/* All state info except for Synchronization */
1731 	save_sync = pl->PartnerOperPortState.bit.sync;
1732 	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1733 
1734 	/* Defaulted set to FALSE */
1735 	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1736 
1737 	/*
1738 	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1739 	 *		Partner_System_Priority, Partner_Key, and
1740 	 *		Partner_State.Aggregation) are compared to the
1741 	 *		corresponding operations paramters values for
1742 	 *		the Actor. If these are equal, or if this is
1743 	 *		an individual link, we are synchronized.
1744 	 */
1745 	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1746 	    (ntohs(lacp->partner_info.port_priority) ==
1747 	    pl->ActorPortPriority) &&
1748 	    (ether_cmp(&lacp->partner_info.system_id,
1749 	    (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1750 	    (ntohs(lacp->partner_info.system_priority) ==
1751 	    aggrp->aggr.ActorSystemPriority) &&
1752 	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1753 	    (lacp->partner_info.state.bit.aggregation ==
1754 	    pl->ActorOperPortState.bit.aggregation)) ||
1755 	    (!lacp->actor_info.state.bit.aggregation)) {
1756 
1757 		pl->PartnerOperPortState.bit.sync =
1758 		    lacp->actor_info.state.bit.sync;
1759 	} else {
1760 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1761 	}
1762 
1763 	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1764 		AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1765 		    "%d -->%d\n", portp->lp_linkid, save_sync,
1766 		    pl->PartnerOperPortState.bit.sync));
1767 		return (B_TRUE);
1768 	} else {
1769 		return (B_FALSE);
1770 	}
1771 }
1772 
1773 
1774 /*
1775  * update_selected - If any of the Partner parameters has
1776  *			changed from a previous value, then
1777  *			unselect the link from the aggregator.
1778  */
1779 static boolean_t
1780 update_selected(aggr_port_t *portp, lacp_t *lacp)
1781 {
1782 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1783 
1784 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1785 
1786 	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1787 	    (pl->PartnerOperPortPriority !=
1788 	    ntohs(lacp->actor_info.port_priority)) ||
1789 	    (ether_cmp(&pl->PartnerOperSystem,
1790 	    &lacp->actor_info.system_id) != 0) ||
1791 	    (pl->PartnerOperSysPriority !=
1792 	    ntohs(lacp->actor_info.system_priority)) ||
1793 	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1794 	    (pl->PartnerOperPortState.bit.aggregation !=
1795 	    lacp->actor_info.state.bit.aggregation)) {
1796 		AGGR_LACP_DBG(("update_selected:(%d): "
1797 		    "selected  %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1798 		    AGGR_UNSELECTED));
1799 
1800 		lacp_port_unselect(portp);
1801 		return (B_TRUE);
1802 	} else {
1803 		return (B_FALSE);
1804 	}
1805 }
1806 
1807 
1808 /*
1809  * update_default_selected - If any of the operational Partner parameters
1810  *			is different than that of the administrative values
1811  *			then unselect the link from the aggregator.
1812  */
1813 static void
1814 update_default_selected(aggr_port_t *portp)
1815 {
1816 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1817 
1818 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1819 
1820 	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1821 	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1822 	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1823 	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1824 	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1825 	    (pl->PartnerOperPortState.bit.aggregation !=
1826 	    pl->PartnerAdminPortState.bit.aggregation)) {
1827 
1828 		AGGR_LACP_DBG(("update_default_selected:(%d): "
1829 		    "selected  %d-->%d\n", portp->lp_linkid,
1830 		    pl->sm.selected, AGGR_UNSELECTED));
1831 
1832 		lacp_port_unselect(portp);
1833 	}
1834 }
1835 
1836 
1837 /*
1838  * update_NTT - If any of the Partner values in the received LACPDU
1839  *			are different than that of the Actor operational
1840  *			values then set NTT to true.
1841  */
1842 static void
1843 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1844 {
1845 	aggr_grp_t *aggrp = portp->lp_grp;
1846 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1847 
1848 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1849 
1850 	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1851 	    (pl->ActorPortPriority !=
1852 	    ntohs(lacp->partner_info.port_priority)) ||
1853 	    (ether_cmp(&aggrp->lg_addr,
1854 	    &lacp->partner_info.system_id) != 0) ||
1855 	    (aggrp->aggr.ActorSystemPriority !=
1856 	    ntohs(lacp->partner_info.system_priority)) ||
1857 	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1858 	    (pl->ActorOperPortState.bit.activity !=
1859 	    lacp->partner_info.state.bit.activity) ||
1860 	    (pl->ActorOperPortState.bit.timeout !=
1861 	    lacp->partner_info.state.bit.timeout) ||
1862 	    (pl->ActorOperPortState.bit.sync !=
1863 	    lacp->partner_info.state.bit.sync) ||
1864 	    (pl->ActorOperPortState.bit.aggregation !=
1865 	    lacp->partner_info.state.bit.aggregation)) {
1866 
1867 		AGGR_LACP_DBG(("update_NTT:(%d): NTT  %d-->%d\n",
1868 		    portp->lp_linkid, pl->NTT, B_TRUE));
1869 
1870 		pl->NTT = B_TRUE;
1871 	}
1872 }
1873 
1874 /*
1875  * lacp_receive_sm - LACP receive state machine
1876  *
1877  * parameters:
1878  *      - portp - instance this applies to.
1879  *      - lacp - pointer in the case of a received LACPDU.
1880  *                This value is NULL if there is no LACPDU.
1881  *
1882  * invoked:
1883  *    - when initialization is needed
1884  *    - upon reception of an LACPDU. This is the common case.
1885  *    - every time the current_while_timer pops
1886  */
1887 static void
1888 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
1889 {
1890 	boolean_t sync_updated, selected_updated, save_activity;
1891 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1892 	lacp_receive_state_t oldstate = pl->sm.receive_state;
1893 
1894 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1895 
1896 	/* LACP_OFF state not in specification so check here.  */
1897 	if (!pl->sm.lacp_on)
1898 		return;
1899 
1900 	/* figure next state */
1901 	if (pl->sm.begin || pl->sm.port_moved) {
1902 		pl->sm.receive_state = LACP_INITIALIZE;
1903 	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
1904 		pl->sm.receive_state = LACP_PORT_DISABLED;
1905 	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
1906 		pl->sm.receive_state =
1907 		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
1908 		    LACP_DISABLED : LACP_PORT_DISABLED;
1909 	} else if (lacp != NULL) {
1910 		if ((pl->sm.receive_state == LACP_EXPIRED) ||
1911 		    (pl->sm.receive_state == LACP_DEFAULTED)) {
1912 			pl->sm.receive_state = LACP_CURRENT;
1913 		}
1914 	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
1915 	    (pl->current_while_timer.id == 0)) {
1916 		pl->sm.receive_state = LACP_EXPIRED;
1917 	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
1918 	    (pl->current_while_timer.id == 0)) {
1919 		pl->sm.receive_state = LACP_DEFAULTED;
1920 	}
1921 
1922 
1923 	if (!((lacp && (oldstate == LACP_CURRENT) &&
1924 	    (pl->sm.receive_state == LACP_CURRENT)))) {
1925 		AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
1926 		    portp->lp_linkid, lacp_receive_str[oldstate],
1927 		    lacp_receive_str[pl->sm.receive_state]));
1928 	}
1929 
1930 	switch (pl->sm.receive_state) {
1931 	case LACP_INITIALIZE:
1932 		lacp_port_unselect(portp);
1933 		record_Default(portp);
1934 		pl->ActorOperPortState.bit.expired = B_FALSE;
1935 		pl->sm.port_moved = B_FALSE;
1936 		pl->sm.receive_state = LACP_PORT_DISABLED;
1937 		pl->sm.begin = B_FALSE;
1938 		lacp_receive_sm(portp, NULL);
1939 		break;
1940 
1941 	case LACP_PORT_DISABLED:
1942 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1943 		/*
1944 		 * Stop current_while_timer in case
1945 		 * we got here from link down
1946 		 */
1947 		stop_current_while_timer(portp);
1948 
1949 		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
1950 			pl->sm.receive_state = LACP_DISABLED;
1951 			lacp_receive_sm(portp, lacp);
1952 			/* We goto LACP_DISABLED state */
1953 			break;
1954 		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
1955 			pl->sm.receive_state = LACP_EXPIRED;
1956 			/*
1957 			 * FALL THROUGH TO LACP_EXPIRED CASE:
1958 			 * We have no way of knowing if we get into
1959 			 * lacp_receive_sm() from a  current_while_timer
1960 			 * expiring as it has never been kicked off yet!
1961 			 */
1962 		} else {
1963 			/* We stay in LACP_PORT_DISABLED state */
1964 			break;
1965 		}
1966 		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
1967 		/* FALLTHROUGH */
1968 
1969 	case LACP_EXPIRED:
1970 		/*
1971 		 * Arrives here from LACP_PORT_DISABLED state as well as
1972 		 * as well as current_while_timer expiring.
1973 		 */
1974 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1975 		pl->PartnerOperPortState.bit.timeout = B_TRUE;
1976 
1977 		pl->ActorOperPortState.bit.expired = B_TRUE;
1978 		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
1979 		lacp_periodic_sm(portp);
1980 		break;
1981 
1982 	case LACP_DISABLED:
1983 		/*
1984 		 * This is the normal state for recv_sm when LACP_OFF
1985 		 * is set or the NIC is in half duplex mode.
1986 		 */
1987 		lacp_port_unselect(portp);
1988 		record_Default(portp);
1989 		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
1990 		pl->ActorOperPortState.bit.expired = B_FALSE;
1991 		break;
1992 
1993 	case LACP_DEFAULTED:
1994 		/*
1995 		 * Current_while_timer expired a second time.
1996 		 */
1997 		update_default_selected(portp);
1998 		record_Default(portp);	/* overwrite Partner Oper val */
1999 		pl->ActorOperPortState.bit.expired = B_FALSE;
2000 		pl->PartnerOperPortState.bit.sync = B_TRUE;
2001 
2002 		lacp_selection_logic(portp);
2003 		lacp_mux_sm(portp);
2004 		break;
2005 
2006 	case LACP_CURRENT:
2007 		/*
2008 		 * Reception of LACPDU
2009 		 */
2010 
2011 		if (!lacp) /* no LACPDU so current_while_timer popped */
2012 			break;
2013 
2014 		AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
2015 		    portp->lp_linkid));
2016 
2017 		/*
2018 		 * Validate Actor_Information_Length,
2019 		 * Partner_Information_Length, Collector_Information_Length,
2020 		 * and Terminator_Length fields.
2021 		 */
2022 		if (!valid_lacp_pdu(portp, lacp)) {
2023 			AGGR_LACP_DBG(("lacp_receive_sm (%d): "
2024 			    "Invalid LACPDU received\n",
2025 			    portp->lp_linkid));
2026 			break;
2027 		}
2028 
2029 		save_activity = pl->PartnerOperPortState.bit.activity;
2030 		selected_updated = update_selected(portp, lacp);
2031 		update_NTT(portp, lacp);
2032 		sync_updated = record_PDU(portp, lacp);
2033 
2034 		pl->ActorOperPortState.bit.expired = B_FALSE;
2035 
2036 		if (selected_updated) {
2037 			lacp_selection_logic(portp);
2038 			lacp_mux_sm(portp);
2039 		} else if (sync_updated) {
2040 			lacp_mux_sm(portp);
2041 		}
2042 
2043 		/*
2044 		 * If the periodic timer value bit has been modified
2045 		 * or the partner activity bit has been changed then
2046 		 * we need to respectively:
2047 		 *  - restart the timer with the proper timeout value.
2048 		 *  - possibly enable/disable transmission of LACPDUs.
2049 		 */
2050 		if ((pl->PartnerOperPortState.bit.timeout &&
2051 		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2052 		    (!pl->PartnerOperPortState.bit.timeout &&
2053 		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2054 		    (pl->PartnerOperPortState.bit.activity !=
2055 		    save_activity)) {
2056 			lacp_periodic_sm(portp);
2057 		}
2058 
2059 		stop_current_while_timer(portp);
2060 		/* Check if we need to transmit an LACPDU */
2061 		if (pl->NTT)
2062 			lacp_xmit_sm(portp);
2063 		start_current_while_timer(portp, 0);
2064 
2065 		break;
2066 	}
2067 }
2068 
2069 static void
2070 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2071 {
2072 	rw_enter(&portp->lp_lock, RW_WRITER);
2073 	aggr_set_coll_dist_locked(portp, enable);
2074 	rw_exit(&portp->lp_lock);
2075 }
2076 
2077 static void
2078 aggr_set_coll_dist_locked(aggr_port_t *portp, boolean_t enable)
2079 {
2080 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
2081 
2082 	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2083 	    portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2084 
2085 	if (!enable) {
2086 		/*
2087 		 * Turn OFF Collector_Distributor.
2088 		 */
2089 		portp->lp_collector_enabled = B_FALSE;
2090 		aggr_send_port_disable(portp);
2091 		return;
2092 	}
2093 
2094 	/*
2095 	 * Turn ON Collector_Distributor.
2096 	 */
2097 
2098 	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2099 	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2100 		/* Port is compatible and can be aggregated */
2101 		portp->lp_collector_enabled = B_TRUE;
2102 		aggr_send_port_enable(portp);
2103 	}
2104 }
2105 
2106 /*
2107  * Process a received Marker or LACPDU.
2108  */
2109 void
2110 aggr_lacp_rx(aggr_port_t *portp, mblk_t *dmp)
2111 {
2112 	lacp_t	*lacp;
2113 
2114 	dmp->b_rptr += sizeof (struct ether_header);
2115 
2116 	if (MBLKL(dmp) < sizeof (lacp_t)) {
2117 		freemsg(dmp);
2118 		return;
2119 	}
2120 
2121 	lacp = (lacp_t *)dmp->b_rptr;
2122 
2123 	switch (lacp->subtype) {
2124 	case LACP_SUBTYPE:
2125 		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2126 		    portp->lp_linkid));
2127 
2128 		AGGR_LACP_LOCK(portp->lp_grp);
2129 		if (!portp->lp_lacp.sm.lacp_on) {
2130 			AGGR_LACP_UNLOCK(portp->lp_grp);
2131 			break;
2132 		}
2133 		lacp_receive_sm(portp, lacp);
2134 		AGGR_LACP_UNLOCK(portp->lp_grp);
2135 		break;
2136 
2137 	case MARKER_SUBTYPE:
2138 		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2139 		    portp->lp_linkid));
2140 
2141 		(void) receive_marker_pdu(portp, dmp);
2142 		break;
2143 
2144 	default:
2145 		AGGR_LACP_DBG(("aggr_lacp_rx: (%d): "
2146 		    "Unknown Slow Protocol type %d\n",
2147 		    portp->lp_linkid, lacp->subtype));
2148 		break;
2149 	}
2150 
2151 	freemsg(dmp);
2152 }
2153