xref: /titanic_51/usr/src/uts/common/io/aggr/aggr_lacp.c (revision 81f63062a60a29358c252e0d10807f8a8547fbb5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/list.h>
37 #include <sys/ksynch.h>
38 #include <sys/kmem.h>
39 #include <sys/stream.h>
40 #include <sys/modctl.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/atomic.h>
44 #include <sys/stat.h>
45 #include <sys/byteorder.h>
46 #include <sys/strsun.h>
47 #include <sys/isa_defs.h>
48 
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
51 
52 static struct ether_addr	etherzeroaddr = {
53 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
54 };
55 
56 /*
57  * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
58  */
59 static struct ether_addr   slow_multicast_addr = {
60 	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
61 };
62 
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG((fmt, args...)) takes a fully parenthesized printf()
 * argument list.  On DEBUG builds the message is printed only when
 * aggr_lacp_debug is non-zero; on non-DEBUG builds the macro does nothing.
 *
 * Both variants are wrapped in do { } while (0) so that the macro followed
 * by its semicolon is exactly one statement.  That keeps it safe as the
 * unbraced body of an if/else, where a bare "if (...) { }" or "{}" expansion
 * would either capture the caller's else or fail to compile.
 */
#ifdef DEBUG
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	\
	do { if (aggr_lacp_debug) { (void) printf x; } } while (0)
#else
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif /* DEBUG */
70 
71 #define	NSECS_PER_SEC   1000000000ll
72 
73 /* used by lacp_misconfig_walker() */
74 typedef struct lacp_misconfig_check_state_s {
75 	aggr_port_t *cs_portp;
76 	boolean_t cs_found;
77 } lacp_misconfig_check_state_t;
78 
79 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
80 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
81 static const char *lacp_mux_str[] = LACP_MUX_STRINGS;
82 
83 static uint16_t lacp_port_priority = 0x1000;
84 static uint16_t lacp_system_priority = 0x1000;
85 
86 /*
87  * Maintains a list of all ports in ATTACHED state. This information
88  * is used to detect misconfiguration.
89  */
90 typedef struct lacp_sel_ports {
91 	uint16_t sp_key;
92 	char sp_devname[MAXNAMELEN + 1];
93 	struct ether_addr sp_partner_system;
94 	uint32_t sp_partner_key;
95 	struct lacp_sel_ports *sp_next;
96 } lacp_sel_ports_t;
97 
98 static lacp_sel_ports_t *sel_ports = NULL;
99 static kmutex_t lacp_sel_lock;
100 
101 static void periodic_timer_pop_locked(aggr_port_t *);
102 static void periodic_timer_pop(void *);
103 static void lacp_xmit_sm(aggr_port_t *);
104 static void lacp_periodic_sm(aggr_port_t *);
105 static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
106 static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
107 static void lacp_on(aggr_port_t *);
108 static void lacp_off(aggr_port_t *);
109 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
110 static void lacp_receive_sm(aggr_port_t *, lacp_t *);
111 static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
112 static void aggr_set_coll_dist_locked(aggr_port_t *, boolean_t);
113 static void start_wait_while_timer(aggr_port_t *);
114 static void stop_wait_while_timer(aggr_port_t *);
115 static void lacp_reset_port(aggr_port_t *);
116 static void stop_current_while_timer(aggr_port_t *);
117 static void current_while_timer_pop(void *);
118 static void update_default_selected(aggr_port_t *);
119 static boolean_t update_selected(aggr_port_t *, lacp_t *);
120 static boolean_t lacp_sel_ports_add(aggr_port_t *);
121 static void lacp_sel_ports_del(aggr_port_t *);
122 
123 void
124 aggr_lacp_init(void)
125 {
126 	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
127 }
128 
129 void
130 aggr_lacp_fini(void)
131 {
132 	mutex_destroy(&lacp_sel_lock);
133 }
134 
135 /*
136  * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
137  * could not be performed due to a memory allocation error, B_TRUE otherwise.
138  */
139 static boolean_t
140 lacp_port_select(aggr_port_t *portp)
141 {
142 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
143 
144 	if (!lacp_sel_ports_add(portp))
145 		return (B_FALSE);
146 	portp->lp_lacp.sm.selected = AGGR_SELECTED;
147 	return (B_TRUE);
148 }
149 
150 /*
151  * Set the port LACP state to UNSELECTED.
152  */
153 static void
154 lacp_port_unselect(aggr_port_t *portp)
155 {
156 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
157 
158 	lacp_sel_ports_del(portp);
159 	portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
160 }
161 
162 /*
163  * Initialize group specific LACP state and parameters.
164  */
165 void
166 aggr_lacp_init_grp(aggr_grp_t *aggrp)
167 {
168 	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
169 	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
170 	aggrp->aggr.CollectorMaxDelay = 10;
171 	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
172 	aggrp->aggr.ready = B_FALSE;
173 }
174 
175 /*
176  * Complete LACP info initialization at port creation time.
177  */
178 void
179 aggr_lacp_init_port(aggr_port_t *portp)
180 {
181 	aggr_grp_t *aggrp = portp->lp_grp;
182 	aggr_lacp_port_t *pl = &portp->lp_lacp;
183 
184 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
185 	ASSERT(RW_LOCK_HELD(&aggrp->lg_lock));
186 	ASSERT(RW_LOCK_HELD(&portp->lp_lock));
187 
188 	/* actor port # */
189 	pl->ActorPortNumber = portp->lp_portid;
190 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s): "
191 	    "ActorPortNumber = 0x%x\n", portp->lp_devname,
192 	    pl->ActorPortNumber));
193 
194 	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
195 	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
196 	pl->NTT = B_FALSE;			/* need to transmit */
197 
198 	pl->ActorAdminPortKey = aggrp->lg_key;
199 	pl->ActorOperPortKey = pl->ActorAdminPortKey;
200 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s) "
201 	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
202 	    portp->lp_devname, pl->ActorAdminPortKey, pl->ActorOperPortKey));
203 
204 	/* Actor admin. port state */
205 	pl->ActorAdminPortState.bit.activity = B_FALSE;
206 	pl->ActorAdminPortState.bit.timeout = B_TRUE;
207 	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
208 	pl->ActorAdminPortState.bit.sync = B_FALSE;
209 	pl->ActorAdminPortState.bit.collecting = B_FALSE;
210 	pl->ActorAdminPortState.bit.distributing = B_FALSE;
211 	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
212 	pl->ActorAdminPortState.bit.expired = B_FALSE;
213 	pl->ActorOperPortState = pl->ActorAdminPortState;
214 
215 	/*
216 	 * Partner Administrative Information
217 	 * (All initialized to zero except for the following)
218 	 * Fast Timeouts.
219 	 */
220 	pl->PartnerAdminPortState.bit.timeout =
221 	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
222 
223 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
224 
225 	/*
226 	 * State machine information.
227 	 */
228 	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
229 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
230 	pl->sm.lacp_enabled = B_FALSE;
231 	pl->sm.port_enabled = B_FALSE;		/* Link Down */
232 	pl->sm.actor_churn = B_FALSE;
233 	pl->sm.partner_churn = B_FALSE;
234 	pl->sm.ready_n = B_FALSE;
235 	pl->sm.port_moved = B_FALSE;
236 
237 	lacp_port_unselect(portp);
238 
239 	pl->sm.periodic_state = LACP_NO_PERIODIC;
240 	pl->sm.receive_state = LACP_INITIALIZE;
241 	pl->sm.mux_state = LACP_DETACHED;
242 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
243 
244 	/*
245 	 * Timer information.
246 	 */
247 	pl->current_while_timer.id = 0;
248 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
249 
250 	pl->periodic_timer.id = 0;
251 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
252 
253 	pl->wait_while_timer.id = 0;
254 	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
255 }
256 
257 /*
258  * Port initialization when we need to
259  * turn LACP on/off, etc. Not everything is
260  * reset like in the above routine.
261  *		Do NOT modify things like link status.
262  */
263 static void
264 lacp_reset_port(aggr_port_t *portp)
265 {
266 	aggr_lacp_port_t *pl = &portp->lp_lacp;
267 
268 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
269 
270 	pl->NTT = B_FALSE;			/* need to transmit */
271 
272 	/* reset operational port state */
273 	pl->ActorOperPortState.bit.timeout =
274 	    pl->ActorAdminPortState.bit.timeout;
275 
276 	pl->ActorOperPortState.bit.sync = B_FALSE;
277 	pl->ActorOperPortState.bit.collecting = B_FALSE;
278 	pl->ActorOperPortState.bit.distributing = B_FALSE;
279 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
280 	pl->ActorOperPortState.bit.expired = B_FALSE;
281 
282 	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
283 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
284 
285 	/*
286 	 * State machine information.
287 	 */
288 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
289 	pl->sm.actor_churn = B_FALSE;
290 	pl->sm.partner_churn = B_FALSE;
291 	pl->sm.ready_n = B_FALSE;
292 
293 	lacp_port_unselect(portp);
294 
295 	pl->sm.periodic_state = LACP_NO_PERIODIC;
296 	pl->sm.receive_state = LACP_INITIALIZE;
297 	pl->sm.mux_state = LACP_DETACHED;
298 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
299 
300 	/*
301 	 * Timer information.
302 	 */
303 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
304 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
305 }
306 
307 static void
308 aggr_lacp_mcast_on(aggr_port_t *port)
309 {
310 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
311 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
312 
313 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
314 		return;
315 
316 	(void) aggr_port_multicst(port, B_TRUE,
317 	    (uchar_t *)&slow_multicast_addr);
318 }
319 
320 static void
321 aggr_lacp_mcast_off(aggr_port_t *port)
322 {
323 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
324 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
325 
326 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
327 		return;
328 
329 	(void) aggr_port_multicst(port, B_FALSE,
330 	    (uchar_t *)&slow_multicast_addr);
331 }
332 
333 static void
334 start_periodic_timer(aggr_port_t *portp)
335 {
336 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
337 
338 	if (portp->lp_lacp.periodic_timer.id == 0) {
339 		portp->lp_lacp.periodic_timer.id =
340 		    timeout(periodic_timer_pop, portp,
341 		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
342 	}
343 }
344 
345 static void
346 stop_periodic_timer(aggr_port_t *portp)
347 {
348 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
349 
350 	if (portp->lp_lacp.periodic_timer.id != 0) {
351 		AGGR_LACP_UNLOCK(portp->lp_grp);
352 		(void) untimeout(portp->lp_lacp.periodic_timer.id);
353 		AGGR_LACP_LOCK(portp->lp_grp);
354 		portp->lp_lacp.periodic_timer.id = 0;
355 	}
356 }
357 
358 /*
359  * When the timer pops, we arrive here to
360  * clear out LACPDU count as well as transmit an
361  * LACPDU. We then set the periodic state and let
362  * the periodic state machine restart the timer.
363  */
364 
365 static void
366 periodic_timer_pop_locked(aggr_port_t *portp)
367 {
368 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
369 
370 	portp->lp_lacp.periodic_timer.id = NULL;
371 	portp->lp_lacp_stats.LACPDUsTx = 0;
372 
373 	/* current timestamp */
374 	portp->lp_lacp.time = gethrtime();
375 	portp->lp_lacp.NTT = B_TRUE;
376 	lacp_xmit_sm(portp);
377 
378 	/*
379 	 * Set Periodic State machine state based on the
380 	 * value of the Partner Operation Port State timeout
381 	 * bit.
382 	 */
383 	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
384 		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
385 		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
386 	} else {
387 		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
388 		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
389 	}
390 
391 	lacp_periodic_sm(portp);
392 }
393 
394 static void
395 periodic_timer_pop(void *data)
396 {
397 	aggr_port_t *portp = data;
398 
399 	if (portp->lp_closing)
400 		return;
401 
402 	AGGR_LACP_LOCK(portp->lp_grp);
403 	periodic_timer_pop_locked(portp);
404 	AGGR_LACP_UNLOCK(portp->lp_grp);
405 }
406 
407 /*
408  * Invoked from:
409  *	- startup upon aggregation
410  *	- when the periodic timer pops
411  *	- when the periodic timer value is changed
412  *	- when the port is attached or detached
413  *	- when LACP mode is changed.
414  */
415 static void
416 lacp_periodic_sm(aggr_port_t *portp)
417 {
418 	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
419 	aggr_lacp_port_t *pl = &portp->lp_lacp;
420 
421 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
422 
423 	/* LACP_OFF state not in specification so check here.  */
424 	if (!pl->sm.lacp_on) {
425 		/* Stop timer whether it is running or not */
426 		stop_periodic_timer(portp);
427 		pl->sm.periodic_state = LACP_NO_PERIODIC;
428 		pl->NTT = B_FALSE;
429 		AGGR_LACP_DBG(("lacp_periodic_sm(%s):NO LACP "
430 		    "%s--->%s\n", portp->lp_devname,
431 		    lacp_periodic_str[oldstate],
432 		    lacp_periodic_str[pl->sm.periodic_state]));
433 		return;
434 	}
435 
436 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
437 	    !pl->sm.port_enabled ||
438 	    !pl->ActorOperPortState.bit.activity &&
439 	    !pl->PartnerOperPortState.bit.activity) {
440 
441 		/* Stop timer whether it is running or not */
442 		stop_periodic_timer(portp);
443 		pl->sm.periodic_state = LACP_NO_PERIODIC;
444 		pl->NTT = B_FALSE;
445 		AGGR_LACP_DBG(("lacp_periodic_sm(%s):STOP %s--->%s\n",
446 		    portp->lp_devname, lacp_periodic_str[oldstate],
447 		    lacp_periodic_str[pl->sm.periodic_state]));
448 		return;
449 	}
450 
451 	/*
452 	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
453 	 * has been received. Then after we timeout, then it is
454 	 * possible to go to SLOW_PERIODIC_TIME.
455 	 */
456 	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
457 		pl->periodic_timer.val = FAST_PERIODIC_TIME;
458 		pl->sm.periodic_state = LACP_FAST_PERIODIC;
459 	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
460 	    pl->PartnerOperPortState.bit.timeout) {
461 		/*
462 		 * If we receive a bit indicating we are going to
463 		 * fast periodic from slow periodic, stop the timer
464 		 * and let the periodic_timer_pop routine deal
465 		 * with reseting the periodic state and transmitting
466 		 * a LACPDU.
467 		 */
468 		stop_periodic_timer(portp);
469 		periodic_timer_pop_locked(portp);
470 	}
471 
472 	/* Rearm timer with value provided by partner */
473 	start_periodic_timer(portp);
474 }
475 
476 /*
477  * This routine transmits an LACPDU if lacp_enabled
478  * is TRUE and if NTT is set.
479  */
480 static void
481 lacp_xmit_sm(aggr_port_t *portp)
482 {
483 	aggr_lacp_port_t *pl = &portp->lp_lacp;
484 	size_t	len;
485 	mblk_t  *mp;
486 	hrtime_t now, elapsed;
487 	const mac_txinfo_t *mtp;
488 
489 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
490 
491 	/* LACP_OFF state not in specification so check here.  */
492 	if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
493 		return;
494 
495 	/*
496 	 * Do nothing if LACP has been turned off or if the
497 	 * periodic state machine is not enabled.
498 	 */
499 	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
500 	    !pl->sm.lacp_enabled || pl->sm.begin) {
501 		pl->NTT = B_FALSE;
502 		return;
503 	}
504 
505 	/*
506 	 * If we have sent 5 Slow packets in the last second, avoid
507 	 * sending any more here. No more than three LACPDUs may be transmitted
508 	 * in any Fast_Periodic_Time interval.
509 	 */
510 	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
511 		/*
512 		 * Grab the current time value and see if
513 		 * more than 1 second has passed. If so,
514 		 * reset the timestamp and clear the count.
515 		 */
516 		now = gethrtime();
517 		elapsed = now - pl->time;
518 		if (elapsed > NSECS_PER_SEC) {
519 			portp->lp_lacp_stats.LACPDUsTx = 0;
520 			pl->time = now;
521 		} else {
522 			return;
523 		}
524 	}
525 
526 	len = sizeof (lacp_t) + sizeof (struct ether_header);
527 	mp = allocb(len, BPRI_MED);
528 	if (mp == NULL)
529 		return;
530 
531 	mp->b_wptr = mp->b_rptr + len;
532 	bzero(mp->b_rptr, len);
533 
534 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
535 	fill_lacp_pdu(portp,
536 	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
537 
538 	/*
539 	 * Store the transmit info pointer locally in case it changes between
540 	 * loading mt_fn and mt_arg.
541 	 */
542 	mtp = portp->lp_txinfo;
543 	mtp->mt_fn(mtp->mt_arg, mp);
544 
545 	pl->NTT = B_FALSE;
546 	portp->lp_lacp_stats.LACPDUsTx++;
547 }
548 
549 /*
550  * Initialize the ethernet header of a LACP packet sent from the specified
551  * port.
552  */
553 static void
554 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
555 {
556 	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
557 	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
558 	    ETHERADDRL);
559 	ether->ether_type = htons(ETHERTYPE_SLOW);
560 }
561 
562 static void
563 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
564 {
565 	aggr_lacp_port_t *pl = &portp->lp_lacp;
566 	aggr_grp_t *aggrp = portp->lp_grp;
567 
568 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
569 
570 	lacp->subtype = LACP_SUBTYPE;
571 	lacp->version = LACP_VERSION;
572 
573 	rw_enter(&aggrp->lg_lock, RW_READER);
574 	rw_enter(&portp->lp_lock, RW_READER);
575 
576 	/*
577 	 * Actor Information
578 	 */
579 	lacp->actor_info.tlv_type = ACTOR_TLV;
580 	lacp->actor_info.information_len = sizeof (link_info_t);
581 	lacp->actor_info.system_priority =
582 	    htons(aggrp->aggr.ActorSystemPriority);
583 	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
584 	    ETHERADDRL);
585 	lacp->actor_info.key = htons(pl->ActorOperPortKey);
586 	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
587 	lacp->actor_info.port = htons(pl->ActorPortNumber);
588 	lacp->actor_info.state.state = pl->ActorOperPortState.state;
589 
590 	/*
591 	 * Partner Information
592 	 */
593 	lacp->partner_info.tlv_type = PARTNER_TLV;
594 	lacp->partner_info.information_len = sizeof (link_info_t);
595 	lacp->partner_info.system_priority =
596 	    htons(pl->PartnerOperSysPriority);
597 	lacp->partner_info.system_id = pl->PartnerOperSystem;
598 	lacp->partner_info.key = htons(pl->PartnerOperKey);
599 	lacp->partner_info.port_priority =
600 	    htons(pl->PartnerOperPortPriority);
601 	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
602 	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
603 
604 	/* Collector Information */
605 	lacp->tlv_collector = COLLECTOR_TLV;
606 	lacp->collector_len = 0x10;
607 	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
608 
609 	/* Termination Information */
610 	lacp->tlv_terminator = TERMINATOR_TLV;
611 	lacp->terminator_len = 0x0;
612 
613 	rw_exit(&portp->lp_lock);
614 	rw_exit(&aggrp->lg_lock);
615 }
616 
617 /*
618  * lacp_mux_sm - LACP mux state machine
619  *		This state machine is invoked from:
620  *			- startup upon aggregation
621  *			- from the Selection logic
622  *			- when the wait_while_timer pops
623  *			- when the aggregation MAC address is changed
624  *			- when receiving DL_NOTE_LINK_UP/DOWN
625  *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
626  *			- when LACP mode is changed.
627  *			- when a DL_NOTE_SPEED is received
628  */
629 static void
630 lacp_mux_sm(aggr_port_t *portp)
631 {
632 	aggr_grp_t *aggrp = portp->lp_grp;
633 	boolean_t NTT_updated = B_FALSE;
634 	aggr_lacp_port_t *pl = &portp->lp_lacp;
635 	lacp_mux_state_t oldstate = pl->sm.mux_state;
636 
637 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
638 
639 	/* LACP_OFF state not in specification so check here.  */
640 	if (!pl->sm.lacp_on) {
641 		pl->sm.mux_state = LACP_DETACHED;
642 		pl->ActorOperPortState.bit.sync = B_FALSE;
643 
644 		if (pl->ActorOperPortState.bit.collecting ||
645 		    pl->ActorOperPortState.bit.distributing) {
646 			AGGR_LACP_DBG(("trunk link: (%s): "
647 			    "Collector_Distributor Disabled.\n",
648 			    portp->lp_devname));
649 		}
650 
651 		pl->ActorOperPortState.bit.collecting =
652 		    pl->ActorOperPortState.bit.distributing = B_FALSE;
653 		return;
654 	}
655 
656 	if (pl->sm.begin || !pl->sm.lacp_enabled)
657 		pl->sm.mux_state = LACP_DETACHED;
658 
659 again:
660 	/* determine next state, or return if state unchanged */
661 	switch (pl->sm.mux_state) {
662 	case LACP_DETACHED:
663 		if (pl->sm.begin) {
664 			break;
665 		}
666 
667 		if ((pl->sm.selected == AGGR_SELECTED) ||
668 		    (pl->sm.selected == AGGR_STANDBY)) {
669 			pl->sm.mux_state = LACP_WAITING;
670 			break;
671 		}
672 		return;
673 
674 	case LACP_WAITING:
675 		if (pl->sm.selected == AGGR_UNSELECTED) {
676 			pl->sm.mux_state = LACP_DETACHED;
677 			break;
678 		}
679 
680 		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
681 			pl->sm.mux_state = LACP_ATTACHED;
682 			break;
683 		}
684 		return;
685 
686 	case LACP_ATTACHED:
687 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
688 		    (pl->sm.selected == AGGR_STANDBY)) {
689 			pl->sm.mux_state = LACP_DETACHED;
690 			break;
691 		}
692 
693 		if ((pl->sm.selected == AGGR_SELECTED) &&
694 		    pl->PartnerOperPortState.bit.sync) {
695 			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
696 			break;
697 		}
698 		return;
699 
700 	case LACP_COLLECTING_DISTRIBUTING:
701 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
702 		    (pl->sm.selected == AGGR_STANDBY) ||
703 		    !pl->PartnerOperPortState.bit.sync) {
704 			pl->sm.mux_state = LACP_ATTACHED;
705 			break;
706 		}
707 		return;
708 	}
709 
710 	AGGR_LACP_DBG(("lacp_mux_sm(%s):%s--->%s\n",
711 	    portp->lp_devname, lacp_mux_str[oldstate],
712 	    lacp_mux_str[pl->sm.mux_state]));
713 
714 	/* perform actions on entering a new state */
715 	switch (pl->sm.mux_state) {
716 	case LACP_DETACHED:
717 		if (pl->ActorOperPortState.bit.collecting ||
718 		    pl->ActorOperPortState.bit.distributing) {
719 			AGGR_LACP_DBG(("trunk link: (%s): "
720 			    "Collector_Distributor Disabled.\n",
721 			    portp->lp_devname));
722 		}
723 
724 		pl->ActorOperPortState.bit.sync =
725 		    pl->ActorOperPortState.bit.collecting = B_FALSE;
726 
727 		/* Turn OFF Collector_Distributor */
728 		aggr_set_coll_dist(portp, B_FALSE);
729 
730 		pl->ActorOperPortState.bit.distributing = B_FALSE;
731 		NTT_updated = B_TRUE;
732 		break;
733 
734 	case LACP_WAITING:
735 		start_wait_while_timer(portp);
736 		break;
737 
738 	case LACP_ATTACHED:
739 		if (pl->ActorOperPortState.bit.collecting ||
740 		    pl->ActorOperPortState.bit.distributing) {
741 			AGGR_LACP_DBG(("trunk link: (%s): "
742 			    "Collector_Distributor Disabled.\n",
743 			    portp->lp_devname));
744 		}
745 
746 		pl->ActorOperPortState.bit.sync = B_TRUE;
747 		pl->ActorOperPortState.bit.collecting = B_FALSE;
748 
749 		/* Turn OFF Collector_Distributor */
750 		aggr_set_coll_dist(portp, B_FALSE);
751 
752 		pl->ActorOperPortState.bit.distributing = B_FALSE;
753 		NTT_updated = B_TRUE;
754 		if (pl->PartnerOperPortState.bit.sync) {
755 			/*
756 			 * We had already received an updated sync from
757 			 * the partner. Attempt to transition to
758 			 * collecting/distributing now.
759 			 */
760 			goto again;
761 		}
762 		break;
763 
764 	case LACP_COLLECTING_DISTRIBUTING:
765 		if (!pl->ActorOperPortState.bit.collecting &&
766 		    !pl->ActorOperPortState.bit.distributing) {
767 			AGGR_LACP_DBG(("trunk link: (%s): "
768 			    "Collector_Distributor Enabled.\n",
769 			    portp->lp_devname));
770 		}
771 		pl->ActorOperPortState.bit.distributing = B_TRUE;
772 
773 		/* Turn Collector_Distributor back ON */
774 		aggr_set_coll_dist(portp, B_TRUE);
775 
776 		pl->ActorOperPortState.bit.collecting = B_TRUE;
777 		NTT_updated = B_TRUE;
778 		break;
779 	}
780 
781 	/*
782 	 * If we updated the state of the NTT variable, then
783 	 * initiate a LACPDU transmission.
784 	 */
785 	if (NTT_updated) {
786 		pl->NTT = B_TRUE;
787 		lacp_xmit_sm(portp);
788 	}
789 } /* lacp_mux_sm */
790 
791 
792 static void
793 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
794 {
795 	marker_pdu_t		*markerp = (marker_pdu_t *)mp->b_rptr;
796 	const mac_txinfo_t	*mtp;
797 
798 	AGGR_LACP_LOCK(portp->lp_grp);
799 
800 	AGGR_LACP_DBG(("trunk link: (%s): MARKER PDU received:\n",
801 	    portp->lp_devname));
802 
803 	/* LACP_OFF state not in specification so check here.  */
804 	if (!portp->lp_lacp.sm.lacp_on)
805 		goto bail;
806 
807 	if (MBLKL(mp) < sizeof (marker_pdu_t))
808 		goto bail;
809 
810 	if (markerp->version != MARKER_VERSION) {
811 		AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: "
812 		    "version = %d does not match s/w version %d\n",
813 		    portp->lp_devname, markerp->version, MARKER_VERSION));
814 		goto bail;
815 	}
816 
817 	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
818 		/* We do not yet send out MARKER info PDUs */
819 		AGGR_LACP_DBG(("trunk link (%s): MARKER RESPONSE PDU: "
820 		    " MARKER TLV = %d - We don't send out info type!\n",
821 		    portp->lp_devname, markerp->tlv_marker));
822 		goto bail;
823 	}
824 
825 	if (markerp->tlv_marker != MARKER_INFO_TLV) {
826 		AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: "
827 		    " MARKER TLV = %d \n", portp->lp_devname,
828 		    markerp->tlv_marker));
829 		goto bail;
830 	}
831 
832 	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
833 		AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: "
834 		    " MARKER length = %d \n", portp->lp_devname,
835 		    markerp->marker_len));
836 		goto bail;
837 	}
838 
839 	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
840 		AGGR_LACP_DBG(("trunk link (%s): MARKER PDU: "
841 		    " MARKER Port %d not equal to Partner port %d\n",
842 		    portp->lp_devname, markerp->requestor_port,
843 		    portp->lp_lacp.PartnerOperPortNum));
844 		goto bail;
845 	}
846 
847 	if (ether_cmp(&markerp->system_id,
848 	    &portp->lp_lacp.PartnerOperSystem) != 0) {
849 		AGGR_LACP_DBG(("trunk link (%s): MARKER PDU: "
850 		    " MARKER MAC not equal to Partner MAC\n",
851 		    portp->lp_devname));
852 		goto bail;
853 	}
854 
855 	/*
856 	 * Turn into Marker Response PDU
857 	 * and return mblk to sending system
858 	 */
859 	markerp->tlv_marker = MARKER_RESPONSE_TLV;
860 
861 	/* reuse the space that was used by received ethernet header */
862 	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
863 	mp->b_rptr -= sizeof (struct ether_header);
864 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
865 	AGGR_LACP_UNLOCK(portp->lp_grp);
866 
867 	/*
868 	 * Store the transmit info pointer locally in case it changes between
869 	 * loading mt_fn and mt_arg.
870 	 */
871 	mtp = portp->lp_txinfo;
872 	mtp->mt_fn(mtp->mt_arg, mp);
873 	return;
874 
875 bail:
876 	AGGR_LACP_UNLOCK(portp->lp_grp);
877 	freemsg(mp);
878 }
879 
880 
881 /*
882  * Update the LACP mode (off, active, or passive) of the specified group.
883  */
884 void
885 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
886 {
887 	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
888 	aggr_port_t *port;
889 
890 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
891 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
892 
893 	if (mode == old_mode)
894 		return;
895 
896 	grp->lg_lacp_mode = mode;
897 
898 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
899 		port->lp_lacp.ActorAdminPortState.bit.activity =
900 		    port->lp_lacp.ActorOperPortState.bit.activity =
901 		    (mode == AGGR_LACP_ACTIVE);
902 
903 		if (old_mode == AGGR_LACP_OFF) {
904 			/* OFF -> {PASSIVE,ACTIVE} */
905 			/* turn OFF Collector_Distributor */
906 			aggr_set_coll_dist(port, B_FALSE);
907 			rw_enter(&port->lp_lock, RW_WRITER);
908 			lacp_on(port);
909 			if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
910 				aggr_lacp_port_attached(port);
911 			rw_exit(&port->lp_lock);
912 		} else if (mode == AGGR_LACP_OFF) {
913 			/* {PASSIVE,ACTIVE} -> OFF */
914 			rw_enter(&port->lp_lock, RW_WRITER);
915 			lacp_off(port);
916 			rw_exit(&port->lp_lock);
917 			if (!grp->lg_closing) {
918 				/* Turn ON Collector_Distributor */
919 				aggr_set_coll_dist(port, B_TRUE);
920 			}
921 		} else {
922 			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
923 			port->lp_lacp.sm.begin = B_TRUE;
924 			lacp_mux_sm(port);
925 			lacp_periodic_sm(port);
926 
927 			/* kick off state machines */
928 			lacp_receive_sm(port, NULL);
929 			lacp_mux_sm(port);
930 		}
931 
932 		if (grp->lg_closing)
933 			break;
934 	}
935 }
936 
937 
938 /*
939  * Update the LACP timer (short or long) of the specified group.
940  */
941 void
942 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
943 {
944 	aggr_port_t *port;
945 
946 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
947 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
948 
949 	if (timer == grp->aggr.PeriodicTimer)
950 		return;
951 
952 	grp->aggr.PeriodicTimer = timer;
953 
954 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
955 		port->lp_lacp.ActorAdminPortState.bit.timeout =
956 		    port->lp_lacp.ActorOperPortState.bit.timeout =
957 		    (timer == AGGR_LACP_TIMER_SHORT);
958 	}
959 }
960 
961 
962 /*
963  * Sets the initial LACP mode (off, active, passive) and LACP timer
964  * (short, long) of the specified group.
965  */
966 void
967 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
968     aggr_lacp_timer_t timer)
969 {
970 	aggr_port_t *port;
971 
972 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
973 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
974 
975 	grp->lg_lacp_mode = mode;
976 	grp->aggr.PeriodicTimer = timer;
977 
978 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
979 		port->lp_lacp.ActorAdminPortState.bit.activity =
980 		    port->lp_lacp.ActorOperPortState.bit.activity =
981 		    (mode == AGGR_LACP_ACTIVE);
982 
983 		port->lp_lacp.ActorAdminPortState.bit.timeout =
984 		    port->lp_lacp.ActorOperPortState.bit.timeout =
985 		    (timer == AGGR_LACP_TIMER_SHORT);
986 
987 		if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
988 			/* Turn ON Collector_Distributor */
989 			aggr_set_coll_dist(port, B_TRUE);
990 		} else { /* LACP_ACTIVE/PASSIVE */
991 			rw_enter(&port->lp_lock, RW_WRITER);
992 			lacp_on(port);
993 			rw_exit(&port->lp_lock);
994 		}
995 	}
996 }
997 
998 /*
999  * Verify that the Partner MAC and Key recorded by the specified
1000  * port are not found in other ports that are not part of our
1001  * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1002  * otherwise.
1003  */
1004 static boolean_t
1005 lacp_misconfig_check(aggr_port_t *portp)
1006 {
1007 	aggr_grp_t *grp = portp->lp_grp;
1008 	lacp_sel_ports_t *cport;
1009 
1010 	mutex_enter(&lacp_sel_lock);
1011 
1012 	for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1013 
1014 		/* skip entries of the group of the port being checked */
1015 		if (cport->sp_key == grp->lg_key)
1016 			continue;
1017 
1018 		if ((ether_cmp(&cport->sp_partner_system,
1019 		    &grp->aggr.PartnerSystem) == 0) &&
1020 		    (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1021 			char mac_str[ETHERADDRL*3];
1022 			struct ether_addr *mac = &cport->sp_partner_system;
1023 
1024 			/*
1025 			 * The Partner port information is already in use
1026 			 * by ports in another aggregation so disable this
1027 			 * port.
1028 			 */
1029 
1030 			(void) snprintf(mac_str, sizeof (mac_str),
1031 			    "%x:%x:%x:%x:%x:%x",
1032 			    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1033 			    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1034 			    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1035 
1036 			portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1037 			cmn_err(CE_NOTE, "aggr key %d port %s: Port Partner "
1038 			    "MAC %s and key %d in use on aggregation "
1039 			    "key %d port %s\n", grp->lg_key,
1040 			    portp->lp_devname, mac_str,
1041 			    portp->lp_lacp.PartnerOperKey, cport->sp_key,
1042 			    cport->sp_devname);
1043 			break;
1044 		}
1045 	}
1046 
1047 	mutex_exit(&lacp_sel_lock);
1048 	return (cport != NULL);
1049 }
1050 
1051 /*
1052  * Remove the specified port from the list of selected ports.
1053  */
1054 static void
1055 lacp_sel_ports_del(aggr_port_t *portp)
1056 {
1057 	lacp_sel_ports_t *cport, **prev = NULL;
1058 
1059 	mutex_enter(&lacp_sel_lock);
1060 
1061 	prev = &sel_ports;
1062 	for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1063 	    cport = cport->sp_next) {
1064 		if (bcmp(portp->lp_devname, cport->sp_devname,
1065 		    MAXNAMELEN + 1) == 0) {
1066 			break;
1067 		}
1068 	}
1069 
1070 	if (cport == NULL) {
1071 		mutex_exit(&lacp_sel_lock);
1072 		return;
1073 	}
1074 
1075 	*prev = cport->sp_next;
1076 	kmem_free(cport, sizeof (*cport));
1077 
1078 	mutex_exit(&lacp_sel_lock);
1079 }
1080 
1081 /*
1082  * Add the specified port to the list of selected ports. Returns B_FALSE
1083  * if the operation could not be performed due to an memory allocation
1084  * error.
1085  */
1086 static boolean_t
1087 lacp_sel_ports_add(aggr_port_t *portp)
1088 {
1089 	lacp_sel_ports_t *new_port;
1090 	lacp_sel_ports_t *cport, **last;
1091 
1092 	mutex_enter(&lacp_sel_lock);
1093 
1094 	/* check if port is already in the list */
1095 	last = &sel_ports;
1096 	for (cport = sel_ports; cport != NULL;
1097 	    last = &cport->sp_next, cport = cport->sp_next) {
1098 		if (bcmp(portp->lp_devname, cport->sp_devname,
1099 		    MAXNAMELEN + 1) == 0) {
1100 			ASSERT(cport->sp_partner_key ==
1101 			    portp->lp_lacp.PartnerOperKey);
1102 			ASSERT(ether_cmp(&cport->sp_partner_system,
1103 			    &portp->lp_lacp.PartnerOperSystem) == 0);
1104 
1105 			mutex_exit(&lacp_sel_lock);
1106 			return (B_TRUE);
1107 		}
1108 	}
1109 
1110 	/* create and initialize new entry */
1111 	new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1112 	if (new_port == NULL) {
1113 		mutex_exit(&lacp_sel_lock);
1114 		return (B_FALSE);
1115 	}
1116 
1117 	new_port->sp_key = portp->lp_grp->lg_key;
1118 	bcopy(&portp->lp_lacp.PartnerOperSystem,
1119 	    &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1120 	new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1121 	bcopy(portp->lp_devname, new_port->sp_devname, MAXNAMELEN + 1);
1122 
1123 	*last = new_port;
1124 
1125 	mutex_exit(&lacp_sel_lock);
1126 	return (B_TRUE);
1127 }
1128 
1129 /*
1130  * lacp_selection_logic - LACP selection logic
1131  *		Sets the selected variable on a per port basis
1132  *		and sets Ready when all waiting ports are ready
1133  *		to go online.
1134  *
1135  * parameters:
1136  *      - portp - instance this applies to.
1137  *
1138  * invoked:
1139  *    - when initialization is needed
1140  *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1141  *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1142  *    - every time the wait_while_timer pops
1143  *    - everytime we turn LACP on/off
1144  */
1145 static void
1146 lacp_selection_logic(aggr_port_t *portp)
1147 {
1148 	aggr_port_t *tpp;
1149 	aggr_grp_t *aggrp = portp->lp_grp;
1150 	int ports_waiting;
1151 	boolean_t reset_mac = B_FALSE;
1152 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1153 
1154 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
1155 
1156 	/* LACP_OFF state not in specification so check here.  */
1157 	if (!pl->sm.lacp_on) {
1158 		lacp_port_unselect(portp);
1159 		aggrp->aggr.ready = B_FALSE;
1160 		lacp_mux_sm(portp);
1161 		return;
1162 	}
1163 
1164 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1165 	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1166 
1167 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): "
1168 		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1169 		    "lp_state=%d)\n", portp->lp_devname, pl->sm.selected,
1170 		    AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1171 		    portp->lp_state));
1172 
1173 		lacp_port_unselect(portp);
1174 		aggrp->aggr.ready = B_FALSE;
1175 		lacp_mux_sm(portp);
1176 		return;
1177 	}
1178 
1179 	/*
1180 	 * If LACP is not enabled then selected is never set.
1181 	 */
1182 	if (!pl->sm.lacp_enabled) {
1183 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): selected %d-->%d\n",
1184 		    portp->lp_devname, pl->sm.selected, AGGR_UNSELECTED));
1185 
1186 		lacp_port_unselect(portp);
1187 		lacp_mux_sm(portp);
1188 		return;
1189 	}
1190 
1191 	/*
1192 	 * Check if the Partner MAC or Key are zero. If so, we have
1193 	 * not received any LACP info or it has expired and the
1194 	 * receive machine is in the LACP_DEFAULTED state.
1195 	 */
1196 	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1197 	    (pl->PartnerOperKey == 0)) {
1198 
1199 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1200 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1201 			    &etherzeroaddr) != 0 &&
1202 			    (tpp->lp_lacp.PartnerOperKey != 0))
1203 				break;
1204 		}
1205 
1206 		/*
1207 		 * If all ports have no key or aggregation address,
1208 		 * then clear the negotiated Partner MAC and key.
1209 		 */
1210 		if (tpp == NULL) {
1211 			/* Clear the aggregation Partner MAC and key */
1212 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1213 			aggrp->aggr.PartnerOperAggrKey = 0;
1214 		}
1215 
1216 		return;
1217 	}
1218 
1219 	/*
1220 	 * Insure that at least one port in the aggregation
1221 	 * matches the Partner aggregation MAC and key. If not,
1222 	 * then clear the aggregation MAC and key. Later we will
1223 	 * set the Partner aggregation MAC and key to that of the
1224 	 * current port's Partner MAC and key.
1225 	 */
1226 	if (ether_cmp(&pl->PartnerOperSystem,
1227 	    &aggrp->aggr.PartnerSystem) != 0 ||
1228 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1229 
1230 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1231 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1232 			    &aggrp->aggr.PartnerSystem) == 0 &&
1233 			    (tpp->lp_lacp.PartnerOperKey ==
1234 			    aggrp->aggr.PartnerOperAggrKey))
1235 				break;
1236 		}
1237 
1238 		if (tpp == NULL) {
1239 			/* Clear the aggregation Partner MAC and key */
1240 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1241 			aggrp->aggr.PartnerOperAggrKey = 0;
1242 			reset_mac = B_TRUE;
1243 		}
1244 	}
1245 
1246 	/*
1247 	 * If our Actor MAC is found in the Partner MAC
1248 	 * on this port then we have a loopback misconfiguration.
1249 	 */
1250 	if (ether_cmp(&pl->PartnerOperSystem,
1251 	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1252 		cmn_err(CE_NOTE, "trunk link: (%s): Loopback condition.\n",
1253 		    portp->lp_devname);
1254 
1255 		lacp_port_unselect(portp);
1256 		lacp_mux_sm(portp);
1257 		return;
1258 	}
1259 
1260 	/*
1261 	 * If our Partner MAC and Key are found on any other
1262 	 * ports that are not in our aggregation, we have
1263 	 * a misconfiguration.
1264 	 */
1265 	if (lacp_misconfig_check(portp)) {
1266 		lacp_mux_sm(portp);
1267 		return;
1268 	}
1269 
1270 	/*
1271 	 * If the Aggregation Partner MAC and Key have not been
1272 	 * set, then this is either the first port or the aggregation
1273 	 * MAC and key have been reset. In either case we must set
1274 	 * the values of the Partner MAC and key.
1275 	 */
1276 	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1277 	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1278 		/* Set aggregation Partner MAC and key */
1279 		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1280 		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1281 
1282 		/*
1283 		 * If we reset Partner aggregation MAC, then restart
1284 		 * selection_logic on ports that match new MAC address.
1285 		 */
1286 		if (reset_mac) {
1287 			for (tpp = aggrp->lg_ports; tpp; tpp =
1288 			    tpp->lp_next) {
1289 				if (tpp == portp)
1290 					continue;
1291 				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1292 				    &aggrp->aggr.PartnerSystem) == 0 &&
1293 				    (tpp->lp_lacp.PartnerOperKey ==
1294 				    aggrp->aggr.PartnerOperAggrKey))
1295 					lacp_selection_logic(tpp);
1296 			}
1297 		}
1298 	} else if (ether_cmp(&pl->PartnerOperSystem,
1299 	    &aggrp->aggr.PartnerSystem) != 0 ||
1300 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1301 		/*
1302 		 * The Partner port information does not match
1303 		 * that of the other ports in the aggregation
1304 		 * so disable this port.
1305 		 */
1306 		lacp_port_unselect(portp);
1307 
1308 		cmn_err(CE_NOTE, "trunk link: (%s): Port Partner MAC or"
1309 		    " key (%d) incompatible with Aggregation Partner "
1310 		    "MAC or key (%d)\n",
1311 		    portp->lp_devname, pl->PartnerOperKey,
1312 		    aggrp->aggr.PartnerOperAggrKey);
1313 
1314 		lacp_mux_sm(portp);
1315 		return;
1316 	}
1317 
1318 	/* If we get to here, automatically set selected */
1319 	if (pl->sm.selected != AGGR_SELECTED) {
1320 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): "
1321 		    "selected %d-->%d\n", portp->lp_devname,
1322 		    pl->sm.selected, AGGR_SELECTED));
1323 		if (!lacp_port_select(portp))
1324 			return;
1325 		lacp_mux_sm(portp);
1326 	}
1327 
1328 	/*
1329 	 * From this point onward we have selected the port
1330 	 * and are simply checking if the Ready flag should
1331 	 * be set.
1332 	 */
1333 
1334 	/*
1335 	 * If at least two ports are waiting to aggregate
1336 	 * and ready_n is set on all ports waiting to aggregate
1337 	 * then set READY for the aggregation.
1338 	 */
1339 
1340 	ports_waiting = 0;
1341 
1342 	if (!aggrp->aggr.ready) {
1343 		/*
1344 		 * If all ports in the aggregation have received compatible
1345 		 * partner information and they match up correctly with the
1346 		 * switch, there is no need to wait for all the
1347 		 * wait_while_timers to pop.
1348 		 */
1349 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1350 			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1351 			    tpp->lp_lacp.sm.begin) &&
1352 			    !pl->PartnerOperPortState.bit.sync) {
1353 				/* Add up ports uninitialized or waiting */
1354 				ports_waiting++;
1355 				if (!tpp->lp_lacp.sm.ready_n)
1356 					return;
1357 			}
1358 		}
1359 	}
1360 
1361 	if (aggrp->aggr.ready) {
1362 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): "
1363 		    "aggr.ready already set\n", portp->lp_devname));
1364 		lacp_mux_sm(portp);
1365 	} else {
1366 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): Ready %d-->%d\n",
1367 		    portp->lp_devname, aggrp->aggr.ready, B_TRUE));
1368 		aggrp->aggr.ready = B_TRUE;
1369 
1370 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1371 			lacp_mux_sm(tpp);
1372 	}
1373 
1374 }
1375 
1376 /*
1377  * wait_while_timer_pop - When the timer pops, we arrive here to
1378  *			set ready_n and trigger the selection logic.
1379  */
1380 static void
1381 wait_while_timer_pop(void *data)
1382 {
1383 	aggr_port_t *portp = data;
1384 
1385 	if (portp->lp_closing)
1386 		return;
1387 
1388 	AGGR_LACP_LOCK(portp->lp_grp);
1389 
1390 	AGGR_LACP_DBG(("trunk link:(%s): wait_while_timer pop \n",
1391 	    portp->lp_devname));
1392 	portp->lp_lacp.wait_while_timer.id = 0;
1393 	portp->lp_lacp.sm.ready_n = B_TRUE;
1394 
1395 	lacp_selection_logic(portp);
1396 	AGGR_LACP_UNLOCK(portp->lp_grp);
1397 }
1398 
1399 static void
1400 start_wait_while_timer(aggr_port_t *portp)
1401 {
1402 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1403 
1404 	if (portp->lp_lacp.wait_while_timer.id == 0) {
1405 		portp->lp_lacp.wait_while_timer.id =
1406 		    timeout(wait_while_timer_pop, portp,
1407 		    drv_usectohz(1000000 *
1408 		    portp->lp_lacp.wait_while_timer.val));
1409 	}
1410 }
1411 
1412 
1413 static void
1414 stop_wait_while_timer(portp)
1415 aggr_port_t *portp;
1416 {
1417 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1418 
1419 	if (portp->lp_lacp.wait_while_timer.id != 0) {
1420 		AGGR_LACP_UNLOCK(portp->lp_grp);
1421 		(void) untimeout(portp->lp_lacp.wait_while_timer.id);
1422 		AGGR_LACP_LOCK(portp->lp_grp);
1423 		portp->lp_lacp.wait_while_timer.id = 0;
1424 	}
1425 }
1426 
1427 /*
1428  * Invoked when a port has been attached to a group.
1429  * Complete the processing that couldn't be finished from lacp_on()
1430  * because the port was not started. We know that the link is full
1431  * duplex and ON, otherwise it wouldn't be attached.
1432  */
1433 void
1434 aggr_lacp_port_attached(aggr_port_t *portp)
1435 {
1436 	aggr_grp_t *grp = portp->lp_grp;
1437 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1438 
1439 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1440 	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1441 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1442 
1443 	AGGR_LACP_DBG(("aggr_lacp_port_attached: port %s\n",
1444 	    portp->lp_devname));
1445 
1446 	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1447 
1448 	if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
1449 		pl->ActorAdminPortState.bit.activity =
1450 		    pl->ActorOperPortState.bit.activity = B_FALSE;
1451 
1452 		/* Turn ON Collector_Distributor */
1453 		aggr_set_coll_dist_locked(portp, B_TRUE);
1454 
1455 		return;
1456 	}
1457 
1458 	pl->ActorAdminPortState.bit.activity =
1459 	    pl->ActorOperPortState.bit.activity =
1460 	    (grp->lg_lacp_mode == AGGR_LACP_ACTIVE);
1461 
1462 	pl->ActorAdminPortState.bit.timeout =
1463 	    pl->ActorOperPortState.bit.timeout =
1464 	    (grp->aggr.PeriodicTimer == AGGR_LACP_TIMER_SHORT);
1465 
1466 	pl->sm.lacp_enabled = B_TRUE;
1467 	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1468 	pl->sm.begin = B_TRUE;
1469 
1470 	if (!pl->sm.lacp_on) {
1471 		/* Turn OFF Collector_Distributor */
1472 		aggr_set_coll_dist_locked(portp, B_FALSE);
1473 
1474 		lacp_on(portp);
1475 	} else {
1476 		lacp_receive_sm(portp, NULL);
1477 		lacp_mux_sm(portp);
1478 
1479 		/* Enable Multicast Slow Protocol address */
1480 		aggr_lacp_mcast_on(portp);
1481 
1482 		/* periodic_sm is started up from the receive machine */
1483 		lacp_selection_logic(portp);
1484 	}
1485 }
1486 
1487 /*
1488  * Invoked when a port has been detached from a group. Turn off
1489  * LACP processing if it was enabled.
1490  */
1491 void
1492 aggr_lacp_port_detached(aggr_port_t *portp)
1493 {
1494 	aggr_grp_t *grp = portp->lp_grp;
1495 
1496 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1497 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1498 
1499 	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %s\n",
1500 	    portp->lp_devname));
1501 
1502 	portp->lp_lacp.sm.port_enabled = B_FALSE;
1503 
1504 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1505 		return;
1506 
1507 	/* Disable Slow Protocol PDUs */
1508 	lacp_off(portp);
1509 }
1510 
1511 
1512 /*
1513  * Invoked after the outbound port selection policy has been changed.
1514  */
1515 void
1516 aggr_lacp_policy_changed(aggr_grp_t *grp)
1517 {
1518 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1519 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1520 
1521 	/* suspend transmission for CollectorMaxDelay time */
1522 	delay(grp->aggr.CollectorMaxDelay * 10);
1523 }
1524 
1525 
1526 /*
1527  * Enable Slow Protocol LACP and Marker PDUs.
1528  */
1529 static void
1530 lacp_on(aggr_port_t *portp)
1531 {
1532 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1533 	ASSERT(RW_WRITE_HELD(&portp->lp_grp->lg_lock));
1534 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1535 
1536 	/*
1537 	 * Reset the state machines and Partner operational
1538 	 * information. Careful to not reset things like
1539 	 * our link state.
1540 	 */
1541 	lacp_reset_port(portp);
1542 	portp->lp_lacp.sm.lacp_on = B_TRUE;
1543 
1544 	AGGR_LACP_DBG(("lacp_on:(%s): \n", portp->lp_devname));
1545 
1546 	lacp_receive_sm(portp, NULL);
1547 	lacp_mux_sm(portp);
1548 
1549 	if (portp->lp_state != AGGR_PORT_STATE_ATTACHED)
1550 		return;
1551 
1552 	/* Enable Multicast Slow Protocol address */
1553 	aggr_lacp_mcast_on(portp);
1554 
1555 	/* periodic_sm is started up from the receive machine */
1556 	lacp_selection_logic(portp);
1557 } /* lacp_on */
1558 
1559 
1560 /* Disable Slow Protocol LACP and Marker PDUs */
1561 static void
1562 lacp_off(aggr_port_t *portp)
1563 {
1564 	aggr_grp_t *grp = portp->lp_grp;
1565 
1566 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1567 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1568 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1569 
1570 	portp->lp_lacp.sm.lacp_on = B_FALSE;
1571 
1572 	AGGR_LACP_DBG(("lacp_off:(%s): \n", portp->lp_devname));
1573 
1574 	/*
1575 	 * Disable Slow Protocol Timers. We must temporarely release
1576 	 * the group and port locks in order to avod deadlocks. Make
1577 	 * sure that the port nor the group are closing after re-acquiring
1578 	 * their locks.
1579 	 */
1580 	rw_exit(&portp->lp_lock);
1581 	rw_exit(&grp->lg_lock);
1582 
1583 	stop_periodic_timer(portp);
1584 	stop_current_while_timer(portp);
1585 	stop_wait_while_timer(portp);
1586 
1587 	rw_enter(&grp->lg_lock, RW_WRITER);
1588 	rw_enter(&portp->lp_lock, RW_WRITER);
1589 
1590 	if (!portp->lp_closing && !grp->lg_closing) {
1591 		lacp_mux_sm(portp);
1592 		lacp_periodic_sm(portp);
1593 		lacp_selection_logic(portp);
1594 	}
1595 
1596 	/* Turn OFF Collector_Distributor */
1597 	aggr_set_coll_dist_locked(portp, B_FALSE);
1598 
1599 	/* Disable Multicast Slow Protocol address */
1600 	aggr_lacp_mcast_off(portp);
1601 
1602 	lacp_reset_port(portp);
1603 }
1604 
1605 
1606 static boolean_t
1607 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1608 {
1609 	/*
1610 	 * 43.4.12 - "a Receive machine shall not validate
1611 	 * the Version Number, TLV_type, or Reserved fields in received
1612 	 * LACPDUs."
1613 	 * ... "a Receive machine may validate the Actor_Information_Length,
1614 	 * Partner_Information_Length, Collector_Information_Length,
1615 	 * or Terminator_Length fields."
1616 	 */
1617 	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1618 	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1619 	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1620 	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1621 		AGGR_LACP_DBG(("trunk link (%s): Malformed LACPDU: "
1622 		    " Terminator Length = %d \n", portp->lp_devname,
1623 		    lacp->terminator_len));
1624 		return (B_FALSE);
1625 	}
1626 
1627 	return (B_TRUE);
1628 }
1629 
1630 
1631 static void
1632 start_current_while_timer(aggr_port_t *portp, uint_t time)
1633 {
1634 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1635 
1636 	if (portp->lp_lacp.current_while_timer.id == 0) {
1637 		if (time > 0) {
1638 			portp->lp_lacp.current_while_timer.val = time;
1639 		} else if (portp->lp_lacp.ActorOperPortState.bit.timeout) {
1640 			portp->lp_lacp.current_while_timer.val =
1641 			    SHORT_TIMEOUT_TIME;
1642 		} else {
1643 			portp->lp_lacp.current_while_timer.val =
1644 			    LONG_TIMEOUT_TIME;
1645 		}
1646 
1647 		portp->lp_lacp.current_while_timer.id =
1648 		    timeout(current_while_timer_pop, portp,
1649 		    drv_usectohz((clock_t)1000000 *
1650 		    (clock_t)portp->lp_lacp.current_while_timer.val));
1651 	}
1652 }
1653 
1654 
1655 static void
1656 stop_current_while_timer(aggr_port_t *portp)
1657 {
1658 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1659 
1660 	if (portp->lp_lacp.current_while_timer.id != 0) {
1661 		AGGR_LACP_UNLOCK(portp->lp_grp);
1662 		(void) untimeout(portp->lp_lacp.current_while_timer.id);
1663 		AGGR_LACP_LOCK(portp->lp_grp);
1664 		portp->lp_lacp.current_while_timer.id = 0;
1665 	}
1666 }
1667 
1668 
1669 static void
1670 current_while_timer_pop(void *data)
1671 {
1672 	aggr_port_t *portp = (aggr_port_t *)data;
1673 
1674 	if (portp->lp_closing)
1675 		return;
1676 
1677 	AGGR_LACP_LOCK(portp->lp_grp);
1678 
1679 	AGGR_LACP_DBG(("trunk link:(%s): current_while_timer "
1680 	    "pop id=%p\n", portp->lp_devname,
1681 	    portp->lp_lacp.current_while_timer.id));
1682 
1683 	portp->lp_lacp.current_while_timer.id = 0;
1684 	lacp_receive_sm(portp, NULL);
1685 	AGGR_LACP_UNLOCK(portp->lp_grp);
1686 }
1687 
1688 
1689 /*
1690  * record_Default - Simply copies over administrative values
1691  * to the partner operational values, and sets our state to indicate we
1692  * are using defaulted values.
1693  */
1694 static void
1695 record_Default(aggr_port_t *portp)
1696 {
1697 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1698 
1699 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1700 
1701 	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1702 	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1703 	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1704 	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1705 	pl->PartnerOperKey = pl->PartnerAdminKey;
1706 	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1707 
1708 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1709 }
1710 
1711 
1712 /* Returns B_TRUE on sync value changing */
1713 static boolean_t
1714 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1715 {
1716 	aggr_grp_t *aggrp = portp->lp_grp;
1717 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1718 	uint8_t save_sync;
1719 
1720 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1721 
1722 	/*
1723 	 * Partner Information
1724 	 */
1725 	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1726 	pl->PartnerOperPortPriority =
1727 	    ntohs(lacp->actor_info.port_priority);
1728 	pl->PartnerOperSystem = lacp->actor_info.system_id;
1729 	pl->PartnerOperSysPriority =
1730 	    htons(lacp->actor_info.system_priority);
1731 	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1732 
1733 	/* All state info except for Synchronization */
1734 	save_sync = pl->PartnerOperPortState.bit.sync;
1735 	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1736 
1737 	/* Defaulted set to FALSE */
1738 	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1739 
1740 	/*
1741 	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1742 	 *		Partner_System_Priority, Partner_Key, and
1743 	 *		Partner_State.Aggregation) are compared to the
1744 	 *		corresponding operations paramters values for
1745 	 *		the Actor. If these are equal, or if this is
1746 	 *		an individual link, we are synchronized.
1747 	 */
1748 	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1749 	    (ntohs(lacp->partner_info.port_priority) ==
1750 	    pl->ActorPortPriority) &&
1751 	    (ether_cmp(&lacp->partner_info.system_id,
1752 	    (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1753 	    (ntohs(lacp->partner_info.system_priority) ==
1754 	    aggrp->aggr.ActorSystemPriority) &&
1755 	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1756 	    (lacp->partner_info.state.bit.aggregation ==
1757 	    pl->ActorOperPortState.bit.aggregation)) ||
1758 	    (!lacp->actor_info.state.bit.aggregation)) {
1759 
1760 		pl->PartnerOperPortState.bit.sync =
1761 		    lacp->actor_info.state.bit.sync;
1762 	} else {
1763 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1764 	}
1765 
1766 	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1767 		AGGR_LACP_DBG(("record_PDU:(%s): partner sync "
1768 		    "%d -->%d\n", portp->lp_devname, save_sync,
1769 		    pl->PartnerOperPortState.bit.sync));
1770 		return (B_TRUE);
1771 	} else {
1772 		return (B_FALSE);
1773 	}
1774 }
1775 
1776 
1777 /*
1778  * update_selected - If any of the Partner parameters has
1779  *			changed from a previous value, then
1780  *			unselect the link from the aggregator.
1781  */
1782 static boolean_t
1783 update_selected(aggr_port_t *portp, lacp_t *lacp)
1784 {
1785 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1786 
1787 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1788 
1789 	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1790 	    (pl->PartnerOperPortPriority !=
1791 	    ntohs(lacp->actor_info.port_priority)) ||
1792 	    (ether_cmp(&pl->PartnerOperSystem,
1793 	    &lacp->actor_info.system_id) != 0) ||
1794 	    (pl->PartnerOperSysPriority !=
1795 	    ntohs(lacp->actor_info.system_priority)) ||
1796 	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1797 	    (pl->PartnerOperPortState.bit.aggregation !=
1798 	    lacp->actor_info.state.bit.aggregation)) {
1799 		AGGR_LACP_DBG(("update_selected:(%s): "
1800 		    "selected  %d-->%d\n", portp->lp_devname, pl->sm.selected,
1801 		    AGGR_UNSELECTED));
1802 
1803 		lacp_port_unselect(portp);
1804 		return (B_TRUE);
1805 	} else {
1806 		return (B_FALSE);
1807 	}
1808 }
1809 
1810 
1811 /*
1812  * update_default_selected - If any of the operational Partner parameters
1813  *			is different than that of the administrative values
1814  *			then unselect the link from the aggregator.
1815  */
1816 static void
1817 update_default_selected(aggr_port_t *portp)
1818 {
1819 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1820 
1821 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1822 
1823 	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1824 	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1825 	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1826 	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1827 	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1828 	    (pl->PartnerOperPortState.bit.aggregation !=
1829 	    pl->PartnerAdminPortState.bit.aggregation)) {
1830 
1831 		AGGR_LACP_DBG(("update_default_selected:(%s): "
1832 		    "selected  %d-->%d\n", portp->lp_devname,
1833 		    pl->sm.selected, AGGR_UNSELECTED));
1834 
1835 		lacp_port_unselect(portp);
1836 	}
1837 }
1838 
1839 
1840 /*
1841  * update_NTT - If any of the Partner values in the received LACPDU
1842  *			are different than that of the Actor operational
1843  *			values then set NTT to true.
1844  */
1845 static void
1846 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1847 {
1848 	aggr_grp_t *aggrp = portp->lp_grp;
1849 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1850 
1851 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1852 
1853 	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1854 	    (pl->ActorPortPriority !=
1855 	    ntohs(lacp->partner_info.port_priority)) ||
1856 	    (ether_cmp(&aggrp->lg_addr,
1857 	    &lacp->partner_info.system_id) != 0) ||
1858 	    (aggrp->aggr.ActorSystemPriority !=
1859 	    ntohs(lacp->partner_info.system_priority)) ||
1860 	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1861 	    (pl->ActorOperPortState.bit.activity !=
1862 	    lacp->partner_info.state.bit.activity) ||
1863 	    (pl->ActorOperPortState.bit.timeout !=
1864 	    lacp->partner_info.state.bit.timeout) ||
1865 	    (pl->ActorOperPortState.bit.sync !=
1866 	    lacp->partner_info.state.bit.sync) ||
1867 	    (pl->ActorOperPortState.bit.aggregation !=
1868 	    lacp->partner_info.state.bit.aggregation)) {
1869 
1870 		AGGR_LACP_DBG(("update_NTT:(%s): NTT  %d-->%d\n",
1871 		    portp->lp_devname, pl->NTT, B_TRUE));
1872 
1873 		pl->NTT = B_TRUE;
1874 	}
1875 }
1876 
1877 /*
1878  * lacp_receive_sm - LACP receive state machine
1879  *
1880  * parameters:
1881  *      - portp - instance this applies to.
1882  *      - lacp - pointer in the case of a received LACPDU.
1883  *                This value is NULL if there is no LACPDU.
1884  *
1885  * invoked:
1886  *    - when initialization is needed
1887  *    - upon reception of an LACPDU. This is the common case.
1888  *    - every time the current_while_timer pops
1889  */
1890 static void
1891 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
1892 {
1893 	boolean_t sync_updated, selected_updated, save_activity;
1894 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1895 	lacp_receive_state_t oldstate = pl->sm.receive_state;
1896 
1897 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1898 
1899 	/* LACP_OFF state not in specification so check here.  */
1900 	if (!pl->sm.lacp_on)
1901 		return;
1902 
1903 	/* figure next state */
1904 	if (pl->sm.begin || pl->sm.port_moved) {
1905 		pl->sm.receive_state = LACP_INITIALIZE;
1906 	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
1907 		pl->sm.receive_state = LACP_PORT_DISABLED;
1908 	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
1909 		pl->sm.receive_state =
1910 		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
1911 		    LACP_DISABLED : LACP_PORT_DISABLED;
1912 	} else if (lacp != NULL) {
1913 		if ((pl->sm.receive_state == LACP_EXPIRED) ||
1914 		    (pl->sm.receive_state == LACP_DEFAULTED)) {
1915 			pl->sm.receive_state = LACP_CURRENT;
1916 		}
1917 	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
1918 	    (pl->current_while_timer.id == 0)) {
1919 		pl->sm.receive_state = LACP_EXPIRED;
1920 	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
1921 	    (pl->current_while_timer.id == 0)) {
1922 		pl->sm.receive_state = LACP_DEFAULTED;
1923 	}
1924 
1925 
1926 	if (!((lacp && (oldstate == LACP_CURRENT) &&
1927 	    (pl->sm.receive_state == LACP_CURRENT)))) {
1928 		AGGR_LACP_DBG(("lacp_receive_sm(%s):%s--->%s\n",
1929 		    portp->lp_devname, lacp_receive_str[oldstate],
1930 		    lacp_receive_str[pl->sm.receive_state]));
1931 	}
1932 
1933 	switch (pl->sm.receive_state) {
1934 	case LACP_INITIALIZE:
1935 		lacp_port_unselect(portp);
1936 		record_Default(portp);
1937 		pl->ActorOperPortState.bit.expired = B_FALSE;
1938 		pl->sm.port_moved = B_FALSE;
1939 		pl->sm.receive_state = LACP_PORT_DISABLED;
1940 		pl->sm.begin = B_FALSE;
1941 		lacp_receive_sm(portp, NULL);
1942 		break;
1943 
1944 	case LACP_PORT_DISABLED:
1945 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1946 		/*
1947 		 * Stop current_while_timer in case
1948 		 * we got here from link down
1949 		 */
1950 		stop_current_while_timer(portp);
1951 
1952 		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
1953 			pl->sm.receive_state = LACP_DISABLED;
1954 			lacp_receive_sm(portp, lacp);
1955 			/* We goto LACP_DISABLED state */
1956 			break;
1957 		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
1958 			pl->sm.receive_state = LACP_EXPIRED;
1959 			/*
1960 			 * FALL THROUGH TO LACP_EXPIRED CASE:
1961 			 * We have no way of knowing if we get into
1962 			 * lacp_receive_sm() from a  current_while_timer
1963 			 * expiring as it has never been kicked off yet!
1964 			 */
1965 		} else {
1966 			/* We stay in LACP_PORT_DISABLED state */
1967 			break;
1968 		}
1969 		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
1970 		/* FALLTHROUGH */
1971 
1972 	case LACP_EXPIRED:
1973 		/*
1974 		 * Arrives here from LACP_PORT_DISABLED state as well as
1975 		 * as well as current_while_timer expiring.
1976 		 */
1977 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1978 		pl->PartnerOperPortState.bit.timeout = B_TRUE;
1979 
1980 		pl->ActorOperPortState.bit.expired = B_TRUE;
1981 		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
1982 		lacp_periodic_sm(portp);
1983 		break;
1984 
1985 	case LACP_DISABLED:
1986 		/*
1987 		 * This is the normal state for recv_sm when LACP_OFF
1988 		 * is set or the NIC is in half duplex mode.
1989 		 */
1990 		lacp_port_unselect(portp);
1991 		record_Default(portp);
1992 		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
1993 		pl->ActorOperPortState.bit.expired = B_FALSE;
1994 		break;
1995 
1996 	case LACP_DEFAULTED:
1997 		/*
1998 		 * Current_while_timer expired a second time.
1999 		 */
2000 		update_default_selected(portp);
2001 		record_Default(portp);	/* overwrite Partner Oper val */
2002 		pl->ActorOperPortState.bit.expired = B_FALSE;
2003 		pl->PartnerOperPortState.bit.sync = B_TRUE;
2004 
2005 		lacp_selection_logic(portp);
2006 		lacp_mux_sm(portp);
2007 		break;
2008 
2009 	case LACP_CURRENT:
2010 		/*
2011 		 * Reception of LACPDU
2012 		 */
2013 
2014 		if (!lacp) /* no LACPDU so current_while_timer popped */
2015 			break;
2016 
2017 		AGGR_LACP_DBG(("lacp_receive_sm: (%s): LACPDU received:\n",
2018 		    portp->lp_devname));
2019 
2020 		/*
2021 		 * Validate Actor_Information_Length,
2022 		 * Partner_Information_Length, Collector_Information_Length,
2023 		 * and Terminator_Length fields.
2024 		 */
2025 		if (!valid_lacp_pdu(portp, lacp)) {
2026 			AGGR_LACP_DBG(("lacp_receive_sm (%s): "
2027 			    "Invalid LACPDU received\n",
2028 			    portp->lp_devname));
2029 			break;
2030 		}
2031 
2032 		save_activity = pl->PartnerOperPortState.bit.activity;
2033 		selected_updated = update_selected(portp, lacp);
2034 		update_NTT(portp, lacp);
2035 		sync_updated = record_PDU(portp, lacp);
2036 
2037 		pl->ActorOperPortState.bit.expired = B_FALSE;
2038 
2039 		if (selected_updated) {
2040 			lacp_selection_logic(portp);
2041 			lacp_mux_sm(portp);
2042 		} else if (sync_updated) {
2043 			lacp_mux_sm(portp);
2044 		}
2045 
2046 		/*
2047 		 * If the periodic timer value bit has been modified
2048 		 * or the partner activity bit has been changed then
2049 		 * we need to respectively:
2050 		 *  - restart the timer with the proper timeout value.
2051 		 *  - possibly enable/disable transmission of LACPDUs.
2052 		 */
2053 		if ((pl->PartnerOperPortState.bit.timeout &&
2054 		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2055 		    (!pl->PartnerOperPortState.bit.timeout &&
2056 		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2057 		    (pl->PartnerOperPortState.bit.activity !=
2058 		    save_activity)) {
2059 			lacp_periodic_sm(portp);
2060 		}
2061 
2062 		stop_current_while_timer(portp);
2063 		/* Check if we need to transmit an LACPDU */
2064 		if (pl->NTT)
2065 			lacp_xmit_sm(portp);
2066 		start_current_while_timer(portp, 0);
2067 
2068 		break;
2069 	}
2070 }
2071 
2072 static void
2073 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2074 {
2075 	rw_enter(&portp->lp_lock, RW_WRITER);
2076 	aggr_set_coll_dist_locked(portp, enable);
2077 	rw_exit(&portp->lp_lock);
2078 }
2079 
2080 static void
2081 aggr_set_coll_dist_locked(aggr_port_t *portp, boolean_t enable)
2082 {
2083 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
2084 
2085 	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%s) %s\n",
2086 	    portp->lp_devname, enable ? "ENABLED" : "DISABLED"));
2087 
2088 	if (!enable) {
2089 		/*
2090 		 * Turn OFF Collector_Distributor.
2091 		 */
2092 		portp->lp_collector_enabled = B_FALSE;
2093 		aggr_send_port_disable(portp);
2094 		return;
2095 	}
2096 
2097 	/*
2098 	 * Turn ON Collector_Distributor.
2099 	 */
2100 
2101 	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2102 	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2103 		/* Port is compatible and can be aggregated */
2104 		portp->lp_collector_enabled = B_TRUE;
2105 		aggr_send_port_enable(portp);
2106 	}
2107 }
2108 
2109 /*
2110  * Process a received Marker or LACPDU.
2111  */
2112 void
2113 aggr_lacp_rx(aggr_port_t *portp, mblk_t *dmp)
2114 {
2115 	lacp_t	*lacp;
2116 
2117 	dmp->b_rptr += sizeof (struct ether_header);
2118 
2119 	if (MBLKL(dmp) < sizeof (lacp_t)) {
2120 		freemsg(dmp);
2121 		return;
2122 	}
2123 
2124 	lacp = (lacp_t *)dmp->b_rptr;
2125 
2126 	switch (lacp->subtype) {
2127 	case LACP_SUBTYPE:
2128 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s): LACPDU received.\n",
2129 		    portp->lp_devname));
2130 
2131 		AGGR_LACP_LOCK(portp->lp_grp);
2132 		if (!portp->lp_lacp.sm.lacp_on) {
2133 			AGGR_LACP_UNLOCK(portp->lp_grp);
2134 			break;
2135 		}
2136 		lacp_receive_sm(portp, lacp);
2137 		AGGR_LACP_UNLOCK(portp->lp_grp);
2138 		break;
2139 
2140 	case MARKER_SUBTYPE:
2141 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s): Marker Packet received.\n",
2142 		    portp->lp_devname));
2143 
2144 		(void) receive_marker_pdu(portp, dmp);
2145 		break;
2146 
2147 	default:
2148 		AGGR_LACP_DBG(("aggr_lacp_rx: (%s): "
2149 		    "Unknown Slow Protocol type %d\n",
2150 		    portp->lp_devname, lacp->subtype));
2151 		break;
2152 	}
2153 
2154 	freemsg(dmp);
2155 }
2156