xref: /titanic_51/usr/src/uts/common/io/aggr/aggr_lacp.c (revision c40f76e346ad844b9326c2049644b7b1d1a93e48)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/list.h>
37 #include <sys/ksynch.h>
38 #include <sys/kmem.h>
39 #include <sys/stream.h>
40 #include <sys/modctl.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/atomic.h>
44 #include <sys/stat.h>
45 #include <sys/byteorder.h>
46 #include <sys/strsun.h>
47 #include <sys/isa_defs.h>
48 
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
51 
52 static struct ether_addr	etherzeroaddr = {
53 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
54 };
55 
56 /*
57  * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
58  */
59 static struct ether_addr   slow_multicast_addr = {
60 	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
61 };
62 
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG() takes a parenthesized printf() argument list, e.g.
 * AGGR_LACP_DBG(("fmt %d\n", val)). On non-DEBUG builds it expands to an
 * empty block; on DEBUG builds it prints only when aggr_lacp_debug is
 * non-zero.
 */
#ifndef DEBUG
#define	AGGR_LACP_DBG(x)	{}
#else
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	if (aggr_lacp_debug) { (void) printf x; }
#endif /* !DEBUG */
70 
/* Nanoseconds per second; compared against gethrtime() deltas. */
#define	NSECS_PER_SEC   1000000000LL
72 
73 /* used by lacp_misconfig_walker() */
74 typedef struct lacp_misconfig_check_state_s {
75 	aggr_port_t *cs_portp;
76 	boolean_t cs_found;
77 } lacp_misconfig_check_state_t;
78 
79 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
80 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
81 static const char *lacp_mux_str[] = LACP_MUX_STRINGS;
82 
83 static uint16_t lacp_port_priority = 0x1000;
84 static uint16_t lacp_system_priority = 0x1000;
85 
86 /*
87  * Maintains a list of all ports in ATTACHED state. This information
88  * is used to detect misconfiguration.
89  */
90 typedef struct lacp_sel_ports {
91 	uint16_t sp_key;
92 	char sp_devname[MAXNAMELEN + 1];
93 	struct ether_addr sp_partner_system;
94 	uint32_t sp_partner_key;
95 	struct lacp_sel_ports *sp_next;
96 } lacp_sel_ports_t;
97 
98 static lacp_sel_ports_t *sel_ports = NULL;
99 static kmutex_t lacp_sel_lock;
100 
101 static void periodic_timer_pop_locked(aggr_port_t *);
102 static void periodic_timer_pop(void *);
103 static void lacp_xmit_sm(aggr_port_t *);
104 static void lacp_periodic_sm(aggr_port_t *);
105 static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
106 static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
107 static void lacp_on(aggr_port_t *);
108 static void lacp_off(aggr_port_t *);
109 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
110 static void lacp_receive_sm(aggr_port_t *, lacp_t *);
111 static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
112 static void aggr_set_coll_dist_locked(aggr_port_t *, boolean_t);
113 static void start_wait_while_timer(aggr_port_t *);
114 static void stop_wait_while_timer(aggr_port_t *);
115 static void lacp_reset_port(aggr_port_t *);
116 static void stop_current_while_timer(aggr_port_t *);
117 static void current_while_timer_pop(void *);
118 static void update_default_selected(aggr_port_t *);
119 static boolean_t update_selected(aggr_port_t *, lacp_t *);
120 static boolean_t lacp_sel_ports_add(aggr_port_t *);
121 static void lacp_sel_ports_del(aggr_port_t *);
122 
123 void
124 aggr_lacp_init(void)
125 {
126 	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
127 }
128 
129 void
130 aggr_lacp_fini(void)
131 {
132 	mutex_destroy(&lacp_sel_lock);
133 }
134 
/*
 * Return the decimal instance number formed by the trailing digits of
 * devname (e.g. "bge12" -> 12). Returns 0 when devname is empty or does
 * not end in a digit.
 *
 * The scan walks backwards by index and checks the bound before reading
 * a character, so a name consisting only of digits (or an empty name)
 * never causes a read before the start of the string.
 */
static int
inst_num(char *devname)
{
	size_t idx = strlen(devname);
	int inst = 0;
	int fact = 1;

	while (idx > 0 &&
	    devname[idx - 1] >= '0' && devname[idx - 1] <= '9') {
		inst += (devname[idx - 1] - '0') * fact;
		fact *= 10;
		idx--;
	}

	return (inst);
}
150 
151 /*
152  * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
153  * could not be performed due to a memory allocation error, B_TRUE otherwise.
154  */
155 static boolean_t
156 lacp_port_select(aggr_port_t *portp)
157 {
158 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
159 
160 	if (!lacp_sel_ports_add(portp))
161 		return (B_FALSE);
162 	portp->lp_lacp.sm.selected = AGGR_SELECTED;
163 	return (B_TRUE);
164 }
165 
166 /*
167  * Set the port LACP state to UNSELECTED.
168  */
169 static void
170 lacp_port_unselect(aggr_port_t *portp)
171 {
172 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
173 
174 	lacp_sel_ports_del(portp);
175 	portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
176 }
177 
178 /*
179  * Initialize group specific LACP state and parameters.
180  */
181 void
182 aggr_lacp_init_grp(aggr_grp_t *aggrp)
183 {
184 	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
185 	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
186 	aggrp->aggr.CollectorMaxDelay = 10;
187 	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
188 	aggrp->aggr.ready = B_FALSE;
189 }
190 
191 /*
192  * Complete LACP info initialization at port creation time.
193  */
194 void
195 aggr_lacp_init_port(aggr_port_t *portp)
196 {
197 	aggr_grp_t *aggrp = portp->lp_grp;
198 	aggr_lacp_port_t *pl = &portp->lp_lacp;
199 	uint16_t offset;
200 	uint32_t instance;
201 
202 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
203 	ASSERT(RW_LOCK_HELD(&aggrp->lg_lock));
204 	ASSERT(RW_LOCK_HELD(&portp->lp_lock));
205 
206 	/*
207 	 * Port numbers must be unique. For now, we encode the first two
208 	 * characters into the top byte of the port number. This will work
209 	 * with multiple types of NICs provided that the first two
210 	 * characters are unique.
211 	 */
212 	offset = ((portp->lp_devname[0] + portp->lp_devname[1]) << 8);
213 	instance = inst_num(portp->lp_devname);
214 	/* actor port # */
215 	pl->ActorPortNumber = offset + instance;
216 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s): "
217 	    "ActorPortNumber = 0x%x\n", portp->lp_devname,
218 	    pl->ActorPortNumber));
219 
220 	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
221 	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
222 	pl->NTT = B_FALSE;			/* need to transmit */
223 
224 	pl->ActorAdminPortKey = aggrp->lg_key;
225 	pl->ActorOperPortKey = pl->ActorAdminPortKey;
226 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s) "
227 	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
228 	    portp->lp_devname, pl->ActorAdminPortKey, pl->ActorOperPortKey));
229 
230 	/* Actor admin. port state */
231 	pl->ActorAdminPortState.bit.activity = B_FALSE;
232 	pl->ActorAdminPortState.bit.timeout = B_TRUE;
233 	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
234 	pl->ActorAdminPortState.bit.sync = B_FALSE;
235 	pl->ActorAdminPortState.bit.collecting = B_FALSE;
236 	pl->ActorAdminPortState.bit.distributing = B_FALSE;
237 	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
238 	pl->ActorAdminPortState.bit.expired = B_FALSE;
239 	pl->ActorOperPortState = pl->ActorAdminPortState;
240 
241 	/*
242 	 * Partner Administrative Information
243 	 * (All initialized to zero except for the following)
244 	 * Fast Timeouts.
245 	 */
246 	pl->PartnerAdminPortState.bit.timeout =
247 	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
248 
249 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
250 
251 	/*
252 	 * State machine information.
253 	 */
254 	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
255 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
256 	pl->sm.lacp_enabled = B_FALSE;
257 	pl->sm.port_enabled = B_FALSE;		/* Link Down */
258 	pl->sm.actor_churn = B_FALSE;
259 	pl->sm.partner_churn = B_FALSE;
260 	pl->sm.ready_n = B_FALSE;
261 	pl->sm.port_moved = B_FALSE;
262 
263 	lacp_port_unselect(portp);
264 
265 	pl->sm.periodic_state = LACP_NO_PERIODIC;
266 	pl->sm.receive_state = LACP_INITIALIZE;
267 	pl->sm.mux_state = LACP_DETACHED;
268 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
269 
270 	/*
271 	 * Timer information.
272 	 */
273 	pl->current_while_timer.id = 0;
274 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
275 
276 	pl->periodic_timer.id = 0;
277 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
278 
279 	pl->wait_while_timer.id = 0;
280 	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
281 }
282 
283 /*
284  * Port initialization when we need to
285  * turn LACP on/off, etc. Not everything is
286  * reset like in the above routine.
287  *		Do NOT modify things like link status.
288  */
289 static void
290 lacp_reset_port(aggr_port_t *portp)
291 {
292 	aggr_lacp_port_t *pl = &portp->lp_lacp;
293 
294 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
295 
296 	pl->NTT = B_FALSE;			/* need to transmit */
297 
298 	/* reset operational port state */
299 	pl->ActorOperPortState.bit.timeout =
300 		pl->ActorAdminPortState.bit.timeout;
301 
302 	pl->ActorOperPortState.bit.sync = B_FALSE;
303 	pl->ActorOperPortState.bit.collecting = B_FALSE;
304 	pl->ActorOperPortState.bit.distributing = B_FALSE;
305 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
306 	pl->ActorOperPortState.bit.expired = B_FALSE;
307 
308 	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
309 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
310 
311 	/*
312 	 * State machine information.
313 	 */
314 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
315 	pl->sm.actor_churn = B_FALSE;
316 	pl->sm.partner_churn = B_FALSE;
317 	pl->sm.ready_n = B_FALSE;
318 
319 	lacp_port_unselect(portp);
320 
321 	pl->sm.periodic_state = LACP_NO_PERIODIC;
322 	pl->sm.receive_state = LACP_INITIALIZE;
323 	pl->sm.mux_state = LACP_DETACHED;
324 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
325 
326 	/*
327 	 * Timer information.
328 	 */
329 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
330 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
331 }
332 
333 static void
334 aggr_lacp_mcast_on(aggr_port_t *port)
335 {
336 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
337 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
338 
339 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
340 		return;
341 
342 	(void) aggr_port_multicst(port, B_TRUE,
343 	    (uchar_t *)&slow_multicast_addr);
344 }
345 
346 static void
347 aggr_lacp_mcast_off(aggr_port_t *port)
348 {
349 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
350 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
351 
352 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
353 		return;
354 
355 	(void) aggr_port_multicst(port, B_FALSE,
356 	    (uchar_t *)&slow_multicast_addr);
357 }
358 
359 static void
360 start_periodic_timer(aggr_port_t *portp)
361 {
362 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
363 
364 	if (portp->lp_lacp.periodic_timer.id == 0) {
365 		portp->lp_lacp.periodic_timer.id =
366 		    timeout(periodic_timer_pop, portp,
367 		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
368 	}
369 }
370 
371 static void
372 stop_periodic_timer(aggr_port_t *portp)
373 {
374 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
375 
376 	if (portp->lp_lacp.periodic_timer.id != 0) {
377 		AGGR_LACP_UNLOCK(portp->lp_grp);
378 		(void) untimeout(portp->lp_lacp.periodic_timer.id);
379 		AGGR_LACP_LOCK(portp->lp_grp);
380 		portp->lp_lacp.periodic_timer.id = 0;
381 	}
382 }
383 
384 /*
385  * When the timer pops, we arrive here to
386  * clear out LACPDU count as well as transmit an
387  * LACPDU. We then set the periodic state and let
388  * the periodic state machine restart the timer.
389  */
390 
391 static void
392 periodic_timer_pop_locked(aggr_port_t *portp)
393 {
394 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
395 
396 	portp->lp_lacp.periodic_timer.id = NULL;
397 	portp->lp_lacp_stats.LACPDUsTx = 0;
398 
399 	/* current timestamp */
400 	portp->lp_lacp.time = gethrtime();
401 	portp->lp_lacp.NTT = B_TRUE;
402 	lacp_xmit_sm(portp);
403 
404 	/*
405 	 * Set Periodic State machine state based on the
406 	 * value of the Partner Operation Port State timeout
407 	 * bit.
408 	 */
409 	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
410 		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
411 		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
412 	} else {
413 		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
414 		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
415 	}
416 
417 	lacp_periodic_sm(portp);
418 }
419 
420 static void
421 periodic_timer_pop(void *data)
422 {
423 	aggr_port_t *portp = data;
424 
425 	if (portp->lp_closing)
426 		return;
427 
428 	AGGR_LACP_LOCK(portp->lp_grp);
429 	periodic_timer_pop_locked(portp);
430 	AGGR_LACP_UNLOCK(portp->lp_grp);
431 }
432 
433 /*
434  * Invoked from:
435  *	- startup upon aggregation
436  *	- when the periodic timer pops
437  *	- when the periodic timer value is changed
438  *	- when the port is attached or detached
439  *	- when LACP mode is changed.
440  */
441 static void
442 lacp_periodic_sm(aggr_port_t *portp)
443 {
444 	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
445 	aggr_lacp_port_t *pl = &portp->lp_lacp;
446 
447 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
448 
449 	/* LACP_OFF state not in specification so check here.  */
450 	if (!pl->sm.lacp_on) {
451 		/* Stop timer whether it is running or not */
452 		stop_periodic_timer(portp);
453 		pl->sm.periodic_state = LACP_NO_PERIODIC;
454 		pl->NTT = B_FALSE;
455 		AGGR_LACP_DBG(("lacp_periodic_sm(%s):NO LACP "
456 		    "%s--->%s\n", portp->lp_devname,
457 		    lacp_periodic_str[oldstate],
458 		    lacp_periodic_str[pl->sm.periodic_state]));
459 		return;
460 	}
461 
462 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
463 	    !pl->sm.port_enabled ||
464 	    !pl->ActorOperPortState.bit.activity &&
465 	    !pl->PartnerOperPortState.bit.activity) {
466 
467 		/* Stop timer whether it is running or not */
468 		stop_periodic_timer(portp);
469 		pl->sm.periodic_state = LACP_NO_PERIODIC;
470 		pl->NTT = B_FALSE;
471 		AGGR_LACP_DBG(("lacp_periodic_sm(%s):STOP %s--->%s\n",
472 		    portp->lp_devname, lacp_periodic_str[oldstate],
473 		    lacp_periodic_str[pl->sm.periodic_state]));
474 		return;
475 	}
476 
477 	/*
478 	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
479 	 * has been received. Then after we timeout, then it is
480 	 * possible to go to SLOW_PERIODIC_TIME.
481 	 */
482 	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
483 		pl->periodic_timer.val = FAST_PERIODIC_TIME;
484 		pl->sm.periodic_state = LACP_FAST_PERIODIC;
485 	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
486 	    pl->PartnerOperPortState.bit.timeout) {
487 		/*
488 		 * If we receive a bit indicating we are going to
489 		 * fast periodic from slow periodic, stop the timer
490 		 * and let the periodic_timer_pop routine deal
491 		 * with reseting the periodic state and transmitting
492 		 * a LACPDU.
493 		 */
494 		stop_periodic_timer(portp);
495 		periodic_timer_pop_locked(portp);
496 	}
497 
498 	/* Rearm timer with value provided by partner */
499 	start_periodic_timer(portp);
500 }
501 
502 /*
503  * This routine transmits an LACPDU if lacp_enabled
504  * is TRUE and if NTT is set.
505  */
506 static void
507 lacp_xmit_sm(aggr_port_t *portp)
508 {
509 	aggr_lacp_port_t *pl = &portp->lp_lacp;
510 	size_t	len;
511 	mblk_t  *mp;
512 	hrtime_t now, elapsed;
513 	const mac_txinfo_t *mtp;
514 
515 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
516 
517 	/* LACP_OFF state not in specification so check here.  */
518 	if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
519 		return;
520 
521 	/*
522 	 * Do nothing if LACP has been turned off or if the
523 	 * periodic state machine is not enabled.
524 	 */
525 	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
526 	    !pl->sm.lacp_enabled || pl->sm.begin) {
527 		pl->NTT = B_FALSE;
528 		return;
529 	}
530 
531 	/*
532 	 * If we have sent 5 Slow packets in the last second, avoid
533 	 * sending any more here. No more than three LACPDUs may be transmitted
534 	 * in any Fast_Periodic_Time interval.
535 	 */
536 	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
537 		/*
538 		 * Grab the current time value and see if
539 		 * more than 1 second has passed. If so,
540 		 * reset the timestamp and clear the count.
541 		 */
542 		now = gethrtime();
543 		elapsed = now - pl->time;
544 		if (elapsed > NSECS_PER_SEC) {
545 			portp->lp_lacp_stats.LACPDUsTx = 0;
546 			pl->time = now;
547 		} else {
548 			return;
549 		}
550 	}
551 
552 	len = sizeof (lacp_t) + sizeof (struct ether_header);
553 	mp = allocb(len, BPRI_MED);
554 	if (mp == NULL)
555 		return;
556 
557 	mp->b_wptr = mp->b_rptr + len;
558 	bzero(mp->b_rptr, len);
559 
560 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
561 	fill_lacp_pdu(portp,
562 	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
563 
564 	/*
565 	 * Store the transmit info pointer locally in case it changes between
566 	 * loading mt_fn and mt_arg.
567 	 */
568 	mtp = portp->lp_txinfo;
569 	mtp->mt_fn(mtp->mt_arg, mp);
570 
571 	pl->NTT = B_FALSE;
572 	portp->lp_lacp_stats.LACPDUsTx++;
573 }
574 
575 /*
576  * Initialize the ethernet header of a LACP packet sent from the specified
577  * port.
578  */
579 static void
580 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
581 {
582 	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
583 	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
584 	    ETHERADDRL);
585 	ether->ether_type = htons(ETHERTYPE_SLOW);
586 }
587 
588 static void
589 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
590 {
591 	aggr_lacp_port_t *pl = &portp->lp_lacp;
592 	aggr_grp_t *aggrp = portp->lp_grp;
593 
594 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
595 
596 	lacp->subtype = LACP_SUBTYPE;
597 	lacp->version = LACP_VERSION;
598 
599 	rw_enter(&aggrp->lg_lock, RW_READER);
600 	rw_enter(&portp->lp_lock, RW_READER);
601 
602 	/*
603 	 * Actor Information
604 	 */
605 	lacp->actor_info.tlv_type = ACTOR_TLV;
606 	lacp->actor_info.information_len = sizeof (link_info_t);
607 	lacp->actor_info.system_priority =
608 	    htons(aggrp->aggr.ActorSystemPriority);
609 	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
610 	    ETHERADDRL);
611 	lacp->actor_info.key = htons(pl->ActorOperPortKey);
612 	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
613 	lacp->actor_info.port = htons(pl->ActorPortNumber);
614 	lacp->actor_info.state.state = pl->ActorOperPortState.state;
615 
616 	/*
617 	 * Partner Information
618 	 */
619 	lacp->partner_info.tlv_type = PARTNER_TLV;
620 	lacp->partner_info.information_len = sizeof (link_info_t);
621 	lacp->partner_info.system_priority =
622 	    htons(pl->PartnerOperSysPriority);
623 	lacp->partner_info.system_id = pl->PartnerOperSystem;
624 	lacp->partner_info.key = htons(pl->PartnerOperKey);
625 	lacp->partner_info.port_priority =
626 	    htons(pl->PartnerOperPortPriority);
627 	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
628 	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
629 
630 	/* Collector Information */
631 	lacp->tlv_collector = COLLECTOR_TLV;
632 	lacp->collector_len = 0x10;
633 	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
634 
635 	/* Termination Information */
636 	lacp->tlv_terminator = TERMINATOR_TLV;
637 	lacp->terminator_len = 0x0;
638 
639 	rw_exit(&portp->lp_lock);
640 	rw_exit(&aggrp->lg_lock);
641 }
642 
643 /*
644  * lacp_mux_sm - LACP mux state machine
645  *		This state machine is invoked from:
646  *			- startup upon aggregation
647  *			- from the Selection logic
648  *			- when the wait_while_timer pops
649  *			- when the aggregation MAC address is changed
650  *			- when receiving DL_NOTE_LINK_UP/DOWN
651  *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
652  *			- when LACP mode is changed.
653  *			- when a DL_NOTE_SPEED is received
654  */
655 static void
656 lacp_mux_sm(aggr_port_t *portp)
657 {
658 	aggr_grp_t *aggrp = portp->lp_grp;
659 	boolean_t NTT_updated = B_FALSE;
660 	aggr_lacp_port_t *pl = &portp->lp_lacp;
661 	lacp_mux_state_t oldstate = pl->sm.mux_state;
662 
663 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
664 
665 	/* LACP_OFF state not in specification so check here.  */
666 	if (!pl->sm.lacp_on) {
667 		pl->sm.mux_state = LACP_DETACHED;
668 		pl->ActorOperPortState.bit.sync = B_FALSE;
669 
670 		if (pl->ActorOperPortState.bit.collecting ||
671 		    pl->ActorOperPortState.bit.distributing) {
672 			AGGR_LACP_DBG(("trunk link: (%s): "
673 			    "Collector_Distributor Disabled.\n",
674 			    portp->lp_devname));
675 		}
676 
677 		pl->ActorOperPortState.bit.collecting =
678 		    pl->ActorOperPortState.bit.distributing = B_FALSE;
679 		return;
680 	}
681 
682 	if (pl->sm.begin || !pl->sm.lacp_enabled)
683 		pl->sm.mux_state = LACP_DETACHED;
684 
685 again:
686 	/* determine next state, or return if state unchanged */
687 	switch (pl->sm.mux_state) {
688 	case LACP_DETACHED:
689 		if (pl->sm.begin) {
690 			break;
691 		}
692 
693 		if ((pl->sm.selected == AGGR_SELECTED) ||
694 		    (pl->sm.selected == AGGR_STANDBY)) {
695 			pl->sm.mux_state = LACP_WAITING;
696 			break;
697 		}
698 		return;
699 
700 	case LACP_WAITING:
701 		if (pl->sm.selected == AGGR_UNSELECTED) {
702 			pl->sm.mux_state = LACP_DETACHED;
703 			break;
704 		}
705 
706 		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
707 			pl->sm.mux_state = LACP_ATTACHED;
708 			break;
709 		}
710 		return;
711 
712 	case LACP_ATTACHED:
713 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
714 		    (pl->sm.selected == AGGR_STANDBY)) {
715 			pl->sm.mux_state = LACP_DETACHED;
716 			break;
717 		}
718 
719 		if ((pl->sm.selected == AGGR_SELECTED) &&
720 		    pl->PartnerOperPortState.bit.sync) {
721 			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
722 			break;
723 		}
724 		return;
725 
726 	case LACP_COLLECTING_DISTRIBUTING:
727 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
728 		    (pl->sm.selected == AGGR_STANDBY) ||
729 		    !pl->PartnerOperPortState.bit.sync) {
730 			pl->sm.mux_state = LACP_ATTACHED;
731 			break;
732 		}
733 		return;
734 	}
735 
736 	AGGR_LACP_DBG(("lacp_mux_sm(%s):%s--->%s\n",
737 	    portp->lp_devname, lacp_mux_str[oldstate],
738 	    lacp_mux_str[pl->sm.mux_state]));
739 
740 	/* perform actions on entering a new state */
741 	switch (pl->sm.mux_state) {
742 	case LACP_DETACHED:
743 		if (pl->ActorOperPortState.bit.collecting ||
744 		    pl->ActorOperPortState.bit.distributing) {
745 			AGGR_LACP_DBG(("trunk link: (%s): "
746 			    "Collector_Distributor Disabled.\n",
747 			    portp->lp_devname));
748 		}
749 
750 		pl->ActorOperPortState.bit.sync =
751 		    pl->ActorOperPortState.bit.collecting = B_FALSE;
752 
753 		/* Turn OFF Collector_Distributor */
754 		aggr_set_coll_dist(portp, B_FALSE);
755 
756 		pl->ActorOperPortState.bit.distributing = B_FALSE;
757 		NTT_updated = B_TRUE;
758 		break;
759 
760 	case LACP_WAITING:
761 		start_wait_while_timer(portp);
762 		break;
763 
764 	case LACP_ATTACHED:
765 		if (pl->ActorOperPortState.bit.collecting ||
766 		    pl->ActorOperPortState.bit.distributing) {
767 			AGGR_LACP_DBG(("trunk link: (%s): "
768 			    "Collector_Distributor Disabled.\n",
769 			    portp->lp_devname));
770 		}
771 
772 		pl->ActorOperPortState.bit.sync = B_TRUE;
773 		pl->ActorOperPortState.bit.collecting = B_FALSE;
774 
775 		/* Turn OFF Collector_Distributor */
776 		aggr_set_coll_dist(portp, B_FALSE);
777 
778 		pl->ActorOperPortState.bit.distributing = B_FALSE;
779 		NTT_updated = B_TRUE;
780 		if (pl->PartnerOperPortState.bit.sync) {
781 			/*
782 			 * We had already received an updated sync from
783 			 * the partner. Attempt to transition to
784 			 * collecting/distributing now.
785 			 */
786 			goto again;
787 		}
788 		break;
789 
790 	case LACP_COLLECTING_DISTRIBUTING:
791 		if (!pl->ActorOperPortState.bit.collecting &&
792 		    !pl->ActorOperPortState.bit.distributing) {
793 			AGGR_LACP_DBG(("trunk link: (%s): "
794 			    "Collector_Distributor Enabled.\n",
795 			    portp->lp_devname));
796 		}
797 		pl->ActorOperPortState.bit.distributing = B_TRUE;
798 
799 		/* Turn Collector_Distributor back ON */
800 		aggr_set_coll_dist(portp, B_TRUE);
801 
802 		pl->ActorOperPortState.bit.collecting = B_TRUE;
803 		NTT_updated = B_TRUE;
804 		break;
805 	}
806 
807 	/*
808 	 * If we updated the state of the NTT variable, then
809 	 * initiate a LACPDU transmission.
810 	 */
811 	if (NTT_updated) {
812 		pl->NTT = B_TRUE;
813 		lacp_xmit_sm(portp);
814 	}
815 } /* lacp_mux_sm */
816 
817 
818 static void
819 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
820 {
821 	marker_pdu_t		*markerp = (marker_pdu_t *)mp->b_rptr;
822 	const mac_txinfo_t	*mtp;
823 
824 	AGGR_LACP_LOCK(portp->lp_grp);
825 
826 	AGGR_LACP_DBG(("trunk link: (%s): MARKER PDU received:\n",
827 	    portp->lp_devname));
828 
829 	/* LACP_OFF state not in specification so check here.  */
830 	if (!portp->lp_lacp.sm.lacp_on)
831 		goto bail;
832 
833 	if (MBLKL(mp) < sizeof (marker_pdu_t))
834 		goto bail;
835 
836 	if (markerp->version != MARKER_VERSION) {
837 		AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: "
838 		    "version = %d does not match s/w version %d\n",
839 		    portp->lp_devname, markerp->version, MARKER_VERSION));
840 		goto bail;
841 	}
842 
843 	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
844 		/* We do not yet send out MARKER info PDUs */
845 		AGGR_LACP_DBG(("trunk link (%s): MARKER RESPONSE PDU: "
846 		    " MARKER TLV = %d - We don't send out info type!\n",
847 		    portp->lp_devname, markerp->tlv_marker));
848 		goto bail;
849 	}
850 
851 	if (markerp->tlv_marker != MARKER_INFO_TLV) {
852 		AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: "
853 		    " MARKER TLV = %d \n", portp->lp_devname,
854 		    markerp->tlv_marker));
855 		goto bail;
856 	}
857 
858 	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
859 		AGGR_LACP_DBG(("trunk link (%s): Malformed MARKER PDU: "
860 		    " MARKER length = %d \n", portp->lp_devname,
861 		    markerp->marker_len));
862 		goto bail;
863 	}
864 
865 	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
866 		AGGR_LACP_DBG(("trunk link (%s): MARKER PDU: "
867 		    " MARKER Port %d not equal to Partner port %d\n",
868 		    portp->lp_devname, markerp->requestor_port,
869 		    portp->lp_lacp.PartnerOperPortNum));
870 		goto bail;
871 	}
872 
873 	if (ether_cmp(&markerp->system_id,
874 	    &portp->lp_lacp.PartnerOperSystem) != 0) {
875 		AGGR_LACP_DBG(("trunk link (%s): MARKER PDU: "
876 		    " MARKER MAC not equal to Partner MAC\n",
877 		    portp->lp_devname));
878 		goto bail;
879 	}
880 
881 	/*
882 	 * Turn into Marker Response PDU
883 	 * and return mblk to sending system
884 	 */
885 	markerp->tlv_marker = MARKER_RESPONSE_TLV;
886 
887 	/* reuse the space that was used by received ethernet header */
888 	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
889 	mp->b_rptr -= sizeof (struct ether_header);
890 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
891 	AGGR_LACP_UNLOCK(portp->lp_grp);
892 
893 	/*
894 	 * Store the transmit info pointer locally in case it changes between
895 	 * loading mt_fn and mt_arg.
896 	 */
897 	mtp = portp->lp_txinfo;
898 	mtp->mt_fn(mtp->mt_arg, mp);
899 	return;
900 
901 bail:
902 	AGGR_LACP_UNLOCK(portp->lp_grp);
903 	freemsg(mp);
904 }
905 
906 
907 /*
908  * Update the LACP mode (off, active, or passive) of the specified group.
909  */
910 void
911 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
912 {
913 	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
914 	aggr_port_t *port;
915 
916 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
917 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
918 
919 	if (mode == old_mode)
920 		return;
921 
922 	grp->lg_lacp_mode = mode;
923 
924 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
925 		port->lp_lacp.ActorAdminPortState.bit.activity =
926 		    port->lp_lacp.ActorOperPortState.bit.activity =
927 		    (mode == AGGR_LACP_ACTIVE);
928 
929 		if (old_mode == AGGR_LACP_OFF) {
930 			/* OFF -> {PASSIVE,ACTIVE} */
931 			/* turn OFF Collector_Distributor */
932 			aggr_set_coll_dist(port, B_FALSE);
933 			rw_enter(&port->lp_lock, RW_WRITER);
934 			lacp_on(port);
935 			if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
936 				aggr_lacp_port_attached(port);
937 			rw_exit(&port->lp_lock);
938 		} else if (mode == AGGR_LACP_OFF) {
939 			/* {PASSIVE,ACTIVE} -> OFF */
940 			rw_enter(&port->lp_lock, RW_WRITER);
941 			lacp_off(port);
942 			rw_exit(&port->lp_lock);
943 			if (!grp->lg_closing) {
944 				/* Turn ON Collector_Distributor */
945 				aggr_set_coll_dist(port, B_TRUE);
946 			}
947 		} else {
948 			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
949 			port->lp_lacp.sm.begin = B_TRUE;
950 			lacp_mux_sm(port);
951 			lacp_periodic_sm(port);
952 
953 			/* kick off state machines */
954 			lacp_receive_sm(port, NULL);
955 			lacp_mux_sm(port);
956 		}
957 
958 		if (grp->lg_closing)
959 			break;
960 	}
961 }
962 
963 
964 /*
965  * Update the LACP timer (short or long) of the specified group.
966  */
967 void
968 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
969 {
970 	aggr_port_t *port;
971 
972 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
973 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
974 
975 	if (timer == grp->aggr.PeriodicTimer)
976 		return;
977 
978 	grp->aggr.PeriodicTimer = timer;
979 
980 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
981 		port->lp_lacp.ActorAdminPortState.bit.timeout =
982 		    port->lp_lacp.ActorOperPortState.bit.timeout =
983 		    (timer == AGGR_LACP_TIMER_SHORT);
984 	}
985 }
986 
987 
988 /*
989  * Sets the initial LACP mode (off, active, passive) and LACP timer
990  * (short, long) of the specified group.
991  */
992 void
993 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
994     aggr_lacp_timer_t timer)
995 {
996 	aggr_port_t *port;
997 
998 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
999 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1000 
1001 	grp->lg_lacp_mode = mode;
1002 	grp->aggr.PeriodicTimer = timer;
1003 
1004 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1005 		port->lp_lacp.ActorAdminPortState.bit.activity =
1006 		    port->lp_lacp.ActorOperPortState.bit.activity =
1007 		    (mode == AGGR_LACP_ACTIVE);
1008 
1009 		port->lp_lacp.ActorAdminPortState.bit.timeout =
1010 			port->lp_lacp.ActorOperPortState.bit.timeout =
1011 			(timer == AGGR_LACP_TIMER_SHORT);
1012 
1013 		if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
1014 			/* Turn ON Collector_Distributor */
1015 			aggr_set_coll_dist(port, B_TRUE);
1016 		} else { /* LACP_ACTIVE/PASSIVE */
1017 			rw_enter(&port->lp_lock, RW_WRITER);
1018 			lacp_on(port);
1019 			rw_exit(&port->lp_lock);
1020 		}
1021 	}
1022 }
1023 
1024 /*
1025  * Verify that the Partner MAC and Key recorded by the specified
1026  * port are not found in other ports that are not part of our
1027  * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1028  * otherwise.
1029  */
1030 static boolean_t
1031 lacp_misconfig_check(aggr_port_t *portp)
1032 {
1033 	aggr_grp_t *grp = portp->lp_grp;
1034 	lacp_sel_ports_t *cport;
1035 
1036 	mutex_enter(&lacp_sel_lock);
1037 
1038 	for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1039 
1040 		/* skip entries of the group of the port being checked */
1041 		if (cport->sp_key == grp->lg_key)
1042 			continue;
1043 
1044 		if ((ether_cmp(&cport->sp_partner_system,
1045 		    &grp->aggr.PartnerSystem) == 0) &&
1046 		    (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1047 			char mac_str[ETHERADDRL*3];
1048 			struct ether_addr *mac = &cport->sp_partner_system;
1049 
1050 			/*
1051 			 * The Partner port information is already in use
1052 			 * by ports in another aggregation so disable this
1053 			 * port.
1054 			 */
1055 
1056 			(void) snprintf(mac_str, sizeof (mac_str),
1057 			    "%x:%x:%x:%x:%x:%x",
1058 			    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1059 			    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1060 			    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1061 
1062 			portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1063 			cmn_err(CE_NOTE, "aggr key %d port %s: Port Partner "
1064 			    "MAC %s and key %d in use on aggregation "
1065 			    "key %d port %s\n", grp->lg_key,
1066 			    portp->lp_devname, mac_str,
1067 			    portp->lp_lacp.PartnerOperKey, cport->sp_key,
1068 			    cport->sp_devname);
1069 			break;
1070 		}
1071 	}
1072 
1073 	mutex_exit(&lacp_sel_lock);
1074 	return (cport != NULL);
1075 }
1076 
1077 /*
1078  * Remove the specified port from the list of selected ports.
1079  */
1080 static void
1081 lacp_sel_ports_del(aggr_port_t *portp)
1082 {
1083 	lacp_sel_ports_t *cport, **prev = NULL;
1084 
1085 	mutex_enter(&lacp_sel_lock);
1086 
1087 	prev = &sel_ports;
1088 	for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1089 	    cport = cport->sp_next) {
1090 		if (bcmp(portp->lp_devname, cport->sp_devname,
1091 		    MAXNAMELEN + 1) == 0) {
1092 			break;
1093 		}
1094 	}
1095 
1096 	if (cport == NULL) {
1097 		mutex_exit(&lacp_sel_lock);
1098 		return;
1099 	}
1100 
1101 	*prev = cport->sp_next;
1102 	kmem_free(cport, sizeof (*cport));
1103 
1104 	mutex_exit(&lacp_sel_lock);
1105 }
1106 
1107 /*
1108  * Add the specified port to the list of selected ports. Returns B_FALSE
1109  * if the operation could not be performed due to an memory allocation
1110  * error.
1111  */
1112 static boolean_t
1113 lacp_sel_ports_add(aggr_port_t *portp)
1114 {
1115 	lacp_sel_ports_t *new_port;
1116 	lacp_sel_ports_t *cport, **last;
1117 
1118 	mutex_enter(&lacp_sel_lock);
1119 
1120 	/* check if port is already in the list */
1121 	last = &sel_ports;
1122 	for (cport = sel_ports; cport != NULL;
1123 	    last = &cport->sp_next, cport = cport->sp_next) {
1124 		if (bcmp(portp->lp_devname, cport->sp_devname,
1125 		    MAXNAMELEN + 1) == 0) {
1126 			ASSERT(cport->sp_partner_key ==
1127 			    portp->lp_lacp.PartnerOperKey);
1128 			ASSERT(ether_cmp(&cport->sp_partner_system,
1129 			    &portp->lp_lacp.PartnerOperSystem) == 0);
1130 
1131 			mutex_exit(&lacp_sel_lock);
1132 			return (B_TRUE);
1133 		}
1134 	}
1135 
1136 	/* create and initialize new entry */
1137 	new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1138 	if (new_port == NULL) {
1139 		mutex_exit(&lacp_sel_lock);
1140 		return (B_FALSE);
1141 	}
1142 
1143 	new_port->sp_key = portp->lp_grp->lg_key;
1144 	bcopy(&portp->lp_lacp.PartnerOperSystem,
1145 	    &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1146 	new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1147 	bcopy(portp->lp_devname, new_port->sp_devname, MAXNAMELEN + 1);
1148 
1149 	*last = new_port;
1150 
1151 	mutex_exit(&lacp_sel_lock);
1152 	return (B_TRUE);
1153 }
1154 
1155 /*
1156  * lacp_selection_logic - LACP selection logic
1157  *		Sets the selected variable on a per port basis
1158  *		and sets Ready when all waiting ports are ready
1159  *		to go online.
1160  *
1161  * parameters:
1162  *      - portp - instance this applies to.
1163  *
1164  * invoked:
1165  *    - when initialization is needed
1166  *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1167  *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1168  *    - every time the wait_while_timer pops
1169  *    - everytime we turn LACP on/off
1170  */
1171 static void
1172 lacp_selection_logic(aggr_port_t *portp)
1173 {
1174 	aggr_port_t *tpp;
1175 	aggr_grp_t *aggrp = portp->lp_grp;
1176 	int ports_waiting;
1177 	boolean_t reset_mac = B_FALSE;
1178 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1179 
1180 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
1181 
1182 	/* LACP_OFF state not in specification so check here.  */
1183 	if (!pl->sm.lacp_on) {
1184 		lacp_port_unselect(portp);
1185 		aggrp->aggr.ready = B_FALSE;
1186 		lacp_mux_sm(portp);
1187 		return;
1188 	}
1189 
1190 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1191 	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1192 
1193 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): "
1194 		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1195 		    "lp_state=%d)\n", portp->lp_devname, pl->sm.selected,
1196 		    AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1197 		    portp->lp_state));
1198 
1199 		lacp_port_unselect(portp);
1200 		aggrp->aggr.ready = B_FALSE;
1201 		lacp_mux_sm(portp);
1202 		return;
1203 	}
1204 
1205 	/*
1206 	 * If LACP is not enabled then selected is never set.
1207 	 */
1208 	if (!pl->sm.lacp_enabled) {
1209 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): selected %d-->%d\n",
1210 		    portp->lp_devname, pl->sm.selected, AGGR_UNSELECTED));
1211 
1212 		lacp_port_unselect(portp);
1213 		lacp_mux_sm(portp);
1214 		return;
1215 	}
1216 
1217 	/*
1218 	 * Check if the Partner MAC or Key are zero. If so, we have
1219 	 * not received any LACP info or it has expired and the
1220 	 * receive machine is in the LACP_DEFAULTED state.
1221 	 */
1222 	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1223 	    (pl->PartnerOperKey == 0)) {
1224 
1225 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1226 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1227 			    &etherzeroaddr) != 0 &&
1228 			    (tpp->lp_lacp.PartnerOperKey != 0))
1229 				break;
1230 		}
1231 
1232 		/*
1233 		 * If all ports have no key or aggregation address,
1234 		 * then clear the negotiated Partner MAC and key.
1235 		 */
1236 		if (tpp == NULL) {
1237 			/* Clear the aggregation Partner MAC and key */
1238 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1239 			aggrp->aggr.PartnerOperAggrKey = 0;
1240 		}
1241 
1242 		return;
1243 	}
1244 
1245 	/*
1246 	 * Insure that at least one port in the aggregation
1247 	 * matches the Partner aggregation MAC and key. If not,
1248 	 * then clear the aggregation MAC and key. Later we will
1249 	 * set the Partner aggregation MAC and key to that of the
1250 	 * current port's Partner MAC and key.
1251 	 */
1252 	if (ether_cmp(&pl->PartnerOperSystem,
1253 	    &aggrp->aggr.PartnerSystem) != 0 ||
1254 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1255 
1256 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1257 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1258 			    &aggrp->aggr.PartnerSystem) == 0 &&
1259 			    (tpp->lp_lacp.PartnerOperKey ==
1260 			    aggrp->aggr.PartnerOperAggrKey))
1261 				break;
1262 		}
1263 
1264 		if (tpp == NULL) {
1265 			/* Clear the aggregation Partner MAC and key */
1266 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1267 			aggrp->aggr.PartnerOperAggrKey = 0;
1268 			reset_mac = B_TRUE;
1269 		}
1270 	}
1271 
1272 	/*
1273 	 * If our Actor MAC is found in the Partner MAC
1274 	 * on this port then we have a loopback misconfiguration.
1275 	 */
1276 	if (ether_cmp(&pl->PartnerOperSystem,
1277 	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1278 		cmn_err(CE_NOTE, "trunk link: (%s): Loopback condition.\n",
1279 		    portp->lp_devname);
1280 
1281 		lacp_port_unselect(portp);
1282 		lacp_mux_sm(portp);
1283 		return;
1284 	}
1285 
1286 	/*
1287 	 * If our Partner MAC and Key are found on any other
1288 	 * ports that are not in our aggregation, we have
1289 	 * a misconfiguration.
1290 	 */
1291 	if (lacp_misconfig_check(portp)) {
1292 		lacp_mux_sm(portp);
1293 		return;
1294 	}
1295 
1296 	/*
1297 	 * If the Aggregation Partner MAC and Key have not been
1298 	 * set, then this is either the first port or the aggregation
1299 	 * MAC and key have been reset. In either case we must set
1300 	 * the values of the Partner MAC and key.
1301 	 */
1302 	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1303 	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1304 		/* Set aggregation Partner MAC and key */
1305 		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1306 		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1307 
1308 		/*
1309 		 * If we reset Partner aggregation MAC, then restart
1310 		 * selection_logic on ports that match new MAC address.
1311 		 */
1312 		if (reset_mac) {
1313 			for (tpp = aggrp->lg_ports; tpp; tpp =
1314 			    tpp->lp_next) {
1315 				if (tpp == portp)
1316 					continue;
1317 				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1318 				    &aggrp->aggr.PartnerSystem) == 0 &&
1319 				    (tpp->lp_lacp.PartnerOperKey ==
1320 				    aggrp->aggr.PartnerOperAggrKey))
1321 					lacp_selection_logic(tpp);
1322 			}
1323 		}
1324 	} else if (ether_cmp(&pl->PartnerOperSystem,
1325 	    &aggrp->aggr.PartnerSystem) != 0 ||
1326 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1327 		/*
1328 		 * The Partner port information does not match
1329 		 * that of the other ports in the aggregation
1330 		 * so disable this port.
1331 		 */
1332 		lacp_port_unselect(portp);
1333 
1334 		cmn_err(CE_NOTE, "trunk link: (%s): Port Partner MAC or"
1335 		    " key (%d) incompatible with Aggregation Partner "
1336 		    "MAC or key (%d)\n",
1337 		    portp->lp_devname, pl->PartnerOperKey,
1338 		    aggrp->aggr.PartnerOperAggrKey);
1339 
1340 		lacp_mux_sm(portp);
1341 		return;
1342 	}
1343 
1344 	/* If we get to here, automatically set selected */
1345 	if (pl->sm.selected != AGGR_SELECTED) {
1346 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): "
1347 		    "selected %d-->%d\n", portp->lp_devname,
1348 		    pl->sm.selected, AGGR_SELECTED));
1349 		if (!lacp_port_select(portp))
1350 			return;
1351 		lacp_mux_sm(portp);
1352 	}
1353 
1354 	/*
1355 	 * From this point onward we have selected the port
1356 	 * and are simply checking if the Ready flag should
1357 	 * be set.
1358 	 */
1359 
1360 	/*
1361 	 * If at least two ports are waiting to aggregate
1362 	 * and ready_n is set on all ports waiting to aggregate
1363 	 * then set READY for the aggregation.
1364 	 */
1365 
1366 	ports_waiting = 0;
1367 
1368 	if (!aggrp->aggr.ready) {
1369 		/*
1370 		 * If all ports in the aggregation have received compatible
1371 		 * partner information and they match up correctly with the
1372 		 * switch, there is no need to wait for all the
1373 		 * wait_while_timers to pop.
1374 		 */
1375 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1376 			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1377 			    tpp->lp_lacp.sm.begin) &&
1378 			    !pl->PartnerOperPortState.bit.sync) {
1379 				/* Add up ports uninitialized or waiting */
1380 				ports_waiting++;
1381 				if (!tpp->lp_lacp.sm.ready_n)
1382 					return;
1383 			}
1384 		}
1385 	}
1386 
1387 	if (aggrp->aggr.ready) {
1388 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): "
1389 		    "aggr.ready already set\n", portp->lp_devname));
1390 		lacp_mux_sm(portp);
1391 	} else {
1392 		AGGR_LACP_DBG(("lacp_selection_logic:(%s): Ready %d-->%d\n",
1393 		    portp->lp_devname, aggrp->aggr.ready, B_TRUE));
1394 		aggrp->aggr.ready = B_TRUE;
1395 
1396 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1397 			lacp_mux_sm(tpp);
1398 	}
1399 
1400 }
1401 
1402 /*
1403  * wait_while_timer_pop - When the timer pops, we arrive here to
1404  *			set ready_n and trigger the selection logic.
1405  */
1406 static void
1407 wait_while_timer_pop(void *data)
1408 {
1409 	aggr_port_t *portp = data;
1410 
1411 	if (portp->lp_closing)
1412 		return;
1413 
1414 	AGGR_LACP_LOCK(portp->lp_grp);
1415 
1416 	AGGR_LACP_DBG(("trunk link:(%s): wait_while_timer pop \n",
1417 	    portp->lp_devname));
1418 	portp->lp_lacp.wait_while_timer.id = 0;
1419 	portp->lp_lacp.sm.ready_n = B_TRUE;
1420 
1421 	lacp_selection_logic(portp);
1422 	AGGR_LACP_UNLOCK(portp->lp_grp);
1423 }
1424 
1425 static void
1426 start_wait_while_timer(aggr_port_t *portp)
1427 {
1428 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1429 
1430 	if (portp->lp_lacp.wait_while_timer.id == 0) {
1431 		portp->lp_lacp.wait_while_timer.id =
1432 		    timeout(wait_while_timer_pop, portp,
1433 		    drv_usectohz(1000000 *
1434 		    portp->lp_lacp.wait_while_timer.val));
1435 	}
1436 }
1437 
1438 
1439 static void
1440 stop_wait_while_timer(portp)
1441 aggr_port_t *portp;
1442 {
1443 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1444 
1445 	if (portp->lp_lacp.wait_while_timer.id != 0) {
1446 		AGGR_LACP_UNLOCK(portp->lp_grp);
1447 		(void) untimeout(portp->lp_lacp.wait_while_timer.id);
1448 		AGGR_LACP_LOCK(portp->lp_grp);
1449 		portp->lp_lacp.wait_while_timer.id = 0;
1450 	}
1451 }
1452 
1453 /*
1454  * Invoked when a port has been attached to a group.
1455  * Complete the processing that couldn't be finished from lacp_on()
1456  * because the port was not started. We know that the link is full
1457  * duplex and ON, otherwise it wouldn't be attached.
1458  */
1459 void
1460 aggr_lacp_port_attached(aggr_port_t *portp)
1461 {
1462 	aggr_grp_t *grp = portp->lp_grp;
1463 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1464 
1465 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1466 	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1467 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1468 
1469 	AGGR_LACP_DBG(("aggr_lacp_port_attached: port %s\n",
1470 	    portp->lp_devname));
1471 
1472 	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1473 
1474 	if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
1475 		pl->ActorAdminPortState.bit.activity =
1476 		    pl->ActorOperPortState.bit.activity = B_FALSE;
1477 
1478 		/* Turn ON Collector_Distributor */
1479 		aggr_set_coll_dist_locked(portp, B_TRUE);
1480 
1481 		return;
1482 	}
1483 
1484 	pl->ActorAdminPortState.bit.activity =
1485 	    pl->ActorOperPortState.bit.activity =
1486 	    (grp->lg_lacp_mode == AGGR_LACP_ACTIVE);
1487 
1488 	pl->ActorAdminPortState.bit.timeout =
1489 	    pl->ActorOperPortState.bit.timeout =
1490 	    (grp->aggr.PeriodicTimer == AGGR_LACP_TIMER_SHORT);
1491 
1492 	pl->sm.lacp_enabled = B_TRUE;
1493 	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1494 	pl->sm.begin = B_TRUE;
1495 
1496 	if (!pl->sm.lacp_on) {
1497 		/* Turn OFF Collector_Distributor */
1498 		aggr_set_coll_dist_locked(portp, B_FALSE);
1499 
1500 		lacp_on(portp);
1501 	} else {
1502 		lacp_receive_sm(portp, NULL);
1503 		lacp_mux_sm(portp);
1504 
1505 		/* Enable Multicast Slow Protocol address */
1506 		aggr_lacp_mcast_on(portp);
1507 
1508 		/* periodic_sm is started up from the receive machine */
1509 		lacp_selection_logic(portp);
1510 	}
1511 }
1512 
1513 /*
1514  * Invoked when a port has been detached from a group. Turn off
1515  * LACP processing if it was enabled.
1516  */
1517 void
1518 aggr_lacp_port_detached(aggr_port_t *portp)
1519 {
1520 	aggr_grp_t *grp = portp->lp_grp;
1521 
1522 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1523 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1524 
1525 	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %s\n",
1526 	    portp->lp_devname));
1527 
1528 	portp->lp_lacp.sm.port_enabled = B_FALSE;
1529 
1530 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1531 		return;
1532 
1533 	/* Disable Slow Protocol PDUs */
1534 	lacp_off(portp);
1535 }
1536 
1537 
1538 /*
1539  * Invoked after the outbound port selection policy has been changed.
1540  */
1541 void
1542 aggr_lacp_policy_changed(aggr_grp_t *grp)
1543 {
1544 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1545 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1546 
1547 	/* suspend transmission for CollectorMaxDelay time */
1548 	delay(grp->aggr.CollectorMaxDelay * 10);
1549 }
1550 
1551 
1552 /*
1553  * Enable Slow Protocol LACP and Marker PDUs.
1554  */
1555 static void
1556 lacp_on(aggr_port_t *portp)
1557 {
1558 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1559 	ASSERT(RW_WRITE_HELD(&portp->lp_grp->lg_lock));
1560 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1561 
1562 	/*
1563 	 * Reset the state machines and Partner operational
1564 	 * information. Careful to not reset things like
1565 	 * our link state.
1566 	 */
1567 	lacp_reset_port(portp);
1568 	portp->lp_lacp.sm.lacp_on = B_TRUE;
1569 
1570 	AGGR_LACP_DBG(("lacp_on:(%s): \n", portp->lp_devname));
1571 
1572 	lacp_receive_sm(portp, NULL);
1573 	lacp_mux_sm(portp);
1574 
1575 	if (portp->lp_state != AGGR_PORT_STATE_ATTACHED)
1576 		return;
1577 
1578 	/* Enable Multicast Slow Protocol address */
1579 	aggr_lacp_mcast_on(portp);
1580 
1581 	/* periodic_sm is started up from the receive machine */
1582 	lacp_selection_logic(portp);
1583 } /* lacp_on */
1584 
1585 
/*
 * Disable Slow Protocol LACP and Marker PDUs on the specified port.
 *
 * Clears the port's lacp_on flag, stops the periodic, current_while
 * and wait_while timers, runs the mux and periodic machines and the
 * selection logic (unless the port or the group is closing), and then
 * turns off the Collector_Distributor, disables the Slow Protocol
 * multicast address and resets the port's LACP state.
 *
 * The caller holds the group's LACP lock, and both lg_lock and the
 * port's lp_lock as writer. The two rwlocks are dropped and
 * re-acquired inside this function, so state they protect may have
 * changed by the time it returns.
 */
static void
lacp_off(aggr_port_t *portp)
{
	aggr_grp_t *grp = portp->lp_grp;

	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
	ASSERT(RW_WRITE_HELD(&portp->lp_lock));

	portp->lp_lacp.sm.lacp_on = B_FALSE;

	AGGR_LACP_DBG(("lacp_off:(%s): \n", portp->lp_devname));

	/*
	 * Disable Slow Protocol Timers. We must temporarily release
	 * the group and port locks in order to avoid deadlocks. Make
	 * sure that neither the port nor the group is closing after
	 * re-acquiring their locks.
	 */
	rw_exit(&portp->lp_lock);
	rw_exit(&grp->lg_lock);

	stop_periodic_timer(portp);
	stop_current_while_timer(portp);
	stop_wait_while_timer(portp);

	/* re-acquire in group-then-port order, the reverse of the release */
	rw_enter(&grp->lg_lock, RW_WRITER);
	rw_enter(&portp->lp_lock, RW_WRITER);

	/* the state machines are only run when nothing is being torn down */
	if (!portp->lp_closing && !grp->lg_closing) {
		lacp_mux_sm(portp);
		lacp_periodic_sm(portp);
		lacp_selection_logic(portp);
	}

	/* the remaining steps are performed even when closing */

	/* Turn OFF Collector_Distributor */
	aggr_set_coll_dist_locked(portp, B_FALSE);

	/* Disable Multicast Slow Protocol address */
	aggr_lacp_mcast_off(portp);

	lacp_reset_port(portp);
}
1630 
1631 
1632 static boolean_t
1633 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1634 {
1635 	/*
1636 	 * 43.4.12 - "a Receive machine shall not validate
1637 	 * the Version Number, TLV_type, or Reserved fields in received
1638 	 * LACPDUs."
1639 	 * ... "a Receive machine may validate the Actor_Information_Length,
1640 	 * Partner_Information_Length, Collector_Information_Length,
1641 	 * or Terminator_Length fields."
1642 	 */
1643 	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1644 	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1645 	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1646 	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1647 		AGGR_LACP_DBG(("trunk link (%s): Malformed LACPDU: "
1648 		    " Terminator Length = %d \n", portp->lp_devname,
1649 		    lacp->terminator_len));
1650 		return (B_FALSE);
1651 	}
1652 
1653 	return (B_TRUE);
1654 }
1655 
1656 
1657 static void
1658 start_current_while_timer(aggr_port_t *portp, uint_t time)
1659 {
1660 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1661 
1662 	if (portp->lp_lacp.current_while_timer.id == 0) {
1663 		if (time > 0) {
1664 			portp->lp_lacp.current_while_timer.val = time;
1665 		} else if (portp->lp_lacp.ActorOperPortState.bit.timeout) {
1666 			portp->lp_lacp.current_while_timer.val =
1667 			    SHORT_TIMEOUT_TIME;
1668 		} else {
1669 			portp->lp_lacp.current_while_timer.val =
1670 			    LONG_TIMEOUT_TIME;
1671 		}
1672 
1673 		portp->lp_lacp.current_while_timer.id =
1674 		    timeout(current_while_timer_pop, portp,
1675 		    drv_usectohz((clock_t)1000000 *
1676 		    (clock_t)portp->lp_lacp.current_while_timer.val));
1677 	}
1678 }
1679 
1680 
1681 static void
1682 stop_current_while_timer(aggr_port_t *portp)
1683 {
1684 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1685 
1686 	if (portp->lp_lacp.current_while_timer.id != 0) {
1687 		AGGR_LACP_UNLOCK(portp->lp_grp);
1688 		(void) untimeout(portp->lp_lacp.current_while_timer.id);
1689 		AGGR_LACP_LOCK(portp->lp_grp);
1690 		portp->lp_lacp.current_while_timer.id = 0;
1691 	}
1692 }
1693 
1694 
1695 static void
1696 current_while_timer_pop(void *data)
1697 {
1698 	aggr_port_t *portp = (aggr_port_t *)data;
1699 
1700 	if (portp->lp_closing)
1701 		return;
1702 
1703 	AGGR_LACP_LOCK(portp->lp_grp);
1704 
1705 	AGGR_LACP_DBG(("trunk link:(%s): current_while_timer "
1706 	    "pop id=%p\n", portp->lp_devname,
1707 	    portp->lp_lacp.current_while_timer.id));
1708 
1709 	portp->lp_lacp.current_while_timer.id = 0;
1710 	lacp_receive_sm(portp, NULL);
1711 	AGGR_LACP_UNLOCK(portp->lp_grp);
1712 }
1713 
1714 
1715 /*
1716  * record_Default - Simply copies over administrative values
1717  * to the partner operational values, and sets our state to indicate we
1718  * are using defaulted values.
1719  */
1720 static void
1721 record_Default(aggr_port_t *portp)
1722 {
1723 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1724 
1725 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1726 
1727 	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1728 	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1729 	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1730 	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1731 	pl->PartnerOperKey = pl->PartnerAdminKey;
1732 	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1733 
1734 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1735 }
1736 
1737 
1738 /* Returns B_TRUE on sync value changing */
1739 static boolean_t
1740 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1741 {
1742 	aggr_grp_t *aggrp = portp->lp_grp;
1743 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1744 	uint8_t save_sync;
1745 
1746 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1747 
1748 	/*
1749 	 * Partner Information
1750 	 */
1751 	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1752 	pl->PartnerOperPortPriority =
1753 	    ntohs(lacp->actor_info.port_priority);
1754 	pl->PartnerOperSystem = lacp->actor_info.system_id;
1755 	pl->PartnerOperSysPriority =
1756 	    htons(lacp->actor_info.system_priority);
1757 	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1758 
1759 	/* All state info except for Synchronization */
1760 	save_sync = pl->PartnerOperPortState.bit.sync;
1761 	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1762 
1763 	/* Defaulted set to FALSE */
1764 	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1765 
1766 	/*
1767 	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1768 	 *		Partner_System_Priority, Partner_Key, and
1769 	 *		Partner_State.Aggregation) are compared to the
1770 	 *		corresponding operations paramters values for
1771 	 *		the Actor. If these are equal, or if this is
1772 	 *		an individual link, we are synchronized.
1773 	 */
1774 	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1775 	    (ntohs(lacp->partner_info.port_priority) ==
1776 	    pl->ActorPortPriority) &&
1777 	    (ether_cmp(&lacp->partner_info.system_id,
1778 		(struct ether_addr *)&aggrp->lg_addr) == 0) &&
1779 	    (ntohs(lacp->partner_info.system_priority) ==
1780 	    aggrp->aggr.ActorSystemPriority) &&
1781 	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1782 	    (lacp->partner_info.state.bit.aggregation ==
1783 	    pl->ActorOperPortState.bit.aggregation)) ||
1784 	    (!lacp->actor_info.state.bit.aggregation)) {
1785 
1786 		pl->PartnerOperPortState.bit.sync =
1787 		    lacp->actor_info.state.bit.sync;
1788 	} else {
1789 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1790 	}
1791 
1792 	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1793 		AGGR_LACP_DBG(("record_PDU:(%s): partner sync "
1794 		    "%d -->%d\n", portp->lp_devname, save_sync,
1795 		    pl->PartnerOperPortState.bit.sync));
1796 		return (B_TRUE);
1797 	} else {
1798 		return (B_FALSE);
1799 	}
1800 }
1801 
1802 
1803 /*
1804  * update_selected - If any of the Partner parameters has
1805  *			changed from a previous value, then
1806  *			unselect the link from the aggregator.
1807  */
1808 static boolean_t
1809 update_selected(aggr_port_t *portp, lacp_t *lacp)
1810 {
1811 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1812 
1813 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1814 
1815 	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1816 	    (pl->PartnerOperPortPriority !=
1817 	    ntohs(lacp->actor_info.port_priority)) ||
1818 	    (ether_cmp(&pl->PartnerOperSystem,
1819 	    &lacp->actor_info.system_id) != 0) ||
1820 	    (pl->PartnerOperSysPriority !=
1821 	    ntohs(lacp->actor_info.system_priority)) ||
1822 	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1823 	    (pl->PartnerOperPortState.bit.aggregation !=
1824 	    lacp->actor_info.state.bit.aggregation)) {
1825 		AGGR_LACP_DBG(("update_selected:(%s): "
1826 		    "selected  %d-->%d\n", portp->lp_devname, pl->sm.selected,
1827 		    AGGR_UNSELECTED));
1828 
1829 		lacp_port_unselect(portp);
1830 		return (B_TRUE);
1831 	} else {
1832 		return (B_FALSE);
1833 	}
1834 }
1835 
1836 
1837 /*
1838  * update_default_selected - If any of the operational Partner parameters
1839  *			is different than that of the administrative values
1840  *			then unselect the link from the aggregator.
1841  */
1842 static void
1843 update_default_selected(aggr_port_t *portp)
1844 {
1845 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1846 
1847 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1848 
1849 	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1850 	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1851 	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1852 	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1853 	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1854 	    (pl->PartnerOperPortState.bit.aggregation !=
1855 	    pl->PartnerAdminPortState.bit.aggregation)) {
1856 
1857 		AGGR_LACP_DBG(("update_default_selected:(%s): "
1858 		    "selected  %d-->%d\n", portp->lp_devname,
1859 		    pl->sm.selected, AGGR_UNSELECTED));
1860 
1861 		lacp_port_unselect(portp);
1862 	}
1863 }
1864 
1865 
1866 /*
1867  * update_NTT - If any of the Partner values in the received LACPDU
1868  *			are different than that of the Actor operational
1869  *			values then set NTT to true.
1870  */
1871 static void
1872 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1873 {
1874 	aggr_grp_t *aggrp = portp->lp_grp;
1875 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1876 
1877 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1878 
1879 	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1880 	    (pl->ActorPortPriority !=
1881 	    ntohs(lacp->partner_info.port_priority)) ||
1882 	    (ether_cmp(&aggrp->lg_addr,
1883 	    &lacp->partner_info.system_id) != 0) ||
1884 	    (aggrp->aggr.ActorSystemPriority !=
1885 	    ntohs(lacp->partner_info.system_priority)) ||
1886 	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1887 	    (pl->ActorOperPortState.bit.activity !=
1888 	    lacp->partner_info.state.bit.activity) ||
1889 	    (pl->ActorOperPortState.bit.timeout !=
1890 	    lacp->partner_info.state.bit.timeout) ||
1891 	    (pl->ActorOperPortState.bit.sync !=
1892 	    lacp->partner_info.state.bit.sync) ||
1893 	    (pl->ActorOperPortState.bit.aggregation !=
1894 	    lacp->partner_info.state.bit.aggregation)) {
1895 
1896 		AGGR_LACP_DBG(("update_NTT:(%s): NTT  %d-->%d\n",
1897 		    portp->lp_devname, pl->NTT, B_TRUE));
1898 
1899 		pl->NTT = B_TRUE;
1900 	}
1901 }
1902 
1903 /*
1904  * lacp_receive_sm - LACP receive state machine
1905  *
1906  * parameters:
1907  *      - portp - instance this applies to.
1908  *      - lacp - pointer in the case of a received LACPDU.
1909  *                This value is NULL if there is no LACPDU.
1910  *
1911  * invoked:
1912  *    - when initialization is needed
1913  *    - upon reception of an LACPDU. This is the common case.
1914  *    - every time the current_while_timer pops
1915  */
1916 static void
1917 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
1918 {
1919 	boolean_t sync_updated, selected_updated, save_activity;
1920 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1921 	lacp_receive_state_t oldstate = pl->sm.receive_state;
1922 
1923 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1924 
1925 	/* LACP_OFF state not in specification so check here.  */
1926 	if (!pl->sm.lacp_on)
1927 		return;
1928 
1929 	/* figure next state */
1930 	if (pl->sm.begin || pl->sm.port_moved) {
1931 		pl->sm.receive_state = LACP_INITIALIZE;
1932 	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
1933 		pl->sm.receive_state = LACP_PORT_DISABLED;
1934 	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
1935 		pl->sm.receive_state =
1936 		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
1937 		    LACP_DISABLED : LACP_PORT_DISABLED;
1938 	} else if (lacp != NULL) {
1939 		if ((pl->sm.receive_state == LACP_EXPIRED) ||
1940 		    (pl->sm.receive_state == LACP_DEFAULTED)) {
1941 			pl->sm.receive_state = LACP_CURRENT;
1942 		}
1943 	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
1944 	    (pl->current_while_timer.id == 0)) {
1945 		pl->sm.receive_state = LACP_EXPIRED;
1946 	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
1947 	    (pl->current_while_timer.id == 0)) {
1948 		pl->sm.receive_state = LACP_DEFAULTED;
1949 	}
1950 
1951 
1952 	if (!((lacp && (oldstate == LACP_CURRENT) &&
1953 	    (pl->sm.receive_state == LACP_CURRENT)))) {
1954 		AGGR_LACP_DBG(("lacp_receive_sm(%s):%s--->%s\n",
1955 		    portp->lp_devname, lacp_receive_str[oldstate],
1956 		    lacp_receive_str[pl->sm.receive_state]));
1957 	}
1958 
1959 	switch (pl->sm.receive_state) {
1960 	case LACP_INITIALIZE:
1961 		lacp_port_unselect(portp);
1962 		record_Default(portp);
1963 		pl->ActorOperPortState.bit.expired = B_FALSE;
1964 		pl->sm.port_moved = B_FALSE;
1965 		pl->sm.receive_state = LACP_PORT_DISABLED;
1966 		pl->sm.begin = B_FALSE;
1967 		lacp_receive_sm(portp, NULL);
1968 		break;
1969 
1970 	case LACP_PORT_DISABLED:
1971 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1972 		/*
1973 		 * Stop current_while_timer in case
1974 		 * we got here from link down
1975 		 */
1976 		stop_current_while_timer(portp);
1977 
1978 		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
1979 			pl->sm.receive_state = LACP_DISABLED;
1980 			lacp_receive_sm(portp, lacp);
1981 			/* We goto LACP_DISABLED state */
1982 			break;
1983 		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
1984 			pl->sm.receive_state = LACP_EXPIRED;
1985 			/*
1986 			 * FALL THROUGH TO LACP_EXPIRED CASE:
1987 			 * We have no way of knowing if we get into
1988 			 * lacp_receive_sm() from a  current_while_timer
1989 			 * expiring as it has never been kicked off yet!
1990 			 */
1991 		} else {
1992 			/* We stay in LACP_PORT_DISABLED state */
1993 			break;
1994 		}
1995 		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
1996 		/* FALLTHROUGH */
1997 
1998 	case LACP_EXPIRED:
1999 		/*
2000 		 * Arrives here from LACP_PORT_DISABLED state as well as
2001 		 * as well as current_while_timer expiring.
2002 		 */
2003 		pl->PartnerOperPortState.bit.sync = B_FALSE;
2004 		pl->PartnerOperPortState.bit.timeout = B_TRUE;
2005 
2006 		pl->ActorOperPortState.bit.expired = B_TRUE;
2007 		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
2008 		lacp_periodic_sm(portp);
2009 		break;
2010 
2011 	case LACP_DISABLED:
2012 		/*
2013 		 * This is the normal state for recv_sm when LACP_OFF
2014 		 * is set or the NIC is in half duplex mode.
2015 		 */
2016 		lacp_port_unselect(portp);
2017 		record_Default(portp);
2018 		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
2019 		pl->ActorOperPortState.bit.expired = B_FALSE;
2020 		break;
2021 
2022 	case LACP_DEFAULTED:
2023 		/*
2024 		 * Current_while_timer expired a second time.
2025 		 */
2026 		update_default_selected(portp);
2027 		record_Default(portp);	/* overwrite Partner Oper val */
2028 		pl->ActorOperPortState.bit.expired = B_FALSE;
2029 		pl->PartnerOperPortState.bit.sync = B_TRUE;
2030 
2031 		lacp_selection_logic(portp);
2032 		lacp_mux_sm(portp);
2033 		break;
2034 
2035 	case LACP_CURRENT:
2036 		/*
2037 		 * Reception of LACPDU
2038 		 */
2039 
2040 		if (!lacp) /* no LACPDU so current_while_timer popped */
2041 			break;
2042 
2043 		AGGR_LACP_DBG(("lacp_receive_sm: (%s): LACPDU received:\n",
2044 		    portp->lp_devname));
2045 
2046 		/*
2047 		 * Validate Actor_Information_Length,
2048 		 * Partner_Information_Length, Collector_Information_Length,
2049 		 * and Terminator_Length fields.
2050 		 */
2051 		if (!valid_lacp_pdu(portp, lacp)) {
2052 			AGGR_LACP_DBG(("lacp_receive_sm (%s): "
2053 			    "Invalid LACPDU received\n",
2054 			    portp->lp_devname));
2055 			break;
2056 		}
2057 
2058 		save_activity = pl->PartnerOperPortState.bit.activity;
2059 		selected_updated = update_selected(portp, lacp);
2060 		update_NTT(portp, lacp);
2061 		sync_updated = record_PDU(portp, lacp);
2062 
2063 		pl->ActorOperPortState.bit.expired = B_FALSE;
2064 
2065 		if (selected_updated) {
2066 			lacp_selection_logic(portp);
2067 			lacp_mux_sm(portp);
2068 		} else if (sync_updated) {
2069 			lacp_mux_sm(portp);
2070 		}
2071 
2072 		/*
2073 		 * If the periodic timer value bit has been modified
2074 		 * or the partner activity bit has been changed then
2075 		 * we need to respectively:
2076 		 *  - restart the timer with the proper timeout value.
2077 		 *  - possibly enable/disable transmission of LACPDUs.
2078 		 */
2079 		if ((pl->PartnerOperPortState.bit.timeout &&
2080 		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2081 		    (!pl->PartnerOperPortState.bit.timeout &&
2082 		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2083 		    (pl->PartnerOperPortState.bit.activity !=
2084 		    save_activity)) {
2085 			lacp_periodic_sm(portp);
2086 		}
2087 
2088 		stop_current_while_timer(portp);
2089 		/* Check if we need to transmit an LACPDU */
2090 		if (pl->NTT)
2091 			lacp_xmit_sm(portp);
2092 		start_current_while_timer(portp, 0);
2093 
2094 		break;
2095 	}
2096 }
2097 
/*
 * Enable or disable the collector/distributor on the given port.
 *
 * Locking wrapper: takes portp->lp_lock as writer, delegates the actual
 * work to aggr_set_coll_dist_locked() (which asserts that the lock is
 * write-held), and drops the lock again before returning.
 */
2098 static void
2099 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2100 {
2101 	rw_enter(&portp->lp_lock, RW_WRITER);
2102 	aggr_set_coll_dist_locked(portp, enable);
2103 	rw_exit(&portp->lp_lock);
2104 }
2105 
2106 static void
2107 aggr_set_coll_dist_locked(aggr_port_t *portp, boolean_t enable)
2108 {
2109 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
2110 
2111 	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%s) %s\n",
2112 	    portp->lp_devname, enable ? "ENABLED" : "DISABLED"));
2113 
2114 	if (!enable) {
2115 		/*
2116 		 * Turn OFF Collector_Distributor.
2117 		 */
2118 		portp->lp_collector_enabled = B_FALSE;
2119 		aggr_send_port_disable(portp);
2120 		return;
2121 	}
2122 
2123 	/*
2124 	 * Turn ON Collector_Distributor.
2125 	 */
2126 
2127 	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2128 	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2129 		/* Port is compatible and can be aggregated */
2130 		portp->lp_collector_enabled = B_TRUE;
2131 		aggr_send_port_enable(portp);
2132 	}
2133 }
2134 
2135 /*
2136  * Process a received Marker or LACPDU.
2137  */
2138 void
2139 aggr_lacp_rx(aggr_port_t *portp, mblk_t *dmp)
2140 {
2141 	lacp_t	*lacp;
2142 
2143 	dmp->b_rptr += sizeof (struct ether_header);
2144 
2145 	if (MBLKL(dmp) < sizeof (lacp_t)) {
2146 		freemsg(dmp);
2147 		return;
2148 	}
2149 
2150 	lacp = (lacp_t *)dmp->b_rptr;
2151 
2152 	switch (lacp->subtype) {
2153 	case LACP_SUBTYPE:
2154 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s): LACPDU received.\n",
2155 		    portp->lp_devname));
2156 
2157 		AGGR_LACP_LOCK(portp->lp_grp);
2158 		if (!portp->lp_lacp.sm.lacp_on) {
2159 			AGGR_LACP_UNLOCK(portp->lp_grp);
2160 			break;
2161 		}
2162 		lacp_receive_sm(portp, lacp);
2163 		AGGR_LACP_UNLOCK(portp->lp_grp);
2164 		break;
2165 
2166 	case MARKER_SUBTYPE:
2167 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s): Marker Packet received.\n",
2168 		    portp->lp_devname));
2169 
2170 		(void) receive_marker_pdu(portp, dmp);
2171 		break;
2172 
2173 	default:
2174 		AGGR_LACP_DBG(("aggr_lacp_rx: (%s): "
2175 		    "Unknown Slow Protocol type %d\n",
2176 		    portp->lp_devname, lacp->subtype));
2177 		break;
2178 	}
2179 
2180 	freemsg(dmp);
2181 }
2182