xref: /titanic_51/usr/src/uts/common/io/aggr/aggr_lacp.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/sysmacros.h>
35 #include <sys/conf.h>
36 #include <sys/cmn_err.h>
37 #include <sys/list.h>
38 #include <sys/ksynch.h>
39 #include <sys/kmem.h>
40 #include <sys/stream.h>
41 #include <sys/modctl.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/atomic.h>
45 #include <sys/stat.h>
46 #include <sys/byteorder.h>
47 #include <sys/strsun.h>
48 #include <sys/isa_defs.h>
49 
50 #include <sys/aggr.h>
51 #include <sys/aggr_impl.h>
52 
53 static struct ether_addr	etherzeroaddr = {
54 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
55 };
56 
/*
 * Slow_Protocols_Multicast address (01-80-C2-00-00-02), as per the
 * IEEE 802.3ad spec. It is the destination address of the frames built
 * by fill_lacp_ether() and the multicast address turned on/off on
 * attached ports by aggr_lacp_mcast_on()/aggr_lacp_mcast_off().
 */
static struct ether_addr   slow_multicast_addr = {
	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
};
63 
#ifdef DEBUG
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG() takes a parenthesized printf() argument list, e.g.
 * AGGR_LACP_DBG(("x = %d\n", x)), and prints it only while
 * aggr_lacp_debug is non-zero.
 */
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	if (aggr_lacp_debug) { (void) printf x; }
#else
/* Non-DEBUG builds: the macro expands to an empty block. */
#define	AGGR_LACP_DBG(x)	{}
#endif /* DEBUG */
71 
/* Nanoseconds per second; compared against gethrtime() deltas. */
#define	NSECS_PER_SEC   1000000000ll
73 
/*
 * Walker state shared between lacp_misconfig_check() and
 * lacp_misconfig_walker().
 */
typedef struct lacp_misconfig_check_state_s {
	aggr_port_t *cs_portp;	/* port whose partner info is being checked */
	boolean_t cs_found;	/* set to B_TRUE once a conflict is found */
} lacp_misconfig_check_state_t;
79 
/*
 * Printable state names used in debug messages. lacp_periodic_str[] and
 * lacp_mux_str[] are indexed by the periodic and mux state values
 * respectively; lacp_receive_str[] is presumably indexed by the receive
 * state in the same way (not referenced in this part of the file).
 */
static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
static const char *lacp_mux_str[] = LACP_MUX_STRINGS;
83 
/*
 * Initial priorities: lacp_port_priority becomes each port's
 * ActorPortPriority in aggr_lacp_init_port(), lacp_system_priority becomes
 * each group's ActorSystemPriority in aggr_lacp_init_grp().
 */
static uint16_t lacp_port_priority = 0x1000;
static uint16_t lacp_system_priority = 0x1000;
86 
/* Forward declarations of file-local routines. */
static void periodic_timer_pop_locked(aggr_port_t *);
static void periodic_timer_pop(void *);
static void lacp_xmit_sm(aggr_port_t *);
static void lacp_periodic_sm(aggr_port_t *);
static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
static void lacp_on(aggr_port_t *);
static void lacp_off(aggr_port_t *);
static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
static void lacp_receive_sm(aggr_port_t *, lacp_t *);
static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
static void aggr_set_coll_dist_locked(aggr_port_t *, boolean_t);
static void start_wait_while_timer(aggr_port_t *);
static void stop_wait_while_timer(aggr_port_t *);
static void lacp_reset_port(aggr_port_t *);
static void stop_current_while_timer(aggr_port_t *);
static void current_while_timer_pop(void *);
static void update_default_selected(aggr_port_t *);
static boolean_t update_selected(aggr_port_t *, lacp_t *);
106 
/*
 * Return the instance number encoded as the trailing run of decimal
 * digits of devname (e.g. "bge12" yields 12). Returns 0 when the name is
 * empty or does not end in a digit.
 */
static int
inst_num(char *devname)
{
	size_t pos = strlen(devname);
	int inst = 0;
	int fact = 1;

	/*
	 * Walk backwards from the last character. The position is tested
	 * before the character is examined so that memory in front of the
	 * string is never read, even for empty or all-digit names.
	 */
	while (pos > 0 &&
	    devname[pos - 1] >= '0' && devname[pos - 1] <= '9') {
		inst += (devname[pos - 1] - '0') * fact;
		fact *= 10;
		pos--;
	}

	return (inst);
}
122 
123 /*
124  * Initialize group specific LACP state and parameters.
125  */
126 void
127 aggr_lacp_init_grp(aggr_grp_t *aggrp)
128 {
129 	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
130 	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
131 	aggrp->aggr.CollectorMaxDelay = 10;
132 	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
133 	aggrp->aggr.ready = B_FALSE;
134 }
135 
136 /*
137  * Complete LACP info initialization at port creation time.
138  */
139 void
140 aggr_lacp_init_port(aggr_port_t *portp)
141 {
142 	aggr_grp_t *aggrp = portp->lp_grp;
143 	aggr_lacp_port_t *pl = &portp->lp_lacp;
144 	uint16_t offset;
145 	uint32_t instance;
146 
147 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
148 	ASSERT(RW_WRITE_HELD(&aggrp->lg_lock) || RW_READ_HELD(&aggrp->lg_lock));
149 	ASSERT(RW_WRITE_HELD(&portp->lp_lock) || RW_READ_HELD(&portp->lp_lock));
150 
151 	/*
152 	 * Port numbers must be unique. For now, we encode the first two
153 	 * characters into the top byte of the port number. This will work
154 	 * with multiple types of NICs provided that the first two
155 	 * characters are unique.
156 	 */
157 	offset = ((portp->lp_devname[0] + portp->lp_devname[1]) << 8);
158 	instance = inst_num(portp->lp_devname);
159 	/* actor port # */
160 	pl->ActorPortNumber = offset + instance + portp->lp_port;
161 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s/%d): "
162 	    "ActorPortNumber = 0x%x\n", portp->lp_devname,
163 	    portp->lp_port, pl->ActorPortNumber));
164 
165 	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
166 	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
167 	pl->NTT = B_FALSE;			/* need to transmit */
168 
169 	pl->ActorAdminPortKey = aggrp->lg_key;
170 	pl->ActorOperPortKey = pl->ActorAdminPortKey;
171 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s/%d) "
172 	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
173 	    portp->lp_devname, portp->lp_port, pl->ActorAdminPortKey,
174 	    pl->ActorOperPortKey));
175 
176 	/* Actor admin. port state */
177 	pl->ActorAdminPortState.bit.activity = B_FALSE;
178 	pl->ActorAdminPortState.bit.timeout = B_TRUE;
179 	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
180 	pl->ActorAdminPortState.bit.sync = B_FALSE;
181 	pl->ActorAdminPortState.bit.collecting = B_FALSE;
182 	pl->ActorAdminPortState.bit.distributing = B_FALSE;
183 	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
184 	pl->ActorAdminPortState.bit.expired = B_FALSE;
185 	pl->ActorOperPortState = pl->ActorAdminPortState;
186 
187 	/*
188 	 * Partner Administrative Information
189 	 * (All initialized to zero except for the following)
190 	 * Fast Timeouts.
191 	 */
192 	pl->PartnerAdminPortState.bit.timeout =
193 	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
194 
195 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
196 
197 	/*
198 	 * State machine information.
199 	 */
200 	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
201 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
202 	pl->sm.lacp_enabled = B_FALSE;
203 	pl->sm.port_enabled = B_FALSE;		/* Link Down */
204 	pl->sm.actor_churn = B_FALSE;
205 	pl->sm.partner_churn = B_FALSE;
206 	pl->sm.ready_n = B_FALSE;
207 	pl->sm.selected = AGGR_UNSELECTED;
208 	pl->sm.port_moved = B_FALSE;
209 
210 	pl->sm.periodic_state = LACP_NO_PERIODIC;
211 	pl->sm.receive_state = LACP_INITIALIZE;
212 	pl->sm.mux_state = LACP_DETACHED;
213 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
214 
215 	/*
216 	 * Timer information.
217 	 */
218 	pl->current_while_timer.id = 0;
219 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
220 
221 	pl->periodic_timer.id = 0;
222 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
223 
224 	pl->wait_while_timer.id = 0;
225 	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
226 }
227 
228 /*
229  * Port initialization when we need to
230  * turn LACP on/off, etc. Not everything is
231  * reset like in the above routine.
232  *		Do NOT modify things like link status.
233  */
234 static void
235 lacp_reset_port(aggr_port_t *portp)
236 {
237 	aggr_lacp_port_t *pl = &portp->lp_lacp;
238 
239 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
240 
241 	pl->NTT = B_FALSE;			/* need to transmit */
242 
243 	/* reset operational port state */
244 	pl->ActorOperPortState.bit.timeout =
245 		pl->ActorAdminPortState.bit.timeout;
246 
247 	pl->ActorOperPortState.bit.sync = B_FALSE;
248 	pl->ActorOperPortState.bit.collecting = B_FALSE;
249 	pl->ActorOperPortState.bit.distributing = B_FALSE;
250 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
251 	pl->ActorOperPortState.bit.expired = B_FALSE;
252 
253 	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
254 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
255 
256 	/*
257 	 * State machine information.
258 	 */
259 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
260 	pl->sm.actor_churn = B_FALSE;
261 	pl->sm.partner_churn = B_FALSE;
262 	pl->sm.ready_n = B_FALSE;
263 	pl->sm.selected = AGGR_UNSELECTED;
264 
265 	pl->sm.periodic_state = LACP_NO_PERIODIC;
266 	pl->sm.receive_state = LACP_INITIALIZE;
267 	pl->sm.mux_state = LACP_DETACHED;
268 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
269 
270 	/*
271 	 * Timer information.
272 	 */
273 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
274 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
275 }
276 
277 static void
278 aggr_lacp_mcast_on(aggr_port_t *port)
279 {
280 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
281 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
282 
283 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
284 		return;
285 
286 	(void) aggr_port_multicst(port, B_TRUE,
287 	    (uchar_t *)&slow_multicast_addr);
288 }
289 
290 static void
291 aggr_lacp_mcast_off(aggr_port_t *port)
292 {
293 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
294 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
295 
296 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
297 		return;
298 
299 	(void) aggr_port_multicst(port, B_FALSE,
300 	    (uchar_t *)&slow_multicast_addr);
301 }
302 
303 static void
304 start_periodic_timer(aggr_port_t *portp)
305 {
306 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
307 
308 	if (portp->lp_lacp.periodic_timer.id == 0) {
309 		portp->lp_lacp.periodic_timer.id =
310 		    timeout(periodic_timer_pop, portp,
311 		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
312 	}
313 }
314 
315 static void
316 stop_periodic_timer(aggr_port_t *portp)
317 {
318 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
319 
320 	if (portp->lp_lacp.periodic_timer.id != 0) {
321 		AGGR_LACP_UNLOCK(portp->lp_grp);
322 		(void) untimeout(portp->lp_lacp.periodic_timer.id);
323 		AGGR_LACP_LOCK(portp->lp_grp);
324 		portp->lp_lacp.periodic_timer.id = 0;
325 	}
326 }
327 
328 /*
329  * When the timer pops, we arrive here to
330  * clear out LACPDU count as well as transmit an
331  * LACPDU. We then set the periodic state and let
332  * the periodic state machine restart the timer.
333  */
334 
335 static void
336 periodic_timer_pop_locked(aggr_port_t *portp)
337 {
338 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
339 
340 	portp->lp_lacp.periodic_timer.id = NULL;
341 	portp->lp_lacp_stats.LACPDUsTx = 0;
342 
343 	/* current timestamp */
344 	portp->lp_lacp.time = gethrtime();
345 	portp->lp_lacp.NTT = B_TRUE;
346 	lacp_xmit_sm(portp);
347 
348 	/*
349 	 * Set Periodic State machine state based on the
350 	 * value of the Partner Operation Port State timeout
351 	 * bit.
352 	 */
353 	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
354 		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
355 		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
356 	} else {
357 		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
358 		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
359 	}
360 
361 	lacp_periodic_sm(portp);
362 }
363 
364 static void
365 periodic_timer_pop(void *data)
366 {
367 	aggr_port_t *portp = data;
368 
369 	if (portp->lp_closing)
370 		return;
371 
372 	AGGR_LACP_LOCK(portp->lp_grp);
373 	periodic_timer_pop_locked(portp);
374 	AGGR_LACP_UNLOCK(portp->lp_grp);
375 }
376 
377 /*
378  * Invoked from:
379  *	- startup upon aggregation
380  *	- when the periodic timer pops
381  *	- when the periodic timer value is changed
382  *	- when the port is attached or detached
383  *	- when LACP mode is changed.
384  */
385 static void
386 lacp_periodic_sm(aggr_port_t *portp)
387 {
388 	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
389 	aggr_lacp_port_t *pl = &portp->lp_lacp;
390 
391 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
392 
393 	/* LACP_OFF state not in specification so check here.  */
394 	if (!pl->sm.lacp_on) {
395 		/* Stop timer whether it is running or not */
396 		stop_periodic_timer(portp);
397 		pl->sm.periodic_state = LACP_NO_PERIODIC;
398 		pl->NTT = B_FALSE;
399 		AGGR_LACP_DBG(("lacp_periodic_sm(%s/%d):NO LACP "
400 		    "%s--->%s\n", portp->lp_devname, portp->lp_port,
401 		    lacp_periodic_str[oldstate],
402 		    lacp_periodic_str[pl->sm.periodic_state]));
403 		return;
404 	}
405 
406 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
407 	    !pl->sm.port_enabled ||
408 	    !pl->ActorOperPortState.bit.activity &&
409 	    !pl->PartnerOperPortState.bit.activity) {
410 
411 		/* Stop timer whether it is running or not */
412 		stop_periodic_timer(portp);
413 		pl->sm.periodic_state = LACP_NO_PERIODIC;
414 		pl->NTT = B_FALSE;
415 		AGGR_LACP_DBG(("lacp_periodic_sm(%s/%d):STOP %s--->%s\n",
416 		    portp->lp_devname, portp->lp_port,
417 		    lacp_periodic_str[oldstate],
418 		    lacp_periodic_str[pl->sm.periodic_state]));
419 		return;
420 	}
421 
422 	/*
423 	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
424 	 * has been received. Then after we timeout, then it is
425 	 * possible to go to SLOW_PERIODIC_TIME.
426 	 */
427 	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
428 		pl->periodic_timer.val = FAST_PERIODIC_TIME;
429 		pl->sm.periodic_state = LACP_FAST_PERIODIC;
430 	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
431 	    pl->PartnerOperPortState.bit.timeout) {
432 		/*
433 		 * If we receive a bit indicating we are going to
434 		 * fast periodic from slow periodic, stop the timer
435 		 * and let the periodic_timer_pop routine deal
436 		 * with reseting the periodic state and transmitting
437 		 * a LACPDU.
438 		 */
439 		stop_periodic_timer(portp);
440 		periodic_timer_pop_locked(portp);
441 	}
442 
443 	/* Rearm timer with value provided by partner */
444 	start_periodic_timer(portp);
445 }
446 
447 /*
448  * This routine transmits an LACPDU if lacp_enabled
449  * is TRUE and if NTT is set.
450  */
451 static void
452 lacp_xmit_sm(aggr_port_t *portp)
453 {
454 	aggr_lacp_port_t *pl = &portp->lp_lacp;
455 	size_t	len;
456 	mblk_t  *mp;
457 	hrtime_t now, elapsed;
458 
459 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
460 
461 	/* LACP_OFF state not in specification so check here.  */
462 	if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
463 		return;
464 
465 	/*
466 	 * Do nothing if LACP has been turned off or if the
467 	 * periodic state machine is not enabled.
468 	 */
469 	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
470 	    !pl->sm.lacp_enabled || pl->sm.begin) {
471 		pl->NTT = B_FALSE;
472 		return;
473 	}
474 
475 	/*
476 	 * If we have sent 5 Slow packets in the last second, avoid
477 	 * sending any more here. No more than three LACPDUs may be transmitted
478 	 * in any Fast_Periodic_Time interval.
479 	 */
480 	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
481 		/*
482 		 * Grab the current time value and see if
483 		 * more than 1 second has passed. If so,
484 		 * reset the timestamp and clear the count.
485 		 */
486 		now = gethrtime();
487 		elapsed = now - pl->time;
488 		if (elapsed > NSECS_PER_SEC) {
489 			portp->lp_lacp_stats.LACPDUsTx = 0;
490 			pl->time = now;
491 		} else {
492 			return;
493 		}
494 	}
495 
496 	len = sizeof (lacp_t) + sizeof (struct ether_header);
497 	mp = allocb(len, BPRI_MED);
498 	if (mp == NULL)
499 		return;
500 
501 	mp->b_wptr = mp->b_rptr + len;
502 	bzero(mp->b_rptr, len);
503 
504 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
505 	fill_lacp_pdu(portp,
506 	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
507 
508 	portp->lp_tx(portp->lp_tx_arg, mp);
509 
510 	pl->NTT = B_FALSE;
511 	portp->lp_lacp_stats.LACPDUsTx++;
512 }
513 
514 /*
515  * Initialize the ethernet header of a LACP packet sent from the specified
516  * port.
517  */
518 static void
519 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
520 {
521 	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
522 	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
523 	    ETHERADDRL);
524 	ether->ether_type = htons(ETHERTYPE_SLOW);
525 }
526 
527 static void
528 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
529 {
530 	aggr_lacp_port_t *pl = &portp->lp_lacp;
531 	aggr_grp_t *aggrp = portp->lp_grp;
532 
533 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
534 
535 	lacp->subtype = LACP_SUBTYPE;
536 	lacp->version = LACP_VERSION;
537 
538 	rw_enter(&aggrp->lg_lock, RW_READER);
539 	rw_enter(&portp->lp_lock, RW_READER);
540 
541 	/*
542 	 * Actor Information
543 	 */
544 	lacp->actor_info.tlv_type = ACTOR_TLV;
545 	lacp->actor_info.information_len = sizeof (link_info_t);
546 	lacp->actor_info.system_priority =
547 	    htons(aggrp->aggr.ActorSystemPriority);
548 	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
549 	    ETHERADDRL);
550 	lacp->actor_info.key = htons(pl->ActorOperPortKey);
551 	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
552 	lacp->actor_info.port = htons(pl->ActorPortNumber);
553 	lacp->actor_info.state.state = pl->ActorOperPortState.state;
554 
555 	/*
556 	 * Partner Information
557 	 */
558 	lacp->partner_info.tlv_type = PARTNER_TLV;
559 	lacp->partner_info.information_len = sizeof (link_info_t);
560 	lacp->partner_info.system_priority =
561 	    htons(pl->PartnerOperSysPriority);
562 	lacp->partner_info.system_id = pl->PartnerOperSystem;
563 	lacp->partner_info.key = htons(pl->PartnerOperKey);
564 	lacp->partner_info.port_priority =
565 	    htons(pl->PartnerOperPortPriority);
566 	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
567 	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
568 
569 	/* Collector Information */
570 	lacp->tlv_collector = COLLECTOR_TLV;
571 	lacp->collector_len = 0x10;
572 	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
573 
574 	/* Termination Information */
575 	lacp->tlv_terminator = TERMINATOR_TLV;
576 	lacp->terminator_len = 0x0;
577 
578 	rw_exit(&portp->lp_lock);
579 	rw_exit(&aggrp->lg_lock);
580 }
581 
582 /*
583  * lacp_mux_sm - LACP mux state machine
584  *		This state machine is invoked from:
585  *			- startup upon aggregation
586  *			- from the Selection logic
587  *			- when the wait_while_timer pops
588  *			- when the aggregation MAC address is changed
589  *			- when receiving DL_NOTE_LINK_UP/DOWN
590  *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
591  *			- when LACP mode is changed.
592  *			- when a DL_NOTE_SPEED is received
593  */
594 static void
595 lacp_mux_sm(aggr_port_t *portp)
596 {
597 	aggr_grp_t *aggrp = portp->lp_grp;
598 	boolean_t NTT_updated = B_FALSE;
599 	aggr_lacp_port_t *pl = &portp->lp_lacp;
600 	lacp_mux_state_t oldstate = pl->sm.mux_state;
601 
602 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
603 
604 	/* LACP_OFF state not in specification so check here.  */
605 	if (!pl->sm.lacp_on) {
606 		pl->sm.mux_state = LACP_DETACHED;
607 		pl->ActorOperPortState.bit.sync = B_FALSE;
608 
609 		if (pl->ActorOperPortState.bit.collecting ||
610 		    pl->ActorOperPortState.bit.distributing) {
611 			AGGR_LACP_DBG(("trunk link: (%s/%d): "
612 			    "Collector_Distributor Disabled.\n",
613 			    portp->lp_devname, portp->lp_port));
614 		}
615 
616 		pl->ActorOperPortState.bit.collecting =
617 		    pl->ActorOperPortState.bit.distributing = B_FALSE;
618 		return;
619 	}
620 
621 	if (pl->sm.begin || !pl->sm.lacp_enabled)
622 		pl->sm.mux_state = LACP_DETACHED;
623 
624 	/* determine next state, or return if state unchanged */
625 	switch (pl->sm.mux_state) {
626 	case LACP_DETACHED:
627 		if (pl->sm.begin) {
628 			break;
629 		}
630 
631 		if ((pl->sm.selected == AGGR_SELECTED) ||
632 		    (pl->sm.selected == AGGR_STANDBY)) {
633 			pl->sm.mux_state = LACP_WAITING;
634 			break;
635 		}
636 		return;
637 
638 	case LACP_WAITING:
639 		if (pl->sm.selected == AGGR_UNSELECTED) {
640 			pl->sm.mux_state = LACP_DETACHED;
641 			break;
642 		}
643 
644 		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
645 			pl->sm.mux_state = LACP_ATTACHED;
646 			break;
647 		}
648 		return;
649 
650 	case LACP_ATTACHED:
651 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
652 		    (pl->sm.selected == AGGR_STANDBY)) {
653 			pl->sm.mux_state = LACP_DETACHED;
654 			break;
655 		}
656 
657 		if ((pl->sm.selected == AGGR_SELECTED) &&
658 		    pl->PartnerOperPortState.bit.sync) {
659 			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
660 			break;
661 		}
662 		return;
663 
664 	case LACP_COLLECTING_DISTRIBUTING:
665 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
666 		    (pl->sm.selected == AGGR_STANDBY) ||
667 		    !pl->PartnerOperPortState.bit.sync) {
668 			pl->sm.mux_state = LACP_ATTACHED;
669 			break;
670 		}
671 		return;
672 	}
673 
674 	AGGR_LACP_DBG(("lacp_mux_sm(%s/%d):%s--->%s\n",
675 	    portp->lp_devname, portp->lp_port, lacp_mux_str[oldstate],
676 	    lacp_mux_str[pl->sm.mux_state]));
677 
678 	/* perform actions on entering a new state */
679 	switch (pl->sm.mux_state) {
680 	case LACP_DETACHED:
681 		if (pl->ActorOperPortState.bit.collecting ||
682 		    pl->ActorOperPortState.bit.distributing) {
683 			AGGR_LACP_DBG(("trunk link: (%s/%d): "
684 			    "Collector_Distributor Disabled.\n",
685 			    portp->lp_devname, portp->lp_port));
686 		}
687 
688 		pl->ActorOperPortState.bit.sync =
689 		    pl->ActorOperPortState.bit.collecting = B_FALSE;
690 
691 		/* Turn OFF Collector_Distributor */
692 		aggr_set_coll_dist(portp, B_FALSE);
693 
694 		pl->ActorOperPortState.bit.distributing = B_FALSE;
695 		NTT_updated = B_TRUE;
696 		break;
697 
698 	case LACP_WAITING:
699 		start_wait_while_timer(portp);
700 		break;
701 
702 	case LACP_ATTACHED:
703 		if (pl->ActorOperPortState.bit.collecting ||
704 		    pl->ActorOperPortState.bit.distributing) {
705 			AGGR_LACP_DBG(("trunk link: (%s%d): "
706 			    "Collector_Distributor Disabled.\n",
707 			    portp->lp_devname, portp->lp_port));
708 		}
709 
710 		pl->ActorOperPortState.bit.sync = B_TRUE;
711 		pl->ActorOperPortState.bit.collecting = B_FALSE;
712 
713 		/* Turn OFF Collector_Distributor */
714 		aggr_set_coll_dist(portp, B_FALSE);
715 
716 		pl->ActorOperPortState.bit.distributing = B_FALSE;
717 		NTT_updated = B_TRUE;
718 		break;
719 
720 	case LACP_COLLECTING_DISTRIBUTING:
721 		if (!pl->ActorOperPortState.bit.collecting &&
722 		    !pl->ActorOperPortState.bit.distributing) {
723 			AGGR_LACP_DBG(("trunk link: (%s/%d): "
724 			    "Collector_Distributor Enabled.\n",
725 			    portp->lp_devname, portp->lp_port));
726 		}
727 		pl->ActorOperPortState.bit.distributing = B_TRUE;
728 
729 		/* Turn Collector_Distributor back ON */
730 		aggr_set_coll_dist(portp, B_TRUE);
731 
732 		pl->ActorOperPortState.bit.collecting = B_TRUE;
733 		NTT_updated = B_TRUE;
734 		break;
735 	}
736 
737 	/*
738 	 * If we updated the state of the NTT variable, then
739 	 * initiate a LACPDU transmission.
740 	 */
741 	if (NTT_updated) {
742 		pl->NTT = B_TRUE;
743 		lacp_xmit_sm(portp);
744 	}
745 } /* lacp_mux_sm */
746 
747 
748 static void
749 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
750 {
751 	marker_pdu_t	*markerp = (marker_pdu_t *)mp->b_rptr;
752 
753 	AGGR_LACP_LOCK(portp->lp_grp);
754 
755 	AGGR_LACP_DBG(("trunk link: (%s/%d): MARKER PDU received:\n",
756 	    portp->lp_devname, portp->lp_port));
757 
758 	/* LACP_OFF state not in specification so check here.  */
759 	if (!portp->lp_lacp.sm.lacp_on)
760 		goto bail;
761 
762 	if (MBLKL(mp) < sizeof (marker_pdu_t))
763 		goto bail;
764 
765 	if (markerp->version != MARKER_VERSION) {
766 		AGGR_LACP_DBG(("trunk link (%s/%d): Malformed MARKER PDU: "
767 		    "version = %d does not match s/w version %d\n",
768 		    portp->lp_devname, portp->lp_port,
769 		    markerp->version, MARKER_VERSION));
770 		goto bail;
771 	}
772 
773 	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
774 		/* We do not yet send out MARKER info PDUs */
775 		AGGR_LACP_DBG(("trunk link (%s/%d): MARKER RESPONSE PDU: "
776 		    " MARKER TLV = %d - We don't send out info type!\n",
777 		    portp->lp_devname, portp->lp_port,
778 		    markerp->tlv_marker));
779 		goto bail;
780 	}
781 
782 	if (markerp->tlv_marker != MARKER_INFO_TLV) {
783 		AGGR_LACP_DBG(("trunk link (%s/%d): Malformed MARKER PDU: "
784 		    " MARKER TLV = %d \n", portp->lp_devname, portp->lp_port,
785 		    markerp->tlv_marker));
786 		goto bail;
787 	}
788 
789 	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
790 		AGGR_LACP_DBG(("trunk link (%s/%d): Malformed MARKER PDU: "
791 		    " MARKER length = %d \n", portp->lp_devname, portp->lp_port,
792 		    markerp->marker_len));
793 		goto bail;
794 	}
795 
796 	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
797 		AGGR_LACP_DBG(("trunk link (%s/%d): MARKER PDU: "
798 		    " MARKER Port %d not equal to Partner port %d\n",
799 		    portp->lp_devname, portp->lp_port,
800 		    markerp->requestor_port,
801 		    portp->lp_lacp.PartnerOperPortNum));
802 		goto bail;
803 	}
804 
805 	if (ether_cmp(&markerp->system_id,
806 	    &portp->lp_lacp.PartnerOperSystem) != 0) {
807 		AGGR_LACP_DBG(("trunk link (%s/%d): MARKER PDU: "
808 		    " MARKER MAC not equal to Partner MAC\n",
809 		    portp->lp_devname, portp->lp_port));
810 		goto bail;
811 	}
812 
813 	/*
814 	 * Turn into Marker Response PDU
815 	 * and return mblk to sending system
816 	 */
817 	markerp->tlv_marker = MARKER_RESPONSE_TLV;
818 
819 	/* reuse the space that was used by received ethernet header */
820 	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
821 	mp->b_rptr -= sizeof (struct ether_header);
822 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
823 	AGGR_LACP_UNLOCK(portp->lp_grp);
824 
825 	portp->lp_tx(portp->lp_tx_arg, mp);
826 	return;
827 
828 bail:
829 	AGGR_LACP_UNLOCK(portp->lp_grp);
830 	freemsg(mp);
831 }
832 
833 
834 /*
835  * Update the LACP mode (off, active, or passive) of the specified group.
836  */
837 void
838 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
839 {
840 	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
841 	aggr_port_t *port;
842 
843 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
844 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
845 
846 	if (mode == old_mode)
847 		return;
848 
849 	grp->lg_lacp_mode = mode;
850 
851 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
852 		port->lp_lacp.ActorAdminPortState.bit.activity =
853 		    port->lp_lacp.ActorOperPortState.bit.activity =
854 		    (mode == AGGR_LACP_ACTIVE);
855 
856 		if (old_mode == AGGR_LACP_OFF) {
857 			/* OFF -> {PASSIVE,ACTIVE} */
858 			/* turn OFF Collector_Distributor */
859 			aggr_set_coll_dist(port, B_FALSE);
860 			rw_enter(&port->lp_lock, RW_WRITER);
861 			lacp_on(port);
862 			if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
863 				aggr_lacp_port_attached(port);
864 			rw_exit(&port->lp_lock);
865 		} else if (mode == AGGR_LACP_OFF) {
866 			/* {PASSIVE,ACTIVE} -> OFF */
867 			rw_enter(&port->lp_lock, RW_WRITER);
868 			lacp_off(port);
869 			rw_exit(&port->lp_lock);
870 			if (!grp->lg_closing) {
871 				/* Turn ON Collector_Distributor */
872 				aggr_set_coll_dist(port, B_TRUE);
873 			}
874 		} else {
875 			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
876 			port->lp_lacp.sm.begin = B_TRUE;
877 			lacp_mux_sm(port);
878 			lacp_periodic_sm(port);
879 
880 			/* kick off state machines */
881 			lacp_receive_sm(port, NULL);
882 			lacp_mux_sm(port);
883 		}
884 
885 		if (grp->lg_closing)
886 			break;
887 	}
888 }
889 
890 
891 /*
892  * Update the LACP timer (short or long) of the specified group.
893  */
894 void
895 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
896 {
897 	aggr_port_t *port;
898 
899 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
900 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
901 
902 	if (timer == grp->aggr.PeriodicTimer)
903 		return;
904 
905 	grp->aggr.PeriodicTimer = timer;
906 
907 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
908 		port->lp_lacp.ActorAdminPortState.bit.timeout =
909 		    port->lp_lacp.ActorOperPortState.bit.timeout =
910 		    (timer == AGGR_LACP_TIMER_SHORT);
911 	}
912 }
913 
914 
915 /*
916  * Sets the initial LACP mode (off, active, passive) and LACP timer
917  * (short, long) of the specified group.
918  */
919 void
920 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
921     aggr_lacp_timer_t timer)
922 {
923 	aggr_port_t *port;
924 
925 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
926 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
927 
928 	grp->lg_lacp_mode = mode;
929 	grp->aggr.PeriodicTimer = timer;
930 
931 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
932 		port->lp_lacp.ActorAdminPortState.bit.activity =
933 		    port->lp_lacp.ActorOperPortState.bit.activity =
934 		    (mode == AGGR_LACP_ACTIVE);
935 
936 		port->lp_lacp.ActorAdminPortState.bit.timeout =
937 			port->lp_lacp.ActorOperPortState.bit.timeout =
938 			(timer == AGGR_LACP_TIMER_SHORT);
939 
940 		if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
941 			/* Turn ON Collector_Distributor */
942 			aggr_set_coll_dist(port, B_TRUE);
943 		} else { /* LACP_ACTIVE/PASSIVE */
944 			rw_enter(&port->lp_lock, RW_WRITER);
945 			lacp_on(port);
946 			rw_exit(&port->lp_lock);
947 		}
948 	}
949 }
950 
951 static void
952 lacp_misconfig_walker(aggr_grp_t *grp, void *arg)
953 {
954 	lacp_misconfig_check_state_t *state = arg;
955 	aggr_port_t *port = state->cs_portp;
956 	aggr_port_t *cport;
957 
958 	if (state->cs_found)
959 		return;
960 
961 	if (grp == state->cs_portp->lp_grp)
962 		return;
963 
964 	AGGR_LACP_LOCK(grp);
965 	rw_enter(&grp->lg_lock, RW_READER);
966 
967 	for (cport = grp->lg_ports; cport != NULL; cport = cport->lp_next) {
968 		if ((ether_cmp(&port->lp_lacp.PartnerOperSystem,
969 		    &grp->aggr.PartnerSystem) == 0) &&
970 		    (port->lp_lacp.PartnerOperKey ==
971 		    grp->aggr.PartnerOperAggrKey)) {
972 			/*
973 			 * The Partner port information is already in use
974 			 * by ports in another aggregation so disable this
975 			 * port.
976 			 */
977 			port->lp_lacp.sm.selected = AGGR_UNSELECTED;
978 			cmn_err(CE_NOTE, "aggr: (%s/%d): Port Partner "
979 			    "MAC and key (%d) in use on aggregation "
980 			    "key (%d)\n",
981 			    port->lp_devname, port->lp_port,
982 			    port->lp_lacp.PartnerOperKey,
983 			    grp->aggr.PartnerOperAggrKey);
984 			state->cs_found = B_TRUE;
985 			break;
986 		}
987 	}
988 
989 	rw_exit(&grp->lg_lock);
990 	AGGR_LACP_UNLOCK(grp);
991 }
992 
993 /*
994  * Verify that the Partner MAC and Key recorded by the specified
995  * port are not found in other ports that are not part of our
996  * aggregation. Returns B_TRUE if such a port is found, B_FALSE
997  * otherwise.
998  */
999 static boolean_t
1000 lacp_misconfig_check(aggr_port_t *portp)
1001 {
1002 	lacp_misconfig_check_state_t state;
1003 
1004 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1005 
1006 	state.cs_portp = portp;
1007 	state.cs_found = B_FALSE;
1008 
1009 	aggr_grp_walk(lacp_misconfig_walker, &state);
1010 
1011 	return (state.cs_found);
1012 }
1013 
1014 
1015 /*
1016  * lacp_selection_logic - LACP selection logic
1017  *		Sets the selected variable on a per port basis
1018  *		and sets Ready when all waiting ports are ready
1019  *		to go online.
1020  *
1021  * parameters:
1022  *      - portp - instance this applies to.
1023  *
1024  * invoked:
1025  *    - when initialization is needed
1026  *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1027  *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1028  *    - every time the wait_while_timer pops
1029  *    - everytime we turn LACP on/off
1030  */
1031 static void
1032 lacp_selection_logic(aggr_port_t *portp)
1033 {
1034 	aggr_port_t *tpp;
1035 	aggr_grp_t *aggrp = portp->lp_grp;
1036 	int ports_waiting;
1037 	boolean_t reset_mac = B_FALSE;
1038 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1039 
1040 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
1041 
1042 	/* LACP_OFF state not in specification so check here.  */
1043 	if (!pl->sm.lacp_on) {
1044 		pl->sm.selected = AGGR_UNSELECTED;
1045 		aggrp->aggr.ready = B_FALSE;
1046 		lacp_mux_sm(portp);
1047 		return;
1048 	}
1049 
1050 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1051 	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1052 
1053 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): "
1054 		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1055 		    "lp_state=%d)\n", portp->lp_devname, portp->lp_port,
1056 		    pl->sm.selected, AGGR_UNSELECTED,
1057 		    pl->sm.begin, pl->sm.lacp_enabled,
1058 		    portp->lp_state));
1059 
1060 		pl->sm.selected = AGGR_UNSELECTED;
1061 		aggrp->aggr.ready = B_FALSE;
1062 		lacp_mux_sm(portp);
1063 		return;
1064 	}
1065 
1066 	/*
1067 	 * If LACP is not enabled then selected is never set.
1068 	 */
1069 	if (!pl->sm.lacp_enabled) {
1070 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): "
1071 		    "selected %d-->%d\n", portp->lp_devname, portp->lp_port,
1072 		    pl->sm.selected, AGGR_UNSELECTED));
1073 
1074 		pl->sm.selected = AGGR_UNSELECTED;
1075 		lacp_mux_sm(portp);
1076 		return;
1077 	}
1078 
1079 	/*
1080 	 * Check if the Partner MAC or Key are zero. If so, we have
1081 	 * not received any LACP info or it has expired and the
1082 	 * receive machine is in the LACP_DEFAULTED state.
1083 	 */
1084 	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1085 	    (pl->PartnerOperKey == 0)) {
1086 
1087 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1088 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1089 			    &etherzeroaddr) != 0 &&
1090 			    (tpp->lp_lacp.PartnerOperKey != 0))
1091 				break;
1092 		}
1093 
1094 		/*
1095 		 * If all ports have no key or aggregation address,
1096 		 * then clear the negotiated Partner MAC and key.
1097 		 */
1098 		if (tpp == NULL) {
1099 			/* Clear the aggregation Partner MAC and key */
1100 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1101 			aggrp->aggr.PartnerOperAggrKey = 0;
1102 		}
1103 
1104 		return;
1105 	}
1106 
1107 	/*
1108 	 * Insure that at least one port in the aggregation
1109 	 * matches the Partner aggregation MAC and key. If not,
1110 	 * then clear the aggregation MAC and key. Later we will
1111 	 * set the Partner aggregation MAC and key to that of the
1112 	 * current port's Partner MAC and key.
1113 	 */
1114 	if (ether_cmp(&pl->PartnerOperSystem,
1115 	    &aggrp->aggr.PartnerSystem) != 0 ||
1116 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1117 
1118 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1119 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1120 			    &aggrp->aggr.PartnerSystem) == 0 &&
1121 			    (tpp->lp_lacp.PartnerOperKey ==
1122 			    aggrp->aggr.PartnerOperAggrKey))
1123 				break;
1124 		}
1125 
1126 		if (tpp == NULL) {
1127 			/* Clear the aggregation Partner MAC and key */
1128 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1129 			aggrp->aggr.PartnerOperAggrKey = 0;
1130 			reset_mac = B_TRUE;
1131 		}
1132 	}
1133 
1134 	/*
1135 	 * If our Actor MAC is found in the Partner MAC
1136 	 * on this port then we have a loopback misconfiguration.
1137 	 */
1138 	if (ether_cmp(&pl->PartnerOperSystem,
1139 	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1140 		pl->sm.selected = AGGR_UNSELECTED;
1141 		cmn_err(CE_NOTE, "trunk link: (%s/%d): Loopback condition.\n",
1142 		    portp->lp_devname, portp->lp_port);
1143 
1144 		lacp_mux_sm(portp);
1145 		return;
1146 	}
1147 
1148 	/*
1149 	 * If our Partner MAC and Key are found on any other
1150 	 * ports that are not in our aggregation, we have
1151 	 * a misconfiguration.
1152 	 */
1153 	if (lacp_misconfig_check(portp)) {
1154 		lacp_mux_sm(portp);
1155 		return;
1156 	}
1157 
1158 	/*
1159 	 * If the Aggregation Partner MAC and Key have not been
1160 	 * set, then this is either the first port or the aggregation
1161 	 * MAC and key have been reset. In either case we must set
1162 	 * the values of the Partner MAC and key.
1163 	 */
1164 	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1165 	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1166 		/* Set aggregation Partner MAC and key */
1167 		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1168 		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1169 
1170 		/*
1171 		 * If we reset Partner aggregation MAC, then restart
1172 		 * selection_logic on ports that match new MAC address.
1173 		 */
1174 		if (reset_mac) {
1175 			for (tpp = aggrp->lg_ports; tpp; tpp =
1176 			    tpp->lp_next) {
1177 				if (tpp == portp)
1178 					continue;
1179 				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1180 				    &aggrp->aggr.PartnerSystem) == 0 &&
1181 				    (tpp->lp_lacp.PartnerOperKey ==
1182 				    aggrp->aggr.PartnerOperAggrKey))
1183 					lacp_selection_logic(tpp);
1184 			}
1185 		}
1186 	} else if (ether_cmp(&pl->PartnerOperSystem,
1187 	    &aggrp->aggr.PartnerSystem) != 0 ||
1188 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1189 		/*
1190 		 * The Partner port information does not match
1191 		 * that of the other ports in the aggregation
1192 		 * so disable this port.
1193 		 */
1194 		pl->sm.selected = AGGR_UNSELECTED;
1195 		cmn_err(CE_NOTE, "trunk link: (%s/%d): Port Partner MAC or"
1196 		    " key (%d) incompatible with Aggregation Partner "
1197 		    "MAC or key (%d)\n",
1198 		    portp->lp_devname, portp->lp_port,
1199 		    pl->PartnerOperKey, aggrp->aggr.PartnerOperAggrKey);
1200 
1201 		lacp_mux_sm(portp);
1202 		return;
1203 	}
1204 
1205 	/* If we get to here, automatically set selected */
1206 	if (pl->sm.selected != AGGR_SELECTED) {
1207 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): "
1208 		    "selected %d-->%d\n", portp->lp_devname, portp->lp_port,
1209 		    pl->sm.selected, AGGR_SELECTED));
1210 
1211 		pl->sm.selected = AGGR_SELECTED;
1212 		lacp_mux_sm(portp);
1213 	}
1214 
1215 	/*
1216 	 * From this point onward we have selected the port
1217 	 * and are simply checking if the Ready flag should
1218 	 * be set.
1219 	 */
1220 
1221 	/*
1222 	 * If at least two ports are waiting to aggregate
1223 	 * and ready_n is set on all ports waiting to aggregate
1224 	 * then set READY for the aggregation.
1225 	 */
1226 
1227 	ports_waiting = 0;
1228 
1229 	if (!aggrp->aggr.ready) {
1230 		/*
1231 		 * If all ports in the aggregation have received compatible
1232 		 * partner information and they match up correctly with the
1233 		 * switch, there is no need to wait for all the
1234 		 * wait_while_timers to pop.
1235 		 */
1236 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1237 			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1238 			    tpp->lp_lacp.sm.begin) &&
1239 			    !pl->PartnerOperPortState.bit.sync) {
1240 				/* Add up ports uninitialized or waiting */
1241 				ports_waiting++;
1242 				if (!tpp->lp_lacp.sm.ready_n)
1243 					return;
1244 			}
1245 		}
1246 	}
1247 
1248 	if (aggrp->aggr.ready) {
1249 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): "
1250 		    "aggr.ready already set\n", portp->lp_devname,
1251 		    portp->lp_port));
1252 		lacp_mux_sm(portp);
1253 	} else {
1254 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): Ready %d-->%d\n",
1255 		    portp->lp_devname, portp->lp_port, aggrp->aggr.ready,
1256 		    B_TRUE));
1257 		aggrp->aggr.ready = B_TRUE;
1258 
1259 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1260 			lacp_mux_sm(tpp);
1261 	}
1262 
1263 }
1264 
1265 /*
1266  * wait_while_timer_pop - When the timer pops, we arrive here to
1267  *			set ready_n and trigger the selection logic.
1268  */
1269 static void
1270 wait_while_timer_pop(void *data)
1271 {
1272 	aggr_port_t *portp = data;
1273 
1274 	if (portp->lp_closing)
1275 		return;
1276 
1277 	AGGR_LACP_LOCK(portp->lp_grp);
1278 
1279 	AGGR_LACP_DBG(("trunk link:(%s/%d): wait_while_timer pop \n",
1280 	    portp->lp_devname, portp->lp_port));
1281 	portp->lp_lacp.wait_while_timer.id = 0;
1282 	portp->lp_lacp.sm.ready_n = B_TRUE;
1283 
1284 	lacp_selection_logic(portp);
1285 	AGGR_LACP_UNLOCK(portp->lp_grp);
1286 }
1287 
1288 static void
1289 start_wait_while_timer(aggr_port_t *portp)
1290 {
1291 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1292 
1293 	if (portp->lp_lacp.wait_while_timer.id == 0) {
1294 		portp->lp_lacp.wait_while_timer.id =
1295 		    timeout(wait_while_timer_pop, portp,
1296 		    drv_usectohz(1000000 *
1297 		    portp->lp_lacp.wait_while_timer.val));
1298 	}
1299 }
1300 
1301 
1302 static void
1303 stop_wait_while_timer(portp)
1304 aggr_port_t *portp;
1305 {
1306 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1307 
1308 	if (portp->lp_lacp.wait_while_timer.id != 0) {
1309 		AGGR_LACP_UNLOCK(portp->lp_grp);
1310 		(void) untimeout(portp->lp_lacp.wait_while_timer.id);
1311 		AGGR_LACP_LOCK(portp->lp_grp);
1312 		portp->lp_lacp.wait_while_timer.id = 0;
1313 	}
1314 }
1315 
1316 /*
1317  * Invoked when a port has been attached to a group.
1318  * Complete the processing that couldn't be finished from lacp_on()
1319  * because the port was not started. We know that the link is full
1320  * duplex and ON, otherwise it wouldn't be attached.
1321  */
1322 void
1323 aggr_lacp_port_attached(aggr_port_t *portp)
1324 {
1325 	aggr_grp_t *grp = portp->lp_grp;
1326 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1327 
1328 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1329 	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1330 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1331 
1332 	AGGR_LACP_DBG(("aggr_lacp_port_attached: "
1333 	    "port %s/%d\n", portp->lp_devname, portp->lp_port));
1334 
1335 	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1336 
1337 	if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
1338 		pl->ActorAdminPortState.bit.activity =
1339 		    pl->ActorOperPortState.bit.activity = B_FALSE;
1340 
1341 		/* Turn ON Collector_Distributor */
1342 		aggr_set_coll_dist_locked(portp, B_TRUE);
1343 
1344 		return;
1345 	}
1346 
1347 	pl->ActorAdminPortState.bit.activity =
1348 	    pl->ActorOperPortState.bit.activity =
1349 	    (grp->lg_lacp_mode == AGGR_LACP_ACTIVE);
1350 
1351 	pl->ActorAdminPortState.bit.timeout =
1352 	    pl->ActorOperPortState.bit.timeout =
1353 	    (grp->aggr.PeriodicTimer == AGGR_LACP_TIMER_SHORT);
1354 
1355 	pl->sm.lacp_enabled = B_TRUE;
1356 	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1357 	pl->sm.begin = B_TRUE;
1358 
1359 	if (!pl->sm.lacp_on) {
1360 		/* Turn OFF Collector_Distributor */
1361 		aggr_set_coll_dist_locked(portp, B_FALSE);
1362 
1363 		lacp_on(portp);
1364 	} else {
1365 		lacp_receive_sm(portp, NULL);
1366 		lacp_mux_sm(portp);
1367 
1368 		/* Enable Multicast Slow Protocol address */
1369 		aggr_lacp_mcast_on(portp);
1370 
1371 		/* periodic_sm is started up from the receive machine */
1372 		lacp_selection_logic(portp);
1373 	}
1374 }
1375 
1376 /*
1377  * Invoked when a port has been detached from a group. Turn off
1378  * LACP processing if it was enabled.
1379  */
1380 void
1381 aggr_lacp_port_detached(aggr_port_t *portp)
1382 {
1383 	aggr_grp_t *grp = portp->lp_grp;
1384 
1385 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1386 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1387 
1388 	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %s/%d\n",
1389 	    portp->lp_devname, portp->lp_port));
1390 
1391 	portp->lp_lacp.sm.port_enabled = B_FALSE;
1392 
1393 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1394 		return;
1395 
1396 	/* Disable Slow Protocol PDUs */
1397 	lacp_off(portp);
1398 }
1399 
1400 
/*
 * Invoked after the outbound port selection policy has been changed.
 * The caller must hold the group's LACP lock and hold lg_lock as
 * writer (both are asserted); this function then blocks in delay().
 */
void
aggr_lacp_policy_changed(aggr_grp_t *grp)
{
	ASSERT(AGGR_LACP_LOCK_HELD(grp));
	ASSERT(RW_WRITE_HELD(&grp->lg_lock));

	/*
	 * suspend transmission for CollectorMaxDelay time
	 * NOTE(review): delay() takes a count of clock ticks; confirm
	 * that CollectorMaxDelay * 10 is really meant as ticks.
	 */
	delay(grp->aggr.CollectorMaxDelay * 10);
}
1413 
1414 
1415 /*
1416  * Enable Slow Protocol LACP and Marker PDUs.
1417  */
1418 static void
1419 lacp_on(aggr_port_t *portp)
1420 {
1421 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1422 	ASSERT(RW_WRITE_HELD(&portp->lp_grp->lg_lock));
1423 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1424 
1425 	/*
1426 	 * Reset the state machines and Partner operational
1427 	 * information. Careful to not reset things like
1428 	 * our link state.
1429 	 */
1430 	lacp_reset_port(portp);
1431 	portp->lp_lacp.sm.lacp_on = B_TRUE;
1432 
1433 	AGGR_LACP_DBG(("lacp_on:(%s/%d): \n", portp->lp_devname,
1434 	    portp->lp_port));
1435 
1436 	lacp_receive_sm(portp, NULL);
1437 	lacp_mux_sm(portp);
1438 
1439 	if (portp->lp_state != AGGR_PORT_STATE_ATTACHED)
1440 		return;
1441 
1442 	/* Enable Multicast Slow Protocol address */
1443 	aggr_lacp_mcast_on(portp);
1444 
1445 	/* periodic_sm is started up from the receive machine */
1446 	lacp_selection_logic(portp);
1447 } /* lacp_on */
1448 
1449 
/*
 * Disable Slow Protocol LACP and Marker PDUs.
 *
 * Entered with the group's LACP lock held and with both the group
 * lock and the port lock held as writer. The two rwlocks are dropped
 * while the timers are stopped and are re-acquired (group first,
 * then port) before returning.
 */
static void
lacp_off(aggr_port_t *portp)
{
	aggr_grp_t *grp = portp->lp_grp;

	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
	ASSERT(RW_WRITE_HELD(&portp->lp_lock));

	portp->lp_lacp.sm.lacp_on = B_FALSE;

	AGGR_LACP_DBG(("lacp_off:(%s/%d): \n", portp->lp_devname,
	    portp->lp_port));

	/*
	 * Disable Slow Protocol Timers. We must temporarily release
	 * the group and port locks in order to avoid deadlocks. Make
	 * sure that neither the port nor the group is closing after
	 * re-acquiring their locks.
	 */
	rw_exit(&portp->lp_lock);
	rw_exit(&grp->lg_lock);

	stop_periodic_timer(portp);
	stop_current_while_timer(portp);
	stop_wait_while_timer(portp);

	rw_enter(&grp->lg_lock, RW_WRITER);
	rw_enter(&portp->lp_lock, RW_WRITER);

	/* Only run the state machines if nothing started closing meanwhile. */
	if (!portp->lp_closing && !grp->lg_closing) {
		lacp_mux_sm(portp);
		lacp_periodic_sm(portp);
		lacp_selection_logic(portp);
	}

	/* Turn OFF Collector_Distributor */
	aggr_set_coll_dist_locked(portp, B_FALSE);

	/* Disable Multicast Slow Protocol address */
	aggr_lacp_mcast_off(portp);

	lacp_reset_port(portp);
}
1495 
1496 
1497 static boolean_t
1498 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1499 {
1500 	/*
1501 	 * 43.4.12 - "a Receive machine shall not validate
1502 	 * the Version Number, TLV_type, or Reserved fields in received
1503 	 * LACPDUs."
1504 	 * ... "a Receive machine may validate the Actor_Information_Length,
1505 	 * Partner_Information_Length, Collector_Information_Length,
1506 	 * or Terminator_Length fields."
1507 	 */
1508 	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1509 	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1510 	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1511 	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1512 		AGGR_LACP_DBG(("trunk link (%s/%d): Malformed LACPDU: "
1513 		    " Terminator Length = %d \n", portp->lp_devname,
1514 		    portp->lp_port, lacp->terminator_len));
1515 		return (B_FALSE);
1516 	}
1517 
1518 	return (B_TRUE);
1519 }
1520 
1521 
1522 static void
1523 start_current_while_timer(aggr_port_t *portp, uint_t time)
1524 {
1525 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1526 
1527 	if (portp->lp_lacp.current_while_timer.id == 0) {
1528 		if (time > 0) {
1529 			portp->lp_lacp.current_while_timer.val = time;
1530 		} else if (portp->lp_lacp.ActorOperPortState.bit.timeout) {
1531 			portp->lp_lacp.current_while_timer.val =
1532 			    SHORT_TIMEOUT_TIME;
1533 		} else {
1534 			portp->lp_lacp.current_while_timer.val =
1535 			    LONG_TIMEOUT_TIME;
1536 		}
1537 
1538 		portp->lp_lacp.current_while_timer.id =
1539 		    timeout(current_while_timer_pop, portp,
1540 		    drv_usectohz((clock_t)1000000 *
1541 		    (clock_t)portp->lp_lacp.current_while_timer.val));
1542 	}
1543 }
1544 
1545 
1546 static void
1547 stop_current_while_timer(aggr_port_t *portp)
1548 {
1549 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1550 
1551 	if (portp->lp_lacp.current_while_timer.id != 0) {
1552 		AGGR_LACP_UNLOCK(portp->lp_grp);
1553 		(void) untimeout(portp->lp_lacp.current_while_timer.id);
1554 		AGGR_LACP_LOCK(portp->lp_grp);
1555 		portp->lp_lacp.current_while_timer.id = 0;
1556 	}
1557 }
1558 
1559 
1560 static void
1561 current_while_timer_pop(void *data)
1562 {
1563 	aggr_port_t *portp = (aggr_port_t *)data;
1564 
1565 	if (portp->lp_closing)
1566 		return;
1567 
1568 	AGGR_LACP_LOCK(portp->lp_grp);
1569 
1570 	AGGR_LACP_DBG(("trunk link:(%s/%d): current_while_timer "
1571 	    "pop id=%p\n", portp->lp_devname, portp->lp_port,
1572 	    portp->lp_lacp.current_while_timer.id));
1573 
1574 	portp->lp_lacp.current_while_timer.id = 0;
1575 	lacp_receive_sm(portp, NULL);
1576 	AGGR_LACP_UNLOCK(portp->lp_grp);
1577 }
1578 
1579 
1580 /*
1581  * record_Default - Simply copies over administrative values
1582  * to the partner operational values, and sets our state to indicate we
1583  * are using defaulted values.
1584  */
1585 static void
1586 record_Default(aggr_port_t *portp)
1587 {
1588 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1589 
1590 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1591 
1592 	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1593 	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1594 	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1595 	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1596 	pl->PartnerOperKey = pl->PartnerAdminKey;
1597 	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1598 
1599 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1600 }
1601 
1602 
1603 /* Returns B_TRUE on sync value changing */
1604 static boolean_t
1605 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1606 {
1607 	aggr_grp_t *aggrp = portp->lp_grp;
1608 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1609 	uint8_t save_sync;
1610 
1611 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1612 
1613 	/*
1614 	 * Partner Information
1615 	 */
1616 	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1617 	pl->PartnerOperPortPriority =
1618 	    ntohs(lacp->actor_info.port_priority);
1619 	pl->PartnerOperSystem = lacp->actor_info.system_id;
1620 	pl->PartnerOperSysPriority =
1621 	    htons(lacp->actor_info.system_priority);
1622 	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1623 
1624 	/* All state info except for Synchronization */
1625 	save_sync = pl->PartnerOperPortState.bit.sync;
1626 	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1627 
1628 	/* Defaulted set to FALSE */
1629 	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1630 
1631 	/*
1632 	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1633 	 *		Partner_System_Priority, Partner_Key, and
1634 	 *		Partner_State.Aggregation) are compared to the
1635 	 *		corresponding operations paramters values for
1636 	 *		the Actor. If these are equal, or if this is
1637 	 *		an individual link, we are synchronized.
1638 	 */
1639 	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1640 	    (ntohs(lacp->partner_info.port_priority) ==
1641 	    pl->ActorPortPriority) &&
1642 	    (ether_cmp(&lacp->partner_info.system_id,
1643 		(struct ether_addr *)&aggrp->lg_addr) == 0) &&
1644 	    (ntohs(lacp->partner_info.system_priority) ==
1645 	    aggrp->aggr.ActorSystemPriority) &&
1646 	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1647 	    (lacp->partner_info.state.bit.aggregation ==
1648 	    pl->ActorOperPortState.bit.aggregation)) ||
1649 	    (!lacp->actor_info.state.bit.aggregation)) {
1650 
1651 		pl->PartnerOperPortState.bit.sync =
1652 		    lacp->actor_info.state.bit.sync;
1653 	} else {
1654 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1655 	}
1656 
1657 	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1658 		AGGR_LACP_DBG(("record_PDU:(%s/%d): partner sync "
1659 		    "%d -->%d\n", portp->lp_devname, portp->lp_port,
1660 		    save_sync, pl->PartnerOperPortState.bit.sync));
1661 		return (B_TRUE);
1662 	} else {
1663 		return (B_FALSE);
1664 	}
1665 }
1666 
1667 
1668 /*
1669  * update_selected - If any of the Partner parameters has
1670  *			changed from a previous value, then
1671  *			unselect the link from the aggregator.
1672  */
1673 static boolean_t
1674 update_selected(aggr_port_t *portp, lacp_t *lacp)
1675 {
1676 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1677 
1678 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1679 
1680 	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1681 	    (pl->PartnerOperPortPriority !=
1682 	    ntohs(lacp->actor_info.port_priority)) ||
1683 	    (ether_cmp(&pl->PartnerOperSystem,
1684 	    &lacp->actor_info.system_id) != 0) ||
1685 	    (pl->PartnerOperSysPriority !=
1686 	    ntohs(lacp->actor_info.system_priority)) ||
1687 	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1688 	    (pl->PartnerOperPortState.bit.aggregation !=
1689 	    lacp->actor_info.state.bit.aggregation)) {
1690 		AGGR_LACP_DBG(("update_selected:(%s/%d): "
1691 		    "selected  %d-->%d\n", portp->lp_devname, portp->lp_port,
1692 		    pl->sm.selected, AGGR_UNSELECTED));
1693 
1694 		pl->sm.selected = AGGR_UNSELECTED;
1695 		return (B_TRUE);
1696 	} else {
1697 		return (B_FALSE);
1698 	}
1699 }
1700 
1701 
1702 /*
1703  * update_default_selected - If any of the operational Partner parameters
1704  *			is different than that of the administrative values
1705  *			then unselect the link from the aggregator.
1706  */
1707 static void
1708 update_default_selected(aggr_port_t *portp)
1709 {
1710 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1711 
1712 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1713 
1714 	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1715 	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1716 	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1717 	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1718 	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1719 	    (pl->PartnerOperPortState.bit.aggregation !=
1720 	    pl->PartnerAdminPortState.bit.aggregation)) {
1721 
1722 		AGGR_LACP_DBG(("update_default_selected:(%s/%d): "
1723 		    "selected  %d-->%d\n", portp->lp_devname, portp->lp_port,
1724 		    pl->sm.selected, AGGR_UNSELECTED));
1725 		pl->sm.selected = AGGR_UNSELECTED;
1726 	}
1727 }
1728 
1729 
1730 /*
1731  * update_NTT - If any of the Partner values in the received LACPDU
1732  *			are different than that of the Actor operational
1733  *			values then set NTT to true.
1734  */
1735 static void
1736 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1737 {
1738 	aggr_grp_t *aggrp = portp->lp_grp;
1739 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1740 
1741 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1742 
1743 	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1744 	    (pl->ActorPortPriority !=
1745 	    ntohs(lacp->partner_info.port_priority)) ||
1746 	    (ether_cmp(&aggrp->lg_addr,
1747 	    &lacp->partner_info.system_id) != 0) ||
1748 	    (aggrp->aggr.ActorSystemPriority !=
1749 	    ntohs(lacp->partner_info.system_priority)) ||
1750 	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1751 	    (pl->ActorOperPortState.bit.activity !=
1752 	    lacp->partner_info.state.bit.activity) ||
1753 	    (pl->ActorOperPortState.bit.timeout !=
1754 	    lacp->partner_info.state.bit.timeout) ||
1755 	    (pl->ActorOperPortState.bit.sync !=
1756 	    lacp->partner_info.state.bit.sync) ||
1757 	    (pl->ActorOperPortState.bit.aggregation !=
1758 	    lacp->partner_info.state.bit.aggregation)) {
1759 
1760 		AGGR_LACP_DBG(("update_NTT:(%s/%d): NTT  %d-->%d\n",
1761 		    portp->lp_devname, portp->lp_port, pl->NTT,
1762 		    B_TRUE));
1763 
1764 		pl->NTT = B_TRUE;
1765 	}
1766 }
1767 
/*
 * lacp_receive_sm - LACP receive state machine
 *
 * parameters:
 *      - portp - instance this applies to.
 *      - lacp - pointer in the case of a received LACPDU.
 *                This value is NULL if there is no LACPDU.
 *
 * invoked:
 *    - when initialization is needed
 *    - upon reception of an LACPDU. This is the common case.
 *    - every time the current_while_timer pops
 *
 * The function first computes the next receive state from the port's
 * flags, the presence of an LACPDU and the current_while_timer, and
 * then performs the actions of that state. Some states re-enter the
 * machine recursively after moving to a follow-up state. The caller
 * must hold the LACP lock of the port's group.
 */
static void
lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
{
	boolean_t sync_updated, selected_updated, save_activity;
	aggr_lacp_port_t *pl = &portp->lp_lacp;
	lacp_receive_state_t oldstate = pl->sm.receive_state;

	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));

	/* LACP_OFF state not in specification so check here.  */
	if (!pl->sm.lacp_on)
		return;

	/* figure next state */
	if (pl->sm.begin || pl->sm.port_moved) {
		pl->sm.receive_state = LACP_INITIALIZE;
	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
		pl->sm.receive_state = LACP_PORT_DISABLED;
	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
		pl->sm.receive_state =
		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
		    LACP_DISABLED : LACP_PORT_DISABLED;
	} else if (lacp != NULL) {
		/* An LACPDU brings an expired/defaulted port back to CURRENT */
		if ((pl->sm.receive_state == LACP_EXPIRED) ||
		    (pl->sm.receive_state == LACP_DEFAULTED)) {
			pl->sm.receive_state = LACP_CURRENT;
		}
	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
	    (pl->current_while_timer.id == 0)) {
		/* No LACPDU and no timer pending: first expiry */
		pl->sm.receive_state = LACP_EXPIRED;
	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
	    (pl->current_while_timer.id == 0)) {
		/* Second expiry in a row: fall back to the defaults */
		pl->sm.receive_state = LACP_DEFAULTED;
	}


	/* Log the transition, except for an LACPDU received in CURRENT */
	if (!((lacp && (oldstate == LACP_CURRENT) &&
	    (pl->sm.receive_state == LACP_CURRENT)))) {
		AGGR_LACP_DBG(("lacp_receive_sm(%s/%d):%s--->%s\n",
		    portp->lp_devname, portp->lp_port,
		    lacp_receive_str[oldstate],
		    lacp_receive_str[pl->sm.receive_state]));
	}

	switch (pl->sm.receive_state) {
	case LACP_INITIALIZE:
		pl->sm.selected = AGGR_UNSELECTED;
		record_Default(portp);
		pl->ActorOperPortState.bit.expired = B_FALSE;
		pl->sm.port_moved = B_FALSE;
		pl->sm.receive_state = LACP_PORT_DISABLED;
		pl->sm.begin = B_FALSE;
		/* Re-run the machine from the LACP_PORT_DISABLED state */
		lacp_receive_sm(portp, NULL);
		break;

	case LACP_PORT_DISABLED:
		pl->PartnerOperPortState.bit.sync = B_FALSE;
		/*
		 * Stop current_while_timer in case
		 * we got here from link down
		 */
		stop_current_while_timer(portp);

		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
			pl->sm.receive_state = LACP_DISABLED;
			lacp_receive_sm(portp, lacp);
			/* We goto LACP_DISABLED state */
			break;
		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
			pl->sm.receive_state = LACP_EXPIRED;
			/*
			 * FALL THROUGH TO LACP_EXPIRED CASE:
			 * We have no way of knowing if we get into
			 * lacp_receive_sm() from a  current_while_timer
			 * expiring as it has never been kicked off yet!
			 */
		} else {
			/* We stay in LACP_PORT_DISABLED state */
			break;
		}
		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
		/* FALLTHROUGH */

	case LACP_EXPIRED:
		/*
		 * Arrives here from LACP_PORT_DISABLED state as well as
		 * from the current_while_timer expiring.
		 */
		pl->PartnerOperPortState.bit.sync = B_FALSE;
		pl->PartnerOperPortState.bit.timeout = B_TRUE;

		pl->ActorOperPortState.bit.expired = B_TRUE;
		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
		lacp_periodic_sm(portp);
		break;

	case LACP_DISABLED:
		/*
		 * This is the normal state for recv_sm when LACP_OFF
		 * is set or the NIC is in half duplex mode.
		 */
		pl->sm.selected = AGGR_UNSELECTED;
		record_Default(portp);
		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
		pl->ActorOperPortState.bit.expired = B_FALSE;
		break;

	case LACP_DEFAULTED:
		/*
		 * Current_while_timer expired a second time.
		 */
		update_default_selected(portp);
		record_Default(portp);	/* overwrite Partner Oper val */
		pl->ActorOperPortState.bit.expired = B_FALSE;
		pl->PartnerOperPortState.bit.sync = B_TRUE;

		lacp_selection_logic(portp);
		lacp_mux_sm(portp);
		break;

	case LACP_CURRENT:
		/*
		 * Reception of LACPDU
		 */

		if (!lacp) /* no LACPDU so current_while_timer popped */
			break;

		AGGR_LACP_DBG(("lacp_receive_sm: (%s/%d): "
		    "LACPDU received:\n", portp->lp_devname, portp->lp_port));

		/*
		 * Validate Actor_Information_Length,
		 * Partner_Information_Length, Collector_Information_Length,
		 * and Terminator_Length fields.
		 */
		if (!valid_lacp_pdu(portp, lacp)) {
			AGGR_LACP_DBG(("lacp_receive_sm (%s/%d): "
			    "Invalid LACPDU received\n",
			    portp->lp_devname, portp->lp_port));
			break;
		}

		/*
		 * update_selected() and update_NTT() compare against the
		 * values recorded so far, so they must run before
		 * record_PDU() overwrites the Partner information.
		 */
		save_activity = pl->PartnerOperPortState.bit.activity;
		selected_updated = update_selected(portp, lacp);
		update_NTT(portp, lacp);
		sync_updated = record_PDU(portp, lacp);

		pl->ActorOperPortState.bit.expired = B_FALSE;

		if (selected_updated) {
			lacp_selection_logic(portp);
			lacp_mux_sm(portp);
		} else if (sync_updated) {
			lacp_mux_sm(portp);
		}

		/*
		 * If the periodic timer value bit has been modified
		 * or the partner activity bit has been changed then
		 * we need to respectively:
		 *  - restart the timer with the proper timeout value.
		 *  - possibly enable/disable transmission of LACPDUs.
		 */
		if ((pl->PartnerOperPortState.bit.timeout &&
		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
		    (!pl->PartnerOperPortState.bit.timeout &&
		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
		    (pl->PartnerOperPortState.bit.activity !=
		    save_activity)) {
			lacp_periodic_sm(portp);
		}

		/* Restart the current_while_timer for the next LACPDU */
		stop_current_while_timer(portp);
		/* Check if we need to transmit an LACPDU */
		if (pl->NTT)
			lacp_xmit_sm(portp);
		start_current_while_timer(portp, 0);

		break;
	}
}
1963 
/*
 * Enable or disable the Collector_Distributor of a port. This is
 * aggr_set_coll_dist_locked() for callers that do not already hold
 * the port lock: lp_lock is taken as writer around the call.
 */
static void
aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
{
	rw_enter(&portp->lp_lock, RW_WRITER);
	aggr_set_coll_dist_locked(portp, enable);
	rw_exit(&portp->lp_lock);
}
1971 
1972 static void
1973 aggr_set_coll_dist_locked(aggr_port_t *portp, boolean_t enable)
1974 {
1975 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1976 
1977 	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%s/%d) %s\n",
1978 	    portp->lp_devname, portp->lp_port,
1979 	    enable ? "ENABLED" : "DISABLED"));
1980 
1981 	if (!enable) {
1982 		/*
1983 		 * Turn OFF Collector_Distributor.
1984 		 */
1985 		portp->lp_collector_enabled = B_FALSE;
1986 		aggr_send_port_disable(portp);
1987 		return;
1988 	}
1989 
1990 	/*
1991 	 * Turn ON Collector_Distributor.
1992 	 */
1993 
1994 	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
1995 	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
1996 		/* Port is compatible and can be aggregated */
1997 		portp->lp_collector_enabled = B_TRUE;
1998 		aggr_send_port_enable(portp);
1999 	}
2000 }
2001 
2002 /*
2003  * Process a received Marker or LACPDU.
2004  */
2005 void
2006 aggr_lacp_rx(aggr_port_t *portp, mblk_t *dmp)
2007 {
2008 	lacp_t	*lacp;
2009 
2010 	dmp->b_rptr += sizeof (struct ether_header);
2011 
2012 	if (MBLKL(dmp) < sizeof (lacp_t)) {
2013 		freemsg(dmp);
2014 		return;
2015 	}
2016 
2017 	lacp = (lacp_t *)dmp->b_rptr;
2018 
2019 	switch (lacp->subtype) {
2020 	case LACP_SUBTYPE:
2021 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s/%d): "
2022 		    "LACPDU received.\n", portp->lp_devname, portp->lp_port));
2023 
2024 		AGGR_LACP_LOCK(portp->lp_grp);
2025 		if (!portp->lp_lacp.sm.lacp_on) {
2026 			AGGR_LACP_UNLOCK(portp->lp_grp);
2027 			break;
2028 		}
2029 		lacp_receive_sm(portp, lacp);
2030 		AGGR_LACP_UNLOCK(portp->lp_grp);
2031 		break;
2032 
2033 	case MARKER_SUBTYPE:
2034 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s%d): "
2035 		    "Marker Packet received.\n",
2036 		    portp->lp_devname, portp->lp_port));
2037 
2038 		(void) receive_marker_pdu(portp, dmp);
2039 		break;
2040 
2041 	default:
2042 		AGGR_LACP_DBG(("aggr_lacp_rx: (%s%d): "
2043 		    "Unknown Slow Protocol type %d\n",
2044 		    portp->lp_devname, portp->lp_port, lacp->subtype));
2045 		break;
2046 	}
2047 
2048 	freemsg(dmp);
2049 }
2050