xref: /illumos-gate/usr/src/uts/common/io/aggr/aggr_lacp.c (revision 4874e7c912c8252ba6f6cd406d7045e457cf168c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2017, Joyent, Inc.
24  * Copyright 2024 MNX Cloud, Inc.
25  */
26 
27 /*
28  * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/sysmacros.h>
33 #include <sys/callb.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/disp.h>
37 #include <sys/list.h>
38 #include <sys/ksynch.h>
39 #include <sys/kmem.h>
40 #include <sys/stream.h>
41 #include <sys/modctl.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/atomic.h>
45 #include <sys/stat.h>
46 #include <sys/byteorder.h>
47 #include <sys/strsun.h>
48 #include <sys/isa_defs.h>
49 #include <sys/sdt.h>
50 
51 #include <sys/aggr.h>
52 #include <sys/aggr_impl.h>
53 
/* All-zero Ethernet address constant. */
54 static struct ether_addr	etherzeroaddr = {
55 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
56 };
57 
58 /*
59  * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
60  * Used as the destination address of frames built by fill_lacp_ether()
61  * and as the multicast address toggled by aggr_lacp_mcast_on()/_off().
60  */
61 static struct ether_addr   slow_multicast_addr = {
62 	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
63 };
64 
#ifdef DEBUG
/* LACP state machine debugging support */
static uint32_t aggr_lacp_debug = 0;

/*
 * Print a debug message when aggr_lacp_debug is non-zero. The argument is a
 * complete, parenthesized printf() argument list:
 *
 *	AGGR_LACP_DBG(("port %d\n", id));
 *
 * Both variants are wrapped in do { } while (0) so that the macro always
 * expands to exactly one statement and cannot capture a following "else".
 */
#define	AGGR_LACP_DBG(x)	\
	do { if (aggr_lacp_debug) { (void) printf x; } } while (0)
#else
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif /* DEBUG */

/* Number of nanoseconds in one second (hrtime_t units). */
#define	NSECS_PER_SEC   1000000000ll
74 
75 /* used by lacp_misconfig_walker() */
76 typedef struct lacp_misconfig_check_state_s {
	/* presumably the port being checked -- confirm in the walker */
77 	aggr_port_t *cs_portp;
	/* presumably set by the walker when a match is found -- confirm */
78 	boolean_t cs_found;
79 } lacp_misconfig_check_state_t;
80 
/*
 * State-name tables. The periodic and mux tables are indexed by state
 * value when building AGGR_LACP_DBG() messages.
 */
81 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
82 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
83 static const char *lacp_mux_str[] = LACP_MUX_STRINGS;
84 
/*
 * Default priorities: lacp_port_priority is copied into each port's
 * ActorPortPriority by aggr_lacp_init_port(), lacp_system_priority into
 * each group's ActorSystemPriority by aggr_lacp_init_grp().
 */
85 static uint16_t lacp_port_priority = 0x1000;
86 static uint16_t lacp_system_priority = 0x1000;
87 
88 /*
89  * Maintains a list of all ports in ATTACHED state. This information
90  * is used to detect misconfiguration.
91  */
92 typedef struct lacp_sel_ports {
93 	datalink_id_t sp_grp_linkid;
94 	datalink_id_t sp_linkid;
95 	/* Note: sp_partner_system must be 2-byte aligned */
96 	struct ether_addr sp_partner_system;
97 	uint32_t sp_partner_key;
98 	struct lacp_sel_ports *sp_next;
99 } lacp_sel_ports_t;
100 
/*
 * Head of the singly linked lacp_sel_ports_t list (linked via sp_next).
 * lacp_sel_lock is initialized/destroyed by aggr_lacp_init()/_fini();
 * NOTE(review): presumably it protects sel_ports -- confirm against
 * lacp_sel_ports_add()/lacp_sel_ports_del().
 */
101 static lacp_sel_ports_t *sel_ports = NULL;
102 static kmutex_t lacp_sel_lock;
103 
/* Forward declarations of file-local routines. */
104 static void periodic_timer_pop(void *);
105 static void periodic_timer_pop_handler(aggr_port_t *);
106 static void lacp_xmit_sm(aggr_port_t *);
107 static void lacp_periodic_sm(aggr_port_t *);
108 static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
109 static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
110 static void lacp_on(aggr_port_t *);
111 static void lacp_off(aggr_port_t *);
112 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
113 static void lacp_receive_sm(aggr_port_t *, lacp_t *);
114 static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
115 static void start_wait_while_timer(aggr_port_t *);
116 static void stop_wait_while_timer(aggr_port_t *);
117 static void lacp_reset_port(aggr_port_t *);
118 static void stop_current_while_timer(aggr_port_t *);
119 static void current_while_timer_pop(void *);
120 static void current_while_timer_pop_handler(aggr_port_t *);
121 static void update_default_selected(aggr_port_t *);
122 static boolean_t update_selected(aggr_port_t *, lacp_t *);
123 static boolean_t lacp_sel_ports_add(aggr_port_t *);
124 static void lacp_sel_ports_del(aggr_port_t *);
125 static void wait_while_timer_pop(void *);
126 static void wait_while_timer_pop_handler(aggr_port_t *);
127 
128 void
aggr_lacp_init(void)129 aggr_lacp_init(void)
130 {
131 	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
132 }
133 
134 void
aggr_lacp_fini(void)135 aggr_lacp_fini(void)
136 {
137 	mutex_destroy(&lacp_sel_lock);
138 }
139 
140 /*
141  * The following functions are used for handling LACP timers.
142  *
143  * Note that we cannot fully rely on the aggr's mac perimeter in the timeout
144  * handler routine, otherwise it may cause deadlock with the untimeout() call
145  * which is usually called with the mac perimeter held. Instead, a
146  * lacp_timer_lock mutex is introduced, which protects a bitwise flag
147  * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer()
148  * routines and is checked by a dedicated thread, that executes the real
149  * timeout operation.
150  */
151 static void
aggr_port_timer_thread(void * arg)152 aggr_port_timer_thread(void *arg)
153 {
154 	aggr_port_t		*port = arg;
155 	aggr_lacp_port_t	*pl = &port->lp_lacp;
156 	aggr_grp_t		*grp = port->lp_grp;
157 	uint32_t		lacp_timer_bits;
158 	mac_perim_handle_t	mph;
159 	callb_cpr_t		cprinfo;
160 
161 	CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr,
162 	    "aggr_port_timer_thread");
163 
164 	mutex_enter(&pl->lacp_timer_lock);
165 
166 	for (;;) {
167 
168 		if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) {
169 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
170 			cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock);
171 			CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock);
172 			continue;
173 		}
174 		pl->lacp_timer_bits = 0;
175 
176 		if (lacp_timer_bits & LACP_THREAD_EXIT)
177 			break;
178 
179 		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
180 			pl->periodic_timer.id = 0;
181 		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
182 			pl->wait_while_timer.id = 0;
183 		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
184 			pl->current_while_timer.id = 0;
185 
186 		mutex_exit(&pl->lacp_timer_lock);
187 
188 		mac_perim_enter_by_mh(grp->lg_mh, &mph);
189 		if (port->lp_closing) {
190 			mac_perim_exit(mph);
191 			mutex_enter(&pl->lacp_timer_lock);
192 			break;
193 		}
194 
195 		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
196 			periodic_timer_pop_handler(port);
197 		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
198 			wait_while_timer_pop_handler(port);
199 		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
200 			current_while_timer_pop_handler(port);
201 		mac_perim_exit(mph);
202 
203 		mutex_enter(&pl->lacp_timer_lock);
204 		if (pl->lacp_timer_bits & LACP_THREAD_EXIT)
205 			break;
206 	}
207 
208 	pl->lacp_timer_bits = 0;
209 	pl->lacp_timer_thread = NULL;
210 	cv_broadcast(&pl->lacp_timer_cv);
211 
212 	/* CALLB_CPR_EXIT drops the lock */
213 	CALLB_CPR_EXIT(&cprinfo);
214 
215 	/*
216 	 * Release the reference of the grp so aggr_grp_delete() can call
217 	 * mac_unregister() safely.
218 	 */
219 	aggr_grp_port_rele(port);
220 	thread_exit();
221 }
222 
223 /*
224  * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
225  * could not be performed due to a memory allocation error, B_TRUE otherwise.
226  */
227 static boolean_t
lacp_port_select(aggr_port_t * portp)228 lacp_port_select(aggr_port_t *portp)
229 {
230 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
231 
232 	if (!lacp_sel_ports_add(portp))
233 		return (B_FALSE);
234 	portp->lp_lacp.sm.selected = AGGR_SELECTED;
235 	return (B_TRUE);
236 }
237 
238 /*
239  * Set the port LACP state to UNSELECTED.
240  */
241 static void
lacp_port_unselect(aggr_port_t * portp)242 lacp_port_unselect(aggr_port_t *portp)
243 {
244 	aggr_grp_t	*grp = portp->lp_grp;
245 
246 	ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh));
247 
248 	lacp_sel_ports_del(portp);
249 	portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
250 }
251 
252 /*
253  * Initialize group specific LACP state and parameters.
254  */
255 void
aggr_lacp_init_grp(aggr_grp_t * aggrp)256 aggr_lacp_init_grp(aggr_grp_t *aggrp)
257 {
258 	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
259 	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
260 	aggrp->aggr.CollectorMaxDelay = 10;
261 	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
262 	aggrp->aggr.ready = B_FALSE;
263 }
264 
265 /*
266  * Complete LACP info initialization at port creation time.
267  */
268 void
aggr_lacp_init_port(aggr_port_t * portp)269 aggr_lacp_init_port(aggr_port_t *portp)
270 {
271 	aggr_grp_t *aggrp = portp->lp_grp;
272 	aggr_lacp_port_t *pl = &portp->lp_lacp;
273 
274 	ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh));
275 	ASSERT(MAC_PERIM_HELD(portp->lp_mh));
276 
277 	/* actor port # */
278 	pl->ActorPortNumber = portp->lp_portid;
279 	AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
280 	    "ActorPortNumber = 0x%x\n", portp->lp_linkid,
281 	    pl->ActorPortNumber));
282 
283 	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
284 	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
285 	pl->NTT = B_FALSE;			/* need to transmit */
286 
287 	pl->ActorAdminPortKey = aggrp->lg_key;
288 	pl->ActorOperPortKey = pl->ActorAdminPortKey;
289 	AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
290 	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
291 	    portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
292 
293 	/* Actor admin. port state */
294 	pl->ActorAdminPortState.bit.activity = B_FALSE;
295 	pl->ActorAdminPortState.bit.timeout = B_TRUE;
296 	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
297 	pl->ActorAdminPortState.bit.sync = B_FALSE;
298 	pl->ActorAdminPortState.bit.collecting = B_FALSE;
299 	pl->ActorAdminPortState.bit.distributing = B_FALSE;
300 	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
301 	pl->ActorAdminPortState.bit.expired = B_FALSE;
302 	pl->ActorOperPortState = pl->ActorAdminPortState;
303 
304 	/*
305 	 * Partner Administrative Information
306 	 * (All initialized to zero except for the following)
307 	 * Fast Timeouts.
308 	 */
309 	pl->PartnerAdminPortState.bit.timeout =
310 	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
311 
312 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
313 
314 	/*
315 	 * State machine information.
316 	 */
317 	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
318 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
319 	pl->sm.lacp_enabled = B_FALSE;
320 	pl->sm.port_enabled = B_FALSE;		/* Link Down */
321 	pl->sm.actor_churn = B_FALSE;
322 	pl->sm.partner_churn = B_FALSE;
323 	pl->sm.ready_n = B_FALSE;
324 	pl->sm.port_moved = B_FALSE;
325 
326 	lacp_port_unselect(portp);
327 
328 	pl->sm.periodic_state = LACP_NO_PERIODIC;
329 	pl->sm.receive_state = LACP_INITIALIZE;
330 	pl->sm.mux_state = LACP_DETACHED;
331 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
332 
333 	/*
334 	 * Timer information.
335 	 */
336 	pl->current_while_timer.id = 0;
337 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
338 
339 	pl->periodic_timer.id = 0;
340 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
341 
342 	pl->wait_while_timer.id = 0;
343 	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
344 
345 	pl->lacp_timer_bits = 0;
346 
347 	mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL);
348 	cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL);
349 
350 	pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread,
351 	    portp, 0, &p0, TS_RUN, minclsyspri);
352 
353 	/*
354 	 * Hold a reference of the grp and the port and this reference will
355 	 * be release when the thread exits.
356 	 *
357 	 * The reference on the port is used for aggr_port_delete() to
358 	 * continue without waiting for the thread to exit; the reference
359 	 * on the grp is used for aggr_grp_delete() to wait for the thread
360 	 * to exit before calling mac_unregister().
361 	 */
362 	aggr_grp_port_hold(portp);
363 }
364 
365 /*
366  * Port initialization when we need to
367  * turn LACP on/off, etc. Not everything is
368  * reset like in the above routine.
369  *		Do NOT modify things like link status.
370  */
371 static void
lacp_reset_port(aggr_port_t * portp)372 lacp_reset_port(aggr_port_t *portp)
373 {
374 	aggr_lacp_port_t *pl = &portp->lp_lacp;
375 
376 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
377 
378 	pl->NTT = B_FALSE;			/* need to transmit */
379 
380 	/* reset operational port state */
381 	pl->ActorOperPortState.bit.timeout =
382 	    pl->ActorAdminPortState.bit.timeout;
383 
384 	pl->ActorOperPortState.bit.sync = B_FALSE;
385 	pl->ActorOperPortState.bit.collecting = B_FALSE;
386 	pl->ActorOperPortState.bit.distributing = B_FALSE;
387 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
388 	pl->ActorOperPortState.bit.expired = B_FALSE;
389 
390 	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
391 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
392 
393 	/*
394 	 * State machine information.
395 	 */
396 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
397 	pl->sm.actor_churn = B_FALSE;
398 	pl->sm.partner_churn = B_FALSE;
399 	pl->sm.ready_n = B_FALSE;
400 
401 	lacp_port_unselect(portp);
402 
403 	pl->sm.periodic_state = LACP_NO_PERIODIC;
404 	pl->sm.receive_state = LACP_INITIALIZE;
405 	pl->sm.mux_state = LACP_DETACHED;
406 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
407 
408 	/*
409 	 * Timer information.
410 	 */
411 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
412 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
413 }
414 
415 static void
aggr_lacp_mcast_on(aggr_port_t * port)416 aggr_lacp_mcast_on(aggr_port_t *port)
417 {
418 	ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
419 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
420 
421 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
422 		return;
423 
424 	(void) aggr_port_multicst(port, B_TRUE,
425 	    (uchar_t *)&slow_multicast_addr);
426 }
427 
428 static void
aggr_lacp_mcast_off(aggr_port_t * port)429 aggr_lacp_mcast_off(aggr_port_t *port)
430 {
431 	ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
432 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
433 
434 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
435 		return;
436 
437 	(void) aggr_port_multicst(port, B_FALSE,
438 	    (uchar_t *)&slow_multicast_addr);
439 }
440 
441 static void
start_periodic_timer(aggr_port_t * portp)442 start_periodic_timer(aggr_port_t *portp)
443 {
444 	aggr_lacp_port_t *pl = &portp->lp_lacp;
445 
446 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
447 
448 	mutex_enter(&pl->lacp_timer_lock);
449 	if (pl->periodic_timer.id == 0) {
450 		pl->periodic_timer.id = timeout(periodic_timer_pop, portp,
451 		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
452 	}
453 	mutex_exit(&pl->lacp_timer_lock);
454 }
455 
456 static void
stop_periodic_timer(aggr_port_t * portp)457 stop_periodic_timer(aggr_port_t *portp)
458 {
459 	aggr_lacp_port_t *pl = &portp->lp_lacp;
460 	timeout_id_t id;
461 
462 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
463 
464 	mutex_enter(&pl->lacp_timer_lock);
465 	if ((id = pl->periodic_timer.id) != 0) {
466 		pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT;
467 		pl->periodic_timer.id = 0;
468 	}
469 	mutex_exit(&pl->lacp_timer_lock);
470 
471 	if (id != 0)
472 		(void) untimeout(id);
473 }
474 
475 /*
476  * When the timer pops, we arrive here to
477  * clear out LACPDU count as well as transmit an
478  * LACPDU. We then set the periodic state and let
479  * the periodic state machine restart the timer.
480  */
481 static void
periodic_timer_pop(void * data)482 periodic_timer_pop(void *data)
483 {
484 	aggr_port_t *portp = data;
485 	aggr_lacp_port_t *pl = &portp->lp_lacp;
486 
487 	mutex_enter(&pl->lacp_timer_lock);
488 	pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT;
489 	cv_broadcast(&pl->lacp_timer_cv);
490 	mutex_exit(&pl->lacp_timer_lock);
491 }
492 
493 /*
494  * When the timer pops, we arrive here to
495  * clear out LACPDU count as well as transmit an
496  * LACPDU. We then set the periodic state and let
497  * the periodic state machine restart the timer.
498  */
499 static void
periodic_timer_pop_handler(aggr_port_t * portp)500 periodic_timer_pop_handler(aggr_port_t *portp)
501 {
502 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
503 
504 	portp->lp_lacp_stats.LACPDUsTx = 0;
505 
506 	/* current timestamp */
507 	portp->lp_lacp.time = gethrtime();
508 	portp->lp_lacp.NTT = B_TRUE;
509 	lacp_xmit_sm(portp);
510 
511 	/*
512 	 * Set Periodic State machine state based on the
513 	 * value of the Partner Operation Port State timeout
514 	 * bit.
515 	 */
516 	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
517 		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
518 		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
519 	} else {
520 		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
521 		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
522 	}
523 
524 	lacp_periodic_sm(portp);
525 }
526 
527 /*
528  * Invoked from:
529  *	- startup upon aggregation
530  *	- when the periodic timer pops
531  *	- when the periodic timer value is changed
532  *	- when the port is attached or detached
533  *	- when LACP mode is changed.
534  */
535 static void
lacp_periodic_sm(aggr_port_t * portp)536 lacp_periodic_sm(aggr_port_t *portp)
537 {
538 	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
539 	aggr_lacp_port_t *pl = &portp->lp_lacp;
540 
541 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
542 
543 	/* LACP_OFF state not in specification so check here.  */
544 	if (!pl->sm.lacp_on) {
545 		/* Stop timer whether it is running or not */
546 		stop_periodic_timer(portp);
547 		pl->sm.periodic_state = LACP_NO_PERIODIC;
548 		pl->NTT = B_FALSE;
549 		AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
550 		    "%s--->%s\n", portp->lp_linkid,
551 		    lacp_periodic_str[oldstate],
552 		    lacp_periodic_str[pl->sm.periodic_state]));
553 		return;
554 	}
555 
556 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
557 	    !pl->sm.port_enabled ||
558 	    (!pl->ActorOperPortState.bit.activity &&
559 	    !pl->PartnerOperPortState.bit.activity)) {
560 
561 		/* Stop timer whether it is running or not */
562 		stop_periodic_timer(portp);
563 		pl->sm.periodic_state = LACP_NO_PERIODIC;
564 		pl->NTT = B_FALSE;
565 		AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
566 		    portp->lp_linkid, lacp_periodic_str[oldstate],
567 		    lacp_periodic_str[pl->sm.periodic_state]));
568 		return;
569 	}
570 
571 	/*
572 	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
573 	 * has been received. Then after we timeout, then it is
574 	 * possible to go to SLOW_PERIODIC_TIME.
575 	 */
576 	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
577 		pl->periodic_timer.val = FAST_PERIODIC_TIME;
578 		pl->sm.periodic_state = LACP_FAST_PERIODIC;
579 	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
580 	    pl->PartnerOperPortState.bit.timeout) {
581 		/*
582 		 * If we receive a bit indicating we are going to
583 		 * fast periodic from slow periodic, stop the timer
584 		 * and let the periodic_timer_pop routine deal
585 		 * with reseting the periodic state and transmitting
586 		 * a LACPDU.
587 		 */
588 		stop_periodic_timer(portp);
589 		periodic_timer_pop_handler(portp);
590 	}
591 
592 	/* Rearm timer with value provided by partner */
593 	start_periodic_timer(portp);
594 }
595 
596 /*
597  * This routine transmits an LACPDU if lacp_enabled
598  * is TRUE and if NTT is set.
599  */
600 static void
lacp_xmit_sm(aggr_port_t * portp)601 lacp_xmit_sm(aggr_port_t *portp)
602 {
603 	aggr_lacp_port_t *pl = &portp->lp_lacp;
604 	size_t	len;
605 	mblk_t  *mp;
606 	hrtime_t now, elapsed;
607 
608 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
609 
610 	/* LACP_OFF state not in specification so check here.  */
611 	if (!pl->sm.lacp_on || !pl->NTT)
612 		return;
613 
614 	/*
615 	 * Do nothing if LACP has been turned off or if the
616 	 * periodic state machine is not enabled.
617 	 */
618 	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
619 	    !pl->sm.lacp_enabled || pl->sm.begin) {
620 		pl->NTT = B_FALSE;
621 		return;
622 	}
623 
624 	/*
625 	 * If we have sent 5 Slow packets in the last second, avoid
626 	 * sending any more here. No more than three LACPDUs may be transmitted
627 	 * in any Fast_Periodic_Time interval.
628 	 */
629 	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
630 		/*
631 		 * Grab the current time value and see if
632 		 * more than 1 second has passed. If so,
633 		 * reset the timestamp and clear the count.
634 		 */
635 		now = gethrtime();
636 		elapsed = now - pl->time;
637 		if (elapsed > NSECS_PER_SEC) {
638 			portp->lp_lacp_stats.LACPDUsTx = 0;
639 			pl->time = now;
640 		} else {
641 			return;
642 		}
643 	}
644 
645 	len = sizeof (lacp_t) + sizeof (struct ether_header);
646 	mp = allocb(len, BPRI_MED);
647 	if (mp == NULL)
648 		return;
649 
650 	mp->b_wptr = mp->b_rptr + len;
651 	bzero(mp->b_rptr, len);
652 
653 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
654 	fill_lacp_pdu(portp,
655 	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
656 
657 	/* Send the packet over the first TX ring */
658 	mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp);
659 	if (mp != NULL)
660 		freemsg(mp);
661 
662 	pl->NTT = B_FALSE;
663 	portp->lp_lacp_stats.LACPDUsTx++;
664 }
665 
666 /*
667  * Initialize the ethernet header of a LACP packet sent from the specified
668  * port.
669  */
670 static void
fill_lacp_ether(aggr_port_t * port,struct ether_header * ether)671 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
672 {
673 	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
674 	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
675 	    ETHERADDRL);
676 	ether->ether_type = htons(ETHERTYPE_SLOW);
677 }
678 
679 static void
fill_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)680 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
681 {
682 	aggr_lacp_port_t *pl = &portp->lp_lacp;
683 	aggr_grp_t *aggrp = portp->lp_grp;
684 	mac_perim_handle_t pmph;
685 
686 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
687 	mac_perim_enter_by_mh(portp->lp_mh, &pmph);
688 
689 	lacp->subtype = LACP_SUBTYPE;
690 	lacp->version = LACP_VERSION;
691 
692 	/*
693 	 * Actor Information
694 	 */
695 	lacp->actor_info.tlv_type = ACTOR_TLV;
696 	lacp->actor_info.information_len = sizeof (link_info_t);
697 	lacp->actor_info.system_priority =
698 	    htons(aggrp->aggr.ActorSystemPriority);
699 	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
700 	    ETHERADDRL);
701 	lacp->actor_info.key = htons(pl->ActorOperPortKey);
702 	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
703 	lacp->actor_info.port = htons(pl->ActorPortNumber);
704 	lacp->actor_info.state.state = pl->ActorOperPortState.state;
705 
706 	/*
707 	 * Partner Information
708 	 */
709 	lacp->partner_info.tlv_type = PARTNER_TLV;
710 	lacp->partner_info.information_len = sizeof (link_info_t);
711 	lacp->partner_info.system_priority =
712 	    htons(pl->PartnerOperSysPriority);
713 	lacp->partner_info.system_id = pl->PartnerOperSystem;
714 	lacp->partner_info.key = htons(pl->PartnerOperKey);
715 	lacp->partner_info.port_priority =
716 	    htons(pl->PartnerOperPortPriority);
717 	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
718 	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
719 
720 	/* Collector Information */
721 	lacp->tlv_collector = COLLECTOR_TLV;
722 	lacp->collector_len = 0x10;
723 	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
724 
725 	/* Termination Information */
726 	lacp->tlv_terminator = TERMINATOR_TLV;
727 	lacp->terminator_len = 0x0;
728 
729 	mac_perim_exit(pmph);
730 }
731 
732 /*
733  * lacp_mux_sm - LACP mux state machine
734  *		This state machine is invoked from:
735  *			- startup upon aggregation
736  *			- from the Selection logic
737  *			- when the wait_while_timer pops
738  *			- when the aggregation MAC address is changed
739  *			- when receiving DL_NOTE_LINK_UP/DOWN
740  *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
741  *			- when LACP mode is changed.
742  *			- when a DL_NOTE_SPEED is received
743  */
744 static void
lacp_mux_sm(aggr_port_t * portp)745 lacp_mux_sm(aggr_port_t *portp)
746 {
747 	aggr_grp_t *aggrp = portp->lp_grp;
748 	boolean_t NTT_updated = B_FALSE;
749 	aggr_lacp_port_t *pl = &portp->lp_lacp;
750 	lacp_mux_state_t oldstate = pl->sm.mux_state;
751 
752 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
753 
754 	/* LACP_OFF state not in specification so check here.  */
755 	if (!pl->sm.lacp_on) {
756 		pl->sm.mux_state = LACP_DETACHED;
757 		pl->ActorOperPortState.bit.sync = B_FALSE;
758 
759 		if (pl->ActorOperPortState.bit.collecting ||
760 		    pl->ActorOperPortState.bit.distributing) {
761 			AGGR_LACP_DBG(("trunk link: (%d): "
762 			    "Collector_Distributor Disabled.\n",
763 			    portp->lp_linkid));
764 		}
765 
766 		pl->ActorOperPortState.bit.collecting =
767 		    pl->ActorOperPortState.bit.distributing = B_FALSE;
768 		return;
769 	}
770 
771 	if (pl->sm.begin || !pl->sm.lacp_enabled)
772 		pl->sm.mux_state = LACP_DETACHED;
773 
774 again:
775 	/* determine next state, or return if state unchanged */
776 	switch (pl->sm.mux_state) {
777 	case LACP_DETACHED:
778 		if (pl->sm.begin) {
779 			break;
780 		}
781 
782 		if ((pl->sm.selected == AGGR_SELECTED) ||
783 		    (pl->sm.selected == AGGR_STANDBY)) {
784 			pl->sm.mux_state = LACP_WAITING;
785 			break;
786 		}
787 		return;
788 
789 	case LACP_WAITING:
790 		if (pl->sm.selected == AGGR_UNSELECTED) {
791 			pl->sm.mux_state = LACP_DETACHED;
792 			break;
793 		}
794 
795 		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
796 			pl->sm.mux_state = LACP_ATTACHED;
797 			break;
798 		}
799 		return;
800 
801 	case LACP_ATTACHED:
802 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
803 		    (pl->sm.selected == AGGR_STANDBY)) {
804 			pl->sm.mux_state = LACP_DETACHED;
805 			break;
806 		}
807 
808 		if ((pl->sm.selected == AGGR_SELECTED) &&
809 		    pl->PartnerOperPortState.bit.sync) {
810 			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
811 			break;
812 		}
813 		return;
814 
815 	case LACP_COLLECTING_DISTRIBUTING:
816 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
817 		    (pl->sm.selected == AGGR_STANDBY) ||
818 		    !pl->PartnerOperPortState.bit.sync) {
819 			pl->sm.mux_state = LACP_ATTACHED;
820 			break;
821 		}
822 		return;
823 	}
824 
825 	AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
826 	    portp->lp_linkid, lacp_mux_str[oldstate],
827 	    lacp_mux_str[pl->sm.mux_state]));
828 
829 	/* perform actions on entering a new state */
830 	switch (pl->sm.mux_state) {
831 	case LACP_DETACHED:
832 		if (pl->ActorOperPortState.bit.collecting ||
833 		    pl->ActorOperPortState.bit.distributing) {
834 			AGGR_LACP_DBG(("trunk link: (%d): "
835 			    "Collector_Distributor Disabled.\n",
836 			    portp->lp_linkid));
837 		}
838 
839 		pl->ActorOperPortState.bit.sync =
840 		    pl->ActorOperPortState.bit.collecting = B_FALSE;
841 
842 		/* Turn OFF Collector_Distributor */
843 		aggr_set_coll_dist(portp, B_FALSE);
844 
845 		pl->ActorOperPortState.bit.distributing = B_FALSE;
846 		NTT_updated = B_TRUE;
847 		break;
848 
849 	case LACP_WAITING:
850 		start_wait_while_timer(portp);
851 		break;
852 
853 	case LACP_ATTACHED:
854 		if (pl->ActorOperPortState.bit.collecting ||
855 		    pl->ActorOperPortState.bit.distributing) {
856 			AGGR_LACP_DBG(("trunk link: (%d): "
857 			    "Collector_Distributor Disabled.\n",
858 			    portp->lp_linkid));
859 		}
860 
861 		pl->ActorOperPortState.bit.sync = B_TRUE;
862 		pl->ActorOperPortState.bit.collecting = B_FALSE;
863 
864 		/* Turn OFF Collector_Distributor */
865 		aggr_set_coll_dist(portp, B_FALSE);
866 
867 		pl->ActorOperPortState.bit.distributing = B_FALSE;
868 		NTT_updated = B_TRUE;
869 		if (pl->PartnerOperPortState.bit.sync) {
870 			/*
871 			 * We had already received an updated sync from
872 			 * the partner. Attempt to transition to
873 			 * collecting/distributing now.
874 			 */
875 			goto again;
876 		}
877 		break;
878 
879 	case LACP_COLLECTING_DISTRIBUTING:
880 		if (!pl->ActorOperPortState.bit.collecting &&
881 		    !pl->ActorOperPortState.bit.distributing) {
882 			AGGR_LACP_DBG(("trunk link: (%d): "
883 			    "Collector_Distributor Enabled.\n",
884 			    portp->lp_linkid));
885 		}
886 		pl->ActorOperPortState.bit.distributing = B_TRUE;
887 
888 		/* Turn Collector_Distributor back ON */
889 		aggr_set_coll_dist(portp, B_TRUE);
890 
891 		pl->ActorOperPortState.bit.collecting = B_TRUE;
892 		NTT_updated = B_TRUE;
893 		break;
894 	}
895 
896 	/*
897 	 * If we updated the state of the NTT variable, then
898 	 * initiate a LACPDU transmission.
899 	 */
900 	if (NTT_updated) {
901 		pl->NTT = B_TRUE;
902 		lacp_xmit_sm(portp);
903 	}
904 } /* lacp_mux_sm */
905 
906 
907 static int
receive_marker_pdu(aggr_port_t * portp,mblk_t * mp)908 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
909 {
910 	marker_pdu_t		*markerp = (marker_pdu_t *)mp->b_rptr;
911 
912 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
913 
914 	AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
915 	    portp->lp_linkid));
916 
917 	/* LACP_OFF state not in specification so check here.  */
918 	if (!portp->lp_lacp.sm.lacp_on)
919 		return (-1);
920 
921 	if (MBLKL(mp) < sizeof (marker_pdu_t))
922 		return (-1);
923 
924 	if (markerp->version != MARKER_VERSION) {
925 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
926 		    "version = %d does not match s/w version %d\n",
927 		    portp->lp_linkid, markerp->version, MARKER_VERSION));
928 		return (-1);
929 	}
930 
931 	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
932 		/* We do not yet send out MARKER info PDUs */
933 		AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
934 		    " MARKER TLV = %d - We don't send out info type!\n",
935 		    portp->lp_linkid, markerp->tlv_marker));
936 		return (-1);
937 	}
938 
939 	if (markerp->tlv_marker != MARKER_INFO_TLV) {
940 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
941 		    " MARKER TLV = %d \n", portp->lp_linkid,
942 		    markerp->tlv_marker));
943 		return (-1);
944 	}
945 
946 	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
947 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
948 		    " MARKER length = %d \n", portp->lp_linkid,
949 		    markerp->marker_len));
950 		return (-1);
951 	}
952 
953 	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
954 		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
955 		    " MARKER Port %d not equal to Partner port %d\n",
956 		    portp->lp_linkid, markerp->requestor_port,
957 		    portp->lp_lacp.PartnerOperPortNum));
958 		return (-1);
959 	}
960 
961 	if (ether_cmp(&markerp->system_id,
962 	    &portp->lp_lacp.PartnerOperSystem) != 0) {
963 		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
964 		    " MARKER MAC not equal to Partner MAC\n",
965 		    portp->lp_linkid));
966 		return (-1);
967 	}
968 
969 	/*
970 	 * Turn into Marker Response PDU
971 	 * and return mblk to sending system
972 	 */
973 	markerp->tlv_marker = MARKER_RESPONSE_TLV;
974 
975 	/* reuse the space that was used by received ethernet header */
976 	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
977 	mp->b_rptr -= sizeof (struct ether_header);
978 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
979 	return (0);
980 }
981 
982 /*
983  * Update the LACP mode (off, active, or passive) of the specified group.
984  */
985 void
aggr_lacp_update_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode)986 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
987 {
988 	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
989 	aggr_port_t *port;
990 
991 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
992 	ASSERT(!grp->lg_closing);
993 
994 	if (mode == old_mode)
995 		return;
996 
997 	grp->lg_lacp_mode = mode;
998 
999 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1000 		port->lp_lacp.ActorAdminPortState.bit.activity =
1001 		    port->lp_lacp.ActorOperPortState.bit.activity =
1002 		    (mode == AGGR_LACP_ACTIVE);
1003 
1004 		if (old_mode == AGGR_LACP_OFF) {
1005 			/* OFF -> {PASSIVE,ACTIVE} */
1006 			/* turn OFF Collector_Distributor */
1007 			aggr_set_coll_dist(port, B_FALSE);
1008 			lacp_on(port);
1009 		} else if (mode == AGGR_LACP_OFF) {
1010 			/* {PASSIVE,ACTIVE} -> OFF */
1011 			lacp_off(port);
1012 			/* Turn ON Collector_Distributor */
1013 			aggr_set_coll_dist(port, B_TRUE);
1014 		} else {
1015 			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
1016 			port->lp_lacp.sm.begin = B_TRUE;
1017 			lacp_mux_sm(port);
1018 			lacp_periodic_sm(port);
1019 
1020 			/* kick off state machines */
1021 			lacp_receive_sm(port, NULL);
1022 			lacp_mux_sm(port);
1023 		}
1024 	}
1025 }
1026 
1027 
1028 /*
1029  * Update the LACP timer (short or long) of the specified group.
1030  */
1031 void
aggr_lacp_update_timer(aggr_grp_t * grp,aggr_lacp_timer_t timer)1032 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
1033 {
1034 	aggr_port_t *port;
1035 
1036 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1037 
1038 	if (timer == grp->aggr.PeriodicTimer)
1039 		return;
1040 
1041 	grp->aggr.PeriodicTimer = timer;
1042 
1043 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1044 		port->lp_lacp.ActorAdminPortState.bit.timeout =
1045 		    port->lp_lacp.ActorOperPortState.bit.timeout =
1046 		    (timer == AGGR_LACP_TIMER_SHORT);
1047 	}
1048 }
1049 
1050 void
aggr_port_lacp_set_mode(aggr_grp_t * grp,aggr_port_t * port)1051 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port)
1052 {
1053 	aggr_lacp_mode_t	mode;
1054 	aggr_lacp_timer_t	timer;
1055 
1056 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1057 
1058 	mode = grp->lg_lacp_mode;
1059 	timer = grp->aggr.PeriodicTimer;
1060 
1061 	port->lp_lacp.ActorAdminPortState.bit.activity =
1062 	    port->lp_lacp.ActorOperPortState.bit.activity =
1063 	    (mode == AGGR_LACP_ACTIVE);
1064 
1065 	port->lp_lacp.ActorAdminPortState.bit.timeout =
1066 	    port->lp_lacp.ActorOperPortState.bit.timeout =
1067 	    (timer == AGGR_LACP_TIMER_SHORT);
1068 
1069 	if (mode == AGGR_LACP_OFF) {
1070 		/* Turn ON Collector_Distributor */
1071 		aggr_set_coll_dist(port, B_TRUE);
1072 	} else { /* LACP_ACTIVE/PASSIVE */
1073 		lacp_on(port);
1074 	}
1075 }
1076 
1077 /*
1078  * Sets the initial LACP mode (off, active, passive) and LACP timer
1079  * (short, long) of the specified group.
1080  */
1081 void
aggr_lacp_set_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode,aggr_lacp_timer_t timer)1082 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
1083     aggr_lacp_timer_t timer)
1084 {
1085 	aggr_port_t *port;
1086 
1087 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1088 
1089 	grp->lg_lacp_mode = mode;
1090 	grp->aggr.PeriodicTimer = timer;
1091 
1092 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1093 		aggr_port_lacp_set_mode(grp, port);
1094 }
1095 
1096 /*
1097  * Verify that the Partner MAC and Key recorded by the specified
1098  * port are not found in other ports that are not part of our
1099  * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1100  * otherwise.
1101  */
1102 static boolean_t
lacp_misconfig_check(aggr_port_t * portp)1103 lacp_misconfig_check(aggr_port_t *portp)
1104 {
1105 	aggr_grp_t *grp = portp->lp_grp;
1106 	lacp_sel_ports_t *cport;
1107 
1108 	mutex_enter(&lacp_sel_lock);
1109 
1110 	for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1111 
1112 		/* skip entries of the group of the port being checked */
1113 		if (cport->sp_grp_linkid == grp->lg_linkid)
1114 			continue;
1115 
1116 		if ((ether_cmp(&cport->sp_partner_system,
1117 		    &grp->aggr.PartnerSystem) == 0) &&
1118 		    (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1119 			char mac_str[ETHERADDRL*3];
1120 			struct ether_addr *mac = &cport->sp_partner_system;
1121 
1122 			/*
1123 			 * The Partner port information is already in use
1124 			 * by ports in another aggregation so disable this
1125 			 * port.
1126 			 */
1127 
1128 			(void) snprintf(mac_str, sizeof (mac_str),
1129 			    "%x:%x:%x:%x:%x:%x",
1130 			    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1131 			    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1132 			    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1133 
1134 			portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1135 
1136 			cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1137 			    "MAC %s and key %d in use on aggregation %d "
1138 			    "port %d\n", grp->lg_linkid, portp->lp_linkid,
1139 			    mac_str, portp->lp_lacp.PartnerOperKey,
1140 			    cport->sp_grp_linkid, cport->sp_linkid);
1141 			break;
1142 		}
1143 	}
1144 
1145 	mutex_exit(&lacp_sel_lock);
1146 	return (cport != NULL);
1147 }
1148 
1149 /*
1150  * Remove the specified port from the list of selected ports.
1151  */
1152 static void
lacp_sel_ports_del(aggr_port_t * portp)1153 lacp_sel_ports_del(aggr_port_t *portp)
1154 {
1155 	lacp_sel_ports_t *cport, **prev = NULL;
1156 
1157 	mutex_enter(&lacp_sel_lock);
1158 
1159 	prev = &sel_ports;
1160 	for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1161 	    cport = cport->sp_next) {
1162 		if (portp->lp_linkid == cport->sp_linkid)
1163 			break;
1164 	}
1165 
1166 	if (cport == NULL) {
1167 		mutex_exit(&lacp_sel_lock);
1168 		return;
1169 	}
1170 
1171 	*prev = cport->sp_next;
1172 	kmem_free(cport, sizeof (*cport));
1173 
1174 	mutex_exit(&lacp_sel_lock);
1175 }
1176 
1177 /*
1178  * Add the specified port to the list of selected ports. Returns B_FALSE
1179  * if the operation could not be performed due to an memory allocation
1180  * error.
1181  */
1182 static boolean_t
lacp_sel_ports_add(aggr_port_t * portp)1183 lacp_sel_ports_add(aggr_port_t *portp)
1184 {
1185 	lacp_sel_ports_t *new_port;
1186 	lacp_sel_ports_t *cport, **last;
1187 
1188 	mutex_enter(&lacp_sel_lock);
1189 
1190 	/* check if port is already in the list */
1191 	last = &sel_ports;
1192 	for (cport = sel_ports; cport != NULL;
1193 	    last = &cport->sp_next, cport = cport->sp_next) {
1194 		if (portp->lp_linkid == cport->sp_linkid) {
1195 			ASSERT(cport->sp_partner_key ==
1196 			    portp->lp_lacp.PartnerOperKey);
1197 			ASSERT(ether_cmp(&cport->sp_partner_system,
1198 			    &portp->lp_lacp.PartnerOperSystem) == 0);
1199 
1200 			mutex_exit(&lacp_sel_lock);
1201 			return (B_TRUE);
1202 		}
1203 	}
1204 
1205 	/* create and initialize new entry */
1206 	new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1207 	if (new_port == NULL) {
1208 		mutex_exit(&lacp_sel_lock);
1209 		return (B_FALSE);
1210 	}
1211 
1212 	new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1213 	bcopy(&portp->lp_lacp.PartnerOperSystem,
1214 	    &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1215 	new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1216 	new_port->sp_linkid = portp->lp_linkid;
1217 
1218 	*last = new_port;
1219 
1220 	mutex_exit(&lacp_sel_lock);
1221 	return (B_TRUE);
1222 }
1223 
1224 /*
1225  * lacp_selection_logic - LACP selection logic
1226  *		Sets the selected variable on a per port basis
1227  *		and sets Ready when all waiting ports are ready
1228  *		to go online.
1229  *
1230  * parameters:
1231  *      - portp - instance this applies to.
1232  *
1233  * invoked:
1234  *    - when initialization is needed
1235  *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1236  *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1237  *    - every time the wait_while_timer pops
1238  *    - everytime we turn LACP on/off
1239  */
1240 static void
lacp_selection_logic(aggr_port_t * portp)1241 lacp_selection_logic(aggr_port_t *portp)
1242 {
1243 	aggr_port_t *tpp;
1244 	aggr_grp_t *aggrp = portp->lp_grp;
1245 	int ports_waiting;
1246 	boolean_t reset_mac = B_FALSE;
1247 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1248 
1249 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1250 
1251 	/* LACP_OFF state not in specification so check here.  */
1252 	if (!pl->sm.lacp_on) {
1253 		lacp_port_unselect(portp);
1254 		aggrp->aggr.ready = B_FALSE;
1255 		lacp_mux_sm(portp);
1256 		return;
1257 	}
1258 
1259 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1260 	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1261 
1262 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1263 		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1264 		    "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1265 		    AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1266 		    portp->lp_state));
1267 
1268 		lacp_port_unselect(portp);
1269 		aggrp->aggr.ready = B_FALSE;
1270 		lacp_mux_sm(portp);
1271 		return;
1272 	}
1273 
1274 	/*
1275 	 * If LACP is not enabled then selected is never set.
1276 	 */
1277 	if (!pl->sm.lacp_enabled) {
1278 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1279 		    portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1280 
1281 		lacp_port_unselect(portp);
1282 		lacp_mux_sm(portp);
1283 		return;
1284 	}
1285 
1286 	/*
1287 	 * Check if the Partner MAC or Key are zero. If so, we have
1288 	 * not received any LACP info or it has expired and the
1289 	 * receive machine is in the LACP_DEFAULTED state.
1290 	 */
1291 	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1292 	    (pl->PartnerOperKey == 0)) {
1293 
1294 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1295 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1296 			    &etherzeroaddr) != 0 &&
1297 			    (tpp->lp_lacp.PartnerOperKey != 0))
1298 				break;
1299 		}
1300 
1301 		/*
1302 		 * If all ports have no key or aggregation address,
1303 		 * then clear the negotiated Partner MAC and key.
1304 		 */
1305 		if (tpp == NULL) {
1306 			/* Clear the aggregation Partner MAC and key */
1307 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1308 			aggrp->aggr.PartnerOperAggrKey = 0;
1309 		}
1310 
1311 		return;
1312 	}
1313 
1314 	/*
1315 	 * Insure that at least one port in the aggregation
1316 	 * matches the Partner aggregation MAC and key. If not,
1317 	 * then clear the aggregation MAC and key. Later we will
1318 	 * set the Partner aggregation MAC and key to that of the
1319 	 * current port's Partner MAC and key.
1320 	 */
1321 	if (ether_cmp(&pl->PartnerOperSystem,
1322 	    &aggrp->aggr.PartnerSystem) != 0 ||
1323 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1324 
1325 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1326 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1327 			    &aggrp->aggr.PartnerSystem) == 0 &&
1328 			    (tpp->lp_lacp.PartnerOperKey ==
1329 			    aggrp->aggr.PartnerOperAggrKey)) {
1330 				/* Set aggregation Partner MAC and key */
1331 				aggrp->aggr.PartnerSystem =
1332 				    pl->PartnerOperSystem;
1333 				aggrp->aggr.PartnerOperAggrKey =
1334 				    pl->PartnerOperKey;
1335 				break;
1336 			}
1337 		}
1338 
1339 		if (tpp == NULL) {
1340 			/* Clear the aggregation Partner MAC and key */
1341 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1342 			aggrp->aggr.PartnerOperAggrKey = 0;
1343 			reset_mac = B_TRUE;
1344 		}
1345 	}
1346 
1347 	/*
1348 	 * If our Actor MAC is found in the Partner MAC
1349 	 * on this port then we have a loopback misconfiguration.
1350 	 */
1351 	if (ether_cmp(&pl->PartnerOperSystem,
1352 	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1353 		cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1354 		    portp->lp_linkid);
1355 
1356 		lacp_port_unselect(portp);
1357 		lacp_mux_sm(portp);
1358 		return;
1359 	}
1360 
1361 	/*
1362 	 * If our Partner MAC and Key are found on any other
1363 	 * ports that are not in our aggregation, we have
1364 	 * a misconfiguration.
1365 	 */
1366 	if (lacp_misconfig_check(portp)) {
1367 		lacp_mux_sm(portp);
1368 		return;
1369 	}
1370 
1371 	/*
1372 	 * If the Aggregation Partner MAC and Key have not been
1373 	 * set, then this is either the first port or the aggregation
1374 	 * MAC and key have been reset. In either case we must set
1375 	 * the values of the Partner MAC and key.
1376 	 */
1377 	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1378 	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1379 		/* Set aggregation Partner MAC and key */
1380 		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1381 		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1382 
1383 		/*
1384 		 * If we reset Partner aggregation MAC, then restart
1385 		 * selection_logic on ports that match new MAC address.
1386 		 */
1387 		if (reset_mac) {
1388 			for (tpp = aggrp->lg_ports; tpp; tpp =
1389 			    tpp->lp_next) {
1390 				if (tpp == portp)
1391 					continue;
1392 				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1393 				    &aggrp->aggr.PartnerSystem) == 0 &&
1394 				    (tpp->lp_lacp.PartnerOperKey ==
1395 				    aggrp->aggr.PartnerOperAggrKey))
1396 					lacp_selection_logic(tpp);
1397 			}
1398 		}
1399 	} else if (ether_cmp(&pl->PartnerOperSystem,
1400 	    &aggrp->aggr.PartnerSystem) != 0 ||
1401 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1402 		/*
1403 		 * The Partner port information does not match
1404 		 * that of the other ports in the aggregation
1405 		 * so disable this port.
1406 		 */
1407 		lacp_port_unselect(portp);
1408 
1409 		cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1410 		    "or key (%d) incompatible with Aggregation Partner "
1411 		    "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1412 		    aggrp->aggr.PartnerOperAggrKey);
1413 
1414 		lacp_mux_sm(portp);
1415 		return;
1416 	}
1417 
1418 	/* If we get to here, automatically set selected */
1419 	if (pl->sm.selected != AGGR_SELECTED) {
1420 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1421 		    "selected %d-->%d\n", portp->lp_linkid,
1422 		    pl->sm.selected, AGGR_SELECTED));
1423 		if (!lacp_port_select(portp))
1424 			return;
1425 		lacp_mux_sm(portp);
1426 	}
1427 
1428 	/*
1429 	 * From this point onward we have selected the port
1430 	 * and are simply checking if the Ready flag should
1431 	 * be set.
1432 	 */
1433 
1434 	/*
1435 	 * If at least two ports are waiting to aggregate
1436 	 * and ready_n is set on all ports waiting to aggregate
1437 	 * then set READY for the aggregation.
1438 	 */
1439 
1440 	ports_waiting = 0;
1441 
1442 	if (!aggrp->aggr.ready) {
1443 		/*
1444 		 * If all ports in the aggregation have received compatible
1445 		 * partner information and they match up correctly with the
1446 		 * switch, there is no need to wait for all the
1447 		 * wait_while_timers to pop.
1448 		 */
1449 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1450 			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1451 			    tpp->lp_lacp.sm.begin) &&
1452 			    !tpp->lp_lacp.PartnerOperPortState.bit.sync) {
1453 				/* Add up ports uninitialized or waiting */
1454 				ports_waiting++;
1455 				if (!tpp->lp_lacp.sm.ready_n) {
1456 					DTRACE_PROBE1(port___not__ready,
1457 					    aggr_port_t *, tpp);
1458 					return;
1459 				}
1460 			}
1461 		}
1462 	}
1463 
1464 	if (aggrp->aggr.ready) {
1465 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1466 		    "aggr.ready already set\n", portp->lp_linkid));
1467 		lacp_mux_sm(portp);
1468 	} else {
1469 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1470 		    portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1471 		aggrp->aggr.ready = B_TRUE;
1472 
1473 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1474 			lacp_mux_sm(tpp);
1475 	}
1476 
1477 }
1478 
1479 /*
1480  * wait_while_timer_pop - When the timer pops, we arrive here to
1481  *			set ready_n and trigger the selection logic.
1482  */
1483 static void
wait_while_timer_pop(void * data)1484 wait_while_timer_pop(void *data)
1485 {
1486 	aggr_port_t *portp = data;
1487 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1488 
1489 	mutex_enter(&pl->lacp_timer_lock);
1490 	pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT;
1491 	cv_broadcast(&pl->lacp_timer_cv);
1492 	mutex_exit(&pl->lacp_timer_lock);
1493 }
1494 
1495 /*
1496  * wait_while_timer_pop_handler - When the timer pops, we arrive here to
1497  *			set ready_n and trigger the selection logic.
1498  */
1499 static void
wait_while_timer_pop_handler(aggr_port_t * portp)1500 wait_while_timer_pop_handler(aggr_port_t *portp)
1501 {
1502 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1503 
1504 	AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1505 	    portp->lp_linkid));
1506 	portp->lp_lacp.sm.ready_n = B_TRUE;
1507 
1508 	lacp_selection_logic(portp);
1509 }
1510 
1511 static void
start_wait_while_timer(aggr_port_t * portp)1512 start_wait_while_timer(aggr_port_t *portp)
1513 {
1514 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1515 
1516 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1517 
1518 	mutex_enter(&pl->lacp_timer_lock);
1519 	if (pl->wait_while_timer.id == 0) {
1520 		pl->wait_while_timer.id =
1521 		    timeout(wait_while_timer_pop, portp,
1522 		    drv_usectohz(1000000 *
1523 		    portp->lp_lacp.wait_while_timer.val));
1524 	}
1525 	mutex_exit(&pl->lacp_timer_lock);
1526 }
1527 
1528 
1529 static void
stop_wait_while_timer(aggr_port_t * portp)1530 stop_wait_while_timer(aggr_port_t *portp)
1531 {
1532 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1533 	timeout_id_t id;
1534 
1535 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1536 
1537 	mutex_enter(&pl->lacp_timer_lock);
1538 	if ((id = pl->wait_while_timer.id) != 0) {
1539 		pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT;
1540 		pl->wait_while_timer.id = 0;
1541 	}
1542 	mutex_exit(&pl->lacp_timer_lock);
1543 
1544 	if (id != 0)
1545 		(void) untimeout(id);
1546 }
1547 
1548 /*
1549  * Invoked when a port has been attached to a group.
1550  * Complete the processing that couldn't be finished from lacp_on()
1551  * because the port was not started. We know that the link is full
1552  * duplex and ON, otherwise it wouldn't be attached.
1553  */
1554 void
aggr_lacp_port_attached(aggr_port_t * portp)1555 aggr_lacp_port_attached(aggr_port_t *portp)
1556 {
1557 	aggr_grp_t *grp = portp->lp_grp;
1558 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1559 
1560 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1561 	ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1562 	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1563 
1564 	AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1565 	    portp->lp_linkid));
1566 
1567 	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1568 
1569 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1570 		return;
1571 
1572 	pl->sm.lacp_enabled = B_TRUE;
1573 	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1574 	pl->sm.begin = B_TRUE;
1575 
1576 	lacp_receive_sm(portp, NULL);
1577 	lacp_mux_sm(portp);
1578 
1579 	/* Enable Multicast Slow Protocol address */
1580 	aggr_lacp_mcast_on(portp);
1581 
1582 	/* periodic_sm is started up from the receive machine */
1583 	lacp_selection_logic(portp);
1584 }
1585 
1586 /*
1587  * Invoked when a port has been detached from a group. Turn off
1588  * LACP processing if it was enabled.
1589  */
1590 void
aggr_lacp_port_detached(aggr_port_t * portp)1591 aggr_lacp_port_detached(aggr_port_t *portp)
1592 {
1593 	aggr_grp_t *grp = portp->lp_grp;
1594 
1595 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1596 	ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1597 
1598 	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1599 	    portp->lp_linkid));
1600 
1601 	portp->lp_lacp.sm.port_enabled = B_FALSE;
1602 
1603 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1604 		return;
1605 
1606 	portp->lp_lacp.sm.lacp_enabled = B_FALSE;
1607 	lacp_selection_logic(portp);
1608 	lacp_mux_sm(portp);
1609 	lacp_periodic_sm(portp);
1610 
1611 	/*
1612 	 * Disable Slow Protocol Timers.
1613 	 */
1614 	stop_periodic_timer(portp);
1615 	stop_current_while_timer(portp);
1616 	stop_wait_while_timer(portp);
1617 
1618 	/* Disable Multicast Slow Protocol address */
1619 	aggr_lacp_mcast_off(portp);
1620 	aggr_set_coll_dist(portp, B_FALSE);
1621 }
1622 
1623 /*
1624  * Enable Slow Protocol LACP and Marker PDUs.
1625  */
1626 static void
lacp_on(aggr_port_t * portp)1627 lacp_on(aggr_port_t *portp)
1628 {
1629 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1630 	mac_perim_handle_t mph;
1631 
1632 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1633 
1634 	mac_perim_enter_by_mh(portp->lp_mh, &mph);
1635 
1636 	/*
1637 	 * Reset the state machines and Partner operational
1638 	 * information. Careful to not reset things like
1639 	 * our link state.
1640 	 */
1641 	lacp_reset_port(portp);
1642 	pl->sm.lacp_on = B_TRUE;
1643 
1644 	AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1645 
1646 	if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1647 		pl->sm.port_enabled = B_TRUE;
1648 		pl->sm.lacp_enabled = B_TRUE;
1649 		pl->ActorOperPortState.bit.aggregation = B_TRUE;
1650 	}
1651 
1652 	lacp_receive_sm(portp, NULL);
1653 	lacp_mux_sm(portp);
1654 
1655 	if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1656 		/* Enable Multicast Slow Protocol address */
1657 		aggr_lacp_mcast_on(portp);
1658 
1659 		/* periodic_sm is started up from the receive machine */
1660 		lacp_selection_logic(portp);
1661 	}
1662 	mac_perim_exit(mph);
1663 } /* lacp_on */
1664 
1665 /* Disable Slow Protocol LACP and Marker PDUs */
1666 static void
lacp_off(aggr_port_t * portp)1667 lacp_off(aggr_port_t *portp)
1668 {
1669 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1670 	mac_perim_handle_t mph;
1671 
1672 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1673 	mac_perim_enter_by_mh(portp->lp_mh, &mph);
1674 
1675 	pl->sm.lacp_on = B_FALSE;
1676 
1677 	AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1678 
1679 	if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1680 		/*
1681 		 * Disable Slow Protocol Timers.
1682 		 */
1683 		stop_periodic_timer(portp);
1684 		stop_current_while_timer(portp);
1685 		stop_wait_while_timer(portp);
1686 
1687 		/* Disable Multicast Slow Protocol address */
1688 		aggr_lacp_mcast_off(portp);
1689 
1690 		pl->sm.port_enabled = B_FALSE;
1691 		pl->sm.lacp_enabled = B_FALSE;
1692 		pl->ActorOperPortState.bit.aggregation = B_FALSE;
1693 	}
1694 
1695 	lacp_mux_sm(portp);
1696 	lacp_periodic_sm(portp);
1697 	lacp_selection_logic(portp);
1698 
1699 	/* Turn OFF Collector_Distributor */
1700 	aggr_set_coll_dist(portp, B_FALSE);
1701 
1702 	lacp_reset_port(portp);
1703 	mac_perim_exit(mph);
1704 }
1705 
1706 
1707 static boolean_t
valid_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)1708 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1709 {
1710 	/*
1711 	 * 43.4.12 - "a Receive machine shall not validate
1712 	 * the Version Number, TLV_type, or Reserved fields in received
1713 	 * LACPDUs."
1714 	 * ... "a Receive machine may validate the Actor_Information_Length,
1715 	 * Partner_Information_Length, Collector_Information_Length,
1716 	 * or Terminator_Length fields."
1717 	 */
1718 	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1719 	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1720 	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1721 	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1722 		AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1723 		    " Terminator Length = %d \n", portp->lp_linkid,
1724 		    lacp->terminator_len));
1725 		return (B_FALSE);
1726 	}
1727 
1728 	return (B_TRUE);
1729 }
1730 
1731 
1732 static void
start_current_while_timer(aggr_port_t * portp,uint_t time)1733 start_current_while_timer(aggr_port_t *portp, uint_t time)
1734 {
1735 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1736 
1737 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1738 
1739 	mutex_enter(&pl->lacp_timer_lock);
1740 	if (pl->current_while_timer.id == 0) {
1741 		if (time > 0)
1742 			pl->current_while_timer.val = time;
1743 		else if (pl->ActorOperPortState.bit.timeout)
1744 			pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
1745 		else
1746 			pl->current_while_timer.val = LONG_TIMEOUT_TIME;
1747 
1748 		pl->current_while_timer.id =
1749 		    timeout(current_while_timer_pop, portp,
1750 		    drv_usectohz((clock_t)1000000 *
1751 		    (clock_t)portp->lp_lacp.current_while_timer.val));
1752 	}
1753 	mutex_exit(&pl->lacp_timer_lock);
1754 }
1755 
1756 
1757 static void
stop_current_while_timer(aggr_port_t * portp)1758 stop_current_while_timer(aggr_port_t *portp)
1759 {
1760 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1761 	timeout_id_t id;
1762 
1763 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1764 
1765 	mutex_enter(&pl->lacp_timer_lock);
1766 	if ((id = pl->current_while_timer.id) != 0) {
1767 		pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT;
1768 		pl->current_while_timer.id = 0;
1769 	}
1770 	mutex_exit(&pl->lacp_timer_lock);
1771 
1772 	if (id != 0)
1773 		(void) untimeout(id);
1774 }
1775 
1776 static void
current_while_timer_pop(void * data)1777 current_while_timer_pop(void *data)
1778 {
1779 	aggr_port_t *portp = (aggr_port_t *)data;
1780 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1781 
1782 	mutex_enter(&pl->lacp_timer_lock);
1783 	pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT;
1784 	cv_broadcast(&pl->lacp_timer_cv);
1785 	mutex_exit(&pl->lacp_timer_lock);
1786 }
1787 
1788 static void
current_while_timer_pop_handler(aggr_port_t * portp)1789 current_while_timer_pop_handler(aggr_port_t *portp)
1790 {
1791 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1792 
1793 	AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1794 	    "pop id=%p\n", portp->lp_linkid,
1795 	    portp->lp_lacp.current_while_timer.id));
1796 
1797 	lacp_receive_sm(portp, NULL);
1798 }
1799 
1800 /*
1801  * record_Default - Simply copies over administrative values
1802  * to the partner operational values, and sets our state to indicate we
1803  * are using defaulted values.
1804  */
1805 static void
record_Default(aggr_port_t * portp)1806 record_Default(aggr_port_t *portp)
1807 {
1808 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1809 
1810 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1811 
1812 	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1813 	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1814 	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1815 	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1816 	pl->PartnerOperKey = pl->PartnerAdminKey;
1817 	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1818 
1819 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1820 }
1821 
1822 
1823 /* Returns B_TRUE on sync value changing */
1824 static boolean_t
record_PDU(aggr_port_t * portp,lacp_t * lacp)1825 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1826 {
1827 	aggr_grp_t *aggrp = portp->lp_grp;
1828 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1829 	uint8_t save_sync;
1830 
1831 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1832 
1833 	/*
1834 	 * Partner Information
1835 	 */
1836 	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1837 	pl->PartnerOperPortPriority =
1838 	    ntohs(lacp->actor_info.port_priority);
1839 	pl->PartnerOperSystem = lacp->actor_info.system_id;
1840 	pl->PartnerOperSysPriority =
1841 	    htons(lacp->actor_info.system_priority);
1842 	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1843 
1844 	/* All state info except for Synchronization */
1845 	save_sync = pl->PartnerOperPortState.bit.sync;
1846 	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1847 
1848 	/* Defaulted set to FALSE */
1849 	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1850 
1851 	/*
1852 	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1853 	 *		Partner_System_Priority, Partner_Key, and
1854 	 *		Partner_State.Aggregation) are compared to the
1855 	 *		corresponding operations paramters values for
1856 	 *		the Actor. If these are equal, or if this is
1857 	 *		an individual link, we are synchronized.
1858 	 */
1859 	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1860 	    (ntohs(lacp->partner_info.port_priority) ==
1861 	    pl->ActorPortPriority) &&
1862 	    (ether_cmp(&lacp->partner_info.system_id,
1863 	    (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1864 	    (ntohs(lacp->partner_info.system_priority) ==
1865 	    aggrp->aggr.ActorSystemPriority) &&
1866 	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1867 	    (lacp->partner_info.state.bit.aggregation ==
1868 	    pl->ActorOperPortState.bit.aggregation)) ||
1869 	    (!lacp->actor_info.state.bit.aggregation)) {
1870 
1871 		pl->PartnerOperPortState.bit.sync =
1872 		    lacp->actor_info.state.bit.sync;
1873 	} else {
1874 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1875 	}
1876 
1877 	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1878 		AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1879 		    "%d -->%d\n", portp->lp_linkid, save_sync,
1880 		    pl->PartnerOperPortState.bit.sync));
1881 		return (B_TRUE);
1882 	} else {
1883 		return (B_FALSE);
1884 	}
1885 }
1886 
1887 
1888 /*
1889  * update_selected - If any of the Partner parameters has
1890  *			changed from a previous value, then
1891  *			unselect the link from the aggregator.
1892  */
1893 static boolean_t
update_selected(aggr_port_t * portp,lacp_t * lacp)1894 update_selected(aggr_port_t *portp, lacp_t *lacp)
1895 {
1896 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1897 
1898 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1899 
1900 	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1901 	    (pl->PartnerOperPortPriority !=
1902 	    ntohs(lacp->actor_info.port_priority)) ||
1903 	    (ether_cmp(&pl->PartnerOperSystem,
1904 	    &lacp->actor_info.system_id) != 0) ||
1905 	    (pl->PartnerOperSysPriority !=
1906 	    ntohs(lacp->actor_info.system_priority)) ||
1907 	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1908 	    (pl->PartnerOperPortState.bit.aggregation !=
1909 	    lacp->actor_info.state.bit.aggregation)) {
1910 		AGGR_LACP_DBG(("update_selected:(%d): "
1911 		    "selected  %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1912 		    AGGR_UNSELECTED));
1913 
1914 		lacp_port_unselect(portp);
1915 		return (B_TRUE);
1916 	} else {
1917 		return (B_FALSE);
1918 	}
1919 }
1920 
1921 
1922 /*
1923  * update_default_selected - If any of the operational Partner parameters
1924  *			is different than that of the administrative values
1925  *			then unselect the link from the aggregator.
1926  */
1927 static void
update_default_selected(aggr_port_t * portp)1928 update_default_selected(aggr_port_t *portp)
1929 {
1930 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1931 
1932 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1933 
1934 	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1935 	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1936 	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1937 	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1938 	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1939 	    (pl->PartnerOperPortState.bit.aggregation !=
1940 	    pl->PartnerAdminPortState.bit.aggregation)) {
1941 
1942 		AGGR_LACP_DBG(("update_default_selected:(%d): "
1943 		    "selected  %d-->%d\n", portp->lp_linkid,
1944 		    pl->sm.selected, AGGR_UNSELECTED));
1945 
1946 		lacp_port_unselect(portp);
1947 	}
1948 }
1949 
1950 
1951 /*
1952  * update_NTT - If any of the Partner values in the received LACPDU
1953  *			are different than that of the Actor operational
1954  *			values then set NTT to true.
1955  */
1956 static void
update_NTT(aggr_port_t * portp,lacp_t * lacp)1957 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1958 {
1959 	aggr_grp_t *aggrp = portp->lp_grp;
1960 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1961 
1962 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1963 
1964 	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1965 	    (pl->ActorPortPriority !=
1966 	    ntohs(lacp->partner_info.port_priority)) ||
1967 	    (ether_cmp(&aggrp->lg_addr,
1968 	    &lacp->partner_info.system_id) != 0) ||
1969 	    (aggrp->aggr.ActorSystemPriority !=
1970 	    ntohs(lacp->partner_info.system_priority)) ||
1971 	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1972 	    (pl->ActorOperPortState.bit.activity !=
1973 	    lacp->partner_info.state.bit.activity) ||
1974 	    (pl->ActorOperPortState.bit.timeout !=
1975 	    lacp->partner_info.state.bit.timeout) ||
1976 	    (pl->ActorOperPortState.bit.sync !=
1977 	    lacp->partner_info.state.bit.sync) ||
1978 	    (pl->ActorOperPortState.bit.aggregation !=
1979 	    lacp->partner_info.state.bit.aggregation)) {
1980 
1981 		AGGR_LACP_DBG(("update_NTT:(%d): NTT  %d-->%d\n",
1982 		    portp->lp_linkid, pl->NTT, B_TRUE));
1983 
1984 		pl->NTT = B_TRUE;
1985 	}
1986 }
1987 
1988 /*
1989  * lacp_receive_sm - LACP receive state machine
1990  *
1991  * parameters:
1992  *      - portp - instance this applies to.
1993  *      - lacp - pointer in the case of a received LACPDU.
1994  *                This value is NULL if there is no LACPDU.
1995  *
1996  * invoked:
1997  *    - when initialization is needed
1998  *    - upon reception of an LACPDU. This is the common case.
1999  *    - every time the current_while_timer pops
2000  */
2001 static void
lacp_receive_sm(aggr_port_t * portp,lacp_t * lacp)2002 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
2003 {
2004 	boolean_t sync_updated, selected_updated, save_activity;
2005 	aggr_lacp_port_t *pl = &portp->lp_lacp;
2006 	lacp_receive_state_t oldstate = pl->sm.receive_state;
2007 
2008 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
2009 
2010 	/* LACP_OFF state not in specification so check here.  */
2011 	if (!pl->sm.lacp_on)
2012 		return;
2013 
2014 	/* figure next state */
2015 	if (pl->sm.begin || pl->sm.port_moved) {
2016 		pl->sm.receive_state = LACP_INITIALIZE;
2017 	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
2018 		pl->sm.receive_state = LACP_PORT_DISABLED;
2019 	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
2020 		pl->sm.receive_state =
2021 		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
2022 		    LACP_DISABLED : LACP_PORT_DISABLED;
2023 	} else if (lacp != NULL) {
2024 		if ((pl->sm.receive_state == LACP_EXPIRED) ||
2025 		    (pl->sm.receive_state == LACP_DEFAULTED)) {
2026 			pl->sm.receive_state = LACP_CURRENT;
2027 		}
2028 	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
2029 	    (pl->current_while_timer.id == 0)) {
2030 		pl->sm.receive_state = LACP_EXPIRED;
2031 	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
2032 	    (pl->current_while_timer.id == 0)) {
2033 		pl->sm.receive_state = LACP_DEFAULTED;
2034 	}
2035 
2036 	if (!((lacp && (oldstate == LACP_CURRENT) &&
2037 	    (pl->sm.receive_state == LACP_CURRENT)))) {
2038 		AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
2039 		    portp->lp_linkid, lacp_receive_str[oldstate],
2040 		    lacp_receive_str[pl->sm.receive_state]));
2041 	}
2042 
2043 	switch (pl->sm.receive_state) {
2044 	case LACP_INITIALIZE:
2045 		lacp_port_unselect(portp);
2046 		record_Default(portp);
2047 		pl->ActorOperPortState.bit.expired = B_FALSE;
2048 		pl->sm.port_moved = B_FALSE;
2049 		pl->sm.receive_state = LACP_PORT_DISABLED;
2050 		pl->sm.begin = B_FALSE;
2051 		lacp_receive_sm(portp, NULL);
2052 		break;
2053 
2054 	case LACP_PORT_DISABLED:
2055 		pl->PartnerOperPortState.bit.sync = B_FALSE;
2056 		/*
2057 		 * Stop current_while_timer in case
2058 		 * we got here from link down
2059 		 */
2060 		stop_current_while_timer(portp);
2061 
2062 		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
2063 			pl->sm.receive_state = LACP_DISABLED;
2064 			lacp_receive_sm(portp, lacp);
2065 			/* We goto LACP_DISABLED state */
2066 			break;
2067 		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
2068 			pl->sm.receive_state = LACP_EXPIRED;
2069 			/*
2070 			 * FALL THROUGH TO LACP_EXPIRED CASE:
2071 			 * We have no way of knowing if we get into
2072 			 * lacp_receive_sm() from a  current_while_timer
2073 			 * expiring as it has never been kicked off yet!
2074 			 */
2075 		} else {
2076 			/* We stay in LACP_PORT_DISABLED state */
2077 			break;
2078 		}
2079 		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
2080 		/* FALLTHROUGH */
2081 
2082 	case LACP_EXPIRED:
2083 		/*
2084 		 * Arrives here from LACP_PORT_DISABLED state as well as
2085 		 * as well as current_while_timer expiring.
2086 		 */
2087 		pl->PartnerOperPortState.bit.sync = B_FALSE;
2088 		pl->PartnerOperPortState.bit.timeout = B_TRUE;
2089 
2090 		pl->ActorOperPortState.bit.expired = B_TRUE;
2091 		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
2092 		lacp_periodic_sm(portp);
2093 		break;
2094 
2095 	case LACP_DISABLED:
2096 		/*
2097 		 * This is the normal state for recv_sm when LACP_OFF
2098 		 * is set or the NIC is in half duplex mode.
2099 		 */
2100 		lacp_port_unselect(portp);
2101 		record_Default(portp);
2102 		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
2103 		pl->ActorOperPortState.bit.expired = B_FALSE;
2104 		break;
2105 
2106 	case LACP_DEFAULTED:
2107 		/*
2108 		 * Current_while_timer expired a second time.
2109 		 */
2110 		update_default_selected(portp);
2111 		record_Default(portp);	/* overwrite Partner Oper val */
2112 		pl->ActorOperPortState.bit.expired = B_FALSE;
2113 		pl->PartnerOperPortState.bit.sync = B_TRUE;
2114 
2115 		lacp_selection_logic(portp);
2116 		lacp_mux_sm(portp);
2117 		break;
2118 
2119 	case LACP_CURRENT:
2120 		/*
2121 		 * Reception of LACPDU
2122 		 */
2123 
2124 		if (!lacp) /* no LACPDU so current_while_timer popped */
2125 			break;
2126 
2127 		AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
2128 		    portp->lp_linkid));
2129 
2130 		/*
2131 		 * Validate Actor_Information_Length,
2132 		 * Partner_Information_Length, Collector_Information_Length,
2133 		 * and Terminator_Length fields.
2134 		 */
2135 		if (!valid_lacp_pdu(portp, lacp)) {
2136 			AGGR_LACP_DBG(("lacp_receive_sm (%d): "
2137 			    "Invalid LACPDU received\n",
2138 			    portp->lp_linkid));
2139 			break;
2140 		}
2141 
2142 		save_activity = pl->PartnerOperPortState.bit.activity;
2143 		selected_updated = update_selected(portp, lacp);
2144 		update_NTT(portp, lacp);
2145 		sync_updated = record_PDU(portp, lacp);
2146 
2147 		pl->ActorOperPortState.bit.expired = B_FALSE;
2148 
2149 		if (selected_updated) {
2150 			lacp_selection_logic(portp);
2151 			lacp_mux_sm(portp);
2152 		} else if (sync_updated) {
2153 			lacp_mux_sm(portp);
2154 		}
2155 
2156 		/*
2157 		 * If the periodic timer value bit has been modified
2158 		 * or the partner activity bit has been changed then
2159 		 * we need to respectively:
2160 		 *  - restart the timer with the proper timeout value.
2161 		 *  - possibly enable/disable transmission of LACPDUs.
2162 		 */
2163 		if ((pl->PartnerOperPortState.bit.timeout &&
2164 		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2165 		    (!pl->PartnerOperPortState.bit.timeout &&
2166 		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2167 		    (pl->PartnerOperPortState.bit.activity !=
2168 		    save_activity)) {
2169 			lacp_periodic_sm(portp);
2170 		}
2171 
2172 		stop_current_while_timer(portp);
2173 		/* Check if we need to transmit an LACPDU */
2174 		if (pl->NTT)
2175 			lacp_xmit_sm(portp);
2176 		start_current_while_timer(portp, 0);
2177 
2178 		break;
2179 	}
2180 }
2181 
2182 static void
aggr_set_coll_dist(aggr_port_t * portp,boolean_t enable)2183 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2184 {
2185 	mac_perim_handle_t mph;
2186 
2187 	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2188 	    portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2189 
2190 	mac_perim_enter_by_mh(portp->lp_mh, &mph);
2191 	if (!enable) {
2192 		/*
2193 		 * Turn OFF Collector_Distributor.
2194 		 */
2195 		portp->lp_collector_enabled = B_FALSE;
2196 		aggr_send_port_disable(portp);
2197 		goto done;
2198 	}
2199 
2200 	/*
2201 	 * Turn ON Collector_Distributor.
2202 	 */
2203 
2204 	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2205 	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2206 		/* Port is compatible and can be aggregated */
2207 		portp->lp_collector_enabled = B_TRUE;
2208 		aggr_send_port_enable(portp);
2209 	}
2210 
2211 done:
2212 	mac_perim_exit(mph);
2213 }
2214 
2215 /*
2216  * Because the LACP packet processing needs to enter the aggr's mac perimeter
2217  * and that would potentially cause a deadlock with the thread in which the
2218  * grp/port is deleted, we defer the packet process to a worker thread. Here
2219  * we only enqueue the received Marker or LACPDU for later processing.
2220  */
2221 void
aggr_lacp_rx_enqueue(aggr_port_t * portp,mblk_t * dmp)2222 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp)
2223 {
2224 	aggr_grp_t *grp = portp->lp_grp;
2225 	lacp_t	*lacp;
2226 
2227 	dmp->b_rptr += sizeof (struct ether_header);
2228 
2229 	if (MBLKL(dmp) < sizeof (lacp_t)) {
2230 		freemsg(dmp);
2231 		return;
2232 	}
2233 
2234 	lacp = (lacp_t *)dmp->b_rptr;
2235 	if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) {
2236 		AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): "
2237 		    "Unknown Slow Protocol type %d\n",
2238 		    portp->lp_linkid, lacp->subtype));
2239 		freemsg(dmp);
2240 		return;
2241 	}
2242 
2243 	mutex_enter(&grp->lg_lacp_lock);
2244 
2245 	/*
2246 	 * If the lg_lacp_done is set, this aggregation is in the process of
2247 	 * being deleted, return directly.
2248 	 */
2249 	if (grp->lg_lacp_done) {
2250 		mutex_exit(&grp->lg_lacp_lock);
2251 		freemsg(dmp);
2252 		return;
2253 	}
2254 
2255 	if (grp->lg_lacp_tail == NULL) {
2256 		grp->lg_lacp_head = grp->lg_lacp_tail = dmp;
2257 	} else {
2258 		grp->lg_lacp_tail->b_next = dmp;
2259 		grp->lg_lacp_tail = dmp;
2260 	}
2261 
2262 	/*
2263 	 * Hold a reference of the port so that the port won't be freed when it
2264 	 * is removed from the aggr. The b_prev field is borrowed to save the
2265 	 * port information.
2266 	 */
2267 	AGGR_PORT_REFHOLD(portp);
2268 	dmp->b_prev = (mblk_t *)portp;
2269 	cv_broadcast(&grp->lg_lacp_cv);
2270 	mutex_exit(&grp->lg_lacp_lock);
2271 }
2272 
2273 static void
aggr_lacp_rx(mblk_t * dmp)2274 aggr_lacp_rx(mblk_t *dmp)
2275 {
2276 	aggr_port_t *portp = (aggr_port_t *)dmp->b_prev;
2277 	mac_perim_handle_t mph;
2278 	lacp_t	*lacp;
2279 
2280 	dmp->b_prev = NULL;
2281 
2282 	mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph);
2283 	if (portp->lp_closing)
2284 		goto done;
2285 
2286 	lacp = (lacp_t *)dmp->b_rptr;
2287 	switch (lacp->subtype) {
2288 	case LACP_SUBTYPE:
2289 		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2290 		    portp->lp_linkid));
2291 
2292 		if (!portp->lp_lacp.sm.lacp_on) {
2293 			break;
2294 		}
2295 		lacp_receive_sm(portp, lacp);
2296 		break;
2297 
2298 	case MARKER_SUBTYPE:
2299 		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2300 		    portp->lp_linkid));
2301 
2302 		if (receive_marker_pdu(portp, dmp) != 0)
2303 			break;
2304 
2305 		/* Send the packet over the first TX ring */
2306 		dmp = mac_hwring_send_priv(portp->lp_mch,
2307 		    portp->lp_tx_rings[0], dmp);
2308 		if (dmp != NULL)
2309 			freemsg(dmp);
2310 		mac_perim_exit(mph);
2311 		AGGR_PORT_REFRELE(portp);
2312 		return;
2313 	}
2314 
2315 done:
2316 	mac_perim_exit(mph);
2317 	AGGR_PORT_REFRELE(portp);
2318 	freemsg(dmp);
2319 }
2320 
2321 void
aggr_lacp_rx_thread(void * arg)2322 aggr_lacp_rx_thread(void *arg)
2323 {
2324 	callb_cpr_t	cprinfo;
2325 	aggr_grp_t	*grp = (aggr_grp_t *)arg;
2326 	aggr_port_t	*port;
2327 	mblk_t		*mp, *nextmp;
2328 
2329 	CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr,
2330 	    "aggr_lacp_rx_thread");
2331 
2332 	mutex_enter(&grp->lg_lacp_lock);
2333 
2334 	/*
2335 	 * Quit the thread if the grp is deleted.
2336 	 */
2337 	while (!grp->lg_lacp_done) {
2338 		if ((mp = grp->lg_lacp_head) == NULL) {
2339 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
2340 			cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
2341 			CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock);
2342 			continue;
2343 		}
2344 
2345 		grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2346 		mutex_exit(&grp->lg_lacp_lock);
2347 
2348 		while (mp != NULL) {
2349 			nextmp = mp->b_next;
2350 			mp->b_next = NULL;
2351 			aggr_lacp_rx(mp);
2352 			mp = nextmp;
2353 		}
2354 		mutex_enter(&grp->lg_lacp_lock);
2355 	}
2356 
2357 	/*
2358 	 * The grp is being destroyed, simply free all of the LACP messages
2359 	 * left in the queue which did not have the chance to be processed.
2360 	 * We cannot use freemsgchain() here since we need to clear the
2361 	 * b_prev field.
2362 	 */
2363 	for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) {
2364 		port = (aggr_port_t *)mp->b_prev;
2365 		AGGR_PORT_REFRELE(port);
2366 		nextmp = mp->b_next;
2367 		mp->b_next = NULL;
2368 		mp->b_prev = NULL;
2369 		freemsg(mp);
2370 	}
2371 
2372 	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2373 	grp->lg_lacp_rx_thread = NULL;
2374 	cv_broadcast(&grp->lg_lacp_cv);
2375 	CALLB_CPR_EXIT(&cprinfo);
2376 	thread_exit();
2377 }
2378