xref: /titanic_44/usr/src/uts/common/io/aggr/aggr_lacp.c (revision 2ac91f16ff88f3d466d00d9096a7dfc263539f65)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/callb.h>
32 #include <sys/conf.h>
33 #include <sys/cmn_err.h>
34 #include <sys/disp.h>
35 #include <sys/list.h>
36 #include <sys/ksynch.h>
37 #include <sys/kmem.h>
38 #include <sys/stream.h>
39 #include <sys/modctl.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/atomic.h>
43 #include <sys/stat.h>
44 #include <sys/byteorder.h>
45 #include <sys/strsun.h>
46 #include <sys/isa_defs.h>
47 #include <sys/sdt.h>
48 
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
51 
52 static struct ether_addr	etherzeroaddr = {
53 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
54 };
55 
56 /*
57  * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
58  */
59 static struct ether_addr   slow_multicast_addr = {
60 	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
61 };
62 
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG((fmt, args...)) takes a parenthesized printf() argument
 * list. On DEBUG builds the message is printed when aggr_lacp_debug is
 * non-zero; on non-DEBUG builds the macro expands to an empty statement.
 *
 * Both forms are wrapped in do { } while (0) so that the macro always
 * behaves as exactly one statement: it can be followed by a semicolon and
 * used as the unbraced body of an if/else without capturing the else or
 * producing a syntax error.
 */
#ifdef DEBUG
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	\
	do { if (aggr_lacp_debug) { (void) printf x; } } while (0)
#else
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif /* DEBUG */
70 
/* Nanoseconds in one second; compared against gethrtime() deltas. */
#define	NSECS_PER_SEC	1000000000LL
72 
73 /* used by lacp_misconfig_walker() */
74 typedef struct lacp_misconfig_check_state_s {
75 	aggr_port_t *cs_portp;
76 	boolean_t cs_found;
77 } lacp_misconfig_check_state_t;
78 
79 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
80 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
81 static const char *lacp_mux_str[] = LACP_MUX_STRINGS;
82 
83 static uint16_t lacp_port_priority = 0x1000;
84 static uint16_t lacp_system_priority = 0x1000;
85 
86 /*
87  * Maintains a list of all ports in ATTACHED state. This information
88  * is used to detect misconfiguration.
89  */
90 typedef struct lacp_sel_ports {
91 	datalink_id_t sp_grp_linkid;
92 	datalink_id_t sp_linkid;
93 	/* Note: sp_partner_system must be 2-byte aligned */
94 	struct ether_addr sp_partner_system;
95 	uint32_t sp_partner_key;
96 	struct lacp_sel_ports *sp_next;
97 } lacp_sel_ports_t;
98 
99 static lacp_sel_ports_t *sel_ports = NULL;
100 static kmutex_t lacp_sel_lock;
101 
102 static void periodic_timer_pop(void *);
103 static void periodic_timer_pop_handler(aggr_port_t *);
104 static void lacp_xmit_sm(aggr_port_t *);
105 static void lacp_periodic_sm(aggr_port_t *);
106 static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
107 static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
108 static void lacp_on(aggr_port_t *);
109 static void lacp_off(aggr_port_t *);
110 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
111 static void lacp_receive_sm(aggr_port_t *, lacp_t *);
112 static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
113 static void start_wait_while_timer(aggr_port_t *);
114 static void stop_wait_while_timer(aggr_port_t *);
115 static void lacp_reset_port(aggr_port_t *);
116 static void stop_current_while_timer(aggr_port_t *);
117 static void current_while_timer_pop(void *);
118 static void current_while_timer_pop_handler(aggr_port_t *);
119 static void update_default_selected(aggr_port_t *);
120 static boolean_t update_selected(aggr_port_t *, lacp_t *);
121 static boolean_t lacp_sel_ports_add(aggr_port_t *);
122 static void lacp_sel_ports_del(aggr_port_t *);
123 static void wait_while_timer_pop(void *);
124 static void wait_while_timer_pop_handler(aggr_port_t *);
125 
126 void
aggr_lacp_init(void)127 aggr_lacp_init(void)
128 {
129 	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
130 }
131 
132 void
aggr_lacp_fini(void)133 aggr_lacp_fini(void)
134 {
135 	mutex_destroy(&lacp_sel_lock);
136 }
137 
138 /*
139  * The following functions are used for handling LACP timers.
140  *
141  * Note that we cannot fully rely on the aggr's mac perimeter in the timeout
142  * handler routine, otherwise it may cause deadlock with the untimeout() call
143  * which is usually called with the mac perimeter held. Instead, a
144  * lacp_timer_lock mutex is introduced, which protects a bitwise flag
145  * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer()
146  * routines and is checked by a dedicated thread, that executes the real
147  * timeout operation.
148  */
149 static void
aggr_port_timer_thread(void * arg)150 aggr_port_timer_thread(void *arg)
151 {
152 	aggr_port_t		*port = arg;
153 	aggr_lacp_port_t	*pl = &port->lp_lacp;
154 	aggr_grp_t		*grp = port->lp_grp;
155 	uint32_t		lacp_timer_bits;
156 	mac_perim_handle_t	mph;
157 	callb_cpr_t		cprinfo;
158 
159 	CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr,
160 	    "aggr_port_timer_thread");
161 
162 	mutex_enter(&pl->lacp_timer_lock);
163 
164 	for (;;) {
165 
166 		if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) {
167 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
168 			cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock);
169 			CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock);
170 			continue;
171 		}
172 		pl->lacp_timer_bits = 0;
173 
174 		if (lacp_timer_bits & LACP_THREAD_EXIT)
175 			break;
176 
177 		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
178 			pl->periodic_timer.id = 0;
179 		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
180 			pl->wait_while_timer.id = 0;
181 		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
182 			pl->current_while_timer.id = 0;
183 
184 		mutex_exit(&pl->lacp_timer_lock);
185 
186 		mac_perim_enter_by_mh(grp->lg_mh, &mph);
187 		if (port->lp_closing) {
188 			mac_perim_exit(mph);
189 			mutex_enter(&pl->lacp_timer_lock);
190 			break;
191 		}
192 
193 		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
194 			periodic_timer_pop_handler(port);
195 		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
196 			wait_while_timer_pop_handler(port);
197 		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
198 			current_while_timer_pop_handler(port);
199 		mac_perim_exit(mph);
200 
201 		mutex_enter(&pl->lacp_timer_lock);
202 		if (pl->lacp_timer_bits & LACP_THREAD_EXIT)
203 			break;
204 	}
205 
206 	pl->lacp_timer_bits = 0;
207 	pl->lacp_timer_thread = NULL;
208 	cv_broadcast(&pl->lacp_timer_cv);
209 
210 	/* CALLB_CPR_EXIT drops the lock */
211 	CALLB_CPR_EXIT(&cprinfo);
212 
213 	/*
214 	 * Release the reference of the grp so aggr_grp_delete() can call
215 	 * mac_unregister() safely.
216 	 */
217 	aggr_grp_port_rele(port);
218 	thread_exit();
219 }
220 
221 /*
222  * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
223  * could not be performed due to a memory allocation error, B_TRUE otherwise.
224  */
225 static boolean_t
lacp_port_select(aggr_port_t * portp)226 lacp_port_select(aggr_port_t *portp)
227 {
228 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
229 
230 	if (!lacp_sel_ports_add(portp))
231 		return (B_FALSE);
232 	portp->lp_lacp.sm.selected = AGGR_SELECTED;
233 	return (B_TRUE);
234 }
235 
236 /*
237  * Set the port LACP state to UNSELECTED.
238  */
239 static void
lacp_port_unselect(aggr_port_t * portp)240 lacp_port_unselect(aggr_port_t *portp)
241 {
242 	aggr_grp_t	*grp = portp->lp_grp;
243 
244 	ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh));
245 
246 	lacp_sel_ports_del(portp);
247 	portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
248 }
249 
250 /*
251  * Initialize group specific LACP state and parameters.
252  */
253 void
aggr_lacp_init_grp(aggr_grp_t * aggrp)254 aggr_lacp_init_grp(aggr_grp_t *aggrp)
255 {
256 	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
257 	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
258 	aggrp->aggr.CollectorMaxDelay = 10;
259 	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
260 	aggrp->aggr.ready = B_FALSE;
261 }
262 
263 /*
264  * Complete LACP info initialization at port creation time.
265  */
266 void
aggr_lacp_init_port(aggr_port_t * portp)267 aggr_lacp_init_port(aggr_port_t *portp)
268 {
269 	aggr_grp_t *aggrp = portp->lp_grp;
270 	aggr_lacp_port_t *pl = &portp->lp_lacp;
271 
272 	ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh));
273 	ASSERT(MAC_PERIM_HELD(portp->lp_mh));
274 
275 	/* actor port # */
276 	pl->ActorPortNumber = portp->lp_portid;
277 	AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
278 	    "ActorPortNumber = 0x%x\n", portp->lp_linkid,
279 	    pl->ActorPortNumber));
280 
281 	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
282 	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
283 	pl->NTT = B_FALSE;			/* need to transmit */
284 
285 	pl->ActorAdminPortKey = aggrp->lg_key;
286 	pl->ActorOperPortKey = pl->ActorAdminPortKey;
287 	AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
288 	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
289 	    portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
290 
291 	/* Actor admin. port state */
292 	pl->ActorAdminPortState.bit.activity = B_FALSE;
293 	pl->ActorAdminPortState.bit.timeout = B_TRUE;
294 	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
295 	pl->ActorAdminPortState.bit.sync = B_FALSE;
296 	pl->ActorAdminPortState.bit.collecting = B_FALSE;
297 	pl->ActorAdminPortState.bit.distributing = B_FALSE;
298 	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
299 	pl->ActorAdminPortState.bit.expired = B_FALSE;
300 	pl->ActorOperPortState = pl->ActorAdminPortState;
301 
302 	/*
303 	 * Partner Administrative Information
304 	 * (All initialized to zero except for the following)
305 	 * Fast Timeouts.
306 	 */
307 	pl->PartnerAdminPortState.bit.timeout =
308 	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
309 
310 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
311 
312 	/*
313 	 * State machine information.
314 	 */
315 	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
316 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
317 	pl->sm.lacp_enabled = B_FALSE;
318 	pl->sm.port_enabled = B_FALSE;		/* Link Down */
319 	pl->sm.actor_churn = B_FALSE;
320 	pl->sm.partner_churn = B_FALSE;
321 	pl->sm.ready_n = B_FALSE;
322 	pl->sm.port_moved = B_FALSE;
323 
324 	lacp_port_unselect(portp);
325 
326 	pl->sm.periodic_state = LACP_NO_PERIODIC;
327 	pl->sm.receive_state = LACP_INITIALIZE;
328 	pl->sm.mux_state = LACP_DETACHED;
329 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
330 
331 	/*
332 	 * Timer information.
333 	 */
334 	pl->current_while_timer.id = 0;
335 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
336 
337 	pl->periodic_timer.id = 0;
338 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
339 
340 	pl->wait_while_timer.id = 0;
341 	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
342 
343 	pl->lacp_timer_bits = 0;
344 
345 	mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL);
346 	cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL);
347 
348 	pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread,
349 	    portp, 0, &p0, TS_RUN, minclsyspri);
350 
351 	/*
352 	 * Hold a reference of the grp and the port and this reference will
353 	 * be release when the thread exits.
354 	 *
355 	 * The reference on the port is used for aggr_port_delete() to
356 	 * continue without waiting for the thread to exit; the reference
357 	 * on the grp is used for aggr_grp_delete() to wait for the thread
358 	 * to exit before calling mac_unregister().
359 	 */
360 	aggr_grp_port_hold(portp);
361 }
362 
363 /*
364  * Port initialization when we need to
365  * turn LACP on/off, etc. Not everything is
366  * reset like in the above routine.
367  *		Do NOT modify things like link status.
368  */
369 static void
lacp_reset_port(aggr_port_t * portp)370 lacp_reset_port(aggr_port_t *portp)
371 {
372 	aggr_lacp_port_t *pl = &portp->lp_lacp;
373 
374 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
375 
376 	pl->NTT = B_FALSE;			/* need to transmit */
377 
378 	/* reset operational port state */
379 	pl->ActorOperPortState.bit.timeout =
380 	    pl->ActorAdminPortState.bit.timeout;
381 
382 	pl->ActorOperPortState.bit.sync = B_FALSE;
383 	pl->ActorOperPortState.bit.collecting = B_FALSE;
384 	pl->ActorOperPortState.bit.distributing = B_FALSE;
385 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
386 	pl->ActorOperPortState.bit.expired = B_FALSE;
387 
388 	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
389 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
390 
391 	/*
392 	 * State machine information.
393 	 */
394 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
395 	pl->sm.actor_churn = B_FALSE;
396 	pl->sm.partner_churn = B_FALSE;
397 	pl->sm.ready_n = B_FALSE;
398 
399 	lacp_port_unselect(portp);
400 
401 	pl->sm.periodic_state = LACP_NO_PERIODIC;
402 	pl->sm.receive_state = LACP_INITIALIZE;
403 	pl->sm.mux_state = LACP_DETACHED;
404 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
405 
406 	/*
407 	 * Timer information.
408 	 */
409 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
410 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
411 }
412 
413 static void
aggr_lacp_mcast_on(aggr_port_t * port)414 aggr_lacp_mcast_on(aggr_port_t *port)
415 {
416 	ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
417 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
418 
419 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
420 		return;
421 
422 	(void) aggr_port_multicst(port, B_TRUE,
423 	    (uchar_t *)&slow_multicast_addr);
424 }
425 
426 static void
aggr_lacp_mcast_off(aggr_port_t * port)427 aggr_lacp_mcast_off(aggr_port_t *port)
428 {
429 	ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
430 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
431 
432 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
433 		return;
434 
435 	(void) aggr_port_multicst(port, B_FALSE,
436 	    (uchar_t *)&slow_multicast_addr);
437 }
438 
439 static void
start_periodic_timer(aggr_port_t * portp)440 start_periodic_timer(aggr_port_t *portp)
441 {
442 	aggr_lacp_port_t *pl = &portp->lp_lacp;
443 
444 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
445 
446 	mutex_enter(&pl->lacp_timer_lock);
447 	if (pl->periodic_timer.id == 0) {
448 		pl->periodic_timer.id = timeout(periodic_timer_pop, portp,
449 		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
450 	}
451 	mutex_exit(&pl->lacp_timer_lock);
452 }
453 
454 static void
stop_periodic_timer(aggr_port_t * portp)455 stop_periodic_timer(aggr_port_t *portp)
456 {
457 	aggr_lacp_port_t *pl = &portp->lp_lacp;
458 	timeout_id_t id;
459 
460 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
461 
462 	mutex_enter(&pl->lacp_timer_lock);
463 	if ((id = pl->periodic_timer.id) != 0) {
464 		pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT;
465 		pl->periodic_timer.id = 0;
466 	}
467 	mutex_exit(&pl->lacp_timer_lock);
468 
469 	if (id != 0)
470 		(void) untimeout(id);
471 }
472 
473 /*
474  * When the timer pops, we arrive here to
475  * clear out LACPDU count as well as transmit an
476  * LACPDU. We then set the periodic state and let
477  * the periodic state machine restart the timer.
478  */
479 static void
periodic_timer_pop(void * data)480 periodic_timer_pop(void *data)
481 {
482 	aggr_port_t *portp = data;
483 	aggr_lacp_port_t *pl = &portp->lp_lacp;
484 
485 	mutex_enter(&pl->lacp_timer_lock);
486 	pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT;
487 	cv_broadcast(&pl->lacp_timer_cv);
488 	mutex_exit(&pl->lacp_timer_lock);
489 }
490 
491 /*
492  * When the timer pops, we arrive here to
493  * clear out LACPDU count as well as transmit an
494  * LACPDU. We then set the periodic state and let
495  * the periodic state machine restart the timer.
496  */
497 static void
periodic_timer_pop_handler(aggr_port_t * portp)498 periodic_timer_pop_handler(aggr_port_t *portp)
499 {
500 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
501 
502 	portp->lp_lacp_stats.LACPDUsTx = 0;
503 
504 	/* current timestamp */
505 	portp->lp_lacp.time = gethrtime();
506 	portp->lp_lacp.NTT = B_TRUE;
507 	lacp_xmit_sm(portp);
508 
509 	/*
510 	 * Set Periodic State machine state based on the
511 	 * value of the Partner Operation Port State timeout
512 	 * bit.
513 	 */
514 	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
515 		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
516 		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
517 	} else {
518 		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
519 		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
520 	}
521 
522 	lacp_periodic_sm(portp);
523 }
524 
525 /*
526  * Invoked from:
527  *	- startup upon aggregation
528  *	- when the periodic timer pops
529  *	- when the periodic timer value is changed
530  *	- when the port is attached or detached
531  *	- when LACP mode is changed.
532  */
533 static void
lacp_periodic_sm(aggr_port_t * portp)534 lacp_periodic_sm(aggr_port_t *portp)
535 {
536 	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
537 	aggr_lacp_port_t *pl = &portp->lp_lacp;
538 
539 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
540 
541 	/* LACP_OFF state not in specification so check here.  */
542 	if (!pl->sm.lacp_on) {
543 		/* Stop timer whether it is running or not */
544 		stop_periodic_timer(portp);
545 		pl->sm.periodic_state = LACP_NO_PERIODIC;
546 		pl->NTT = B_FALSE;
547 		AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
548 		    "%s--->%s\n", portp->lp_linkid,
549 		    lacp_periodic_str[oldstate],
550 		    lacp_periodic_str[pl->sm.periodic_state]));
551 		return;
552 	}
553 
554 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
555 	    !pl->sm.port_enabled ||
556 	    !pl->ActorOperPortState.bit.activity &&
557 	    !pl->PartnerOperPortState.bit.activity) {
558 
559 		/* Stop timer whether it is running or not */
560 		stop_periodic_timer(portp);
561 		pl->sm.periodic_state = LACP_NO_PERIODIC;
562 		pl->NTT = B_FALSE;
563 		AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
564 		    portp->lp_linkid, lacp_periodic_str[oldstate],
565 		    lacp_periodic_str[pl->sm.periodic_state]));
566 		return;
567 	}
568 
569 	/*
570 	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
571 	 * has been received. Then after we timeout, then it is
572 	 * possible to go to SLOW_PERIODIC_TIME.
573 	 */
574 	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
575 		pl->periodic_timer.val = FAST_PERIODIC_TIME;
576 		pl->sm.periodic_state = LACP_FAST_PERIODIC;
577 	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
578 	    pl->PartnerOperPortState.bit.timeout) {
579 		/*
580 		 * If we receive a bit indicating we are going to
581 		 * fast periodic from slow periodic, stop the timer
582 		 * and let the periodic_timer_pop routine deal
583 		 * with reseting the periodic state and transmitting
584 		 * a LACPDU.
585 		 */
586 		stop_periodic_timer(portp);
587 		periodic_timer_pop_handler(portp);
588 	}
589 
590 	/* Rearm timer with value provided by partner */
591 	start_periodic_timer(portp);
592 }
593 
594 /*
595  * This routine transmits an LACPDU if lacp_enabled
596  * is TRUE and if NTT is set.
597  */
598 static void
lacp_xmit_sm(aggr_port_t * portp)599 lacp_xmit_sm(aggr_port_t *portp)
600 {
601 	aggr_lacp_port_t *pl = &portp->lp_lacp;
602 	size_t	len;
603 	mblk_t  *mp;
604 	hrtime_t now, elapsed;
605 
606 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
607 
608 	/* LACP_OFF state not in specification so check here.  */
609 	if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
610 		return;
611 
612 	/*
613 	 * Do nothing if LACP has been turned off or if the
614 	 * periodic state machine is not enabled.
615 	 */
616 	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
617 	    !pl->sm.lacp_enabled || pl->sm.begin) {
618 		pl->NTT = B_FALSE;
619 		return;
620 	}
621 
622 	/*
623 	 * If we have sent 5 Slow packets in the last second, avoid
624 	 * sending any more here. No more than three LACPDUs may be transmitted
625 	 * in any Fast_Periodic_Time interval.
626 	 */
627 	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
628 		/*
629 		 * Grab the current time value and see if
630 		 * more than 1 second has passed. If so,
631 		 * reset the timestamp and clear the count.
632 		 */
633 		now = gethrtime();
634 		elapsed = now - pl->time;
635 		if (elapsed > NSECS_PER_SEC) {
636 			portp->lp_lacp_stats.LACPDUsTx = 0;
637 			pl->time = now;
638 		} else {
639 			return;
640 		}
641 	}
642 
643 	len = sizeof (lacp_t) + sizeof (struct ether_header);
644 	mp = allocb(len, BPRI_MED);
645 	if (mp == NULL)
646 		return;
647 
648 	mp->b_wptr = mp->b_rptr + len;
649 	bzero(mp->b_rptr, len);
650 
651 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
652 	fill_lacp_pdu(portp,
653 	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
654 
655 	/* Send the packet over the first TX ring */
656 	mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp);
657 	if (mp != NULL)
658 		freemsg(mp);
659 
660 	pl->NTT = B_FALSE;
661 	portp->lp_lacp_stats.LACPDUsTx++;
662 }
663 
664 /*
665  * Initialize the ethernet header of a LACP packet sent from the specified
666  * port.
667  */
668 static void
fill_lacp_ether(aggr_port_t * port,struct ether_header * ether)669 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
670 {
671 	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
672 	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
673 	    ETHERADDRL);
674 	ether->ether_type = htons(ETHERTYPE_SLOW);
675 }
676 
677 static void
fill_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)678 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
679 {
680 	aggr_lacp_port_t *pl = &portp->lp_lacp;
681 	aggr_grp_t *aggrp = portp->lp_grp;
682 	mac_perim_handle_t pmph;
683 
684 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
685 	mac_perim_enter_by_mh(portp->lp_mh, &pmph);
686 
687 	lacp->subtype = LACP_SUBTYPE;
688 	lacp->version = LACP_VERSION;
689 
690 	/*
691 	 * Actor Information
692 	 */
693 	lacp->actor_info.tlv_type = ACTOR_TLV;
694 	lacp->actor_info.information_len = sizeof (link_info_t);
695 	lacp->actor_info.system_priority =
696 	    htons(aggrp->aggr.ActorSystemPriority);
697 	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
698 	    ETHERADDRL);
699 	lacp->actor_info.key = htons(pl->ActorOperPortKey);
700 	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
701 	lacp->actor_info.port = htons(pl->ActorPortNumber);
702 	lacp->actor_info.state.state = pl->ActorOperPortState.state;
703 
704 	/*
705 	 * Partner Information
706 	 */
707 	lacp->partner_info.tlv_type = PARTNER_TLV;
708 	lacp->partner_info.information_len = sizeof (link_info_t);
709 	lacp->partner_info.system_priority =
710 	    htons(pl->PartnerOperSysPriority);
711 	lacp->partner_info.system_id = pl->PartnerOperSystem;
712 	lacp->partner_info.key = htons(pl->PartnerOperKey);
713 	lacp->partner_info.port_priority =
714 	    htons(pl->PartnerOperPortPriority);
715 	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
716 	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
717 
718 	/* Collector Information */
719 	lacp->tlv_collector = COLLECTOR_TLV;
720 	lacp->collector_len = 0x10;
721 	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
722 
723 	/* Termination Information */
724 	lacp->tlv_terminator = TERMINATOR_TLV;
725 	lacp->terminator_len = 0x0;
726 
727 	mac_perim_exit(pmph);
728 }
729 
730 /*
731  * lacp_mux_sm - LACP mux state machine
732  *		This state machine is invoked from:
733  *			- startup upon aggregation
734  *			- from the Selection logic
735  *			- when the wait_while_timer pops
736  *			- when the aggregation MAC address is changed
737  *			- when receiving DL_NOTE_LINK_UP/DOWN
738  *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
739  *			- when LACP mode is changed.
740  *			- when a DL_NOTE_SPEED is received
741  */
742 static void
lacp_mux_sm(aggr_port_t * portp)743 lacp_mux_sm(aggr_port_t *portp)
744 {
745 	aggr_grp_t *aggrp = portp->lp_grp;
746 	boolean_t NTT_updated = B_FALSE;
747 	aggr_lacp_port_t *pl = &portp->lp_lacp;
748 	lacp_mux_state_t oldstate = pl->sm.mux_state;
749 
750 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
751 
752 	/* LACP_OFF state not in specification so check here.  */
753 	if (!pl->sm.lacp_on) {
754 		pl->sm.mux_state = LACP_DETACHED;
755 		pl->ActorOperPortState.bit.sync = B_FALSE;
756 
757 		if (pl->ActorOperPortState.bit.collecting ||
758 		    pl->ActorOperPortState.bit.distributing) {
759 			AGGR_LACP_DBG(("trunk link: (%d): "
760 			    "Collector_Distributor Disabled.\n",
761 			    portp->lp_linkid));
762 		}
763 
764 		pl->ActorOperPortState.bit.collecting =
765 		    pl->ActorOperPortState.bit.distributing = B_FALSE;
766 		return;
767 	}
768 
769 	if (pl->sm.begin || !pl->sm.lacp_enabled)
770 		pl->sm.mux_state = LACP_DETACHED;
771 
772 again:
773 	/* determine next state, or return if state unchanged */
774 	switch (pl->sm.mux_state) {
775 	case LACP_DETACHED:
776 		if (pl->sm.begin) {
777 			break;
778 		}
779 
780 		if ((pl->sm.selected == AGGR_SELECTED) ||
781 		    (pl->sm.selected == AGGR_STANDBY)) {
782 			pl->sm.mux_state = LACP_WAITING;
783 			break;
784 		}
785 		return;
786 
787 	case LACP_WAITING:
788 		if (pl->sm.selected == AGGR_UNSELECTED) {
789 			pl->sm.mux_state = LACP_DETACHED;
790 			break;
791 		}
792 
793 		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
794 			pl->sm.mux_state = LACP_ATTACHED;
795 			break;
796 		}
797 		return;
798 
799 	case LACP_ATTACHED:
800 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
801 		    (pl->sm.selected == AGGR_STANDBY)) {
802 			pl->sm.mux_state = LACP_DETACHED;
803 			break;
804 		}
805 
806 		if ((pl->sm.selected == AGGR_SELECTED) &&
807 		    pl->PartnerOperPortState.bit.sync) {
808 			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
809 			break;
810 		}
811 		return;
812 
813 	case LACP_COLLECTING_DISTRIBUTING:
814 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
815 		    (pl->sm.selected == AGGR_STANDBY) ||
816 		    !pl->PartnerOperPortState.bit.sync) {
817 			pl->sm.mux_state = LACP_ATTACHED;
818 			break;
819 		}
820 		return;
821 	}
822 
823 	AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
824 	    portp->lp_linkid, lacp_mux_str[oldstate],
825 	    lacp_mux_str[pl->sm.mux_state]));
826 
827 	/* perform actions on entering a new state */
828 	switch (pl->sm.mux_state) {
829 	case LACP_DETACHED:
830 		if (pl->ActorOperPortState.bit.collecting ||
831 		    pl->ActorOperPortState.bit.distributing) {
832 			AGGR_LACP_DBG(("trunk link: (%d): "
833 			    "Collector_Distributor Disabled.\n",
834 			    portp->lp_linkid));
835 		}
836 
837 		pl->ActorOperPortState.bit.sync =
838 		    pl->ActorOperPortState.bit.collecting = B_FALSE;
839 
840 		/* Turn OFF Collector_Distributor */
841 		aggr_set_coll_dist(portp, B_FALSE);
842 
843 		pl->ActorOperPortState.bit.distributing = B_FALSE;
844 		NTT_updated = B_TRUE;
845 		break;
846 
847 	case LACP_WAITING:
848 		start_wait_while_timer(portp);
849 		break;
850 
851 	case LACP_ATTACHED:
852 		if (pl->ActorOperPortState.bit.collecting ||
853 		    pl->ActorOperPortState.bit.distributing) {
854 			AGGR_LACP_DBG(("trunk link: (%d): "
855 			    "Collector_Distributor Disabled.\n",
856 			    portp->lp_linkid));
857 		}
858 
859 		pl->ActorOperPortState.bit.sync = B_TRUE;
860 		pl->ActorOperPortState.bit.collecting = B_FALSE;
861 
862 		/* Turn OFF Collector_Distributor */
863 		aggr_set_coll_dist(portp, B_FALSE);
864 
865 		pl->ActorOperPortState.bit.distributing = B_FALSE;
866 		NTT_updated = B_TRUE;
867 		if (pl->PartnerOperPortState.bit.sync) {
868 			/*
869 			 * We had already received an updated sync from
870 			 * the partner. Attempt to transition to
871 			 * collecting/distributing now.
872 			 */
873 			goto again;
874 		}
875 		break;
876 
877 	case LACP_COLLECTING_DISTRIBUTING:
878 		if (!pl->ActorOperPortState.bit.collecting &&
879 		    !pl->ActorOperPortState.bit.distributing) {
880 			AGGR_LACP_DBG(("trunk link: (%d): "
881 			    "Collector_Distributor Enabled.\n",
882 			    portp->lp_linkid));
883 		}
884 		pl->ActorOperPortState.bit.distributing = B_TRUE;
885 
886 		/* Turn Collector_Distributor back ON */
887 		aggr_set_coll_dist(portp, B_TRUE);
888 
889 		pl->ActorOperPortState.bit.collecting = B_TRUE;
890 		NTT_updated = B_TRUE;
891 		break;
892 	}
893 
894 	/*
895 	 * If we updated the state of the NTT variable, then
896 	 * initiate a LACPDU transmission.
897 	 */
898 	if (NTT_updated) {
899 		pl->NTT = B_TRUE;
900 		lacp_xmit_sm(portp);
901 	}
902 } /* lacp_mux_sm */
903 
904 
905 static int
receive_marker_pdu(aggr_port_t * portp,mblk_t * mp)906 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
907 {
908 	marker_pdu_t		*markerp = (marker_pdu_t *)mp->b_rptr;
909 
910 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
911 
912 	AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
913 	    portp->lp_linkid));
914 
915 	/* LACP_OFF state not in specification so check here.  */
916 	if (!portp->lp_lacp.sm.lacp_on)
917 		return (-1);
918 
919 	if (MBLKL(mp) < sizeof (marker_pdu_t))
920 		return (-1);
921 
922 	if (markerp->version != MARKER_VERSION) {
923 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
924 		    "version = %d does not match s/w version %d\n",
925 		    portp->lp_linkid, markerp->version, MARKER_VERSION));
926 		return (-1);
927 	}
928 
929 	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
930 		/* We do not yet send out MARKER info PDUs */
931 		AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
932 		    " MARKER TLV = %d - We don't send out info type!\n",
933 		    portp->lp_linkid, markerp->tlv_marker));
934 		return (-1);
935 	}
936 
937 	if (markerp->tlv_marker != MARKER_INFO_TLV) {
938 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
939 		    " MARKER TLV = %d \n", portp->lp_linkid,
940 		    markerp->tlv_marker));
941 		return (-1);
942 	}
943 
944 	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
945 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
946 		    " MARKER length = %d \n", portp->lp_linkid,
947 		    markerp->marker_len));
948 		return (-1);
949 	}
950 
951 	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
952 		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
953 		    " MARKER Port %d not equal to Partner port %d\n",
954 		    portp->lp_linkid, markerp->requestor_port,
955 		    portp->lp_lacp.PartnerOperPortNum));
956 		return (-1);
957 	}
958 
959 	if (ether_cmp(&markerp->system_id,
960 	    &portp->lp_lacp.PartnerOperSystem) != 0) {
961 		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
962 		    " MARKER MAC not equal to Partner MAC\n",
963 		    portp->lp_linkid));
964 		return (-1);
965 	}
966 
967 	/*
968 	 * Turn into Marker Response PDU
969 	 * and return mblk to sending system
970 	 */
971 	markerp->tlv_marker = MARKER_RESPONSE_TLV;
972 
973 	/* reuse the space that was used by received ethernet header */
974 	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
975 	mp->b_rptr -= sizeof (struct ether_header);
976 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
977 	return (0);
978 }
979 
980 /*
981  * Update the LACP mode (off, active, or passive) of the specified group.
982  */
983 void
aggr_lacp_update_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode)984 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
985 {
986 	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
987 	aggr_port_t *port;
988 
989 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
990 	ASSERT(!grp->lg_closing);
991 
992 	if (mode == old_mode)
993 		return;
994 
995 	grp->lg_lacp_mode = mode;
996 
997 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
998 		port->lp_lacp.ActorAdminPortState.bit.activity =
999 		    port->lp_lacp.ActorOperPortState.bit.activity =
1000 		    (mode == AGGR_LACP_ACTIVE);
1001 
1002 		if (old_mode == AGGR_LACP_OFF) {
1003 			/* OFF -> {PASSIVE,ACTIVE} */
1004 			/* turn OFF Collector_Distributor */
1005 			aggr_set_coll_dist(port, B_FALSE);
1006 			lacp_on(port);
1007 		} else if (mode == AGGR_LACP_OFF) {
1008 			/* {PASSIVE,ACTIVE} -> OFF */
1009 			lacp_off(port);
1010 			/* Turn ON Collector_Distributor */
1011 			aggr_set_coll_dist(port, B_TRUE);
1012 		} else {
1013 			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
1014 			port->lp_lacp.sm.begin = B_TRUE;
1015 			lacp_mux_sm(port);
1016 			lacp_periodic_sm(port);
1017 
1018 			/* kick off state machines */
1019 			lacp_receive_sm(port, NULL);
1020 			lacp_mux_sm(port);
1021 		}
1022 	}
1023 }
1024 
1025 
1026 /*
1027  * Update the LACP timer (short or long) of the specified group.
1028  */
1029 void
aggr_lacp_update_timer(aggr_grp_t * grp,aggr_lacp_timer_t timer)1030 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
1031 {
1032 	aggr_port_t *port;
1033 
1034 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1035 
1036 	if (timer == grp->aggr.PeriodicTimer)
1037 		return;
1038 
1039 	grp->aggr.PeriodicTimer = timer;
1040 
1041 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1042 		port->lp_lacp.ActorAdminPortState.bit.timeout =
1043 		    port->lp_lacp.ActorOperPortState.bit.timeout =
1044 		    (timer == AGGR_LACP_TIMER_SHORT);
1045 	}
1046 }
1047 
1048 void
aggr_port_lacp_set_mode(aggr_grp_t * grp,aggr_port_t * port)1049 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port)
1050 {
1051 	aggr_lacp_mode_t	mode;
1052 	aggr_lacp_timer_t	timer;
1053 
1054 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1055 
1056 	mode = grp->lg_lacp_mode;
1057 	timer = grp->aggr.PeriodicTimer;
1058 
1059 	port->lp_lacp.ActorAdminPortState.bit.activity =
1060 	    port->lp_lacp.ActorOperPortState.bit.activity =
1061 	    (mode == AGGR_LACP_ACTIVE);
1062 
1063 	port->lp_lacp.ActorAdminPortState.bit.timeout =
1064 	    port->lp_lacp.ActorOperPortState.bit.timeout =
1065 	    (timer == AGGR_LACP_TIMER_SHORT);
1066 
1067 	if (mode == AGGR_LACP_OFF) {
1068 		/* Turn ON Collector_Distributor */
1069 		aggr_set_coll_dist(port, B_TRUE);
1070 	} else { /* LACP_ACTIVE/PASSIVE */
1071 		lacp_on(port);
1072 	}
1073 }
1074 
1075 /*
1076  * Sets the initial LACP mode (off, active, passive) and LACP timer
1077  * (short, long) of the specified group.
1078  */
1079 void
aggr_lacp_set_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode,aggr_lacp_timer_t timer)1080 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
1081     aggr_lacp_timer_t timer)
1082 {
1083 	aggr_port_t *port;
1084 
1085 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1086 
1087 	grp->lg_lacp_mode = mode;
1088 	grp->aggr.PeriodicTimer = timer;
1089 
1090 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1091 		aggr_port_lacp_set_mode(grp, port);
1092 }
1093 
1094 /*
1095  * Verify that the Partner MAC and Key recorded by the specified
1096  * port are not found in other ports that are not part of our
1097  * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1098  * otherwise.
1099  */
1100 static boolean_t
lacp_misconfig_check(aggr_port_t * portp)1101 lacp_misconfig_check(aggr_port_t *portp)
1102 {
1103 	aggr_grp_t *grp = portp->lp_grp;
1104 	lacp_sel_ports_t *cport;
1105 
1106 	mutex_enter(&lacp_sel_lock);
1107 
1108 	for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1109 
1110 		/* skip entries of the group of the port being checked */
1111 		if (cport->sp_grp_linkid == grp->lg_linkid)
1112 			continue;
1113 
1114 		if ((ether_cmp(&cport->sp_partner_system,
1115 		    &grp->aggr.PartnerSystem) == 0) &&
1116 		    (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1117 			char mac_str[ETHERADDRL*3];
1118 			struct ether_addr *mac = &cport->sp_partner_system;
1119 
1120 			/*
1121 			 * The Partner port information is already in use
1122 			 * by ports in another aggregation so disable this
1123 			 * port.
1124 			 */
1125 
1126 			(void) snprintf(mac_str, sizeof (mac_str),
1127 			    "%x:%x:%x:%x:%x:%x",
1128 			    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1129 			    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1130 			    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1131 
1132 			portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1133 
1134 			cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1135 			    "MAC %s and key %d in use on aggregation %d "
1136 			    "port %d\n", grp->lg_linkid, portp->lp_linkid,
1137 			    mac_str, portp->lp_lacp.PartnerOperKey,
1138 			    cport->sp_grp_linkid, cport->sp_linkid);
1139 			break;
1140 		}
1141 	}
1142 
1143 	mutex_exit(&lacp_sel_lock);
1144 	return (cport != NULL);
1145 }
1146 
1147 /*
1148  * Remove the specified port from the list of selected ports.
1149  */
1150 static void
lacp_sel_ports_del(aggr_port_t * portp)1151 lacp_sel_ports_del(aggr_port_t *portp)
1152 {
1153 	lacp_sel_ports_t *cport, **prev = NULL;
1154 
1155 	mutex_enter(&lacp_sel_lock);
1156 
1157 	prev = &sel_ports;
1158 	for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1159 	    cport = cport->sp_next) {
1160 		if (portp->lp_linkid == cport->sp_linkid)
1161 			break;
1162 	}
1163 
1164 	if (cport == NULL) {
1165 		mutex_exit(&lacp_sel_lock);
1166 		return;
1167 	}
1168 
1169 	*prev = cport->sp_next;
1170 	kmem_free(cport, sizeof (*cport));
1171 
1172 	mutex_exit(&lacp_sel_lock);
1173 }
1174 
1175 /*
1176  * Add the specified port to the list of selected ports. Returns B_FALSE
1177  * if the operation could not be performed due to an memory allocation
1178  * error.
1179  */
1180 static boolean_t
lacp_sel_ports_add(aggr_port_t * portp)1181 lacp_sel_ports_add(aggr_port_t *portp)
1182 {
1183 	lacp_sel_ports_t *new_port;
1184 	lacp_sel_ports_t *cport, **last;
1185 
1186 	mutex_enter(&lacp_sel_lock);
1187 
1188 	/* check if port is already in the list */
1189 	last = &sel_ports;
1190 	for (cport = sel_ports; cport != NULL;
1191 	    last = &cport->sp_next, cport = cport->sp_next) {
1192 		if (portp->lp_linkid == cport->sp_linkid) {
1193 			ASSERT(cport->sp_partner_key ==
1194 			    portp->lp_lacp.PartnerOperKey);
1195 			ASSERT(ether_cmp(&cport->sp_partner_system,
1196 			    &portp->lp_lacp.PartnerOperSystem) == 0);
1197 
1198 			mutex_exit(&lacp_sel_lock);
1199 			return (B_TRUE);
1200 		}
1201 	}
1202 
1203 	/* create and initialize new entry */
1204 	new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1205 	if (new_port == NULL) {
1206 		mutex_exit(&lacp_sel_lock);
1207 		return (B_FALSE);
1208 	}
1209 
1210 	new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1211 	bcopy(&portp->lp_lacp.PartnerOperSystem,
1212 	    &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1213 	new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1214 	new_port->sp_linkid = portp->lp_linkid;
1215 
1216 	*last = new_port;
1217 
1218 	mutex_exit(&lacp_sel_lock);
1219 	return (B_TRUE);
1220 }
1221 
1222 /*
1223  * lacp_selection_logic - LACP selection logic
1224  *		Sets the selected variable on a per port basis
1225  *		and sets Ready when all waiting ports are ready
1226  *		to go online.
1227  *
1228  * parameters:
1229  *      - portp - instance this applies to.
1230  *
1231  * invoked:
1232  *    - when initialization is needed
1233  *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1234  *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1235  *    - every time the wait_while_timer pops
1236  *    - everytime we turn LACP on/off
1237  */
1238 static void
lacp_selection_logic(aggr_port_t * portp)1239 lacp_selection_logic(aggr_port_t *portp)
1240 {
1241 	aggr_port_t *tpp;
1242 	aggr_grp_t *aggrp = portp->lp_grp;
1243 	int ports_waiting;
1244 	boolean_t reset_mac = B_FALSE;
1245 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1246 
1247 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1248 
1249 	/* LACP_OFF state not in specification so check here.  */
1250 	if (!pl->sm.lacp_on) {
1251 		lacp_port_unselect(portp);
1252 		aggrp->aggr.ready = B_FALSE;
1253 		lacp_mux_sm(portp);
1254 		return;
1255 	}
1256 
1257 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1258 	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1259 
1260 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1261 		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1262 		    "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1263 		    AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1264 		    portp->lp_state));
1265 
1266 		lacp_port_unselect(portp);
1267 		aggrp->aggr.ready = B_FALSE;
1268 		lacp_mux_sm(portp);
1269 		return;
1270 	}
1271 
1272 	/*
1273 	 * If LACP is not enabled then selected is never set.
1274 	 */
1275 	if (!pl->sm.lacp_enabled) {
1276 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1277 		    portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1278 
1279 		lacp_port_unselect(portp);
1280 		lacp_mux_sm(portp);
1281 		return;
1282 	}
1283 
1284 	/*
1285 	 * Check if the Partner MAC or Key are zero. If so, we have
1286 	 * not received any LACP info or it has expired and the
1287 	 * receive machine is in the LACP_DEFAULTED state.
1288 	 */
1289 	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1290 	    (pl->PartnerOperKey == 0)) {
1291 
1292 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1293 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1294 			    &etherzeroaddr) != 0 &&
1295 			    (tpp->lp_lacp.PartnerOperKey != 0))
1296 				break;
1297 		}
1298 
1299 		/*
1300 		 * If all ports have no key or aggregation address,
1301 		 * then clear the negotiated Partner MAC and key.
1302 		 */
1303 		if (tpp == NULL) {
1304 			/* Clear the aggregation Partner MAC and key */
1305 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1306 			aggrp->aggr.PartnerOperAggrKey = 0;
1307 		}
1308 
1309 		return;
1310 	}
1311 
1312 	/*
1313 	 * Insure that at least one port in the aggregation
1314 	 * matches the Partner aggregation MAC and key. If not,
1315 	 * then clear the aggregation MAC and key. Later we will
1316 	 * set the Partner aggregation MAC and key to that of the
1317 	 * current port's Partner MAC and key.
1318 	 */
1319 	if (ether_cmp(&pl->PartnerOperSystem,
1320 	    &aggrp->aggr.PartnerSystem) != 0 ||
1321 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1322 
1323 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1324 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1325 			    &aggrp->aggr.PartnerSystem) == 0 &&
1326 			    (tpp->lp_lacp.PartnerOperKey ==
1327 			    aggrp->aggr.PartnerOperAggrKey)) {
1328 				/* Set aggregation Partner MAC and key */
1329 				aggrp->aggr.PartnerSystem =
1330 				    pl->PartnerOperSystem;
1331 				aggrp->aggr.PartnerOperAggrKey =
1332 				    pl->PartnerOperKey;
1333 				break;
1334 			}
1335 		}
1336 
1337 		if (tpp == NULL) {
1338 			/* Clear the aggregation Partner MAC and key */
1339 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1340 			aggrp->aggr.PartnerOperAggrKey = 0;
1341 			reset_mac = B_TRUE;
1342 		}
1343 	}
1344 
1345 	/*
1346 	 * If our Actor MAC is found in the Partner MAC
1347 	 * on this port then we have a loopback misconfiguration.
1348 	 */
1349 	if (ether_cmp(&pl->PartnerOperSystem,
1350 	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1351 		cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1352 		    portp->lp_linkid);
1353 
1354 		lacp_port_unselect(portp);
1355 		lacp_mux_sm(portp);
1356 		return;
1357 	}
1358 
1359 	/*
1360 	 * If our Partner MAC and Key are found on any other
1361 	 * ports that are not in our aggregation, we have
1362 	 * a misconfiguration.
1363 	 */
1364 	if (lacp_misconfig_check(portp)) {
1365 		lacp_mux_sm(portp);
1366 		return;
1367 	}
1368 
1369 	/*
1370 	 * If the Aggregation Partner MAC and Key have not been
1371 	 * set, then this is either the first port or the aggregation
1372 	 * MAC and key have been reset. In either case we must set
1373 	 * the values of the Partner MAC and key.
1374 	 */
1375 	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1376 	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1377 		/* Set aggregation Partner MAC and key */
1378 		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1379 		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1380 
1381 		/*
1382 		 * If we reset Partner aggregation MAC, then restart
1383 		 * selection_logic on ports that match new MAC address.
1384 		 */
1385 		if (reset_mac) {
1386 			for (tpp = aggrp->lg_ports; tpp; tpp =
1387 			    tpp->lp_next) {
1388 				if (tpp == portp)
1389 					continue;
1390 				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1391 				    &aggrp->aggr.PartnerSystem) == 0 &&
1392 				    (tpp->lp_lacp.PartnerOperKey ==
1393 				    aggrp->aggr.PartnerOperAggrKey))
1394 					lacp_selection_logic(tpp);
1395 			}
1396 		}
1397 	} else if (ether_cmp(&pl->PartnerOperSystem,
1398 	    &aggrp->aggr.PartnerSystem) != 0 ||
1399 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1400 		/*
1401 		 * The Partner port information does not match
1402 		 * that of the other ports in the aggregation
1403 		 * so disable this port.
1404 		 */
1405 		lacp_port_unselect(portp);
1406 
1407 		cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1408 		    "or key (%d) incompatible with Aggregation Partner "
1409 		    "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1410 		    aggrp->aggr.PartnerOperAggrKey);
1411 
1412 		lacp_mux_sm(portp);
1413 		return;
1414 	}
1415 
1416 	/* If we get to here, automatically set selected */
1417 	if (pl->sm.selected != AGGR_SELECTED) {
1418 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1419 		    "selected %d-->%d\n", portp->lp_linkid,
1420 		    pl->sm.selected, AGGR_SELECTED));
1421 		if (!lacp_port_select(portp))
1422 			return;
1423 		lacp_mux_sm(portp);
1424 	}
1425 
1426 	/*
1427 	 * From this point onward we have selected the port
1428 	 * and are simply checking if the Ready flag should
1429 	 * be set.
1430 	 */
1431 
1432 	/*
1433 	 * If at least two ports are waiting to aggregate
1434 	 * and ready_n is set on all ports waiting to aggregate
1435 	 * then set READY for the aggregation.
1436 	 */
1437 
1438 	ports_waiting = 0;
1439 
1440 	if (!aggrp->aggr.ready) {
1441 		/*
1442 		 * If all ports in the aggregation have received compatible
1443 		 * partner information and they match up correctly with the
1444 		 * switch, there is no need to wait for all the
1445 		 * wait_while_timers to pop.
1446 		 */
1447 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1448 			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1449 			    tpp->lp_lacp.sm.begin) &&
1450 			    !tpp->lp_lacp.PartnerOperPortState.bit.sync) {
1451 				/* Add up ports uninitialized or waiting */
1452 				ports_waiting++;
1453 				if (!tpp->lp_lacp.sm.ready_n) {
1454 					DTRACE_PROBE1(port___not__ready,
1455 					    aggr_port_t *, tpp);
1456 					return;
1457 				}
1458 			}
1459 		}
1460 	}
1461 
1462 	if (aggrp->aggr.ready) {
1463 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1464 		    "aggr.ready already set\n", portp->lp_linkid));
1465 		lacp_mux_sm(portp);
1466 	} else {
1467 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1468 		    portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1469 		aggrp->aggr.ready = B_TRUE;
1470 
1471 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1472 			lacp_mux_sm(tpp);
1473 	}
1474 
1475 }
1476 
1477 /*
1478  * wait_while_timer_pop - When the timer pops, we arrive here to
1479  *			set ready_n and trigger the selection logic.
1480  */
1481 static void
wait_while_timer_pop(void * data)1482 wait_while_timer_pop(void *data)
1483 {
1484 	aggr_port_t *portp = data;
1485 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1486 
1487 	mutex_enter(&pl->lacp_timer_lock);
1488 	pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT;
1489 	cv_broadcast(&pl->lacp_timer_cv);
1490 	mutex_exit(&pl->lacp_timer_lock);
1491 }
1492 
1493 /*
1494  * wait_while_timer_pop_handler - When the timer pops, we arrive here to
1495  *			set ready_n and trigger the selection logic.
1496  */
1497 static void
wait_while_timer_pop_handler(aggr_port_t * portp)1498 wait_while_timer_pop_handler(aggr_port_t *portp)
1499 {
1500 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1501 
1502 	AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1503 	    portp->lp_linkid));
1504 	portp->lp_lacp.sm.ready_n = B_TRUE;
1505 
1506 	lacp_selection_logic(portp);
1507 }
1508 
1509 static void
start_wait_while_timer(aggr_port_t * portp)1510 start_wait_while_timer(aggr_port_t *portp)
1511 {
1512 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1513 
1514 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1515 
1516 	mutex_enter(&pl->lacp_timer_lock);
1517 	if (pl->wait_while_timer.id == 0) {
1518 		pl->wait_while_timer.id =
1519 		    timeout(wait_while_timer_pop, portp,
1520 		    drv_usectohz(1000000 *
1521 		    portp->lp_lacp.wait_while_timer.val));
1522 	}
1523 	mutex_exit(&pl->lacp_timer_lock);
1524 }
1525 
1526 
1527 static void
stop_wait_while_timer(aggr_port_t * portp)1528 stop_wait_while_timer(aggr_port_t *portp)
1529 {
1530 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1531 	timeout_id_t id;
1532 
1533 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1534 
1535 	mutex_enter(&pl->lacp_timer_lock);
1536 	if ((id = pl->wait_while_timer.id) != 0) {
1537 		pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT;
1538 		pl->wait_while_timer.id = 0;
1539 	}
1540 	mutex_exit(&pl->lacp_timer_lock);
1541 
1542 	if (id != 0)
1543 		(void) untimeout(id);
1544 }
1545 
1546 /*
1547  * Invoked when a port has been attached to a group.
1548  * Complete the processing that couldn't be finished from lacp_on()
1549  * because the port was not started. We know that the link is full
1550  * duplex and ON, otherwise it wouldn't be attached.
1551  */
1552 void
aggr_lacp_port_attached(aggr_port_t * portp)1553 aggr_lacp_port_attached(aggr_port_t *portp)
1554 {
1555 	aggr_grp_t *grp = portp->lp_grp;
1556 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1557 
1558 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1559 	ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1560 	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1561 
1562 	AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1563 	    portp->lp_linkid));
1564 
1565 	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1566 
1567 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1568 		return;
1569 
1570 	pl->sm.lacp_enabled = B_TRUE;
1571 	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1572 	pl->sm.begin = B_TRUE;
1573 
1574 	lacp_receive_sm(portp, NULL);
1575 	lacp_mux_sm(portp);
1576 
1577 	/* Enable Multicast Slow Protocol address */
1578 	aggr_lacp_mcast_on(portp);
1579 
1580 	/* periodic_sm is started up from the receive machine */
1581 	lacp_selection_logic(portp);
1582 }
1583 
1584 /*
1585  * Invoked when a port has been detached from a group. Turn off
1586  * LACP processing if it was enabled.
1587  */
1588 void
aggr_lacp_port_detached(aggr_port_t * portp)1589 aggr_lacp_port_detached(aggr_port_t *portp)
1590 {
1591 	aggr_grp_t *grp = portp->lp_grp;
1592 
1593 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1594 	ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1595 
1596 	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1597 	    portp->lp_linkid));
1598 
1599 	portp->lp_lacp.sm.port_enabled = B_FALSE;
1600 
1601 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1602 		return;
1603 
1604 	portp->lp_lacp.sm.lacp_enabled = B_FALSE;
1605 	lacp_selection_logic(portp);
1606 	lacp_mux_sm(portp);
1607 	lacp_periodic_sm(portp);
1608 
1609 	/*
1610 	 * Disable Slow Protocol Timers.
1611 	 */
1612 	stop_periodic_timer(portp);
1613 	stop_current_while_timer(portp);
1614 	stop_wait_while_timer(portp);
1615 
1616 	/* Disable Multicast Slow Protocol address */
1617 	aggr_lacp_mcast_off(portp);
1618 	aggr_set_coll_dist(portp, B_FALSE);
1619 }
1620 
1621 /*
1622  * Enable Slow Protocol LACP and Marker PDUs.
1623  */
1624 static void
lacp_on(aggr_port_t * portp)1625 lacp_on(aggr_port_t *portp)
1626 {
1627 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1628 	mac_perim_handle_t mph;
1629 
1630 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1631 
1632 	mac_perim_enter_by_mh(portp->lp_mh, &mph);
1633 
1634 	/*
1635 	 * Reset the state machines and Partner operational
1636 	 * information. Careful to not reset things like
1637 	 * our link state.
1638 	 */
1639 	lacp_reset_port(portp);
1640 	pl->sm.lacp_on = B_TRUE;
1641 
1642 	AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1643 
1644 	if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1645 		pl->sm.port_enabled = B_TRUE;
1646 		pl->sm.lacp_enabled = B_TRUE;
1647 		pl->ActorOperPortState.bit.aggregation = B_TRUE;
1648 	}
1649 
1650 	lacp_receive_sm(portp, NULL);
1651 	lacp_mux_sm(portp);
1652 
1653 	if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1654 		/* Enable Multicast Slow Protocol address */
1655 		aggr_lacp_mcast_on(portp);
1656 
1657 		/* periodic_sm is started up from the receive machine */
1658 		lacp_selection_logic(portp);
1659 	}
1660 done:
1661 	mac_perim_exit(mph);
1662 } /* lacp_on */
1663 
1664 /* Disable Slow Protocol LACP and Marker PDUs */
1665 static void
lacp_off(aggr_port_t * portp)1666 lacp_off(aggr_port_t *portp)
1667 {
1668 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1669 	mac_perim_handle_t mph;
1670 
1671 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1672 	mac_perim_enter_by_mh(portp->lp_mh, &mph);
1673 
1674 	pl->sm.lacp_on = B_FALSE;
1675 
1676 	AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1677 
1678 	if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1679 		/*
1680 		 * Disable Slow Protocol Timers.
1681 		 */
1682 		stop_periodic_timer(portp);
1683 		stop_current_while_timer(portp);
1684 		stop_wait_while_timer(portp);
1685 
1686 		/* Disable Multicast Slow Protocol address */
1687 		aggr_lacp_mcast_off(portp);
1688 
1689 		pl->sm.port_enabled = B_FALSE;
1690 		pl->sm.lacp_enabled = B_FALSE;
1691 		pl->ActorOperPortState.bit.aggregation = B_FALSE;
1692 	}
1693 
1694 	lacp_mux_sm(portp);
1695 	lacp_periodic_sm(portp);
1696 	lacp_selection_logic(portp);
1697 
1698 	/* Turn OFF Collector_Distributor */
1699 	aggr_set_coll_dist(portp, B_FALSE);
1700 
1701 	lacp_reset_port(portp);
1702 	mac_perim_exit(mph);
1703 }
1704 
1705 
1706 static boolean_t
valid_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)1707 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1708 {
1709 	/*
1710 	 * 43.4.12 - "a Receive machine shall not validate
1711 	 * the Version Number, TLV_type, or Reserved fields in received
1712 	 * LACPDUs."
1713 	 * ... "a Receive machine may validate the Actor_Information_Length,
1714 	 * Partner_Information_Length, Collector_Information_Length,
1715 	 * or Terminator_Length fields."
1716 	 */
1717 	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1718 	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1719 	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1720 	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1721 		AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1722 		    " Terminator Length = %d \n", portp->lp_linkid,
1723 		    lacp->terminator_len));
1724 		return (B_FALSE);
1725 	}
1726 
1727 	return (B_TRUE);
1728 }
1729 
1730 
1731 static void
start_current_while_timer(aggr_port_t * portp,uint_t time)1732 start_current_while_timer(aggr_port_t *portp, uint_t time)
1733 {
1734 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1735 
1736 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1737 
1738 	mutex_enter(&pl->lacp_timer_lock);
1739 	if (pl->current_while_timer.id == 0) {
1740 		if (time > 0)
1741 			pl->current_while_timer.val = time;
1742 		else if (pl->ActorOperPortState.bit.timeout)
1743 			pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
1744 		else
1745 			pl->current_while_timer.val = LONG_TIMEOUT_TIME;
1746 
1747 		pl->current_while_timer.id =
1748 		    timeout(current_while_timer_pop, portp,
1749 		    drv_usectohz((clock_t)1000000 *
1750 		    (clock_t)portp->lp_lacp.current_while_timer.val));
1751 	}
1752 	mutex_exit(&pl->lacp_timer_lock);
1753 }
1754 
1755 
1756 static void
stop_current_while_timer(aggr_port_t * portp)1757 stop_current_while_timer(aggr_port_t *portp)
1758 {
1759 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1760 	timeout_id_t id;
1761 
1762 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1763 
1764 	mutex_enter(&pl->lacp_timer_lock);
1765 	if ((id = pl->current_while_timer.id) != 0) {
1766 		pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT;
1767 		pl->current_while_timer.id = 0;
1768 	}
1769 	mutex_exit(&pl->lacp_timer_lock);
1770 
1771 	if (id != 0)
1772 		(void) untimeout(id);
1773 }
1774 
1775 static void
current_while_timer_pop(void * data)1776 current_while_timer_pop(void *data)
1777 {
1778 	aggr_port_t *portp = (aggr_port_t *)data;
1779 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1780 
1781 	mutex_enter(&pl->lacp_timer_lock);
1782 	pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT;
1783 	cv_broadcast(&pl->lacp_timer_cv);
1784 	mutex_exit(&pl->lacp_timer_lock);
1785 }
1786 
1787 static void
current_while_timer_pop_handler(aggr_port_t * portp)1788 current_while_timer_pop_handler(aggr_port_t *portp)
1789 {
1790 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1791 
1792 	AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1793 	    "pop id=%p\n", portp->lp_linkid,
1794 	    portp->lp_lacp.current_while_timer.id));
1795 
1796 	lacp_receive_sm(portp, NULL);
1797 }
1798 
1799 /*
1800  * record_Default - Simply copies over administrative values
1801  * to the partner operational values, and sets our state to indicate we
1802  * are using defaulted values.
1803  */
1804 static void
record_Default(aggr_port_t * portp)1805 record_Default(aggr_port_t *portp)
1806 {
1807 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1808 
1809 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1810 
1811 	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1812 	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1813 	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1814 	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1815 	pl->PartnerOperKey = pl->PartnerAdminKey;
1816 	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1817 
1818 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1819 }
1820 
1821 
1822 /* Returns B_TRUE on sync value changing */
1823 static boolean_t
record_PDU(aggr_port_t * portp,lacp_t * lacp)1824 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1825 {
1826 	aggr_grp_t *aggrp = portp->lp_grp;
1827 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1828 	uint8_t save_sync;
1829 
1830 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1831 
1832 	/*
1833 	 * Partner Information
1834 	 */
1835 	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1836 	pl->PartnerOperPortPriority =
1837 	    ntohs(lacp->actor_info.port_priority);
1838 	pl->PartnerOperSystem = lacp->actor_info.system_id;
1839 	pl->PartnerOperSysPriority =
1840 	    htons(lacp->actor_info.system_priority);
1841 	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1842 
1843 	/* All state info except for Synchronization */
1844 	save_sync = pl->PartnerOperPortState.bit.sync;
1845 	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1846 
1847 	/* Defaulted set to FALSE */
1848 	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1849 
1850 	/*
1851 	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1852 	 *		Partner_System_Priority, Partner_Key, and
1853 	 *		Partner_State.Aggregation) are compared to the
1854 	 *		corresponding operations paramters values for
1855 	 *		the Actor. If these are equal, or if this is
1856 	 *		an individual link, we are synchronized.
1857 	 */
1858 	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1859 	    (ntohs(lacp->partner_info.port_priority) ==
1860 	    pl->ActorPortPriority) &&
1861 	    (ether_cmp(&lacp->partner_info.system_id,
1862 	    (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1863 	    (ntohs(lacp->partner_info.system_priority) ==
1864 	    aggrp->aggr.ActorSystemPriority) &&
1865 	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1866 	    (lacp->partner_info.state.bit.aggregation ==
1867 	    pl->ActorOperPortState.bit.aggregation)) ||
1868 	    (!lacp->actor_info.state.bit.aggregation)) {
1869 
1870 		pl->PartnerOperPortState.bit.sync =
1871 		    lacp->actor_info.state.bit.sync;
1872 	} else {
1873 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1874 	}
1875 
1876 	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1877 		AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1878 		    "%d -->%d\n", portp->lp_linkid, save_sync,
1879 		    pl->PartnerOperPortState.bit.sync));
1880 		return (B_TRUE);
1881 	} else {
1882 		return (B_FALSE);
1883 	}
1884 }
1885 
1886 
1887 /*
1888  * update_selected - If any of the Partner parameters has
1889  *			changed from a previous value, then
1890  *			unselect the link from the aggregator.
1891  */
1892 static boolean_t
update_selected(aggr_port_t * portp,lacp_t * lacp)1893 update_selected(aggr_port_t *portp, lacp_t *lacp)
1894 {
1895 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1896 
1897 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1898 
1899 	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1900 	    (pl->PartnerOperPortPriority !=
1901 	    ntohs(lacp->actor_info.port_priority)) ||
1902 	    (ether_cmp(&pl->PartnerOperSystem,
1903 	    &lacp->actor_info.system_id) != 0) ||
1904 	    (pl->PartnerOperSysPriority !=
1905 	    ntohs(lacp->actor_info.system_priority)) ||
1906 	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1907 	    (pl->PartnerOperPortState.bit.aggregation !=
1908 	    lacp->actor_info.state.bit.aggregation)) {
1909 		AGGR_LACP_DBG(("update_selected:(%d): "
1910 		    "selected  %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1911 		    AGGR_UNSELECTED));
1912 
1913 		lacp_port_unselect(portp);
1914 		return (B_TRUE);
1915 	} else {
1916 		return (B_FALSE);
1917 	}
1918 }
1919 
1920 
1921 /*
1922  * update_default_selected - If any of the operational Partner parameters
1923  *			is different than that of the administrative values
1924  *			then unselect the link from the aggregator.
1925  */
1926 static void
update_default_selected(aggr_port_t * portp)1927 update_default_selected(aggr_port_t *portp)
1928 {
1929 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1930 
1931 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1932 
1933 	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1934 	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1935 	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1936 	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1937 	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1938 	    (pl->PartnerOperPortState.bit.aggregation !=
1939 	    pl->PartnerAdminPortState.bit.aggregation)) {
1940 
1941 		AGGR_LACP_DBG(("update_default_selected:(%d): "
1942 		    "selected  %d-->%d\n", portp->lp_linkid,
1943 		    pl->sm.selected, AGGR_UNSELECTED));
1944 
1945 		lacp_port_unselect(portp);
1946 	}
1947 }
1948 
1949 
1950 /*
1951  * update_NTT - If any of the Partner values in the received LACPDU
1952  *			are different than that of the Actor operational
1953  *			values then set NTT to true.
1954  */
1955 static void
update_NTT(aggr_port_t * portp,lacp_t * lacp)1956 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1957 {
1958 	aggr_grp_t *aggrp = portp->lp_grp;
1959 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1960 
1961 	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1962 
1963 	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1964 	    (pl->ActorPortPriority !=
1965 	    ntohs(lacp->partner_info.port_priority)) ||
1966 	    (ether_cmp(&aggrp->lg_addr,
1967 	    &lacp->partner_info.system_id) != 0) ||
1968 	    (aggrp->aggr.ActorSystemPriority !=
1969 	    ntohs(lacp->partner_info.system_priority)) ||
1970 	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1971 	    (pl->ActorOperPortState.bit.activity !=
1972 	    lacp->partner_info.state.bit.activity) ||
1973 	    (pl->ActorOperPortState.bit.timeout !=
1974 	    lacp->partner_info.state.bit.timeout) ||
1975 	    (pl->ActorOperPortState.bit.sync !=
1976 	    lacp->partner_info.state.bit.sync) ||
1977 	    (pl->ActorOperPortState.bit.aggregation !=
1978 	    lacp->partner_info.state.bit.aggregation)) {
1979 
1980 		AGGR_LACP_DBG(("update_NTT:(%d): NTT  %d-->%d\n",
1981 		    portp->lp_linkid, pl->NTT, B_TRUE));
1982 
1983 		pl->NTT = B_TRUE;
1984 	}
1985 }
1986 
1987 /*
1988  * lacp_receive_sm - LACP receive state machine
1989  *
1990  * parameters:
1991  *      - portp - instance this applies to.
1992  *      - lacp - pointer in the case of a received LACPDU.
1993  *                This value is NULL if there is no LACPDU.
1994  *
1995  * invoked:
1996  *    - when initialization is needed
1997  *    - upon reception of an LACPDU. This is the common case.
1998  *    - every time the current_while_timer pops
1999  */
2000 static void
lacp_receive_sm(aggr_port_t * portp,lacp_t * lacp)2001 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
2002 {
2003 	boolean_t sync_updated, selected_updated, save_activity;
2004 	aggr_lacp_port_t *pl = &portp->lp_lacp;
2005 	lacp_receive_state_t oldstate = pl->sm.receive_state;
2006 
2007 	ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
2008 
2009 	/* LACP_OFF state not in specification so check here.  */
2010 	if (!pl->sm.lacp_on)
2011 		return;
2012 
2013 	/* figure next state */
2014 	if (pl->sm.begin || pl->sm.port_moved) {
2015 		pl->sm.receive_state = LACP_INITIALIZE;
2016 	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
2017 		pl->sm.receive_state = LACP_PORT_DISABLED;
2018 	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
2019 		pl->sm.receive_state =
2020 		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
2021 		    LACP_DISABLED : LACP_PORT_DISABLED;
2022 	} else if (lacp != NULL) {
2023 		if ((pl->sm.receive_state == LACP_EXPIRED) ||
2024 		    (pl->sm.receive_state == LACP_DEFAULTED)) {
2025 			pl->sm.receive_state = LACP_CURRENT;
2026 		}
2027 	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
2028 	    (pl->current_while_timer.id == 0)) {
2029 		pl->sm.receive_state = LACP_EXPIRED;
2030 	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
2031 	    (pl->current_while_timer.id == 0)) {
2032 		pl->sm.receive_state = LACP_DEFAULTED;
2033 	}
2034 
2035 	if (!((lacp && (oldstate == LACP_CURRENT) &&
2036 	    (pl->sm.receive_state == LACP_CURRENT)))) {
2037 		AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
2038 		    portp->lp_linkid, lacp_receive_str[oldstate],
2039 		    lacp_receive_str[pl->sm.receive_state]));
2040 	}
2041 
2042 	switch (pl->sm.receive_state) {
2043 	case LACP_INITIALIZE:
2044 		lacp_port_unselect(portp);
2045 		record_Default(portp);
2046 		pl->ActorOperPortState.bit.expired = B_FALSE;
2047 		pl->sm.port_moved = B_FALSE;
2048 		pl->sm.receive_state = LACP_PORT_DISABLED;
2049 		pl->sm.begin = B_FALSE;
2050 		lacp_receive_sm(portp, NULL);
2051 		break;
2052 
2053 	case LACP_PORT_DISABLED:
2054 		pl->PartnerOperPortState.bit.sync = B_FALSE;
2055 		/*
2056 		 * Stop current_while_timer in case
2057 		 * we got here from link down
2058 		 */
2059 		stop_current_while_timer(portp);
2060 
2061 		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
2062 			pl->sm.receive_state = LACP_DISABLED;
2063 			lacp_receive_sm(portp, lacp);
2064 			/* We goto LACP_DISABLED state */
2065 			break;
2066 		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
2067 			pl->sm.receive_state = LACP_EXPIRED;
2068 			/*
2069 			 * FALL THROUGH TO LACP_EXPIRED CASE:
2070 			 * We have no way of knowing if we get into
2071 			 * lacp_receive_sm() from a  current_while_timer
2072 			 * expiring as it has never been kicked off yet!
2073 			 */
2074 		} else {
2075 			/* We stay in LACP_PORT_DISABLED state */
2076 			break;
2077 		}
2078 		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
2079 		/* FALLTHROUGH */
2080 
2081 	case LACP_EXPIRED:
2082 		/*
2083 		 * Arrives here from LACP_PORT_DISABLED state as well as
2084 		 * as well as current_while_timer expiring.
2085 		 */
2086 		pl->PartnerOperPortState.bit.sync = B_FALSE;
2087 		pl->PartnerOperPortState.bit.timeout = B_TRUE;
2088 
2089 		pl->ActorOperPortState.bit.expired = B_TRUE;
2090 		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
2091 		lacp_periodic_sm(portp);
2092 		break;
2093 
2094 	case LACP_DISABLED:
2095 		/*
2096 		 * This is the normal state for recv_sm when LACP_OFF
2097 		 * is set or the NIC is in half duplex mode.
2098 		 */
2099 		lacp_port_unselect(portp);
2100 		record_Default(portp);
2101 		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
2102 		pl->ActorOperPortState.bit.expired = B_FALSE;
2103 		break;
2104 
2105 	case LACP_DEFAULTED:
2106 		/*
2107 		 * Current_while_timer expired a second time.
2108 		 */
2109 		update_default_selected(portp);
2110 		record_Default(portp);	/* overwrite Partner Oper val */
2111 		pl->ActorOperPortState.bit.expired = B_FALSE;
2112 		pl->PartnerOperPortState.bit.sync = B_TRUE;
2113 
2114 		lacp_selection_logic(portp);
2115 		lacp_mux_sm(portp);
2116 		break;
2117 
2118 	case LACP_CURRENT:
2119 		/*
2120 		 * Reception of LACPDU
2121 		 */
2122 
2123 		if (!lacp) /* no LACPDU so current_while_timer popped */
2124 			break;
2125 
2126 		AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
2127 		    portp->lp_linkid));
2128 
2129 		/*
2130 		 * Validate Actor_Information_Length,
2131 		 * Partner_Information_Length, Collector_Information_Length,
2132 		 * and Terminator_Length fields.
2133 		 */
2134 		if (!valid_lacp_pdu(portp, lacp)) {
2135 			AGGR_LACP_DBG(("lacp_receive_sm (%d): "
2136 			    "Invalid LACPDU received\n",
2137 			    portp->lp_linkid));
2138 			break;
2139 		}
2140 
2141 		save_activity = pl->PartnerOperPortState.bit.activity;
2142 		selected_updated = update_selected(portp, lacp);
2143 		update_NTT(portp, lacp);
2144 		sync_updated = record_PDU(portp, lacp);
2145 
2146 		pl->ActorOperPortState.bit.expired = B_FALSE;
2147 
2148 		if (selected_updated) {
2149 			lacp_selection_logic(portp);
2150 			lacp_mux_sm(portp);
2151 		} else if (sync_updated) {
2152 			lacp_mux_sm(portp);
2153 		}
2154 
2155 		/*
2156 		 * If the periodic timer value bit has been modified
2157 		 * or the partner activity bit has been changed then
2158 		 * we need to respectively:
2159 		 *  - restart the timer with the proper timeout value.
2160 		 *  - possibly enable/disable transmission of LACPDUs.
2161 		 */
2162 		if ((pl->PartnerOperPortState.bit.timeout &&
2163 		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2164 		    (!pl->PartnerOperPortState.bit.timeout &&
2165 		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2166 		    (pl->PartnerOperPortState.bit.activity !=
2167 		    save_activity)) {
2168 			lacp_periodic_sm(portp);
2169 		}
2170 
2171 		stop_current_while_timer(portp);
2172 		/* Check if we need to transmit an LACPDU */
2173 		if (pl->NTT)
2174 			lacp_xmit_sm(portp);
2175 		start_current_while_timer(portp, 0);
2176 
2177 		break;
2178 	}
2179 }
2180 
2181 static void
aggr_set_coll_dist(aggr_port_t * portp,boolean_t enable)2182 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2183 {
2184 	mac_perim_handle_t mph;
2185 
2186 	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2187 	    portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2188 
2189 	mac_perim_enter_by_mh(portp->lp_mh, &mph);
2190 	if (!enable) {
2191 		/*
2192 		 * Turn OFF Collector_Distributor.
2193 		 */
2194 		portp->lp_collector_enabled = B_FALSE;
2195 		aggr_send_port_disable(portp);
2196 		goto done;
2197 	}
2198 
2199 	/*
2200 	 * Turn ON Collector_Distributor.
2201 	 */
2202 
2203 	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2204 	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2205 		/* Port is compatible and can be aggregated */
2206 		portp->lp_collector_enabled = B_TRUE;
2207 		aggr_send_port_enable(portp);
2208 	}
2209 
2210 done:
2211 	mac_perim_exit(mph);
2212 }
2213 
2214 /*
2215  * Because the LACP packet processing needs to enter the aggr's mac perimeter
2216  * and that would potentially cause a deadlock with the thread in which the
2217  * grp/port is deleted, we defer the packet process to a worker thread. Here
2218  * we only enqueue the received Marker or LACPDU for later processing.
2219  */
2220 void
aggr_lacp_rx_enqueue(aggr_port_t * portp,mblk_t * dmp)2221 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp)
2222 {
2223 	aggr_grp_t *grp = portp->lp_grp;
2224 	lacp_t	*lacp;
2225 
2226 	dmp->b_rptr += sizeof (struct ether_header);
2227 
2228 	if (MBLKL(dmp) < sizeof (lacp_t)) {
2229 		freemsg(dmp);
2230 		return;
2231 	}
2232 
2233 	lacp = (lacp_t *)dmp->b_rptr;
2234 	if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) {
2235 		AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): "
2236 		    "Unknown Slow Protocol type %d\n",
2237 		    portp->lp_linkid, lacp->subtype));
2238 		freemsg(dmp);
2239 		return;
2240 	}
2241 
2242 	mutex_enter(&grp->lg_lacp_lock);
2243 
2244 	/*
2245 	 * If the lg_lacp_done is set, this aggregation is in the process of
2246 	 * being deleted, return directly.
2247 	 */
2248 	if (grp->lg_lacp_done) {
2249 		mutex_exit(&grp->lg_lacp_lock);
2250 		freemsg(dmp);
2251 		return;
2252 	}
2253 
2254 	if (grp->lg_lacp_tail == NULL) {
2255 		grp->lg_lacp_head = grp->lg_lacp_tail = dmp;
2256 	} else {
2257 		grp->lg_lacp_tail->b_next = dmp;
2258 		grp->lg_lacp_tail = dmp;
2259 	}
2260 
2261 	/*
2262 	 * Hold a reference of the port so that the port won't be freed when it
2263 	 * is removed from the aggr. The b_prev field is borrowed to save the
2264 	 * port information.
2265 	 */
2266 	AGGR_PORT_REFHOLD(portp);
2267 	dmp->b_prev = (mblk_t *)portp;
2268 	cv_broadcast(&grp->lg_lacp_cv);
2269 	mutex_exit(&grp->lg_lacp_lock);
2270 }
2271 
2272 static void
aggr_lacp_rx(mblk_t * dmp)2273 aggr_lacp_rx(mblk_t *dmp)
2274 {
2275 	aggr_port_t *portp = (aggr_port_t *)dmp->b_prev;
2276 	mac_perim_handle_t mph;
2277 	lacp_t	*lacp;
2278 
2279 	dmp->b_prev = NULL;
2280 
2281 	mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph);
2282 	if (portp->lp_closing)
2283 		goto done;
2284 
2285 	lacp = (lacp_t *)dmp->b_rptr;
2286 	switch (lacp->subtype) {
2287 	case LACP_SUBTYPE:
2288 		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2289 		    portp->lp_linkid));
2290 
2291 		if (!portp->lp_lacp.sm.lacp_on) {
2292 			break;
2293 		}
2294 		lacp_receive_sm(portp, lacp);
2295 		break;
2296 
2297 	case MARKER_SUBTYPE:
2298 		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2299 		    portp->lp_linkid));
2300 
2301 		if (receive_marker_pdu(portp, dmp) != 0)
2302 			break;
2303 
2304 		/* Send the packet over the first TX ring */
2305 		dmp = mac_hwring_send_priv(portp->lp_mch,
2306 		    portp->lp_tx_rings[0], dmp);
2307 		if (dmp != NULL)
2308 			freemsg(dmp);
2309 		mac_perim_exit(mph);
2310 		AGGR_PORT_REFRELE(portp);
2311 		return;
2312 	}
2313 
2314 done:
2315 	mac_perim_exit(mph);
2316 	AGGR_PORT_REFRELE(portp);
2317 	freemsg(dmp);
2318 }
2319 
2320 void
aggr_lacp_rx_thread(void * arg)2321 aggr_lacp_rx_thread(void *arg)
2322 {
2323 	callb_cpr_t	cprinfo;
2324 	aggr_grp_t	*grp = (aggr_grp_t *)arg;
2325 	aggr_port_t	*port;
2326 	mblk_t		*mp, *nextmp;
2327 
2328 	CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr,
2329 	    "aggr_lacp_rx_thread");
2330 
2331 	mutex_enter(&grp->lg_lacp_lock);
2332 
2333 	/*
2334 	 * Quit the thread if the grp is deleted.
2335 	 */
2336 	while (!grp->lg_lacp_done) {
2337 		if ((mp = grp->lg_lacp_head) == NULL) {
2338 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
2339 			cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
2340 			CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock);
2341 			continue;
2342 		}
2343 
2344 		grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2345 		mutex_exit(&grp->lg_lacp_lock);
2346 
2347 		while (mp != NULL) {
2348 			nextmp = mp->b_next;
2349 			mp->b_next = NULL;
2350 			aggr_lacp_rx(mp);
2351 			mp = nextmp;
2352 		}
2353 		mutex_enter(&grp->lg_lacp_lock);
2354 	}
2355 
2356 	/*
2357 	 * The grp is being destroyed, simply free all of the LACP messages
2358 	 * left in the queue which did not have the chance to be processed.
2359 	 * We cannot use freemsgchain() here since we need to clear the
2360 	 * b_prev field.
2361 	 */
2362 	for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) {
2363 		port = (aggr_port_t *)mp->b_prev;
2364 		AGGR_PORT_REFRELE(port);
2365 		nextmp = mp->b_next;
2366 		mp->b_next = NULL;
2367 		mp->b_prev = NULL;
2368 		freemsg(mp);
2369 	}
2370 
2371 	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2372 	grp->lg_lacp_rx_thread = NULL;
2373 	cv_broadcast(&grp->lg_lacp_cv);
2374 	CALLB_CPR_EXIT(&cprinfo);
2375 	thread_exit();
2376 }
2377