1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2017, Joyent, Inc.
24 * Copyright 2024 MNX Cloud, Inc.
25 */
26
27 /*
28 * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
29 */
30
31 #include <sys/types.h>
32 #include <sys/sysmacros.h>
33 #include <sys/callb.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/disp.h>
37 #include <sys/list.h>
38 #include <sys/ksynch.h>
39 #include <sys/kmem.h>
40 #include <sys/stream.h>
41 #include <sys/modctl.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/atomic.h>
45 #include <sys/stat.h>
46 #include <sys/byteorder.h>
47 #include <sys/strsun.h>
48 #include <sys/isa_defs.h>
49 #include <sys/sdt.h>
50
51 #include <sys/aggr.h>
52 #include <sys/aggr_impl.h>
53
/*
 * All-zero Ethernet address.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * compared against partner system IDs elsewhere -- confirm before changing.
 */
static struct ether_addr etherzeroaddr = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
 *
 * This is the destination address written by fill_lacp_ether() and the
 * multicast address that aggr_lacp_mcast_on()/aggr_lacp_mcast_off() pass
 * to aggr_port_multicst().
 */
static struct ether_addr slow_multicast_addr = {
	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
};
64
#ifdef DEBUG
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG((fmt, args...)) takes a doubly parenthesized printf()
 * argument list and prints it only when aggr_lacp_debug is non-zero.
 * Both variants expand to a single do { } while (0) statement so the
 * macro can be used safely as the body of an unbraced if/else.
 */
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	\
	do { if (aggr_lacp_debug) { (void) printf x; } } while (0)
#else
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif /* DEBUG */
72
/* Nanoseconds in one second; lacp_xmit_sm() compares gethrtime() deltas to it */
#define NSECS_PER_SEC 1000000000ll

/* used by lacp_misconfig_walker() */
typedef struct lacp_misconfig_check_state_s {
	aggr_port_t *cs_portp;	/* NOTE(review): presumably the port under test */
	boolean_t cs_found;	/* NOTE(review): presumably set on a match */
} lacp_misconfig_check_state_t;

/*
 * Printable state names used in AGGR_LACP_DBG() output.
 * lacp_periodic_str[] and lacp_mux_str[] are indexed by the periodic and
 * mux state values; lacp_receive_str[] is presumably indexed by the
 * receive state in the same way (its users are outside this view).
 */
static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
static const char *lacp_mux_str[] = LACP_MUX_STRINGS;

/*
 * Default priorities: lacp_port_priority is copied to ActorPortPriority by
 * aggr_lacp_init_port(), lacp_system_priority to ActorSystemPriority by
 * aggr_lacp_init_grp().
 */
static uint16_t lacp_port_priority = 0x1000;
static uint16_t lacp_system_priority = 0x1000;
87
/*
 * Maintains a list of all ports in ATTACHED state. This information
 * is used to detect misconfiguration.
 *
 * Entries form a singly linked list through sp_next, headed by sel_ports.
 */
typedef struct lacp_sel_ports {
	datalink_id_t sp_grp_linkid;	/* NOTE(review): presumably owning aggr */
	datalink_id_t sp_linkid;	/* NOTE(review): presumably the port link */
	/* Note: sp_partner_system must be 2-byte aligned */
	struct ether_addr sp_partner_system;
	uint32_t sp_partner_key;
	struct lacp_sel_ports *sp_next;	/* next entry, or NULL */
} lacp_sel_ports_t;

/* Head of the list; empty until a port is added. */
static lacp_sel_ports_t *sel_ports = NULL;
/*
 * Initialized in aggr_lacp_init() and destroyed in aggr_lacp_fini().
 * NOTE(review): presumably serializes access to sel_ports -- the users are
 * outside this view.
 */
static kmutex_t lacp_sel_lock;
103
104 static void periodic_timer_pop(void *);
105 static void periodic_timer_pop_handler(aggr_port_t *);
106 static void lacp_xmit_sm(aggr_port_t *);
107 static void lacp_periodic_sm(aggr_port_t *);
108 static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
109 static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
110 static void lacp_on(aggr_port_t *);
111 static void lacp_off(aggr_port_t *);
112 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
113 static void lacp_receive_sm(aggr_port_t *, lacp_t *);
114 static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
115 static void start_wait_while_timer(aggr_port_t *);
116 static void stop_wait_while_timer(aggr_port_t *);
117 static void lacp_reset_port(aggr_port_t *);
118 static void stop_current_while_timer(aggr_port_t *);
119 static void current_while_timer_pop(void *);
120 static void current_while_timer_pop_handler(aggr_port_t *);
121 static void update_default_selected(aggr_port_t *);
122 static boolean_t update_selected(aggr_port_t *, lacp_t *);
123 static boolean_t lacp_sel_ports_add(aggr_port_t *);
124 static void lacp_sel_ports_del(aggr_port_t *);
125 static void wait_while_timer_pop(void *);
126 static void wait_while_timer_pop_handler(aggr_port_t *);
127
128 void
aggr_lacp_init(void)129 aggr_lacp_init(void)
130 {
131 mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
132 }
133
134 void
aggr_lacp_fini(void)135 aggr_lacp_fini(void)
136 {
137 mutex_destroy(&lacp_sel_lock);
138 }
139
140 /*
141 * The following functions are used for handling LACP timers.
142 *
143 * Note that we cannot fully rely on the aggr's mac perimeter in the timeout
144 * handler routine, otherwise it may cause deadlock with the untimeout() call
145 * which is usually called with the mac perimeter held. Instead, a
146 * lacp_timer_lock mutex is introduced, which protects a bitwise flag
147 * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer()
148 * routines and is checked by a dedicated thread, that executes the real
149 * timeout operation.
150 */
/*
 * Per-port worker thread that runs the LACP timer handlers.
 *
 * arg is the aggr_port_t the thread serves.  The thread sleeps on
 * lacp_timer_cv until lacp_timer_bits becomes non-zero, then runs the
 * handler for each timer whose bit is set while holding the group's mac
 * perimeter.  It terminates when LACP_THREAD_EXIT is set or when the port
 * is found to be closing, and drops its group/port reference on the way out.
 */
static void
aggr_port_timer_thread(void *arg)
{
	aggr_port_t		*port = arg;
	aggr_lacp_port_t	*pl = &port->lp_lacp;
	aggr_grp_t		*grp = port->lp_grp;
	uint32_t		lacp_timer_bits;
	mac_perim_handle_t	mph;
	callb_cpr_t		cprinfo;

	CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr,
	    "aggr_port_timer_thread");

	mutex_enter(&pl->lacp_timer_lock);

	for (;;) {

		/* Nothing pending: sleep until a timeout callback wakes us. */
		if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock);
			continue;
		}
		/* Work was snapshotted into the local; clear the shared bits. */
		pl->lacp_timer_bits = 0;

		if (lacp_timer_bits & LACP_THREAD_EXIT)
			break;

		/*
		 * The corresponding timeout()s have fired, so forget their
		 * ids; a zero id is what lets the timer be armed again (see
		 * start_periodic_timer()).
		 */
		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
			pl->periodic_timer.id = 0;
		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
			pl->wait_while_timer.id = 0;
		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
			pl->current_while_timer.id = 0;

		/* Drop the timer lock before taking the mac perimeter. */
		mutex_exit(&pl->lacp_timer_lock);

		mac_perim_enter_by_mh(grp->lg_mh, &mph);
		if (port->lp_closing) {
			/* Port is going away: skip the handlers and exit. */
			mac_perim_exit(mph);
			mutex_enter(&pl->lacp_timer_lock);
			break;
		}

		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
			periodic_timer_pop_handler(port);
		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
			wait_while_timer_pop_handler(port);
		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
			current_while_timer_pop_handler(port);
		mac_perim_exit(mph);

		/* An exit request may have arrived while the lock was dropped. */
		mutex_enter(&pl->lacp_timer_lock);
		if (pl->lacp_timer_bits & LACP_THREAD_EXIT)
			break;
	}

	/*
	 * Exit path; lacp_timer_lock is held here.  Mark the thread as gone
	 * and wake any waiter on lacp_timer_cv.
	 * NOTE(review): presumably the waiter is the code that requested
	 * LACP_THREAD_EXIT -- it is outside this view.
	 */
	pl->lacp_timer_bits = 0;
	pl->lacp_timer_thread = NULL;
	cv_broadcast(&pl->lacp_timer_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);

	/*
	 * Release the reference of the grp so aggr_grp_delete() can call
	 * mac_unregister() safely.
	 */
	aggr_grp_port_rele(port);
	thread_exit();
}
222
223 /*
224 * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
225 * could not be performed due to a memory allocation error, B_TRUE otherwise.
226 */
227 static boolean_t
lacp_port_select(aggr_port_t * portp)228 lacp_port_select(aggr_port_t *portp)
229 {
230 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
231
232 if (!lacp_sel_ports_add(portp))
233 return (B_FALSE);
234 portp->lp_lacp.sm.selected = AGGR_SELECTED;
235 return (B_TRUE);
236 }
237
238 /*
239 * Set the port LACP state to UNSELECTED.
240 */
241 static void
lacp_port_unselect(aggr_port_t * portp)242 lacp_port_unselect(aggr_port_t *portp)
243 {
244 aggr_grp_t *grp = portp->lp_grp;
245
246 ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh));
247
248 lacp_sel_ports_del(portp);
249 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
250 }
251
252 /*
253 * Initialize group specific LACP state and parameters.
254 */
255 void
aggr_lacp_init_grp(aggr_grp_t * aggrp)256 aggr_lacp_init_grp(aggr_grp_t *aggrp)
257 {
258 aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
259 aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
260 aggrp->aggr.CollectorMaxDelay = 10;
261 aggrp->lg_lacp_mode = AGGR_LACP_OFF;
262 aggrp->aggr.ready = B_FALSE;
263 }
264
265 /*
266 * Complete LACP info initialization at port creation time.
267 */
268 void
aggr_lacp_init_port(aggr_port_t * portp)269 aggr_lacp_init_port(aggr_port_t *portp)
270 {
271 aggr_grp_t *aggrp = portp->lp_grp;
272 aggr_lacp_port_t *pl = &portp->lp_lacp;
273
274 ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh));
275 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
276
277 /* actor port # */
278 pl->ActorPortNumber = portp->lp_portid;
279 AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
280 "ActorPortNumber = 0x%x\n", portp->lp_linkid,
281 pl->ActorPortNumber));
282
283 pl->ActorPortPriority = (uint16_t)lacp_port_priority;
284 pl->ActorPortAggrId = 0; /* aggregator id - not used */
285 pl->NTT = B_FALSE; /* need to transmit */
286
287 pl->ActorAdminPortKey = aggrp->lg_key;
288 pl->ActorOperPortKey = pl->ActorAdminPortKey;
289 AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
290 "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
291 portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
292
293 /* Actor admin. port state */
294 pl->ActorAdminPortState.bit.activity = B_FALSE;
295 pl->ActorAdminPortState.bit.timeout = B_TRUE;
296 pl->ActorAdminPortState.bit.aggregation = B_TRUE;
297 pl->ActorAdminPortState.bit.sync = B_FALSE;
298 pl->ActorAdminPortState.bit.collecting = B_FALSE;
299 pl->ActorAdminPortState.bit.distributing = B_FALSE;
300 pl->ActorAdminPortState.bit.defaulted = B_FALSE;
301 pl->ActorAdminPortState.bit.expired = B_FALSE;
302 pl->ActorOperPortState = pl->ActorAdminPortState;
303
304 /*
305 * Partner Administrative Information
306 * (All initialized to zero except for the following)
307 * Fast Timeouts.
308 */
309 pl->PartnerAdminPortState.bit.timeout =
310 pl->PartnerOperPortState.bit.timeout = B_TRUE;
311
312 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
313
314 /*
315 * State machine information.
316 */
317 pl->sm.lacp_on = B_FALSE; /* LACP Off default */
318 pl->sm.begin = B_TRUE; /* Prevents transmissions */
319 pl->sm.lacp_enabled = B_FALSE;
320 pl->sm.port_enabled = B_FALSE; /* Link Down */
321 pl->sm.actor_churn = B_FALSE;
322 pl->sm.partner_churn = B_FALSE;
323 pl->sm.ready_n = B_FALSE;
324 pl->sm.port_moved = B_FALSE;
325
326 lacp_port_unselect(portp);
327
328 pl->sm.periodic_state = LACP_NO_PERIODIC;
329 pl->sm.receive_state = LACP_INITIALIZE;
330 pl->sm.mux_state = LACP_DETACHED;
331 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
332
333 /*
334 * Timer information.
335 */
336 pl->current_while_timer.id = 0;
337 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
338
339 pl->periodic_timer.id = 0;
340 pl->periodic_timer.val = FAST_PERIODIC_TIME;
341
342 pl->wait_while_timer.id = 0;
343 pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
344
345 pl->lacp_timer_bits = 0;
346
347 mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL);
348 cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL);
349
350 pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread,
351 portp, 0, &p0, TS_RUN, minclsyspri);
352
353 /*
354 * Hold a reference of the grp and the port and this reference will
355 * be release when the thread exits.
356 *
357 * The reference on the port is used for aggr_port_delete() to
358 * continue without waiting for the thread to exit; the reference
359 * on the grp is used for aggr_grp_delete() to wait for the thread
360 * to exit before calling mac_unregister().
361 */
362 aggr_grp_port_hold(portp);
363 }
364
365 /*
366 * Port initialization when we need to
367 * turn LACP on/off, etc. Not everything is
368 * reset like in the above routine.
369 * Do NOT modify things like link status.
370 */
371 static void
lacp_reset_port(aggr_port_t * portp)372 lacp_reset_port(aggr_port_t *portp)
373 {
374 aggr_lacp_port_t *pl = &portp->lp_lacp;
375
376 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
377
378 pl->NTT = B_FALSE; /* need to transmit */
379
380 /* reset operational port state */
381 pl->ActorOperPortState.bit.timeout =
382 pl->ActorAdminPortState.bit.timeout;
383
384 pl->ActorOperPortState.bit.sync = B_FALSE;
385 pl->ActorOperPortState.bit.collecting = B_FALSE;
386 pl->ActorOperPortState.bit.distributing = B_FALSE;
387 pl->ActorOperPortState.bit.defaulted = B_TRUE;
388 pl->ActorOperPortState.bit.expired = B_FALSE;
389
390 pl->PartnerOperPortState.bit.timeout = B_TRUE; /* fast t/o */
391 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
392
393 /*
394 * State machine information.
395 */
396 pl->sm.begin = B_TRUE; /* Prevents transmissions */
397 pl->sm.actor_churn = B_FALSE;
398 pl->sm.partner_churn = B_FALSE;
399 pl->sm.ready_n = B_FALSE;
400
401 lacp_port_unselect(portp);
402
403 pl->sm.periodic_state = LACP_NO_PERIODIC;
404 pl->sm.receive_state = LACP_INITIALIZE;
405 pl->sm.mux_state = LACP_DETACHED;
406 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
407
408 /*
409 * Timer information.
410 */
411 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
412 pl->periodic_timer.val = FAST_PERIODIC_TIME;
413 }
414
415 static void
aggr_lacp_mcast_on(aggr_port_t * port)416 aggr_lacp_mcast_on(aggr_port_t *port)
417 {
418 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
419 ASSERT(MAC_PERIM_HELD(port->lp_mh));
420
421 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
422 return;
423
424 (void) aggr_port_multicst(port, B_TRUE,
425 (uchar_t *)&slow_multicast_addr);
426 }
427
428 static void
aggr_lacp_mcast_off(aggr_port_t * port)429 aggr_lacp_mcast_off(aggr_port_t *port)
430 {
431 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
432 ASSERT(MAC_PERIM_HELD(port->lp_mh));
433
434 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
435 return;
436
437 (void) aggr_port_multicst(port, B_FALSE,
438 (uchar_t *)&slow_multicast_addr);
439 }
440
441 static void
start_periodic_timer(aggr_port_t * portp)442 start_periodic_timer(aggr_port_t *portp)
443 {
444 aggr_lacp_port_t *pl = &portp->lp_lacp;
445
446 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
447
448 mutex_enter(&pl->lacp_timer_lock);
449 if (pl->periodic_timer.id == 0) {
450 pl->periodic_timer.id = timeout(periodic_timer_pop, portp,
451 drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
452 }
453 mutex_exit(&pl->lacp_timer_lock);
454 }
455
456 static void
stop_periodic_timer(aggr_port_t * portp)457 stop_periodic_timer(aggr_port_t *portp)
458 {
459 aggr_lacp_port_t *pl = &portp->lp_lacp;
460 timeout_id_t id;
461
462 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
463
464 mutex_enter(&pl->lacp_timer_lock);
465 if ((id = pl->periodic_timer.id) != 0) {
466 pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT;
467 pl->periodic_timer.id = 0;
468 }
469 mutex_exit(&pl->lacp_timer_lock);
470
471 if (id != 0)
472 (void) untimeout(id);
473 }
474
475 /*
476 * When the timer pops, we arrive here to
477 * clear out LACPDU count as well as transmit an
478 * LACPDU. We then set the periodic state and let
479 * the periodic state machine restart the timer.
480 */
481 static void
periodic_timer_pop(void * data)482 periodic_timer_pop(void *data)
483 {
484 aggr_port_t *portp = data;
485 aggr_lacp_port_t *pl = &portp->lp_lacp;
486
487 mutex_enter(&pl->lacp_timer_lock);
488 pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT;
489 cv_broadcast(&pl->lacp_timer_cv);
490 mutex_exit(&pl->lacp_timer_lock);
491 }
492
493 /*
494 * When the timer pops, we arrive here to
495 * clear out LACPDU count as well as transmit an
496 * LACPDU. We then set the periodic state and let
497 * the periodic state machine restart the timer.
498 */
499 static void
periodic_timer_pop_handler(aggr_port_t * portp)500 periodic_timer_pop_handler(aggr_port_t *portp)
501 {
502 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
503
504 portp->lp_lacp_stats.LACPDUsTx = 0;
505
506 /* current timestamp */
507 portp->lp_lacp.time = gethrtime();
508 portp->lp_lacp.NTT = B_TRUE;
509 lacp_xmit_sm(portp);
510
511 /*
512 * Set Periodic State machine state based on the
513 * value of the Partner Operation Port State timeout
514 * bit.
515 */
516 if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
517 portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
518 portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
519 } else {
520 portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
521 portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
522 }
523
524 lacp_periodic_sm(portp);
525 }
526
527 /*
528 * Invoked from:
529 * - startup upon aggregation
530 * - when the periodic timer pops
531 * - when the periodic timer value is changed
532 * - when the port is attached or detached
533 * - when LACP mode is changed.
534 */
535 static void
lacp_periodic_sm(aggr_port_t * portp)536 lacp_periodic_sm(aggr_port_t *portp)
537 {
538 lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
539 aggr_lacp_port_t *pl = &portp->lp_lacp;
540
541 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
542
543 /* LACP_OFF state not in specification so check here. */
544 if (!pl->sm.lacp_on) {
545 /* Stop timer whether it is running or not */
546 stop_periodic_timer(portp);
547 pl->sm.periodic_state = LACP_NO_PERIODIC;
548 pl->NTT = B_FALSE;
549 AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
550 "%s--->%s\n", portp->lp_linkid,
551 lacp_periodic_str[oldstate],
552 lacp_periodic_str[pl->sm.periodic_state]));
553 return;
554 }
555
556 if (pl->sm.begin || !pl->sm.lacp_enabled ||
557 !pl->sm.port_enabled ||
558 (!pl->ActorOperPortState.bit.activity &&
559 !pl->PartnerOperPortState.bit.activity)) {
560
561 /* Stop timer whether it is running or not */
562 stop_periodic_timer(portp);
563 pl->sm.periodic_state = LACP_NO_PERIODIC;
564 pl->NTT = B_FALSE;
565 AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
566 portp->lp_linkid, lacp_periodic_str[oldstate],
567 lacp_periodic_str[pl->sm.periodic_state]));
568 return;
569 }
570
571 /*
572 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
573 * has been received. Then after we timeout, then it is
574 * possible to go to SLOW_PERIODIC_TIME.
575 */
576 if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
577 pl->periodic_timer.val = FAST_PERIODIC_TIME;
578 pl->sm.periodic_state = LACP_FAST_PERIODIC;
579 } else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
580 pl->PartnerOperPortState.bit.timeout) {
581 /*
582 * If we receive a bit indicating we are going to
583 * fast periodic from slow periodic, stop the timer
584 * and let the periodic_timer_pop routine deal
585 * with reseting the periodic state and transmitting
586 * a LACPDU.
587 */
588 stop_periodic_timer(portp);
589 periodic_timer_pop_handler(portp);
590 }
591
592 /* Rearm timer with value provided by partner */
593 start_periodic_timer(portp);
594 }
595
596 /*
597 * This routine transmits an LACPDU if lacp_enabled
598 * is TRUE and if NTT is set.
599 */
600 static void
lacp_xmit_sm(aggr_port_t * portp)601 lacp_xmit_sm(aggr_port_t *portp)
602 {
603 aggr_lacp_port_t *pl = &portp->lp_lacp;
604 size_t len;
605 mblk_t *mp;
606 hrtime_t now, elapsed;
607
608 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
609
610 /* LACP_OFF state not in specification so check here. */
611 if (!pl->sm.lacp_on || !pl->NTT)
612 return;
613
614 /*
615 * Do nothing if LACP has been turned off or if the
616 * periodic state machine is not enabled.
617 */
618 if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
619 !pl->sm.lacp_enabled || pl->sm.begin) {
620 pl->NTT = B_FALSE;
621 return;
622 }
623
624 /*
625 * If we have sent 5 Slow packets in the last second, avoid
626 * sending any more here. No more than three LACPDUs may be transmitted
627 * in any Fast_Periodic_Time interval.
628 */
629 if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
630 /*
631 * Grab the current time value and see if
632 * more than 1 second has passed. If so,
633 * reset the timestamp and clear the count.
634 */
635 now = gethrtime();
636 elapsed = now - pl->time;
637 if (elapsed > NSECS_PER_SEC) {
638 portp->lp_lacp_stats.LACPDUsTx = 0;
639 pl->time = now;
640 } else {
641 return;
642 }
643 }
644
645 len = sizeof (lacp_t) + sizeof (struct ether_header);
646 mp = allocb(len, BPRI_MED);
647 if (mp == NULL)
648 return;
649
650 mp->b_wptr = mp->b_rptr + len;
651 bzero(mp->b_rptr, len);
652
653 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
654 fill_lacp_pdu(portp,
655 (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
656
657 /* Send the packet over the first TX ring */
658 mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp);
659 if (mp != NULL)
660 freemsg(mp);
661
662 pl->NTT = B_FALSE;
663 portp->lp_lacp_stats.LACPDUsTx++;
664 }
665
666 /*
667 * Initialize the ethernet header of a LACP packet sent from the specified
668 * port.
669 */
670 static void
fill_lacp_ether(aggr_port_t * port,struct ether_header * ether)671 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
672 {
673 bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
674 bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
675 ETHERADDRL);
676 ether->ether_type = htons(ETHERTYPE_SLOW);
677 }
678
679 static void
fill_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)680 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
681 {
682 aggr_lacp_port_t *pl = &portp->lp_lacp;
683 aggr_grp_t *aggrp = portp->lp_grp;
684 mac_perim_handle_t pmph;
685
686 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
687 mac_perim_enter_by_mh(portp->lp_mh, &pmph);
688
689 lacp->subtype = LACP_SUBTYPE;
690 lacp->version = LACP_VERSION;
691
692 /*
693 * Actor Information
694 */
695 lacp->actor_info.tlv_type = ACTOR_TLV;
696 lacp->actor_info.information_len = sizeof (link_info_t);
697 lacp->actor_info.system_priority =
698 htons(aggrp->aggr.ActorSystemPriority);
699 bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
700 ETHERADDRL);
701 lacp->actor_info.key = htons(pl->ActorOperPortKey);
702 lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
703 lacp->actor_info.port = htons(pl->ActorPortNumber);
704 lacp->actor_info.state.state = pl->ActorOperPortState.state;
705
706 /*
707 * Partner Information
708 */
709 lacp->partner_info.tlv_type = PARTNER_TLV;
710 lacp->partner_info.information_len = sizeof (link_info_t);
711 lacp->partner_info.system_priority =
712 htons(pl->PartnerOperSysPriority);
713 lacp->partner_info.system_id = pl->PartnerOperSystem;
714 lacp->partner_info.key = htons(pl->PartnerOperKey);
715 lacp->partner_info.port_priority =
716 htons(pl->PartnerOperPortPriority);
717 lacp->partner_info.port = htons(pl->PartnerOperPortNum);
718 lacp->partner_info.state.state = pl->PartnerOperPortState.state;
719
720 /* Collector Information */
721 lacp->tlv_collector = COLLECTOR_TLV;
722 lacp->collector_len = 0x10;
723 lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
724
725 /* Termination Information */
726 lacp->tlv_terminator = TERMINATOR_TLV;
727 lacp->terminator_len = 0x0;
728
729 mac_perim_exit(pmph);
730 }
731
/*
 * lacp_mux_sm - LACP mux state machine
 * This state machine is invoked from:
 *	- startup upon aggregation
 *	- from the Selection logic
 *	- when the wait_while_timer pops
 *	- when the aggregation MAC address is changed
 *	- when receiving DL_NOTE_LINK_UP/DOWN
 *	- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
 *	- when LACP mode is changed.
 *	- when a DL_NOTE_SPEED is received
 *
 * The function works in two steps: the first switch picks the next mux
 * state from the current state and the selection/partner variables (and
 * returns if nothing changes); the second switch performs the actions for
 * the state just entered.  Entering DETACHED, ATTACHED or
 * COLLECTING_DISTRIBUTING changes the actor state advertised to the
 * partner, so a LACPDU transmission is requested at the end.
 */
static void
lacp_mux_sm(aggr_port_t *portp)
{
	aggr_grp_t *aggrp = portp->lp_grp;
	boolean_t NTT_updated = B_FALSE;
	aggr_lacp_port_t *pl = &portp->lp_lacp;
	/* state on entry; only used in the debug trace below */
	lacp_mux_state_t oldstate = pl->sm.mux_state;

	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));

	/* LACP_OFF state not in specification so check here. */
	if (!pl->sm.lacp_on) {
		pl->sm.mux_state = LACP_DETACHED;
		pl->ActorOperPortState.bit.sync = B_FALSE;

		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		/*
		 * Only the state bits are cleared on this path;
		 * aggr_set_coll_dist() is not called here.
		 */
		pl->ActorOperPortState.bit.collecting =
		    pl->ActorOperPortState.bit.distributing = B_FALSE;
		return;
	}

	/* Force the machine back to DETACHED before evaluating it. */
	if (pl->sm.begin || !pl->sm.lacp_enabled)
		pl->sm.mux_state = LACP_DETACHED;

again:
	/* determine next state, or return if state unchanged */
	switch (pl->sm.mux_state) {
	case LACP_DETACHED:
		if (pl->sm.begin) {
			/* fall out to run the DETACHED entry actions */
			break;
		}

		if ((pl->sm.selected == AGGR_SELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY)) {
			pl->sm.mux_state = LACP_WAITING;
			break;
		}
		return;

	case LACP_WAITING:
		if (pl->sm.selected == AGGR_UNSELECTED) {
			pl->sm.mux_state = LACP_DETACHED;
			break;
		}

		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
			pl->sm.mux_state = LACP_ATTACHED;
			break;
		}
		return;

	case LACP_ATTACHED:
		if ((pl->sm.selected == AGGR_UNSELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY)) {
			pl->sm.mux_state = LACP_DETACHED;
			break;
		}

		if ((pl->sm.selected == AGGR_SELECTED) &&
		    pl->PartnerOperPortState.bit.sync) {
			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
			break;
		}
		return;

	case LACP_COLLECTING_DISTRIBUTING:
		if ((pl->sm.selected == AGGR_UNSELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY) ||
		    !pl->PartnerOperPortState.bit.sync) {
			pl->sm.mux_state = LACP_ATTACHED;
			break;
		}
		return;
	}

	/*
	 * Note: after a "goto again" pass, oldstate still holds the state
	 * the function was entered with, not the intermediate one.
	 */
	AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
	    portp->lp_linkid, lacp_mux_str[oldstate],
	    lacp_mux_str[pl->sm.mux_state]));

	/* perform actions on entering a new state */
	switch (pl->sm.mux_state) {
	case LACP_DETACHED:
		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		pl->ActorOperPortState.bit.sync =
		    pl->ActorOperPortState.bit.collecting = B_FALSE;

		/* Turn OFF Collector_Distributor */
		aggr_set_coll_dist(portp, B_FALSE);

		pl->ActorOperPortState.bit.distributing = B_FALSE;
		NTT_updated = B_TRUE;
		break;

	case LACP_WAITING:
		/* No state bits change here, so no LACPDU is requested. */
		start_wait_while_timer(portp);
		break;

	case LACP_ATTACHED:
		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		pl->ActorOperPortState.bit.sync = B_TRUE;
		pl->ActorOperPortState.bit.collecting = B_FALSE;

		/* Turn OFF Collector_Distributor */
		aggr_set_coll_dist(portp, B_FALSE);

		pl->ActorOperPortState.bit.distributing = B_FALSE;
		NTT_updated = B_TRUE;
		if (pl->PartnerOperPortState.bit.sync) {
			/*
			 * We had already received an updated sync from
			 * the partner.  Attempt to transition to
			 * collecting/distributing now.
			 */
			goto again;
		}
		break;

	case LACP_COLLECTING_DISTRIBUTING:
		if (!pl->ActorOperPortState.bit.collecting &&
		    !pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Enabled.\n",
			    portp->lp_linkid));
		}
		pl->ActorOperPortState.bit.distributing = B_TRUE;

		/* Turn Collector_Distributor back ON */
		aggr_set_coll_dist(portp, B_TRUE);

		pl->ActorOperPortState.bit.collecting = B_TRUE;
		NTT_updated = B_TRUE;
		break;
	}

	/*
	 * If we updated the state of the NTT variable, then
	 * initiate a LACPDU transmission.
	 */
	if (NTT_updated) {
		pl->NTT = B_TRUE;
		lacp_xmit_sm(portp);
	}
} /* lacp_mux_sm */
905
906
907 static int
receive_marker_pdu(aggr_port_t * portp,mblk_t * mp)908 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
909 {
910 marker_pdu_t *markerp = (marker_pdu_t *)mp->b_rptr;
911
912 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
913
914 AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
915 portp->lp_linkid));
916
917 /* LACP_OFF state not in specification so check here. */
918 if (!portp->lp_lacp.sm.lacp_on)
919 return (-1);
920
921 if (MBLKL(mp) < sizeof (marker_pdu_t))
922 return (-1);
923
924 if (markerp->version != MARKER_VERSION) {
925 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
926 "version = %d does not match s/w version %d\n",
927 portp->lp_linkid, markerp->version, MARKER_VERSION));
928 return (-1);
929 }
930
931 if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
932 /* We do not yet send out MARKER info PDUs */
933 AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
934 " MARKER TLV = %d - We don't send out info type!\n",
935 portp->lp_linkid, markerp->tlv_marker));
936 return (-1);
937 }
938
939 if (markerp->tlv_marker != MARKER_INFO_TLV) {
940 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
941 " MARKER TLV = %d \n", portp->lp_linkid,
942 markerp->tlv_marker));
943 return (-1);
944 }
945
946 if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
947 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
948 " MARKER length = %d \n", portp->lp_linkid,
949 markerp->marker_len));
950 return (-1);
951 }
952
953 if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
954 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
955 " MARKER Port %d not equal to Partner port %d\n",
956 portp->lp_linkid, markerp->requestor_port,
957 portp->lp_lacp.PartnerOperPortNum));
958 return (-1);
959 }
960
961 if (ether_cmp(&markerp->system_id,
962 &portp->lp_lacp.PartnerOperSystem) != 0) {
963 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
964 " MARKER MAC not equal to Partner MAC\n",
965 portp->lp_linkid));
966 return (-1);
967 }
968
969 /*
970 * Turn into Marker Response PDU
971 * and return mblk to sending system
972 */
973 markerp->tlv_marker = MARKER_RESPONSE_TLV;
974
975 /* reuse the space that was used by received ethernet header */
976 ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
977 mp->b_rptr -= sizeof (struct ether_header);
978 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
979 return (0);
980 }
981
982 /*
983 * Update the LACP mode (off, active, or passive) of the specified group.
984 */
985 void
aggr_lacp_update_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode)986 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
987 {
988 aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
989 aggr_port_t *port;
990
991 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
992 ASSERT(!grp->lg_closing);
993
994 if (mode == old_mode)
995 return;
996
997 grp->lg_lacp_mode = mode;
998
999 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1000 port->lp_lacp.ActorAdminPortState.bit.activity =
1001 port->lp_lacp.ActorOperPortState.bit.activity =
1002 (mode == AGGR_LACP_ACTIVE);
1003
1004 if (old_mode == AGGR_LACP_OFF) {
1005 /* OFF -> {PASSIVE,ACTIVE} */
1006 /* turn OFF Collector_Distributor */
1007 aggr_set_coll_dist(port, B_FALSE);
1008 lacp_on(port);
1009 } else if (mode == AGGR_LACP_OFF) {
1010 /* {PASSIVE,ACTIVE} -> OFF */
1011 lacp_off(port);
1012 /* Turn ON Collector_Distributor */
1013 aggr_set_coll_dist(port, B_TRUE);
1014 } else {
1015 /* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
1016 port->lp_lacp.sm.begin = B_TRUE;
1017 lacp_mux_sm(port);
1018 lacp_periodic_sm(port);
1019
1020 /* kick off state machines */
1021 lacp_receive_sm(port, NULL);
1022 lacp_mux_sm(port);
1023 }
1024 }
1025 }
1026
1027
1028 /*
1029 * Update the LACP timer (short or long) of the specified group.
1030 */
1031 void
aggr_lacp_update_timer(aggr_grp_t * grp,aggr_lacp_timer_t timer)1032 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
1033 {
1034 aggr_port_t *port;
1035
1036 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1037
1038 if (timer == grp->aggr.PeriodicTimer)
1039 return;
1040
1041 grp->aggr.PeriodicTimer = timer;
1042
1043 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1044 port->lp_lacp.ActorAdminPortState.bit.timeout =
1045 port->lp_lacp.ActorOperPortState.bit.timeout =
1046 (timer == AGGR_LACP_TIMER_SHORT);
1047 }
1048 }
1049
1050 void
aggr_port_lacp_set_mode(aggr_grp_t * grp,aggr_port_t * port)1051 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port)
1052 {
1053 aggr_lacp_mode_t mode;
1054 aggr_lacp_timer_t timer;
1055
1056 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1057
1058 mode = grp->lg_lacp_mode;
1059 timer = grp->aggr.PeriodicTimer;
1060
1061 port->lp_lacp.ActorAdminPortState.bit.activity =
1062 port->lp_lacp.ActorOperPortState.bit.activity =
1063 (mode == AGGR_LACP_ACTIVE);
1064
1065 port->lp_lacp.ActorAdminPortState.bit.timeout =
1066 port->lp_lacp.ActorOperPortState.bit.timeout =
1067 (timer == AGGR_LACP_TIMER_SHORT);
1068
1069 if (mode == AGGR_LACP_OFF) {
1070 /* Turn ON Collector_Distributor */
1071 aggr_set_coll_dist(port, B_TRUE);
1072 } else { /* LACP_ACTIVE/PASSIVE */
1073 lacp_on(port);
1074 }
1075 }
1076
1077 /*
1078 * Sets the initial LACP mode (off, active, passive) and LACP timer
1079 * (short, long) of the specified group.
1080 */
1081 void
aggr_lacp_set_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode,aggr_lacp_timer_t timer)1082 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
1083 aggr_lacp_timer_t timer)
1084 {
1085 aggr_port_t *port;
1086
1087 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1088
1089 grp->lg_lacp_mode = mode;
1090 grp->aggr.PeriodicTimer = timer;
1091
1092 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1093 aggr_port_lacp_set_mode(grp, port);
1094 }
1095
1096 /*
1097 * Verify that the Partner MAC and Key recorded by the specified
1098 * port are not found in other ports that are not part of our
1099 * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1100 * otherwise.
1101 */
1102 static boolean_t
lacp_misconfig_check(aggr_port_t * portp)1103 lacp_misconfig_check(aggr_port_t *portp)
1104 {
1105 aggr_grp_t *grp = portp->lp_grp;
1106 lacp_sel_ports_t *cport;
1107
1108 mutex_enter(&lacp_sel_lock);
1109
1110 for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1111
1112 /* skip entries of the group of the port being checked */
1113 if (cport->sp_grp_linkid == grp->lg_linkid)
1114 continue;
1115
1116 if ((ether_cmp(&cport->sp_partner_system,
1117 &grp->aggr.PartnerSystem) == 0) &&
1118 (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1119 char mac_str[ETHERADDRL*3];
1120 struct ether_addr *mac = &cport->sp_partner_system;
1121
1122 /*
1123 * The Partner port information is already in use
1124 * by ports in another aggregation so disable this
1125 * port.
1126 */
1127
1128 (void) snprintf(mac_str, sizeof (mac_str),
1129 "%x:%x:%x:%x:%x:%x",
1130 mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1131 mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1132 mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1133
1134 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1135
1136 cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1137 "MAC %s and key %d in use on aggregation %d "
1138 "port %d\n", grp->lg_linkid, portp->lp_linkid,
1139 mac_str, portp->lp_lacp.PartnerOperKey,
1140 cport->sp_grp_linkid, cport->sp_linkid);
1141 break;
1142 }
1143 }
1144
1145 mutex_exit(&lacp_sel_lock);
1146 return (cport != NULL);
1147 }
1148
1149 /*
1150 * Remove the specified port from the list of selected ports.
1151 */
1152 static void
lacp_sel_ports_del(aggr_port_t * portp)1153 lacp_sel_ports_del(aggr_port_t *portp)
1154 {
1155 lacp_sel_ports_t *cport, **prev = NULL;
1156
1157 mutex_enter(&lacp_sel_lock);
1158
1159 prev = &sel_ports;
1160 for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1161 cport = cport->sp_next) {
1162 if (portp->lp_linkid == cport->sp_linkid)
1163 break;
1164 }
1165
1166 if (cport == NULL) {
1167 mutex_exit(&lacp_sel_lock);
1168 return;
1169 }
1170
1171 *prev = cport->sp_next;
1172 kmem_free(cport, sizeof (*cport));
1173
1174 mutex_exit(&lacp_sel_lock);
1175 }
1176
1177 /*
1178 * Add the specified port to the list of selected ports. Returns B_FALSE
1179 * if the operation could not be performed due to an memory allocation
1180 * error.
1181 */
1182 static boolean_t
lacp_sel_ports_add(aggr_port_t * portp)1183 lacp_sel_ports_add(aggr_port_t *portp)
1184 {
1185 lacp_sel_ports_t *new_port;
1186 lacp_sel_ports_t *cport, **last;
1187
1188 mutex_enter(&lacp_sel_lock);
1189
1190 /* check if port is already in the list */
1191 last = &sel_ports;
1192 for (cport = sel_ports; cport != NULL;
1193 last = &cport->sp_next, cport = cport->sp_next) {
1194 if (portp->lp_linkid == cport->sp_linkid) {
1195 ASSERT(cport->sp_partner_key ==
1196 portp->lp_lacp.PartnerOperKey);
1197 ASSERT(ether_cmp(&cport->sp_partner_system,
1198 &portp->lp_lacp.PartnerOperSystem) == 0);
1199
1200 mutex_exit(&lacp_sel_lock);
1201 return (B_TRUE);
1202 }
1203 }
1204
1205 /* create and initialize new entry */
1206 new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1207 if (new_port == NULL) {
1208 mutex_exit(&lacp_sel_lock);
1209 return (B_FALSE);
1210 }
1211
1212 new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1213 bcopy(&portp->lp_lacp.PartnerOperSystem,
1214 &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1215 new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1216 new_port->sp_linkid = portp->lp_linkid;
1217
1218 *last = new_port;
1219
1220 mutex_exit(&lacp_sel_lock);
1221 return (B_TRUE);
1222 }
1223
1224 /*
1225 * lacp_selection_logic - LACP selection logic
1226 * Sets the selected variable on a per port basis
1227 * and sets Ready when all waiting ports are ready
1228 * to go online.
1229 *
1230 * parameters:
1231 * - portp - instance this applies to.
1232 *
1233 * invoked:
1234 * - when initialization is needed
1235 * - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1236 * - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1237 * - every time the wait_while_timer pops
1238 * - everytime we turn LACP on/off
1239 */
1240 static void
lacp_selection_logic(aggr_port_t * portp)1241 lacp_selection_logic(aggr_port_t *portp)
1242 {
1243 aggr_port_t *tpp;
1244 aggr_grp_t *aggrp = portp->lp_grp;
1245 int ports_waiting;
1246 boolean_t reset_mac = B_FALSE;
1247 aggr_lacp_port_t *pl = &portp->lp_lacp;
1248
1249 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1250
1251 /* LACP_OFF state not in specification so check here. */
1252 if (!pl->sm.lacp_on) {
1253 lacp_port_unselect(portp);
1254 aggrp->aggr.ready = B_FALSE;
1255 lacp_mux_sm(portp);
1256 return;
1257 }
1258
1259 if (pl->sm.begin || !pl->sm.lacp_enabled ||
1260 (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1261
1262 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1263 "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1264 "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1265 AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1266 portp->lp_state));
1267
1268 lacp_port_unselect(portp);
1269 aggrp->aggr.ready = B_FALSE;
1270 lacp_mux_sm(portp);
1271 return;
1272 }
1273
1274 /*
1275 * If LACP is not enabled then selected is never set.
1276 */
1277 if (!pl->sm.lacp_enabled) {
1278 AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1279 portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1280
1281 lacp_port_unselect(portp);
1282 lacp_mux_sm(portp);
1283 return;
1284 }
1285
1286 /*
1287 * Check if the Partner MAC or Key are zero. If so, we have
1288 * not received any LACP info or it has expired and the
1289 * receive machine is in the LACP_DEFAULTED state.
1290 */
1291 if (ether_cmp(&pl->PartnerOperSystem, ðerzeroaddr) == 0 ||
1292 (pl->PartnerOperKey == 0)) {
1293
1294 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1295 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1296 ðerzeroaddr) != 0 &&
1297 (tpp->lp_lacp.PartnerOperKey != 0))
1298 break;
1299 }
1300
1301 /*
1302 * If all ports have no key or aggregation address,
1303 * then clear the negotiated Partner MAC and key.
1304 */
1305 if (tpp == NULL) {
1306 /* Clear the aggregation Partner MAC and key */
1307 aggrp->aggr.PartnerSystem = etherzeroaddr;
1308 aggrp->aggr.PartnerOperAggrKey = 0;
1309 }
1310
1311 return;
1312 }
1313
1314 /*
1315 * Insure that at least one port in the aggregation
1316 * matches the Partner aggregation MAC and key. If not,
1317 * then clear the aggregation MAC and key. Later we will
1318 * set the Partner aggregation MAC and key to that of the
1319 * current port's Partner MAC and key.
1320 */
1321 if (ether_cmp(&pl->PartnerOperSystem,
1322 &aggrp->aggr.PartnerSystem) != 0 ||
1323 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1324
1325 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1326 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1327 &aggrp->aggr.PartnerSystem) == 0 &&
1328 (tpp->lp_lacp.PartnerOperKey ==
1329 aggrp->aggr.PartnerOperAggrKey)) {
1330 /* Set aggregation Partner MAC and key */
1331 aggrp->aggr.PartnerSystem =
1332 pl->PartnerOperSystem;
1333 aggrp->aggr.PartnerOperAggrKey =
1334 pl->PartnerOperKey;
1335 break;
1336 }
1337 }
1338
1339 if (tpp == NULL) {
1340 /* Clear the aggregation Partner MAC and key */
1341 aggrp->aggr.PartnerSystem = etherzeroaddr;
1342 aggrp->aggr.PartnerOperAggrKey = 0;
1343 reset_mac = B_TRUE;
1344 }
1345 }
1346
1347 /*
1348 * If our Actor MAC is found in the Partner MAC
1349 * on this port then we have a loopback misconfiguration.
1350 */
1351 if (ether_cmp(&pl->PartnerOperSystem,
1352 (struct ether_addr *)&aggrp->lg_addr) == 0) {
1353 cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1354 portp->lp_linkid);
1355
1356 lacp_port_unselect(portp);
1357 lacp_mux_sm(portp);
1358 return;
1359 }
1360
1361 /*
1362 * If our Partner MAC and Key are found on any other
1363 * ports that are not in our aggregation, we have
1364 * a misconfiguration.
1365 */
1366 if (lacp_misconfig_check(portp)) {
1367 lacp_mux_sm(portp);
1368 return;
1369 }
1370
1371 /*
1372 * If the Aggregation Partner MAC and Key have not been
1373 * set, then this is either the first port or the aggregation
1374 * MAC and key have been reset. In either case we must set
1375 * the values of the Partner MAC and key.
1376 */
1377 if (ether_cmp(&aggrp->aggr.PartnerSystem, ðerzeroaddr) == 0 &&
1378 (aggrp->aggr.PartnerOperAggrKey == 0)) {
1379 /* Set aggregation Partner MAC and key */
1380 aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1381 aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1382
1383 /*
1384 * If we reset Partner aggregation MAC, then restart
1385 * selection_logic on ports that match new MAC address.
1386 */
1387 if (reset_mac) {
1388 for (tpp = aggrp->lg_ports; tpp; tpp =
1389 tpp->lp_next) {
1390 if (tpp == portp)
1391 continue;
1392 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1393 &aggrp->aggr.PartnerSystem) == 0 &&
1394 (tpp->lp_lacp.PartnerOperKey ==
1395 aggrp->aggr.PartnerOperAggrKey))
1396 lacp_selection_logic(tpp);
1397 }
1398 }
1399 } else if (ether_cmp(&pl->PartnerOperSystem,
1400 &aggrp->aggr.PartnerSystem) != 0 ||
1401 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1402 /*
1403 * The Partner port information does not match
1404 * that of the other ports in the aggregation
1405 * so disable this port.
1406 */
1407 lacp_port_unselect(portp);
1408
1409 cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1410 "or key (%d) incompatible with Aggregation Partner "
1411 "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1412 aggrp->aggr.PartnerOperAggrKey);
1413
1414 lacp_mux_sm(portp);
1415 return;
1416 }
1417
1418 /* If we get to here, automatically set selected */
1419 if (pl->sm.selected != AGGR_SELECTED) {
1420 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1421 "selected %d-->%d\n", portp->lp_linkid,
1422 pl->sm.selected, AGGR_SELECTED));
1423 if (!lacp_port_select(portp))
1424 return;
1425 lacp_mux_sm(portp);
1426 }
1427
1428 /*
1429 * From this point onward we have selected the port
1430 * and are simply checking if the Ready flag should
1431 * be set.
1432 */
1433
1434 /*
1435 * If at least two ports are waiting to aggregate
1436 * and ready_n is set on all ports waiting to aggregate
1437 * then set READY for the aggregation.
1438 */
1439
1440 ports_waiting = 0;
1441
1442 if (!aggrp->aggr.ready) {
1443 /*
1444 * If all ports in the aggregation have received compatible
1445 * partner information and they match up correctly with the
1446 * switch, there is no need to wait for all the
1447 * wait_while_timers to pop.
1448 */
1449 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1450 if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1451 tpp->lp_lacp.sm.begin) &&
1452 !tpp->lp_lacp.PartnerOperPortState.bit.sync) {
1453 /* Add up ports uninitialized or waiting */
1454 ports_waiting++;
1455 if (!tpp->lp_lacp.sm.ready_n) {
1456 DTRACE_PROBE1(port___not__ready,
1457 aggr_port_t *, tpp);
1458 return;
1459 }
1460 }
1461 }
1462 }
1463
1464 if (aggrp->aggr.ready) {
1465 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1466 "aggr.ready already set\n", portp->lp_linkid));
1467 lacp_mux_sm(portp);
1468 } else {
1469 AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1470 portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1471 aggrp->aggr.ready = B_TRUE;
1472
1473 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1474 lacp_mux_sm(tpp);
1475 }
1476
1477 }
1478
1479 /*
1480 * wait_while_timer_pop - When the timer pops, we arrive here to
1481 * set ready_n and trigger the selection logic.
1482 */
1483 static void
wait_while_timer_pop(void * data)1484 wait_while_timer_pop(void *data)
1485 {
1486 aggr_port_t *portp = data;
1487 aggr_lacp_port_t *pl = &portp->lp_lacp;
1488
1489 mutex_enter(&pl->lacp_timer_lock);
1490 pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT;
1491 cv_broadcast(&pl->lacp_timer_cv);
1492 mutex_exit(&pl->lacp_timer_lock);
1493 }
1494
1495 /*
1496 * wait_while_timer_pop_handler - When the timer pops, we arrive here to
1497 * set ready_n and trigger the selection logic.
1498 */
1499 static void
wait_while_timer_pop_handler(aggr_port_t * portp)1500 wait_while_timer_pop_handler(aggr_port_t *portp)
1501 {
1502 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1503
1504 AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1505 portp->lp_linkid));
1506 portp->lp_lacp.sm.ready_n = B_TRUE;
1507
1508 lacp_selection_logic(portp);
1509 }
1510
1511 static void
start_wait_while_timer(aggr_port_t * portp)1512 start_wait_while_timer(aggr_port_t *portp)
1513 {
1514 aggr_lacp_port_t *pl = &portp->lp_lacp;
1515
1516 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1517
1518 mutex_enter(&pl->lacp_timer_lock);
1519 if (pl->wait_while_timer.id == 0) {
1520 pl->wait_while_timer.id =
1521 timeout(wait_while_timer_pop, portp,
1522 drv_usectohz(1000000 *
1523 portp->lp_lacp.wait_while_timer.val));
1524 }
1525 mutex_exit(&pl->lacp_timer_lock);
1526 }
1527
1528
1529 static void
stop_wait_while_timer(aggr_port_t * portp)1530 stop_wait_while_timer(aggr_port_t *portp)
1531 {
1532 aggr_lacp_port_t *pl = &portp->lp_lacp;
1533 timeout_id_t id;
1534
1535 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1536
1537 mutex_enter(&pl->lacp_timer_lock);
1538 if ((id = pl->wait_while_timer.id) != 0) {
1539 pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT;
1540 pl->wait_while_timer.id = 0;
1541 }
1542 mutex_exit(&pl->lacp_timer_lock);
1543
1544 if (id != 0)
1545 (void) untimeout(id);
1546 }
1547
1548 /*
1549 * Invoked when a port has been attached to a group.
1550 * Complete the processing that couldn't be finished from lacp_on()
1551 * because the port was not started. We know that the link is full
1552 * duplex and ON, otherwise it wouldn't be attached.
1553 */
1554 void
aggr_lacp_port_attached(aggr_port_t * portp)1555 aggr_lacp_port_attached(aggr_port_t *portp)
1556 {
1557 aggr_grp_t *grp = portp->lp_grp;
1558 aggr_lacp_port_t *pl = &portp->lp_lacp;
1559
1560 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1561 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1562 ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1563
1564 AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1565 portp->lp_linkid));
1566
1567 portp->lp_lacp.sm.port_enabled = B_TRUE; /* link on */
1568
1569 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1570 return;
1571
1572 pl->sm.lacp_enabled = B_TRUE;
1573 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1574 pl->sm.begin = B_TRUE;
1575
1576 lacp_receive_sm(portp, NULL);
1577 lacp_mux_sm(portp);
1578
1579 /* Enable Multicast Slow Protocol address */
1580 aggr_lacp_mcast_on(portp);
1581
1582 /* periodic_sm is started up from the receive machine */
1583 lacp_selection_logic(portp);
1584 }
1585
1586 /*
1587 * Invoked when a port has been detached from a group. Turn off
1588 * LACP processing if it was enabled.
1589 */
1590 void
aggr_lacp_port_detached(aggr_port_t * portp)1591 aggr_lacp_port_detached(aggr_port_t *portp)
1592 {
1593 aggr_grp_t *grp = portp->lp_grp;
1594
1595 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1596 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1597
1598 AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1599 portp->lp_linkid));
1600
1601 portp->lp_lacp.sm.port_enabled = B_FALSE;
1602
1603 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1604 return;
1605
1606 portp->lp_lacp.sm.lacp_enabled = B_FALSE;
1607 lacp_selection_logic(portp);
1608 lacp_mux_sm(portp);
1609 lacp_periodic_sm(portp);
1610
1611 /*
1612 * Disable Slow Protocol Timers.
1613 */
1614 stop_periodic_timer(portp);
1615 stop_current_while_timer(portp);
1616 stop_wait_while_timer(portp);
1617
1618 /* Disable Multicast Slow Protocol address */
1619 aggr_lacp_mcast_off(portp);
1620 aggr_set_coll_dist(portp, B_FALSE);
1621 }
1622
1623 /*
1624 * Enable Slow Protocol LACP and Marker PDUs.
1625 */
1626 static void
lacp_on(aggr_port_t * portp)1627 lacp_on(aggr_port_t *portp)
1628 {
1629 aggr_lacp_port_t *pl = &portp->lp_lacp;
1630 mac_perim_handle_t mph;
1631
1632 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1633
1634 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1635
1636 /*
1637 * Reset the state machines and Partner operational
1638 * information. Careful to not reset things like
1639 * our link state.
1640 */
1641 lacp_reset_port(portp);
1642 pl->sm.lacp_on = B_TRUE;
1643
1644 AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1645
1646 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1647 pl->sm.port_enabled = B_TRUE;
1648 pl->sm.lacp_enabled = B_TRUE;
1649 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1650 }
1651
1652 lacp_receive_sm(portp, NULL);
1653 lacp_mux_sm(portp);
1654
1655 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1656 /* Enable Multicast Slow Protocol address */
1657 aggr_lacp_mcast_on(portp);
1658
1659 /* periodic_sm is started up from the receive machine */
1660 lacp_selection_logic(portp);
1661 }
1662 mac_perim_exit(mph);
1663 } /* lacp_on */
1664
1665 /* Disable Slow Protocol LACP and Marker PDUs */
1666 static void
lacp_off(aggr_port_t * portp)1667 lacp_off(aggr_port_t *portp)
1668 {
1669 aggr_lacp_port_t *pl = &portp->lp_lacp;
1670 mac_perim_handle_t mph;
1671
1672 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1673 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1674
1675 pl->sm.lacp_on = B_FALSE;
1676
1677 AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1678
1679 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1680 /*
1681 * Disable Slow Protocol Timers.
1682 */
1683 stop_periodic_timer(portp);
1684 stop_current_while_timer(portp);
1685 stop_wait_while_timer(portp);
1686
1687 /* Disable Multicast Slow Protocol address */
1688 aggr_lacp_mcast_off(portp);
1689
1690 pl->sm.port_enabled = B_FALSE;
1691 pl->sm.lacp_enabled = B_FALSE;
1692 pl->ActorOperPortState.bit.aggregation = B_FALSE;
1693 }
1694
1695 lacp_mux_sm(portp);
1696 lacp_periodic_sm(portp);
1697 lacp_selection_logic(portp);
1698
1699 /* Turn OFF Collector_Distributor */
1700 aggr_set_coll_dist(portp, B_FALSE);
1701
1702 lacp_reset_port(portp);
1703 mac_perim_exit(mph);
1704 }
1705
1706
1707 static boolean_t
valid_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)1708 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1709 {
1710 /*
1711 * 43.4.12 - "a Receive machine shall not validate
1712 * the Version Number, TLV_type, or Reserved fields in received
1713 * LACPDUs."
1714 * ... "a Receive machine may validate the Actor_Information_Length,
1715 * Partner_Information_Length, Collector_Information_Length,
1716 * or Terminator_Length fields."
1717 */
1718 if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1719 (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1720 (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1721 (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1722 AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1723 " Terminator Length = %d \n", portp->lp_linkid,
1724 lacp->terminator_len));
1725 return (B_FALSE);
1726 }
1727
1728 return (B_TRUE);
1729 }
1730
1731
1732 static void
start_current_while_timer(aggr_port_t * portp,uint_t time)1733 start_current_while_timer(aggr_port_t *portp, uint_t time)
1734 {
1735 aggr_lacp_port_t *pl = &portp->lp_lacp;
1736
1737 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1738
1739 mutex_enter(&pl->lacp_timer_lock);
1740 if (pl->current_while_timer.id == 0) {
1741 if (time > 0)
1742 pl->current_while_timer.val = time;
1743 else if (pl->ActorOperPortState.bit.timeout)
1744 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
1745 else
1746 pl->current_while_timer.val = LONG_TIMEOUT_TIME;
1747
1748 pl->current_while_timer.id =
1749 timeout(current_while_timer_pop, portp,
1750 drv_usectohz((clock_t)1000000 *
1751 (clock_t)portp->lp_lacp.current_while_timer.val));
1752 }
1753 mutex_exit(&pl->lacp_timer_lock);
1754 }
1755
1756
1757 static void
stop_current_while_timer(aggr_port_t * portp)1758 stop_current_while_timer(aggr_port_t *portp)
1759 {
1760 aggr_lacp_port_t *pl = &portp->lp_lacp;
1761 timeout_id_t id;
1762
1763 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1764
1765 mutex_enter(&pl->lacp_timer_lock);
1766 if ((id = pl->current_while_timer.id) != 0) {
1767 pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT;
1768 pl->current_while_timer.id = 0;
1769 }
1770 mutex_exit(&pl->lacp_timer_lock);
1771
1772 if (id != 0)
1773 (void) untimeout(id);
1774 }
1775
1776 static void
current_while_timer_pop(void * data)1777 current_while_timer_pop(void *data)
1778 {
1779 aggr_port_t *portp = (aggr_port_t *)data;
1780 aggr_lacp_port_t *pl = &portp->lp_lacp;
1781
1782 mutex_enter(&pl->lacp_timer_lock);
1783 pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT;
1784 cv_broadcast(&pl->lacp_timer_cv);
1785 mutex_exit(&pl->lacp_timer_lock);
1786 }
1787
1788 static void
current_while_timer_pop_handler(aggr_port_t * portp)1789 current_while_timer_pop_handler(aggr_port_t *portp)
1790 {
1791 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1792
1793 AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1794 "pop id=%p\n", portp->lp_linkid,
1795 portp->lp_lacp.current_while_timer.id));
1796
1797 lacp_receive_sm(portp, NULL);
1798 }
1799
1800 /*
1801 * record_Default - Simply copies over administrative values
1802 * to the partner operational values, and sets our state to indicate we
1803 * are using defaulted values.
1804 */
1805 static void
record_Default(aggr_port_t * portp)1806 record_Default(aggr_port_t *portp)
1807 {
1808 aggr_lacp_port_t *pl = &portp->lp_lacp;
1809
1810 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1811
1812 pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1813 pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1814 pl->PartnerOperSystem = pl->PartnerAdminSystem;
1815 pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1816 pl->PartnerOperKey = pl->PartnerAdminKey;
1817 pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1818
1819 pl->ActorOperPortState.bit.defaulted = B_TRUE;
1820 }
1821
1822
1823 /* Returns B_TRUE on sync value changing */
1824 static boolean_t
record_PDU(aggr_port_t * portp,lacp_t * lacp)1825 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1826 {
1827 aggr_grp_t *aggrp = portp->lp_grp;
1828 aggr_lacp_port_t *pl = &portp->lp_lacp;
1829 uint8_t save_sync;
1830
1831 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1832
1833 /*
1834 * Partner Information
1835 */
1836 pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1837 pl->PartnerOperPortPriority =
1838 ntohs(lacp->actor_info.port_priority);
1839 pl->PartnerOperSystem = lacp->actor_info.system_id;
1840 pl->PartnerOperSysPriority =
1841 htons(lacp->actor_info.system_priority);
1842 pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1843
1844 /* All state info except for Synchronization */
1845 save_sync = pl->PartnerOperPortState.bit.sync;
1846 pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1847
1848 /* Defaulted set to FALSE */
1849 pl->ActorOperPortState.bit.defaulted = B_FALSE;
1850
1851 /*
1852 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1853 * Partner_System_Priority, Partner_Key, and
1854 * Partner_State.Aggregation) are compared to the
1855 * corresponding operations paramters values for
1856 * the Actor. If these are equal, or if this is
1857 * an individual link, we are synchronized.
1858 */
1859 if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1860 (ntohs(lacp->partner_info.port_priority) ==
1861 pl->ActorPortPriority) &&
1862 (ether_cmp(&lacp->partner_info.system_id,
1863 (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1864 (ntohs(lacp->partner_info.system_priority) ==
1865 aggrp->aggr.ActorSystemPriority) &&
1866 (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1867 (lacp->partner_info.state.bit.aggregation ==
1868 pl->ActorOperPortState.bit.aggregation)) ||
1869 (!lacp->actor_info.state.bit.aggregation)) {
1870
1871 pl->PartnerOperPortState.bit.sync =
1872 lacp->actor_info.state.bit.sync;
1873 } else {
1874 pl->PartnerOperPortState.bit.sync = B_FALSE;
1875 }
1876
1877 if (save_sync != pl->PartnerOperPortState.bit.sync) {
1878 AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1879 "%d -->%d\n", portp->lp_linkid, save_sync,
1880 pl->PartnerOperPortState.bit.sync));
1881 return (B_TRUE);
1882 } else {
1883 return (B_FALSE);
1884 }
1885 }
1886
1887
1888 /*
1889 * update_selected - If any of the Partner parameters has
1890 * changed from a previous value, then
1891 * unselect the link from the aggregator.
1892 */
1893 static boolean_t
update_selected(aggr_port_t * portp,lacp_t * lacp)1894 update_selected(aggr_port_t *portp, lacp_t *lacp)
1895 {
1896 aggr_lacp_port_t *pl = &portp->lp_lacp;
1897
1898 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1899
1900 if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1901 (pl->PartnerOperPortPriority !=
1902 ntohs(lacp->actor_info.port_priority)) ||
1903 (ether_cmp(&pl->PartnerOperSystem,
1904 &lacp->actor_info.system_id) != 0) ||
1905 (pl->PartnerOperSysPriority !=
1906 ntohs(lacp->actor_info.system_priority)) ||
1907 (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1908 (pl->PartnerOperPortState.bit.aggregation !=
1909 lacp->actor_info.state.bit.aggregation)) {
1910 AGGR_LACP_DBG(("update_selected:(%d): "
1911 "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1912 AGGR_UNSELECTED));
1913
1914 lacp_port_unselect(portp);
1915 return (B_TRUE);
1916 } else {
1917 return (B_FALSE);
1918 }
1919 }
1920
1921
1922 /*
1923 * update_default_selected - If any of the operational Partner parameters
1924 * is different than that of the administrative values
1925 * then unselect the link from the aggregator.
1926 */
1927 static void
update_default_selected(aggr_port_t * portp)1928 update_default_selected(aggr_port_t *portp)
1929 {
1930 aggr_lacp_port_t *pl = &portp->lp_lacp;
1931
1932 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1933
1934 if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1935 (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1936 (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1937 (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1938 (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1939 (pl->PartnerOperPortState.bit.aggregation !=
1940 pl->PartnerAdminPortState.bit.aggregation)) {
1941
1942 AGGR_LACP_DBG(("update_default_selected:(%d): "
1943 "selected %d-->%d\n", portp->lp_linkid,
1944 pl->sm.selected, AGGR_UNSELECTED));
1945
1946 lacp_port_unselect(portp);
1947 }
1948 }
1949
1950
1951 /*
1952 * update_NTT - If any of the Partner values in the received LACPDU
1953 * are different than that of the Actor operational
1954 * values then set NTT to true.
1955 */
1956 static void
update_NTT(aggr_port_t * portp,lacp_t * lacp)1957 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1958 {
1959 aggr_grp_t *aggrp = portp->lp_grp;
1960 aggr_lacp_port_t *pl = &portp->lp_lacp;
1961
1962 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1963
1964 if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1965 (pl->ActorPortPriority !=
1966 ntohs(lacp->partner_info.port_priority)) ||
1967 (ether_cmp(&aggrp->lg_addr,
1968 &lacp->partner_info.system_id) != 0) ||
1969 (aggrp->aggr.ActorSystemPriority !=
1970 ntohs(lacp->partner_info.system_priority)) ||
1971 (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1972 (pl->ActorOperPortState.bit.activity !=
1973 lacp->partner_info.state.bit.activity) ||
1974 (pl->ActorOperPortState.bit.timeout !=
1975 lacp->partner_info.state.bit.timeout) ||
1976 (pl->ActorOperPortState.bit.sync !=
1977 lacp->partner_info.state.bit.sync) ||
1978 (pl->ActorOperPortState.bit.aggregation !=
1979 lacp->partner_info.state.bit.aggregation)) {
1980
1981 AGGR_LACP_DBG(("update_NTT:(%d): NTT %d-->%d\n",
1982 portp->lp_linkid, pl->NTT, B_TRUE));
1983
1984 pl->NTT = B_TRUE;
1985 }
1986 }
1987
1988 /*
1989 * lacp_receive_sm - LACP receive state machine
1990 *
1991 * parameters:
1992 * - portp - instance this applies to.
1993 * - lacp - pointer in the case of a received LACPDU.
1994 * This value is NULL if there is no LACPDU.
1995 *
1996 * invoked:
1997 * - when initialization is needed
1998 * - upon reception of an LACPDU. This is the common case.
1999 * - every time the current_while_timer pops
2000 */
2001 static void
lacp_receive_sm(aggr_port_t * portp,lacp_t * lacp)2002 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
2003 {
2004 boolean_t sync_updated, selected_updated, save_activity;
2005 aggr_lacp_port_t *pl = &portp->lp_lacp;
2006 lacp_receive_state_t oldstate = pl->sm.receive_state;
2007
2008 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
2009
2010 /* LACP_OFF state not in specification so check here. */
2011 if (!pl->sm.lacp_on)
2012 return;
2013
2014 /* figure next state */
2015 if (pl->sm.begin || pl->sm.port_moved) {
2016 pl->sm.receive_state = LACP_INITIALIZE;
2017 } else if (!pl->sm.port_enabled) { /* DL_NOTE_LINK_DOWN */
2018 pl->sm.receive_state = LACP_PORT_DISABLED;
2019 } else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
2020 pl->sm.receive_state =
2021 (pl->sm.receive_state == LACP_PORT_DISABLED) ?
2022 LACP_DISABLED : LACP_PORT_DISABLED;
2023 } else if (lacp != NULL) {
2024 if ((pl->sm.receive_state == LACP_EXPIRED) ||
2025 (pl->sm.receive_state == LACP_DEFAULTED)) {
2026 pl->sm.receive_state = LACP_CURRENT;
2027 }
2028 } else if ((pl->sm.receive_state == LACP_CURRENT) &&
2029 (pl->current_while_timer.id == 0)) {
2030 pl->sm.receive_state = LACP_EXPIRED;
2031 } else if ((pl->sm.receive_state == LACP_EXPIRED) &&
2032 (pl->current_while_timer.id == 0)) {
2033 pl->sm.receive_state = LACP_DEFAULTED;
2034 }
2035
2036 if (!((lacp && (oldstate == LACP_CURRENT) &&
2037 (pl->sm.receive_state == LACP_CURRENT)))) {
2038 AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
2039 portp->lp_linkid, lacp_receive_str[oldstate],
2040 lacp_receive_str[pl->sm.receive_state]));
2041 }
2042
2043 switch (pl->sm.receive_state) {
2044 case LACP_INITIALIZE:
2045 lacp_port_unselect(portp);
2046 record_Default(portp);
2047 pl->ActorOperPortState.bit.expired = B_FALSE;
2048 pl->sm.port_moved = B_FALSE;
2049 pl->sm.receive_state = LACP_PORT_DISABLED;
2050 pl->sm.begin = B_FALSE;
2051 lacp_receive_sm(portp, NULL);
2052 break;
2053
2054 case LACP_PORT_DISABLED:
2055 pl->PartnerOperPortState.bit.sync = B_FALSE;
2056 /*
2057 * Stop current_while_timer in case
2058 * we got here from link down
2059 */
2060 stop_current_while_timer(portp);
2061
2062 if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
2063 pl->sm.receive_state = LACP_DISABLED;
2064 lacp_receive_sm(portp, lacp);
2065 /* We goto LACP_DISABLED state */
2066 break;
2067 } else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
2068 pl->sm.receive_state = LACP_EXPIRED;
2069 /*
2070 * FALL THROUGH TO LACP_EXPIRED CASE:
2071 * We have no way of knowing if we get into
2072 * lacp_receive_sm() from a current_while_timer
2073 * expiring as it has never been kicked off yet!
2074 */
2075 } else {
2076 /* We stay in LACP_PORT_DISABLED state */
2077 break;
2078 }
2079 /* LACP_PORT_DISABLED -> LACP_EXPIRED */
2080 /* FALLTHROUGH */
2081
2082 case LACP_EXPIRED:
2083 /*
2084 * Arrives here from LACP_PORT_DISABLED state as well as
2085 * as well as current_while_timer expiring.
2086 */
2087 pl->PartnerOperPortState.bit.sync = B_FALSE;
2088 pl->PartnerOperPortState.bit.timeout = B_TRUE;
2089
2090 pl->ActorOperPortState.bit.expired = B_TRUE;
2091 start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
2092 lacp_periodic_sm(portp);
2093 break;
2094
2095 case LACP_DISABLED:
2096 /*
2097 * This is the normal state for recv_sm when LACP_OFF
2098 * is set or the NIC is in half duplex mode.
2099 */
2100 lacp_port_unselect(portp);
2101 record_Default(portp);
2102 pl->PartnerOperPortState.bit.aggregation = B_FALSE;
2103 pl->ActorOperPortState.bit.expired = B_FALSE;
2104 break;
2105
2106 case LACP_DEFAULTED:
2107 /*
2108 * Current_while_timer expired a second time.
2109 */
2110 update_default_selected(portp);
2111 record_Default(portp); /* overwrite Partner Oper val */
2112 pl->ActorOperPortState.bit.expired = B_FALSE;
2113 pl->PartnerOperPortState.bit.sync = B_TRUE;
2114
2115 lacp_selection_logic(portp);
2116 lacp_mux_sm(portp);
2117 break;
2118
2119 case LACP_CURRENT:
2120 /*
2121 * Reception of LACPDU
2122 */
2123
2124 if (!lacp) /* no LACPDU so current_while_timer popped */
2125 break;
2126
2127 AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
2128 portp->lp_linkid));
2129
2130 /*
2131 * Validate Actor_Information_Length,
2132 * Partner_Information_Length, Collector_Information_Length,
2133 * and Terminator_Length fields.
2134 */
2135 if (!valid_lacp_pdu(portp, lacp)) {
2136 AGGR_LACP_DBG(("lacp_receive_sm (%d): "
2137 "Invalid LACPDU received\n",
2138 portp->lp_linkid));
2139 break;
2140 }
2141
2142 save_activity = pl->PartnerOperPortState.bit.activity;
2143 selected_updated = update_selected(portp, lacp);
2144 update_NTT(portp, lacp);
2145 sync_updated = record_PDU(portp, lacp);
2146
2147 pl->ActorOperPortState.bit.expired = B_FALSE;
2148
2149 if (selected_updated) {
2150 lacp_selection_logic(portp);
2151 lacp_mux_sm(portp);
2152 } else if (sync_updated) {
2153 lacp_mux_sm(portp);
2154 }
2155
2156 /*
2157 * If the periodic timer value bit has been modified
2158 * or the partner activity bit has been changed then
2159 * we need to respectively:
2160 * - restart the timer with the proper timeout value.
2161 * - possibly enable/disable transmission of LACPDUs.
2162 */
2163 if ((pl->PartnerOperPortState.bit.timeout &&
2164 (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2165 (!pl->PartnerOperPortState.bit.timeout &&
2166 (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2167 (pl->PartnerOperPortState.bit.activity !=
2168 save_activity)) {
2169 lacp_periodic_sm(portp);
2170 }
2171
2172 stop_current_while_timer(portp);
2173 /* Check if we need to transmit an LACPDU */
2174 if (pl->NTT)
2175 lacp_xmit_sm(portp);
2176 start_current_while_timer(portp, 0);
2177
2178 break;
2179 }
2180 }
2181
2182 static void
aggr_set_coll_dist(aggr_port_t * portp,boolean_t enable)2183 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2184 {
2185 mac_perim_handle_t mph;
2186
2187 AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2188 portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2189
2190 mac_perim_enter_by_mh(portp->lp_mh, &mph);
2191 if (!enable) {
2192 /*
2193 * Turn OFF Collector_Distributor.
2194 */
2195 portp->lp_collector_enabled = B_FALSE;
2196 aggr_send_port_disable(portp);
2197 goto done;
2198 }
2199
2200 /*
2201 * Turn ON Collector_Distributor.
2202 */
2203
2204 if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2205 (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2206 /* Port is compatible and can be aggregated */
2207 portp->lp_collector_enabled = B_TRUE;
2208 aggr_send_port_enable(portp);
2209 }
2210
2211 done:
2212 mac_perim_exit(mph);
2213 }
2214
2215 /*
2216 * Because the LACP packet processing needs to enter the aggr's mac perimeter
2217 * and that would potentially cause a deadlock with the thread in which the
2218 * grp/port is deleted, we defer the packet process to a worker thread. Here
2219 * we only enqueue the received Marker or LACPDU for later processing.
2220 */
2221 void
aggr_lacp_rx_enqueue(aggr_port_t * portp,mblk_t * dmp)2222 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp)
2223 {
2224 aggr_grp_t *grp = portp->lp_grp;
2225 lacp_t *lacp;
2226
2227 dmp->b_rptr += sizeof (struct ether_header);
2228
2229 if (MBLKL(dmp) < sizeof (lacp_t)) {
2230 freemsg(dmp);
2231 return;
2232 }
2233
2234 lacp = (lacp_t *)dmp->b_rptr;
2235 if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) {
2236 AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): "
2237 "Unknown Slow Protocol type %d\n",
2238 portp->lp_linkid, lacp->subtype));
2239 freemsg(dmp);
2240 return;
2241 }
2242
2243 mutex_enter(&grp->lg_lacp_lock);
2244
2245 /*
2246 * If the lg_lacp_done is set, this aggregation is in the process of
2247 * being deleted, return directly.
2248 */
2249 if (grp->lg_lacp_done) {
2250 mutex_exit(&grp->lg_lacp_lock);
2251 freemsg(dmp);
2252 return;
2253 }
2254
2255 if (grp->lg_lacp_tail == NULL) {
2256 grp->lg_lacp_head = grp->lg_lacp_tail = dmp;
2257 } else {
2258 grp->lg_lacp_tail->b_next = dmp;
2259 grp->lg_lacp_tail = dmp;
2260 }
2261
2262 /*
2263 * Hold a reference of the port so that the port won't be freed when it
2264 * is removed from the aggr. The b_prev field is borrowed to save the
2265 * port information.
2266 */
2267 AGGR_PORT_REFHOLD(portp);
2268 dmp->b_prev = (mblk_t *)portp;
2269 cv_broadcast(&grp->lg_lacp_cv);
2270 mutex_exit(&grp->lg_lacp_lock);
2271 }
2272
2273 static void
aggr_lacp_rx(mblk_t * dmp)2274 aggr_lacp_rx(mblk_t *dmp)
2275 {
2276 aggr_port_t *portp = (aggr_port_t *)dmp->b_prev;
2277 mac_perim_handle_t mph;
2278 lacp_t *lacp;
2279
2280 dmp->b_prev = NULL;
2281
2282 mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph);
2283 if (portp->lp_closing)
2284 goto done;
2285
2286 lacp = (lacp_t *)dmp->b_rptr;
2287 switch (lacp->subtype) {
2288 case LACP_SUBTYPE:
2289 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2290 portp->lp_linkid));
2291
2292 if (!portp->lp_lacp.sm.lacp_on) {
2293 break;
2294 }
2295 lacp_receive_sm(portp, lacp);
2296 break;
2297
2298 case MARKER_SUBTYPE:
2299 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2300 portp->lp_linkid));
2301
2302 if (receive_marker_pdu(portp, dmp) != 0)
2303 break;
2304
2305 /* Send the packet over the first TX ring */
2306 dmp = mac_hwring_send_priv(portp->lp_mch,
2307 portp->lp_tx_rings[0], dmp);
2308 if (dmp != NULL)
2309 freemsg(dmp);
2310 mac_perim_exit(mph);
2311 AGGR_PORT_REFRELE(portp);
2312 return;
2313 }
2314
2315 done:
2316 mac_perim_exit(mph);
2317 AGGR_PORT_REFRELE(portp);
2318 freemsg(dmp);
2319 }
2320
2321 void
aggr_lacp_rx_thread(void * arg)2322 aggr_lacp_rx_thread(void *arg)
2323 {
2324 callb_cpr_t cprinfo;
2325 aggr_grp_t *grp = (aggr_grp_t *)arg;
2326 aggr_port_t *port;
2327 mblk_t *mp, *nextmp;
2328
2329 CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr,
2330 "aggr_lacp_rx_thread");
2331
2332 mutex_enter(&grp->lg_lacp_lock);
2333
2334 /*
2335 * Quit the thread if the grp is deleted.
2336 */
2337 while (!grp->lg_lacp_done) {
2338 if ((mp = grp->lg_lacp_head) == NULL) {
2339 CALLB_CPR_SAFE_BEGIN(&cprinfo);
2340 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
2341 CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock);
2342 continue;
2343 }
2344
2345 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2346 mutex_exit(&grp->lg_lacp_lock);
2347
2348 while (mp != NULL) {
2349 nextmp = mp->b_next;
2350 mp->b_next = NULL;
2351 aggr_lacp_rx(mp);
2352 mp = nextmp;
2353 }
2354 mutex_enter(&grp->lg_lacp_lock);
2355 }
2356
2357 /*
2358 * The grp is being destroyed, simply free all of the LACP messages
2359 * left in the queue which did not have the chance to be processed.
2360 * We cannot use freemsgchain() here since we need to clear the
2361 * b_prev field.
2362 */
2363 for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) {
2364 port = (aggr_port_t *)mp->b_prev;
2365 AGGR_PORT_REFRELE(port);
2366 nextmp = mp->b_next;
2367 mp->b_next = NULL;
2368 mp->b_prev = NULL;
2369 freemsg(mp);
2370 }
2371
2372 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2373 grp->lg_lacp_rx_thread = NULL;
2374 cv_broadcast(&grp->lg_lacp_cv);
2375 CALLB_CPR_EXIT(&cprinfo);
2376 thread_exit();
2377 }
2378