1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
27 */
28
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/callb.h>
32 #include <sys/conf.h>
33 #include <sys/cmn_err.h>
34 #include <sys/disp.h>
35 #include <sys/list.h>
36 #include <sys/ksynch.h>
37 #include <sys/kmem.h>
38 #include <sys/stream.h>
39 #include <sys/modctl.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/atomic.h>
43 #include <sys/stat.h>
44 #include <sys/byteorder.h>
45 #include <sys/strsun.h>
46 #include <sys/isa_defs.h>
47 #include <sys/sdt.h>
48
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
51
/*
 * All-zero Ethernet address.
 * NOTE(review): no user of this constant is visible in the part of the
 * file reviewed here -- confirm it is still referenced further down.
 */
static struct ether_addr etherzeroaddr = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
 *
 * fill_lacp_ether() uses it as the destination address of every frame it
 * builds, and aggr_lacp_mcast_on()/aggr_lacp_mcast_off() pass it to
 * aggr_port_multicst() to enable/disable it on an attached port.
 */
static struct ether_addr slow_multicast_addr = {
	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
};
62
#ifdef DEBUG
/*
 * LACP state machine debugging support.
 *
 * AGGR_LACP_DBG() takes a fully parenthesized printf() argument list, e.g.
 *	AGGR_LACP_DBG(("port %d\n", id));
 * and prints it only when aggr_lacp_debug is non-zero.
 *
 * Both variants expand to a single do { } while (0) statement so that the
 * macro followed by ';' is exactly one statement. This keeps it safe inside
 * an unbraced if/else, where a bare "if (...) { }" or "{}" expansion would
 * either fail to compile or capture the caller's else.
 */
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)			\
	do {					\
		if (aggr_lacp_debug) {		\
			(void) printf x;	\
		}				\
	} while (0)
#else
#define	AGGR_LACP_DBG(x)	do { } while (0)
#endif	/* DEBUG */
70
/* Nanoseconds per second; lacp_xmit_sm() compares hrtime deltas against it. */
#define	NSECS_PER_SEC		1000000000ll

/* used by lacp_misconfig_walker() */
typedef struct lacp_misconfig_check_state_s {
	aggr_port_t *cs_portp;
	boolean_t cs_found;
} lacp_misconfig_check_state_t;

/*
 * State-name tables. The periodic and mux tables are indexed by the
 * corresponding state value when building AGGR_LACP_DBG() messages.
 * NOTE(review): the receive table is presumably used the same way by the
 * receive state machine, which is outside the reviewed portion.
 */
static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
static const char *lacp_mux_str[] = LACP_MUX_STRINGS;

/*
 * Default priorities: lacp_port_priority is copied into a port's
 * ActorPortPriority by aggr_lacp_init_port(), lacp_system_priority into a
 * group's ActorSystemPriority by aggr_lacp_init_grp().
 */
static uint16_t lacp_port_priority = 0x1000;
static uint16_t lacp_system_priority = 0x1000;

/*
 * Maintains a list of all ports in ATTACHED state. This information
 * is used to detect misconfiguration.
 */
typedef struct lacp_sel_ports {
	datalink_id_t sp_grp_linkid;
	datalink_id_t sp_linkid;
	/* Note: sp_partner_system must be 2-byte aligned */
	struct ether_addr sp_partner_system;
	uint32_t sp_partner_key;
	struct lacp_sel_ports *sp_next;
} lacp_sel_ports_t;

/*
 * Head of the list described above, and a mutex that is created in
 * aggr_lacp_init() and destroyed in aggr_lacp_fini().
 * NOTE(review): lacp_sel_lock presumably guards sel_ports -- confirm
 * against lacp_sel_ports_add()/lacp_sel_ports_del().
 */
static lacp_sel_ports_t *sel_ports = NULL;
static kmutex_t lacp_sel_lock;

/* Forward declarations of the file-local routines. */
static void periodic_timer_pop(void *);
static void periodic_timer_pop_handler(aggr_port_t *);
static void lacp_xmit_sm(aggr_port_t *);
static void lacp_periodic_sm(aggr_port_t *);
static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
static void lacp_on(aggr_port_t *);
static void lacp_off(aggr_port_t *);
static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
static void lacp_receive_sm(aggr_port_t *, lacp_t *);
static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
static void start_wait_while_timer(aggr_port_t *);
static void stop_wait_while_timer(aggr_port_t *);
static void lacp_reset_port(aggr_port_t *);
static void stop_current_while_timer(aggr_port_t *);
static void current_while_timer_pop(void *);
static void current_while_timer_pop_handler(aggr_port_t *);
static void update_default_selected(aggr_port_t *);
static boolean_t update_selected(aggr_port_t *, lacp_t *);
static boolean_t lacp_sel_ports_add(aggr_port_t *);
static void lacp_sel_ports_del(aggr_port_t *);
static void wait_while_timer_pop(void *);
static void wait_while_timer_pop_handler(aggr_port_t *);
125
126 void
aggr_lacp_init(void)127 aggr_lacp_init(void)
128 {
129 mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
130 }
131
132 void
aggr_lacp_fini(void)133 aggr_lacp_fini(void)
134 {
135 mutex_destroy(&lacp_sel_lock);
136 }
137
/*
 * The following functions are used for handling LACP timers.
 *
 * Note that we cannot fully rely on the aggr's mac perimeter in the timeout
 * handler routine, otherwise it may cause deadlock with the untimeout() call
 * which is usually called with the mac perimeter held. Instead, a
 * lacp_timer_lock mutex is introduced, which protects a bitwise flag
 * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer()
 * routines and is checked by a dedicated thread, that executes the real
 * timeout operation.
 *
 * aggr_port_timer_thread() is that dedicated thread; one is created per
 * port by aggr_lacp_init_port(), with the port as its argument. It sleeps
 * on lacp_timer_cv until lacp_timer_bits becomes non-zero, then runs the
 * matching *_timer_pop_handler() routines inside the group's mac perimeter.
 * It terminates when LACP_THREAD_EXIT is set or when the port is found to
 * be closing, and drops the reference taken on its behalf before exiting.
 */
static void
aggr_port_timer_thread(void *arg)
{
	aggr_port_t		*port = arg;
	aggr_lacp_port_t	*pl = &port->lp_lacp;
	aggr_grp_t		*grp = port->lp_grp;
	uint32_t		lacp_timer_bits;
	mac_perim_handle_t	mph;
	callb_cpr_t		cprinfo;

	CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr,
	    "aggr_port_timer_thread");

	mutex_enter(&pl->lacp_timer_lock);

	/* lacp_timer_lock is held at the top of every iteration. */
	for (;;) {

		/* Nothing pending: sleep until a pop routine signals us. */
		if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock);
			continue;
		}
		/* Consume the snapshot; later pops set fresh bits. */
		pl->lacp_timer_bits = 0;

		if (lacp_timer_bits & LACP_THREAD_EXIT)
			break;

		/*
		 * The timeouts that fired are no longer outstanding, so
		 * forget their ids; this lets the start_*_timer() routines
		 * arm them again.
		 */
		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
			pl->periodic_timer.id = 0;
		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
			pl->wait_while_timer.id = 0;
		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
			pl->current_while_timer.id = 0;

		/*
		 * Drop the timer lock before entering the perimeter; the
		 * handlers below take lacp_timer_lock themselves when they
		 * start or stop timers.
		 */
		mutex_exit(&pl->lacp_timer_lock);

		mac_perim_enter_by_mh(grp->lg_mh, &mph);
		if (port->lp_closing) {
			mac_perim_exit(mph);
			/* Re-take the lock: the exit path expects it held. */
			mutex_enter(&pl->lacp_timer_lock);
			break;
		}

		if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT)
			periodic_timer_pop_handler(port);
		if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT)
			wait_while_timer_pop_handler(port);
		if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT)
			current_while_timer_pop_handler(port);
		mac_perim_exit(mph);

		mutex_enter(&pl->lacp_timer_lock);
		/* An exit request may have arrived while the handlers ran. */
		if (pl->lacp_timer_bits & LACP_THREAD_EXIT)
			break;
	}

	/*
	 * Exit path, reached with lacp_timer_lock held: clear the thread
	 * pointer and wake anyone waiting on the cv for it to change.
	 */
	pl->lacp_timer_bits = 0;
	pl->lacp_timer_thread = NULL;
	cv_broadcast(&pl->lacp_timer_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);

	/*
	 * Release the reference of the grp so aggr_grp_delete() can call
	 * mac_unregister() safely.
	 */
	aggr_grp_port_rele(port);
	thread_exit();
}
220
221 /*
222 * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
223 * could not be performed due to a memory allocation error, B_TRUE otherwise.
224 */
225 static boolean_t
lacp_port_select(aggr_port_t * portp)226 lacp_port_select(aggr_port_t *portp)
227 {
228 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
229
230 if (!lacp_sel_ports_add(portp))
231 return (B_FALSE);
232 portp->lp_lacp.sm.selected = AGGR_SELECTED;
233 return (B_TRUE);
234 }
235
236 /*
237 * Set the port LACP state to UNSELECTED.
238 */
239 static void
lacp_port_unselect(aggr_port_t * portp)240 lacp_port_unselect(aggr_port_t *portp)
241 {
242 aggr_grp_t *grp = portp->lp_grp;
243
244 ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh));
245
246 lacp_sel_ports_del(portp);
247 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
248 }
249
250 /*
251 * Initialize group specific LACP state and parameters.
252 */
253 void
aggr_lacp_init_grp(aggr_grp_t * aggrp)254 aggr_lacp_init_grp(aggr_grp_t *aggrp)
255 {
256 aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
257 aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
258 aggrp->aggr.CollectorMaxDelay = 10;
259 aggrp->lg_lacp_mode = AGGR_LACP_OFF;
260 aggrp->aggr.ready = B_FALSE;
261 }
262
263 /*
264 * Complete LACP info initialization at port creation time.
265 */
266 void
aggr_lacp_init_port(aggr_port_t * portp)267 aggr_lacp_init_port(aggr_port_t *portp)
268 {
269 aggr_grp_t *aggrp = portp->lp_grp;
270 aggr_lacp_port_t *pl = &portp->lp_lacp;
271
272 ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh));
273 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
274
275 /* actor port # */
276 pl->ActorPortNumber = portp->lp_portid;
277 AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
278 "ActorPortNumber = 0x%x\n", portp->lp_linkid,
279 pl->ActorPortNumber));
280
281 pl->ActorPortPriority = (uint16_t)lacp_port_priority;
282 pl->ActorPortAggrId = 0; /* aggregator id - not used */
283 pl->NTT = B_FALSE; /* need to transmit */
284
285 pl->ActorAdminPortKey = aggrp->lg_key;
286 pl->ActorOperPortKey = pl->ActorAdminPortKey;
287 AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
288 "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
289 portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
290
291 /* Actor admin. port state */
292 pl->ActorAdminPortState.bit.activity = B_FALSE;
293 pl->ActorAdminPortState.bit.timeout = B_TRUE;
294 pl->ActorAdminPortState.bit.aggregation = B_TRUE;
295 pl->ActorAdminPortState.bit.sync = B_FALSE;
296 pl->ActorAdminPortState.bit.collecting = B_FALSE;
297 pl->ActorAdminPortState.bit.distributing = B_FALSE;
298 pl->ActorAdminPortState.bit.defaulted = B_FALSE;
299 pl->ActorAdminPortState.bit.expired = B_FALSE;
300 pl->ActorOperPortState = pl->ActorAdminPortState;
301
302 /*
303 * Partner Administrative Information
304 * (All initialized to zero except for the following)
305 * Fast Timeouts.
306 */
307 pl->PartnerAdminPortState.bit.timeout =
308 pl->PartnerOperPortState.bit.timeout = B_TRUE;
309
310 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
311
312 /*
313 * State machine information.
314 */
315 pl->sm.lacp_on = B_FALSE; /* LACP Off default */
316 pl->sm.begin = B_TRUE; /* Prevents transmissions */
317 pl->sm.lacp_enabled = B_FALSE;
318 pl->sm.port_enabled = B_FALSE; /* Link Down */
319 pl->sm.actor_churn = B_FALSE;
320 pl->sm.partner_churn = B_FALSE;
321 pl->sm.ready_n = B_FALSE;
322 pl->sm.port_moved = B_FALSE;
323
324 lacp_port_unselect(portp);
325
326 pl->sm.periodic_state = LACP_NO_PERIODIC;
327 pl->sm.receive_state = LACP_INITIALIZE;
328 pl->sm.mux_state = LACP_DETACHED;
329 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
330
331 /*
332 * Timer information.
333 */
334 pl->current_while_timer.id = 0;
335 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
336
337 pl->periodic_timer.id = 0;
338 pl->periodic_timer.val = FAST_PERIODIC_TIME;
339
340 pl->wait_while_timer.id = 0;
341 pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
342
343 pl->lacp_timer_bits = 0;
344
345 mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL);
346 cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL);
347
348 pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread,
349 portp, 0, &p0, TS_RUN, minclsyspri);
350
351 /*
352 * Hold a reference of the grp and the port and this reference will
353 * be release when the thread exits.
354 *
355 * The reference on the port is used for aggr_port_delete() to
356 * continue without waiting for the thread to exit; the reference
357 * on the grp is used for aggr_grp_delete() to wait for the thread
358 * to exit before calling mac_unregister().
359 */
360 aggr_grp_port_hold(portp);
361 }
362
363 /*
364 * Port initialization when we need to
365 * turn LACP on/off, etc. Not everything is
366 * reset like in the above routine.
367 * Do NOT modify things like link status.
368 */
369 static void
lacp_reset_port(aggr_port_t * portp)370 lacp_reset_port(aggr_port_t *portp)
371 {
372 aggr_lacp_port_t *pl = &portp->lp_lacp;
373
374 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
375
376 pl->NTT = B_FALSE; /* need to transmit */
377
378 /* reset operational port state */
379 pl->ActorOperPortState.bit.timeout =
380 pl->ActorAdminPortState.bit.timeout;
381
382 pl->ActorOperPortState.bit.sync = B_FALSE;
383 pl->ActorOperPortState.bit.collecting = B_FALSE;
384 pl->ActorOperPortState.bit.distributing = B_FALSE;
385 pl->ActorOperPortState.bit.defaulted = B_TRUE;
386 pl->ActorOperPortState.bit.expired = B_FALSE;
387
388 pl->PartnerOperPortState.bit.timeout = B_TRUE; /* fast t/o */
389 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
390
391 /*
392 * State machine information.
393 */
394 pl->sm.begin = B_TRUE; /* Prevents transmissions */
395 pl->sm.actor_churn = B_FALSE;
396 pl->sm.partner_churn = B_FALSE;
397 pl->sm.ready_n = B_FALSE;
398
399 lacp_port_unselect(portp);
400
401 pl->sm.periodic_state = LACP_NO_PERIODIC;
402 pl->sm.receive_state = LACP_INITIALIZE;
403 pl->sm.mux_state = LACP_DETACHED;
404 pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
405
406 /*
407 * Timer information.
408 */
409 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
410 pl->periodic_timer.val = FAST_PERIODIC_TIME;
411 }
412
413 static void
aggr_lacp_mcast_on(aggr_port_t * port)414 aggr_lacp_mcast_on(aggr_port_t *port)
415 {
416 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
417 ASSERT(MAC_PERIM_HELD(port->lp_mh));
418
419 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
420 return;
421
422 (void) aggr_port_multicst(port, B_TRUE,
423 (uchar_t *)&slow_multicast_addr);
424 }
425
426 static void
aggr_lacp_mcast_off(aggr_port_t * port)427 aggr_lacp_mcast_off(aggr_port_t *port)
428 {
429 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
430 ASSERT(MAC_PERIM_HELD(port->lp_mh));
431
432 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
433 return;
434
435 (void) aggr_port_multicst(port, B_FALSE,
436 (uchar_t *)&slow_multicast_addr);
437 }
438
439 static void
start_periodic_timer(aggr_port_t * portp)440 start_periodic_timer(aggr_port_t *portp)
441 {
442 aggr_lacp_port_t *pl = &portp->lp_lacp;
443
444 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
445
446 mutex_enter(&pl->lacp_timer_lock);
447 if (pl->periodic_timer.id == 0) {
448 pl->periodic_timer.id = timeout(periodic_timer_pop, portp,
449 drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
450 }
451 mutex_exit(&pl->lacp_timer_lock);
452 }
453
454 static void
stop_periodic_timer(aggr_port_t * portp)455 stop_periodic_timer(aggr_port_t *portp)
456 {
457 aggr_lacp_port_t *pl = &portp->lp_lacp;
458 timeout_id_t id;
459
460 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
461
462 mutex_enter(&pl->lacp_timer_lock);
463 if ((id = pl->periodic_timer.id) != 0) {
464 pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT;
465 pl->periodic_timer.id = 0;
466 }
467 mutex_exit(&pl->lacp_timer_lock);
468
469 if (id != 0)
470 (void) untimeout(id);
471 }
472
473 /*
474 * When the timer pops, we arrive here to
475 * clear out LACPDU count as well as transmit an
476 * LACPDU. We then set the periodic state and let
477 * the periodic state machine restart the timer.
478 */
479 static void
periodic_timer_pop(void * data)480 periodic_timer_pop(void *data)
481 {
482 aggr_port_t *portp = data;
483 aggr_lacp_port_t *pl = &portp->lp_lacp;
484
485 mutex_enter(&pl->lacp_timer_lock);
486 pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT;
487 cv_broadcast(&pl->lacp_timer_cv);
488 mutex_exit(&pl->lacp_timer_lock);
489 }
490
491 /*
492 * When the timer pops, we arrive here to
493 * clear out LACPDU count as well as transmit an
494 * LACPDU. We then set the periodic state and let
495 * the periodic state machine restart the timer.
496 */
497 static void
periodic_timer_pop_handler(aggr_port_t * portp)498 periodic_timer_pop_handler(aggr_port_t *portp)
499 {
500 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
501
502 portp->lp_lacp_stats.LACPDUsTx = 0;
503
504 /* current timestamp */
505 portp->lp_lacp.time = gethrtime();
506 portp->lp_lacp.NTT = B_TRUE;
507 lacp_xmit_sm(portp);
508
509 /*
510 * Set Periodic State machine state based on the
511 * value of the Partner Operation Port State timeout
512 * bit.
513 */
514 if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
515 portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
516 portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
517 } else {
518 portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
519 portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
520 }
521
522 lacp_periodic_sm(portp);
523 }
524
525 /*
526 * Invoked from:
527 * - startup upon aggregation
528 * - when the periodic timer pops
529 * - when the periodic timer value is changed
530 * - when the port is attached or detached
531 * - when LACP mode is changed.
532 */
533 static void
lacp_periodic_sm(aggr_port_t * portp)534 lacp_periodic_sm(aggr_port_t *portp)
535 {
536 lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
537 aggr_lacp_port_t *pl = &portp->lp_lacp;
538
539 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
540
541 /* LACP_OFF state not in specification so check here. */
542 if (!pl->sm.lacp_on) {
543 /* Stop timer whether it is running or not */
544 stop_periodic_timer(portp);
545 pl->sm.periodic_state = LACP_NO_PERIODIC;
546 pl->NTT = B_FALSE;
547 AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
548 "%s--->%s\n", portp->lp_linkid,
549 lacp_periodic_str[oldstate],
550 lacp_periodic_str[pl->sm.periodic_state]));
551 return;
552 }
553
554 if (pl->sm.begin || !pl->sm.lacp_enabled ||
555 !pl->sm.port_enabled ||
556 !pl->ActorOperPortState.bit.activity &&
557 !pl->PartnerOperPortState.bit.activity) {
558
559 /* Stop timer whether it is running or not */
560 stop_periodic_timer(portp);
561 pl->sm.periodic_state = LACP_NO_PERIODIC;
562 pl->NTT = B_FALSE;
563 AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
564 portp->lp_linkid, lacp_periodic_str[oldstate],
565 lacp_periodic_str[pl->sm.periodic_state]));
566 return;
567 }
568
569 /*
570 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
571 * has been received. Then after we timeout, then it is
572 * possible to go to SLOW_PERIODIC_TIME.
573 */
574 if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
575 pl->periodic_timer.val = FAST_PERIODIC_TIME;
576 pl->sm.periodic_state = LACP_FAST_PERIODIC;
577 } else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
578 pl->PartnerOperPortState.bit.timeout) {
579 /*
580 * If we receive a bit indicating we are going to
581 * fast periodic from slow periodic, stop the timer
582 * and let the periodic_timer_pop routine deal
583 * with reseting the periodic state and transmitting
584 * a LACPDU.
585 */
586 stop_periodic_timer(portp);
587 periodic_timer_pop_handler(portp);
588 }
589
590 /* Rearm timer with value provided by partner */
591 start_periodic_timer(portp);
592 }
593
594 /*
595 * This routine transmits an LACPDU if lacp_enabled
596 * is TRUE and if NTT is set.
597 */
598 static void
lacp_xmit_sm(aggr_port_t * portp)599 lacp_xmit_sm(aggr_port_t *portp)
600 {
601 aggr_lacp_port_t *pl = &portp->lp_lacp;
602 size_t len;
603 mblk_t *mp;
604 hrtime_t now, elapsed;
605
606 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
607
608 /* LACP_OFF state not in specification so check here. */
609 if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
610 return;
611
612 /*
613 * Do nothing if LACP has been turned off or if the
614 * periodic state machine is not enabled.
615 */
616 if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
617 !pl->sm.lacp_enabled || pl->sm.begin) {
618 pl->NTT = B_FALSE;
619 return;
620 }
621
622 /*
623 * If we have sent 5 Slow packets in the last second, avoid
624 * sending any more here. No more than three LACPDUs may be transmitted
625 * in any Fast_Periodic_Time interval.
626 */
627 if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
628 /*
629 * Grab the current time value and see if
630 * more than 1 second has passed. If so,
631 * reset the timestamp and clear the count.
632 */
633 now = gethrtime();
634 elapsed = now - pl->time;
635 if (elapsed > NSECS_PER_SEC) {
636 portp->lp_lacp_stats.LACPDUsTx = 0;
637 pl->time = now;
638 } else {
639 return;
640 }
641 }
642
643 len = sizeof (lacp_t) + sizeof (struct ether_header);
644 mp = allocb(len, BPRI_MED);
645 if (mp == NULL)
646 return;
647
648 mp->b_wptr = mp->b_rptr + len;
649 bzero(mp->b_rptr, len);
650
651 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
652 fill_lacp_pdu(portp,
653 (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
654
655 /* Send the packet over the first TX ring */
656 mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp);
657 if (mp != NULL)
658 freemsg(mp);
659
660 pl->NTT = B_FALSE;
661 portp->lp_lacp_stats.LACPDUsTx++;
662 }
663
664 /*
665 * Initialize the ethernet header of a LACP packet sent from the specified
666 * port.
667 */
668 static void
fill_lacp_ether(aggr_port_t * port,struct ether_header * ether)669 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
670 {
671 bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
672 bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
673 ETHERADDRL);
674 ether->ether_type = htons(ETHERTYPE_SLOW);
675 }
676
677 static void
fill_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)678 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
679 {
680 aggr_lacp_port_t *pl = &portp->lp_lacp;
681 aggr_grp_t *aggrp = portp->lp_grp;
682 mac_perim_handle_t pmph;
683
684 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
685 mac_perim_enter_by_mh(portp->lp_mh, &pmph);
686
687 lacp->subtype = LACP_SUBTYPE;
688 lacp->version = LACP_VERSION;
689
690 /*
691 * Actor Information
692 */
693 lacp->actor_info.tlv_type = ACTOR_TLV;
694 lacp->actor_info.information_len = sizeof (link_info_t);
695 lacp->actor_info.system_priority =
696 htons(aggrp->aggr.ActorSystemPriority);
697 bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
698 ETHERADDRL);
699 lacp->actor_info.key = htons(pl->ActorOperPortKey);
700 lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
701 lacp->actor_info.port = htons(pl->ActorPortNumber);
702 lacp->actor_info.state.state = pl->ActorOperPortState.state;
703
704 /*
705 * Partner Information
706 */
707 lacp->partner_info.tlv_type = PARTNER_TLV;
708 lacp->partner_info.information_len = sizeof (link_info_t);
709 lacp->partner_info.system_priority =
710 htons(pl->PartnerOperSysPriority);
711 lacp->partner_info.system_id = pl->PartnerOperSystem;
712 lacp->partner_info.key = htons(pl->PartnerOperKey);
713 lacp->partner_info.port_priority =
714 htons(pl->PartnerOperPortPriority);
715 lacp->partner_info.port = htons(pl->PartnerOperPortNum);
716 lacp->partner_info.state.state = pl->PartnerOperPortState.state;
717
718 /* Collector Information */
719 lacp->tlv_collector = COLLECTOR_TLV;
720 lacp->collector_len = 0x10;
721 lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
722
723 /* Termination Information */
724 lacp->tlv_terminator = TERMINATOR_TLV;
725 lacp->terminator_len = 0x0;
726
727 mac_perim_exit(pmph);
728 }
729
/*
 * lacp_mux_sm - LACP mux state machine
 * This state machine is invoked from:
 *	- startup upon aggregation
 *	- from the Selection logic
 *	- when the wait_while_timer pops
 *	- when the aggregation MAC address is changed
 *	- when receiving DL_NOTE_LINK_UP/DOWN
 *	- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
 *	- when LACP mode is changed.
 *	- when a DL_NOTE_SPEED is received
 *
 * The routine works in two steps: the first switch picks the next mux
 * state from the current one (returning early if nothing changes), the
 * second switch performs the entry actions of the new state. If an entry
 * action changed the actor state bits, NTT is set and a transmission is
 * attempted through lacp_xmit_sm().
 *
 * The caller must hold the group's mac perimeter.
 */
static void
lacp_mux_sm(aggr_port_t *portp)
{
	aggr_grp_t *aggrp = portp->lp_grp;
	boolean_t NTT_updated = B_FALSE;
	aggr_lacp_port_t *pl = &portp->lp_lacp;
	/* only referenced by the debug message below */
	lacp_mux_state_t oldstate = pl->sm.mux_state;

	ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));

	/* LACP_OFF state not in specification so check here. */
	if (!pl->sm.lacp_on) {
		pl->sm.mux_state = LACP_DETACHED;
		pl->ActorOperPortState.bit.sync = B_FALSE;

		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		/*
		 * Only the state bits are cleared here; unlike the DETACHED
		 * entry action below, aggr_set_coll_dist() is not called and
		 * no LACPDU is sent.
		 */
		pl->ActorOperPortState.bit.collecting =
		    pl->ActorOperPortState.bit.distributing = B_FALSE;
		return;
	}

	/* Force DETACHED as the starting point while (re)initializing. */
	if (pl->sm.begin || !pl->sm.lacp_enabled)
		pl->sm.mux_state = LACP_DETACHED;

again:
	/* determine next state, or return if state unchanged */
	switch (pl->sm.mux_state) {
	case LACP_DETACHED:
		if (pl->sm.begin) {
			/* fall out to run the DETACHED entry actions */
			break;
		}

		if ((pl->sm.selected == AGGR_SELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY)) {
			pl->sm.mux_state = LACP_WAITING;
			break;
		}
		return;

	case LACP_WAITING:
		if (pl->sm.selected == AGGR_UNSELECTED) {
			pl->sm.mux_state = LACP_DETACHED;
			break;
		}

		/* Attach only once the whole aggregator reports ready. */
		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
			pl->sm.mux_state = LACP_ATTACHED;
			break;
		}
		return;

	case LACP_ATTACHED:
		if ((pl->sm.selected == AGGR_UNSELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY)) {
			pl->sm.mux_state = LACP_DETACHED;
			break;
		}

		/* Partner is in sync: start collecting and distributing. */
		if ((pl->sm.selected == AGGR_SELECTED) &&
		    pl->PartnerOperPortState.bit.sync) {
			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
			break;
		}
		return;

	case LACP_COLLECTING_DISTRIBUTING:
		/* Losing selection or partner sync drops back to ATTACHED. */
		if ((pl->sm.selected == AGGR_UNSELECTED) ||
		    (pl->sm.selected == AGGR_STANDBY) ||
		    !pl->PartnerOperPortState.bit.sync) {
			pl->sm.mux_state = LACP_ATTACHED;
			break;
		}
		return;
	}

	AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
	    portp->lp_linkid, lacp_mux_str[oldstate],
	    lacp_mux_str[pl->sm.mux_state]));

	/* perform actions on entering a new state */
	switch (pl->sm.mux_state) {
	case LACP_DETACHED:
		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		pl->ActorOperPortState.bit.sync =
		    pl->ActorOperPortState.bit.collecting = B_FALSE;

		/* Turn OFF Collector_Distributor */
		aggr_set_coll_dist(portp, B_FALSE);

		pl->ActorOperPortState.bit.distributing = B_FALSE;
		NTT_updated = B_TRUE;
		break;

	case LACP_WAITING:
		/* No actor state bit changes here, hence no NTT. */
		start_wait_while_timer(portp);
		break;

	case LACP_ATTACHED:
		if (pl->ActorOperPortState.bit.collecting ||
		    pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Disabled.\n",
			    portp->lp_linkid));
		}

		pl->ActorOperPortState.bit.sync = B_TRUE;
		pl->ActorOperPortState.bit.collecting = B_FALSE;

		/* Turn OFF Collector_Distributor */
		aggr_set_coll_dist(portp, B_FALSE);

		pl->ActorOperPortState.bit.distributing = B_FALSE;
		NTT_updated = B_TRUE;
		if (pl->PartnerOperPortState.bit.sync) {
			/*
			 * We had already received an updated sync from
			 * the partner. Attempt to transition to
			 * collecting/distributing now.
			 */
			goto again;
		}
		break;

	case LACP_COLLECTING_DISTRIBUTING:
		if (!pl->ActorOperPortState.bit.collecting &&
		    !pl->ActorOperPortState.bit.distributing) {
			AGGR_LACP_DBG(("trunk link: (%d): "
			    "Collector_Distributor Enabled.\n",
			    portp->lp_linkid));
		}
		pl->ActorOperPortState.bit.distributing = B_TRUE;

		/* Turn Collector_Distributor back ON */
		aggr_set_coll_dist(portp, B_TRUE);

		pl->ActorOperPortState.bit.collecting = B_TRUE;
		NTT_updated = B_TRUE;
		break;
	}

	/*
	 * If we updated the state of the NTT variable, then
	 * initiate a LACPDU transmission.
	 */
	if (NTT_updated) {
		pl->NTT = B_TRUE;
		lacp_xmit_sm(portp);
	}
} /* lacp_mux_sm */
903
904
905 static int
receive_marker_pdu(aggr_port_t * portp,mblk_t * mp)906 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
907 {
908 marker_pdu_t *markerp = (marker_pdu_t *)mp->b_rptr;
909
910 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
911
912 AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
913 portp->lp_linkid));
914
915 /* LACP_OFF state not in specification so check here. */
916 if (!portp->lp_lacp.sm.lacp_on)
917 return (-1);
918
919 if (MBLKL(mp) < sizeof (marker_pdu_t))
920 return (-1);
921
922 if (markerp->version != MARKER_VERSION) {
923 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
924 "version = %d does not match s/w version %d\n",
925 portp->lp_linkid, markerp->version, MARKER_VERSION));
926 return (-1);
927 }
928
929 if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
930 /* We do not yet send out MARKER info PDUs */
931 AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
932 " MARKER TLV = %d - We don't send out info type!\n",
933 portp->lp_linkid, markerp->tlv_marker));
934 return (-1);
935 }
936
937 if (markerp->tlv_marker != MARKER_INFO_TLV) {
938 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
939 " MARKER TLV = %d \n", portp->lp_linkid,
940 markerp->tlv_marker));
941 return (-1);
942 }
943
944 if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
945 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
946 " MARKER length = %d \n", portp->lp_linkid,
947 markerp->marker_len));
948 return (-1);
949 }
950
951 if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
952 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
953 " MARKER Port %d not equal to Partner port %d\n",
954 portp->lp_linkid, markerp->requestor_port,
955 portp->lp_lacp.PartnerOperPortNum));
956 return (-1);
957 }
958
959 if (ether_cmp(&markerp->system_id,
960 &portp->lp_lacp.PartnerOperSystem) != 0) {
961 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
962 " MARKER MAC not equal to Partner MAC\n",
963 portp->lp_linkid));
964 return (-1);
965 }
966
967 /*
968 * Turn into Marker Response PDU
969 * and return mblk to sending system
970 */
971 markerp->tlv_marker = MARKER_RESPONSE_TLV;
972
973 /* reuse the space that was used by received ethernet header */
974 ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
975 mp->b_rptr -= sizeof (struct ether_header);
976 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
977 return (0);
978 }
979
980 /*
981 * Update the LACP mode (off, active, or passive) of the specified group.
982 */
983 void
aggr_lacp_update_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode)984 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
985 {
986 aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
987 aggr_port_t *port;
988
989 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
990 ASSERT(!grp->lg_closing);
991
992 if (mode == old_mode)
993 return;
994
995 grp->lg_lacp_mode = mode;
996
997 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
998 port->lp_lacp.ActorAdminPortState.bit.activity =
999 port->lp_lacp.ActorOperPortState.bit.activity =
1000 (mode == AGGR_LACP_ACTIVE);
1001
1002 if (old_mode == AGGR_LACP_OFF) {
1003 /* OFF -> {PASSIVE,ACTIVE} */
1004 /* turn OFF Collector_Distributor */
1005 aggr_set_coll_dist(port, B_FALSE);
1006 lacp_on(port);
1007 } else if (mode == AGGR_LACP_OFF) {
1008 /* {PASSIVE,ACTIVE} -> OFF */
1009 lacp_off(port);
1010 /* Turn ON Collector_Distributor */
1011 aggr_set_coll_dist(port, B_TRUE);
1012 } else {
1013 /* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
1014 port->lp_lacp.sm.begin = B_TRUE;
1015 lacp_mux_sm(port);
1016 lacp_periodic_sm(port);
1017
1018 /* kick off state machines */
1019 lacp_receive_sm(port, NULL);
1020 lacp_mux_sm(port);
1021 }
1022 }
1023 }
1024
1025
1026 /*
1027 * Update the LACP timer (short or long) of the specified group.
1028 */
1029 void
aggr_lacp_update_timer(aggr_grp_t * grp,aggr_lacp_timer_t timer)1030 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
1031 {
1032 aggr_port_t *port;
1033
1034 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1035
1036 if (timer == grp->aggr.PeriodicTimer)
1037 return;
1038
1039 grp->aggr.PeriodicTimer = timer;
1040
1041 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1042 port->lp_lacp.ActorAdminPortState.bit.timeout =
1043 port->lp_lacp.ActorOperPortState.bit.timeout =
1044 (timer == AGGR_LACP_TIMER_SHORT);
1045 }
1046 }
1047
1048 void
aggr_port_lacp_set_mode(aggr_grp_t * grp,aggr_port_t * port)1049 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port)
1050 {
1051 aggr_lacp_mode_t mode;
1052 aggr_lacp_timer_t timer;
1053
1054 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1055
1056 mode = grp->lg_lacp_mode;
1057 timer = grp->aggr.PeriodicTimer;
1058
1059 port->lp_lacp.ActorAdminPortState.bit.activity =
1060 port->lp_lacp.ActorOperPortState.bit.activity =
1061 (mode == AGGR_LACP_ACTIVE);
1062
1063 port->lp_lacp.ActorAdminPortState.bit.timeout =
1064 port->lp_lacp.ActorOperPortState.bit.timeout =
1065 (timer == AGGR_LACP_TIMER_SHORT);
1066
1067 if (mode == AGGR_LACP_OFF) {
1068 /* Turn ON Collector_Distributor */
1069 aggr_set_coll_dist(port, B_TRUE);
1070 } else { /* LACP_ACTIVE/PASSIVE */
1071 lacp_on(port);
1072 }
1073 }
1074
1075 /*
1076 * Sets the initial LACP mode (off, active, passive) and LACP timer
1077 * (short, long) of the specified group.
1078 */
1079 void
aggr_lacp_set_mode(aggr_grp_t * grp,aggr_lacp_mode_t mode,aggr_lacp_timer_t timer)1080 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
1081 aggr_lacp_timer_t timer)
1082 {
1083 aggr_port_t *port;
1084
1085 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1086
1087 grp->lg_lacp_mode = mode;
1088 grp->aggr.PeriodicTimer = timer;
1089
1090 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1091 aggr_port_lacp_set_mode(grp, port);
1092 }
1093
1094 /*
1095 * Verify that the Partner MAC and Key recorded by the specified
1096 * port are not found in other ports that are not part of our
1097 * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1098 * otherwise.
1099 */
1100 static boolean_t
lacp_misconfig_check(aggr_port_t * portp)1101 lacp_misconfig_check(aggr_port_t *portp)
1102 {
1103 aggr_grp_t *grp = portp->lp_grp;
1104 lacp_sel_ports_t *cport;
1105
1106 mutex_enter(&lacp_sel_lock);
1107
1108 for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1109
1110 /* skip entries of the group of the port being checked */
1111 if (cport->sp_grp_linkid == grp->lg_linkid)
1112 continue;
1113
1114 if ((ether_cmp(&cport->sp_partner_system,
1115 &grp->aggr.PartnerSystem) == 0) &&
1116 (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1117 char mac_str[ETHERADDRL*3];
1118 struct ether_addr *mac = &cport->sp_partner_system;
1119
1120 /*
1121 * The Partner port information is already in use
1122 * by ports in another aggregation so disable this
1123 * port.
1124 */
1125
1126 (void) snprintf(mac_str, sizeof (mac_str),
1127 "%x:%x:%x:%x:%x:%x",
1128 mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1129 mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1130 mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1131
1132 portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1133
1134 cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1135 "MAC %s and key %d in use on aggregation %d "
1136 "port %d\n", grp->lg_linkid, portp->lp_linkid,
1137 mac_str, portp->lp_lacp.PartnerOperKey,
1138 cport->sp_grp_linkid, cport->sp_linkid);
1139 break;
1140 }
1141 }
1142
1143 mutex_exit(&lacp_sel_lock);
1144 return (cport != NULL);
1145 }
1146
1147 /*
1148 * Remove the specified port from the list of selected ports.
1149 */
1150 static void
lacp_sel_ports_del(aggr_port_t * portp)1151 lacp_sel_ports_del(aggr_port_t *portp)
1152 {
1153 lacp_sel_ports_t *cport, **prev = NULL;
1154
1155 mutex_enter(&lacp_sel_lock);
1156
1157 prev = &sel_ports;
1158 for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1159 cport = cport->sp_next) {
1160 if (portp->lp_linkid == cport->sp_linkid)
1161 break;
1162 }
1163
1164 if (cport == NULL) {
1165 mutex_exit(&lacp_sel_lock);
1166 return;
1167 }
1168
1169 *prev = cport->sp_next;
1170 kmem_free(cport, sizeof (*cport));
1171
1172 mutex_exit(&lacp_sel_lock);
1173 }
1174
1175 /*
1176 * Add the specified port to the list of selected ports. Returns B_FALSE
1177 * if the operation could not be performed due to an memory allocation
1178 * error.
1179 */
1180 static boolean_t
lacp_sel_ports_add(aggr_port_t * portp)1181 lacp_sel_ports_add(aggr_port_t *portp)
1182 {
1183 lacp_sel_ports_t *new_port;
1184 lacp_sel_ports_t *cport, **last;
1185
1186 mutex_enter(&lacp_sel_lock);
1187
1188 /* check if port is already in the list */
1189 last = &sel_ports;
1190 for (cport = sel_ports; cport != NULL;
1191 last = &cport->sp_next, cport = cport->sp_next) {
1192 if (portp->lp_linkid == cport->sp_linkid) {
1193 ASSERT(cport->sp_partner_key ==
1194 portp->lp_lacp.PartnerOperKey);
1195 ASSERT(ether_cmp(&cport->sp_partner_system,
1196 &portp->lp_lacp.PartnerOperSystem) == 0);
1197
1198 mutex_exit(&lacp_sel_lock);
1199 return (B_TRUE);
1200 }
1201 }
1202
1203 /* create and initialize new entry */
1204 new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1205 if (new_port == NULL) {
1206 mutex_exit(&lacp_sel_lock);
1207 return (B_FALSE);
1208 }
1209
1210 new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1211 bcopy(&portp->lp_lacp.PartnerOperSystem,
1212 &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1213 new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1214 new_port->sp_linkid = portp->lp_linkid;
1215
1216 *last = new_port;
1217
1218 mutex_exit(&lacp_sel_lock);
1219 return (B_TRUE);
1220 }
1221
1222 /*
1223 * lacp_selection_logic - LACP selection logic
1224 * Sets the selected variable on a per port basis
1225 * and sets Ready when all waiting ports are ready
1226 * to go online.
1227 *
1228 * parameters:
1229 * - portp - instance this applies to.
1230 *
1231 * invoked:
1232 * - when initialization is needed
1233 * - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1234 * - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1235 * - every time the wait_while_timer pops
1236 * - everytime we turn LACP on/off
1237 */
1238 static void
lacp_selection_logic(aggr_port_t * portp)1239 lacp_selection_logic(aggr_port_t *portp)
1240 {
1241 aggr_port_t *tpp;
1242 aggr_grp_t *aggrp = portp->lp_grp;
1243 int ports_waiting;
1244 boolean_t reset_mac = B_FALSE;
1245 aggr_lacp_port_t *pl = &portp->lp_lacp;
1246
1247 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1248
1249 /* LACP_OFF state not in specification so check here. */
1250 if (!pl->sm.lacp_on) {
1251 lacp_port_unselect(portp);
1252 aggrp->aggr.ready = B_FALSE;
1253 lacp_mux_sm(portp);
1254 return;
1255 }
1256
1257 if (pl->sm.begin || !pl->sm.lacp_enabled ||
1258 (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1259
1260 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1261 "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1262 "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1263 AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1264 portp->lp_state));
1265
1266 lacp_port_unselect(portp);
1267 aggrp->aggr.ready = B_FALSE;
1268 lacp_mux_sm(portp);
1269 return;
1270 }
1271
1272 /*
1273 * If LACP is not enabled then selected is never set.
1274 */
1275 if (!pl->sm.lacp_enabled) {
1276 AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1277 portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1278
1279 lacp_port_unselect(portp);
1280 lacp_mux_sm(portp);
1281 return;
1282 }
1283
1284 /*
1285 * Check if the Partner MAC or Key are zero. If so, we have
1286 * not received any LACP info or it has expired and the
1287 * receive machine is in the LACP_DEFAULTED state.
1288 */
1289 if (ether_cmp(&pl->PartnerOperSystem, ðerzeroaddr) == 0 ||
1290 (pl->PartnerOperKey == 0)) {
1291
1292 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1293 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1294 ðerzeroaddr) != 0 &&
1295 (tpp->lp_lacp.PartnerOperKey != 0))
1296 break;
1297 }
1298
1299 /*
1300 * If all ports have no key or aggregation address,
1301 * then clear the negotiated Partner MAC and key.
1302 */
1303 if (tpp == NULL) {
1304 /* Clear the aggregation Partner MAC and key */
1305 aggrp->aggr.PartnerSystem = etherzeroaddr;
1306 aggrp->aggr.PartnerOperAggrKey = 0;
1307 }
1308
1309 return;
1310 }
1311
1312 /*
1313 * Insure that at least one port in the aggregation
1314 * matches the Partner aggregation MAC and key. If not,
1315 * then clear the aggregation MAC and key. Later we will
1316 * set the Partner aggregation MAC and key to that of the
1317 * current port's Partner MAC and key.
1318 */
1319 if (ether_cmp(&pl->PartnerOperSystem,
1320 &aggrp->aggr.PartnerSystem) != 0 ||
1321 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1322
1323 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1324 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1325 &aggrp->aggr.PartnerSystem) == 0 &&
1326 (tpp->lp_lacp.PartnerOperKey ==
1327 aggrp->aggr.PartnerOperAggrKey)) {
1328 /* Set aggregation Partner MAC and key */
1329 aggrp->aggr.PartnerSystem =
1330 pl->PartnerOperSystem;
1331 aggrp->aggr.PartnerOperAggrKey =
1332 pl->PartnerOperKey;
1333 break;
1334 }
1335 }
1336
1337 if (tpp == NULL) {
1338 /* Clear the aggregation Partner MAC and key */
1339 aggrp->aggr.PartnerSystem = etherzeroaddr;
1340 aggrp->aggr.PartnerOperAggrKey = 0;
1341 reset_mac = B_TRUE;
1342 }
1343 }
1344
1345 /*
1346 * If our Actor MAC is found in the Partner MAC
1347 * on this port then we have a loopback misconfiguration.
1348 */
1349 if (ether_cmp(&pl->PartnerOperSystem,
1350 (struct ether_addr *)&aggrp->lg_addr) == 0) {
1351 cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1352 portp->lp_linkid);
1353
1354 lacp_port_unselect(portp);
1355 lacp_mux_sm(portp);
1356 return;
1357 }
1358
1359 /*
1360 * If our Partner MAC and Key are found on any other
1361 * ports that are not in our aggregation, we have
1362 * a misconfiguration.
1363 */
1364 if (lacp_misconfig_check(portp)) {
1365 lacp_mux_sm(portp);
1366 return;
1367 }
1368
1369 /*
1370 * If the Aggregation Partner MAC and Key have not been
1371 * set, then this is either the first port or the aggregation
1372 * MAC and key have been reset. In either case we must set
1373 * the values of the Partner MAC and key.
1374 */
1375 if (ether_cmp(&aggrp->aggr.PartnerSystem, ðerzeroaddr) == 0 &&
1376 (aggrp->aggr.PartnerOperAggrKey == 0)) {
1377 /* Set aggregation Partner MAC and key */
1378 aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1379 aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1380
1381 /*
1382 * If we reset Partner aggregation MAC, then restart
1383 * selection_logic on ports that match new MAC address.
1384 */
1385 if (reset_mac) {
1386 for (tpp = aggrp->lg_ports; tpp; tpp =
1387 tpp->lp_next) {
1388 if (tpp == portp)
1389 continue;
1390 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1391 &aggrp->aggr.PartnerSystem) == 0 &&
1392 (tpp->lp_lacp.PartnerOperKey ==
1393 aggrp->aggr.PartnerOperAggrKey))
1394 lacp_selection_logic(tpp);
1395 }
1396 }
1397 } else if (ether_cmp(&pl->PartnerOperSystem,
1398 &aggrp->aggr.PartnerSystem) != 0 ||
1399 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1400 /*
1401 * The Partner port information does not match
1402 * that of the other ports in the aggregation
1403 * so disable this port.
1404 */
1405 lacp_port_unselect(portp);
1406
1407 cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1408 "or key (%d) incompatible with Aggregation Partner "
1409 "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1410 aggrp->aggr.PartnerOperAggrKey);
1411
1412 lacp_mux_sm(portp);
1413 return;
1414 }
1415
1416 /* If we get to here, automatically set selected */
1417 if (pl->sm.selected != AGGR_SELECTED) {
1418 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1419 "selected %d-->%d\n", portp->lp_linkid,
1420 pl->sm.selected, AGGR_SELECTED));
1421 if (!lacp_port_select(portp))
1422 return;
1423 lacp_mux_sm(portp);
1424 }
1425
1426 /*
1427 * From this point onward we have selected the port
1428 * and are simply checking if the Ready flag should
1429 * be set.
1430 */
1431
1432 /*
1433 * If at least two ports are waiting to aggregate
1434 * and ready_n is set on all ports waiting to aggregate
1435 * then set READY for the aggregation.
1436 */
1437
1438 ports_waiting = 0;
1439
1440 if (!aggrp->aggr.ready) {
1441 /*
1442 * If all ports in the aggregation have received compatible
1443 * partner information and they match up correctly with the
1444 * switch, there is no need to wait for all the
1445 * wait_while_timers to pop.
1446 */
1447 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1448 if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1449 tpp->lp_lacp.sm.begin) &&
1450 !tpp->lp_lacp.PartnerOperPortState.bit.sync) {
1451 /* Add up ports uninitialized or waiting */
1452 ports_waiting++;
1453 if (!tpp->lp_lacp.sm.ready_n) {
1454 DTRACE_PROBE1(port___not__ready,
1455 aggr_port_t *, tpp);
1456 return;
1457 }
1458 }
1459 }
1460 }
1461
1462 if (aggrp->aggr.ready) {
1463 AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1464 "aggr.ready already set\n", portp->lp_linkid));
1465 lacp_mux_sm(portp);
1466 } else {
1467 AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1468 portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1469 aggrp->aggr.ready = B_TRUE;
1470
1471 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1472 lacp_mux_sm(tpp);
1473 }
1474
1475 }
1476
1477 /*
1478 * wait_while_timer_pop - When the timer pops, we arrive here to
1479 * set ready_n and trigger the selection logic.
1480 */
1481 static void
wait_while_timer_pop(void * data)1482 wait_while_timer_pop(void *data)
1483 {
1484 aggr_port_t *portp = data;
1485 aggr_lacp_port_t *pl = &portp->lp_lacp;
1486
1487 mutex_enter(&pl->lacp_timer_lock);
1488 pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT;
1489 cv_broadcast(&pl->lacp_timer_cv);
1490 mutex_exit(&pl->lacp_timer_lock);
1491 }
1492
1493 /*
1494 * wait_while_timer_pop_handler - When the timer pops, we arrive here to
1495 * set ready_n and trigger the selection logic.
1496 */
1497 static void
wait_while_timer_pop_handler(aggr_port_t * portp)1498 wait_while_timer_pop_handler(aggr_port_t *portp)
1499 {
1500 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1501
1502 AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1503 portp->lp_linkid));
1504 portp->lp_lacp.sm.ready_n = B_TRUE;
1505
1506 lacp_selection_logic(portp);
1507 }
1508
1509 static void
start_wait_while_timer(aggr_port_t * portp)1510 start_wait_while_timer(aggr_port_t *portp)
1511 {
1512 aggr_lacp_port_t *pl = &portp->lp_lacp;
1513
1514 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1515
1516 mutex_enter(&pl->lacp_timer_lock);
1517 if (pl->wait_while_timer.id == 0) {
1518 pl->wait_while_timer.id =
1519 timeout(wait_while_timer_pop, portp,
1520 drv_usectohz(1000000 *
1521 portp->lp_lacp.wait_while_timer.val));
1522 }
1523 mutex_exit(&pl->lacp_timer_lock);
1524 }
1525
1526
1527 static void
stop_wait_while_timer(aggr_port_t * portp)1528 stop_wait_while_timer(aggr_port_t *portp)
1529 {
1530 aggr_lacp_port_t *pl = &portp->lp_lacp;
1531 timeout_id_t id;
1532
1533 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1534
1535 mutex_enter(&pl->lacp_timer_lock);
1536 if ((id = pl->wait_while_timer.id) != 0) {
1537 pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT;
1538 pl->wait_while_timer.id = 0;
1539 }
1540 mutex_exit(&pl->lacp_timer_lock);
1541
1542 if (id != 0)
1543 (void) untimeout(id);
1544 }
1545
1546 /*
1547 * Invoked when a port has been attached to a group.
1548 * Complete the processing that couldn't be finished from lacp_on()
1549 * because the port was not started. We know that the link is full
1550 * duplex and ON, otherwise it wouldn't be attached.
1551 */
1552 void
aggr_lacp_port_attached(aggr_port_t * portp)1553 aggr_lacp_port_attached(aggr_port_t *portp)
1554 {
1555 aggr_grp_t *grp = portp->lp_grp;
1556 aggr_lacp_port_t *pl = &portp->lp_lacp;
1557
1558 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1559 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1560 ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1561
1562 AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1563 portp->lp_linkid));
1564
1565 portp->lp_lacp.sm.port_enabled = B_TRUE; /* link on */
1566
1567 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1568 return;
1569
1570 pl->sm.lacp_enabled = B_TRUE;
1571 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1572 pl->sm.begin = B_TRUE;
1573
1574 lacp_receive_sm(portp, NULL);
1575 lacp_mux_sm(portp);
1576
1577 /* Enable Multicast Slow Protocol address */
1578 aggr_lacp_mcast_on(portp);
1579
1580 /* periodic_sm is started up from the receive machine */
1581 lacp_selection_logic(portp);
1582 }
1583
1584 /*
1585 * Invoked when a port has been detached from a group. Turn off
1586 * LACP processing if it was enabled.
1587 */
1588 void
aggr_lacp_port_detached(aggr_port_t * portp)1589 aggr_lacp_port_detached(aggr_port_t *portp)
1590 {
1591 aggr_grp_t *grp = portp->lp_grp;
1592
1593 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1594 ASSERT(MAC_PERIM_HELD(portp->lp_mh));
1595
1596 AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1597 portp->lp_linkid));
1598
1599 portp->lp_lacp.sm.port_enabled = B_FALSE;
1600
1601 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1602 return;
1603
1604 portp->lp_lacp.sm.lacp_enabled = B_FALSE;
1605 lacp_selection_logic(portp);
1606 lacp_mux_sm(portp);
1607 lacp_periodic_sm(portp);
1608
1609 /*
1610 * Disable Slow Protocol Timers.
1611 */
1612 stop_periodic_timer(portp);
1613 stop_current_while_timer(portp);
1614 stop_wait_while_timer(portp);
1615
1616 /* Disable Multicast Slow Protocol address */
1617 aggr_lacp_mcast_off(portp);
1618 aggr_set_coll_dist(portp, B_FALSE);
1619 }
1620
1621 /*
1622 * Enable Slow Protocol LACP and Marker PDUs.
1623 */
1624 static void
lacp_on(aggr_port_t * portp)1625 lacp_on(aggr_port_t *portp)
1626 {
1627 aggr_lacp_port_t *pl = &portp->lp_lacp;
1628 mac_perim_handle_t mph;
1629
1630 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1631
1632 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1633
1634 /*
1635 * Reset the state machines and Partner operational
1636 * information. Careful to not reset things like
1637 * our link state.
1638 */
1639 lacp_reset_port(portp);
1640 pl->sm.lacp_on = B_TRUE;
1641
1642 AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1643
1644 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1645 pl->sm.port_enabled = B_TRUE;
1646 pl->sm.lacp_enabled = B_TRUE;
1647 pl->ActorOperPortState.bit.aggregation = B_TRUE;
1648 }
1649
1650 lacp_receive_sm(portp, NULL);
1651 lacp_mux_sm(portp);
1652
1653 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1654 /* Enable Multicast Slow Protocol address */
1655 aggr_lacp_mcast_on(portp);
1656
1657 /* periodic_sm is started up from the receive machine */
1658 lacp_selection_logic(portp);
1659 }
1660 done:
1661 mac_perim_exit(mph);
1662 } /* lacp_on */
1663
1664 /* Disable Slow Protocol LACP and Marker PDUs */
1665 static void
lacp_off(aggr_port_t * portp)1666 lacp_off(aggr_port_t *portp)
1667 {
1668 aggr_lacp_port_t *pl = &portp->lp_lacp;
1669 mac_perim_handle_t mph;
1670
1671 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1672 mac_perim_enter_by_mh(portp->lp_mh, &mph);
1673
1674 pl->sm.lacp_on = B_FALSE;
1675
1676 AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1677
1678 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) {
1679 /*
1680 * Disable Slow Protocol Timers.
1681 */
1682 stop_periodic_timer(portp);
1683 stop_current_while_timer(portp);
1684 stop_wait_while_timer(portp);
1685
1686 /* Disable Multicast Slow Protocol address */
1687 aggr_lacp_mcast_off(portp);
1688
1689 pl->sm.port_enabled = B_FALSE;
1690 pl->sm.lacp_enabled = B_FALSE;
1691 pl->ActorOperPortState.bit.aggregation = B_FALSE;
1692 }
1693
1694 lacp_mux_sm(portp);
1695 lacp_periodic_sm(portp);
1696 lacp_selection_logic(portp);
1697
1698 /* Turn OFF Collector_Distributor */
1699 aggr_set_coll_dist(portp, B_FALSE);
1700
1701 lacp_reset_port(portp);
1702 mac_perim_exit(mph);
1703 }
1704
1705
1706 static boolean_t
valid_lacp_pdu(aggr_port_t * portp,lacp_t * lacp)1707 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1708 {
1709 /*
1710 * 43.4.12 - "a Receive machine shall not validate
1711 * the Version Number, TLV_type, or Reserved fields in received
1712 * LACPDUs."
1713 * ... "a Receive machine may validate the Actor_Information_Length,
1714 * Partner_Information_Length, Collector_Information_Length,
1715 * or Terminator_Length fields."
1716 */
1717 if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1718 (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1719 (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1720 (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1721 AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1722 " Terminator Length = %d \n", portp->lp_linkid,
1723 lacp->terminator_len));
1724 return (B_FALSE);
1725 }
1726
1727 return (B_TRUE);
1728 }
1729
1730
1731 static void
start_current_while_timer(aggr_port_t * portp,uint_t time)1732 start_current_while_timer(aggr_port_t *portp, uint_t time)
1733 {
1734 aggr_lacp_port_t *pl = &portp->lp_lacp;
1735
1736 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1737
1738 mutex_enter(&pl->lacp_timer_lock);
1739 if (pl->current_while_timer.id == 0) {
1740 if (time > 0)
1741 pl->current_while_timer.val = time;
1742 else if (pl->ActorOperPortState.bit.timeout)
1743 pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
1744 else
1745 pl->current_while_timer.val = LONG_TIMEOUT_TIME;
1746
1747 pl->current_while_timer.id =
1748 timeout(current_while_timer_pop, portp,
1749 drv_usectohz((clock_t)1000000 *
1750 (clock_t)portp->lp_lacp.current_while_timer.val));
1751 }
1752 mutex_exit(&pl->lacp_timer_lock);
1753 }
1754
1755
1756 static void
stop_current_while_timer(aggr_port_t * portp)1757 stop_current_while_timer(aggr_port_t *portp)
1758 {
1759 aggr_lacp_port_t *pl = &portp->lp_lacp;
1760 timeout_id_t id;
1761
1762 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1763
1764 mutex_enter(&pl->lacp_timer_lock);
1765 if ((id = pl->current_while_timer.id) != 0) {
1766 pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT;
1767 pl->current_while_timer.id = 0;
1768 }
1769 mutex_exit(&pl->lacp_timer_lock);
1770
1771 if (id != 0)
1772 (void) untimeout(id);
1773 }
1774
1775 static void
current_while_timer_pop(void * data)1776 current_while_timer_pop(void *data)
1777 {
1778 aggr_port_t *portp = (aggr_port_t *)data;
1779 aggr_lacp_port_t *pl = &portp->lp_lacp;
1780
1781 mutex_enter(&pl->lacp_timer_lock);
1782 pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT;
1783 cv_broadcast(&pl->lacp_timer_cv);
1784 mutex_exit(&pl->lacp_timer_lock);
1785 }
1786
1787 static void
current_while_timer_pop_handler(aggr_port_t * portp)1788 current_while_timer_pop_handler(aggr_port_t *portp)
1789 {
1790 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1791
1792 AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1793 "pop id=%p\n", portp->lp_linkid,
1794 portp->lp_lacp.current_while_timer.id));
1795
1796 lacp_receive_sm(portp, NULL);
1797 }
1798
1799 /*
1800 * record_Default - Simply copies over administrative values
1801 * to the partner operational values, and sets our state to indicate we
1802 * are using defaulted values.
1803 */
1804 static void
record_Default(aggr_port_t * portp)1805 record_Default(aggr_port_t *portp)
1806 {
1807 aggr_lacp_port_t *pl = &portp->lp_lacp;
1808
1809 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1810
1811 pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1812 pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1813 pl->PartnerOperSystem = pl->PartnerAdminSystem;
1814 pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1815 pl->PartnerOperKey = pl->PartnerAdminKey;
1816 pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1817
1818 pl->ActorOperPortState.bit.defaulted = B_TRUE;
1819 }
1820
1821
1822 /* Returns B_TRUE on sync value changing */
1823 static boolean_t
record_PDU(aggr_port_t * portp,lacp_t * lacp)1824 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1825 {
1826 aggr_grp_t *aggrp = portp->lp_grp;
1827 aggr_lacp_port_t *pl = &portp->lp_lacp;
1828 uint8_t save_sync;
1829
1830 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1831
1832 /*
1833 * Partner Information
1834 */
1835 pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1836 pl->PartnerOperPortPriority =
1837 ntohs(lacp->actor_info.port_priority);
1838 pl->PartnerOperSystem = lacp->actor_info.system_id;
1839 pl->PartnerOperSysPriority =
1840 htons(lacp->actor_info.system_priority);
1841 pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1842
1843 /* All state info except for Synchronization */
1844 save_sync = pl->PartnerOperPortState.bit.sync;
1845 pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1846
1847 /* Defaulted set to FALSE */
1848 pl->ActorOperPortState.bit.defaulted = B_FALSE;
1849
1850 /*
1851 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1852 * Partner_System_Priority, Partner_Key, and
1853 * Partner_State.Aggregation) are compared to the
1854 * corresponding operations paramters values for
1855 * the Actor. If these are equal, or if this is
1856 * an individual link, we are synchronized.
1857 */
1858 if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1859 (ntohs(lacp->partner_info.port_priority) ==
1860 pl->ActorPortPriority) &&
1861 (ether_cmp(&lacp->partner_info.system_id,
1862 (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1863 (ntohs(lacp->partner_info.system_priority) ==
1864 aggrp->aggr.ActorSystemPriority) &&
1865 (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1866 (lacp->partner_info.state.bit.aggregation ==
1867 pl->ActorOperPortState.bit.aggregation)) ||
1868 (!lacp->actor_info.state.bit.aggregation)) {
1869
1870 pl->PartnerOperPortState.bit.sync =
1871 lacp->actor_info.state.bit.sync;
1872 } else {
1873 pl->PartnerOperPortState.bit.sync = B_FALSE;
1874 }
1875
1876 if (save_sync != pl->PartnerOperPortState.bit.sync) {
1877 AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1878 "%d -->%d\n", portp->lp_linkid, save_sync,
1879 pl->PartnerOperPortState.bit.sync));
1880 return (B_TRUE);
1881 } else {
1882 return (B_FALSE);
1883 }
1884 }
1885
1886
1887 /*
1888 * update_selected - If any of the Partner parameters has
1889 * changed from a previous value, then
1890 * unselect the link from the aggregator.
1891 */
1892 static boolean_t
update_selected(aggr_port_t * portp,lacp_t * lacp)1893 update_selected(aggr_port_t *portp, lacp_t *lacp)
1894 {
1895 aggr_lacp_port_t *pl = &portp->lp_lacp;
1896
1897 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1898
1899 if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1900 (pl->PartnerOperPortPriority !=
1901 ntohs(lacp->actor_info.port_priority)) ||
1902 (ether_cmp(&pl->PartnerOperSystem,
1903 &lacp->actor_info.system_id) != 0) ||
1904 (pl->PartnerOperSysPriority !=
1905 ntohs(lacp->actor_info.system_priority)) ||
1906 (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1907 (pl->PartnerOperPortState.bit.aggregation !=
1908 lacp->actor_info.state.bit.aggregation)) {
1909 AGGR_LACP_DBG(("update_selected:(%d): "
1910 "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1911 AGGR_UNSELECTED));
1912
1913 lacp_port_unselect(portp);
1914 return (B_TRUE);
1915 } else {
1916 return (B_FALSE);
1917 }
1918 }
1919
1920
1921 /*
1922 * update_default_selected - If any of the operational Partner parameters
1923 * is different than that of the administrative values
1924 * then unselect the link from the aggregator.
1925 */
1926 static void
update_default_selected(aggr_port_t * portp)1927 update_default_selected(aggr_port_t *portp)
1928 {
1929 aggr_lacp_port_t *pl = &portp->lp_lacp;
1930
1931 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
1932
1933 if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1934 (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1935 (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1936 (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1937 (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1938 (pl->PartnerOperPortState.bit.aggregation !=
1939 pl->PartnerAdminPortState.bit.aggregation)) {
1940
1941 AGGR_LACP_DBG(("update_default_selected:(%d): "
1942 "selected %d-->%d\n", portp->lp_linkid,
1943 pl->sm.selected, AGGR_UNSELECTED));
1944
1945 lacp_port_unselect(portp);
1946 }
1947 }
1948
1949
1950 /*
1951 * update_NTT - If any of the Partner values in the received LACPDU
1952 * are different than that of the Actor operational
1953 * values then set NTT to true.
1954 */
1955 static void
update_NTT(aggr_port_t * portp,lacp_t * lacp)1956 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1957 {
1958 aggr_grp_t *aggrp = portp->lp_grp;
1959 aggr_lacp_port_t *pl = &portp->lp_lacp;
1960
1961 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh));
1962
1963 if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1964 (pl->ActorPortPriority !=
1965 ntohs(lacp->partner_info.port_priority)) ||
1966 (ether_cmp(&aggrp->lg_addr,
1967 &lacp->partner_info.system_id) != 0) ||
1968 (aggrp->aggr.ActorSystemPriority !=
1969 ntohs(lacp->partner_info.system_priority)) ||
1970 (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1971 (pl->ActorOperPortState.bit.activity !=
1972 lacp->partner_info.state.bit.activity) ||
1973 (pl->ActorOperPortState.bit.timeout !=
1974 lacp->partner_info.state.bit.timeout) ||
1975 (pl->ActorOperPortState.bit.sync !=
1976 lacp->partner_info.state.bit.sync) ||
1977 (pl->ActorOperPortState.bit.aggregation !=
1978 lacp->partner_info.state.bit.aggregation)) {
1979
1980 AGGR_LACP_DBG(("update_NTT:(%d): NTT %d-->%d\n",
1981 portp->lp_linkid, pl->NTT, B_TRUE));
1982
1983 pl->NTT = B_TRUE;
1984 }
1985 }
1986
1987 /*
1988 * lacp_receive_sm - LACP receive state machine
1989 *
1990 * parameters:
1991 * - portp - instance this applies to.
1992 * - lacp - pointer in the case of a received LACPDU.
1993 * This value is NULL if there is no LACPDU.
1994 *
1995 * invoked:
1996 * - when initialization is needed
1997 * - upon reception of an LACPDU. This is the common case.
1998 * - every time the current_while_timer pops
1999 */
2000 static void
lacp_receive_sm(aggr_port_t * portp,lacp_t * lacp)2001 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
2002 {
2003 boolean_t sync_updated, selected_updated, save_activity;
2004 aggr_lacp_port_t *pl = &portp->lp_lacp;
2005 lacp_receive_state_t oldstate = pl->sm.receive_state;
2006
2007 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh));
2008
2009 /* LACP_OFF state not in specification so check here. */
2010 if (!pl->sm.lacp_on)
2011 return;
2012
2013 /* figure next state */
2014 if (pl->sm.begin || pl->sm.port_moved) {
2015 pl->sm.receive_state = LACP_INITIALIZE;
2016 } else if (!pl->sm.port_enabled) { /* DL_NOTE_LINK_DOWN */
2017 pl->sm.receive_state = LACP_PORT_DISABLED;
2018 } else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
2019 pl->sm.receive_state =
2020 (pl->sm.receive_state == LACP_PORT_DISABLED) ?
2021 LACP_DISABLED : LACP_PORT_DISABLED;
2022 } else if (lacp != NULL) {
2023 if ((pl->sm.receive_state == LACP_EXPIRED) ||
2024 (pl->sm.receive_state == LACP_DEFAULTED)) {
2025 pl->sm.receive_state = LACP_CURRENT;
2026 }
2027 } else if ((pl->sm.receive_state == LACP_CURRENT) &&
2028 (pl->current_while_timer.id == 0)) {
2029 pl->sm.receive_state = LACP_EXPIRED;
2030 } else if ((pl->sm.receive_state == LACP_EXPIRED) &&
2031 (pl->current_while_timer.id == 0)) {
2032 pl->sm.receive_state = LACP_DEFAULTED;
2033 }
2034
2035 if (!((lacp && (oldstate == LACP_CURRENT) &&
2036 (pl->sm.receive_state == LACP_CURRENT)))) {
2037 AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
2038 portp->lp_linkid, lacp_receive_str[oldstate],
2039 lacp_receive_str[pl->sm.receive_state]));
2040 }
2041
2042 switch (pl->sm.receive_state) {
2043 case LACP_INITIALIZE:
2044 lacp_port_unselect(portp);
2045 record_Default(portp);
2046 pl->ActorOperPortState.bit.expired = B_FALSE;
2047 pl->sm.port_moved = B_FALSE;
2048 pl->sm.receive_state = LACP_PORT_DISABLED;
2049 pl->sm.begin = B_FALSE;
2050 lacp_receive_sm(portp, NULL);
2051 break;
2052
2053 case LACP_PORT_DISABLED:
2054 pl->PartnerOperPortState.bit.sync = B_FALSE;
2055 /*
2056 * Stop current_while_timer in case
2057 * we got here from link down
2058 */
2059 stop_current_while_timer(portp);
2060
2061 if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
2062 pl->sm.receive_state = LACP_DISABLED;
2063 lacp_receive_sm(portp, lacp);
2064 /* We goto LACP_DISABLED state */
2065 break;
2066 } else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
2067 pl->sm.receive_state = LACP_EXPIRED;
2068 /*
2069 * FALL THROUGH TO LACP_EXPIRED CASE:
2070 * We have no way of knowing if we get into
2071 * lacp_receive_sm() from a current_while_timer
2072 * expiring as it has never been kicked off yet!
2073 */
2074 } else {
2075 /* We stay in LACP_PORT_DISABLED state */
2076 break;
2077 }
2078 /* LACP_PORT_DISABLED -> LACP_EXPIRED */
2079 /* FALLTHROUGH */
2080
2081 case LACP_EXPIRED:
2082 /*
2083 * Arrives here from LACP_PORT_DISABLED state as well as
2084 * as well as current_while_timer expiring.
2085 */
2086 pl->PartnerOperPortState.bit.sync = B_FALSE;
2087 pl->PartnerOperPortState.bit.timeout = B_TRUE;
2088
2089 pl->ActorOperPortState.bit.expired = B_TRUE;
2090 start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
2091 lacp_periodic_sm(portp);
2092 break;
2093
2094 case LACP_DISABLED:
2095 /*
2096 * This is the normal state for recv_sm when LACP_OFF
2097 * is set or the NIC is in half duplex mode.
2098 */
2099 lacp_port_unselect(portp);
2100 record_Default(portp);
2101 pl->PartnerOperPortState.bit.aggregation = B_FALSE;
2102 pl->ActorOperPortState.bit.expired = B_FALSE;
2103 break;
2104
2105 case LACP_DEFAULTED:
2106 /*
2107 * Current_while_timer expired a second time.
2108 */
2109 update_default_selected(portp);
2110 record_Default(portp); /* overwrite Partner Oper val */
2111 pl->ActorOperPortState.bit.expired = B_FALSE;
2112 pl->PartnerOperPortState.bit.sync = B_TRUE;
2113
2114 lacp_selection_logic(portp);
2115 lacp_mux_sm(portp);
2116 break;
2117
2118 case LACP_CURRENT:
2119 /*
2120 * Reception of LACPDU
2121 */
2122
2123 if (!lacp) /* no LACPDU so current_while_timer popped */
2124 break;
2125
2126 AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
2127 portp->lp_linkid));
2128
2129 /*
2130 * Validate Actor_Information_Length,
2131 * Partner_Information_Length, Collector_Information_Length,
2132 * and Terminator_Length fields.
2133 */
2134 if (!valid_lacp_pdu(portp, lacp)) {
2135 AGGR_LACP_DBG(("lacp_receive_sm (%d): "
2136 "Invalid LACPDU received\n",
2137 portp->lp_linkid));
2138 break;
2139 }
2140
2141 save_activity = pl->PartnerOperPortState.bit.activity;
2142 selected_updated = update_selected(portp, lacp);
2143 update_NTT(portp, lacp);
2144 sync_updated = record_PDU(portp, lacp);
2145
2146 pl->ActorOperPortState.bit.expired = B_FALSE;
2147
2148 if (selected_updated) {
2149 lacp_selection_logic(portp);
2150 lacp_mux_sm(portp);
2151 } else if (sync_updated) {
2152 lacp_mux_sm(portp);
2153 }
2154
2155 /*
2156 * If the periodic timer value bit has been modified
2157 * or the partner activity bit has been changed then
2158 * we need to respectively:
2159 * - restart the timer with the proper timeout value.
2160 * - possibly enable/disable transmission of LACPDUs.
2161 */
2162 if ((pl->PartnerOperPortState.bit.timeout &&
2163 (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2164 (!pl->PartnerOperPortState.bit.timeout &&
2165 (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2166 (pl->PartnerOperPortState.bit.activity !=
2167 save_activity)) {
2168 lacp_periodic_sm(portp);
2169 }
2170
2171 stop_current_while_timer(portp);
2172 /* Check if we need to transmit an LACPDU */
2173 if (pl->NTT)
2174 lacp_xmit_sm(portp);
2175 start_current_while_timer(portp, 0);
2176
2177 break;
2178 }
2179 }
2180
2181 static void
aggr_set_coll_dist(aggr_port_t * portp,boolean_t enable)2182 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2183 {
2184 mac_perim_handle_t mph;
2185
2186 AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2187 portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2188
2189 mac_perim_enter_by_mh(portp->lp_mh, &mph);
2190 if (!enable) {
2191 /*
2192 * Turn OFF Collector_Distributor.
2193 */
2194 portp->lp_collector_enabled = B_FALSE;
2195 aggr_send_port_disable(portp);
2196 goto done;
2197 }
2198
2199 /*
2200 * Turn ON Collector_Distributor.
2201 */
2202
2203 if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2204 (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2205 /* Port is compatible and can be aggregated */
2206 portp->lp_collector_enabled = B_TRUE;
2207 aggr_send_port_enable(portp);
2208 }
2209
2210 done:
2211 mac_perim_exit(mph);
2212 }
2213
2214 /*
2215 * Because the LACP packet processing needs to enter the aggr's mac perimeter
2216 * and that would potentially cause a deadlock with the thread in which the
2217 * grp/port is deleted, we defer the packet process to a worker thread. Here
2218 * we only enqueue the received Marker or LACPDU for later processing.
2219 */
2220 void
aggr_lacp_rx_enqueue(aggr_port_t * portp,mblk_t * dmp)2221 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp)
2222 {
2223 aggr_grp_t *grp = portp->lp_grp;
2224 lacp_t *lacp;
2225
2226 dmp->b_rptr += sizeof (struct ether_header);
2227
2228 if (MBLKL(dmp) < sizeof (lacp_t)) {
2229 freemsg(dmp);
2230 return;
2231 }
2232
2233 lacp = (lacp_t *)dmp->b_rptr;
2234 if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) {
2235 AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): "
2236 "Unknown Slow Protocol type %d\n",
2237 portp->lp_linkid, lacp->subtype));
2238 freemsg(dmp);
2239 return;
2240 }
2241
2242 mutex_enter(&grp->lg_lacp_lock);
2243
2244 /*
2245 * If the lg_lacp_done is set, this aggregation is in the process of
2246 * being deleted, return directly.
2247 */
2248 if (grp->lg_lacp_done) {
2249 mutex_exit(&grp->lg_lacp_lock);
2250 freemsg(dmp);
2251 return;
2252 }
2253
2254 if (grp->lg_lacp_tail == NULL) {
2255 grp->lg_lacp_head = grp->lg_lacp_tail = dmp;
2256 } else {
2257 grp->lg_lacp_tail->b_next = dmp;
2258 grp->lg_lacp_tail = dmp;
2259 }
2260
2261 /*
2262 * Hold a reference of the port so that the port won't be freed when it
2263 * is removed from the aggr. The b_prev field is borrowed to save the
2264 * port information.
2265 */
2266 AGGR_PORT_REFHOLD(portp);
2267 dmp->b_prev = (mblk_t *)portp;
2268 cv_broadcast(&grp->lg_lacp_cv);
2269 mutex_exit(&grp->lg_lacp_lock);
2270 }
2271
2272 static void
aggr_lacp_rx(mblk_t * dmp)2273 aggr_lacp_rx(mblk_t *dmp)
2274 {
2275 aggr_port_t *portp = (aggr_port_t *)dmp->b_prev;
2276 mac_perim_handle_t mph;
2277 lacp_t *lacp;
2278
2279 dmp->b_prev = NULL;
2280
2281 mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph);
2282 if (portp->lp_closing)
2283 goto done;
2284
2285 lacp = (lacp_t *)dmp->b_rptr;
2286 switch (lacp->subtype) {
2287 case LACP_SUBTYPE:
2288 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2289 portp->lp_linkid));
2290
2291 if (!portp->lp_lacp.sm.lacp_on) {
2292 break;
2293 }
2294 lacp_receive_sm(portp, lacp);
2295 break;
2296
2297 case MARKER_SUBTYPE:
2298 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2299 portp->lp_linkid));
2300
2301 if (receive_marker_pdu(portp, dmp) != 0)
2302 break;
2303
2304 /* Send the packet over the first TX ring */
2305 dmp = mac_hwring_send_priv(portp->lp_mch,
2306 portp->lp_tx_rings[0], dmp);
2307 if (dmp != NULL)
2308 freemsg(dmp);
2309 mac_perim_exit(mph);
2310 AGGR_PORT_REFRELE(portp);
2311 return;
2312 }
2313
2314 done:
2315 mac_perim_exit(mph);
2316 AGGR_PORT_REFRELE(portp);
2317 freemsg(dmp);
2318 }
2319
2320 void
aggr_lacp_rx_thread(void * arg)2321 aggr_lacp_rx_thread(void *arg)
2322 {
2323 callb_cpr_t cprinfo;
2324 aggr_grp_t *grp = (aggr_grp_t *)arg;
2325 aggr_port_t *port;
2326 mblk_t *mp, *nextmp;
2327
2328 CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr,
2329 "aggr_lacp_rx_thread");
2330
2331 mutex_enter(&grp->lg_lacp_lock);
2332
2333 /*
2334 * Quit the thread if the grp is deleted.
2335 */
2336 while (!grp->lg_lacp_done) {
2337 if ((mp = grp->lg_lacp_head) == NULL) {
2338 CALLB_CPR_SAFE_BEGIN(&cprinfo);
2339 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
2340 CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock);
2341 continue;
2342 }
2343
2344 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2345 mutex_exit(&grp->lg_lacp_lock);
2346
2347 while (mp != NULL) {
2348 nextmp = mp->b_next;
2349 mp->b_next = NULL;
2350 aggr_lacp_rx(mp);
2351 mp = nextmp;
2352 }
2353 mutex_enter(&grp->lg_lacp_lock);
2354 }
2355
2356 /*
2357 * The grp is being destroyed, simply free all of the LACP messages
2358 * left in the queue which did not have the chance to be processed.
2359 * We cannot use freemsgchain() here since we need to clear the
2360 * b_prev field.
2361 */
2362 for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) {
2363 port = (aggr_port_t *)mp->b_prev;
2364 AGGR_PORT_REFRELE(port);
2365 nextmp = mp->b_next;
2366 mp->b_next = NULL;
2367 mp->b_prev = NULL;
2368 freemsg(mp);
2369 }
2370
2371 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
2372 grp->lg_lacp_rx_thread = NULL;
2373 cv_broadcast(&grp->lg_lacp_cv);
2374 CALLB_CPR_EXIT(&cprinfo);
2375 thread_exit();
2376 }
2377