1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2017, Joyent, Inc. 24 * Copyright 2024 MNX Cloud, Inc. 25 */ 26 27 /* 28 * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/callb.h> 34 #include <sys/conf.h> 35 #include <sys/cmn_err.h> 36 #include <sys/disp.h> 37 #include <sys/list.h> 38 #include <sys/ksynch.h> 39 #include <sys/kmem.h> 40 #include <sys/stream.h> 41 #include <sys/modctl.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/atomic.h> 45 #include <sys/stat.h> 46 #include <sys/byteorder.h> 47 #include <sys/strsun.h> 48 #include <sys/isa_defs.h> 49 #include <sys/sdt.h> 50 51 #include <sys/aggr.h> 52 #include <sys/aggr_impl.h> 53 54 static struct ether_addr etherzeroaddr = { 55 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 56 }; 57 58 /* 59 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec. 60 */ 61 static struct ether_addr slow_multicast_addr = { 62 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 63 }; 64 65 #ifdef DEBUG 66 /* LACP state machine debugging support */ 67 static uint32_t aggr_lacp_debug = 0; 68 #define AGGR_LACP_DBG(x) if (aggr_lacp_debug) { (void) printf x; } 69 #else 70 #define AGGR_LACP_DBG(x) {} 71 #endif /* DEBUG */ 72 73 #define NSECS_PER_SEC 1000000000ll 74 75 /* used by lacp_misconfig_walker() */ 76 typedef struct lacp_misconfig_check_state_s { 77 aggr_port_t *cs_portp; 78 boolean_t cs_found; 79 } lacp_misconfig_check_state_t; 80 81 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS; 82 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS; 83 static const char *lacp_mux_str[] = LACP_MUX_STRINGS; 84 85 static uint16_t lacp_port_priority = 0x1000; 86 static uint16_t lacp_system_priority = 0x1000; 87 88 /* 89 * Maintains a list of all ports in ATTACHED state. This information 90 * is used to detect misconfiguration. 91 */ 92 typedef struct lacp_sel_ports { 93 datalink_id_t sp_grp_linkid; 94 datalink_id_t sp_linkid; 95 /* Note: sp_partner_system must be 2-byte aligned */ 96 struct ether_addr sp_partner_system; 97 uint32_t sp_partner_key; 98 struct lacp_sel_ports *sp_next; 99 } lacp_sel_ports_t; 100 101 static lacp_sel_ports_t *sel_ports = NULL; 102 static kmutex_t lacp_sel_lock; 103 104 static void periodic_timer_pop(void *); 105 static void periodic_timer_pop_handler(aggr_port_t *); 106 static void lacp_xmit_sm(aggr_port_t *); 107 static void lacp_periodic_sm(aggr_port_t *); 108 static void fill_lacp_pdu(aggr_port_t *, lacp_t *); 109 static void fill_lacp_ether(aggr_port_t *, struct ether_header *); 110 static void lacp_on(aggr_port_t *); 111 static void lacp_off(aggr_port_t *); 112 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *); 113 static void lacp_receive_sm(aggr_port_t *, lacp_t *); 114 static void aggr_set_coll_dist(aggr_port_t *, boolean_t); 115 static void start_wait_while_timer(aggr_port_t *); 116 static void stop_wait_while_timer(aggr_port_t *); 117 static void lacp_reset_port(aggr_port_t *); 118 static void stop_current_while_timer(aggr_port_t *); 119 static void current_while_timer_pop(void *); 120 static void current_while_timer_pop_handler(aggr_port_t *); 121 static void update_default_selected(aggr_port_t *); 122 static boolean_t update_selected(aggr_port_t *, lacp_t *); 123 static boolean_t lacp_sel_ports_add(aggr_port_t *); 124 static void lacp_sel_ports_del(aggr_port_t *); 125 static void wait_while_timer_pop(void *); 126 static void wait_while_timer_pop_handler(aggr_port_t *); 127 128 void 129 aggr_lacp_init(void) 130 { 131 mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL); 132 } 133 134 void 135 aggr_lacp_fini(void) 136 { 137 mutex_destroy(&lacp_sel_lock); 138 } 139 140 /* 141 * The following functions are used for handling LACP timers. 142 * 143 * Note that we cannot fully rely on the aggr's mac perimeter in the timeout 144 * handler routine, otherwise it may cause deadlock with the untimeout() call 145 * which is usually called with the mac perimeter held. Instead, a 146 * lacp_timer_lock mutex is introduced, which protects a bitwise flag 147 * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer() 148 * routines and is checked by a dedicated thread, that executes the real 149 * timeout operation. 150 */ 151 static void 152 aggr_port_timer_thread(void *arg) 153 { 154 aggr_port_t *port = arg; 155 aggr_lacp_port_t *pl = &port->lp_lacp; 156 aggr_grp_t *grp = port->lp_grp; 157 uint32_t lacp_timer_bits; 158 mac_perim_handle_t mph; 159 callb_cpr_t cprinfo; 160 161 CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr, 162 "aggr_port_timer_thread"); 163 164 mutex_enter(&pl->lacp_timer_lock); 165 166 for (;;) { 167 168 if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) { 169 CALLB_CPR_SAFE_BEGIN(&cprinfo); 170 cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock); 171 CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock); 172 continue; 173 } 174 pl->lacp_timer_bits = 0; 175 176 if (lacp_timer_bits & LACP_THREAD_EXIT) 177 break; 178 179 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT) 180 pl->periodic_timer.id = 0; 181 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT) 182 pl->wait_while_timer.id = 0; 183 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT) 184 pl->current_while_timer.id = 0; 185 186 mutex_exit(&pl->lacp_timer_lock); 187 188 mac_perim_enter_by_mh(grp->lg_mh, &mph); 189 if (port->lp_closing) { 190 mac_perim_exit(mph); 191 mutex_enter(&pl->lacp_timer_lock); 192 break; 193 } 194 195 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT) 196 periodic_timer_pop_handler(port); 197 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT) 198 wait_while_timer_pop_handler(port); 199 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT) 200 current_while_timer_pop_handler(port); 201 mac_perim_exit(mph); 202 203 mutex_enter(&pl->lacp_timer_lock); 204 if (pl->lacp_timer_bits & LACP_THREAD_EXIT) 205 break; 206 } 207 208 pl->lacp_timer_bits = 0; 209 pl->lacp_timer_thread = NULL; 210 cv_broadcast(&pl->lacp_timer_cv); 211 212 /* CALLB_CPR_EXIT drops the lock */ 213 CALLB_CPR_EXIT(&cprinfo); 214 215 /* 216 * Release the reference of the grp so aggr_grp_delete() can call 217 * mac_unregister() safely. 218 */ 219 aggr_grp_port_rele(port); 220 thread_exit(); 221 } 222 223 /* 224 * Set the port LACP state to SELECTED. Returns B_FALSE if the operation 225 * could not be performed due to a memory allocation error, B_TRUE otherwise. 226 */ 227 static boolean_t 228 lacp_port_select(aggr_port_t *portp) 229 { 230 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 231 232 if (!lacp_sel_ports_add(portp)) 233 return (B_FALSE); 234 portp->lp_lacp.sm.selected = AGGR_SELECTED; 235 return (B_TRUE); 236 } 237 238 /* 239 * Set the port LACP state to UNSELECTED. 240 */ 241 static void 242 lacp_port_unselect(aggr_port_t *portp) 243 { 244 aggr_grp_t *grp = portp->lp_grp; 245 246 ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh)); 247 248 lacp_sel_ports_del(portp); 249 portp->lp_lacp.sm.selected = AGGR_UNSELECTED; 250 } 251 252 /* 253 * Initialize group specific LACP state and parameters. 254 */ 255 void 256 aggr_lacp_init_grp(aggr_grp_t *aggrp) 257 { 258 aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT; 259 aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority; 260 aggrp->aggr.CollectorMaxDelay = 10; 261 aggrp->lg_lacp_mode = AGGR_LACP_OFF; 262 aggrp->aggr.ready = B_FALSE; 263 } 264 265 /* 266 * Complete LACP info initialization at port creation time. 267 */ 268 void 269 aggr_lacp_init_port(aggr_port_t *portp) 270 { 271 aggr_grp_t *aggrp = portp->lp_grp; 272 aggr_lacp_port_t *pl = &portp->lp_lacp; 273 274 ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh)); 275 ASSERT(MAC_PERIM_HELD(portp->lp_mh)); 276 277 /* actor port # */ 278 pl->ActorPortNumber = portp->lp_portid; 279 AGGR_LACP_DBG(("aggr_lacp_init_port(%d): " 280 "ActorPortNumber = 0x%x\n", portp->lp_linkid, 281 pl->ActorPortNumber)); 282 283 pl->ActorPortPriority = (uint16_t)lacp_port_priority; 284 pl->ActorPortAggrId = 0; /* aggregator id - not used */ 285 pl->NTT = B_FALSE; /* need to transmit */ 286 287 pl->ActorAdminPortKey = aggrp->lg_key; 288 pl->ActorOperPortKey = pl->ActorAdminPortKey; 289 AGGR_LACP_DBG(("aggr_lacp_init_port(%d) " 290 "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n", 291 portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey)); 292 293 /* Actor admin. port state */ 294 pl->ActorAdminPortState.bit.activity = B_FALSE; 295 pl->ActorAdminPortState.bit.timeout = B_TRUE; 296 pl->ActorAdminPortState.bit.aggregation = B_TRUE; 297 pl->ActorAdminPortState.bit.sync = B_FALSE; 298 pl->ActorAdminPortState.bit.collecting = B_FALSE; 299 pl->ActorAdminPortState.bit.distributing = B_FALSE; 300 pl->ActorAdminPortState.bit.defaulted = B_FALSE; 301 pl->ActorAdminPortState.bit.expired = B_FALSE; 302 pl->ActorOperPortState = pl->ActorAdminPortState; 303 304 /* 305 * Partner Administrative Information 306 * (All initialized to zero except for the following) 307 * Fast Timeouts. 308 */ 309 pl->PartnerAdminPortState.bit.timeout = 310 pl->PartnerOperPortState.bit.timeout = B_TRUE; 311 312 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */ 313 314 /* 315 * State machine information. 316 */ 317 pl->sm.lacp_on = B_FALSE; /* LACP Off default */ 318 pl->sm.begin = B_TRUE; /* Prevents transmissions */ 319 pl->sm.lacp_enabled = B_FALSE; 320 pl->sm.port_enabled = B_FALSE; /* Link Down */ 321 pl->sm.actor_churn = B_FALSE; 322 pl->sm.partner_churn = B_FALSE; 323 pl->sm.ready_n = B_FALSE; 324 pl->sm.port_moved = B_FALSE; 325 326 lacp_port_unselect(portp); 327 328 pl->sm.periodic_state = LACP_NO_PERIODIC; 329 pl->sm.receive_state = LACP_INITIALIZE; 330 pl->sm.mux_state = LACP_DETACHED; 331 pl->sm.churn_state = LACP_NO_ACTOR_CHURN; 332 333 /* 334 * Timer information. 335 */ 336 pl->current_while_timer.id = 0; 337 pl->current_while_timer.val = SHORT_TIMEOUT_TIME; 338 339 pl->periodic_timer.id = 0; 340 pl->periodic_timer.val = FAST_PERIODIC_TIME; 341 342 pl->wait_while_timer.id = 0; 343 pl->wait_while_timer.val = AGGREGATE_WAIT_TIME; 344 345 pl->lacp_timer_bits = 0; 346 347 mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL); 348 cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL); 349 350 pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread, 351 portp, 0, &p0, TS_RUN, minclsyspri); 352 353 /* 354 * Hold a reference of the grp and the port and this reference will 355 * be release when the thread exits. 356 * 357 * The reference on the port is used for aggr_port_delete() to 358 * continue without waiting for the thread to exit; the reference 359 * on the grp is used for aggr_grp_delete() to wait for the thread 360 * to exit before calling mac_unregister(). 361 */ 362 aggr_grp_port_hold(portp); 363 } 364 365 /* 366 * Port initialization when we need to 367 * turn LACP on/off, etc. Not everything is 368 * reset like in the above routine. 369 * Do NOT modify things like link status. 370 */ 371 static void 372 lacp_reset_port(aggr_port_t *portp) 373 { 374 aggr_lacp_port_t *pl = &portp->lp_lacp; 375 376 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 377 378 pl->NTT = B_FALSE; /* need to transmit */ 379 380 /* reset operational port state */ 381 pl->ActorOperPortState.bit.timeout = 382 pl->ActorAdminPortState.bit.timeout; 383 384 pl->ActorOperPortState.bit.sync = B_FALSE; 385 pl->ActorOperPortState.bit.collecting = B_FALSE; 386 pl->ActorOperPortState.bit.distributing = B_FALSE; 387 pl->ActorOperPortState.bit.defaulted = B_TRUE; 388 pl->ActorOperPortState.bit.expired = B_FALSE; 389 390 pl->PartnerOperPortState.bit.timeout = B_TRUE; /* fast t/o */ 391 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */ 392 393 /* 394 * State machine information. 395 */ 396 pl->sm.begin = B_TRUE; /* Prevents transmissions */ 397 pl->sm.actor_churn = B_FALSE; 398 pl->sm.partner_churn = B_FALSE; 399 pl->sm.ready_n = B_FALSE; 400 401 lacp_port_unselect(portp); 402 403 pl->sm.periodic_state = LACP_NO_PERIODIC; 404 pl->sm.receive_state = LACP_INITIALIZE; 405 pl->sm.mux_state = LACP_DETACHED; 406 pl->sm.churn_state = LACP_NO_ACTOR_CHURN; 407 408 /* 409 * Timer information. 410 */ 411 pl->current_while_timer.val = SHORT_TIMEOUT_TIME; 412 pl->periodic_timer.val = FAST_PERIODIC_TIME; 413 } 414 415 static void 416 aggr_lacp_mcast_on(aggr_port_t *port) 417 { 418 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); 419 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 420 421 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 422 return; 423 424 (void) aggr_port_multicst(port, B_TRUE, 425 (uchar_t *)&slow_multicast_addr); 426 } 427 428 static void 429 aggr_lacp_mcast_off(aggr_port_t *port) 430 { 431 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); 432 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 433 434 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 435 return; 436 437 (void) aggr_port_multicst(port, B_FALSE, 438 (uchar_t *)&slow_multicast_addr); 439 } 440 441 static void 442 start_periodic_timer(aggr_port_t *portp) 443 { 444 aggr_lacp_port_t *pl = &portp->lp_lacp; 445 446 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 447 448 mutex_enter(&pl->lacp_timer_lock); 449 if (pl->periodic_timer.id == 0) { 450 pl->periodic_timer.id = timeout(periodic_timer_pop, portp, 451 drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val)); 452 } 453 mutex_exit(&pl->lacp_timer_lock); 454 } 455 456 static void 457 stop_periodic_timer(aggr_port_t *portp) 458 { 459 aggr_lacp_port_t *pl = &portp->lp_lacp; 460 timeout_id_t id; 461 462 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 463 464 mutex_enter(&pl->lacp_timer_lock); 465 if ((id = pl->periodic_timer.id) != 0) { 466 pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT; 467 pl->periodic_timer.id = 0; 468 } 469 mutex_exit(&pl->lacp_timer_lock); 470 471 if (id != 0) 472 (void) untimeout(id); 473 } 474 475 /* 476 * When the timer pops, we arrive here to 477 * clear out LACPDU count as well as transmit an 478 * LACPDU. We then set the periodic state and let 479 * the periodic state machine restart the timer. 480 */ 481 static void 482 periodic_timer_pop(void *data) 483 { 484 aggr_port_t *portp = data; 485 aggr_lacp_port_t *pl = &portp->lp_lacp; 486 487 mutex_enter(&pl->lacp_timer_lock); 488 pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT; 489 cv_broadcast(&pl->lacp_timer_cv); 490 mutex_exit(&pl->lacp_timer_lock); 491 } 492 493 /* 494 * When the timer pops, we arrive here to 495 * clear out LACPDU count as well as transmit an 496 * LACPDU. We then set the periodic state and let 497 * the periodic state machine restart the timer. 498 */ 499 static void 500 periodic_timer_pop_handler(aggr_port_t *portp) 501 { 502 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 503 504 portp->lp_lacp_stats.LACPDUsTx = 0; 505 506 /* current timestamp */ 507 portp->lp_lacp.time = gethrtime(); 508 portp->lp_lacp.NTT = B_TRUE; 509 lacp_xmit_sm(portp); 510 511 /* 512 * Set Periodic State machine state based on the 513 * value of the Partner Operation Port State timeout 514 * bit. 515 */ 516 if (portp->lp_lacp.PartnerOperPortState.bit.timeout) { 517 portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME; 518 portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC; 519 } else { 520 portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME; 521 portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC; 522 } 523 524 lacp_periodic_sm(portp); 525 } 526 527 /* 528 * Invoked from: 529 * - startup upon aggregation 530 * - when the periodic timer pops 531 * - when the periodic timer value is changed 532 * - when the port is attached or detached 533 * - when LACP mode is changed. 534 */ 535 static void 536 lacp_periodic_sm(aggr_port_t *portp) 537 { 538 lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state; 539 aggr_lacp_port_t *pl = &portp->lp_lacp; 540 541 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 542 543 /* LACP_OFF state not in specification so check here. */ 544 if (!pl->sm.lacp_on) { 545 /* Stop timer whether it is running or not */ 546 stop_periodic_timer(portp); 547 pl->sm.periodic_state = LACP_NO_PERIODIC; 548 pl->NTT = B_FALSE; 549 AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP " 550 "%s--->%s\n", portp->lp_linkid, 551 lacp_periodic_str[oldstate], 552 lacp_periodic_str[pl->sm.periodic_state])); 553 return; 554 } 555 556 if (pl->sm.begin || !pl->sm.lacp_enabled || 557 !pl->sm.port_enabled || 558 (!pl->ActorOperPortState.bit.activity && 559 !pl->PartnerOperPortState.bit.activity)) { 560 561 /* Stop timer whether it is running or not */ 562 stop_periodic_timer(portp); 563 pl->sm.periodic_state = LACP_NO_PERIODIC; 564 pl->NTT = B_FALSE; 565 AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n", 566 portp->lp_linkid, lacp_periodic_str[oldstate], 567 lacp_periodic_str[pl->sm.periodic_state])); 568 return; 569 } 570 571 /* 572 * Startup with FAST_PERIODIC_TIME if no previous LACPDU 573 * has been received. Then after we timeout, then it is 574 * possible to go to SLOW_PERIODIC_TIME. 575 */ 576 if (pl->sm.periodic_state == LACP_NO_PERIODIC) { 577 pl->periodic_timer.val = FAST_PERIODIC_TIME; 578 pl->sm.periodic_state = LACP_FAST_PERIODIC; 579 } else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) && 580 pl->PartnerOperPortState.bit.timeout) { 581 /* 582 * If we receive a bit indicating we are going to 583 * fast periodic from slow periodic, stop the timer 584 * and let the periodic_timer_pop routine deal 585 * with reseting the periodic state and transmitting 586 * a LACPDU. 587 */ 588 stop_periodic_timer(portp); 589 periodic_timer_pop_handler(portp); 590 } 591 592 /* Rearm timer with value provided by partner */ 593 start_periodic_timer(portp); 594 } 595 596 /* 597 * This routine transmits an LACPDU if lacp_enabled 598 * is TRUE and if NTT is set. 599 */ 600 static void 601 lacp_xmit_sm(aggr_port_t *portp) 602 { 603 aggr_lacp_port_t *pl = &portp->lp_lacp; 604 size_t len; 605 mblk_t *mp; 606 hrtime_t now, elapsed; 607 608 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 609 610 /* LACP_OFF state not in specification so check here. */ 611 if (!pl->sm.lacp_on || !pl->NTT) 612 return; 613 614 /* 615 * Do nothing if LACP has been turned off or if the 616 * periodic state machine is not enabled. 617 */ 618 if ((pl->sm.periodic_state == LACP_NO_PERIODIC) || 619 !pl->sm.lacp_enabled || pl->sm.begin) { 620 pl->NTT = B_FALSE; 621 return; 622 } 623 624 /* 625 * If we have sent 5 Slow packets in the last second, avoid 626 * sending any more here. No more than three LACPDUs may be transmitted 627 * in any Fast_Periodic_Time interval. 628 */ 629 if (portp->lp_lacp_stats.LACPDUsTx >= 3) { 630 /* 631 * Grab the current time value and see if 632 * more than 1 second has passed. If so, 633 * reset the timestamp and clear the count. 634 */ 635 now = gethrtime(); 636 elapsed = now - pl->time; 637 if (elapsed > NSECS_PER_SEC) { 638 portp->lp_lacp_stats.LACPDUsTx = 0; 639 pl->time = now; 640 } else { 641 return; 642 } 643 } 644 645 len = sizeof (lacp_t) + sizeof (struct ether_header); 646 mp = allocb(len, BPRI_MED); 647 if (mp == NULL) 648 return; 649 650 mp->b_wptr = mp->b_rptr + len; 651 bzero(mp->b_rptr, len); 652 653 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr); 654 fill_lacp_pdu(portp, 655 (lacp_t *)(mp->b_rptr + sizeof (struct ether_header))); 656 657 /* Send the packet over the first TX ring */ 658 mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp); 659 if (mp != NULL) 660 freemsg(mp); 661 662 pl->NTT = B_FALSE; 663 portp->lp_lacp_stats.LACPDUsTx++; 664 } 665 666 /* 667 * Initialize the ethernet header of a LACP packet sent from the specified 668 * port. 669 */ 670 static void 671 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether) 672 { 673 bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL); 674 bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost), 675 ETHERADDRL); 676 ether->ether_type = htons(ETHERTYPE_SLOW); 677 } 678 679 static void 680 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp) 681 { 682 aggr_lacp_port_t *pl = &portp->lp_lacp; 683 aggr_grp_t *aggrp = portp->lp_grp; 684 mac_perim_handle_t pmph; 685 686 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 687 mac_perim_enter_by_mh(portp->lp_mh, &pmph); 688 689 lacp->subtype = LACP_SUBTYPE; 690 lacp->version = LACP_VERSION; 691 692 /* 693 * Actor Information 694 */ 695 lacp->actor_info.tlv_type = ACTOR_TLV; 696 lacp->actor_info.information_len = sizeof (link_info_t); 697 lacp->actor_info.system_priority = 698 htons(aggrp->aggr.ActorSystemPriority); 699 bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id, 700 ETHERADDRL); 701 lacp->actor_info.key = htons(pl->ActorOperPortKey); 702 lacp->actor_info.port_priority = htons(pl->ActorPortPriority); 703 lacp->actor_info.port = htons(pl->ActorPortNumber); 704 lacp->actor_info.state.state = pl->ActorOperPortState.state; 705 706 /* 707 * Partner Information 708 */ 709 lacp->partner_info.tlv_type = PARTNER_TLV; 710 lacp->partner_info.information_len = sizeof (link_info_t); 711 lacp->partner_info.system_priority = 712 htons(pl->PartnerOperSysPriority); 713 lacp->partner_info.system_id = pl->PartnerOperSystem; 714 lacp->partner_info.key = htons(pl->PartnerOperKey); 715 lacp->partner_info.port_priority = 716 htons(pl->PartnerOperPortPriority); 717 lacp->partner_info.port = htons(pl->PartnerOperPortNum); 718 lacp->partner_info.state.state = pl->PartnerOperPortState.state; 719 720 /* Collector Information */ 721 lacp->tlv_collector = COLLECTOR_TLV; 722 lacp->collector_len = 0x10; 723 lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay); 724 725 /* Termination Information */ 726 lacp->tlv_terminator = TERMINATOR_TLV; 727 lacp->terminator_len = 0x0; 728 729 mac_perim_exit(pmph); 730 } 731 732 /* 733 * lacp_mux_sm - LACP mux state machine 734 * This state machine is invoked from: 735 * - startup upon aggregation 736 * - from the Selection logic 737 * - when the wait_while_timer pops 738 * - when the aggregation MAC address is changed 739 * - when receiving DL_NOTE_LINK_UP/DOWN 740 * - when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL 741 * - when LACP mode is changed. 742 * - when a DL_NOTE_SPEED is received 743 */ 744 static void 745 lacp_mux_sm(aggr_port_t *portp) 746 { 747 aggr_grp_t *aggrp = portp->lp_grp; 748 boolean_t NTT_updated = B_FALSE; 749 aggr_lacp_port_t *pl = &portp->lp_lacp; 750 lacp_mux_state_t oldstate = pl->sm.mux_state; 751 752 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 753 754 /* LACP_OFF state not in specification so check here. */ 755 if (!pl->sm.lacp_on) { 756 pl->sm.mux_state = LACP_DETACHED; 757 pl->ActorOperPortState.bit.sync = B_FALSE; 758 759 if (pl->ActorOperPortState.bit.collecting || 760 pl->ActorOperPortState.bit.distributing) { 761 AGGR_LACP_DBG(("trunk link: (%d): " 762 "Collector_Distributor Disabled.\n", 763 portp->lp_linkid)); 764 } 765 766 pl->ActorOperPortState.bit.collecting = 767 pl->ActorOperPortState.bit.distributing = B_FALSE; 768 return; 769 } 770 771 if (pl->sm.begin || !pl->sm.lacp_enabled) 772 pl->sm.mux_state = LACP_DETACHED; 773 774 again: 775 /* determine next state, or return if state unchanged */ 776 switch (pl->sm.mux_state) { 777 case LACP_DETACHED: 778 if (pl->sm.begin) { 779 break; 780 } 781 782 if ((pl->sm.selected == AGGR_SELECTED) || 783 (pl->sm.selected == AGGR_STANDBY)) { 784 pl->sm.mux_state = LACP_WAITING; 785 break; 786 } 787 return; 788 789 case LACP_WAITING: 790 if (pl->sm.selected == AGGR_UNSELECTED) { 791 pl->sm.mux_state = LACP_DETACHED; 792 break; 793 } 794 795 if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) { 796 pl->sm.mux_state = LACP_ATTACHED; 797 break; 798 } 799 return; 800 801 case LACP_ATTACHED: 802 if ((pl->sm.selected == AGGR_UNSELECTED) || 803 (pl->sm.selected == AGGR_STANDBY)) { 804 pl->sm.mux_state = LACP_DETACHED; 805 break; 806 } 807 808 if ((pl->sm.selected == AGGR_SELECTED) && 809 pl->PartnerOperPortState.bit.sync) { 810 pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING; 811 break; 812 } 813 return; 814 815 case LACP_COLLECTING_DISTRIBUTING: 816 if ((pl->sm.selected == AGGR_UNSELECTED) || 817 (pl->sm.selected == AGGR_STANDBY) || 818 !pl->PartnerOperPortState.bit.sync) { 819 pl->sm.mux_state = LACP_ATTACHED; 820 break; 821 } 822 return; 823 } 824 825 AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n", 826 portp->lp_linkid, lacp_mux_str[oldstate], 827 lacp_mux_str[pl->sm.mux_state])); 828 829 /* perform actions on entering a new state */ 830 switch (pl->sm.mux_state) { 831 case LACP_DETACHED: 832 if (pl->ActorOperPortState.bit.collecting || 833 pl->ActorOperPortState.bit.distributing) { 834 AGGR_LACP_DBG(("trunk link: (%d): " 835 "Collector_Distributor Disabled.\n", 836 portp->lp_linkid)); 837 } 838 839 pl->ActorOperPortState.bit.sync = 840 pl->ActorOperPortState.bit.collecting = B_FALSE; 841 842 /* Turn OFF Collector_Distributor */ 843 aggr_set_coll_dist(portp, B_FALSE); 844 845 pl->ActorOperPortState.bit.distributing = B_FALSE; 846 NTT_updated = B_TRUE; 847 break; 848 849 case LACP_WAITING: 850 start_wait_while_timer(portp); 851 break; 852 853 case LACP_ATTACHED: 854 if (pl->ActorOperPortState.bit.collecting || 855 pl->ActorOperPortState.bit.distributing) { 856 AGGR_LACP_DBG(("trunk link: (%d): " 857 "Collector_Distributor Disabled.\n", 858 portp->lp_linkid)); 859 } 860 861 pl->ActorOperPortState.bit.sync = B_TRUE; 862 pl->ActorOperPortState.bit.collecting = B_FALSE; 863 864 /* Turn OFF Collector_Distributor */ 865 aggr_set_coll_dist(portp, B_FALSE); 866 867 pl->ActorOperPortState.bit.distributing = B_FALSE; 868 NTT_updated = B_TRUE; 869 if (pl->PartnerOperPortState.bit.sync) { 870 /* 871 * We had already received an updated sync from 872 * the partner. Attempt to transition to 873 * collecting/distributing now. 874 */ 875 goto again; 876 } 877 break; 878 879 case LACP_COLLECTING_DISTRIBUTING: 880 if (!pl->ActorOperPortState.bit.collecting && 881 !pl->ActorOperPortState.bit.distributing) { 882 AGGR_LACP_DBG(("trunk link: (%d): " 883 "Collector_Distributor Enabled.\n", 884 portp->lp_linkid)); 885 } 886 pl->ActorOperPortState.bit.distributing = B_TRUE; 887 888 /* Turn Collector_Distributor back ON */ 889 aggr_set_coll_dist(portp, B_TRUE); 890 891 pl->ActorOperPortState.bit.collecting = B_TRUE; 892 NTT_updated = B_TRUE; 893 break; 894 } 895 896 /* 897 * If we updated the state of the NTT variable, then 898 * initiate a LACPDU transmission. 899 */ 900 if (NTT_updated) { 901 pl->NTT = B_TRUE; 902 lacp_xmit_sm(portp); 903 } 904 } /* lacp_mux_sm */ 905 906 907 static int 908 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp) 909 { 910 marker_pdu_t *markerp = (marker_pdu_t *)mp->b_rptr; 911 912 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 913 914 AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n", 915 portp->lp_linkid)); 916 917 /* LACP_OFF state not in specification so check here. */ 918 if (!portp->lp_lacp.sm.lacp_on) 919 return (-1); 920 921 if (MBLKL(mp) < sizeof (marker_pdu_t)) 922 return (-1); 923 924 if (markerp->version != MARKER_VERSION) { 925 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " 926 "version = %d does not match s/w version %d\n", 927 portp->lp_linkid, markerp->version, MARKER_VERSION)); 928 return (-1); 929 } 930 931 if (markerp->tlv_marker == MARKER_RESPONSE_TLV) { 932 /* We do not yet send out MARKER info PDUs */ 933 AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: " 934 " MARKER TLV = %d - We don't send out info type!\n", 935 portp->lp_linkid, markerp->tlv_marker)); 936 return (-1); 937 } 938 939 if (markerp->tlv_marker != MARKER_INFO_TLV) { 940 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " 941 " MARKER TLV = %d \n", portp->lp_linkid, 942 markerp->tlv_marker)); 943 return (-1); 944 } 945 946 if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) { 947 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " 948 " MARKER length = %d \n", portp->lp_linkid, 949 markerp->marker_len)); 950 return (-1); 951 } 952 953 if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) { 954 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: " 955 " MARKER Port %d not equal to Partner port %d\n", 956 portp->lp_linkid, markerp->requestor_port, 957 portp->lp_lacp.PartnerOperPortNum)); 958 return (-1); 959 } 960 961 if (ether_cmp(&markerp->system_id, 962 &portp->lp_lacp.PartnerOperSystem) != 0) { 963 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: " 964 " MARKER MAC not equal to Partner MAC\n", 965 portp->lp_linkid)); 966 return (-1); 967 } 968 969 /* 970 * Turn into Marker Response PDU 971 * and return mblk to sending system 972 */ 973 markerp->tlv_marker = MARKER_RESPONSE_TLV; 974 975 /* reuse the space that was used by received ethernet header */ 976 ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header)); 977 mp->b_rptr -= sizeof (struct ether_header); 978 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr); 979 return (0); 980 } 981 982 /* 983 * Update the LACP mode (off, active, or passive) of the specified group. 984 */ 985 void 986 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode) 987 { 988 aggr_lacp_mode_t old_mode = grp->lg_lacp_mode; 989 aggr_port_t *port; 990 991 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 992 ASSERT(!grp->lg_closing); 993 994 if (mode == old_mode) 995 return; 996 997 grp->lg_lacp_mode = mode; 998 999 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1000 port->lp_lacp.ActorAdminPortState.bit.activity = 1001 port->lp_lacp.ActorOperPortState.bit.activity = 1002 (mode == AGGR_LACP_ACTIVE); 1003 1004 if (old_mode == AGGR_LACP_OFF) { 1005 /* OFF -> {PASSIVE,ACTIVE} */ 1006 /* turn OFF Collector_Distributor */ 1007 aggr_set_coll_dist(port, B_FALSE); 1008 lacp_on(port); 1009 } else if (mode == AGGR_LACP_OFF) { 1010 /* {PASSIVE,ACTIVE} -> OFF */ 1011 lacp_off(port); 1012 /* Turn ON Collector_Distributor */ 1013 aggr_set_coll_dist(port, B_TRUE); 1014 } else { 1015 /* PASSIVE->ACTIVE or ACTIVE->PASSIVE */ 1016 port->lp_lacp.sm.begin = B_TRUE; 1017 lacp_mux_sm(port); 1018 lacp_periodic_sm(port); 1019 1020 /* kick off state machines */ 1021 lacp_receive_sm(port, NULL); 1022 lacp_mux_sm(port); 1023 } 1024 } 1025 } 1026 1027 1028 /* 1029 * Update the LACP timer (short or long) of the specified group. 1030 */ 1031 void 1032 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer) 1033 { 1034 aggr_port_t *port; 1035 1036 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1037 1038 if (timer == grp->aggr.PeriodicTimer) 1039 return; 1040 1041 grp->aggr.PeriodicTimer = timer; 1042 1043 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1044 port->lp_lacp.ActorAdminPortState.bit.timeout = 1045 port->lp_lacp.ActorOperPortState.bit.timeout = 1046 (timer == AGGR_LACP_TIMER_SHORT); 1047 } 1048 } 1049 1050 void 1051 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port) 1052 { 1053 aggr_lacp_mode_t mode; 1054 aggr_lacp_timer_t timer; 1055 1056 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1057 1058 mode = grp->lg_lacp_mode; 1059 timer = grp->aggr.PeriodicTimer; 1060 1061 port->lp_lacp.ActorAdminPortState.bit.activity = 1062 port->lp_lacp.ActorOperPortState.bit.activity = 1063 (mode == AGGR_LACP_ACTIVE); 1064 1065 port->lp_lacp.ActorAdminPortState.bit.timeout = 1066 port->lp_lacp.ActorOperPortState.bit.timeout = 1067 (timer == AGGR_LACP_TIMER_SHORT); 1068 1069 if (mode == AGGR_LACP_OFF) { 1070 /* Turn ON Collector_Distributor */ 1071 aggr_set_coll_dist(port, B_TRUE); 1072 } else { /* LACP_ACTIVE/PASSIVE */ 1073 lacp_on(port); 1074 } 1075 } 1076 1077 /* 1078 * Sets the initial LACP mode (off, active, passive) and LACP timer 1079 * (short, long) of the specified group. 1080 */ 1081 void 1082 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode, 1083 aggr_lacp_timer_t timer) 1084 { 1085 aggr_port_t *port; 1086 1087 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1088 1089 grp->lg_lacp_mode = mode; 1090 grp->aggr.PeriodicTimer = timer; 1091 1092 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 1093 aggr_port_lacp_set_mode(grp, port); 1094 } 1095 1096 /* 1097 * Verify that the Partner MAC and Key recorded by the specified 1098 * port are not found in other ports that are not part of our 1099 * aggregation. Returns B_TRUE if such a port is found, B_FALSE 1100 * otherwise. 1101 */ 1102 static boolean_t 1103 lacp_misconfig_check(aggr_port_t *portp) 1104 { 1105 aggr_grp_t *grp = portp->lp_grp; 1106 lacp_sel_ports_t *cport; 1107 1108 mutex_enter(&lacp_sel_lock); 1109 1110 for (cport = sel_ports; cport != NULL; cport = cport->sp_next) { 1111 1112 /* skip entries of the group of the port being checked */ 1113 if (cport->sp_grp_linkid == grp->lg_linkid) 1114 continue; 1115 1116 if ((ether_cmp(&cport->sp_partner_system, 1117 &grp->aggr.PartnerSystem) == 0) && 1118 (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) { 1119 char mac_str[ETHERADDRL*3]; 1120 struct ether_addr *mac = &cport->sp_partner_system; 1121 1122 /* 1123 * The Partner port information is already in use 1124 * by ports in another aggregation so disable this 1125 * port. 1126 */ 1127 1128 (void) snprintf(mac_str, sizeof (mac_str), 1129 "%x:%x:%x:%x:%x:%x", 1130 mac->ether_addr_octet[0], mac->ether_addr_octet[1], 1131 mac->ether_addr_octet[2], mac->ether_addr_octet[3], 1132 mac->ether_addr_octet[4], mac->ether_addr_octet[5]); 1133 1134 portp->lp_lacp.sm.selected = AGGR_UNSELECTED; 1135 1136 cmn_err(CE_NOTE, "aggr %d port %d: Port Partner " 1137 "MAC %s and key %d in use on aggregation %d " 1138 "port %d\n", grp->lg_linkid, portp->lp_linkid, 1139 mac_str, portp->lp_lacp.PartnerOperKey, 1140 cport->sp_grp_linkid, cport->sp_linkid); 1141 break; 1142 } 1143 } 1144 1145 mutex_exit(&lacp_sel_lock); 1146 return (cport != NULL); 1147 } 1148 1149 /* 1150 * Remove the specified port from the list of selected ports. 1151 */ 1152 static void 1153 lacp_sel_ports_del(aggr_port_t *portp) 1154 { 1155 lacp_sel_ports_t *cport, **prev = NULL; 1156 1157 mutex_enter(&lacp_sel_lock); 1158 1159 prev = &sel_ports; 1160 for (cport = sel_ports; cport != NULL; prev = &cport->sp_next, 1161 cport = cport->sp_next) { 1162 if (portp->lp_linkid == cport->sp_linkid) 1163 break; 1164 } 1165 1166 if (cport == NULL) { 1167 mutex_exit(&lacp_sel_lock); 1168 return; 1169 } 1170 1171 *prev = cport->sp_next; 1172 kmem_free(cport, sizeof (*cport)); 1173 1174 mutex_exit(&lacp_sel_lock); 1175 } 1176 1177 /* 1178 * Add the specified port to the list of selected ports. Returns B_FALSE 1179 * if the operation could not be performed due to an memory allocation 1180 * error. 1181 */ 1182 static boolean_t 1183 lacp_sel_ports_add(aggr_port_t *portp) 1184 { 1185 lacp_sel_ports_t *new_port; 1186 lacp_sel_ports_t *cport, **last; 1187 1188 mutex_enter(&lacp_sel_lock); 1189 1190 /* check if port is already in the list */ 1191 last = &sel_ports; 1192 for (cport = sel_ports; cport != NULL; 1193 last = &cport->sp_next, cport = cport->sp_next) { 1194 if (portp->lp_linkid == cport->sp_linkid) { 1195 ASSERT(cport->sp_partner_key == 1196 portp->lp_lacp.PartnerOperKey); 1197 ASSERT(ether_cmp(&cport->sp_partner_system, 1198 &portp->lp_lacp.PartnerOperSystem) == 0); 1199 1200 mutex_exit(&lacp_sel_lock); 1201 return (B_TRUE); 1202 } 1203 } 1204 1205 /* create and initialize new entry */ 1206 new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP); 1207 if (new_port == NULL) { 1208 mutex_exit(&lacp_sel_lock); 1209 return (B_FALSE); 1210 } 1211 1212 new_port->sp_grp_linkid = portp->lp_grp->lg_linkid; 1213 bcopy(&portp->lp_lacp.PartnerOperSystem, 1214 &new_port->sp_partner_system, sizeof (new_port->sp_partner_system)); 1215 new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey; 1216 new_port->sp_linkid = portp->lp_linkid; 1217 1218 *last = new_port; 1219 1220 mutex_exit(&lacp_sel_lock); 1221 return (B_TRUE); 1222 } 1223 1224 /* 1225 * lacp_selection_logic - LACP selection logic 1226 * Sets the selected variable on a per port basis 1227 * and sets Ready when all waiting ports are ready 1228 * to go online. 1229 * 1230 * parameters: 1231 * - portp - instance this applies to. 1232 * 1233 * invoked: 1234 * - when initialization is needed 1235 * - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state 1236 * - When the lacp_receive_sm goes to the LACP_DEFAULTED state 1237 * - every time the wait_while_timer pops 1238 * - everytime we turn LACP on/off 1239 */ 1240 static void 1241 lacp_selection_logic(aggr_port_t *portp) 1242 { 1243 aggr_port_t *tpp; 1244 aggr_grp_t *aggrp = portp->lp_grp; 1245 int ports_waiting; 1246 boolean_t reset_mac = B_FALSE; 1247 aggr_lacp_port_t *pl = &portp->lp_lacp; 1248 1249 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 1250 1251 /* LACP_OFF state not in specification so check here. */ 1252 if (!pl->sm.lacp_on) { 1253 lacp_port_unselect(portp); 1254 aggrp->aggr.ready = B_FALSE; 1255 lacp_mux_sm(portp); 1256 return; 1257 } 1258 1259 if (pl->sm.begin || !pl->sm.lacp_enabled || 1260 (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) { 1261 1262 AGGR_LACP_DBG(("lacp_selection_logic:(%d): " 1263 "selected %d-->%d (begin=%d, lacp_enabled = %d, " 1264 "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected, 1265 AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled, 1266 portp->lp_state)); 1267 1268 lacp_port_unselect(portp); 1269 aggrp->aggr.ready = B_FALSE; 1270 lacp_mux_sm(portp); 1271 return; 1272 } 1273 1274 /* 1275 * If LACP is not enabled then selected is never set. 1276 */ 1277 if (!pl->sm.lacp_enabled) { 1278 AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n", 1279 portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED)); 1280 1281 lacp_port_unselect(portp); 1282 lacp_mux_sm(portp); 1283 return; 1284 } 1285 1286 /* 1287 * Check if the Partner MAC or Key are zero. If so, we have 1288 * not received any LACP info or it has expired and the 1289 * receive machine is in the LACP_DEFAULTED state. 1290 */ 1291 if (ether_cmp(&pl->PartnerOperSystem, ðerzeroaddr) == 0 || 1292 (pl->PartnerOperKey == 0)) { 1293 1294 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) { 1295 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, 1296 ðerzeroaddr) != 0 && 1297 (tpp->lp_lacp.PartnerOperKey != 0)) 1298 break; 1299 } 1300 1301 /* 1302 * If all ports have no key or aggregation address, 1303 * then clear the negotiated Partner MAC and key. 1304 */ 1305 if (tpp == NULL) { 1306 /* Clear the aggregation Partner MAC and key */ 1307 aggrp->aggr.PartnerSystem = etherzeroaddr; 1308 aggrp->aggr.PartnerOperAggrKey = 0; 1309 } 1310 1311 return; 1312 } 1313 1314 /* 1315 * Insure that at least one port in the aggregation 1316 * matches the Partner aggregation MAC and key. If not, 1317 * then clear the aggregation MAC and key. Later we will 1318 * set the Partner aggregation MAC and key to that of the 1319 * current port's Partner MAC and key. 1320 */ 1321 if (ether_cmp(&pl->PartnerOperSystem, 1322 &aggrp->aggr.PartnerSystem) != 0 || 1323 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) { 1324 1325 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) { 1326 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, 1327 &aggrp->aggr.PartnerSystem) == 0 && 1328 (tpp->lp_lacp.PartnerOperKey == 1329 aggrp->aggr.PartnerOperAggrKey)) { 1330 /* Set aggregation Partner MAC and key */ 1331 aggrp->aggr.PartnerSystem = 1332 pl->PartnerOperSystem; 1333 aggrp->aggr.PartnerOperAggrKey = 1334 pl->PartnerOperKey; 1335 break; 1336 } 1337 } 1338 1339 if (tpp == NULL) { 1340 /* Clear the aggregation Partner MAC and key */ 1341 aggrp->aggr.PartnerSystem = etherzeroaddr; 1342 aggrp->aggr.PartnerOperAggrKey = 0; 1343 reset_mac = B_TRUE; 1344 } 1345 } 1346 1347 /* 1348 * If our Actor MAC is found in the Partner MAC 1349 * on this port then we have a loopback misconfiguration. 1350 */ 1351 if (ether_cmp(&pl->PartnerOperSystem, 1352 (struct ether_addr *)&aggrp->lg_addr) == 0) { 1353 cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n", 1354 portp->lp_linkid); 1355 1356 lacp_port_unselect(portp); 1357 lacp_mux_sm(portp); 1358 return; 1359 } 1360 1361 /* 1362 * If our Partner MAC and Key are found on any other 1363 * ports that are not in our aggregation, we have 1364 * a misconfiguration. 1365 */ 1366 if (lacp_misconfig_check(portp)) { 1367 lacp_mux_sm(portp); 1368 return; 1369 } 1370 1371 /* 1372 * If the Aggregation Partner MAC and Key have not been 1373 * set, then this is either the first port or the aggregation 1374 * MAC and key have been reset. In either case we must set 1375 * the values of the Partner MAC and key. 1376 */ 1377 if (ether_cmp(&aggrp->aggr.PartnerSystem, ðerzeroaddr) == 0 && 1378 (aggrp->aggr.PartnerOperAggrKey == 0)) { 1379 /* Set aggregation Partner MAC and key */ 1380 aggrp->aggr.PartnerSystem = pl->PartnerOperSystem; 1381 aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey; 1382 1383 /* 1384 * If we reset Partner aggregation MAC, then restart 1385 * selection_logic on ports that match new MAC address. 1386 */ 1387 if (reset_mac) { 1388 for (tpp = aggrp->lg_ports; tpp; tpp = 1389 tpp->lp_next) { 1390 if (tpp == portp) 1391 continue; 1392 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, 1393 &aggrp->aggr.PartnerSystem) == 0 && 1394 (tpp->lp_lacp.PartnerOperKey == 1395 aggrp->aggr.PartnerOperAggrKey)) 1396 lacp_selection_logic(tpp); 1397 } 1398 } 1399 } else if (ether_cmp(&pl->PartnerOperSystem, 1400 &aggrp->aggr.PartnerSystem) != 0 || 1401 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) { 1402 /* 1403 * The Partner port information does not match 1404 * that of the other ports in the aggregation 1405 * so disable this port. 1406 */ 1407 lacp_port_unselect(portp); 1408 1409 cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC " 1410 "or key (%d) incompatible with Aggregation Partner " 1411 "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey, 1412 aggrp->aggr.PartnerOperAggrKey); 1413 1414 lacp_mux_sm(portp); 1415 return; 1416 } 1417 1418 /* If we get to here, automatically set selected */ 1419 if (pl->sm.selected != AGGR_SELECTED) { 1420 AGGR_LACP_DBG(("lacp_selection_logic:(%d): " 1421 "selected %d-->%d\n", portp->lp_linkid, 1422 pl->sm.selected, AGGR_SELECTED)); 1423 if (!lacp_port_select(portp)) 1424 return; 1425 lacp_mux_sm(portp); 1426 } 1427 1428 /* 1429 * From this point onward we have selected the port 1430 * and are simply checking if the Ready flag should 1431 * be set. 1432 */ 1433 1434 /* 1435 * If at least two ports are waiting to aggregate 1436 * and ready_n is set on all ports waiting to aggregate 1437 * then set READY for the aggregation. 1438 */ 1439 1440 ports_waiting = 0; 1441 1442 if (!aggrp->aggr.ready) { 1443 /* 1444 * If all ports in the aggregation have received compatible 1445 * partner information and they match up correctly with the 1446 * switch, there is no need to wait for all the 1447 * wait_while_timers to pop. 1448 */ 1449 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) { 1450 if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) || 1451 tpp->lp_lacp.sm.begin) && 1452 !tpp->lp_lacp.PartnerOperPortState.bit.sync) { 1453 /* Add up ports uninitialized or waiting */ 1454 ports_waiting++; 1455 if (!tpp->lp_lacp.sm.ready_n) { 1456 DTRACE_PROBE1(port___not__ready, 1457 aggr_port_t *, tpp); 1458 return; 1459 } 1460 } 1461 } 1462 } 1463 1464 if (aggrp->aggr.ready) { 1465 AGGR_LACP_DBG(("lacp_selection_logic:(%d): " 1466 "aggr.ready already set\n", portp->lp_linkid)); 1467 lacp_mux_sm(portp); 1468 } else { 1469 AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n", 1470 portp->lp_linkid, aggrp->aggr.ready, B_TRUE)); 1471 aggrp->aggr.ready = B_TRUE; 1472 1473 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) 1474 lacp_mux_sm(tpp); 1475 } 1476 1477 } 1478 1479 /* 1480 * wait_while_timer_pop - When the timer pops, we arrive here to 1481 * set ready_n and trigger the selection logic. 1482 */ 1483 static void 1484 wait_while_timer_pop(void *data) 1485 { 1486 aggr_port_t *portp = data; 1487 aggr_lacp_port_t *pl = &portp->lp_lacp; 1488 1489 mutex_enter(&pl->lacp_timer_lock); 1490 pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT; 1491 cv_broadcast(&pl->lacp_timer_cv); 1492 mutex_exit(&pl->lacp_timer_lock); 1493 } 1494 1495 /* 1496 * wait_while_timer_pop_handler - When the timer pops, we arrive here to 1497 * set ready_n and trigger the selection logic. 1498 */ 1499 static void 1500 wait_while_timer_pop_handler(aggr_port_t *portp) 1501 { 1502 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1503 1504 AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n", 1505 portp->lp_linkid)); 1506 portp->lp_lacp.sm.ready_n = B_TRUE; 1507 1508 lacp_selection_logic(portp); 1509 } 1510 1511 static void 1512 start_wait_while_timer(aggr_port_t *portp) 1513 { 1514 aggr_lacp_port_t *pl = &portp->lp_lacp; 1515 1516 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1517 1518 mutex_enter(&pl->lacp_timer_lock); 1519 if (pl->wait_while_timer.id == 0) { 1520 pl->wait_while_timer.id = 1521 timeout(wait_while_timer_pop, portp, 1522 drv_usectohz(1000000 * 1523 portp->lp_lacp.wait_while_timer.val)); 1524 } 1525 mutex_exit(&pl->lacp_timer_lock); 1526 } 1527 1528 1529 static void 1530 stop_wait_while_timer(aggr_port_t *portp) 1531 { 1532 aggr_lacp_port_t *pl = &portp->lp_lacp; 1533 timeout_id_t id; 1534 1535 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1536 1537 mutex_enter(&pl->lacp_timer_lock); 1538 if ((id = pl->wait_while_timer.id) != 0) { 1539 pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT; 1540 pl->wait_while_timer.id = 0; 1541 } 1542 mutex_exit(&pl->lacp_timer_lock); 1543 1544 if (id != 0) 1545 (void) untimeout(id); 1546 } 1547 1548 /* 1549 * Invoked when a port has been attached to a group. 1550 * Complete the processing that couldn't be finished from lacp_on() 1551 * because the port was not started. We know that the link is full 1552 * duplex and ON, otherwise it wouldn't be attached. 1553 */ 1554 void 1555 aggr_lacp_port_attached(aggr_port_t *portp) 1556 { 1557 aggr_grp_t *grp = portp->lp_grp; 1558 aggr_lacp_port_t *pl = &portp->lp_lacp; 1559 1560 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1561 ASSERT(MAC_PERIM_HELD(portp->lp_mh)); 1562 ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED); 1563 1564 AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n", 1565 portp->lp_linkid)); 1566 1567 portp->lp_lacp.sm.port_enabled = B_TRUE; /* link on */ 1568 1569 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 1570 return; 1571 1572 pl->sm.lacp_enabled = B_TRUE; 1573 pl->ActorOperPortState.bit.aggregation = B_TRUE; 1574 pl->sm.begin = B_TRUE; 1575 1576 lacp_receive_sm(portp, NULL); 1577 lacp_mux_sm(portp); 1578 1579 /* Enable Multicast Slow Protocol address */ 1580 aggr_lacp_mcast_on(portp); 1581 1582 /* periodic_sm is started up from the receive machine */ 1583 lacp_selection_logic(portp); 1584 } 1585 1586 /* 1587 * Invoked when a port has been detached from a group. Turn off 1588 * LACP processing if it was enabled. 1589 */ 1590 void 1591 aggr_lacp_port_detached(aggr_port_t *portp) 1592 { 1593 aggr_grp_t *grp = portp->lp_grp; 1594 1595 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1596 ASSERT(MAC_PERIM_HELD(portp->lp_mh)); 1597 1598 AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n", 1599 portp->lp_linkid)); 1600 1601 portp->lp_lacp.sm.port_enabled = B_FALSE; 1602 1603 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 1604 return; 1605 1606 portp->lp_lacp.sm.lacp_enabled = B_FALSE; 1607 lacp_selection_logic(portp); 1608 lacp_mux_sm(portp); 1609 lacp_periodic_sm(portp); 1610 1611 /* 1612 * Disable Slow Protocol Timers. 1613 */ 1614 stop_periodic_timer(portp); 1615 stop_current_while_timer(portp); 1616 stop_wait_while_timer(portp); 1617 1618 /* Disable Multicast Slow Protocol address */ 1619 aggr_lacp_mcast_off(portp); 1620 aggr_set_coll_dist(portp, B_FALSE); 1621 } 1622 1623 /* 1624 * Enable Slow Protocol LACP and Marker PDUs. 1625 */ 1626 static void 1627 lacp_on(aggr_port_t *portp) 1628 { 1629 aggr_lacp_port_t *pl = &portp->lp_lacp; 1630 mac_perim_handle_t mph; 1631 1632 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1633 1634 mac_perim_enter_by_mh(portp->lp_mh, &mph); 1635 1636 /* 1637 * Reset the state machines and Partner operational 1638 * information. Careful to not reset things like 1639 * our link state. 1640 */ 1641 lacp_reset_port(portp); 1642 pl->sm.lacp_on = B_TRUE; 1643 1644 AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid)); 1645 1646 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) { 1647 pl->sm.port_enabled = B_TRUE; 1648 pl->sm.lacp_enabled = B_TRUE; 1649 pl->ActorOperPortState.bit.aggregation = B_TRUE; 1650 } 1651 1652 lacp_receive_sm(portp, NULL); 1653 lacp_mux_sm(portp); 1654 1655 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) { 1656 /* Enable Multicast Slow Protocol address */ 1657 aggr_lacp_mcast_on(portp); 1658 1659 /* periodic_sm is started up from the receive machine */ 1660 lacp_selection_logic(portp); 1661 } 1662 mac_perim_exit(mph); 1663 } /* lacp_on */ 1664 1665 /* Disable Slow Protocol LACP and Marker PDUs */ 1666 static void 1667 lacp_off(aggr_port_t *portp) 1668 { 1669 aggr_lacp_port_t *pl = &portp->lp_lacp; 1670 mac_perim_handle_t mph; 1671 1672 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1673 mac_perim_enter_by_mh(portp->lp_mh, &mph); 1674 1675 pl->sm.lacp_on = B_FALSE; 1676 1677 AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid)); 1678 1679 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) { 1680 /* 1681 * Disable Slow Protocol Timers. 1682 */ 1683 stop_periodic_timer(portp); 1684 stop_current_while_timer(portp); 1685 stop_wait_while_timer(portp); 1686 1687 /* Disable Multicast Slow Protocol address */ 1688 aggr_lacp_mcast_off(portp); 1689 1690 pl->sm.port_enabled = B_FALSE; 1691 pl->sm.lacp_enabled = B_FALSE; 1692 pl->ActorOperPortState.bit.aggregation = B_FALSE; 1693 } 1694 1695 lacp_mux_sm(portp); 1696 lacp_periodic_sm(portp); 1697 lacp_selection_logic(portp); 1698 1699 /* Turn OFF Collector_Distributor */ 1700 aggr_set_coll_dist(portp, B_FALSE); 1701 1702 lacp_reset_port(portp); 1703 mac_perim_exit(mph); 1704 } 1705 1706 1707 static boolean_t 1708 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp) 1709 { 1710 /* 1711 * 43.4.12 - "a Receive machine shall not validate 1712 * the Version Number, TLV_type, or Reserved fields in received 1713 * LACPDUs." 1714 * ... "a Receive machine may validate the Actor_Information_Length, 1715 * Partner_Information_Length, Collector_Information_Length, 1716 * or Terminator_Length fields." 1717 */ 1718 if ((lacp->actor_info.information_len != sizeof (link_info_t)) || 1719 (lacp->partner_info.information_len != sizeof (link_info_t)) || 1720 (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) || 1721 (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) { 1722 AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: " 1723 " Terminator Length = %d \n", portp->lp_linkid, 1724 lacp->terminator_len)); 1725 return (B_FALSE); 1726 } 1727 1728 return (B_TRUE); 1729 } 1730 1731 1732 static void 1733 start_current_while_timer(aggr_port_t *portp, uint_t time) 1734 { 1735 aggr_lacp_port_t *pl = &portp->lp_lacp; 1736 1737 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1738 1739 mutex_enter(&pl->lacp_timer_lock); 1740 if (pl->current_while_timer.id == 0) { 1741 if (time > 0) 1742 pl->current_while_timer.val = time; 1743 else if (pl->ActorOperPortState.bit.timeout) 1744 pl->current_while_timer.val = SHORT_TIMEOUT_TIME; 1745 else 1746 pl->current_while_timer.val = LONG_TIMEOUT_TIME; 1747 1748 pl->current_while_timer.id = 1749 timeout(current_while_timer_pop, portp, 1750 drv_usectohz((clock_t)1000000 * 1751 (clock_t)portp->lp_lacp.current_while_timer.val)); 1752 } 1753 mutex_exit(&pl->lacp_timer_lock); 1754 } 1755 1756 1757 static void 1758 stop_current_while_timer(aggr_port_t *portp) 1759 { 1760 aggr_lacp_port_t *pl = &portp->lp_lacp; 1761 timeout_id_t id; 1762 1763 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1764 1765 mutex_enter(&pl->lacp_timer_lock); 1766 if ((id = pl->current_while_timer.id) != 0) { 1767 pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT; 1768 pl->current_while_timer.id = 0; 1769 } 1770 mutex_exit(&pl->lacp_timer_lock); 1771 1772 if (id != 0) 1773 (void) untimeout(id); 1774 } 1775 1776 static void 1777 current_while_timer_pop(void *data) 1778 { 1779 aggr_port_t *portp = (aggr_port_t *)data; 1780 aggr_lacp_port_t *pl = &portp->lp_lacp; 1781 1782 mutex_enter(&pl->lacp_timer_lock); 1783 pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT; 1784 cv_broadcast(&pl->lacp_timer_cv); 1785 mutex_exit(&pl->lacp_timer_lock); 1786 } 1787 1788 static void 1789 current_while_timer_pop_handler(aggr_port_t *portp) 1790 { 1791 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1792 1793 AGGR_LACP_DBG(("trunk link:(%d): current_while_timer " 1794 "pop id=%p\n", portp->lp_linkid, 1795 portp->lp_lacp.current_while_timer.id)); 1796 1797 lacp_receive_sm(portp, NULL); 1798 } 1799 1800 /* 1801 * record_Default - Simply copies over administrative values 1802 * to the partner operational values, and sets our state to indicate we 1803 * are using defaulted values. 1804 */ 1805 static void 1806 record_Default(aggr_port_t *portp) 1807 { 1808 aggr_lacp_port_t *pl = &portp->lp_lacp; 1809 1810 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1811 1812 pl->PartnerOperPortNum = pl->PartnerAdminPortNum; 1813 pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority; 1814 pl->PartnerOperSystem = pl->PartnerAdminSystem; 1815 pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority; 1816 pl->PartnerOperKey = pl->PartnerAdminKey; 1817 pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state; 1818 1819 pl->ActorOperPortState.bit.defaulted = B_TRUE; 1820 } 1821 1822 1823 /* Returns B_TRUE on sync value changing */ 1824 static boolean_t 1825 record_PDU(aggr_port_t *portp, lacp_t *lacp) 1826 { 1827 aggr_grp_t *aggrp = portp->lp_grp; 1828 aggr_lacp_port_t *pl = &portp->lp_lacp; 1829 uint8_t save_sync; 1830 1831 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 1832 1833 /* 1834 * Partner Information 1835 */ 1836 pl->PartnerOperPortNum = ntohs(lacp->actor_info.port); 1837 pl->PartnerOperPortPriority = 1838 ntohs(lacp->actor_info.port_priority); 1839 pl->PartnerOperSystem = lacp->actor_info.system_id; 1840 pl->PartnerOperSysPriority = 1841 htons(lacp->actor_info.system_priority); 1842 pl->PartnerOperKey = ntohs(lacp->actor_info.key); 1843 1844 /* All state info except for Synchronization */ 1845 save_sync = pl->PartnerOperPortState.bit.sync; 1846 pl->PartnerOperPortState.state = lacp->actor_info.state.state; 1847 1848 /* Defaulted set to FALSE */ 1849 pl->ActorOperPortState.bit.defaulted = B_FALSE; 1850 1851 /* 1852 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system, 1853 * Partner_System_Priority, Partner_Key, and 1854 * Partner_State.Aggregation) are compared to the 1855 * corresponding operations paramters values for 1856 * the Actor. If these are equal, or if this is 1857 * an individual link, we are synchronized. 1858 */ 1859 if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) && 1860 (ntohs(lacp->partner_info.port_priority) == 1861 pl->ActorPortPriority) && 1862 (ether_cmp(&lacp->partner_info.system_id, 1863 (struct ether_addr *)&aggrp->lg_addr) == 0) && 1864 (ntohs(lacp->partner_info.system_priority) == 1865 aggrp->aggr.ActorSystemPriority) && 1866 (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) && 1867 (lacp->partner_info.state.bit.aggregation == 1868 pl->ActorOperPortState.bit.aggregation)) || 1869 (!lacp->actor_info.state.bit.aggregation)) { 1870 1871 pl->PartnerOperPortState.bit.sync = 1872 lacp->actor_info.state.bit.sync; 1873 } else { 1874 pl->PartnerOperPortState.bit.sync = B_FALSE; 1875 } 1876 1877 if (save_sync != pl->PartnerOperPortState.bit.sync) { 1878 AGGR_LACP_DBG(("record_PDU:(%d): partner sync " 1879 "%d -->%d\n", portp->lp_linkid, save_sync, 1880 pl->PartnerOperPortState.bit.sync)); 1881 return (B_TRUE); 1882 } else { 1883 return (B_FALSE); 1884 } 1885 } 1886 1887 1888 /* 1889 * update_selected - If any of the Partner parameters has 1890 * changed from a previous value, then 1891 * unselect the link from the aggregator. 1892 */ 1893 static boolean_t 1894 update_selected(aggr_port_t *portp, lacp_t *lacp) 1895 { 1896 aggr_lacp_port_t *pl = &portp->lp_lacp; 1897 1898 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1899 1900 if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) || 1901 (pl->PartnerOperPortPriority != 1902 ntohs(lacp->actor_info.port_priority)) || 1903 (ether_cmp(&pl->PartnerOperSystem, 1904 &lacp->actor_info.system_id) != 0) || 1905 (pl->PartnerOperSysPriority != 1906 ntohs(lacp->actor_info.system_priority)) || 1907 (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) || 1908 (pl->PartnerOperPortState.bit.aggregation != 1909 lacp->actor_info.state.bit.aggregation)) { 1910 AGGR_LACP_DBG(("update_selected:(%d): " 1911 "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected, 1912 AGGR_UNSELECTED)); 1913 1914 lacp_port_unselect(portp); 1915 return (B_TRUE); 1916 } else { 1917 return (B_FALSE); 1918 } 1919 } 1920 1921 1922 /* 1923 * update_default_selected - If any of the operational Partner parameters 1924 * is different than that of the administrative values 1925 * then unselect the link from the aggregator. 1926 */ 1927 static void 1928 update_default_selected(aggr_port_t *portp) 1929 { 1930 aggr_lacp_port_t *pl = &portp->lp_lacp; 1931 1932 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1933 1934 if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) || 1935 (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) || 1936 (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) || 1937 (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) || 1938 (pl->PartnerOperKey != pl->PartnerAdminKey) || 1939 (pl->PartnerOperPortState.bit.aggregation != 1940 pl->PartnerAdminPortState.bit.aggregation)) { 1941 1942 AGGR_LACP_DBG(("update_default_selected:(%d): " 1943 "selected %d-->%d\n", portp->lp_linkid, 1944 pl->sm.selected, AGGR_UNSELECTED)); 1945 1946 lacp_port_unselect(portp); 1947 } 1948 } 1949 1950 1951 /* 1952 * update_NTT - If any of the Partner values in the received LACPDU 1953 * are different than that of the Actor operational 1954 * values then set NTT to true. 1955 */ 1956 static void 1957 update_NTT(aggr_port_t *portp, lacp_t *lacp) 1958 { 1959 aggr_grp_t *aggrp = portp->lp_grp; 1960 aggr_lacp_port_t *pl = &portp->lp_lacp; 1961 1962 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 1963 1964 if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) || 1965 (pl->ActorPortPriority != 1966 ntohs(lacp->partner_info.port_priority)) || 1967 (ether_cmp(&aggrp->lg_addr, 1968 &lacp->partner_info.system_id) != 0) || 1969 (aggrp->aggr.ActorSystemPriority != 1970 ntohs(lacp->partner_info.system_priority)) || 1971 (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) || 1972 (pl->ActorOperPortState.bit.activity != 1973 lacp->partner_info.state.bit.activity) || 1974 (pl->ActorOperPortState.bit.timeout != 1975 lacp->partner_info.state.bit.timeout) || 1976 (pl->ActorOperPortState.bit.sync != 1977 lacp->partner_info.state.bit.sync) || 1978 (pl->ActorOperPortState.bit.aggregation != 1979 lacp->partner_info.state.bit.aggregation)) { 1980 1981 AGGR_LACP_DBG(("update_NTT:(%d): NTT %d-->%d\n", 1982 portp->lp_linkid, pl->NTT, B_TRUE)); 1983 1984 pl->NTT = B_TRUE; 1985 } 1986 } 1987 1988 /* 1989 * lacp_receive_sm - LACP receive state machine 1990 * 1991 * parameters: 1992 * - portp - instance this applies to. 1993 * - lacp - pointer in the case of a received LACPDU. 1994 * This value is NULL if there is no LACPDU. 1995 * 1996 * invoked: 1997 * - when initialization is needed 1998 * - upon reception of an LACPDU. This is the common case. 1999 * - every time the current_while_timer pops 2000 */ 2001 static void 2002 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp) 2003 { 2004 boolean_t sync_updated, selected_updated, save_activity; 2005 aggr_lacp_port_t *pl = &portp->lp_lacp; 2006 lacp_receive_state_t oldstate = pl->sm.receive_state; 2007 2008 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 2009 2010 /* LACP_OFF state not in specification so check here. */ 2011 if (!pl->sm.lacp_on) 2012 return; 2013 2014 /* figure next state */ 2015 if (pl->sm.begin || pl->sm.port_moved) { 2016 pl->sm.receive_state = LACP_INITIALIZE; 2017 } else if (!pl->sm.port_enabled) { /* DL_NOTE_LINK_DOWN */ 2018 pl->sm.receive_state = LACP_PORT_DISABLED; 2019 } else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */ 2020 pl->sm.receive_state = 2021 (pl->sm.receive_state == LACP_PORT_DISABLED) ? 2022 LACP_DISABLED : LACP_PORT_DISABLED; 2023 } else if (lacp != NULL) { 2024 if ((pl->sm.receive_state == LACP_EXPIRED) || 2025 (pl->sm.receive_state == LACP_DEFAULTED)) { 2026 pl->sm.receive_state = LACP_CURRENT; 2027 } 2028 } else if ((pl->sm.receive_state == LACP_CURRENT) && 2029 (pl->current_while_timer.id == 0)) { 2030 pl->sm.receive_state = LACP_EXPIRED; 2031 } else if ((pl->sm.receive_state == LACP_EXPIRED) && 2032 (pl->current_while_timer.id == 0)) { 2033 pl->sm.receive_state = LACP_DEFAULTED; 2034 } 2035 2036 if (!((lacp && (oldstate == LACP_CURRENT) && 2037 (pl->sm.receive_state == LACP_CURRENT)))) { 2038 AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n", 2039 portp->lp_linkid, lacp_receive_str[oldstate], 2040 lacp_receive_str[pl->sm.receive_state])); 2041 } 2042 2043 switch (pl->sm.receive_state) { 2044 case LACP_INITIALIZE: 2045 lacp_port_unselect(portp); 2046 record_Default(portp); 2047 pl->ActorOperPortState.bit.expired = B_FALSE; 2048 pl->sm.port_moved = B_FALSE; 2049 pl->sm.receive_state = LACP_PORT_DISABLED; 2050 pl->sm.begin = B_FALSE; 2051 lacp_receive_sm(portp, NULL); 2052 break; 2053 2054 case LACP_PORT_DISABLED: 2055 pl->PartnerOperPortState.bit.sync = B_FALSE; 2056 /* 2057 * Stop current_while_timer in case 2058 * we got here from link down 2059 */ 2060 stop_current_while_timer(portp); 2061 2062 if (pl->sm.port_enabled && !pl->sm.lacp_enabled) { 2063 pl->sm.receive_state = LACP_DISABLED; 2064 lacp_receive_sm(portp, lacp); 2065 /* We goto LACP_DISABLED state */ 2066 break; 2067 } else if (pl->sm.port_enabled && pl->sm.lacp_enabled) { 2068 pl->sm.receive_state = LACP_EXPIRED; 2069 /* 2070 * FALL THROUGH TO LACP_EXPIRED CASE: 2071 * We have no way of knowing if we get into 2072 * lacp_receive_sm() from a current_while_timer 2073 * expiring as it has never been kicked off yet! 2074 */ 2075 } else { 2076 /* We stay in LACP_PORT_DISABLED state */ 2077 break; 2078 } 2079 /* LACP_PORT_DISABLED -> LACP_EXPIRED */ 2080 /* FALLTHROUGH */ 2081 2082 case LACP_EXPIRED: 2083 /* 2084 * Arrives here from LACP_PORT_DISABLED state as well as 2085 * as well as current_while_timer expiring. 2086 */ 2087 pl->PartnerOperPortState.bit.sync = B_FALSE; 2088 pl->PartnerOperPortState.bit.timeout = B_TRUE; 2089 2090 pl->ActorOperPortState.bit.expired = B_TRUE; 2091 start_current_while_timer(portp, SHORT_TIMEOUT_TIME); 2092 lacp_periodic_sm(portp); 2093 break; 2094 2095 case LACP_DISABLED: 2096 /* 2097 * This is the normal state for recv_sm when LACP_OFF 2098 * is set or the NIC is in half duplex mode. 2099 */ 2100 lacp_port_unselect(portp); 2101 record_Default(portp); 2102 pl->PartnerOperPortState.bit.aggregation = B_FALSE; 2103 pl->ActorOperPortState.bit.expired = B_FALSE; 2104 break; 2105 2106 case LACP_DEFAULTED: 2107 /* 2108 * Current_while_timer expired a second time. 2109 */ 2110 update_default_selected(portp); 2111 record_Default(portp); /* overwrite Partner Oper val */ 2112 pl->ActorOperPortState.bit.expired = B_FALSE; 2113 pl->PartnerOperPortState.bit.sync = B_TRUE; 2114 2115 lacp_selection_logic(portp); 2116 lacp_mux_sm(portp); 2117 break; 2118 2119 case LACP_CURRENT: 2120 /* 2121 * Reception of LACPDU 2122 */ 2123 2124 if (!lacp) /* no LACPDU so current_while_timer popped */ 2125 break; 2126 2127 AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n", 2128 portp->lp_linkid)); 2129 2130 /* 2131 * Validate Actor_Information_Length, 2132 * Partner_Information_Length, Collector_Information_Length, 2133 * and Terminator_Length fields. 2134 */ 2135 if (!valid_lacp_pdu(portp, lacp)) { 2136 AGGR_LACP_DBG(("lacp_receive_sm (%d): " 2137 "Invalid LACPDU received\n", 2138 portp->lp_linkid)); 2139 break; 2140 } 2141 2142 save_activity = pl->PartnerOperPortState.bit.activity; 2143 selected_updated = update_selected(portp, lacp); 2144 update_NTT(portp, lacp); 2145 sync_updated = record_PDU(portp, lacp); 2146 2147 pl->ActorOperPortState.bit.expired = B_FALSE; 2148 2149 if (selected_updated) { 2150 lacp_selection_logic(portp); 2151 lacp_mux_sm(portp); 2152 } else if (sync_updated) { 2153 lacp_mux_sm(portp); 2154 } 2155 2156 /* 2157 * If the periodic timer value bit has been modified 2158 * or the partner activity bit has been changed then 2159 * we need to respectively: 2160 * - restart the timer with the proper timeout value. 2161 * - possibly enable/disable transmission of LACPDUs. 2162 */ 2163 if ((pl->PartnerOperPortState.bit.timeout && 2164 (pl->periodic_timer.val != FAST_PERIODIC_TIME)) || 2165 (!pl->PartnerOperPortState.bit.timeout && 2166 (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) || 2167 (pl->PartnerOperPortState.bit.activity != 2168 save_activity)) { 2169 lacp_periodic_sm(portp); 2170 } 2171 2172 stop_current_while_timer(portp); 2173 /* Check if we need to transmit an LACPDU */ 2174 if (pl->NTT) 2175 lacp_xmit_sm(portp); 2176 start_current_while_timer(portp, 0); 2177 2178 break; 2179 } 2180 } 2181 2182 static void 2183 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable) 2184 { 2185 mac_perim_handle_t mph; 2186 2187 AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n", 2188 portp->lp_linkid, enable ? "ENABLED" : "DISABLED")); 2189 2190 mac_perim_enter_by_mh(portp->lp_mh, &mph); 2191 if (!enable) { 2192 /* 2193 * Turn OFF Collector_Distributor. 2194 */ 2195 portp->lp_collector_enabled = B_FALSE; 2196 aggr_send_port_disable(portp); 2197 goto done; 2198 } 2199 2200 /* 2201 * Turn ON Collector_Distributor. 2202 */ 2203 2204 if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on && 2205 (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) { 2206 /* Port is compatible and can be aggregated */ 2207 portp->lp_collector_enabled = B_TRUE; 2208 aggr_send_port_enable(portp); 2209 } 2210 2211 done: 2212 mac_perim_exit(mph); 2213 } 2214 2215 /* 2216 * Because the LACP packet processing needs to enter the aggr's mac perimeter 2217 * and that would potentially cause a deadlock with the thread in which the 2218 * grp/port is deleted, we defer the packet process to a worker thread. Here 2219 * we only enqueue the received Marker or LACPDU for later processing. 2220 */ 2221 void 2222 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp) 2223 { 2224 aggr_grp_t *grp = portp->lp_grp; 2225 lacp_t *lacp; 2226 2227 dmp->b_rptr += sizeof (struct ether_header); 2228 2229 if (MBLKL(dmp) < sizeof (lacp_t)) { 2230 freemsg(dmp); 2231 return; 2232 } 2233 2234 lacp = (lacp_t *)dmp->b_rptr; 2235 if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) { 2236 AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): " 2237 "Unknown Slow Protocol type %d\n", 2238 portp->lp_linkid, lacp->subtype)); 2239 freemsg(dmp); 2240 return; 2241 } 2242 2243 mutex_enter(&grp->lg_lacp_lock); 2244 2245 /* 2246 * If the lg_lacp_done is set, this aggregation is in the process of 2247 * being deleted, return directly. 2248 */ 2249 if (grp->lg_lacp_done) { 2250 mutex_exit(&grp->lg_lacp_lock); 2251 freemsg(dmp); 2252 return; 2253 } 2254 2255 if (grp->lg_lacp_tail == NULL) { 2256 grp->lg_lacp_head = grp->lg_lacp_tail = dmp; 2257 } else { 2258 grp->lg_lacp_tail->b_next = dmp; 2259 grp->lg_lacp_tail = dmp; 2260 } 2261 2262 /* 2263 * Hold a reference of the port so that the port won't be freed when it 2264 * is removed from the aggr. The b_prev field is borrowed to save the 2265 * port information. 2266 */ 2267 AGGR_PORT_REFHOLD(portp); 2268 dmp->b_prev = (mblk_t *)portp; 2269 cv_broadcast(&grp->lg_lacp_cv); 2270 mutex_exit(&grp->lg_lacp_lock); 2271 } 2272 2273 static void 2274 aggr_lacp_rx(mblk_t *dmp) 2275 { 2276 aggr_port_t *portp = (aggr_port_t *)dmp->b_prev; 2277 mac_perim_handle_t mph; 2278 lacp_t *lacp; 2279 2280 dmp->b_prev = NULL; 2281 2282 mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph); 2283 if (portp->lp_closing) 2284 goto done; 2285 2286 lacp = (lacp_t *)dmp->b_rptr; 2287 switch (lacp->subtype) { 2288 case LACP_SUBTYPE: 2289 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n", 2290 portp->lp_linkid)); 2291 2292 if (!portp->lp_lacp.sm.lacp_on) { 2293 break; 2294 } 2295 lacp_receive_sm(portp, lacp); 2296 break; 2297 2298 case MARKER_SUBTYPE: 2299 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n", 2300 portp->lp_linkid)); 2301 2302 if (receive_marker_pdu(portp, dmp) != 0) 2303 break; 2304 2305 /* Send the packet over the first TX ring */ 2306 dmp = mac_hwring_send_priv(portp->lp_mch, 2307 portp->lp_tx_rings[0], dmp); 2308 if (dmp != NULL) 2309 freemsg(dmp); 2310 mac_perim_exit(mph); 2311 AGGR_PORT_REFRELE(portp); 2312 return; 2313 } 2314 2315 done: 2316 mac_perim_exit(mph); 2317 AGGR_PORT_REFRELE(portp); 2318 freemsg(dmp); 2319 } 2320 2321 void 2322 aggr_lacp_rx_thread(void *arg) 2323 { 2324 callb_cpr_t cprinfo; 2325 aggr_grp_t *grp = (aggr_grp_t *)arg; 2326 aggr_port_t *port; 2327 mblk_t *mp, *nextmp; 2328 2329 CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr, 2330 "aggr_lacp_rx_thread"); 2331 2332 mutex_enter(&grp->lg_lacp_lock); 2333 2334 /* 2335 * Quit the thread if the grp is deleted. 2336 */ 2337 while (!grp->lg_lacp_done) { 2338 if ((mp = grp->lg_lacp_head) == NULL) { 2339 CALLB_CPR_SAFE_BEGIN(&cprinfo); 2340 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 2341 CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock); 2342 continue; 2343 } 2344 2345 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 2346 mutex_exit(&grp->lg_lacp_lock); 2347 2348 while (mp != NULL) { 2349 nextmp = mp->b_next; 2350 mp->b_next = NULL; 2351 aggr_lacp_rx(mp); 2352 mp = nextmp; 2353 } 2354 mutex_enter(&grp->lg_lacp_lock); 2355 } 2356 2357 /* 2358 * The grp is being destroyed, simply free all of the LACP messages 2359 * left in the queue which did not have the chance to be processed. 2360 * We cannot use freemsgchain() here since we need to clear the 2361 * b_prev field. 2362 */ 2363 for (mp = grp->lg_lacp_head; mp != NULL; mp = nextmp) { 2364 port = (aggr_port_t *)mp->b_prev; 2365 AGGR_PORT_REFRELE(port); 2366 nextmp = mp->b_next; 2367 mp->b_next = NULL; 2368 mp->b_prev = NULL; 2369 freemsg(mp); 2370 } 2371 2372 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 2373 grp->lg_lacp_rx_thread = NULL; 2374 cv_broadcast(&grp->lg_lacp_cv); 2375 CALLB_CPR_EXIT(&cprinfo); 2376 thread_exit(); 2377 } 2378