1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing. 28 */ 29 30 #include <sys/types.h> 31 #include <sys/sysmacros.h> 32 #include <sys/callb.h> 33 #include <sys/conf.h> 34 #include <sys/cmn_err.h> 35 #include <sys/disp.h> 36 #include <sys/list.h> 37 #include <sys/ksynch.h> 38 #include <sys/kmem.h> 39 #include <sys/stream.h> 40 #include <sys/modctl.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/atomic.h> 44 #include <sys/stat.h> 45 #include <sys/byteorder.h> 46 #include <sys/strsun.h> 47 #include <sys/isa_defs.h> 48 #include <sys/sdt.h> 49 50 #include <sys/aggr.h> 51 #include <sys/aggr_impl.h> 52 53 static struct ether_addr etherzeroaddr = { 54 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 55 }; 56 57 /* 58 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec. 59 */ 60 static struct ether_addr slow_multicast_addr = { 61 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 62 }; 63 64 #ifdef DEBUG 65 /* LACP state machine debugging support */ 66 static uint32_t aggr_lacp_debug = 0; 67 #define AGGR_LACP_DBG(x) if (aggr_lacp_debug) { (void) printf x; } 68 #else 69 #define AGGR_LACP_DBG(x) {} 70 #endif /* DEBUG */ 71 72 #define NSECS_PER_SEC 1000000000ll 73 74 /* used by lacp_misconfig_walker() */ 75 typedef struct lacp_misconfig_check_state_s { 76 aggr_port_t *cs_portp; 77 boolean_t cs_found; 78 } lacp_misconfig_check_state_t; 79 80 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS; 81 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS; 82 static const char *lacp_mux_str[] = LACP_MUX_STRINGS; 83 84 static uint16_t lacp_port_priority = 0x1000; 85 static uint16_t lacp_system_priority = 0x1000; 86 87 /* 88 * Maintains a list of all ports in ATTACHED state. This information 89 * is used to detect misconfiguration. 90 */ 91 typedef struct lacp_sel_ports { 92 datalink_id_t sp_grp_linkid; 93 datalink_id_t sp_linkid; 94 /* Note: sp_partner_system must be 2-byte aligned */ 95 struct ether_addr sp_partner_system; 96 uint32_t sp_partner_key; 97 struct lacp_sel_ports *sp_next; 98 } lacp_sel_ports_t; 99 100 static lacp_sel_ports_t *sel_ports = NULL; 101 static kmutex_t lacp_sel_lock; 102 103 static void periodic_timer_pop(void *); 104 static void periodic_timer_pop_handler(aggr_port_t *); 105 static void lacp_xmit_sm(aggr_port_t *); 106 static void lacp_periodic_sm(aggr_port_t *); 107 static void fill_lacp_pdu(aggr_port_t *, lacp_t *); 108 static void fill_lacp_ether(aggr_port_t *, struct ether_header *); 109 static void lacp_on(aggr_port_t *); 110 static void lacp_off(aggr_port_t *); 111 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *); 112 static void lacp_receive_sm(aggr_port_t *, lacp_t *); 113 static void aggr_set_coll_dist(aggr_port_t *, boolean_t); 114 static void start_wait_while_timer(aggr_port_t *); 115 static void stop_wait_while_timer(aggr_port_t *); 116 static void lacp_reset_port(aggr_port_t *); 117 static void stop_current_while_timer(aggr_port_t *); 118 static void current_while_timer_pop(void *); 119 static void current_while_timer_pop_handler(aggr_port_t *); 120 static void update_default_selected(aggr_port_t *); 121 static boolean_t update_selected(aggr_port_t *, lacp_t *); 122 static boolean_t lacp_sel_ports_add(aggr_port_t *); 123 static void lacp_sel_ports_del(aggr_port_t *); 124 static void wait_while_timer_pop(void *); 125 static void wait_while_timer_pop_handler(aggr_port_t *); 126 127 void 128 aggr_lacp_init(void) 129 { 130 mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL); 131 } 132 133 void 134 aggr_lacp_fini(void) 135 { 136 mutex_destroy(&lacp_sel_lock); 137 } 138 139 /* 140 * The following functions are used for handling LACP timers. 141 * 142 * Note that we cannot fully rely on the aggr's mac perimeter in the timeout 143 * handler routine, otherwise it may cause deadlock with the untimeout() call 144 * which is usually called with the mac perimeter held. Instead, a 145 * lacp_timer_lock mutex is introduced, which protects a bitwise flag 146 * (lacp_timer_bits). This flag is set/cleared by timeout()/stop_timer() 147 * routines and is checked by a dedicated thread, that executes the real 148 * timeout operation. 149 */ 150 static void 151 aggr_port_timer_thread(void *arg) 152 { 153 aggr_port_t *port = arg; 154 aggr_lacp_port_t *pl = &port->lp_lacp; 155 aggr_grp_t *grp = port->lp_grp; 156 uint32_t lacp_timer_bits; 157 mac_perim_handle_t mph; 158 callb_cpr_t cprinfo; 159 160 CALLB_CPR_INIT(&cprinfo, &pl->lacp_timer_lock, callb_generic_cpr, 161 "aggr_port_timer_thread"); 162 163 mutex_enter(&pl->lacp_timer_lock); 164 165 for (;;) { 166 167 if ((lacp_timer_bits = pl->lacp_timer_bits) == 0) { 168 CALLB_CPR_SAFE_BEGIN(&cprinfo); 169 cv_wait(&pl->lacp_timer_cv, &pl->lacp_timer_lock); 170 CALLB_CPR_SAFE_END(&cprinfo, &pl->lacp_timer_lock); 171 continue; 172 } 173 pl->lacp_timer_bits = 0; 174 175 if (lacp_timer_bits & LACP_THREAD_EXIT) 176 break; 177 178 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT) 179 pl->periodic_timer.id = 0; 180 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT) 181 pl->wait_while_timer.id = 0; 182 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT) 183 pl->current_while_timer.id = 0; 184 185 mutex_exit(&pl->lacp_timer_lock); 186 187 mac_perim_enter_by_mh(grp->lg_mh, &mph); 188 if (port->lp_closing) { 189 mac_perim_exit(mph); 190 mutex_enter(&pl->lacp_timer_lock); 191 break; 192 } 193 194 if (lacp_timer_bits & LACP_PERIODIC_TIMEOUT) 195 periodic_timer_pop_handler(port); 196 if (lacp_timer_bits & LACP_WAIT_WHILE_TIMEOUT) 197 wait_while_timer_pop_handler(port); 198 if (lacp_timer_bits & LACP_CURRENT_WHILE_TIMEOUT) 199 current_while_timer_pop_handler(port); 200 mac_perim_exit(mph); 201 202 mutex_enter(&pl->lacp_timer_lock); 203 if (pl->lacp_timer_bits & LACP_THREAD_EXIT) 204 break; 205 } 206 207 pl->lacp_timer_bits = 0; 208 pl->lacp_timer_thread = NULL; 209 cv_broadcast(&pl->lacp_timer_cv); 210 211 /* CALLB_CPR_EXIT drops the lock */ 212 CALLB_CPR_EXIT(&cprinfo); 213 214 /* 215 * Release the reference of the grp so aggr_grp_delete() can call 216 * mac_unregister() safely. 217 */ 218 aggr_grp_port_rele(port); 219 thread_exit(); 220 } 221 222 /* 223 * Set the port LACP state to SELECTED. Returns B_FALSE if the operation 224 * could not be performed due to a memory allocation error, B_TRUE otherwise. 225 */ 226 static boolean_t 227 lacp_port_select(aggr_port_t *portp) 228 { 229 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 230 231 if (!lacp_sel_ports_add(portp)) 232 return (B_FALSE); 233 portp->lp_lacp.sm.selected = AGGR_SELECTED; 234 return (B_TRUE); 235 } 236 237 /* 238 * Set the port LACP state to UNSELECTED. 239 */ 240 static void 241 lacp_port_unselect(aggr_port_t *portp) 242 { 243 aggr_grp_t *grp = portp->lp_grp; 244 245 ASSERT((grp->lg_mh == NULL) || MAC_PERIM_HELD(grp->lg_mh)); 246 247 lacp_sel_ports_del(portp); 248 portp->lp_lacp.sm.selected = AGGR_UNSELECTED; 249 } 250 251 /* 252 * Initialize group specific LACP state and parameters. 253 */ 254 void 255 aggr_lacp_init_grp(aggr_grp_t *aggrp) 256 { 257 aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT; 258 aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority; 259 aggrp->aggr.CollectorMaxDelay = 10; 260 aggrp->lg_lacp_mode = AGGR_LACP_OFF; 261 aggrp->aggr.ready = B_FALSE; 262 } 263 264 /* 265 * Complete LACP info initialization at port creation time. 266 */ 267 void 268 aggr_lacp_init_port(aggr_port_t *portp) 269 { 270 aggr_grp_t *aggrp = portp->lp_grp; 271 aggr_lacp_port_t *pl = &portp->lp_lacp; 272 273 ASSERT(aggrp->lg_mh == NULL || MAC_PERIM_HELD(aggrp->lg_mh)); 274 ASSERT(MAC_PERIM_HELD(portp->lp_mh)); 275 276 /* actor port # */ 277 pl->ActorPortNumber = portp->lp_portid; 278 AGGR_LACP_DBG(("aggr_lacp_init_port(%d): " 279 "ActorPortNumber = 0x%x\n", portp->lp_linkid, 280 pl->ActorPortNumber)); 281 282 pl->ActorPortPriority = (uint16_t)lacp_port_priority; 283 pl->ActorPortAggrId = 0; /* aggregator id - not used */ 284 pl->NTT = B_FALSE; /* need to transmit */ 285 286 pl->ActorAdminPortKey = aggrp->lg_key; 287 pl->ActorOperPortKey = pl->ActorAdminPortKey; 288 AGGR_LACP_DBG(("aggr_lacp_init_port(%d) " 289 "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n", 290 portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey)); 291 292 /* Actor admin. port state */ 293 pl->ActorAdminPortState.bit.activity = B_FALSE; 294 pl->ActorAdminPortState.bit.timeout = B_TRUE; 295 pl->ActorAdminPortState.bit.aggregation = B_TRUE; 296 pl->ActorAdminPortState.bit.sync = B_FALSE; 297 pl->ActorAdminPortState.bit.collecting = B_FALSE; 298 pl->ActorAdminPortState.bit.distributing = B_FALSE; 299 pl->ActorAdminPortState.bit.defaulted = B_FALSE; 300 pl->ActorAdminPortState.bit.expired = B_FALSE; 301 pl->ActorOperPortState = pl->ActorAdminPortState; 302 303 /* 304 * Partner Administrative Information 305 * (All initialized to zero except for the following) 306 * Fast Timeouts. 307 */ 308 pl->PartnerAdminPortState.bit.timeout = 309 pl->PartnerOperPortState.bit.timeout = B_TRUE; 310 311 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */ 312 313 /* 314 * State machine information. 315 */ 316 pl->sm.lacp_on = B_FALSE; /* LACP Off default */ 317 pl->sm.begin = B_TRUE; /* Prevents transmissions */ 318 pl->sm.lacp_enabled = B_FALSE; 319 pl->sm.port_enabled = B_FALSE; /* Link Down */ 320 pl->sm.actor_churn = B_FALSE; 321 pl->sm.partner_churn = B_FALSE; 322 pl->sm.ready_n = B_FALSE; 323 pl->sm.port_moved = B_FALSE; 324 325 lacp_port_unselect(portp); 326 327 pl->sm.periodic_state = LACP_NO_PERIODIC; 328 pl->sm.receive_state = LACP_INITIALIZE; 329 pl->sm.mux_state = LACP_DETACHED; 330 pl->sm.churn_state = LACP_NO_ACTOR_CHURN; 331 332 /* 333 * Timer information. 334 */ 335 pl->current_while_timer.id = 0; 336 pl->current_while_timer.val = SHORT_TIMEOUT_TIME; 337 338 pl->periodic_timer.id = 0; 339 pl->periodic_timer.val = FAST_PERIODIC_TIME; 340 341 pl->wait_while_timer.id = 0; 342 pl->wait_while_timer.val = AGGREGATE_WAIT_TIME; 343 344 pl->lacp_timer_bits = 0; 345 346 mutex_init(&pl->lacp_timer_lock, NULL, MUTEX_DRIVER, NULL); 347 cv_init(&pl->lacp_timer_cv, NULL, CV_DRIVER, NULL); 348 349 pl->lacp_timer_thread = thread_create(NULL, 0, aggr_port_timer_thread, 350 portp, 0, &p0, TS_RUN, minclsyspri); 351 352 /* 353 * Hold a reference of the grp and the port and this reference will 354 * be release when the thread exits. 355 * 356 * The reference on the port is used for aggr_port_delete() to 357 * continue without waiting for the thread to exit; the reference 358 * on the grp is used for aggr_grp_delete() to wait for the thread 359 * to exit before calling mac_unregister(). 360 */ 361 aggr_grp_port_hold(portp); 362 } 363 364 /* 365 * Port initialization when we need to 366 * turn LACP on/off, etc. Not everything is 367 * reset like in the above routine. 368 * Do NOT modify things like link status. 369 */ 370 static void 371 lacp_reset_port(aggr_port_t *portp) 372 { 373 aggr_lacp_port_t *pl = &portp->lp_lacp; 374 375 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 376 377 pl->NTT = B_FALSE; /* need to transmit */ 378 379 /* reset operational port state */ 380 pl->ActorOperPortState.bit.timeout = 381 pl->ActorAdminPortState.bit.timeout; 382 383 pl->ActorOperPortState.bit.sync = B_FALSE; 384 pl->ActorOperPortState.bit.collecting = B_FALSE; 385 pl->ActorOperPortState.bit.distributing = B_FALSE; 386 pl->ActorOperPortState.bit.defaulted = B_TRUE; 387 pl->ActorOperPortState.bit.expired = B_FALSE; 388 389 pl->PartnerOperPortState.bit.timeout = B_TRUE; /* fast t/o */ 390 pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */ 391 392 /* 393 * State machine information. 394 */ 395 pl->sm.begin = B_TRUE; /* Prevents transmissions */ 396 pl->sm.actor_churn = B_FALSE; 397 pl->sm.partner_churn = B_FALSE; 398 pl->sm.ready_n = B_FALSE; 399 400 lacp_port_unselect(portp); 401 402 pl->sm.periodic_state = LACP_NO_PERIODIC; 403 pl->sm.receive_state = LACP_INITIALIZE; 404 pl->sm.mux_state = LACP_DETACHED; 405 pl->sm.churn_state = LACP_NO_ACTOR_CHURN; 406 407 /* 408 * Timer information. 409 */ 410 pl->current_while_timer.val = SHORT_TIMEOUT_TIME; 411 pl->periodic_timer.val = FAST_PERIODIC_TIME; 412 } 413 414 static void 415 aggr_lacp_mcast_on(aggr_port_t *port) 416 { 417 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); 418 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 419 420 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 421 return; 422 423 (void) aggr_port_multicst(port, B_TRUE, 424 (uchar_t *)&slow_multicast_addr); 425 } 426 427 static void 428 aggr_lacp_mcast_off(aggr_port_t *port) 429 { 430 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); 431 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 432 433 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 434 return; 435 436 (void) aggr_port_multicst(port, B_FALSE, 437 (uchar_t *)&slow_multicast_addr); 438 } 439 440 static void 441 start_periodic_timer(aggr_port_t *portp) 442 { 443 aggr_lacp_port_t *pl = &portp->lp_lacp; 444 445 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 446 447 mutex_enter(&pl->lacp_timer_lock); 448 if (pl->periodic_timer.id == 0) { 449 pl->periodic_timer.id = timeout(periodic_timer_pop, portp, 450 drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val)); 451 } 452 mutex_exit(&pl->lacp_timer_lock); 453 } 454 455 static void 456 stop_periodic_timer(aggr_port_t *portp) 457 { 458 aggr_lacp_port_t *pl = &portp->lp_lacp; 459 timeout_id_t id; 460 461 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 462 463 mutex_enter(&pl->lacp_timer_lock); 464 if ((id = pl->periodic_timer.id) != 0) { 465 pl->lacp_timer_bits &= ~LACP_PERIODIC_TIMEOUT; 466 pl->periodic_timer.id = 0; 467 } 468 mutex_exit(&pl->lacp_timer_lock); 469 470 if (id != 0) 471 (void) untimeout(id); 472 } 473 474 /* 475 * When the timer pops, we arrive here to 476 * clear out LACPDU count as well as transmit an 477 * LACPDU. We then set the periodic state and let 478 * the periodic state machine restart the timer. 479 */ 480 static void 481 periodic_timer_pop(void *data) 482 { 483 aggr_port_t *portp = data; 484 aggr_lacp_port_t *pl = &portp->lp_lacp; 485 486 mutex_enter(&pl->lacp_timer_lock); 487 pl->lacp_timer_bits |= LACP_PERIODIC_TIMEOUT; 488 cv_broadcast(&pl->lacp_timer_cv); 489 mutex_exit(&pl->lacp_timer_lock); 490 } 491 492 /* 493 * When the timer pops, we arrive here to 494 * clear out LACPDU count as well as transmit an 495 * LACPDU. We then set the periodic state and let 496 * the periodic state machine restart the timer. 497 */ 498 static void 499 periodic_timer_pop_handler(aggr_port_t *portp) 500 { 501 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 502 503 portp->lp_lacp_stats.LACPDUsTx = 0; 504 505 /* current timestamp */ 506 portp->lp_lacp.time = gethrtime(); 507 portp->lp_lacp.NTT = B_TRUE; 508 lacp_xmit_sm(portp); 509 510 /* 511 * Set Periodic State machine state based on the 512 * value of the Partner Operation Port State timeout 513 * bit. 514 */ 515 if (portp->lp_lacp.PartnerOperPortState.bit.timeout) { 516 portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME; 517 portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC; 518 } else { 519 portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME; 520 portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC; 521 } 522 523 lacp_periodic_sm(portp); 524 } 525 526 /* 527 * Invoked from: 528 * - startup upon aggregation 529 * - when the periodic timer pops 530 * - when the periodic timer value is changed 531 * - when the port is attached or detached 532 * - when LACP mode is changed. 533 */ 534 static void 535 lacp_periodic_sm(aggr_port_t *portp) 536 { 537 lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state; 538 aggr_lacp_port_t *pl = &portp->lp_lacp; 539 540 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 541 542 /* LACP_OFF state not in specification so check here. */ 543 if (!pl->sm.lacp_on) { 544 /* Stop timer whether it is running or not */ 545 stop_periodic_timer(portp); 546 pl->sm.periodic_state = LACP_NO_PERIODIC; 547 pl->NTT = B_FALSE; 548 AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP " 549 "%s--->%s\n", portp->lp_linkid, 550 lacp_periodic_str[oldstate], 551 lacp_periodic_str[pl->sm.periodic_state])); 552 return; 553 } 554 555 if (pl->sm.begin || !pl->sm.lacp_enabled || 556 !pl->sm.port_enabled || 557 !pl->ActorOperPortState.bit.activity && 558 !pl->PartnerOperPortState.bit.activity) { 559 560 /* Stop timer whether it is running or not */ 561 stop_periodic_timer(portp); 562 pl->sm.periodic_state = LACP_NO_PERIODIC; 563 pl->NTT = B_FALSE; 564 AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n", 565 portp->lp_linkid, lacp_periodic_str[oldstate], 566 lacp_periodic_str[pl->sm.periodic_state])); 567 return; 568 } 569 570 /* 571 * Startup with FAST_PERIODIC_TIME if no previous LACPDU 572 * has been received. Then after we timeout, then it is 573 * possible to go to SLOW_PERIODIC_TIME. 574 */ 575 if (pl->sm.periodic_state == LACP_NO_PERIODIC) { 576 pl->periodic_timer.val = FAST_PERIODIC_TIME; 577 pl->sm.periodic_state = LACP_FAST_PERIODIC; 578 } else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) && 579 pl->PartnerOperPortState.bit.timeout) { 580 /* 581 * If we receive a bit indicating we are going to 582 * fast periodic from slow periodic, stop the timer 583 * and let the periodic_timer_pop routine deal 584 * with reseting the periodic state and transmitting 585 * a LACPDU. 586 */ 587 stop_periodic_timer(portp); 588 periodic_timer_pop_handler(portp); 589 } 590 591 /* Rearm timer with value provided by partner */ 592 start_periodic_timer(portp); 593 } 594 595 /* 596 * This routine transmits an LACPDU if lacp_enabled 597 * is TRUE and if NTT is set. 598 */ 599 static void 600 lacp_xmit_sm(aggr_port_t *portp) 601 { 602 aggr_lacp_port_t *pl = &portp->lp_lacp; 603 size_t len; 604 mblk_t *mp; 605 hrtime_t now, elapsed; 606 607 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 608 609 /* LACP_OFF state not in specification so check here. */ 610 if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started) 611 return; 612 613 /* 614 * Do nothing if LACP has been turned off or if the 615 * periodic state machine is not enabled. 616 */ 617 if ((pl->sm.periodic_state == LACP_NO_PERIODIC) || 618 !pl->sm.lacp_enabled || pl->sm.begin) { 619 pl->NTT = B_FALSE; 620 return; 621 } 622 623 /* 624 * If we have sent 5 Slow packets in the last second, avoid 625 * sending any more here. No more than three LACPDUs may be transmitted 626 * in any Fast_Periodic_Time interval. 627 */ 628 if (portp->lp_lacp_stats.LACPDUsTx >= 3) { 629 /* 630 * Grab the current time value and see if 631 * more than 1 second has passed. If so, 632 * reset the timestamp and clear the count. 633 */ 634 now = gethrtime(); 635 elapsed = now - pl->time; 636 if (elapsed > NSECS_PER_SEC) { 637 portp->lp_lacp_stats.LACPDUsTx = 0; 638 pl->time = now; 639 } else { 640 return; 641 } 642 } 643 644 len = sizeof (lacp_t) + sizeof (struct ether_header); 645 mp = allocb(len, BPRI_MED); 646 if (mp == NULL) 647 return; 648 649 mp->b_wptr = mp->b_rptr + len; 650 bzero(mp->b_rptr, len); 651 652 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr); 653 fill_lacp_pdu(portp, 654 (lacp_t *)(mp->b_rptr + sizeof (struct ether_header))); 655 656 /* Send the packet over the first TX ring */ 657 mp = mac_hwring_send_priv(portp->lp_mch, portp->lp_tx_rings[0], mp); 658 if (mp != NULL) 659 freemsg(mp); 660 661 pl->NTT = B_FALSE; 662 portp->lp_lacp_stats.LACPDUsTx++; 663 } 664 665 /* 666 * Initialize the ethernet header of a LACP packet sent from the specified 667 * port. 668 */ 669 static void 670 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether) 671 { 672 bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL); 673 bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost), 674 ETHERADDRL); 675 ether->ether_type = htons(ETHERTYPE_SLOW); 676 } 677 678 static void 679 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp) 680 { 681 aggr_lacp_port_t *pl = &portp->lp_lacp; 682 aggr_grp_t *aggrp = portp->lp_grp; 683 mac_perim_handle_t pmph; 684 685 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 686 mac_perim_enter_by_mh(portp->lp_mh, &pmph); 687 688 lacp->subtype = LACP_SUBTYPE; 689 lacp->version = LACP_VERSION; 690 691 /* 692 * Actor Information 693 */ 694 lacp->actor_info.tlv_type = ACTOR_TLV; 695 lacp->actor_info.information_len = sizeof (link_info_t); 696 lacp->actor_info.system_priority = 697 htons(aggrp->aggr.ActorSystemPriority); 698 bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id, 699 ETHERADDRL); 700 lacp->actor_info.key = htons(pl->ActorOperPortKey); 701 lacp->actor_info.port_priority = htons(pl->ActorPortPriority); 702 lacp->actor_info.port = htons(pl->ActorPortNumber); 703 lacp->actor_info.state.state = pl->ActorOperPortState.state; 704 705 /* 706 * Partner Information 707 */ 708 lacp->partner_info.tlv_type = PARTNER_TLV; 709 lacp->partner_info.information_len = sizeof (link_info_t); 710 lacp->partner_info.system_priority = 711 htons(pl->PartnerOperSysPriority); 712 lacp->partner_info.system_id = pl->PartnerOperSystem; 713 lacp->partner_info.key = htons(pl->PartnerOperKey); 714 lacp->partner_info.port_priority = 715 htons(pl->PartnerOperPortPriority); 716 lacp->partner_info.port = htons(pl->PartnerOperPortNum); 717 lacp->partner_info.state.state = pl->PartnerOperPortState.state; 718 719 /* Collector Information */ 720 lacp->tlv_collector = COLLECTOR_TLV; 721 lacp->collector_len = 0x10; 722 lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay); 723 724 /* Termination Information */ 725 lacp->tlv_terminator = TERMINATOR_TLV; 726 lacp->terminator_len = 0x0; 727 728 mac_perim_exit(pmph); 729 } 730 731 /* 732 * lacp_mux_sm - LACP mux state machine 733 * This state machine is invoked from: 734 * - startup upon aggregation 735 * - from the Selection logic 736 * - when the wait_while_timer pops 737 * - when the aggregation MAC address is changed 738 * - when receiving DL_NOTE_LINK_UP/DOWN 739 * - when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL 740 * - when LACP mode is changed. 741 * - when a DL_NOTE_SPEED is received 742 */ 743 static void 744 lacp_mux_sm(aggr_port_t *portp) 745 { 746 aggr_grp_t *aggrp = portp->lp_grp; 747 boolean_t NTT_updated = B_FALSE; 748 aggr_lacp_port_t *pl = &portp->lp_lacp; 749 lacp_mux_state_t oldstate = pl->sm.mux_state; 750 751 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 752 753 /* LACP_OFF state not in specification so check here. */ 754 if (!pl->sm.lacp_on) { 755 pl->sm.mux_state = LACP_DETACHED; 756 pl->ActorOperPortState.bit.sync = B_FALSE; 757 758 if (pl->ActorOperPortState.bit.collecting || 759 pl->ActorOperPortState.bit.distributing) { 760 AGGR_LACP_DBG(("trunk link: (%d): " 761 "Collector_Distributor Disabled.\n", 762 portp->lp_linkid)); 763 } 764 765 pl->ActorOperPortState.bit.collecting = 766 pl->ActorOperPortState.bit.distributing = B_FALSE; 767 return; 768 } 769 770 if (pl->sm.begin || !pl->sm.lacp_enabled) 771 pl->sm.mux_state = LACP_DETACHED; 772 773 again: 774 /* determine next state, or return if state unchanged */ 775 switch (pl->sm.mux_state) { 776 case LACP_DETACHED: 777 if (pl->sm.begin) { 778 break; 779 } 780 781 if ((pl->sm.selected == AGGR_SELECTED) || 782 (pl->sm.selected == AGGR_STANDBY)) { 783 pl->sm.mux_state = LACP_WAITING; 784 break; 785 } 786 return; 787 788 case LACP_WAITING: 789 if (pl->sm.selected == AGGR_UNSELECTED) { 790 pl->sm.mux_state = LACP_DETACHED; 791 break; 792 } 793 794 if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) { 795 pl->sm.mux_state = LACP_ATTACHED; 796 break; 797 } 798 return; 799 800 case LACP_ATTACHED: 801 if ((pl->sm.selected == AGGR_UNSELECTED) || 802 (pl->sm.selected == AGGR_STANDBY)) { 803 pl->sm.mux_state = LACP_DETACHED; 804 break; 805 } 806 807 if ((pl->sm.selected == AGGR_SELECTED) && 808 pl->PartnerOperPortState.bit.sync) { 809 pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING; 810 break; 811 } 812 return; 813 814 case LACP_COLLECTING_DISTRIBUTING: 815 if ((pl->sm.selected == AGGR_UNSELECTED) || 816 (pl->sm.selected == AGGR_STANDBY) || 817 !pl->PartnerOperPortState.bit.sync) { 818 pl->sm.mux_state = LACP_ATTACHED; 819 break; 820 } 821 return; 822 } 823 824 AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n", 825 portp->lp_linkid, lacp_mux_str[oldstate], 826 lacp_mux_str[pl->sm.mux_state])); 827 828 /* perform actions on entering a new state */ 829 switch (pl->sm.mux_state) { 830 case LACP_DETACHED: 831 if (pl->ActorOperPortState.bit.collecting || 832 pl->ActorOperPortState.bit.distributing) { 833 AGGR_LACP_DBG(("trunk link: (%d): " 834 "Collector_Distributor Disabled.\n", 835 portp->lp_linkid)); 836 } 837 838 pl->ActorOperPortState.bit.sync = 839 pl->ActorOperPortState.bit.collecting = B_FALSE; 840 841 /* Turn OFF Collector_Distributor */ 842 aggr_set_coll_dist(portp, B_FALSE); 843 844 pl->ActorOperPortState.bit.distributing = B_FALSE; 845 NTT_updated = B_TRUE; 846 break; 847 848 case LACP_WAITING: 849 start_wait_while_timer(portp); 850 break; 851 852 case LACP_ATTACHED: 853 if (pl->ActorOperPortState.bit.collecting || 854 pl->ActorOperPortState.bit.distributing) { 855 AGGR_LACP_DBG(("trunk link: (%d): " 856 "Collector_Distributor Disabled.\n", 857 portp->lp_linkid)); 858 } 859 860 pl->ActorOperPortState.bit.sync = B_TRUE; 861 pl->ActorOperPortState.bit.collecting = B_FALSE; 862 863 /* Turn OFF Collector_Distributor */ 864 aggr_set_coll_dist(portp, B_FALSE); 865 866 pl->ActorOperPortState.bit.distributing = B_FALSE; 867 NTT_updated = B_TRUE; 868 if (pl->PartnerOperPortState.bit.sync) { 869 /* 870 * We had already received an updated sync from 871 * the partner. Attempt to transition to 872 * collecting/distributing now. 873 */ 874 goto again; 875 } 876 break; 877 878 case LACP_COLLECTING_DISTRIBUTING: 879 if (!pl->ActorOperPortState.bit.collecting && 880 !pl->ActorOperPortState.bit.distributing) { 881 AGGR_LACP_DBG(("trunk link: (%d): " 882 "Collector_Distributor Enabled.\n", 883 portp->lp_linkid)); 884 } 885 pl->ActorOperPortState.bit.distributing = B_TRUE; 886 887 /* Turn Collector_Distributor back ON */ 888 aggr_set_coll_dist(portp, B_TRUE); 889 890 pl->ActorOperPortState.bit.collecting = B_TRUE; 891 NTT_updated = B_TRUE; 892 break; 893 } 894 895 /* 896 * If we updated the state of the NTT variable, then 897 * initiate a LACPDU transmission. 898 */ 899 if (NTT_updated) { 900 pl->NTT = B_TRUE; 901 lacp_xmit_sm(portp); 902 } 903 } /* lacp_mux_sm */ 904 905 906 static int 907 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp) 908 { 909 marker_pdu_t *markerp = (marker_pdu_t *)mp->b_rptr; 910 911 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 912 913 AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n", 914 portp->lp_linkid)); 915 916 /* LACP_OFF state not in specification so check here. */ 917 if (!portp->lp_lacp.sm.lacp_on) 918 return (-1); 919 920 if (MBLKL(mp) < sizeof (marker_pdu_t)) 921 return (-1); 922 923 if (markerp->version != MARKER_VERSION) { 924 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " 925 "version = %d does not match s/w version %d\n", 926 portp->lp_linkid, markerp->version, MARKER_VERSION)); 927 return (-1); 928 } 929 930 if (markerp->tlv_marker == MARKER_RESPONSE_TLV) { 931 /* We do not yet send out MARKER info PDUs */ 932 AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: " 933 " MARKER TLV = %d - We don't send out info type!\n", 934 portp->lp_linkid, markerp->tlv_marker)); 935 return (-1); 936 } 937 938 if (markerp->tlv_marker != MARKER_INFO_TLV) { 939 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " 940 " MARKER TLV = %d \n", portp->lp_linkid, 941 markerp->tlv_marker)); 942 return (-1); 943 } 944 945 if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) { 946 AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: " 947 " MARKER length = %d \n", portp->lp_linkid, 948 markerp->marker_len)); 949 return (-1); 950 } 951 952 if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) { 953 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: " 954 " MARKER Port %d not equal to Partner port %d\n", 955 portp->lp_linkid, markerp->requestor_port, 956 portp->lp_lacp.PartnerOperPortNum)); 957 return (-1); 958 } 959 960 if (ether_cmp(&markerp->system_id, 961 &portp->lp_lacp.PartnerOperSystem) != 0) { 962 AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: " 963 " MARKER MAC not equal to Partner MAC\n", 964 portp->lp_linkid)); 965 return (-1); 966 } 967 968 /* 969 * Turn into Marker Response PDU 970 * and return mblk to sending system 971 */ 972 markerp->tlv_marker = MARKER_RESPONSE_TLV; 973 974 /* reuse the space that was used by received ethernet header */ 975 ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header)); 976 mp->b_rptr -= sizeof (struct ether_header); 977 fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr); 978 return (0); 979 } 980 981 /* 982 * Update the LACP mode (off, active, or passive) of the specified group. 983 */ 984 void 985 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode) 986 { 987 aggr_lacp_mode_t old_mode = grp->lg_lacp_mode; 988 aggr_port_t *port; 989 990 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 991 ASSERT(!grp->lg_closing); 992 993 if (mode == old_mode) 994 return; 995 996 grp->lg_lacp_mode = mode; 997 998 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 999 port->lp_lacp.ActorAdminPortState.bit.activity = 1000 port->lp_lacp.ActorOperPortState.bit.activity = 1001 (mode == AGGR_LACP_ACTIVE); 1002 1003 if (old_mode == AGGR_LACP_OFF) { 1004 /* OFF -> {PASSIVE,ACTIVE} */ 1005 /* turn OFF Collector_Distributor */ 1006 aggr_set_coll_dist(port, B_FALSE); 1007 lacp_on(port); 1008 } else if (mode == AGGR_LACP_OFF) { 1009 /* {PASSIVE,ACTIVE} -> OFF */ 1010 lacp_off(port); 1011 /* Turn ON Collector_Distributor */ 1012 aggr_set_coll_dist(port, B_TRUE); 1013 } else { 1014 /* PASSIVE->ACTIVE or ACTIVE->PASSIVE */ 1015 port->lp_lacp.sm.begin = B_TRUE; 1016 lacp_mux_sm(port); 1017 lacp_periodic_sm(port); 1018 1019 /* kick off state machines */ 1020 lacp_receive_sm(port, NULL); 1021 lacp_mux_sm(port); 1022 } 1023 } 1024 } 1025 1026 1027 /* 1028 * Update the LACP timer (short or long) of the specified group. 1029 */ 1030 void 1031 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer) 1032 { 1033 aggr_port_t *port; 1034 1035 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1036 1037 if (timer == grp->aggr.PeriodicTimer) 1038 return; 1039 1040 grp->aggr.PeriodicTimer = timer; 1041 1042 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1043 port->lp_lacp.ActorAdminPortState.bit.timeout = 1044 port->lp_lacp.ActorOperPortState.bit.timeout = 1045 (timer == AGGR_LACP_TIMER_SHORT); 1046 } 1047 } 1048 1049 void 1050 aggr_port_lacp_set_mode(aggr_grp_t *grp, aggr_port_t *port) 1051 { 1052 aggr_lacp_mode_t mode; 1053 aggr_lacp_timer_t timer; 1054 1055 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1056 1057 mode = grp->lg_lacp_mode; 1058 timer = grp->aggr.PeriodicTimer; 1059 1060 port->lp_lacp.ActorAdminPortState.bit.activity = 1061 port->lp_lacp.ActorOperPortState.bit.activity = 1062 (mode == AGGR_LACP_ACTIVE); 1063 1064 port->lp_lacp.ActorAdminPortState.bit.timeout = 1065 port->lp_lacp.ActorOperPortState.bit.timeout = 1066 (timer == AGGR_LACP_TIMER_SHORT); 1067 1068 if (mode == AGGR_LACP_OFF) { 1069 /* Turn ON Collector_Distributor */ 1070 aggr_set_coll_dist(port, B_TRUE); 1071 } else { /* LACP_ACTIVE/PASSIVE */ 1072 lacp_on(port); 1073 } 1074 } 1075 1076 /* 1077 * Sets the initial LACP mode (off, active, passive) and LACP timer 1078 * (short, long) of the specified group. 1079 */ 1080 void 1081 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode, 1082 aggr_lacp_timer_t timer) 1083 { 1084 aggr_port_t *port; 1085 1086 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1087 1088 grp->lg_lacp_mode = mode; 1089 grp->aggr.PeriodicTimer = timer; 1090 1091 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 1092 aggr_port_lacp_set_mode(grp, port); 1093 } 1094 1095 /* 1096 * Verify that the Partner MAC and Key recorded by the specified 1097 * port are not found in other ports that are not part of our 1098 * aggregation. Returns B_TRUE if such a port is found, B_FALSE 1099 * otherwise. 1100 */ 1101 static boolean_t 1102 lacp_misconfig_check(aggr_port_t *portp) 1103 { 1104 aggr_grp_t *grp = portp->lp_grp; 1105 lacp_sel_ports_t *cport; 1106 1107 mutex_enter(&lacp_sel_lock); 1108 1109 for (cport = sel_ports; cport != NULL; cport = cport->sp_next) { 1110 1111 /* skip entries of the group of the port being checked */ 1112 if (cport->sp_grp_linkid == grp->lg_linkid) 1113 continue; 1114 1115 if ((ether_cmp(&cport->sp_partner_system, 1116 &grp->aggr.PartnerSystem) == 0) && 1117 (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) { 1118 char mac_str[ETHERADDRL*3]; 1119 struct ether_addr *mac = &cport->sp_partner_system; 1120 1121 /* 1122 * The Partner port information is already in use 1123 * by ports in another aggregation so disable this 1124 * port. 1125 */ 1126 1127 (void) snprintf(mac_str, sizeof (mac_str), 1128 "%x:%x:%x:%x:%x:%x", 1129 mac->ether_addr_octet[0], mac->ether_addr_octet[1], 1130 mac->ether_addr_octet[2], mac->ether_addr_octet[3], 1131 mac->ether_addr_octet[4], mac->ether_addr_octet[5]); 1132 1133 portp->lp_lacp.sm.selected = AGGR_UNSELECTED; 1134 1135 cmn_err(CE_NOTE, "aggr %d port %d: Port Partner " 1136 "MAC %s and key %d in use on aggregation %d " 1137 "port %d\n", grp->lg_linkid, portp->lp_linkid, 1138 mac_str, portp->lp_lacp.PartnerOperKey, 1139 cport->sp_grp_linkid, cport->sp_linkid); 1140 break; 1141 } 1142 } 1143 1144 mutex_exit(&lacp_sel_lock); 1145 return (cport != NULL); 1146 } 1147 1148 /* 1149 * Remove the specified port from the list of selected ports. 1150 */ 1151 static void 1152 lacp_sel_ports_del(aggr_port_t *portp) 1153 { 1154 lacp_sel_ports_t *cport, **prev = NULL; 1155 1156 mutex_enter(&lacp_sel_lock); 1157 1158 prev = &sel_ports; 1159 for (cport = sel_ports; cport != NULL; prev = &cport->sp_next, 1160 cport = cport->sp_next) { 1161 if (portp->lp_linkid == cport->sp_linkid) 1162 break; 1163 } 1164 1165 if (cport == NULL) { 1166 mutex_exit(&lacp_sel_lock); 1167 return; 1168 } 1169 1170 *prev = cport->sp_next; 1171 kmem_free(cport, sizeof (*cport)); 1172 1173 mutex_exit(&lacp_sel_lock); 1174 } 1175 1176 /* 1177 * Add the specified port to the list of selected ports. Returns B_FALSE 1178 * if the operation could not be performed due to an memory allocation 1179 * error. 1180 */ 1181 static boolean_t 1182 lacp_sel_ports_add(aggr_port_t *portp) 1183 { 1184 lacp_sel_ports_t *new_port; 1185 lacp_sel_ports_t *cport, **last; 1186 1187 mutex_enter(&lacp_sel_lock); 1188 1189 /* check if port is already in the list */ 1190 last = &sel_ports; 1191 for (cport = sel_ports; cport != NULL; 1192 last = &cport->sp_next, cport = cport->sp_next) { 1193 if (portp->lp_linkid == cport->sp_linkid) { 1194 ASSERT(cport->sp_partner_key == 1195 portp->lp_lacp.PartnerOperKey); 1196 ASSERT(ether_cmp(&cport->sp_partner_system, 1197 &portp->lp_lacp.PartnerOperSystem) == 0); 1198 1199 mutex_exit(&lacp_sel_lock); 1200 return (B_TRUE); 1201 } 1202 } 1203 1204 /* create and initialize new entry */ 1205 new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP); 1206 if (new_port == NULL) { 1207 mutex_exit(&lacp_sel_lock); 1208 return (B_FALSE); 1209 } 1210 1211 new_port->sp_grp_linkid = portp->lp_grp->lg_linkid; 1212 bcopy(&portp->lp_lacp.PartnerOperSystem, 1213 &new_port->sp_partner_system, sizeof (new_port->sp_partner_system)); 1214 new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey; 1215 new_port->sp_linkid = portp->lp_linkid; 1216 1217 *last = new_port; 1218 1219 mutex_exit(&lacp_sel_lock); 1220 return (B_TRUE); 1221 } 1222 1223 /* 1224 * lacp_selection_logic - LACP selection logic 1225 * Sets the selected variable on a per port basis 1226 * and sets Ready when all waiting ports are ready 1227 * to go online. 1228 * 1229 * parameters: 1230 * - portp - instance this applies to. 1231 * 1232 * invoked: 1233 * - when initialization is needed 1234 * - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state 1235 * - When the lacp_receive_sm goes to the LACP_DEFAULTED state 1236 * - every time the wait_while_timer pops 1237 * - everytime we turn LACP on/off 1238 */ 1239 static void 1240 lacp_selection_logic(aggr_port_t *portp) 1241 { 1242 aggr_port_t *tpp; 1243 aggr_grp_t *aggrp = portp->lp_grp; 1244 int ports_waiting; 1245 boolean_t reset_mac = B_FALSE; 1246 aggr_lacp_port_t *pl = &portp->lp_lacp; 1247 1248 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 1249 1250 /* LACP_OFF state not in specification so check here. */ 1251 if (!pl->sm.lacp_on) { 1252 lacp_port_unselect(portp); 1253 aggrp->aggr.ready = B_FALSE; 1254 lacp_mux_sm(portp); 1255 return; 1256 } 1257 1258 if (pl->sm.begin || !pl->sm.lacp_enabled || 1259 (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) { 1260 1261 AGGR_LACP_DBG(("lacp_selection_logic:(%d): " 1262 "selected %d-->%d (begin=%d, lacp_enabled = %d, " 1263 "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected, 1264 AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled, 1265 portp->lp_state)); 1266 1267 lacp_port_unselect(portp); 1268 aggrp->aggr.ready = B_FALSE; 1269 lacp_mux_sm(portp); 1270 return; 1271 } 1272 1273 /* 1274 * If LACP is not enabled then selected is never set. 1275 */ 1276 if (!pl->sm.lacp_enabled) { 1277 AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n", 1278 portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED)); 1279 1280 lacp_port_unselect(portp); 1281 lacp_mux_sm(portp); 1282 return; 1283 } 1284 1285 /* 1286 * Check if the Partner MAC or Key are zero. If so, we have 1287 * not received any LACP info or it has expired and the 1288 * receive machine is in the LACP_DEFAULTED state. 1289 */ 1290 if (ether_cmp(&pl->PartnerOperSystem, ðerzeroaddr) == 0 || 1291 (pl->PartnerOperKey == 0)) { 1292 1293 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) { 1294 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, 1295 ðerzeroaddr) != 0 && 1296 (tpp->lp_lacp.PartnerOperKey != 0)) 1297 break; 1298 } 1299 1300 /* 1301 * If all ports have no key or aggregation address, 1302 * then clear the negotiated Partner MAC and key. 1303 */ 1304 if (tpp == NULL) { 1305 /* Clear the aggregation Partner MAC and key */ 1306 aggrp->aggr.PartnerSystem = etherzeroaddr; 1307 aggrp->aggr.PartnerOperAggrKey = 0; 1308 } 1309 1310 return; 1311 } 1312 1313 /* 1314 * Insure that at least one port in the aggregation 1315 * matches the Partner aggregation MAC and key. If not, 1316 * then clear the aggregation MAC and key. Later we will 1317 * set the Partner aggregation MAC and key to that of the 1318 * current port's Partner MAC and key. 1319 */ 1320 if (ether_cmp(&pl->PartnerOperSystem, 1321 &aggrp->aggr.PartnerSystem) != 0 || 1322 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) { 1323 1324 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) { 1325 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, 1326 &aggrp->aggr.PartnerSystem) == 0 && 1327 (tpp->lp_lacp.PartnerOperKey == 1328 aggrp->aggr.PartnerOperAggrKey)) { 1329 /* Set aggregation Partner MAC and key */ 1330 aggrp->aggr.PartnerSystem = 1331 pl->PartnerOperSystem; 1332 aggrp->aggr.PartnerOperAggrKey = 1333 pl->PartnerOperKey; 1334 break; 1335 } 1336 } 1337 1338 if (tpp == NULL) { 1339 /* Clear the aggregation Partner MAC and key */ 1340 aggrp->aggr.PartnerSystem = etherzeroaddr; 1341 aggrp->aggr.PartnerOperAggrKey = 0; 1342 reset_mac = B_TRUE; 1343 } 1344 } 1345 1346 /* 1347 * If our Actor MAC is found in the Partner MAC 1348 * on this port then we have a loopback misconfiguration. 1349 */ 1350 if (ether_cmp(&pl->PartnerOperSystem, 1351 (struct ether_addr *)&aggrp->lg_addr) == 0) { 1352 cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n", 1353 portp->lp_linkid); 1354 1355 lacp_port_unselect(portp); 1356 lacp_mux_sm(portp); 1357 return; 1358 } 1359 1360 /* 1361 * If our Partner MAC and Key are found on any other 1362 * ports that are not in our aggregation, we have 1363 * a misconfiguration. 1364 */ 1365 if (lacp_misconfig_check(portp)) { 1366 lacp_mux_sm(portp); 1367 return; 1368 } 1369 1370 /* 1371 * If the Aggregation Partner MAC and Key have not been 1372 * set, then this is either the first port or the aggregation 1373 * MAC and key have been reset. In either case we must set 1374 * the values of the Partner MAC and key. 1375 */ 1376 if (ether_cmp(&aggrp->aggr.PartnerSystem, ðerzeroaddr) == 0 && 1377 (aggrp->aggr.PartnerOperAggrKey == 0)) { 1378 /* Set aggregation Partner MAC and key */ 1379 aggrp->aggr.PartnerSystem = pl->PartnerOperSystem; 1380 aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey; 1381 1382 /* 1383 * If we reset Partner aggregation MAC, then restart 1384 * selection_logic on ports that match new MAC address. 1385 */ 1386 if (reset_mac) { 1387 for (tpp = aggrp->lg_ports; tpp; tpp = 1388 tpp->lp_next) { 1389 if (tpp == portp) 1390 continue; 1391 if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem, 1392 &aggrp->aggr.PartnerSystem) == 0 && 1393 (tpp->lp_lacp.PartnerOperKey == 1394 aggrp->aggr.PartnerOperAggrKey)) 1395 lacp_selection_logic(tpp); 1396 } 1397 } 1398 } else if (ether_cmp(&pl->PartnerOperSystem, 1399 &aggrp->aggr.PartnerSystem) != 0 || 1400 (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) { 1401 /* 1402 * The Partner port information does not match 1403 * that of the other ports in the aggregation 1404 * so disable this port. 1405 */ 1406 lacp_port_unselect(portp); 1407 1408 cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC " 1409 "or key (%d) incompatible with Aggregation Partner " 1410 "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey, 1411 aggrp->aggr.PartnerOperAggrKey); 1412 1413 lacp_mux_sm(portp); 1414 return; 1415 } 1416 1417 /* If we get to here, automatically set selected */ 1418 if (pl->sm.selected != AGGR_SELECTED) { 1419 AGGR_LACP_DBG(("lacp_selection_logic:(%d): " 1420 "selected %d-->%d\n", portp->lp_linkid, 1421 pl->sm.selected, AGGR_SELECTED)); 1422 if (!lacp_port_select(portp)) 1423 return; 1424 lacp_mux_sm(portp); 1425 } 1426 1427 /* 1428 * From this point onward we have selected the port 1429 * and are simply checking if the Ready flag should 1430 * be set. 1431 */ 1432 1433 /* 1434 * If at least two ports are waiting to aggregate 1435 * and ready_n is set on all ports waiting to aggregate 1436 * then set READY for the aggregation. 1437 */ 1438 1439 ports_waiting = 0; 1440 1441 if (!aggrp->aggr.ready) { 1442 /* 1443 * If all ports in the aggregation have received compatible 1444 * partner information and they match up correctly with the 1445 * switch, there is no need to wait for all the 1446 * wait_while_timers to pop. 1447 */ 1448 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) { 1449 if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) || 1450 tpp->lp_lacp.sm.begin) && 1451 !tpp->lp_lacp.PartnerOperPortState.bit.sync) { 1452 /* Add up ports uninitialized or waiting */ 1453 ports_waiting++; 1454 if (!tpp->lp_lacp.sm.ready_n) { 1455 DTRACE_PROBE1(port___not__ready, 1456 aggr_port_t *, tpp); 1457 return; 1458 } 1459 } 1460 } 1461 } 1462 1463 if (aggrp->aggr.ready) { 1464 AGGR_LACP_DBG(("lacp_selection_logic:(%d): " 1465 "aggr.ready already set\n", portp->lp_linkid)); 1466 lacp_mux_sm(portp); 1467 } else { 1468 AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n", 1469 portp->lp_linkid, aggrp->aggr.ready, B_TRUE)); 1470 aggrp->aggr.ready = B_TRUE; 1471 1472 for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) 1473 lacp_mux_sm(tpp); 1474 } 1475 1476 } 1477 1478 /* 1479 * wait_while_timer_pop - When the timer pops, we arrive here to 1480 * set ready_n and trigger the selection logic. 1481 */ 1482 static void 1483 wait_while_timer_pop(void *data) 1484 { 1485 aggr_port_t *portp = data; 1486 aggr_lacp_port_t *pl = &portp->lp_lacp; 1487 1488 mutex_enter(&pl->lacp_timer_lock); 1489 pl->lacp_timer_bits |= LACP_WAIT_WHILE_TIMEOUT; 1490 cv_broadcast(&pl->lacp_timer_cv); 1491 mutex_exit(&pl->lacp_timer_lock); 1492 } 1493 1494 /* 1495 * wait_while_timer_pop_handler - When the timer pops, we arrive here to 1496 * set ready_n and trigger the selection logic. 1497 */ 1498 static void 1499 wait_while_timer_pop_handler(aggr_port_t *portp) 1500 { 1501 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1502 1503 AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n", 1504 portp->lp_linkid)); 1505 portp->lp_lacp.sm.ready_n = B_TRUE; 1506 1507 lacp_selection_logic(portp); 1508 } 1509 1510 static void 1511 start_wait_while_timer(aggr_port_t *portp) 1512 { 1513 aggr_lacp_port_t *pl = &portp->lp_lacp; 1514 1515 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1516 1517 mutex_enter(&pl->lacp_timer_lock); 1518 if (pl->wait_while_timer.id == 0) { 1519 pl->wait_while_timer.id = 1520 timeout(wait_while_timer_pop, portp, 1521 drv_usectohz(1000000 * 1522 portp->lp_lacp.wait_while_timer.val)); 1523 } 1524 mutex_exit(&pl->lacp_timer_lock); 1525 } 1526 1527 1528 static void 1529 stop_wait_while_timer(aggr_port_t *portp) 1530 { 1531 aggr_lacp_port_t *pl = &portp->lp_lacp; 1532 timeout_id_t id; 1533 1534 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1535 1536 mutex_enter(&pl->lacp_timer_lock); 1537 if ((id = pl->wait_while_timer.id) != 0) { 1538 pl->lacp_timer_bits &= ~LACP_WAIT_WHILE_TIMEOUT; 1539 pl->wait_while_timer.id = 0; 1540 } 1541 mutex_exit(&pl->lacp_timer_lock); 1542 1543 if (id != 0) 1544 (void) untimeout(id); 1545 } 1546 1547 /* 1548 * Invoked when a port has been attached to a group. 1549 * Complete the processing that couldn't be finished from lacp_on() 1550 * because the port was not started. We know that the link is full 1551 * duplex and ON, otherwise it wouldn't be attached. 1552 */ 1553 void 1554 aggr_lacp_port_attached(aggr_port_t *portp) 1555 { 1556 aggr_grp_t *grp = portp->lp_grp; 1557 aggr_lacp_port_t *pl = &portp->lp_lacp; 1558 1559 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1560 ASSERT(MAC_PERIM_HELD(portp->lp_mh)); 1561 ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED); 1562 1563 AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n", 1564 portp->lp_linkid)); 1565 1566 portp->lp_lacp.sm.port_enabled = B_TRUE; /* link on */ 1567 1568 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 1569 return; 1570 1571 pl->sm.lacp_enabled = B_TRUE; 1572 pl->ActorOperPortState.bit.aggregation = B_TRUE; 1573 pl->sm.begin = B_TRUE; 1574 1575 lacp_receive_sm(portp, NULL); 1576 lacp_mux_sm(portp); 1577 1578 /* Enable Multicast Slow Protocol address */ 1579 aggr_lacp_mcast_on(portp); 1580 1581 /* periodic_sm is started up from the receive machine */ 1582 lacp_selection_logic(portp); 1583 } 1584 1585 /* 1586 * Invoked when a port has been detached from a group. Turn off 1587 * LACP processing if it was enabled. 1588 */ 1589 void 1590 aggr_lacp_port_detached(aggr_port_t *portp) 1591 { 1592 aggr_grp_t *grp = portp->lp_grp; 1593 1594 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1595 ASSERT(MAC_PERIM_HELD(portp->lp_mh)); 1596 1597 AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n", 1598 portp->lp_linkid)); 1599 1600 portp->lp_lacp.sm.port_enabled = B_FALSE; 1601 1602 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 1603 return; 1604 1605 portp->lp_lacp.sm.lacp_enabled = B_FALSE; 1606 lacp_selection_logic(portp); 1607 lacp_mux_sm(portp); 1608 lacp_periodic_sm(portp); 1609 1610 /* 1611 * Disable Slow Protocol Timers. 1612 */ 1613 stop_periodic_timer(portp); 1614 stop_current_while_timer(portp); 1615 stop_wait_while_timer(portp); 1616 1617 /* Disable Multicast Slow Protocol address */ 1618 aggr_lacp_mcast_off(portp); 1619 aggr_set_coll_dist(portp, B_FALSE); 1620 } 1621 1622 /* 1623 * Enable Slow Protocol LACP and Marker PDUs. 1624 */ 1625 static void 1626 lacp_on(aggr_port_t *portp) 1627 { 1628 aggr_lacp_port_t *pl = &portp->lp_lacp; 1629 mac_perim_handle_t mph; 1630 1631 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1632 1633 mac_perim_enter_by_mh(portp->lp_mh, &mph); 1634 1635 /* 1636 * Reset the state machines and Partner operational 1637 * information. Careful to not reset things like 1638 * our link state. 1639 */ 1640 lacp_reset_port(portp); 1641 pl->sm.lacp_on = B_TRUE; 1642 1643 AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid)); 1644 1645 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) { 1646 pl->sm.port_enabled = B_TRUE; 1647 pl->sm.lacp_enabled = B_TRUE; 1648 pl->ActorOperPortState.bit.aggregation = B_TRUE; 1649 } 1650 1651 lacp_receive_sm(portp, NULL); 1652 lacp_mux_sm(portp); 1653 1654 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) { 1655 /* Enable Multicast Slow Protocol address */ 1656 aggr_lacp_mcast_on(portp); 1657 1658 /* periodic_sm is started up from the receive machine */ 1659 lacp_selection_logic(portp); 1660 } 1661 done: 1662 mac_perim_exit(mph); 1663 } /* lacp_on */ 1664 1665 /* Disable Slow Protocol LACP and Marker PDUs */ 1666 static void 1667 lacp_off(aggr_port_t *portp) 1668 { 1669 aggr_lacp_port_t *pl = &portp->lp_lacp; 1670 mac_perim_handle_t mph; 1671 1672 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1673 mac_perim_enter_by_mh(portp->lp_mh, &mph); 1674 1675 pl->sm.lacp_on = B_FALSE; 1676 1677 AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid)); 1678 1679 if (portp->lp_state == AGGR_PORT_STATE_ATTACHED) { 1680 /* 1681 * Disable Slow Protocol Timers. 1682 */ 1683 stop_periodic_timer(portp); 1684 stop_current_while_timer(portp); 1685 stop_wait_while_timer(portp); 1686 1687 /* Disable Multicast Slow Protocol address */ 1688 aggr_lacp_mcast_off(portp); 1689 1690 pl->sm.port_enabled = B_FALSE; 1691 pl->sm.lacp_enabled = B_FALSE; 1692 pl->ActorOperPortState.bit.aggregation = B_FALSE; 1693 } 1694 1695 lacp_mux_sm(portp); 1696 lacp_periodic_sm(portp); 1697 lacp_selection_logic(portp); 1698 1699 /* Turn OFF Collector_Distributor */ 1700 aggr_set_coll_dist(portp, B_FALSE); 1701 1702 lacp_reset_port(portp); 1703 mac_perim_exit(mph); 1704 } 1705 1706 1707 static boolean_t 1708 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp) 1709 { 1710 /* 1711 * 43.4.12 - "a Receive machine shall not validate 1712 * the Version Number, TLV_type, or Reserved fields in received 1713 * LACPDUs." 1714 * ... "a Receive machine may validate the Actor_Information_Length, 1715 * Partner_Information_Length, Collector_Information_Length, 1716 * or Terminator_Length fields." 1717 */ 1718 if ((lacp->actor_info.information_len != sizeof (link_info_t)) || 1719 (lacp->partner_info.information_len != sizeof (link_info_t)) || 1720 (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) || 1721 (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) { 1722 AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: " 1723 " Terminator Length = %d \n", portp->lp_linkid, 1724 lacp->terminator_len)); 1725 return (B_FALSE); 1726 } 1727 1728 return (B_TRUE); 1729 } 1730 1731 1732 static void 1733 start_current_while_timer(aggr_port_t *portp, uint_t time) 1734 { 1735 aggr_lacp_port_t *pl = &portp->lp_lacp; 1736 1737 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1738 1739 mutex_enter(&pl->lacp_timer_lock); 1740 if (pl->current_while_timer.id == 0) { 1741 if (time > 0) 1742 pl->current_while_timer.val = time; 1743 else if (pl->ActorOperPortState.bit.timeout) 1744 pl->current_while_timer.val = SHORT_TIMEOUT_TIME; 1745 else 1746 pl->current_while_timer.val = LONG_TIMEOUT_TIME; 1747 1748 pl->current_while_timer.id = 1749 timeout(current_while_timer_pop, portp, 1750 drv_usectohz((clock_t)1000000 * 1751 (clock_t)portp->lp_lacp.current_while_timer.val)); 1752 } 1753 mutex_exit(&pl->lacp_timer_lock); 1754 } 1755 1756 1757 static void 1758 stop_current_while_timer(aggr_port_t *portp) 1759 { 1760 aggr_lacp_port_t *pl = &portp->lp_lacp; 1761 timeout_id_t id; 1762 1763 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1764 1765 mutex_enter(&pl->lacp_timer_lock); 1766 if ((id = pl->current_while_timer.id) != 0) { 1767 pl->lacp_timer_bits &= ~LACP_CURRENT_WHILE_TIMEOUT; 1768 pl->current_while_timer.id = 0; 1769 } 1770 mutex_exit(&pl->lacp_timer_lock); 1771 1772 if (id != 0) 1773 (void) untimeout(id); 1774 } 1775 1776 static void 1777 current_while_timer_pop(void *data) 1778 { 1779 aggr_port_t *portp = (aggr_port_t *)data; 1780 aggr_lacp_port_t *pl = &portp->lp_lacp; 1781 1782 mutex_enter(&pl->lacp_timer_lock); 1783 pl->lacp_timer_bits |= LACP_CURRENT_WHILE_TIMEOUT; 1784 cv_broadcast(&pl->lacp_timer_cv); 1785 mutex_exit(&pl->lacp_timer_lock); 1786 } 1787 1788 static void 1789 current_while_timer_pop_handler(aggr_port_t *portp) 1790 { 1791 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1792 1793 AGGR_LACP_DBG(("trunk link:(%d): current_while_timer " 1794 "pop id=%p\n", portp->lp_linkid, 1795 portp->lp_lacp.current_while_timer.id)); 1796 1797 lacp_receive_sm(portp, NULL); 1798 } 1799 1800 /* 1801 * record_Default - Simply copies over administrative values 1802 * to the partner operational values, and sets our state to indicate we 1803 * are using defaulted values. 1804 */ 1805 static void 1806 record_Default(aggr_port_t *portp) 1807 { 1808 aggr_lacp_port_t *pl = &portp->lp_lacp; 1809 1810 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1811 1812 pl->PartnerOperPortNum = pl->PartnerAdminPortNum; 1813 pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority; 1814 pl->PartnerOperSystem = pl->PartnerAdminSystem; 1815 pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority; 1816 pl->PartnerOperKey = pl->PartnerAdminKey; 1817 pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state; 1818 1819 pl->ActorOperPortState.bit.defaulted = B_TRUE; 1820 } 1821 1822 1823 /* Returns B_TRUE on sync value changing */ 1824 static boolean_t 1825 record_PDU(aggr_port_t *portp, lacp_t *lacp) 1826 { 1827 aggr_grp_t *aggrp = portp->lp_grp; 1828 aggr_lacp_port_t *pl = &portp->lp_lacp; 1829 uint8_t save_sync; 1830 1831 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 1832 1833 /* 1834 * Partner Information 1835 */ 1836 pl->PartnerOperPortNum = ntohs(lacp->actor_info.port); 1837 pl->PartnerOperPortPriority = 1838 ntohs(lacp->actor_info.port_priority); 1839 pl->PartnerOperSystem = lacp->actor_info.system_id; 1840 pl->PartnerOperSysPriority = 1841 htons(lacp->actor_info.system_priority); 1842 pl->PartnerOperKey = ntohs(lacp->actor_info.key); 1843 1844 /* All state info except for Synchronization */ 1845 save_sync = pl->PartnerOperPortState.bit.sync; 1846 pl->PartnerOperPortState.state = lacp->actor_info.state.state; 1847 1848 /* Defaulted set to FALSE */ 1849 pl->ActorOperPortState.bit.defaulted = B_FALSE; 1850 1851 /* 1852 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system, 1853 * Partner_System_Priority, Partner_Key, and 1854 * Partner_State.Aggregation) are compared to the 1855 * corresponding operations paramters values for 1856 * the Actor. If these are equal, or if this is 1857 * an individual link, we are synchronized. 1858 */ 1859 if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) && 1860 (ntohs(lacp->partner_info.port_priority) == 1861 pl->ActorPortPriority) && 1862 (ether_cmp(&lacp->partner_info.system_id, 1863 (struct ether_addr *)&aggrp->lg_addr) == 0) && 1864 (ntohs(lacp->partner_info.system_priority) == 1865 aggrp->aggr.ActorSystemPriority) && 1866 (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) && 1867 (lacp->partner_info.state.bit.aggregation == 1868 pl->ActorOperPortState.bit.aggregation)) || 1869 (!lacp->actor_info.state.bit.aggregation)) { 1870 1871 pl->PartnerOperPortState.bit.sync = 1872 lacp->actor_info.state.bit.sync; 1873 } else { 1874 pl->PartnerOperPortState.bit.sync = B_FALSE; 1875 } 1876 1877 if (save_sync != pl->PartnerOperPortState.bit.sync) { 1878 AGGR_LACP_DBG(("record_PDU:(%d): partner sync " 1879 "%d -->%d\n", portp->lp_linkid, save_sync, 1880 pl->PartnerOperPortState.bit.sync)); 1881 return (B_TRUE); 1882 } else { 1883 return (B_FALSE); 1884 } 1885 } 1886 1887 1888 /* 1889 * update_selected - If any of the Partner parameters has 1890 * changed from a previous value, then 1891 * unselect the link from the aggregator. 1892 */ 1893 static boolean_t 1894 update_selected(aggr_port_t *portp, lacp_t *lacp) 1895 { 1896 aggr_lacp_port_t *pl = &portp->lp_lacp; 1897 1898 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1899 1900 if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) || 1901 (pl->PartnerOperPortPriority != 1902 ntohs(lacp->actor_info.port_priority)) || 1903 (ether_cmp(&pl->PartnerOperSystem, 1904 &lacp->actor_info.system_id) != 0) || 1905 (pl->PartnerOperSysPriority != 1906 ntohs(lacp->actor_info.system_priority)) || 1907 (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) || 1908 (pl->PartnerOperPortState.bit.aggregation != 1909 lacp->actor_info.state.bit.aggregation)) { 1910 AGGR_LACP_DBG(("update_selected:(%d): " 1911 "selected %d-->%d\n", portp->lp_linkid, pl->sm.selected, 1912 AGGR_UNSELECTED)); 1913 1914 lacp_port_unselect(portp); 1915 return (B_TRUE); 1916 } else { 1917 return (B_FALSE); 1918 } 1919 } 1920 1921 1922 /* 1923 * update_default_selected - If any of the operational Partner parameters 1924 * is different than that of the administrative values 1925 * then unselect the link from the aggregator. 1926 */ 1927 static void 1928 update_default_selected(aggr_port_t *portp) 1929 { 1930 aggr_lacp_port_t *pl = &portp->lp_lacp; 1931 1932 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 1933 1934 if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) || 1935 (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) || 1936 (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) || 1937 (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) || 1938 (pl->PartnerOperKey != pl->PartnerAdminKey) || 1939 (pl->PartnerOperPortState.bit.aggregation != 1940 pl->PartnerAdminPortState.bit.aggregation)) { 1941 1942 AGGR_LACP_DBG(("update_default_selected:(%d): " 1943 "selected %d-->%d\n", portp->lp_linkid, 1944 pl->sm.selected, AGGR_UNSELECTED)); 1945 1946 lacp_port_unselect(portp); 1947 } 1948 } 1949 1950 1951 /* 1952 * update_NTT - If any of the Partner values in the received LACPDU 1953 * are different than that of the Actor operational 1954 * values then set NTT to true. 1955 */ 1956 static void 1957 update_NTT(aggr_port_t *portp, lacp_t *lacp) 1958 { 1959 aggr_grp_t *aggrp = portp->lp_grp; 1960 aggr_lacp_port_t *pl = &portp->lp_lacp; 1961 1962 ASSERT(MAC_PERIM_HELD(aggrp->lg_mh)); 1963 1964 if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) || 1965 (pl->ActorPortPriority != 1966 ntohs(lacp->partner_info.port_priority)) || 1967 (ether_cmp(&aggrp->lg_addr, 1968 &lacp->partner_info.system_id) != 0) || 1969 (aggrp->aggr.ActorSystemPriority != 1970 ntohs(lacp->partner_info.system_priority)) || 1971 (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) || 1972 (pl->ActorOperPortState.bit.activity != 1973 lacp->partner_info.state.bit.activity) || 1974 (pl->ActorOperPortState.bit.timeout != 1975 lacp->partner_info.state.bit.timeout) || 1976 (pl->ActorOperPortState.bit.sync != 1977 lacp->partner_info.state.bit.sync) || 1978 (pl->ActorOperPortState.bit.aggregation != 1979 lacp->partner_info.state.bit.aggregation)) { 1980 1981 AGGR_LACP_DBG(("update_NTT:(%d): NTT %d-->%d\n", 1982 portp->lp_linkid, pl->NTT, B_TRUE)); 1983 1984 pl->NTT = B_TRUE; 1985 } 1986 } 1987 1988 /* 1989 * lacp_receive_sm - LACP receive state machine 1990 * 1991 * parameters: 1992 * - portp - instance this applies to. 1993 * - lacp - pointer in the case of a received LACPDU. 1994 * This value is NULL if there is no LACPDU. 1995 * 1996 * invoked: 1997 * - when initialization is needed 1998 * - upon reception of an LACPDU. This is the common case. 1999 * - every time the current_while_timer pops 2000 */ 2001 static void 2002 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp) 2003 { 2004 boolean_t sync_updated, selected_updated, save_activity; 2005 aggr_lacp_port_t *pl = &portp->lp_lacp; 2006 lacp_receive_state_t oldstate = pl->sm.receive_state; 2007 2008 ASSERT(MAC_PERIM_HELD(portp->lp_grp->lg_mh)); 2009 2010 /* LACP_OFF state not in specification so check here. */ 2011 if (!pl->sm.lacp_on) 2012 return; 2013 2014 /* figure next state */ 2015 if (pl->sm.begin || pl->sm.port_moved) { 2016 pl->sm.receive_state = LACP_INITIALIZE; 2017 } else if (!pl->sm.port_enabled) { /* DL_NOTE_LINK_DOWN */ 2018 pl->sm.receive_state = LACP_PORT_DISABLED; 2019 } else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */ 2020 pl->sm.receive_state = 2021 (pl->sm.receive_state == LACP_PORT_DISABLED) ? 2022 LACP_DISABLED : LACP_PORT_DISABLED; 2023 } else if (lacp != NULL) { 2024 if ((pl->sm.receive_state == LACP_EXPIRED) || 2025 (pl->sm.receive_state == LACP_DEFAULTED)) { 2026 pl->sm.receive_state = LACP_CURRENT; 2027 } 2028 } else if ((pl->sm.receive_state == LACP_CURRENT) && 2029 (pl->current_while_timer.id == 0)) { 2030 pl->sm.receive_state = LACP_EXPIRED; 2031 } else if ((pl->sm.receive_state == LACP_EXPIRED) && 2032 (pl->current_while_timer.id == 0)) { 2033 pl->sm.receive_state = LACP_DEFAULTED; 2034 } 2035 2036 if (!((lacp && (oldstate == LACP_CURRENT) && 2037 (pl->sm.receive_state == LACP_CURRENT)))) { 2038 AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n", 2039 portp->lp_linkid, lacp_receive_str[oldstate], 2040 lacp_receive_str[pl->sm.receive_state])); 2041 } 2042 2043 switch (pl->sm.receive_state) { 2044 case LACP_INITIALIZE: 2045 lacp_port_unselect(portp); 2046 record_Default(portp); 2047 pl->ActorOperPortState.bit.expired = B_FALSE; 2048 pl->sm.port_moved = B_FALSE; 2049 pl->sm.receive_state = LACP_PORT_DISABLED; 2050 pl->sm.begin = B_FALSE; 2051 lacp_receive_sm(portp, NULL); 2052 break; 2053 2054 case LACP_PORT_DISABLED: 2055 pl->PartnerOperPortState.bit.sync = B_FALSE; 2056 /* 2057 * Stop current_while_timer in case 2058 * we got here from link down 2059 */ 2060 stop_current_while_timer(portp); 2061 2062 if (pl->sm.port_enabled && !pl->sm.lacp_enabled) { 2063 pl->sm.receive_state = LACP_DISABLED; 2064 lacp_receive_sm(portp, lacp); 2065 /* We goto LACP_DISABLED state */ 2066 break; 2067 } else if (pl->sm.port_enabled && pl->sm.lacp_enabled) { 2068 pl->sm.receive_state = LACP_EXPIRED; 2069 /* 2070 * FALL THROUGH TO LACP_EXPIRED CASE: 2071 * We have no way of knowing if we get into 2072 * lacp_receive_sm() from a current_while_timer 2073 * expiring as it has never been kicked off yet! 2074 */ 2075 } else { 2076 /* We stay in LACP_PORT_DISABLED state */ 2077 break; 2078 } 2079 /* LACP_PORT_DISABLED -> LACP_EXPIRED */ 2080 /* FALLTHROUGH */ 2081 2082 case LACP_EXPIRED: 2083 /* 2084 * Arrives here from LACP_PORT_DISABLED state as well as 2085 * as well as current_while_timer expiring. 2086 */ 2087 pl->PartnerOperPortState.bit.sync = B_FALSE; 2088 pl->PartnerOperPortState.bit.timeout = B_TRUE; 2089 2090 pl->ActorOperPortState.bit.expired = B_TRUE; 2091 start_current_while_timer(portp, SHORT_TIMEOUT_TIME); 2092 lacp_periodic_sm(portp); 2093 break; 2094 2095 case LACP_DISABLED: 2096 /* 2097 * This is the normal state for recv_sm when LACP_OFF 2098 * is set or the NIC is in half duplex mode. 2099 */ 2100 lacp_port_unselect(portp); 2101 record_Default(portp); 2102 pl->PartnerOperPortState.bit.aggregation = B_FALSE; 2103 pl->ActorOperPortState.bit.expired = B_FALSE; 2104 break; 2105 2106 case LACP_DEFAULTED: 2107 /* 2108 * Current_while_timer expired a second time. 2109 */ 2110 update_default_selected(portp); 2111 record_Default(portp); /* overwrite Partner Oper val */ 2112 pl->ActorOperPortState.bit.expired = B_FALSE; 2113 pl->PartnerOperPortState.bit.sync = B_TRUE; 2114 2115 lacp_selection_logic(portp); 2116 lacp_mux_sm(portp); 2117 break; 2118 2119 case LACP_CURRENT: 2120 /* 2121 * Reception of LACPDU 2122 */ 2123 2124 if (!lacp) /* no LACPDU so current_while_timer popped */ 2125 break; 2126 2127 AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n", 2128 portp->lp_linkid)); 2129 2130 /* 2131 * Validate Actor_Information_Length, 2132 * Partner_Information_Length, Collector_Information_Length, 2133 * and Terminator_Length fields. 2134 */ 2135 if (!valid_lacp_pdu(portp, lacp)) { 2136 AGGR_LACP_DBG(("lacp_receive_sm (%d): " 2137 "Invalid LACPDU received\n", 2138 portp->lp_linkid)); 2139 break; 2140 } 2141 2142 save_activity = pl->PartnerOperPortState.bit.activity; 2143 selected_updated = update_selected(portp, lacp); 2144 update_NTT(portp, lacp); 2145 sync_updated = record_PDU(portp, lacp); 2146 2147 pl->ActorOperPortState.bit.expired = B_FALSE; 2148 2149 if (selected_updated) { 2150 lacp_selection_logic(portp); 2151 lacp_mux_sm(portp); 2152 } else if (sync_updated) { 2153 lacp_mux_sm(portp); 2154 } 2155 2156 /* 2157 * If the periodic timer value bit has been modified 2158 * or the partner activity bit has been changed then 2159 * we need to respectively: 2160 * - restart the timer with the proper timeout value. 2161 * - possibly enable/disable transmission of LACPDUs. 2162 */ 2163 if ((pl->PartnerOperPortState.bit.timeout && 2164 (pl->periodic_timer.val != FAST_PERIODIC_TIME)) || 2165 (!pl->PartnerOperPortState.bit.timeout && 2166 (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) || 2167 (pl->PartnerOperPortState.bit.activity != 2168 save_activity)) { 2169 lacp_periodic_sm(portp); 2170 } 2171 2172 stop_current_while_timer(portp); 2173 /* Check if we need to transmit an LACPDU */ 2174 if (pl->NTT) 2175 lacp_xmit_sm(portp); 2176 start_current_while_timer(portp, 0); 2177 2178 break; 2179 } 2180 } 2181 2182 static void 2183 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable) 2184 { 2185 mac_perim_handle_t mph; 2186 2187 AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n", 2188 portp->lp_linkid, enable ? "ENABLED" : "DISABLED")); 2189 2190 mac_perim_enter_by_mh(portp->lp_mh, &mph); 2191 if (!enable) { 2192 /* 2193 * Turn OFF Collector_Distributor. 2194 */ 2195 portp->lp_collector_enabled = B_FALSE; 2196 aggr_send_port_disable(portp); 2197 goto done; 2198 } 2199 2200 /* 2201 * Turn ON Collector_Distributor. 2202 */ 2203 2204 if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on && 2205 (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) { 2206 /* Port is compatible and can be aggregated */ 2207 portp->lp_collector_enabled = B_TRUE; 2208 aggr_send_port_enable(portp); 2209 } 2210 2211 done: 2212 mac_perim_exit(mph); 2213 } 2214 2215 /* 2216 * Because the LACP packet processing needs to enter the aggr's mac perimeter 2217 * and that would potentially cause a deadlock with the thread in which the 2218 * grp/port is deleted, we defer the packet process to a worker thread. Here 2219 * we only enqueue the received Marker or LACPDU for later processing. 2220 */ 2221 void 2222 aggr_lacp_rx_enqueue(aggr_port_t *portp, mblk_t *dmp) 2223 { 2224 aggr_grp_t *grp = portp->lp_grp; 2225 lacp_t *lacp; 2226 2227 dmp->b_rptr += sizeof (struct ether_header); 2228 2229 if (MBLKL(dmp) < sizeof (lacp_t)) { 2230 freemsg(dmp); 2231 return; 2232 } 2233 2234 lacp = (lacp_t *)dmp->b_rptr; 2235 if (lacp->subtype != LACP_SUBTYPE && lacp->subtype != MARKER_SUBTYPE) { 2236 AGGR_LACP_DBG(("aggr_lacp_rx_enqueue: (%d): " 2237 "Unknown Slow Protocol type %d\n", 2238 portp->lp_linkid, lacp->subtype)); 2239 freemsg(dmp); 2240 return; 2241 } 2242 2243 mutex_enter(&grp->lg_lacp_lock); 2244 2245 /* 2246 * If the lg_lacp_done is set, this aggregation is in the process of 2247 * being deleted, return directly. 2248 */ 2249 if (grp->lg_lacp_done) { 2250 mutex_exit(&grp->lg_lacp_lock); 2251 freemsg(dmp); 2252 return; 2253 } 2254 2255 if (grp->lg_lacp_tail == NULL) { 2256 grp->lg_lacp_head = grp->lg_lacp_tail = dmp; 2257 } else { 2258 grp->lg_lacp_tail->b_next = dmp; 2259 grp->lg_lacp_tail = dmp; 2260 } 2261 2262 /* 2263 * Hold a reference of the port so that the port won't be freed when it 2264 * is removed from the aggr. The b_prev field is borrowed to save the 2265 * port information. 2266 */ 2267 AGGR_PORT_REFHOLD(portp); 2268 dmp->b_prev = (mblk_t *)portp; 2269 cv_broadcast(&grp->lg_lacp_cv); 2270 mutex_exit(&grp->lg_lacp_lock); 2271 } 2272 2273 static void 2274 aggr_lacp_rx(mblk_t *dmp) 2275 { 2276 aggr_port_t *portp = (aggr_port_t *)dmp->b_prev; 2277 mac_perim_handle_t mph; 2278 lacp_t *lacp; 2279 2280 dmp->b_prev = NULL; 2281 2282 mac_perim_enter_by_mh(portp->lp_grp->lg_mh, &mph); 2283 if (portp->lp_closing) 2284 goto done; 2285 2286 lacp = (lacp_t *)dmp->b_rptr; 2287 switch (lacp->subtype) { 2288 case LACP_SUBTYPE: 2289 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n", 2290 portp->lp_linkid)); 2291 2292 if (!portp->lp_lacp.sm.lacp_on) { 2293 break; 2294 } 2295 lacp_receive_sm(portp, lacp); 2296 break; 2297 2298 case MARKER_SUBTYPE: 2299 AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n", 2300 portp->lp_linkid)); 2301 2302 if (receive_marker_pdu(portp, dmp) != 0) 2303 break; 2304 2305 /* Send the packet over the first TX ring */ 2306 dmp = mac_hwring_send_priv(portp->lp_mch, 2307 portp->lp_tx_rings[0], dmp); 2308 if (dmp != NULL) 2309 freemsg(dmp); 2310 mac_perim_exit(mph); 2311 AGGR_PORT_REFRELE(portp); 2312 return; 2313 } 2314 2315 done: 2316 mac_perim_exit(mph); 2317 AGGR_PORT_REFRELE(portp); 2318 freemsg(dmp); 2319 } 2320 2321 void 2322 aggr_lacp_rx_thread(void *arg) 2323 { 2324 callb_cpr_t cprinfo; 2325 aggr_grp_t *grp = (aggr_grp_t *)arg; 2326 aggr_port_t *port; 2327 mblk_t *mp, *nextmp; 2328 2329 CALLB_CPR_INIT(&cprinfo, &grp->lg_lacp_lock, callb_generic_cpr, 2330 "aggr_lacp_rx_thread"); 2331 2332 mutex_enter(&grp->lg_lacp_lock); 2333 2334 /* 2335 * Quit the thread if the grp is deleted. 2336 */ 2337 while (!grp->lg_lacp_done) { 2338 if ((mp = grp->lg_lacp_head) == NULL) { 2339 CALLB_CPR_SAFE_BEGIN(&cprinfo); 2340 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 2341 CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_lacp_lock); 2342 continue; 2343 } 2344 2345 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 2346 mutex_exit(&grp->lg_lacp_lock); 2347 2348 while (mp != NULL) { 2349 nextmp = mp->b_next; 2350 mp->b_next = NULL; 2351 aggr_lacp_rx(mp); 2352 mp = nextmp; 2353 } 2354 mutex_enter(&grp->lg_lacp_lock); 2355 } 2356 2357 /* 2358 * The grp is being destroyed, simply free all of the LACP messages 2359 * left in the queue which did not have the chance to be processed. 2360 * We cannot use freemsgchain() here since we need to clear the 2361 * b_prev field. 2362 */ 2363 while ((mp = grp->lg_lacp_head) != NULL) { 2364 port = (aggr_port_t *)mp->b_prev; 2365 AGGR_PORT_REFRELE(port); 2366 nextmp = mp->b_next; 2367 mp->b_next = NULL; 2368 mp->b_prev = NULL; 2369 freemsg(mp); 2370 mp = nextmp; 2371 } 2372 2373 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 2374 grp->lg_lacp_rx_thread = NULL; 2375 cv_broadcast(&grp->lg_lacp_cv); 2376 CALLB_CPR_EXIT(&cprinfo); 2377 thread_exit(); 2378 } 2379