1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups. 30 * 31 * An instance of the structure aggr_grp_t is allocated for each 32 * link aggregation group. When created, aggr_grp_t objects are 33 * entered into the aggr_grp_hash hash table maintained by the modhash 34 * module. The hash key is the port number associated with the link 35 * aggregation group. The port number associated with a group corresponds 36 * to the key associated with the group. 37 * 38 * A set of MAC ports is associated with each link aggregation group. 
39 */ 40 41 #include <sys/types.h> 42 #include <sys/sysmacros.h> 43 #include <sys/conf.h> 44 #include <sys/cmn_err.h> 45 #include <sys/list.h> 46 #include <sys/ksynch.h> 47 #include <sys/kmem.h> 48 #include <sys/stream.h> 49 #include <sys/modctl.h> 50 #include <sys/ddi.h> 51 #include <sys/sunddi.h> 52 #include <sys/atomic.h> 53 #include <sys/stat.h> 54 #include <sys/modhash.h> 55 #include <sys/strsun.h> 56 #include <sys/dlpi.h> 57 58 #include <sys/aggr.h> 59 #include <sys/aggr_impl.h> 60 61 static void aggr_m_info(void *, mac_info_t *); 62 static int aggr_m_start(void *); 63 static void aggr_m_stop(void *); 64 static int aggr_m_promisc(void *, boolean_t); 65 static int aggr_m_multicst(void *, boolean_t, const uint8_t *); 66 static int aggr_m_unicst(void *, const uint8_t *); 67 static uint64_t aggr_m_stat(void *, enum mac_stat); 68 static void aggr_m_resources(void *); 69 static void aggr_m_ioctl(void *, queue_t *, mblk_t *); 70 71 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, const char *, uint32_t); 72 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *); 73 static void aggr_stats_op(enum mac_stat, uint64_t *, uint64_t *, boolean_t); 74 static void aggr_grp_capab_set(aggr_grp_t *); 75 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); 76 77 static kmem_cache_t *aggr_grp_cache; 78 static mod_hash_t *aggr_grp_hash; 79 static krwlock_t aggr_grp_lock; 80 static uint_t aggr_grp_cnt; 81 82 #define GRP_HASHSZ 64 83 #define GRP_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)key) 84 85 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; 86 static uchar_t aggr_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 87 88 /* used by grp_info_walker */ 89 typedef struct aggr_grp_info_state { 90 uint32_t ls_group_key; 91 boolean_t ls_group_found; 92 aggr_grp_info_new_grp_fn_t ls_new_grp_fn; 93 aggr_grp_info_new_port_fn_t ls_new_port_fn; 94 void *ls_fn_arg; 95 int ls_rc; 96 } aggr_grp_info_state_t; 97 98 /*ARGSUSED*/ 99 static int 100 
aggr_grp_constructor(void *buf, void *arg, int kmflag) 101 { 102 aggr_grp_t *grp = buf; 103 104 bzero(grp, sizeof (*grp)); 105 rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL); 106 mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL); 107 108 grp->lg_link_state = LINK_STATE_UNKNOWN; 109 110 return (0); 111 } 112 113 /*ARGSUSED*/ 114 static void 115 aggr_grp_destructor(void *buf, void *arg) 116 { 117 aggr_grp_t *grp = buf; 118 119 if (grp->lg_tx_ports != NULL) { 120 kmem_free(grp->lg_tx_ports, 121 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 122 } 123 124 mutex_destroy(&grp->aggr.gl_lock); 125 rw_destroy(&grp->lg_lock); 126 } 127 128 void 129 aggr_grp_init(void) 130 { 131 aggr_grp_cache = kmem_cache_create("aggr_grp_cache", 132 sizeof (aggr_grp_t), 0, aggr_grp_constructor, 133 aggr_grp_destructor, NULL, NULL, NULL, 0); 134 135 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash", 136 GRP_HASHSZ, mod_hash_null_valdtor); 137 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); 138 aggr_grp_cnt = 0; 139 } 140 141 void 142 aggr_grp_fini(void) 143 { 144 rw_destroy(&aggr_grp_lock); 145 mod_hash_destroy_idhash(aggr_grp_hash); 146 kmem_cache_destroy(aggr_grp_cache); 147 } 148 149 uint_t 150 aggr_grp_count(void) 151 { 152 uint_t count; 153 154 rw_enter(&aggr_grp_lock, RW_READER); 155 count = aggr_grp_cnt; 156 rw_exit(&aggr_grp_lock); 157 return (count); 158 } 159 160 /* 161 * Attach a port to a link aggregation group. 162 * 163 * A port is attached to a link aggregation group once its speed 164 * and link state have been verified. 165 * 166 * Returns B_TRUE if the group link state or speed has changed. If 167 * it's the case, the caller must notify the MAC layer via a call 168 * to mac_link(). 
169 */ 170 boolean_t 171 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) 172 { 173 boolean_t link_changed = B_FALSE; 174 175 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 176 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 177 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 178 179 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 180 return (B_FALSE); 181 182 /* 183 * Validate the MAC port link speed and update the group 184 * link speed if needed. 185 */ 186 if (port->lp_ifspeed == 0 || 187 port->lp_link_state != LINK_STATE_UP || 188 port->lp_link_duplex != LINK_DUPLEX_FULL) { 189 /* 190 * Can't attach a MAC port with unknown link speed, 191 * down link, or not in full duplex mode. 192 */ 193 return (B_FALSE); 194 } 195 196 if (grp->lg_ifspeed == 0) { 197 /* 198 * The group inherits the speed of the first link being 199 * attached. 200 */ 201 grp->lg_ifspeed = port->lp_ifspeed; 202 link_changed = B_TRUE; 203 } else if (grp->lg_ifspeed != port->lp_ifspeed) { 204 /* 205 * The link speed of the MAC port must be the same as 206 * the group link speed, as per 802.3ad. Since it is 207 * not, the attach is cancelled. 208 */ 209 return (B_FALSE); 210 } 211 212 grp->lg_nattached_ports++; 213 214 /* 215 * Update the group link state. 216 */ 217 if (grp->lg_link_state != LINK_STATE_UP) { 218 grp->lg_link_state = LINK_STATE_UP; 219 grp->lg_link_duplex = LINK_DUPLEX_FULL; 220 link_changed = B_TRUE; 221 } 222 223 aggr_grp_multicst_port(port, B_TRUE); 224 225 /* 226 * Update port's state. 227 */ 228 port->lp_state = AGGR_PORT_STATE_ATTACHED; 229 230 /* 231 * If LACP is OFF, the port can be used to send data as soon 232 * as its link is up and verified to be compatible with the 233 * aggregation. 234 * 235 * If LACP is active or passive, notify the LACP subsystem, which 236 * will enable sending on the port following the LACP protocol. 
237 */ 238 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 239 aggr_send_port_enable(port); 240 else 241 aggr_lacp_port_attached(port); 242 243 return (link_changed); 244 } 245 246 boolean_t 247 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) 248 { 249 boolean_t link_changed = B_FALSE; 250 251 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 252 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 253 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 254 255 /* update state */ 256 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 257 return (B_FALSE); 258 port->lp_state = AGGR_PORT_STATE_STANDBY; 259 260 aggr_grp_multicst_port(port, B_FALSE); 261 262 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 263 aggr_send_port_disable(port); 264 else 265 aggr_lacp_port_detached(port); 266 267 grp->lg_nattached_ports--; 268 if (grp->lg_nattached_ports == 0) { 269 /* the last attached MAC port of the group is being detached */ 270 grp->lg_ifspeed = 0; 271 grp->lg_link_state = LINK_STATE_DOWN; 272 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 273 link_changed = B_TRUE; 274 } 275 276 return (link_changed); 277 } 278 279 /* 280 * Update the MAC addresses of the constituent ports of the specified 281 * group. This function is invoked: 282 * - after creating a new aggregation group. 283 * - after adding new ports to an aggregation group. 284 * - after removing a port from a group when the MAC address of 285 * that port was used for the MAC address of the group. 286 * - after the MAC address of a port changed when the MAC address 287 * of that port was used for the MAC address of the group. 
288 */ 289 void 290 aggr_grp_update_ports_mac(aggr_grp_t *grp) 291 { 292 aggr_port_t *cport; 293 294 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 295 296 for (cport = grp->lg_ports; cport != NULL; 297 cport = cport->lp_next) { 298 rw_enter(&cport->lp_lock, RW_WRITER); 299 if (aggr_port_unicst(cport, grp->lg_addr) != 0) 300 (void) aggr_grp_detach_port(grp, cport); 301 rw_exit(&cport->lp_lock); 302 if (grp->lg_closing) 303 break; 304 } 305 } 306 307 /* 308 * Invoked when the MAC address of a port has changed. If the port's 309 * MAC address was used for the group MAC address, returns B_TRUE. 310 * In that case, it is the responsibility of the caller to 311 * invoke aggr_grp_update_ports_mac() after releasing the 312 * the port lock, and aggr_grp_notify() after releasing the 313 * group lock. 314 */ 315 boolean_t 316 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port) 317 { 318 boolean_t grp_addr_changed = B_FALSE; 319 320 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 321 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 322 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 323 324 if (grp->lg_addr_fixed) { 325 /* 326 * The group is using a fixed MAC address or an automatic 327 * MAC address has not been set. 328 */ 329 return (B_FALSE); 330 } 331 332 if (grp->lg_mac_addr_port == port) { 333 /* 334 * The MAC address of the port was assigned to the group 335 * MAC address. Update the group MAC address. 336 */ 337 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 338 grp_addr_changed = B_TRUE; 339 } else { 340 /* 341 * Update the actual port MAC address to the MAC address 342 * of the group. 343 */ 344 if (aggr_port_unicst(port, grp->lg_addr) != 0) 345 (void) aggr_grp_detach_port(grp, port); 346 } 347 348 return (grp_addr_changed); 349 } 350 351 /* 352 * Add a port to a link aggregation group. 
353 */ 354 static int 355 aggr_grp_add_port(aggr_grp_t *grp, const char *name, uint_t portnum, 356 aggr_port_t **pp) 357 { 358 aggr_port_t *port, **cport; 359 int err; 360 361 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 362 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 363 364 /* create new port */ 365 err = aggr_port_create(name, portnum, &port); 366 if (err != 0) 367 return (err); 368 369 rw_enter(&port->lp_lock, RW_WRITER); 370 371 /* add port to list of group constituent ports */ 372 cport = &grp->lg_ports; 373 while (*cport != NULL) 374 cport = &((*cport)->lp_next); 375 *cport = port; 376 377 /* 378 * Back reference to the group it is member of. A port always 379 * holds a reference to its group to ensure that the back 380 * reference is always valid. 381 */ 382 port->lp_grp = grp; 383 AGGR_GRP_REFHOLD(grp); 384 grp->lg_nports++; 385 386 aggr_lacp_init_port(port); 387 388 rw_exit(&port->lp_lock); 389 390 if (pp != NULL) 391 *pp = port; 392 393 return (0); 394 } 395 396 /* 397 * Add one or more ports to an existing link aggregation group. 
398 */ 399 int 400 aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 401 { 402 int rc, i, nadded = 0; 403 aggr_grp_t *grp = NULL; 404 aggr_port_t *port; 405 406 /* get group corresponding to key */ 407 rw_enter(&aggr_grp_lock, RW_READER); 408 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 409 (mod_hash_val_t *)&grp) != 0) { 410 rw_exit(&aggr_grp_lock); 411 return (ENOENT); 412 } 413 AGGR_GRP_REFHOLD(grp); 414 rw_exit(&aggr_grp_lock); 415 416 AGGR_LACP_LOCK(grp); 417 rw_enter(&grp->lg_lock, RW_WRITER); 418 419 /* add the specified ports to group */ 420 for (i = 0; i < nports; i++) { 421 /* add port to group */ 422 if ((rc = aggr_grp_add_port(grp, ports[i].lp_devname, 423 ports[i].lp_port, &port)) != 0) 424 goto bail; 425 ASSERT(port != NULL); 426 nadded++; 427 428 /* check capabilities */ 429 if (!aggr_grp_capab_check(grp, port)) { 430 rc = ENOTSUP; 431 goto bail; 432 } 433 434 /* start port if group has already been started */ 435 if (grp->lg_started) { 436 rw_enter(&port->lp_lock, RW_WRITER); 437 rc = aggr_port_start(port); 438 if (rc != 0) { 439 rw_exit(&port->lp_lock); 440 goto bail; 441 } 442 443 /* set port promiscuous mode */ 444 rc = aggr_port_promisc(port, grp->lg_promisc); 445 if (rc != 0) { 446 rw_exit(&port->lp_lock); 447 goto bail; 448 } 449 rw_exit(&port->lp_lock); 450 } 451 } 452 453 /* update the MAC address of the constituent ports */ 454 aggr_grp_update_ports_mac(grp); 455 456 bail: 457 if (rc != 0) { 458 /* stop and remove ports that have been added */ 459 for (i = 0; i < nadded && !grp->lg_closing; i++) { 460 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 461 ports[i].lp_port); 462 ASSERT(port != NULL); 463 if (grp->lg_started) { 464 rw_enter(&port->lp_lock, RW_WRITER); 465 aggr_port_stop(port); 466 rw_exit(&port->lp_lock); 467 } 468 (void) aggr_grp_rem_port(grp, port, NULL); 469 } 470 } 471 472 rw_exit(&grp->lg_lock); 473 AGGR_LACP_UNLOCK(grp); 474 if (rc == 0 && !grp->lg_closing) 475 
mac_resource_update(&grp->lg_mac); 476 AGGR_GRP_REFRELE(grp); 477 return (rc); 478 } 479 480 /* 481 * Update properties of an existing link aggregation group. 482 */ 483 int 484 aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask, 485 uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr, 486 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 487 { 488 int rc = 0; 489 aggr_grp_t *grp = NULL; 490 boolean_t mac_addr_changed = B_FALSE; 491 492 if (grp_arg == NULL) { 493 /* get group corresponding to key */ 494 rw_enter(&aggr_grp_lock, RW_READER); 495 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 496 (mod_hash_val_t *)&grp) != 0) { 497 rc = ENOENT; 498 goto bail; 499 } 500 AGGR_LACP_LOCK(grp); 501 rw_enter(&grp->lg_lock, RW_WRITER); 502 } else { 503 grp = grp_arg; 504 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 505 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 506 } 507 508 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 509 AGGR_GRP_REFHOLD(grp); 510 511 /* validate fixed address if specified */ 512 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 513 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 514 (mac_addr[0] & 0x01))) { 515 rc = EINVAL; 516 goto bail; 517 } 518 519 /* update policy if requested */ 520 if (update_mask & AGGR_MODIFY_POLICY) 521 aggr_send_update_policy(grp, policy); 522 523 /* update unicast MAC address if requested */ 524 if (update_mask & AGGR_MODIFY_MAC) { 525 if (mac_fixed) { 526 /* user-supplied MAC address */ 527 grp->lg_mac_addr_port = NULL; 528 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 529 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 530 mac_addr_changed = B_TRUE; 531 } 532 } else if (grp->lg_addr_fixed) { 533 /* switch from user-supplied to automatic */ 534 aggr_port_t *port = grp->lg_ports; 535 536 rw_enter(&port->lp_lock, RW_WRITER); 537 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 538 grp->lg_mac_addr_port = port; 539 mac_addr_changed = B_TRUE; 540 rw_exit(&port->lp_lock); 
541 } 542 grp->lg_addr_fixed = mac_fixed; 543 } 544 545 if (mac_addr_changed) 546 aggr_grp_update_ports_mac(grp); 547 548 if (update_mask & AGGR_MODIFY_LACP_MODE) 549 aggr_lacp_update_mode(grp, lacp_mode); 550 551 if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing) 552 aggr_lacp_update_timer(grp, lacp_timer); 553 554 bail: 555 if (grp_arg == NULL) { 556 if (grp != NULL) { 557 rw_exit(&grp->lg_lock); 558 AGGR_LACP_UNLOCK(grp); 559 } 560 rw_exit(&aggr_grp_lock); 561 /* pass new unicast address up to MAC layer */ 562 if (grp != NULL && mac_addr_changed && !grp->lg_closing) 563 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 564 } 565 566 if (grp != NULL) 567 AGGR_GRP_REFRELE(grp); 568 569 return (rc); 570 } 571 572 /* 573 * Create a new link aggregation group upon request from administrator. 574 * Returns 0 on success, an errno on failure. 575 */ 576 int 577 aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, 578 uint32_t policy, boolean_t mac_fixed, uchar_t *mac_addr, 579 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 580 { 581 aggr_grp_t *grp = NULL; 582 aggr_port_t *port; 583 mac_t *mac; 584 mac_info_t *mip; 585 int err; 586 int i; 587 588 /* need at least one port */ 589 if (nports == 0) 590 return (EINVAL); 591 592 rw_enter(&aggr_grp_lock, RW_WRITER); 593 594 /* does a group with the same key already exist? 
*/ 595 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 596 (mod_hash_val_t *)&grp); 597 if (err == 0) { 598 rw_exit(&aggr_grp_lock); 599 return (EEXIST); 600 } 601 602 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 603 604 AGGR_LACP_LOCK(grp); 605 rw_enter(&grp->lg_lock, RW_WRITER); 606 607 grp->lg_refs = 1; 608 grp->lg_closing = B_FALSE; 609 grp->lg_key = key; 610 611 grp->lg_ifspeed = 0; 612 grp->lg_link_state = LINK_STATE_UNKNOWN; 613 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 614 grp->lg_started = B_FALSE; 615 grp->lg_promisc = B_FALSE; 616 aggr_lacp_init_grp(grp); 617 618 /* add MAC ports to group */ 619 grp->lg_ports = NULL; 620 grp->lg_nports = 0; 621 grp->lg_nattached_ports = 0; 622 grp->lg_ntx_ports = 0; 623 624 for (i = 0; i < nports; i++) { 625 err = aggr_grp_add_port(grp, ports[i].lp_devname, 626 ports[i].lp_port, NULL); 627 if (err != 0) 628 goto bail; 629 } 630 631 /* 632 * If no explicit MAC address was specified by the administrator, 633 * set it to the MAC address of the first port. 
634 */ 635 grp->lg_addr_fixed = mac_fixed; 636 if (grp->lg_addr_fixed) { 637 /* validate specified address */ 638 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 639 err = EINVAL; 640 goto bail; 641 } 642 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 643 } else { 644 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 645 grp->lg_mac_addr_port = grp->lg_ports; 646 } 647 648 /* update the MAC address of the constituent ports */ 649 aggr_grp_update_ports_mac(grp); 650 651 /* update outbound load balancing policy */ 652 aggr_send_update_policy(grp, policy); 653 654 /* register with the MAC module */ 655 mac = &grp->lg_mac; 656 bzero(mac, sizeof (*mac)); 657 658 mac->m_ident = MAC_IDENT; 659 660 mac->m_driver = grp; 661 mac->m_dip = aggr_dip; 662 mac->m_port = key; 663 664 mip = &(mac->m_info); 665 mip->mi_media = DL_ETHER; 666 mip->mi_sdu_min = 0; 667 mip->mi_sdu_max = ETHERMTU; 668 669 MAC_STAT_MIB(mip->mi_stat); 670 MAC_STAT_ETHER(mip->mi_stat); 671 mip->mi_stat[MAC_STAT_LINK_DUPLEX] = B_TRUE; 672 673 mip->mi_addr_length = ETHERADDRL; 674 bcopy(aggr_brdcst_mac, mip->mi_brdcst_addr, ETHERADDRL); 675 bcopy(grp->lg_addr, mip->mi_unicst_addr, ETHERADDRL); 676 677 mac->m_stat = aggr_m_stat; 678 mac->m_start = aggr_m_start; 679 mac->m_stop = aggr_m_stop; 680 mac->m_promisc = aggr_m_promisc; 681 mac->m_multicst = aggr_m_multicst; 682 mac->m_unicst = aggr_m_unicst; 683 mac->m_tx = aggr_m_tx; 684 mac->m_resources = aggr_m_resources; 685 mac->m_ioctl = aggr_m_ioctl; 686 687 /* set the initial group capabilities */ 688 aggr_grp_capab_set(grp); 689 690 if ((err = mac_register(mac)) != 0) 691 goto bail; 692 693 /* set LACP mode */ 694 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 695 696 /* add new group to hash table */ 697 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(key), 698 (mod_hash_val_t)grp); 699 ASSERT(err == 0); 700 aggr_grp_cnt++; 701 702 rw_exit(&grp->lg_lock); 703 AGGR_LACP_UNLOCK(grp); 704 rw_exit(&aggr_grp_lock); 705 return (0); 706 707 bail: 708 if 
(grp != NULL) { 709 aggr_port_t *cport; 710 711 port = grp->lg_ports; 712 while (port != NULL) { 713 cport = port->lp_next; 714 aggr_port_delete(port); 715 port = cport; 716 } 717 718 rw_exit(&grp->lg_lock); 719 AGGR_LACP_UNLOCK(grp); 720 721 kmem_cache_free(aggr_grp_cache, grp); 722 } 723 724 rw_exit(&aggr_grp_lock); 725 return (err); 726 } 727 728 /* 729 * Return a pointer to the member of a group with specified device name 730 * and port number. 731 */ 732 static aggr_port_t * 733 aggr_grp_port_lookup(aggr_grp_t *grp, const char *devname, uint32_t portnum) 734 { 735 aggr_port_t *port; 736 737 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 738 739 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 740 if ((strcmp(port->lp_devname, devname) == 0) && 741 (port->lp_port == portnum)) 742 break; 743 } 744 745 return (port); 746 } 747 748 /* 749 * Stop, detach and remove a port from a link aggregation group. 750 */ 751 static int 752 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, boolean_t *do_notify) 753 { 754 aggr_port_t **pport; 755 boolean_t grp_mac_addr_changed = B_FALSE; 756 uint64_t val; 757 uint_t i; 758 759 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 760 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 761 ASSERT(grp->lg_nports > 1); 762 763 if (do_notify != NULL) 764 *do_notify = B_FALSE; 765 766 /* unlink port */ 767 for (pport = &grp->lg_ports; *pport != port; 768 pport = &(*pport)->lp_next) { 769 if (*pport == NULL) 770 return (ENOENT); 771 } 772 *pport = port->lp_next; 773 774 rw_enter(&port->lp_lock, RW_WRITER); 775 port->lp_closing = B_TRUE; 776 777 /* 778 * If the MAC address of the port being removed was assigned 779 * to the group, update the group MAC address 780 * using the MAC address of a different port. 781 */ 782 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 783 /* 784 * Set the MAC address of the group to the 785 * MAC address of its first port. 
786 */ 787 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 788 grp->lg_mac_addr_port = grp->lg_ports; 789 grp_mac_addr_changed = B_TRUE; 790 } 791 792 (void) aggr_grp_detach_port(grp, port); 793 794 /* 795 * Add the statistics of the ports while it was aggregated 796 * to the group's residual statistics. 797 */ 798 for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) { 799 /* avoid stats that are not counters */ 800 if (i == MAC_STAT_IFSPEED || i == MAC_STAT_LINK_DUPLEX) 801 continue; 802 803 /* get current value */ 804 val = aggr_port_stat(port, i); 805 /* subtract value at the point of aggregation */ 806 val -= port->lp_stat[i]; 807 /* add to the residual stat */ 808 grp->lg_stat[i] += val; 809 } 810 811 grp->lg_nports--; 812 813 rw_exit(&port->lp_lock); 814 815 aggr_port_delete(port); 816 817 /* 818 * If the group MAC address has changed, update the MAC address of 819 * the remaining consistuent ports according to the new MAC 820 * address of the group. 821 */ 822 if (grp->lg_closing) { 823 *do_notify = B_FALSE; 824 } else { 825 if (grp_mac_addr_changed) 826 aggr_grp_update_ports_mac(grp); 827 828 if (do_notify != NULL) 829 *do_notify = grp_mac_addr_changed; 830 } 831 832 return (0); 833 } 834 835 /* 836 * Remove one or more ports from an existing link aggregation group. 
837 */ 838 int 839 aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 840 { 841 int rc = 0, i; 842 aggr_grp_t *grp = NULL; 843 aggr_port_t *port; 844 boolean_t notify = B_FALSE, grp_mac_addr_changed; 845 846 /* get group corresponding to key */ 847 rw_enter(&aggr_grp_lock, RW_READER); 848 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 849 (mod_hash_val_t *)&grp) != 0) { 850 rw_exit(&aggr_grp_lock); 851 return (ENOENT); 852 } 853 AGGR_GRP_REFHOLD(grp); 854 rw_exit(&aggr_grp_lock); 855 856 AGGR_LACP_LOCK(grp); 857 rw_enter(&grp->lg_lock, RW_WRITER); 858 859 /* we need to keep at least one port per group */ 860 if (nports >= grp->lg_nports) { 861 rc = EINVAL; 862 goto bail; 863 } 864 865 /* first verify that all the groups are valid */ 866 for (i = 0; i < nports; i++) { 867 if (aggr_grp_port_lookup(grp, ports[i].lp_devname, 868 ports[i].lp_port) == NULL) { 869 /* port not found */ 870 rc = ENOENT; 871 goto bail; 872 } 873 } 874 875 /* remove the specified ports from group */ 876 for (i = 0; i < nports && !grp->lg_closing; i++) { 877 /* lookup port */ 878 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 879 ports[i].lp_port); 880 ASSERT(port != NULL); 881 882 /* stop port if group has already been started */ 883 if (grp->lg_started) { 884 rw_enter(&port->lp_lock, RW_WRITER); 885 aggr_port_stop(port); 886 rw_exit(&port->lp_lock); 887 } 888 889 /* remove port from group */ 890 rc = aggr_grp_rem_port(grp, port, &grp_mac_addr_changed); 891 ASSERT(rc == 0); 892 notify = notify || grp_mac_addr_changed; 893 } 894 895 bail: 896 rw_exit(&grp->lg_lock); 897 AGGR_LACP_UNLOCK(grp); 898 if (notify && !grp->lg_closing) 899 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 900 if (rc == 0 && !grp->lg_closing) 901 mac_resource_update(&grp->lg_mac); 902 AGGR_GRP_REFRELE(grp); 903 904 return (rc); 905 } 906 907 int 908 aggr_grp_delete(uint32_t key) 909 { 910 aggr_grp_t *grp = NULL; 911 aggr_port_t *port, *cport; 912 mod_hash_val_t val; 913 914 
rw_enter(&aggr_grp_lock, RW_WRITER); 915 916 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 917 (mod_hash_val_t *)&grp) != 0) { 918 rw_exit(&aggr_grp_lock); 919 return (ENOENT); 920 } 921 AGGR_LACP_LOCK(grp); 922 rw_enter(&grp->lg_lock, RW_WRITER); 923 grp->lg_closing = B_TRUE; 924 925 /* 926 * Unregister from the MAC service module. Since this can 927 * fail if a client hasn't closed the MAC port, we gracefully 928 * fail the operation. 929 */ 930 if (mac_unregister(&grp->lg_mac)) { 931 rw_exit(&grp->lg_lock); 932 AGGR_LACP_UNLOCK(grp); 933 rw_exit(&aggr_grp_lock); 934 return (EBUSY); 935 } 936 937 /* detach and free MAC ports associated with group */ 938 port = grp->lg_ports; 939 while (port != NULL) { 940 cport = port->lp_next; 941 rw_enter(&port->lp_lock, RW_WRITER); 942 if (grp->lg_started) 943 aggr_port_stop(port); 944 (void) aggr_grp_detach_port(grp, port); 945 rw_exit(&port->lp_lock); 946 aggr_port_delete(port); 947 port = cport; 948 } 949 950 rw_exit(&grp->lg_lock); 951 AGGR_LACP_UNLOCK(grp); 952 953 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(key), &val); 954 ASSERT(grp == (aggr_grp_t *)val); 955 956 ASSERT(aggr_grp_cnt > 0); 957 aggr_grp_cnt--; 958 959 rw_exit(&aggr_grp_lock); 960 AGGR_GRP_REFRELE(grp); 961 962 return (0); 963 } 964 965 void 966 aggr_grp_free(aggr_grp_t *grp) 967 { 968 ASSERT(grp->lg_refs == 0); 969 kmem_cache_free(aggr_grp_cache, grp); 970 } 971 972 /* 973 * Walker invoked when building the list of configured groups and 974 * their ports that must be passed up to user-space. 
975 */ 976 977 /*ARGSUSED*/ 978 static uint_t 979 aggr_grp_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 980 { 981 aggr_grp_t *grp; 982 aggr_port_t *port; 983 aggr_grp_info_state_t *state = arg; 984 985 if (state->ls_rc != 0) 986 return (MH_WALK_TERMINATE); /* terminate walk */ 987 988 grp = (aggr_grp_t *)val; 989 990 rw_enter(&grp->lg_lock, RW_READER); 991 992 if (state->ls_group_key != 0 && grp->lg_key != state->ls_group_key) 993 goto bail; 994 995 state->ls_group_found = B_TRUE; 996 997 state->ls_rc = state->ls_new_grp_fn(state->ls_fn_arg, grp->lg_key, 998 grp->lg_addr, grp->lg_addr_fixed, grp->lg_tx_policy, 999 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 1000 1001 if (state->ls_rc != 0) 1002 goto bail; 1003 1004 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1005 1006 rw_enter(&port->lp_lock, RW_READER); 1007 1008 state->ls_rc = state->ls_new_port_fn(state->ls_fn_arg, 1009 port->lp_devname, port->lp_port, port->lp_addr, 1010 port->lp_state, &port->lp_lacp.ActorOperPortState); 1011 1012 rw_exit(&port->lp_lock); 1013 1014 if (state->ls_rc != 0) 1015 goto bail; 1016 } 1017 1018 bail: 1019 rw_exit(&grp->lg_lock); 1020 return ((state->ls_rc == 0) ? 
MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1021 } 1022 1023 int 1024 aggr_grp_info(uint_t *ngroups, uint32_t group_key, void *fn_arg, 1025 aggr_grp_info_new_grp_fn_t new_grp_fn, 1026 aggr_grp_info_new_port_fn_t new_port_fn) 1027 { 1028 aggr_grp_info_state_t state; 1029 int rc = 0; 1030 1031 rw_enter(&aggr_grp_lock, RW_READER); 1032 1033 *ngroups = aggr_grp_cnt; 1034 1035 bzero(&state, sizeof (state)); 1036 state.ls_group_key = group_key; 1037 state.ls_new_grp_fn = new_grp_fn; 1038 state.ls_new_port_fn = new_port_fn; 1039 state.ls_fn_arg = fn_arg; 1040 1041 mod_hash_walk(aggr_grp_hash, aggr_grp_info_walker, &state); 1042 1043 if ((rc = state.ls_rc) == 0 && group_key != 0 && 1044 !state.ls_group_found) 1045 rc = ENOENT; 1046 1047 rw_exit(&aggr_grp_lock); 1048 return (rc); 1049 } 1050 1051 static void 1052 aggr_m_resources(void *arg) 1053 { 1054 aggr_grp_t *grp = arg; 1055 aggr_port_t *port; 1056 1057 /* Call each port's m_resources function */ 1058 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 1059 mac_resources(port->lp_mh); 1060 } 1061 1062 /*ARGSUSED*/ 1063 static void 1064 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1065 { 1066 miocnak(q, mp, 0, ENOTSUP); 1067 } 1068 1069 static uint64_t 1070 aggr_m_stat(void *arg, enum mac_stat stat) 1071 { 1072 aggr_grp_t *grp = arg; 1073 aggr_port_t *port; 1074 uint64_t val; 1075 1076 rw_enter(&grp->lg_lock, RW_READER); 1077 1078 switch (stat) { 1079 case MAC_STAT_IFSPEED: 1080 val = grp->lg_ifspeed; 1081 break; 1082 case MAC_STAT_LINK_DUPLEX: 1083 val = grp->lg_link_duplex; 1084 break; 1085 default: 1086 /* 1087 * The remaining statistics are counters. They are computed 1088 * by aggregating the counters of the members MACs while they 1089 * were aggregated, plus the residual counter of the group 1090 * itself, which is updated each time a MAC is removed from 1091 * the group. 
1092 */ 1093 val = 0; 1094 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1095 /* actual port statistic */ 1096 val += aggr_port_stat(port, stat); 1097 /* minus the port stat when it was added */ 1098 val -= port->lp_stat[stat]; 1099 /* plus any residual amount for the group */ 1100 val += grp->lg_stat[stat]; 1101 } 1102 } 1103 1104 rw_exit(&grp->lg_lock); 1105 return (val); 1106 } 1107 1108 static int 1109 aggr_m_start(void *arg) 1110 { 1111 aggr_grp_t *grp = arg; 1112 aggr_port_t *port; 1113 1114 AGGR_LACP_LOCK(grp); 1115 rw_enter(&grp->lg_lock, RW_WRITER); 1116 1117 /* 1118 * Attempts to start all configured members of the group. 1119 * Group members will be attached when their link-up notification 1120 * is received. 1121 */ 1122 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1123 rw_enter(&port->lp_lock, RW_WRITER); 1124 if (aggr_port_start(port) != 0) { 1125 rw_exit(&port->lp_lock); 1126 continue; 1127 } 1128 1129 /* set port promiscuous mode */ 1130 if (aggr_port_promisc(port, grp->lg_promisc) != 0) 1131 aggr_port_stop(port); 1132 rw_exit(&port->lp_lock); 1133 } 1134 1135 grp->lg_started = B_TRUE; 1136 1137 rw_exit(&grp->lg_lock); 1138 AGGR_LACP_UNLOCK(grp); 1139 1140 return (0); 1141 } 1142 1143 static void 1144 aggr_m_stop(void *arg) 1145 { 1146 aggr_grp_t *grp = arg; 1147 aggr_port_t *port; 1148 1149 rw_enter(&grp->lg_lock, RW_WRITER); 1150 1151 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1152 rw_enter(&port->lp_lock, RW_WRITER); 1153 aggr_port_stop(port); 1154 rw_exit(&port->lp_lock); 1155 } 1156 1157 grp->lg_started = B_FALSE; 1158 1159 rw_exit(&grp->lg_lock); 1160 } 1161 1162 static int 1163 aggr_m_promisc(void *arg, boolean_t on) 1164 { 1165 aggr_grp_t *grp = arg; 1166 aggr_port_t *port; 1167 1168 AGGR_LACP_LOCK(grp); 1169 rw_enter(&grp->lg_lock, RW_WRITER); 1170 AGGR_GRP_REFHOLD(grp); 1171 1172 if (on == grp->lg_promisc) 1173 goto bail; 1174 1175 for (port = grp->lg_ports; port != NULL; port = 
port->lp_next) { 1176 rw_enter(&port->lp_lock, RW_WRITER); 1177 AGGR_PORT_REFHOLD(port); 1178 if (port->lp_started) { 1179 if (aggr_port_promisc(port, on) != 0) 1180 (void) aggr_grp_detach_port(grp, port); 1181 } 1182 rw_exit(&port->lp_lock); 1183 AGGR_PORT_REFRELE(port); 1184 if (grp->lg_closing) 1185 break; 1186 } 1187 1188 grp->lg_promisc = on; 1189 1190 bail: 1191 rw_exit(&grp->lg_lock); 1192 AGGR_LACP_UNLOCK(grp); 1193 AGGR_GRP_REFRELE(grp); 1194 1195 return (0); 1196 } 1197 1198 /* 1199 * Add or remove the multicast addresses that are defined for the group 1200 * to or from the specified port. 1201 * This function is called before stopping a port, before a port 1202 * is detached from a group, and when attaching a port to a group. 1203 */ 1204 void 1205 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add) 1206 { 1207 aggr_grp_t *grp = port->lp_grp; 1208 1209 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 1210 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 1211 1212 if (!port->lp_started) 1213 return; 1214 1215 mac_multicst_refresh(&grp->lg_mac, aggr_port_multicst, port, 1216 add); 1217 } 1218 1219 static int 1220 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1221 { 1222 aggr_grp_t *grp = arg; 1223 aggr_port_t *port = NULL; 1224 int err = 0, cerr; 1225 1226 rw_enter(&grp->lg_lock, RW_WRITER); 1227 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1228 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 1229 continue; 1230 cerr = aggr_port_multicst(port, add, addrp); 1231 if (cerr != 0 && err == 0) 1232 err = cerr; 1233 } 1234 rw_exit(&grp->lg_lock); 1235 return (err); 1236 } 1237 1238 static int 1239 aggr_m_unicst(void *arg, const uint8_t *macaddr) 1240 { 1241 aggr_grp_t *grp = arg; 1242 int rc; 1243 1244 AGGR_LACP_LOCK(grp); 1245 rw_enter(&grp->lg_lock, RW_WRITER); 1246 rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr, 1247 0, 0); 1248 rw_exit(&grp->lg_lock); 1249 AGGR_LACP_UNLOCK(grp); 1250 1251 
return (rc); 1252 } 1253 1254 /* 1255 * Initialize the capabilities that are advertised for the group 1256 * according to the capabilities of the constituent ports. 1257 */ 1258 static void 1259 aggr_grp_capab_set(aggr_grp_t *grp) 1260 { 1261 uint32_t cksum = (uint32_t)-1; 1262 uint32_t poll = DL_CAPAB_POLL; 1263 aggr_port_t *port; 1264 const mac_info_t *port_mi; 1265 1266 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 1267 1268 ASSERT(grp->lg_ports != NULL); 1269 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1270 port_mi = mac_info(port->lp_mh); 1271 cksum &= port_mi->mi_cksum; 1272 poll &= port_mi->mi_poll; 1273 } 1274 1275 grp->lg_mac.m_info.mi_cksum = cksum; 1276 grp->lg_mac.m_info.mi_poll = poll; 1277 } 1278 1279 /* 1280 * Checks whether the capabilities of the ports being added are compatible 1281 * with the current capabilities of the aggregation. 1282 */ 1283 static boolean_t 1284 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 1285 { 1286 const mac_info_t *port_mi = mac_info(port->lp_mh); 1287 uint32_t grp_cksum = grp->lg_mac.m_info.mi_cksum; 1288 1289 ASSERT(grp->lg_ports != NULL); 1290 1291 return (((grp_cksum & port_mi->mi_cksum) == grp_cksum) && 1292 (grp->lg_mac.m_info.mi_poll == port_mi->mi_poll)); 1293 } 1294