/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
 *
 * An instance of the structure aggr_grp_t is allocated for each
 * link aggregation group. When created, aggr_grp_t objects are
 * entered into the aggr_grp_hash hash table maintained by the modhash
 * module. The hash key is the port number associated with the link
 * aggregation group. The port number associated with a group corresponds
 * to the key associated with the group.
 *
 * A set of MAC ports is associated with each aggregation group.
40 */ 41 42 #include <sys/types.h> 43 #include <sys/sysmacros.h> 44 #include <sys/conf.h> 45 #include <sys/cmn_err.h> 46 #include <sys/list.h> 47 #include <sys/ksynch.h> 48 #include <sys/kmem.h> 49 #include <sys/stream.h> 50 #include <sys/modctl.h> 51 #include <sys/ddi.h> 52 #include <sys/sunddi.h> 53 #include <sys/atomic.h> 54 #include <sys/stat.h> 55 #include <sys/modhash.h> 56 #include <sys/strsun.h> 57 #include <sys/dlpi.h> 58 59 #include <sys/aggr.h> 60 #include <sys/aggr_impl.h> 61 62 static void aggr_m_info(void *, mac_info_t *); 63 static int aggr_m_start(void *); 64 static void aggr_m_stop(void *); 65 static int aggr_m_promisc(void *, boolean_t); 66 static int aggr_m_multicst(void *, boolean_t, const uint8_t *); 67 static int aggr_m_unicst(void *, const uint8_t *); 68 static uint64_t aggr_m_stat(void *, enum mac_stat); 69 static void aggr_m_resources(void *); 70 static void aggr_m_ioctl(void *, queue_t *, mblk_t *); 71 72 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, const char *, uint32_t); 73 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *); 74 static void aggr_stats_op(enum mac_stat, uint64_t *, uint64_t *, boolean_t); 75 static void aggr_grp_capab_set(aggr_grp_t *); 76 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); 77 78 static kmem_cache_t *aggr_grp_cache; 79 static mod_hash_t *aggr_grp_hash; 80 static krwlock_t aggr_grp_lock; 81 static uint_t aggr_grp_cnt; 82 83 #define GRP_HASHSZ 64 84 #define GRP_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)key) 85 86 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; 87 static uchar_t aggr_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 88 89 /* used by grp_info_walker */ 90 typedef struct aggr_grp_info_state { 91 uint32_t ls_group_key; 92 boolean_t ls_group_found; 93 aggr_grp_info_new_grp_fn_t ls_new_grp_fn; 94 aggr_grp_info_new_port_fn_t ls_new_port_fn; 95 void *ls_fn_arg; 96 int ls_rc; 97 } aggr_grp_info_state_t; 98 99 /*ARGSUSED*/ 100 static int 101 
aggr_grp_constructor(void *buf, void *arg, int kmflag) 102 { 103 aggr_grp_t *grp = buf; 104 105 bzero(grp, sizeof (*grp)); 106 rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL); 107 mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL); 108 109 grp->lg_link_state = LINK_STATE_UNKNOWN; 110 111 return (0); 112 } 113 114 /*ARGSUSED*/ 115 static void 116 aggr_grp_destructor(void *buf, void *arg) 117 { 118 aggr_grp_t *grp = buf; 119 120 if (grp->lg_tx_ports != NULL) { 121 kmem_free(grp->lg_tx_ports, 122 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 123 } 124 125 mutex_destroy(&grp->aggr.gl_lock); 126 rw_destroy(&grp->lg_lock); 127 } 128 129 void 130 aggr_grp_init(void) 131 { 132 aggr_grp_cache = kmem_cache_create("aggr_grp_cache", 133 sizeof (aggr_grp_t), 0, aggr_grp_constructor, 134 aggr_grp_destructor, NULL, NULL, NULL, 0); 135 136 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash", 137 GRP_HASHSZ, mod_hash_null_valdtor); 138 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); 139 aggr_grp_cnt = 0; 140 } 141 142 int 143 aggr_grp_fini(void) 144 { 145 if (aggr_grp_cnt > 0) 146 return (EBUSY); 147 148 rw_destroy(&aggr_grp_lock); 149 mod_hash_destroy_idhash(aggr_grp_hash); 150 kmem_cache_destroy(aggr_grp_cache); 151 return (0); 152 } 153 154 uint_t 155 aggr_grp_count(void) 156 { 157 uint_t count; 158 159 rw_enter(&aggr_grp_lock, RW_READER); 160 count = aggr_grp_cnt; 161 rw_exit(&aggr_grp_lock); 162 return (count); 163 } 164 165 /* 166 * Attach a port to a link aggregation group. 167 * 168 * A port is attached to a link aggregation group once its speed 169 * and link state have been verified. 170 * 171 * Returns B_TRUE if the group link state or speed has changed. If 172 * it's the case, the caller must notify the MAC layer via a call 173 * to mac_link(). 
174 */ 175 boolean_t 176 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) 177 { 178 boolean_t link_changed = B_FALSE; 179 180 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 181 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 182 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 183 184 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 185 return (B_FALSE); 186 187 /* 188 * Validate the MAC port link speed and update the group 189 * link speed if needed. 190 */ 191 if (port->lp_ifspeed == 0 || 192 port->lp_link_state != LINK_STATE_UP || 193 port->lp_link_duplex != LINK_DUPLEX_FULL) { 194 /* 195 * Can't attach a MAC port with unknown link speed, 196 * down link, or not in full duplex mode. 197 */ 198 return (B_FALSE); 199 } 200 201 if (grp->lg_ifspeed == 0) { 202 /* 203 * The group inherits the speed of the first link being 204 * attached. 205 */ 206 grp->lg_ifspeed = port->lp_ifspeed; 207 link_changed = B_TRUE; 208 } else if (grp->lg_ifspeed != port->lp_ifspeed) { 209 /* 210 * The link speed of the MAC port must be the same as 211 * the group link speed, as per 802.3ad. Since it is 212 * not, the attach is cancelled. 213 */ 214 return (B_FALSE); 215 } 216 217 grp->lg_nattached_ports++; 218 219 /* 220 * Update the group link state. 221 */ 222 if (grp->lg_link_state != LINK_STATE_UP) { 223 grp->lg_link_state = LINK_STATE_UP; 224 grp->lg_link_duplex = LINK_DUPLEX_FULL; 225 link_changed = B_TRUE; 226 } 227 228 aggr_grp_multicst_port(port, B_TRUE); 229 230 /* 231 * Update port's state. 232 */ 233 port->lp_state = AGGR_PORT_STATE_ATTACHED; 234 235 /* 236 * If LACP is OFF, the port can be used to send data as soon 237 * as its link is up and verified to be compatible with the 238 * aggregation. 239 * 240 * If LACP is active or passive, notify the LACP subsystem, which 241 * will enable sending on the port following the LACP protocol. 
242 */ 243 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 244 aggr_send_port_enable(port); 245 else 246 aggr_lacp_port_attached(port); 247 248 return (link_changed); 249 } 250 251 boolean_t 252 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) 253 { 254 boolean_t link_changed = B_FALSE; 255 256 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 257 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 258 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 259 260 /* update state */ 261 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 262 return (B_FALSE); 263 port->lp_state = AGGR_PORT_STATE_STANDBY; 264 265 aggr_grp_multicst_port(port, B_FALSE); 266 267 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 268 aggr_send_port_disable(port); 269 else 270 aggr_lacp_port_detached(port); 271 272 grp->lg_nattached_ports--; 273 if (grp->lg_nattached_ports == 0) { 274 /* the last attached MAC port of the group is being detached */ 275 grp->lg_ifspeed = 0; 276 grp->lg_link_state = LINK_STATE_DOWN; 277 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 278 link_changed = B_TRUE; 279 } 280 281 return (link_changed); 282 } 283 284 /* 285 * Update the MAC addresses of the constituent ports of the specified 286 * group. This function is invoked: 287 * - after creating a new aggregation group. 288 * - after adding new ports to an aggregation group. 289 * - after removing a port from a group when the MAC address of 290 * that port was used for the MAC address of the group. 291 * - after the MAC address of a port changed when the MAC address 292 * of that port was used for the MAC address of the group. 
293 */ 294 void 295 aggr_grp_update_ports_mac(aggr_grp_t *grp) 296 { 297 aggr_port_t *cport; 298 299 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 300 301 for (cport = grp->lg_ports; cport != NULL; 302 cport = cport->lp_next) { 303 rw_enter(&cport->lp_lock, RW_WRITER); 304 if (aggr_port_unicst(cport, grp->lg_addr) != 0) 305 (void) aggr_grp_detach_port(grp, cport); 306 rw_exit(&cport->lp_lock); 307 if (grp->lg_closing) 308 break; 309 } 310 } 311 312 /* 313 * Invoked when the MAC address of a port has changed. If the port's 314 * MAC address was used for the group MAC address, returns B_TRUE. 315 * In that case, it is the responsibility of the caller to 316 * invoke aggr_grp_update_ports_mac() after releasing the 317 * the port lock, and aggr_grp_notify() after releasing the 318 * group lock. 319 */ 320 boolean_t 321 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port) 322 { 323 boolean_t grp_addr_changed = B_FALSE; 324 325 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 326 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 327 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 328 329 if (grp->lg_addr_fixed) { 330 /* 331 * The group is using a fixed MAC address or an automatic 332 * MAC address has not been set. 333 */ 334 return (B_FALSE); 335 } 336 337 if (grp->lg_mac_addr_port == port) { 338 /* 339 * The MAC address of the port was assigned to the group 340 * MAC address. Update the group MAC address. 341 */ 342 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 343 grp_addr_changed = B_TRUE; 344 } else { 345 /* 346 * Update the actual port MAC address to the MAC address 347 * of the group. 348 */ 349 if (aggr_port_unicst(port, grp->lg_addr) != 0) 350 (void) aggr_grp_detach_port(grp, port); 351 } 352 353 return (grp_addr_changed); 354 } 355 356 /* 357 * Add a port to a link aggregation group. 
358 */ 359 static int 360 aggr_grp_add_port(aggr_grp_t *grp, const char *name, uint_t portnum, 361 aggr_port_t **pp) 362 { 363 aggr_port_t *port, **cport; 364 int err; 365 366 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 367 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 368 369 /* create new port */ 370 err = aggr_port_create(name, portnum, &port); 371 if (err != 0) 372 return (err); 373 374 rw_enter(&port->lp_lock, RW_WRITER); 375 376 /* add port to list of group constituent ports */ 377 cport = &grp->lg_ports; 378 while (*cport != NULL) 379 cport = &((*cport)->lp_next); 380 *cport = port; 381 382 /* 383 * Back reference to the group it is member of. A port always 384 * holds a reference to its group to ensure that the back 385 * reference is always valid. 386 */ 387 port->lp_grp = grp; 388 AGGR_GRP_REFHOLD(grp); 389 grp->lg_nports++; 390 391 aggr_lacp_init_port(port); 392 393 rw_exit(&port->lp_lock); 394 395 if (pp != NULL) 396 *pp = port; 397 398 return (0); 399 } 400 401 /* 402 * Add one or more ports to an existing link aggregation group. 
403 */ 404 int 405 aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 406 { 407 int rc, i, nadded = 0; 408 aggr_grp_t *grp = NULL; 409 aggr_port_t *port; 410 411 /* get group corresponding to key */ 412 rw_enter(&aggr_grp_lock, RW_READER); 413 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 414 (mod_hash_val_t *)&grp) != 0) { 415 rw_exit(&aggr_grp_lock); 416 return (ENOENT); 417 } 418 AGGR_GRP_REFHOLD(grp); 419 rw_exit(&aggr_grp_lock); 420 421 AGGR_LACP_LOCK(grp); 422 rw_enter(&grp->lg_lock, RW_WRITER); 423 424 /* add the specified ports to group */ 425 for (i = 0; i < nports; i++) { 426 /* add port to group */ 427 if ((rc = aggr_grp_add_port(grp, ports[i].lp_devname, 428 ports[i].lp_port, &port)) != 0) 429 goto bail; 430 ASSERT(port != NULL); 431 nadded++; 432 433 /* check capabilities */ 434 if (!aggr_grp_capab_check(grp, port)) { 435 rc = ENOTSUP; 436 goto bail; 437 } 438 439 /* start port if group has already been started */ 440 if (grp->lg_started) { 441 rw_enter(&port->lp_lock, RW_WRITER); 442 rc = aggr_port_start(port); 443 if (rc != 0) { 444 rw_exit(&port->lp_lock); 445 goto bail; 446 } 447 448 /* set port promiscuous mode */ 449 rc = aggr_port_promisc(port, grp->lg_promisc); 450 if (rc != 0) { 451 rw_exit(&port->lp_lock); 452 goto bail; 453 } 454 rw_exit(&port->lp_lock); 455 } 456 } 457 458 /* update the MAC address of the constituent ports */ 459 aggr_grp_update_ports_mac(grp); 460 461 bail: 462 if (rc != 0) { 463 /* stop and remove ports that have been added */ 464 for (i = 0; i < nadded && !grp->lg_closing; i++) { 465 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 466 ports[i].lp_port); 467 ASSERT(port != NULL); 468 if (grp->lg_started) { 469 rw_enter(&port->lp_lock, RW_WRITER); 470 aggr_port_stop(port); 471 rw_exit(&port->lp_lock); 472 } 473 (void) aggr_grp_rem_port(grp, port, NULL); 474 } 475 } 476 477 rw_exit(&grp->lg_lock); 478 AGGR_LACP_UNLOCK(grp); 479 if (rc == 0 && !grp->lg_closing) 480 
mac_resource_update(&grp->lg_mac); 481 AGGR_GRP_REFRELE(grp); 482 return (rc); 483 } 484 485 /* 486 * Update properties of an existing link aggregation group. 487 */ 488 int 489 aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask, 490 uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr, 491 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 492 { 493 int rc = 0; 494 aggr_grp_t *grp = NULL; 495 boolean_t mac_addr_changed = B_FALSE; 496 497 if (grp_arg == NULL) { 498 /* get group corresponding to key */ 499 rw_enter(&aggr_grp_lock, RW_READER); 500 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 501 (mod_hash_val_t *)&grp) != 0) { 502 rc = ENOENT; 503 goto bail; 504 } 505 AGGR_LACP_LOCK(grp); 506 rw_enter(&grp->lg_lock, RW_WRITER); 507 } else { 508 grp = grp_arg; 509 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 510 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 511 } 512 513 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 514 AGGR_GRP_REFHOLD(grp); 515 516 /* validate fixed address if specified */ 517 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 518 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 519 (mac_addr[0] & 0x01))) { 520 rc = EINVAL; 521 goto bail; 522 } 523 524 /* update policy if requested */ 525 if (update_mask & AGGR_MODIFY_POLICY) 526 aggr_send_update_policy(grp, policy); 527 528 /* update unicast MAC address if requested */ 529 if (update_mask & AGGR_MODIFY_MAC) { 530 if (mac_fixed) { 531 /* user-supplied MAC address */ 532 grp->lg_mac_addr_port = NULL; 533 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 534 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 535 mac_addr_changed = B_TRUE; 536 } 537 } else if (grp->lg_addr_fixed) { 538 /* switch from user-supplied to automatic */ 539 aggr_port_t *port = grp->lg_ports; 540 541 rw_enter(&port->lp_lock, RW_WRITER); 542 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 543 grp->lg_mac_addr_port = port; 544 mac_addr_changed = B_TRUE; 545 rw_exit(&port->lp_lock); 
546 } 547 grp->lg_addr_fixed = mac_fixed; 548 } 549 550 if (mac_addr_changed) 551 aggr_grp_update_ports_mac(grp); 552 553 if (update_mask & AGGR_MODIFY_LACP_MODE) 554 aggr_lacp_update_mode(grp, lacp_mode); 555 556 if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing) 557 aggr_lacp_update_timer(grp, lacp_timer); 558 559 bail: 560 if (grp_arg == NULL) { 561 if (grp != NULL) { 562 rw_exit(&grp->lg_lock); 563 AGGR_LACP_UNLOCK(grp); 564 } 565 rw_exit(&aggr_grp_lock); 566 /* pass new unicast address up to MAC layer */ 567 if (grp != NULL && mac_addr_changed && !grp->lg_closing) 568 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 569 } 570 571 if (grp != NULL) 572 AGGR_GRP_REFRELE(grp); 573 574 return (rc); 575 } 576 577 /* 578 * Create a new link aggregation group upon request from administrator. 579 * Returns 0 on success, an errno on failure. 580 */ 581 int 582 aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, 583 uint32_t policy, boolean_t mac_fixed, uchar_t *mac_addr, 584 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 585 { 586 aggr_grp_t *grp = NULL; 587 aggr_port_t *port; 588 mac_t *mac; 589 mac_info_t *mip; 590 int err; 591 int i; 592 593 /* need at least one port */ 594 if (nports == 0) 595 return (EINVAL); 596 597 rw_enter(&aggr_grp_lock, RW_WRITER); 598 599 /* does a group with the same key already exist? 
*/ 600 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 601 (mod_hash_val_t *)&grp); 602 if (err == 0) { 603 rw_exit(&aggr_grp_lock); 604 return (EEXIST); 605 } 606 607 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 608 609 AGGR_LACP_LOCK(grp); 610 rw_enter(&grp->lg_lock, RW_WRITER); 611 612 grp->lg_refs = 1; 613 grp->lg_closing = B_FALSE; 614 grp->lg_key = key; 615 616 grp->lg_ifspeed = 0; 617 grp->lg_link_state = LINK_STATE_UNKNOWN; 618 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 619 grp->lg_started = B_FALSE; 620 grp->lg_promisc = B_FALSE; 621 aggr_lacp_init_grp(grp); 622 623 /* add MAC ports to group */ 624 grp->lg_ports = NULL; 625 grp->lg_nports = 0; 626 grp->lg_nattached_ports = 0; 627 grp->lg_ntx_ports = 0; 628 629 for (i = 0; i < nports; i++) { 630 err = aggr_grp_add_port(grp, ports[i].lp_devname, 631 ports[i].lp_port, NULL); 632 if (err != 0) 633 goto bail; 634 } 635 636 /* 637 * If no explicit MAC address was specified by the administrator, 638 * set it to the MAC address of the first port. 
639 */ 640 grp->lg_addr_fixed = mac_fixed; 641 if (grp->lg_addr_fixed) { 642 /* validate specified address */ 643 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 644 err = EINVAL; 645 goto bail; 646 } 647 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 648 } else { 649 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 650 grp->lg_mac_addr_port = grp->lg_ports; 651 } 652 653 /* update the MAC address of the constituent ports */ 654 aggr_grp_update_ports_mac(grp); 655 656 /* update outbound load balancing policy */ 657 aggr_send_update_policy(grp, policy); 658 659 /* register with the MAC module */ 660 mac = &grp->lg_mac; 661 bzero(mac, sizeof (*mac)); 662 663 mac->m_ident = MAC_IDENT; 664 665 mac->m_driver = grp; 666 mac->m_dip = aggr_dip; 667 mac->m_port = key; 668 669 mip = &(mac->m_info); 670 mip->mi_media = DL_ETHER; 671 mip->mi_sdu_min = 0; 672 mip->mi_sdu_max = ETHERMTU; 673 674 MAC_STAT_MIB(mip->mi_stat); 675 MAC_STAT_ETHER(mip->mi_stat); 676 mip->mi_stat[MAC_STAT_LINK_DUPLEX] = B_TRUE; 677 678 mip->mi_addr_length = ETHERADDRL; 679 bcopy(aggr_brdcst_mac, mip->mi_brdcst_addr, ETHERADDRL); 680 bcopy(grp->lg_addr, mip->mi_unicst_addr, ETHERADDRL); 681 682 mac->m_stat = aggr_m_stat; 683 mac->m_start = aggr_m_start; 684 mac->m_stop = aggr_m_stop; 685 mac->m_promisc = aggr_m_promisc; 686 mac->m_multicst = aggr_m_multicst; 687 mac->m_unicst = aggr_m_unicst; 688 mac->m_tx = aggr_m_tx; 689 mac->m_resources = aggr_m_resources; 690 mac->m_ioctl = aggr_m_ioctl; 691 692 /* set the initial group capabilities */ 693 aggr_grp_capab_set(grp); 694 695 if ((err = mac_register(mac)) != 0) 696 goto bail; 697 698 /* set LACP mode */ 699 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 700 701 /* add new group to hash table */ 702 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(key), 703 (mod_hash_val_t)grp); 704 ASSERT(err == 0); 705 aggr_grp_cnt++; 706 707 rw_exit(&grp->lg_lock); 708 AGGR_LACP_UNLOCK(grp); 709 rw_exit(&aggr_grp_lock); 710 return (0); 711 712 bail: 713 if 
(grp != NULL) { 714 aggr_port_t *cport; 715 716 port = grp->lg_ports; 717 while (port != NULL) { 718 cport = port->lp_next; 719 aggr_port_delete(port); 720 port = cport; 721 } 722 723 rw_exit(&grp->lg_lock); 724 AGGR_LACP_UNLOCK(grp); 725 726 kmem_cache_free(aggr_grp_cache, grp); 727 } 728 729 rw_exit(&aggr_grp_lock); 730 return (err); 731 } 732 733 /* 734 * Return a pointer to the member of a group with specified device name 735 * and port number. 736 */ 737 static aggr_port_t * 738 aggr_grp_port_lookup(aggr_grp_t *grp, const char *devname, uint32_t portnum) 739 { 740 aggr_port_t *port; 741 742 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 743 744 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 745 if ((strcmp(port->lp_devname, devname) == 0) && 746 (port->lp_port == portnum)) 747 break; 748 } 749 750 return (port); 751 } 752 753 /* 754 * Stop, detach and remove a port from a link aggregation group. 755 */ 756 static int 757 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, boolean_t *do_notify) 758 { 759 aggr_port_t **pport; 760 boolean_t grp_mac_addr_changed = B_FALSE; 761 uint64_t val; 762 uint_t i; 763 764 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 765 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 766 ASSERT(grp->lg_nports > 1); 767 768 if (do_notify != NULL) 769 *do_notify = B_FALSE; 770 771 /* unlink port */ 772 for (pport = &grp->lg_ports; *pport != port; 773 pport = &(*pport)->lp_next) { 774 if (*pport == NULL) 775 return (ENOENT); 776 } 777 *pport = port->lp_next; 778 779 rw_enter(&port->lp_lock, RW_WRITER); 780 port->lp_closing = B_TRUE; 781 782 /* 783 * If the MAC address of the port being removed was assigned 784 * to the group, update the group MAC address 785 * using the MAC address of a different port. 786 */ 787 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 788 /* 789 * Set the MAC address of the group to the 790 * MAC address of its first port. 
791 */ 792 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 793 grp->lg_mac_addr_port = grp->lg_ports; 794 grp_mac_addr_changed = B_TRUE; 795 } 796 797 (void) aggr_grp_detach_port(grp, port); 798 799 /* 800 * Add the statistics of the ports while it was aggregated 801 * to the group's residual statistics. 802 */ 803 for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) { 804 /* avoid stats that are not counters */ 805 if (i == MAC_STAT_IFSPEED || i == MAC_STAT_LINK_DUPLEX) 806 continue; 807 808 /* get current value */ 809 val = aggr_port_stat(port, i); 810 /* subtract value at the point of aggregation */ 811 val -= port->lp_stat[i]; 812 /* add to the residual stat */ 813 grp->lg_stat[i] += val; 814 } 815 816 grp->lg_nports--; 817 818 rw_exit(&port->lp_lock); 819 820 aggr_port_delete(port); 821 822 /* 823 * If the group MAC address has changed, update the MAC address of 824 * the remaining consistuent ports according to the new MAC 825 * address of the group. 826 */ 827 if (grp->lg_closing) { 828 *do_notify = B_FALSE; 829 } else { 830 if (grp_mac_addr_changed) 831 aggr_grp_update_ports_mac(grp); 832 833 if (do_notify != NULL) 834 *do_notify = grp_mac_addr_changed; 835 } 836 837 return (0); 838 } 839 840 /* 841 * Remove one or more ports from an existing link aggregation group. 
842 */ 843 int 844 aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 845 { 846 int rc = 0, i; 847 aggr_grp_t *grp = NULL; 848 aggr_port_t *port; 849 boolean_t notify = B_FALSE, grp_mac_addr_changed; 850 851 /* get group corresponding to key */ 852 rw_enter(&aggr_grp_lock, RW_READER); 853 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 854 (mod_hash_val_t *)&grp) != 0) { 855 rw_exit(&aggr_grp_lock); 856 return (ENOENT); 857 } 858 AGGR_GRP_REFHOLD(grp); 859 rw_exit(&aggr_grp_lock); 860 861 AGGR_LACP_LOCK(grp); 862 rw_enter(&grp->lg_lock, RW_WRITER); 863 864 /* we need to keep at least one port per group */ 865 if (nports >= grp->lg_nports) { 866 rc = EINVAL; 867 goto bail; 868 } 869 870 /* first verify that all the groups are valid */ 871 for (i = 0; i < nports; i++) { 872 if (aggr_grp_port_lookup(grp, ports[i].lp_devname, 873 ports[i].lp_port) == NULL) { 874 /* port not found */ 875 rc = ENOENT; 876 goto bail; 877 } 878 } 879 880 /* remove the specified ports from group */ 881 for (i = 0; i < nports && !grp->lg_closing; i++) { 882 /* lookup port */ 883 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 884 ports[i].lp_port); 885 ASSERT(port != NULL); 886 887 /* stop port if group has already been started */ 888 if (grp->lg_started) { 889 rw_enter(&port->lp_lock, RW_WRITER); 890 aggr_port_stop(port); 891 rw_exit(&port->lp_lock); 892 } 893 894 /* remove port from group */ 895 rc = aggr_grp_rem_port(grp, port, &grp_mac_addr_changed); 896 ASSERT(rc == 0); 897 notify = notify || grp_mac_addr_changed; 898 } 899 900 bail: 901 rw_exit(&grp->lg_lock); 902 AGGR_LACP_UNLOCK(grp); 903 if (notify && !grp->lg_closing) 904 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 905 if (rc == 0 && !grp->lg_closing) 906 mac_resource_update(&grp->lg_mac); 907 AGGR_GRP_REFRELE(grp); 908 909 return (rc); 910 } 911 912 int 913 aggr_grp_delete(uint32_t key) 914 { 915 aggr_grp_t *grp = NULL; 916 aggr_port_t *port, *cport; 917 mod_hash_val_t val; 918 919 
rw_enter(&aggr_grp_lock, RW_WRITER); 920 921 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 922 (mod_hash_val_t *)&grp) != 0) { 923 rw_exit(&aggr_grp_lock); 924 return (ENOENT); 925 } 926 AGGR_LACP_LOCK(grp); 927 rw_enter(&grp->lg_lock, RW_WRITER); 928 grp->lg_closing = B_TRUE; 929 930 /* 931 * Unregister from the MAC service module. Since this can 932 * fail if a client hasn't closed the MAC port, we gracefully 933 * fail the operation. 934 */ 935 if (mac_unregister(&grp->lg_mac)) { 936 rw_exit(&grp->lg_lock); 937 AGGR_LACP_UNLOCK(grp); 938 rw_exit(&aggr_grp_lock); 939 return (EBUSY); 940 } 941 942 /* detach and free MAC ports associated with group */ 943 port = grp->lg_ports; 944 while (port != NULL) { 945 cport = port->lp_next; 946 rw_enter(&port->lp_lock, RW_WRITER); 947 if (grp->lg_started) 948 aggr_port_stop(port); 949 (void) aggr_grp_detach_port(grp, port); 950 rw_exit(&port->lp_lock); 951 aggr_port_delete(port); 952 port = cport; 953 } 954 955 rw_exit(&grp->lg_lock); 956 AGGR_LACP_UNLOCK(grp); 957 958 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(key), &val); 959 ASSERT(grp == (aggr_grp_t *)val); 960 961 ASSERT(aggr_grp_cnt > 0); 962 aggr_grp_cnt--; 963 964 rw_exit(&aggr_grp_lock); 965 AGGR_GRP_REFRELE(grp); 966 967 return (0); 968 } 969 970 void 971 aggr_grp_free(aggr_grp_t *grp) 972 { 973 ASSERT(grp->lg_refs == 0); 974 kmem_cache_free(aggr_grp_cache, grp); 975 } 976 977 /* 978 * Walker invoked when building the list of configured groups and 979 * their ports that must be passed up to user-space. 
980 */ 981 982 /*ARGSUSED*/ 983 static uint_t 984 aggr_grp_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 985 { 986 aggr_grp_t *grp; 987 aggr_port_t *port; 988 aggr_grp_info_state_t *state = arg; 989 990 if (state->ls_rc != 0) 991 return (MH_WALK_TERMINATE); /* terminate walk */ 992 993 grp = (aggr_grp_t *)val; 994 995 rw_enter(&grp->lg_lock, RW_READER); 996 997 if (state->ls_group_key != 0 && grp->lg_key != state->ls_group_key) 998 goto bail; 999 1000 state->ls_group_found = B_TRUE; 1001 1002 state->ls_rc = state->ls_new_grp_fn(state->ls_fn_arg, grp->lg_key, 1003 grp->lg_addr, grp->lg_addr_fixed, grp->lg_tx_policy, 1004 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 1005 1006 if (state->ls_rc != 0) 1007 goto bail; 1008 1009 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1010 1011 rw_enter(&port->lp_lock, RW_READER); 1012 1013 state->ls_rc = state->ls_new_port_fn(state->ls_fn_arg, 1014 port->lp_devname, port->lp_port, port->lp_addr, 1015 port->lp_state, &port->lp_lacp.ActorOperPortState); 1016 1017 rw_exit(&port->lp_lock); 1018 1019 if (state->ls_rc != 0) 1020 goto bail; 1021 } 1022 1023 bail: 1024 rw_exit(&grp->lg_lock); 1025 return ((state->ls_rc == 0) ? 
MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1026 } 1027 1028 int 1029 aggr_grp_info(uint_t *ngroups, uint32_t group_key, void *fn_arg, 1030 aggr_grp_info_new_grp_fn_t new_grp_fn, 1031 aggr_grp_info_new_port_fn_t new_port_fn) 1032 { 1033 aggr_grp_info_state_t state; 1034 int rc = 0; 1035 1036 rw_enter(&aggr_grp_lock, RW_READER); 1037 1038 *ngroups = aggr_grp_cnt; 1039 1040 bzero(&state, sizeof (state)); 1041 state.ls_group_key = group_key; 1042 state.ls_new_grp_fn = new_grp_fn; 1043 state.ls_new_port_fn = new_port_fn; 1044 state.ls_fn_arg = fn_arg; 1045 1046 mod_hash_walk(aggr_grp_hash, aggr_grp_info_walker, &state); 1047 1048 if ((rc = state.ls_rc) == 0 && group_key != 0 && 1049 !state.ls_group_found) 1050 rc = ENOENT; 1051 1052 rw_exit(&aggr_grp_lock); 1053 return (rc); 1054 } 1055 1056 /* 1057 * Aggregation group walker. 1058 */ 1059 1060 typedef struct aggr_grp_walker_state_s { 1061 aggr_grp_walker_fn_t ws_walker_fn; 1062 void *ws_arg; 1063 } aggr_grp_walker_state_t; 1064 1065 void 1066 aggr_grp_walk(aggr_grp_walker_fn_t walker, void *arg) 1067 { 1068 aggr_grp_walker_state_t state; 1069 1070 state.ws_walker_fn = walker; 1071 state.ws_arg = arg; 1072 1073 rw_enter(&aggr_grp_lock, RW_READER); 1074 mod_hash_walk(aggr_grp_hash, aggr_grp_info_walker, &state); 1075 rw_exit(&aggr_grp_lock); 1076 } 1077 1078 static void 1079 aggr_m_resources(void *arg) 1080 { 1081 aggr_grp_t *grp = arg; 1082 aggr_port_t *port; 1083 1084 /* Call each port's m_resources function */ 1085 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 1086 mac_resources(port->lp_mh); 1087 } 1088 1089 /*ARGSUSED*/ 1090 static void 1091 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1092 { 1093 miocnak(q, mp, 0, ENOTSUP); 1094 } 1095 1096 static uint64_t 1097 aggr_m_stat(void *arg, enum mac_stat stat) 1098 { 1099 aggr_grp_t *grp = arg; 1100 aggr_port_t *port; 1101 uint64_t val; 1102 1103 rw_enter(&grp->lg_lock, RW_READER); 1104 1105 switch (stat) { 1106 case MAC_STAT_IFSPEED: 1107 val = 
grp->lg_ifspeed; 1108 break; 1109 case MAC_STAT_LINK_DUPLEX: 1110 val = grp->lg_link_duplex; 1111 break; 1112 default: 1113 /* 1114 * The remaining statistics are counters. They are computed 1115 * by aggregating the counters of the members MACs while they 1116 * were aggregated, plus the residual counter of the group 1117 * itself, which is updated each time a MAC is removed from 1118 * the group. 1119 */ 1120 val = 0; 1121 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1122 /* actual port statistic */ 1123 val += aggr_port_stat(port, stat); 1124 /* minus the port stat when it was added */ 1125 val -= port->lp_stat[stat]; 1126 /* plus any residual amount for the group */ 1127 val += grp->lg_stat[stat]; 1128 } 1129 } 1130 1131 rw_exit(&grp->lg_lock); 1132 return (val); 1133 } 1134 1135 static int 1136 aggr_m_start(void *arg) 1137 { 1138 aggr_grp_t *grp = arg; 1139 aggr_port_t *port; 1140 1141 AGGR_LACP_LOCK(grp); 1142 rw_enter(&grp->lg_lock, RW_WRITER); 1143 1144 /* 1145 * Attempts to start all configured members of the group. 1146 * Group members will be attached when their link-up notification 1147 * is received. 
1148 */ 1149 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1150 rw_enter(&port->lp_lock, RW_WRITER); 1151 if (aggr_port_start(port) != 0) { 1152 rw_exit(&port->lp_lock); 1153 continue; 1154 } 1155 1156 /* set port promiscuous mode */ 1157 if (aggr_port_promisc(port, grp->lg_promisc) != 0) 1158 aggr_port_stop(port); 1159 rw_exit(&port->lp_lock); 1160 } 1161 1162 grp->lg_started = B_TRUE; 1163 1164 rw_exit(&grp->lg_lock); 1165 AGGR_LACP_UNLOCK(grp); 1166 1167 return (0); 1168 } 1169 1170 static void 1171 aggr_m_stop(void *arg) 1172 { 1173 aggr_grp_t *grp = arg; 1174 aggr_port_t *port; 1175 1176 rw_enter(&grp->lg_lock, RW_WRITER); 1177 1178 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1179 rw_enter(&port->lp_lock, RW_WRITER); 1180 aggr_port_stop(port); 1181 rw_exit(&port->lp_lock); 1182 } 1183 1184 grp->lg_started = B_FALSE; 1185 1186 rw_exit(&grp->lg_lock); 1187 } 1188 1189 static int 1190 aggr_m_promisc(void *arg, boolean_t on) 1191 { 1192 aggr_grp_t *grp = arg; 1193 aggr_port_t *port; 1194 1195 AGGR_LACP_LOCK(grp); 1196 rw_enter(&grp->lg_lock, RW_WRITER); 1197 AGGR_GRP_REFHOLD(grp); 1198 1199 if (on == grp->lg_promisc) 1200 goto bail; 1201 1202 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1203 rw_enter(&port->lp_lock, RW_WRITER); 1204 AGGR_PORT_REFHOLD(port); 1205 if (port->lp_started) { 1206 if (aggr_port_promisc(port, on) != 0) 1207 (void) aggr_grp_detach_port(grp, port); 1208 } 1209 rw_exit(&port->lp_lock); 1210 AGGR_PORT_REFRELE(port); 1211 if (grp->lg_closing) 1212 break; 1213 } 1214 1215 grp->lg_promisc = on; 1216 1217 bail: 1218 rw_exit(&grp->lg_lock); 1219 AGGR_LACP_UNLOCK(grp); 1220 AGGR_GRP_REFRELE(grp); 1221 1222 return (0); 1223 } 1224 1225 /* 1226 * Add or remove the multicast addresses that are defined for the group 1227 * to or from the specified port. 1228 * This function is called before stopping a port, before a port 1229 * is detached from a group, and when attaching a port to a group. 
1230 */ 1231 void 1232 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add) 1233 { 1234 aggr_grp_t *grp = port->lp_grp; 1235 1236 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 1237 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 1238 1239 if (!port->lp_started) 1240 return; 1241 1242 mac_multicst_refresh(&grp->lg_mac, aggr_port_multicst, port, 1243 add); 1244 } 1245 1246 static int 1247 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1248 { 1249 aggr_grp_t *grp = arg; 1250 aggr_port_t *port = NULL; 1251 int err = 0, cerr; 1252 1253 rw_enter(&grp->lg_lock, RW_WRITER); 1254 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1255 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 1256 continue; 1257 cerr = aggr_port_multicst(port, add, addrp); 1258 if (cerr != 0 && err == 0) 1259 err = cerr; 1260 } 1261 rw_exit(&grp->lg_lock); 1262 return (err); 1263 } 1264 1265 static int 1266 aggr_m_unicst(void *arg, const uint8_t *macaddr) 1267 { 1268 aggr_grp_t *grp = arg; 1269 int rc; 1270 1271 AGGR_LACP_LOCK(grp); 1272 rw_enter(&grp->lg_lock, RW_WRITER); 1273 rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr, 1274 0, 0); 1275 rw_exit(&grp->lg_lock); 1276 AGGR_LACP_UNLOCK(grp); 1277 1278 return (rc); 1279 } 1280 1281 /* 1282 * Initialize the capabilities that are advertised for the group 1283 * according to the capabilities of the constituent ports. 
1284 */ 1285 static void 1286 aggr_grp_capab_set(aggr_grp_t *grp) 1287 { 1288 uint32_t cksum = (uint32_t)-1; 1289 uint32_t poll = DL_CAPAB_POLL; 1290 aggr_port_t *port; 1291 const mac_info_t *port_mi; 1292 1293 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 1294 1295 ASSERT(grp->lg_ports != NULL); 1296 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1297 port_mi = mac_info(port->lp_mh); 1298 cksum &= port_mi->mi_cksum; 1299 poll &= port_mi->mi_poll; 1300 } 1301 1302 grp->lg_mac.m_info.mi_cksum = cksum; 1303 grp->lg_mac.m_info.mi_poll = poll; 1304 } 1305 1306 /* 1307 * Checks whether the capabilities of the ports being added are compatible 1308 * with the current capabilities of the aggregation. 1309 */ 1310 static boolean_t 1311 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 1312 { 1313 const mac_info_t *port_mi = mac_info(port->lp_mh); 1314 uint32_t grp_cksum = grp->lg_mac.m_info.mi_cksum; 1315 1316 ASSERT(grp->lg_ports != NULL); 1317 1318 return (((grp_cksum & port_mi->mi_cksum) == grp_cksum) && 1319 (grp->lg_mac.m_info.mi_poll == port_mi->mi_poll)); 1320 } 1321