1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups. 30 * 31 * An instance of the structure aggr_grp_t is allocated for each 32 * link aggregation group. When created, aggr_grp_t objects are 33 * entered into the aggr_grp_hash hash table maintained by the modhash 34 * module. The hash key is the port number associated with the link 35 * aggregation group. The port number associated with a group corresponds 36 * the key associated with the group. 37 * 38 * A set of MAC ports are associated with each association group. 39 */ 40 41 #include <sys/types.h> 42 #include <sys/sysmacros.h> 43 #include <sys/conf.h> 44 #include <sys/cmn_err.h> 45 #include <sys/list.h> 46 #include <sys/ksynch.h> 47 #include <sys/kmem.h> 48 #include <sys/stream.h> 49 #include <sys/modctl.h> 50 #include <sys/ddi.h> 51 #include <sys/sunddi.h> 52 #include <sys/atomic.h> 53 #include <sys/stat.h> 54 #include <sys/modhash.h> 55 #include <sys/strsun.h> 56 #include <sys/dlpi.h> 57 58 #include <sys/aggr.h> 59 #include <sys/aggr_impl.h> 60 61 static void aggr_m_info(void *, mac_info_t *); 62 static int aggr_m_start(void *); 63 static void aggr_m_stop(void *); 64 static int aggr_m_promisc(void *, boolean_t); 65 static int aggr_m_multicst(void *, boolean_t, const uint8_t *); 66 static int aggr_m_unicst(void *, const uint8_t *); 67 static uint64_t aggr_m_stat(void *, enum mac_stat); 68 static void aggr_m_resources(void *); 69 static void aggr_m_ioctl(void *, queue_t *, mblk_t *); 70 71 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, const char *, uint32_t); 72 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *); 73 static void aggr_stats_op(enum mac_stat, uint64_t *, uint64_t *, boolean_t); 74 static void aggr_grp_capab_set(aggr_grp_t *); 75 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); 76 77 static kmem_cache_t *aggr_grp_cache; 78 static mod_hash_t *aggr_grp_hash; 79 static krwlock_t aggr_grp_lock; 80 static uint_t aggr_grp_cnt; 81 82 #define GRP_HASHSZ 64 83 #define GRP_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)key) 84 85 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; 86 static uchar_t aggr_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 87 88 /* used by grp_info_walker */ 89 typedef struct aggr_grp_info_state { 90 uint32_t ls_group_key; 91 boolean_t ls_group_found; 92 aggr_grp_info_new_grp_fn_t ls_new_grp_fn; 93 aggr_grp_info_new_port_fn_t ls_new_port_fn; 94 void *ls_fn_arg; 95 int ls_rc; 96 } aggr_grp_info_state_t; 97 98 /*ARGSUSED*/ 99 static int 100 aggr_grp_constructor(void *buf, void *arg, int kmflag) 101 { 102 aggr_grp_t *grp = buf; 103 104 bzero(grp, sizeof (*grp)); 105 rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL); 106 mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL); 107 108 grp->lg_link_state = LINK_STATE_UNKNOWN; 109 110 return (0); 111 } 112 113 /*ARGSUSED*/ 114 static void 115 aggr_grp_destructor(void *buf, void *arg) 116 { 117 aggr_grp_t *grp = buf; 118 119 if (grp->lg_tx_ports != NULL) { 120 kmem_free(grp->lg_tx_ports, 121 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 122 } 123 124 mutex_destroy(&grp->aggr.gl_lock); 125 rw_destroy(&grp->lg_lock); 126 } 127 128 void 129 aggr_grp_init(void) 130 { 131 aggr_grp_cache = kmem_cache_create("aggr_grp_cache", 132 sizeof (aggr_grp_t), 0, aggr_grp_constructor, 133 aggr_grp_destructor, NULL, NULL, NULL, 0); 134 135 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash", 136 GRP_HASHSZ, mod_hash_null_valdtor); 137 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); 138 aggr_grp_cnt = 0; 139 } 140 141 int 142 aggr_grp_fini(void) 143 { 144 if (aggr_grp_cnt > 0) 145 return (EBUSY); 146 147 rw_destroy(&aggr_grp_lock); 148 mod_hash_destroy_idhash(aggr_grp_hash); 149 kmem_cache_destroy(aggr_grp_cache); 150 return (0); 151 } 152 153 uint_t 154 aggr_grp_count(void) 155 { 156 uint_t count; 157 158 rw_enter(&aggr_grp_lock, RW_READER); 159 count = aggr_grp_cnt; 160 rw_exit(&aggr_grp_lock); 161 return (count); 162 } 163 164 /* 165 * Attach a port to a link aggregation group. 166 * 167 * A port is attached to a link aggregation group once its speed 168 * and link state have been verified. 169 * 170 * Returns B_TRUE if the group link state or speed has changed. If 171 * it's the case, the caller must notify the MAC layer via a call 172 * to mac_link(). 173 */ 174 boolean_t 175 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) 176 { 177 boolean_t link_changed = B_FALSE; 178 179 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 180 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 181 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 182 183 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 184 return (B_FALSE); 185 186 /* 187 * Validate the MAC port link speed and update the group 188 * link speed if needed. 189 */ 190 if (port->lp_ifspeed == 0 || 191 port->lp_link_state != LINK_STATE_UP || 192 port->lp_link_duplex != LINK_DUPLEX_FULL) { 193 /* 194 * Can't attach a MAC port with unknown link speed, 195 * down link, or not in full duplex mode. 196 */ 197 return (B_FALSE); 198 } 199 200 if (grp->lg_ifspeed == 0) { 201 /* 202 * The group inherits the speed of the first link being 203 * attached. 204 */ 205 grp->lg_ifspeed = port->lp_ifspeed; 206 link_changed = B_TRUE; 207 } else if (grp->lg_ifspeed != port->lp_ifspeed) { 208 /* 209 * The link speed of the MAC port must be the same as 210 * the group link speed, as per 802.3ad. Since it is 211 * not, the attach is cancelled. 212 */ 213 return (B_FALSE); 214 } 215 216 grp->lg_nattached_ports++; 217 218 /* 219 * Update the group link state. 220 */ 221 if (grp->lg_link_state != LINK_STATE_UP) { 222 grp->lg_link_state = LINK_STATE_UP; 223 grp->lg_link_duplex = LINK_DUPLEX_FULL; 224 link_changed = B_TRUE; 225 } 226 227 aggr_grp_multicst_port(port, B_TRUE); 228 229 /* 230 * Update port's state. 231 */ 232 port->lp_state = AGGR_PORT_STATE_ATTACHED; 233 234 /* 235 * If LACP is OFF, the port can be used to send data as soon 236 * as its link is up and verified to be compatible with the 237 * aggregation. 238 * 239 * If LACP is active or passive, notify the LACP subsystem, which 240 * will enable sending on the port following the LACP protocol. 241 */ 242 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 243 aggr_send_port_enable(port); 244 else 245 aggr_lacp_port_attached(port); 246 247 return (link_changed); 248 } 249 250 boolean_t 251 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) 252 { 253 boolean_t link_changed = B_FALSE; 254 255 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 256 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 257 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 258 259 /* update state */ 260 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 261 return (B_FALSE); 262 port->lp_state = AGGR_PORT_STATE_STANDBY; 263 264 aggr_grp_multicst_port(port, B_FALSE); 265 266 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 267 aggr_send_port_disable(port); 268 else 269 aggr_lacp_port_detached(port); 270 271 grp->lg_nattached_ports--; 272 if (grp->lg_nattached_ports == 0) { 273 /* the last attached MAC port of the group is being detached */ 274 grp->lg_ifspeed = 0; 275 grp->lg_link_state = LINK_STATE_DOWN; 276 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 277 link_changed = B_TRUE; 278 } 279 280 return (link_changed); 281 } 282 283 /* 284 * Update the MAC addresses of the constituent ports of the specified 285 * group. This function is invoked: 286 * - after creating a new aggregation group. 287 * - after adding new ports to an aggregation group. 288 * - after removing a port from a group when the MAC address of 289 * that port was used for the MAC address of the group. 290 * - after the MAC address of a port changed when the MAC address 291 * of that port was used for the MAC address of the group. 292 */ 293 void 294 aggr_grp_update_ports_mac(aggr_grp_t *grp) 295 { 296 aggr_port_t *cport; 297 298 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 299 300 for (cport = grp->lg_ports; cport != NULL; 301 cport = cport->lp_next) { 302 rw_enter(&cport->lp_lock, RW_WRITER); 303 if (aggr_port_unicst(cport, grp->lg_addr) != 0) 304 (void) aggr_grp_detach_port(grp, cport); 305 rw_exit(&cport->lp_lock); 306 if (grp->lg_closing) 307 break; 308 } 309 } 310 311 /* 312 * Invoked when the MAC address of a port has changed. If the port's 313 * MAC address was used for the group MAC address, returns B_TRUE. 314 * In that case, it is the responsibility of the caller to 315 * invoke aggr_grp_update_ports_mac() after releasing the 316 * the port lock, and aggr_grp_notify() after releasing the 317 * group lock. 318 */ 319 boolean_t 320 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port) 321 { 322 boolean_t grp_addr_changed = B_FALSE; 323 324 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 325 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 326 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 327 328 if (grp->lg_addr_fixed) { 329 /* 330 * The group is using a fixed MAC address or an automatic 331 * MAC address has not been set. 332 */ 333 return (B_FALSE); 334 } 335 336 if (grp->lg_mac_addr_port == port) { 337 /* 338 * The MAC address of the port was assigned to the group 339 * MAC address. Update the group MAC address. 340 */ 341 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 342 grp_addr_changed = B_TRUE; 343 } else { 344 /* 345 * Update the actual port MAC address to the MAC address 346 * of the group. 347 */ 348 if (aggr_port_unicst(port, grp->lg_addr) != 0) 349 (void) aggr_grp_detach_port(grp, port); 350 } 351 352 return (grp_addr_changed); 353 } 354 355 /* 356 * Add a port to a link aggregation group. 357 */ 358 static int 359 aggr_grp_add_port(aggr_grp_t *grp, const char *name, uint_t portnum, 360 aggr_port_t **pp) 361 { 362 aggr_port_t *port, **cport; 363 int err; 364 365 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 366 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 367 368 /* create new port */ 369 err = aggr_port_create(name, portnum, &port); 370 if (err != 0) 371 return (err); 372 373 rw_enter(&port->lp_lock, RW_WRITER); 374 375 /* add port to list of group constituent ports */ 376 cport = &grp->lg_ports; 377 while (*cport != NULL) 378 cport = &((*cport)->lp_next); 379 *cport = port; 380 381 /* 382 * Back reference to the group it is member of. A port always 383 * holds a reference to its group to ensure that the back 384 * reference is always valid. 385 */ 386 port->lp_grp = grp; 387 AGGR_GRP_REFHOLD(grp); 388 grp->lg_nports++; 389 390 aggr_lacp_init_port(port); 391 392 rw_exit(&port->lp_lock); 393 394 if (pp != NULL) 395 *pp = port; 396 397 return (0); 398 } 399 400 /* 401 * Add one or more ports to an existing link aggregation group. 402 */ 403 int 404 aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 405 { 406 int rc, i, nadded = 0; 407 aggr_grp_t *grp = NULL; 408 aggr_port_t *port; 409 410 /* get group corresponding to key */ 411 rw_enter(&aggr_grp_lock, RW_READER); 412 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 413 (mod_hash_val_t *)&grp) != 0) { 414 rw_exit(&aggr_grp_lock); 415 return (ENOENT); 416 } 417 AGGR_GRP_REFHOLD(grp); 418 rw_exit(&aggr_grp_lock); 419 420 AGGR_LACP_LOCK(grp); 421 rw_enter(&grp->lg_lock, RW_WRITER); 422 423 /* add the specified ports to group */ 424 for (i = 0; i < nports; i++) { 425 /* add port to group */ 426 if ((rc = aggr_grp_add_port(grp, ports[i].lp_devname, 427 ports[i].lp_port, &port)) != 0) 428 goto bail; 429 ASSERT(port != NULL); 430 nadded++; 431 432 /* check capabilities */ 433 if (!aggr_grp_capab_check(grp, port)) { 434 rc = ENOTSUP; 435 goto bail; 436 } 437 438 /* start port if group has already been started */ 439 if (grp->lg_started) { 440 rw_enter(&port->lp_lock, RW_WRITER); 441 rc = aggr_port_start(port); 442 if (rc != 0) { 443 rw_exit(&port->lp_lock); 444 goto bail; 445 } 446 447 /* set port promiscuous mode */ 448 rc = aggr_port_promisc(port, grp->lg_promisc); 449 if (rc != 0) { 450 rw_exit(&port->lp_lock); 451 goto bail; 452 } 453 rw_exit(&port->lp_lock); 454 } 455 } 456 457 /* update the MAC address of the constituent ports */ 458 aggr_grp_update_ports_mac(grp); 459 460 bail: 461 if (rc != 0) { 462 /* stop and remove ports that have been added */ 463 for (i = 0; i < nadded && !grp->lg_closing; i++) { 464 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 465 ports[i].lp_port); 466 ASSERT(port != NULL); 467 if (grp->lg_started) { 468 rw_enter(&port->lp_lock, RW_WRITER); 469 aggr_port_stop(port); 470 rw_exit(&port->lp_lock); 471 } 472 (void) aggr_grp_rem_port(grp, port, NULL); 473 } 474 } 475 476 rw_exit(&grp->lg_lock); 477 AGGR_LACP_UNLOCK(grp); 478 if (rc == 0 && !grp->lg_closing) 479 mac_resource_update(&grp->lg_mac); 480 AGGR_GRP_REFRELE(grp); 481 return (rc); 482 } 483 484 /* 485 * Update properties of an existing link aggregation group. 486 */ 487 int 488 aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask, 489 uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr, 490 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 491 { 492 int rc = 0; 493 aggr_grp_t *grp = NULL; 494 boolean_t mac_addr_changed = B_FALSE; 495 496 if (grp_arg == NULL) { 497 /* get group corresponding to key */ 498 rw_enter(&aggr_grp_lock, RW_READER); 499 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 500 (mod_hash_val_t *)&grp) != 0) { 501 rc = ENOENT; 502 goto bail; 503 } 504 AGGR_LACP_LOCK(grp); 505 rw_enter(&grp->lg_lock, RW_WRITER); 506 } else { 507 grp = grp_arg; 508 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 509 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 510 } 511 512 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 513 AGGR_GRP_REFHOLD(grp); 514 515 /* validate fixed address if specified */ 516 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 517 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 518 (mac_addr[0] & 0x01))) { 519 rc = EINVAL; 520 goto bail; 521 } 522 523 /* update policy if requested */ 524 if (update_mask & AGGR_MODIFY_POLICY) 525 aggr_send_update_policy(grp, policy); 526 527 /* update unicast MAC address if requested */ 528 if (update_mask & AGGR_MODIFY_MAC) { 529 if (mac_fixed) { 530 /* user-supplied MAC address */ 531 grp->lg_mac_addr_port = NULL; 532 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 533 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 534 mac_addr_changed = B_TRUE; 535 } 536 } else if (grp->lg_addr_fixed) { 537 /* switch from user-supplied to automatic */ 538 aggr_port_t *port = grp->lg_ports; 539 540 rw_enter(&port->lp_lock, RW_WRITER); 541 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 542 grp->lg_mac_addr_port = port; 543 mac_addr_changed = B_TRUE; 544 rw_exit(&port->lp_lock); 545 } 546 grp->lg_addr_fixed = mac_fixed; 547 } 548 549 if (mac_addr_changed) 550 aggr_grp_update_ports_mac(grp); 551 552 if (update_mask & AGGR_MODIFY_LACP_MODE) 553 aggr_lacp_update_mode(grp, lacp_mode); 554 555 if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing) 556 aggr_lacp_update_timer(grp, lacp_timer); 557 558 bail: 559 if (grp_arg == NULL) { 560 if (grp != NULL) { 561 rw_exit(&grp->lg_lock); 562 AGGR_LACP_UNLOCK(grp); 563 } 564 rw_exit(&aggr_grp_lock); 565 /* pass new unicast address up to MAC layer */ 566 if (grp != NULL && mac_addr_changed && !grp->lg_closing) 567 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 568 } 569 570 if (grp != NULL) 571 AGGR_GRP_REFRELE(grp); 572 573 return (rc); 574 } 575 576 /* 577 * Create a new link aggregation group upon request from administrator. 578 * Returns 0 on success, an errno on failure. 579 */ 580 int 581 aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, 582 uint32_t policy, boolean_t mac_fixed, uchar_t *mac_addr, 583 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 584 { 585 aggr_grp_t *grp = NULL; 586 aggr_port_t *port; 587 mac_t *mac; 588 mac_info_t *mip; 589 int err; 590 int i; 591 592 /* need at least one port */ 593 if (nports == 0) 594 return (EINVAL); 595 596 rw_enter(&aggr_grp_lock, RW_WRITER); 597 598 /* does a group with the same key already exist? */ 599 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 600 (mod_hash_val_t *)&grp); 601 if (err == 0) { 602 rw_exit(&aggr_grp_lock); 603 return (EEXIST); 604 } 605 606 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 607 608 AGGR_LACP_LOCK(grp); 609 rw_enter(&grp->lg_lock, RW_WRITER); 610 611 grp->lg_refs = 1; 612 grp->lg_closing = B_FALSE; 613 grp->lg_key = key; 614 615 grp->lg_ifspeed = 0; 616 grp->lg_link_state = LINK_STATE_UNKNOWN; 617 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 618 grp->lg_started = B_FALSE; 619 grp->lg_promisc = B_FALSE; 620 aggr_lacp_init_grp(grp); 621 622 /* add MAC ports to group */ 623 grp->lg_ports = NULL; 624 grp->lg_nports = 0; 625 grp->lg_nattached_ports = 0; 626 grp->lg_ntx_ports = 0; 627 628 for (i = 0; i < nports; i++) { 629 err = aggr_grp_add_port(grp, ports[i].lp_devname, 630 ports[i].lp_port, NULL); 631 if (err != 0) 632 goto bail; 633 } 634 635 /* 636 * If no explicit MAC address was specified by the administrator, 637 * set it to the MAC address of the first port. 638 */ 639 grp->lg_addr_fixed = mac_fixed; 640 if (grp->lg_addr_fixed) { 641 /* validate specified address */ 642 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 643 err = EINVAL; 644 goto bail; 645 } 646 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 647 } else { 648 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 649 grp->lg_mac_addr_port = grp->lg_ports; 650 } 651 652 /* update the MAC address of the constituent ports */ 653 aggr_grp_update_ports_mac(grp); 654 655 /* update outbound load balancing policy */ 656 aggr_send_update_policy(grp, policy); 657 658 /* register with the MAC module */ 659 mac = &grp->lg_mac; 660 bzero(mac, sizeof (*mac)); 661 662 mac->m_ident = MAC_IDENT; 663 664 mac->m_driver = grp; 665 mac->m_dip = aggr_dip; 666 mac->m_port = key; 667 668 mip = &(mac->m_info); 669 mip->mi_media = DL_ETHER; 670 mip->mi_sdu_min = 0; 671 mip->mi_sdu_max = ETHERMTU; 672 673 MAC_STAT_MIB(mip->mi_stat); 674 MAC_STAT_ETHER(mip->mi_stat); 675 mip->mi_stat[MAC_STAT_LINK_DUPLEX] = B_TRUE; 676 677 mip->mi_addr_length = ETHERADDRL; 678 bcopy(aggr_brdcst_mac, mip->mi_brdcst_addr, ETHERADDRL); 679 bcopy(grp->lg_addr, mip->mi_unicst_addr, ETHERADDRL); 680 681 mac->m_stat = aggr_m_stat; 682 mac->m_start = aggr_m_start; 683 mac->m_stop = aggr_m_stop; 684 mac->m_promisc = aggr_m_promisc; 685 mac->m_multicst = aggr_m_multicst; 686 mac->m_unicst = aggr_m_unicst; 687 mac->m_tx = aggr_m_tx; 688 mac->m_resources = aggr_m_resources; 689 mac->m_ioctl = aggr_m_ioctl; 690 691 /* set the initial group capabilities */ 692 aggr_grp_capab_set(grp); 693 694 if ((err = mac_register(mac)) != 0) 695 goto bail; 696 697 /* set LACP mode */ 698 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 699 700 /* add new group to hash table */ 701 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(key), 702 (mod_hash_val_t)grp); 703 ASSERT(err == 0); 704 aggr_grp_cnt++; 705 706 rw_exit(&grp->lg_lock); 707 AGGR_LACP_UNLOCK(grp); 708 rw_exit(&aggr_grp_lock); 709 return (0); 710 711 bail: 712 if (grp != NULL) { 713 aggr_port_t *cport; 714 715 port = grp->lg_ports; 716 while (port != NULL) { 717 cport = port->lp_next; 718 aggr_port_delete(port); 719 port = cport; 720 } 721 722 rw_exit(&grp->lg_lock); 723 AGGR_LACP_UNLOCK(grp); 724 725 kmem_cache_free(aggr_grp_cache, grp); 726 } 727 728 rw_exit(&aggr_grp_lock); 729 return (err); 730 } 731 732 /* 733 * Return a pointer to the member of a group with specified device name 734 * and port number. 735 */ 736 static aggr_port_t * 737 aggr_grp_port_lookup(aggr_grp_t *grp, const char *devname, uint32_t portnum) 738 { 739 aggr_port_t *port; 740 741 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 742 743 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 744 if ((strcmp(port->lp_devname, devname) == 0) && 745 (port->lp_port == portnum)) 746 break; 747 } 748 749 return (port); 750 } 751 752 /* 753 * Stop, detach and remove a port from a link aggregation group. 754 */ 755 static int 756 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, boolean_t *do_notify) 757 { 758 aggr_port_t **pport; 759 boolean_t grp_mac_addr_changed = B_FALSE; 760 uint64_t val; 761 uint_t i; 762 763 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 764 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 765 ASSERT(grp->lg_nports > 1); 766 767 if (do_notify != NULL) 768 *do_notify = B_FALSE; 769 770 /* unlink port */ 771 for (pport = &grp->lg_ports; *pport != port; 772 pport = &(*pport)->lp_next) { 773 if (*pport == NULL) 774 return (ENOENT); 775 } 776 *pport = port->lp_next; 777 778 rw_enter(&port->lp_lock, RW_WRITER); 779 port->lp_closing = B_TRUE; 780 781 /* 782 * If the MAC address of the port being removed was assigned 783 * to the group, update the group MAC address 784 * using the MAC address of a different port. 785 */ 786 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 787 /* 788 * Set the MAC address of the group to the 789 * MAC address of its first port. 790 */ 791 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 792 grp->lg_mac_addr_port = grp->lg_ports; 793 grp_mac_addr_changed = B_TRUE; 794 } 795 796 (void) aggr_grp_detach_port(grp, port); 797 798 /* 799 * Add the statistics of the ports while it was aggregated 800 * to the group's residual statistics. 801 */ 802 for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) { 803 /* avoid stats that are not counters */ 804 if (i == MAC_STAT_IFSPEED || i == MAC_STAT_LINK_DUPLEX) 805 continue; 806 807 /* get current value */ 808 val = aggr_port_stat(port, i); 809 /* subtract value at the point of aggregation */ 810 val -= port->lp_stat[i]; 811 /* add to the residual stat */ 812 grp->lg_stat[i] += val; 813 } 814 815 grp->lg_nports--; 816 817 rw_exit(&port->lp_lock); 818 819 aggr_port_delete(port); 820 821 /* 822 * If the group MAC address has changed, update the MAC address of 823 * the remaining consistuent ports according to the new MAC 824 * address of the group. 825 */ 826 if (grp->lg_closing) { 827 *do_notify = B_FALSE; 828 } else { 829 if (grp_mac_addr_changed) 830 aggr_grp_update_ports_mac(grp); 831 832 if (do_notify != NULL) 833 *do_notify = grp_mac_addr_changed; 834 } 835 836 return (0); 837 } 838 839 /* 840 * Remove one or more ports from an existing link aggregation group. 841 */ 842 int 843 aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 844 { 845 int rc = 0, i; 846 aggr_grp_t *grp = NULL; 847 aggr_port_t *port; 848 boolean_t notify = B_FALSE, grp_mac_addr_changed; 849 850 /* get group corresponding to key */ 851 rw_enter(&aggr_grp_lock, RW_READER); 852 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 853 (mod_hash_val_t *)&grp) != 0) { 854 rw_exit(&aggr_grp_lock); 855 return (ENOENT); 856 } 857 AGGR_GRP_REFHOLD(grp); 858 rw_exit(&aggr_grp_lock); 859 860 AGGR_LACP_LOCK(grp); 861 rw_enter(&grp->lg_lock, RW_WRITER); 862 863 /* we need to keep at least one port per group */ 864 if (nports >= grp->lg_nports) { 865 rc = EINVAL; 866 goto bail; 867 } 868 869 /* first verify that all the groups are valid */ 870 for (i = 0; i < nports; i++) { 871 if (aggr_grp_port_lookup(grp, ports[i].lp_devname, 872 ports[i].lp_port) == NULL) { 873 /* port not found */ 874 rc = ENOENT; 875 goto bail; 876 } 877 } 878 879 /* remove the specified ports from group */ 880 for (i = 0; i < nports && !grp->lg_closing; i++) { 881 /* lookup port */ 882 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 883 ports[i].lp_port); 884 ASSERT(port != NULL); 885 886 /* stop port if group has already been started */ 887 if (grp->lg_started) { 888 rw_enter(&port->lp_lock, RW_WRITER); 889 aggr_port_stop(port); 890 rw_exit(&port->lp_lock); 891 } 892 893 /* remove port from group */ 894 rc = aggr_grp_rem_port(grp, port, &grp_mac_addr_changed); 895 ASSERT(rc == 0); 896 notify = notify || grp_mac_addr_changed; 897 } 898 899 bail: 900 rw_exit(&grp->lg_lock); 901 AGGR_LACP_UNLOCK(grp); 902 if (notify && !grp->lg_closing) 903 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 904 if (rc == 0 && !grp->lg_closing) 905 mac_resource_update(&grp->lg_mac); 906 AGGR_GRP_REFRELE(grp); 907 908 return (rc); 909 } 910 911 int 912 aggr_grp_delete(uint32_t key) 913 { 914 aggr_grp_t *grp = NULL; 915 aggr_port_t *port, *cport; 916 mod_hash_val_t val; 917 918 rw_enter(&aggr_grp_lock, RW_WRITER); 919 920 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 921 (mod_hash_val_t *)&grp) != 0) { 922 rw_exit(&aggr_grp_lock); 923 return (ENOENT); 924 } 925 AGGR_LACP_LOCK(grp); 926 rw_enter(&grp->lg_lock, RW_WRITER); 927 grp->lg_closing = B_TRUE; 928 929 /* 930 * Unregister from the MAC service module. Since this can 931 * fail if a client hasn't closed the MAC port, we gracefully 932 * fail the operation. 933 */ 934 if (mac_unregister(&grp->lg_mac)) { 935 rw_exit(&grp->lg_lock); 936 AGGR_LACP_UNLOCK(grp); 937 rw_exit(&aggr_grp_lock); 938 return (EBUSY); 939 } 940 941 /* detach and free MAC ports associated with group */ 942 port = grp->lg_ports; 943 while (port != NULL) { 944 cport = port->lp_next; 945 rw_enter(&port->lp_lock, RW_WRITER); 946 if (grp->lg_started) 947 aggr_port_stop(port); 948 (void) aggr_grp_detach_port(grp, port); 949 rw_exit(&port->lp_lock); 950 aggr_port_delete(port); 951 port = cport; 952 } 953 954 rw_exit(&grp->lg_lock); 955 AGGR_LACP_UNLOCK(grp); 956 957 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(key), &val); 958 ASSERT(grp == (aggr_grp_t *)val); 959 960 ASSERT(aggr_grp_cnt > 0); 961 aggr_grp_cnt--; 962 963 rw_exit(&aggr_grp_lock); 964 AGGR_GRP_REFRELE(grp); 965 966 return (0); 967 } 968 969 void 970 aggr_grp_free(aggr_grp_t *grp) 971 { 972 ASSERT(grp->lg_refs == 0); 973 kmem_cache_free(aggr_grp_cache, grp); 974 } 975 976 /* 977 * Walker invoked when building the list of configured groups and 978 * their ports that must be passed up to user-space. 979 */ 980 981 /*ARGSUSED*/ 982 static uint_t 983 aggr_grp_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 984 { 985 aggr_grp_t *grp; 986 aggr_port_t *port; 987 aggr_grp_info_state_t *state = arg; 988 989 if (state->ls_rc != 0) 990 return (MH_WALK_TERMINATE); /* terminate walk */ 991 992 grp = (aggr_grp_t *)val; 993 994 rw_enter(&grp->lg_lock, RW_READER); 995 996 if (state->ls_group_key != 0 && grp->lg_key != state->ls_group_key) 997 goto bail; 998 999 state->ls_group_found = B_TRUE; 1000 1001 state->ls_rc = state->ls_new_grp_fn(state->ls_fn_arg, grp->lg_key, 1002 grp->lg_addr, grp->lg_addr_fixed, grp->lg_tx_policy, 1003 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 1004 1005 if (state->ls_rc != 0) 1006 goto bail; 1007 1008 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1009 1010 rw_enter(&port->lp_lock, RW_READER); 1011 1012 state->ls_rc = state->ls_new_port_fn(state->ls_fn_arg, 1013 port->lp_devname, port->lp_port, port->lp_addr, 1014 port->lp_state, &port->lp_lacp.ActorOperPortState); 1015 1016 rw_exit(&port->lp_lock); 1017 1018 if (state->ls_rc != 0) 1019 goto bail; 1020 } 1021 1022 bail: 1023 rw_exit(&grp->lg_lock); 1024 return ((state->ls_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1025 } 1026 1027 int 1028 aggr_grp_info(uint_t *ngroups, uint32_t group_key, void *fn_arg, 1029 aggr_grp_info_new_grp_fn_t new_grp_fn, 1030 aggr_grp_info_new_port_fn_t new_port_fn) 1031 { 1032 aggr_grp_info_state_t state; 1033 int rc = 0; 1034 1035 rw_enter(&aggr_grp_lock, RW_READER); 1036 1037 *ngroups = aggr_grp_cnt; 1038 1039 bzero(&state, sizeof (state)); 1040 state.ls_group_key = group_key; 1041 state.ls_new_grp_fn = new_grp_fn; 1042 state.ls_new_port_fn = new_port_fn; 1043 state.ls_fn_arg = fn_arg; 1044 1045 mod_hash_walk(aggr_grp_hash, aggr_grp_info_walker, &state); 1046 1047 if ((rc = state.ls_rc) == 0 && group_key != 0 && 1048 !state.ls_group_found) 1049 rc = ENOENT; 1050 1051 rw_exit(&aggr_grp_lock); 1052 return (rc); 1053 } 1054 1055 static void 1056 aggr_m_resources(void *arg) 1057 { 1058 aggr_grp_t *grp = arg; 1059 aggr_port_t *port; 1060 1061 /* Call each port's m_resources function */ 1062 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 1063 mac_resources(port->lp_mh); 1064 } 1065 1066 /*ARGSUSED*/ 1067 static void 1068 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1069 { 1070 miocnak(q, mp, 0, ENOTSUP); 1071 } 1072 1073 static uint64_t 1074 aggr_m_stat(void *arg, enum mac_stat stat) 1075 { 1076 aggr_grp_t *grp = arg; 1077 aggr_port_t *port; 1078 uint64_t val; 1079 1080 rw_enter(&grp->lg_lock, RW_READER); 1081 1082 switch (stat) { 1083 case MAC_STAT_IFSPEED: 1084 val = grp->lg_ifspeed; 1085 break; 1086 case MAC_STAT_LINK_DUPLEX: 1087 val = grp->lg_link_duplex; 1088 break; 1089 default: 1090 /* 1091 * The remaining statistics are counters. They are computed 1092 * by aggregating the counters of the members MACs while they 1093 * were aggregated, plus the residual counter of the group 1094 * itself, which is updated each time a MAC is removed from 1095 * the group. 1096 */ 1097 val = 0; 1098 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1099 /* actual port statistic */ 1100 val += aggr_port_stat(port, stat); 1101 /* minus the port stat when it was added */ 1102 val -= port->lp_stat[stat]; 1103 /* plus any residual amount for the group */ 1104 val += grp->lg_stat[stat]; 1105 } 1106 } 1107 1108 rw_exit(&grp->lg_lock); 1109 return (val); 1110 } 1111 1112 static int 1113 aggr_m_start(void *arg) 1114 { 1115 aggr_grp_t *grp = arg; 1116 aggr_port_t *port; 1117 1118 AGGR_LACP_LOCK(grp); 1119 rw_enter(&grp->lg_lock, RW_WRITER); 1120 1121 /* 1122 * Attempts to start all configured members of the group. 1123 * Group members will be attached when their link-up notification 1124 * is received. 1125 */ 1126 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1127 rw_enter(&port->lp_lock, RW_WRITER); 1128 if (aggr_port_start(port) != 0) { 1129 rw_exit(&port->lp_lock); 1130 continue; 1131 } 1132 1133 /* set port promiscuous mode */ 1134 if (aggr_port_promisc(port, grp->lg_promisc) != 0) 1135 aggr_port_stop(port); 1136 rw_exit(&port->lp_lock); 1137 } 1138 1139 grp->lg_started = B_TRUE; 1140 1141 rw_exit(&grp->lg_lock); 1142 AGGR_LACP_UNLOCK(grp); 1143 1144 return (0); 1145 } 1146 1147 static void 1148 aggr_m_stop(void *arg) 1149 { 1150 aggr_grp_t *grp = arg; 1151 aggr_port_t *port; 1152 1153 rw_enter(&grp->lg_lock, RW_WRITER); 1154 1155 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1156 rw_enter(&port->lp_lock, RW_WRITER); 1157 aggr_port_stop(port); 1158 rw_exit(&port->lp_lock); 1159 } 1160 1161 grp->lg_started = B_FALSE; 1162 1163 rw_exit(&grp->lg_lock); 1164 } 1165 1166 static int 1167 aggr_m_promisc(void *arg, boolean_t on) 1168 { 1169 aggr_grp_t *grp = arg; 1170 aggr_port_t *port; 1171 1172 AGGR_LACP_LOCK(grp); 1173 rw_enter(&grp->lg_lock, RW_WRITER); 1174 AGGR_GRP_REFHOLD(grp); 1175 1176 if (on == grp->lg_promisc) 1177 goto bail; 1178 1179 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1180 rw_enter(&port->lp_lock, RW_WRITER); 1181 AGGR_PORT_REFHOLD(port); 1182 if (port->lp_started) { 1183 if (aggr_port_promisc(port, on) != 0) 1184 (void) aggr_grp_detach_port(grp, port); 1185 } 1186 rw_exit(&port->lp_lock); 1187 AGGR_PORT_REFRELE(port); 1188 if (grp->lg_closing) 1189 break; 1190 } 1191 1192 grp->lg_promisc = on; 1193 1194 bail: 1195 rw_exit(&grp->lg_lock); 1196 AGGR_LACP_UNLOCK(grp); 1197 AGGR_GRP_REFRELE(grp); 1198 1199 return (0); 1200 } 1201 1202 /* 1203 * Add or remove the multicast addresses that are defined for the group 1204 * to or from the specified port. 1205 * This function is called before stopping a port, before a port 1206 * is detached from a group, and when attaching a port to a group. 1207 */ 1208 void 1209 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add) 1210 { 1211 aggr_grp_t *grp = port->lp_grp; 1212 1213 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 1214 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 1215 1216 if (!port->lp_started) 1217 return; 1218 1219 mac_multicst_refresh(&grp->lg_mac, aggr_port_multicst, port, 1220 add); 1221 } 1222 1223 static int 1224 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1225 { 1226 aggr_grp_t *grp = arg; 1227 aggr_port_t *port = NULL; 1228 int err = 0, cerr; 1229 1230 rw_enter(&grp->lg_lock, RW_WRITER); 1231 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1232 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 1233 continue; 1234 cerr = aggr_port_multicst(port, add, addrp); 1235 if (cerr != 0 && err == 0) 1236 err = cerr; 1237 } 1238 rw_exit(&grp->lg_lock); 1239 return (err); 1240 } 1241 1242 static int 1243 aggr_m_unicst(void *arg, const uint8_t *macaddr) 1244 { 1245 aggr_grp_t *grp = arg; 1246 int rc; 1247 1248 AGGR_LACP_LOCK(grp); 1249 rw_enter(&grp->lg_lock, RW_WRITER); 1250 rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr, 1251 0, 0); 1252 rw_exit(&grp->lg_lock); 1253 AGGR_LACP_UNLOCK(grp); 1254 1255 return (rc); 1256 } 1257 1258 /* 1259 * Initialize the capabilities that are advertised for the group 1260 * according to the capabilities of the constituent ports. 1261 */ 1262 static void 1263 aggr_grp_capab_set(aggr_grp_t *grp) 1264 { 1265 uint32_t cksum = (uint32_t)-1; 1266 uint32_t poll = DL_CAPAB_POLL; 1267 aggr_port_t *port; 1268 const mac_info_t *port_mi; 1269 1270 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 1271 1272 ASSERT(grp->lg_ports != NULL); 1273 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1274 port_mi = mac_info(port->lp_mh); 1275 cksum &= port_mi->mi_cksum; 1276 poll &= port_mi->mi_poll; 1277 } 1278 1279 grp->lg_mac.m_info.mi_cksum = cksum; 1280 grp->lg_mac.m_info.mi_poll = poll; 1281 } 1282 1283 /* 1284 * Checks whether the capabilities of the ports being added are compatible 1285 * with the current capabilities of the aggregation. 1286 */ 1287 static boolean_t 1288 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 1289 { 1290 const mac_info_t *port_mi = mac_info(port->lp_mh); 1291 uint32_t grp_cksum = grp->lg_mac.m_info.mi_cksum; 1292 1293 ASSERT(grp->lg_ports != NULL); 1294 1295 return (((grp_cksum & port_mi->mi_cksum) == grp_cksum) && 1296 (grp->lg_mac.m_info.mi_poll == port_mi->mi_poll)); 1297 } 1298