/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
 *
 * An instance of the structure aggr_grp_t is allocated for each
 * link aggregation group. When created, aggr_grp_t objects are
 * entered into the aggr_grp_hash hash table maintained by the GHT
 * module. The hash key is the port number associated with the link
 * aggregation group. The port number associated with a group corresponds
 * to the key associated with the group.
 *
 * A set of MAC ports is associated with each aggregation group.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/list.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/stream.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/stat.h>
#include <sys/ght.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>

#include <sys/aggr.h>
#include <sys/aggr_impl.h>

/*
 * MAC entry points implemented by the aggregation pseudo-driver.
 * NOTE(review): no definition of aggr_m_info() or aggr_stats_op() is
 * visible in the part of the file reviewed -- confirm they are still
 * needed.
 */
static void aggr_m_info(void *, mac_info_t *);
static int aggr_m_start(void *);
static void aggr_m_stop(void *);
static int aggr_m_promisc(void *, boolean_t);
static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
static int aggr_m_unicst(void *, const uint8_t *);
static uint64_t aggr_m_stat(void *, enum mac_stat);
static void aggr_m_resources(void *);
static void aggr_m_ioctl(void *, queue_t *, mblk_t *);

/* group-internal helpers */
static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, const char *, uint32_t);
static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *);
static void aggr_stats_op(enum mac_stat, uint64_t *, uint64_t *, boolean_t);
static void aggr_grp_capab_set(aggr_grp_t *);
static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);

/* kmem cache from which all aggr_grp_t objects are allocated */
static kmem_cache_t *aggr_grp_cache;
/* hash table of the existing groups, keyed by group key */
static ght_t aggr_grp_hash;

/* size passed to ght_scalar_create() for aggr_grp_hash */
#define	GRP_HASHSZ	64

/* all-zero address, rejected as a user-supplied group MAC address */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
/* Ethernet broadcast address advertised in the group's mac_info_t */
static uchar_t aggr_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

/* used by grp_info_walker */
typedef struct aggr_grp_info_state {
	/* key of the only group to report, or 0 to report every group */
	uint32_t	ls_group_key;
	/* set to B_TRUE once a group passed the key filter */
	boolean_t	ls_group_found;
	/* callback invoked once per reported group */
	aggr_grp_info_new_grp_fn_t ls_new_grp_fn;
	/* callback invoked once per port of a reported group */
	aggr_grp_info_new_port_fn_t ls_new_port_fn;
	/* opaque argument handed to both callbacks */
	void		*ls_fn_arg;
	/* last callback return value; non-zero terminates the walk */
	int		ls_rc;
} aggr_grp_info_state_t;

/*
 * kmem cache constructor for aggr_grp_t objects. Zeroes the object,
 * initializes the group rwlock and the LACP mutex, and marks the link
 * state as unknown. Always returns 0.
 */
/*ARGSUSED*/
static int
aggr_grp_constructor(void *buf, void *arg, int kmflag)
{
	aggr_grp_t *grp = buf;

	bzero(grp, sizeof (*grp));
	rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL);

	grp->lg_link_state = LINK_STATE_UNKNOWN;

	return (0);
}

/*
 * kmem cache destructor for aggr_grp_t objects. Releases the array of
 * transmit ports if one is still attached to the object, then destroys
 * the locks set up by the constructor.
 */
/*ARGSUSED*/
static void
aggr_grp_destructor(void *buf, void *arg)
{
	aggr_grp_t *grp = buf;

	if (grp->lg_tx_ports != NULL) {
		/* lg_tx_ports_size is a count of pointers, not bytes */
		kmem_free(grp->lg_tx_ports,
		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
	}

	mutex_destroy(&grp->aggr.gl_lock);
	rw_destroy(&grp->lg_lock);
}

/*
 * Create the aggr_grp_t kmem cache and the hash table of groups.
 * A failure to create the hash table is only caught by the ASSERT.
 */
void
aggr_grp_init(void)
{
	int err;

	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
	    aggr_grp_destructor, NULL, NULL, NULL, 0);

	err = ght_scalar_create("aggr_grp_hash", GRP_HASHSZ,
	    &aggr_grp_hash);
	ASSERT(err == 0);
}

/*
 * Tear down what aggr_grp_init() created. If the hash table cannot be
 * destroyed the error from ght_destroy() is returned and the kmem cache
 * is left in place; otherwise returns 0.
 */
int
aggr_grp_fini(void)
{
	int err;

	if ((err = ght_destroy(aggr_grp_hash)) != 0)
		return (err);
	kmem_cache_destroy(aggr_grp_cache);
	return (0);
}

/*
 * Attach a port to a link aggregation group.
 *
 * A port is attached to a link aggregation group once its speed
 * and link state have been verified.
 *
 * The caller must hold the group's LACP lock, and both the group lock
 * and the port lock as writer.
 *
 * Returns B_TRUE if the group link state or speed has changed. If
 * it's the case, the caller must notify the MAC layer via a call
 * to mac_link().
 */
boolean_t
aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
{
	boolean_t link_changed = B_FALSE;

	ASSERT(AGGR_LACP_LOCK_HELD(grp));
	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
	ASSERT(RW_WRITE_HELD(&port->lp_lock));

	/* nothing to do for a port that is already attached */
	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
		return (B_FALSE);

	/*
	 * Validate the MAC port link speed and update the group
	 * link speed if needed.
	 */
	if (port->lp_ifspeed == 0 ||
	    port->lp_link_state != LINK_STATE_UP ||
	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
		/*
		 * Can't attach a MAC port with unknown link speed,
		 * down link, or not in full duplex mode.
		 */
		return (B_FALSE);
	}

	if (grp->lg_ifspeed == 0) {
		/*
		 * The group inherits the speed of the first link being
		 * attached.
		 */
		grp->lg_ifspeed = port->lp_ifspeed;
		link_changed = B_TRUE;
	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
		/*
		 * The link speed of the MAC port must be the same as
		 * the group link speed, as per 802.3ad. Since it is
		 * not, the attach is cancelled.
		 */
		return (B_FALSE);
	}

	grp->lg_nattached_ports++;

	/*
	 * Update the group link state.
	 */
	if (grp->lg_link_state != LINK_STATE_UP) {
		grp->lg_link_state = LINK_STATE_UP;
		grp->lg_link_duplex = LINK_DUPLEX_FULL;
		link_changed = B_TRUE;
	}

	/* add the group's multicast addresses to the port */
	aggr_grp_multicst_port(port, B_TRUE);

	/*
	 * Update port's state.
	 */
	port->lp_state = AGGR_PORT_STATE_ATTACHED;

	/*
	 * If LACP is OFF, the port can be used to send data as soon
	 * as its link is up and verified to be compatible with the
	 * aggregation.
	 *
	 * If LACP is active or passive, notify the LACP subsystem, which
	 * will enable sending on the port following the LACP protocol.
	 */
	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
		aggr_send_port_enable(port);
	else
		aggr_lacp_port_attached(port);

	return (link_changed);
}

/*
 * Detach a port from a link aggregation group.
 *
 * A port that is not currently attached is left untouched. Otherwise
 * the port is moved to the standby state, aggr_grp_multicst_port() is
 * called with B_FALSE for it, and sending is either disabled directly
 * (LACP off) or handed to the LACP subsystem.
 *
 * The caller must hold the group's LACP lock, and both the group lock
 * and the port lock as writer.
 *
 * Returns B_TRUE if the last attached port of the group was detached,
 * in which case the group speed, link state and duplex have been reset.
 */
boolean_t
aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
{
	boolean_t link_changed = B_FALSE;

	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
	ASSERT(RW_WRITE_HELD(&port->lp_lock));
	ASSERT(AGGR_LACP_LOCK_HELD(grp));

	/* update state */
	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
		return (B_FALSE);
	port->lp_state = AGGR_PORT_STATE_STANDBY;

	aggr_grp_multicst_port(port, B_FALSE);

	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
		aggr_send_port_disable(port);
	else
		aggr_lacp_port_detached(port);

	grp->lg_nattached_ports--;
	if (grp->lg_nattached_ports == 0) {
		/* the last attached MAC port of the group is being detached */
		grp->lg_ifspeed = 0;
		grp->lg_link_state = LINK_STATE_DOWN;
		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
		link_changed = B_TRUE;
	}

	return (link_changed);
}

/*
 * Update the MAC addresses of the constituent ports of the specified
 * group. This function is invoked:
 * - after creating a new aggregation group.
 * - after adding new ports to an aggregation group.
 * - after removing a port from a group when the MAC address of
 * that port was used for the MAC address of the group.
 * - after the MAC address of a port changed when the MAC address
 * of that port was used for the MAC address of the group.
279 */ 280 void 281 aggr_grp_update_ports_mac(aggr_grp_t *grp) 282 { 283 aggr_port_t *cport; 284 285 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 286 287 for (cport = grp->lg_ports; cport != NULL; 288 cport = cport->lp_next) { 289 rw_enter(&cport->lp_lock, RW_WRITER); 290 if (aggr_port_unicst(cport, grp->lg_addr) != 0) 291 (void) aggr_grp_detach_port(grp, cport); 292 rw_exit(&cport->lp_lock); 293 if (grp->lg_closing) 294 break; 295 } 296 } 297 298 /* 299 * Invoked when the MAC address of a port has changed. If the port's 300 * MAC address was used for the group MAC address, returns B_TRUE. 301 * In that case, it is the responsibility of the caller to 302 * invoke aggr_grp_update_ports_mac() after releasing the 303 * the port lock, and aggr_grp_notify() after releasing the 304 * group lock. 305 */ 306 boolean_t 307 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port) 308 { 309 boolean_t grp_addr_changed = B_FALSE; 310 311 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 312 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 313 314 if (grp->lg_addr_fixed) { 315 /* 316 * The group is using a fixed MAC address or an automatic 317 * MAC address has not been set. 318 */ 319 return (B_FALSE); 320 } 321 322 if (grp->lg_mac_addr_port == port) { 323 /* 324 * The MAC address of the port was assigned to the group 325 * MAC address. Update the group MAC address. 326 */ 327 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 328 grp_addr_changed = B_TRUE; 329 } else { 330 /* 331 * Update the actual port MAC address to the MAC address 332 * of the group. 333 */ 334 if (aggr_port_unicst(port, grp->lg_addr) != 0) 335 (void) aggr_grp_detach_port(grp, port); 336 } 337 338 return (grp_addr_changed); 339 } 340 341 /* 342 * Add a port to a link aggregation group. 
343 */ 344 static int 345 aggr_grp_add_port(aggr_grp_t *grp, const char *name, uint_t portnum, 346 aggr_port_t **pp) 347 { 348 aggr_port_t *port, **cport; 349 int err; 350 351 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 352 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 353 354 /* create new port */ 355 err = aggr_port_create(name, portnum, &port); 356 if (err != 0) 357 return (err); 358 359 rw_enter(&port->lp_lock, RW_WRITER); 360 361 /* add port to list of group constituent ports */ 362 cport = &grp->lg_ports; 363 while (*cport != NULL) 364 cport = &((*cport)->lp_next); 365 *cport = port; 366 367 /* 368 * Back reference to the group it is member of. A port always 369 * holds a reference to its group to ensure that the back 370 * reference is always valid. 371 */ 372 port->lp_grp = grp; 373 AGGR_GRP_REFHOLD(grp); 374 grp->lg_nports++; 375 376 aggr_lacp_init_port(port); 377 378 rw_exit(&port->lp_lock); 379 380 if (pp != NULL) 381 *pp = port; 382 383 return (0); 384 } 385 386 /* 387 * Add one or more ports to an existing link aggregation group. 
388 */ 389 int 390 aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 391 { 392 int rc, i, nadded = 0; 393 ghte_t hte; 394 aggr_grp_t *grp = NULL; 395 aggr_port_t *port; 396 397 /* get group corresponding to key */ 398 ght_lock(aggr_grp_hash, GHT_READ); 399 if ((rc = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key), 400 &hte)) == ENOENT) { 401 ght_unlock(aggr_grp_hash); 402 return (rc); 403 } 404 ASSERT(rc == 0); 405 grp = (aggr_grp_t *)GHT_VAL(hte); 406 AGGR_GRP_REFHOLD(grp); 407 ght_unlock(aggr_grp_hash); 408 409 AGGR_LACP_LOCK(grp); 410 rw_enter(&grp->lg_lock, RW_WRITER); 411 412 /* add the specified ports to group */ 413 for (i = 0; i < nports; i++) { 414 /* add port to group */ 415 if ((rc = aggr_grp_add_port(grp, ports[i].lp_devname, 416 ports[i].lp_port, &port)) != 0) 417 goto bail; 418 ASSERT(port != NULL); 419 nadded++; 420 421 /* check capabilities */ 422 if (!aggr_grp_capab_check(grp, port)) { 423 rc = ENOTSUP; 424 goto bail; 425 } 426 427 /* start port if group has already been started */ 428 if (grp->lg_started) { 429 rw_enter(&port->lp_lock, RW_WRITER); 430 rc = aggr_port_start(port); 431 if (rc != 0) { 432 rw_exit(&port->lp_lock); 433 goto bail; 434 } 435 436 /* set port promiscuous mode */ 437 rc = aggr_port_promisc(port, grp->lg_promisc); 438 if (rc != 0) { 439 rw_exit(&port->lp_lock); 440 goto bail; 441 } 442 rw_exit(&port->lp_lock); 443 } 444 } 445 446 /* update the MAC address of the constituent ports */ 447 aggr_grp_update_ports_mac(grp); 448 449 bail: 450 if (rc != 0) { 451 /* stop and remove ports that have been added */ 452 for (i = 0; i < nadded && !grp->lg_closing; i++) { 453 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 454 ports[i].lp_port); 455 ASSERT(port != NULL); 456 if (grp->lg_started) { 457 rw_enter(&port->lp_lock, RW_WRITER); 458 aggr_port_stop(port); 459 rw_exit(&port->lp_lock); 460 } 461 (void) aggr_grp_rem_port(grp, port, NULL); 462 } 463 } 464 465 rw_exit(&grp->lg_lock); 466 AGGR_LACP_UNLOCK(grp); 467 
if (rc == 0 && !grp->lg_closing) 468 mac_resource_update(&grp->lg_mac); 469 AGGR_GRP_REFRELE(grp); 470 return (rc); 471 } 472 473 /* 474 * Update properties of an existing link aggregation group. 475 */ 476 int 477 aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask, 478 uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr, 479 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 480 { 481 int rc = 0; 482 ghte_t hte; 483 aggr_grp_t *grp = NULL; 484 boolean_t mac_addr_changed = B_FALSE; 485 486 if (grp_arg == NULL) { 487 /* get group corresponding to key */ 488 ght_lock(aggr_grp_hash, GHT_READ); 489 if ((rc = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key), 490 &hte)) == ENOENT) 491 goto bail; 492 ASSERT(rc == 0); 493 grp = (aggr_grp_t *)GHT_VAL(hte); 494 AGGR_LACP_LOCK(grp); 495 rw_enter(&grp->lg_lock, RW_WRITER); 496 } else { 497 grp = grp_arg; 498 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 499 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 500 } 501 502 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 503 AGGR_GRP_REFHOLD(grp); 504 505 /* validate fixed address if specified */ 506 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 507 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 508 (mac_addr[0] & 0x01))) { 509 rc = EINVAL; 510 goto bail; 511 } 512 513 /* update policy if requested */ 514 if (update_mask & AGGR_MODIFY_POLICY) 515 aggr_send_update_policy(grp, policy); 516 517 /* update unicast MAC address if requested */ 518 if (update_mask & AGGR_MODIFY_MAC) { 519 if (mac_fixed) { 520 /* user-supplied MAC address */ 521 grp->lg_mac_addr_port = NULL; 522 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 523 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 524 mac_addr_changed = B_TRUE; 525 } 526 } else if (grp->lg_addr_fixed) { 527 /* switch from user-supplied to automatic */ 528 aggr_port_t *port = grp->lg_ports; 529 530 rw_enter(&port->lp_lock, RW_WRITER); 531 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 532 
grp->lg_mac_addr_port = port; 533 mac_addr_changed = B_TRUE; 534 rw_exit(&port->lp_lock); 535 } 536 grp->lg_addr_fixed = mac_fixed; 537 } 538 539 if (mac_addr_changed) 540 aggr_grp_update_ports_mac(grp); 541 542 if (update_mask & AGGR_MODIFY_LACP_MODE) 543 aggr_lacp_update_mode(grp, lacp_mode); 544 545 if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing) 546 aggr_lacp_update_timer(grp, lacp_timer); 547 548 bail: 549 if (grp_arg == NULL) { 550 if (grp != NULL) { 551 rw_exit(&grp->lg_lock); 552 AGGR_LACP_UNLOCK(grp); 553 } 554 ght_unlock(aggr_grp_hash); 555 /* pass new unicast address up to MAC layer */ 556 if (grp != NULL && mac_addr_changed && !grp->lg_closing) 557 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 558 } 559 560 if (grp != NULL) 561 AGGR_GRP_REFRELE(grp); 562 563 return (rc); 564 } 565 566 /* 567 * Create a new link aggregation group upon request from administrator. 568 * Returns 0 on success, an errno on failure. 569 */ 570 int 571 aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, 572 uint32_t policy, boolean_t mac_fixed, uchar_t *mac_addr, 573 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 574 { 575 aggr_grp_t *grp = NULL; 576 aggr_port_t *port; 577 ghte_t hte; 578 mac_t *mac; 579 mac_info_t *mip; 580 int err; 581 int i; 582 583 /* need at least one port */ 584 if (nports == 0) 585 return (EINVAL); 586 587 ght_lock(aggr_grp_hash, GHT_WRITE); 588 589 /* does a group with the same key already exist? 
*/ 590 err = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key), &hte); 591 if (err != ENOENT) { 592 ght_unlock(aggr_grp_hash); 593 return (EEXIST); 594 } 595 596 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 597 598 AGGR_LACP_LOCK(grp); 599 rw_enter(&grp->lg_lock, RW_WRITER); 600 601 grp->lg_refs = 1; 602 grp->lg_closing = B_FALSE; 603 grp->lg_key = key; 604 605 grp->lg_ifspeed = 0; 606 grp->lg_link_state = LINK_STATE_UNKNOWN; 607 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 608 grp->lg_started = B_FALSE; 609 grp->lg_promisc = B_FALSE; 610 aggr_lacp_init_grp(grp); 611 612 /* add MAC ports to group */ 613 grp->lg_ports = NULL; 614 grp->lg_nports = 0; 615 grp->lg_nattached_ports = 0; 616 grp->lg_ntx_ports = 0; 617 618 for (i = 0; i < nports; i++) { 619 err = aggr_grp_add_port(grp, ports[i].lp_devname, 620 ports[i].lp_port, NULL); 621 if (err != 0) 622 goto bail; 623 } 624 625 /* 626 * If no explicit MAC address was specified by the administrator, 627 * set it to the MAC address of the first port. 
628 */ 629 grp->lg_addr_fixed = mac_fixed; 630 if (grp->lg_addr_fixed) { 631 /* validate specified address */ 632 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 633 err = EINVAL; 634 goto bail; 635 } 636 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 637 } else { 638 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 639 grp->lg_mac_addr_port = grp->lg_ports; 640 } 641 642 /* update the MAC address of the constituent ports */ 643 aggr_grp_update_ports_mac(grp); 644 645 /* update outbound load balancing policy */ 646 aggr_send_update_policy(grp, policy); 647 648 /* register with the MAC module */ 649 mac = &grp->lg_mac; 650 bzero(mac, sizeof (*mac)); 651 652 mac->m_ident = MAC_IDENT; 653 654 mac->m_driver = grp; 655 mac->m_dip = aggr_dip; 656 mac->m_port = key; 657 658 mip = &(mac->m_info); 659 mip->mi_media = DL_ETHER; 660 mip->mi_sdu_min = 0; 661 mip->mi_sdu_max = ETHERMTU; 662 663 MAC_STAT_MIB(mip->mi_stat); 664 MAC_STAT_ETHER(mip->mi_stat); 665 mip->mi_stat[MAC_STAT_LINK_DUPLEX] = B_TRUE; 666 667 mip->mi_addr_length = ETHERADDRL; 668 bcopy(aggr_brdcst_mac, mip->mi_brdcst_addr, ETHERADDRL); 669 bcopy(grp->lg_addr, mip->mi_unicst_addr, ETHERADDRL); 670 671 mac->m_stat = aggr_m_stat; 672 mac->m_start = aggr_m_start; 673 mac->m_stop = aggr_m_stop; 674 mac->m_promisc = aggr_m_promisc; 675 mac->m_multicst = aggr_m_multicst; 676 mac->m_unicst = aggr_m_unicst; 677 mac->m_tx = aggr_m_tx; 678 mac->m_resources = aggr_m_resources; 679 mac->m_ioctl = aggr_m_ioctl; 680 681 /* set the initial group capabilities */ 682 aggr_grp_capab_set(grp); 683 684 if ((err = mac_register(mac)) != 0) 685 goto bail; 686 687 /* set LACP mode */ 688 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 689 690 /* add new group to hash table */ 691 hte = ght_alloc(aggr_grp_hash, KM_SLEEP); 692 GHT_KEY(hte) = GHT_SCALAR_TO_KEY(key); 693 GHT_VAL(hte) = GHT_PTR_TO_VAL(grp); 694 grp->lg_hte = hte; 695 696 err = ght_insert(hte); 697 ASSERT(err == 0); 698 699 rw_exit(&grp->lg_lock); 700 
AGGR_LACP_UNLOCK(grp); 701 ght_unlock(aggr_grp_hash); 702 703 return (0); 704 705 bail: 706 if (grp != NULL) { 707 aggr_port_t *cport; 708 709 port = grp->lg_ports; 710 while (port != NULL) { 711 cport = port->lp_next; 712 aggr_port_delete(port); 713 port = cport; 714 } 715 716 rw_exit(&grp->lg_lock); 717 AGGR_LACP_UNLOCK(grp); 718 719 kmem_cache_free(aggr_grp_cache, grp); 720 } 721 722 ght_unlock(aggr_grp_hash); 723 return (err); 724 } 725 726 /* 727 * Return a pointer to the member of a group with specified device name 728 * and port number. 729 */ 730 static aggr_port_t * 731 aggr_grp_port_lookup(aggr_grp_t *grp, const char *devname, uint32_t portnum) 732 { 733 aggr_port_t *port; 734 735 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 736 737 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 738 if ((strcmp(port->lp_devname, devname) == 0) && 739 (port->lp_port == portnum)) 740 break; 741 } 742 743 return (port); 744 } 745 746 /* 747 * Stop, detach and remove a port from a link aggregation group. 748 */ 749 static int 750 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, boolean_t *do_notify) 751 { 752 aggr_port_t **pport; 753 boolean_t grp_mac_addr_changed = B_FALSE; 754 uint64_t val; 755 uint_t i; 756 757 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 758 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 759 ASSERT(grp->lg_nports > 1); 760 761 if (do_notify != NULL) 762 *do_notify = B_FALSE; 763 764 /* unlink port */ 765 for (pport = &grp->lg_ports; *pport != port; 766 pport = &(*pport)->lp_next) { 767 if (*pport == NULL) 768 return (ENOENT); 769 } 770 *pport = port->lp_next; 771 772 rw_enter(&port->lp_lock, RW_WRITER); 773 port->lp_closing = B_TRUE; 774 775 /* 776 * If the MAC address of the port being removed was assigned 777 * to the group, update the group MAC address 778 * using the MAC address of a different port. 
779 */ 780 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 781 /* 782 * Set the MAC address of the group to the 783 * MAC address of its first port. 784 */ 785 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 786 grp->lg_mac_addr_port = grp->lg_ports; 787 grp_mac_addr_changed = B_TRUE; 788 } 789 790 (void) aggr_grp_detach_port(grp, port); 791 792 /* 793 * Add the statistics of the ports while it was aggregated 794 * to the group's residual statistics. 795 */ 796 for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) { 797 /* avoid stats that are not counters */ 798 if (i == MAC_STAT_IFSPEED || i == MAC_STAT_LINK_DUPLEX) 799 continue; 800 801 /* get current value */ 802 val = aggr_port_stat(port, i); 803 /* subtract value at the point of aggregation */ 804 val -= port->lp_stat[i]; 805 /* add to the residual stat */ 806 grp->lg_stat[i] += val; 807 } 808 809 grp->lg_nports--; 810 811 rw_exit(&port->lp_lock); 812 813 aggr_port_delete(port); 814 815 /* 816 * If the group MAC address has changed, update the MAC address of 817 * the remaining consistuent ports according to the new MAC 818 * address of the group. 819 */ 820 if (grp->lg_closing) { 821 *do_notify = B_FALSE; 822 } else { 823 if (grp_mac_addr_changed) 824 aggr_grp_update_ports_mac(grp); 825 826 if (do_notify != NULL) 827 *do_notify = grp_mac_addr_changed; 828 } 829 830 return (0); 831 } 832 833 /* 834 * Remove one or more ports from an existing link aggregation group. 
835 */ 836 int 837 aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 838 { 839 int rc = 0, i; 840 ghte_t hte; 841 aggr_grp_t *grp = NULL; 842 aggr_port_t *port; 843 boolean_t notify = B_FALSE, grp_mac_addr_changed; 844 845 /* get group corresponding to key */ 846 ght_lock(aggr_grp_hash, GHT_READ); 847 if ((rc = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key), 848 &hte)) == ENOENT) { 849 ght_unlock(aggr_grp_hash); 850 return (rc); 851 } 852 ASSERT(rc == 0); 853 grp = (aggr_grp_t *)GHT_VAL(hte); 854 AGGR_GRP_REFHOLD(grp); 855 ght_unlock(aggr_grp_hash); 856 AGGR_LACP_LOCK(grp); 857 rw_enter(&grp->lg_lock, RW_WRITER); 858 859 /* we need to keep at least one port per group */ 860 if (nports >= grp->lg_nports) { 861 rc = EINVAL; 862 goto bail; 863 } 864 865 /* first verify that all the groups are valid */ 866 for (i = 0; i < nports; i++) { 867 if (aggr_grp_port_lookup(grp, ports[i].lp_devname, 868 ports[i].lp_port) == NULL) { 869 /* port not found */ 870 rc = ENOENT; 871 goto bail; 872 } 873 } 874 875 /* remove the specified ports from group */ 876 for (i = 0; i < nports && !grp->lg_closing; i++) { 877 /* lookup port */ 878 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 879 ports[i].lp_port); 880 ASSERT(port != NULL); 881 882 /* stop port if group has already been started */ 883 if (grp->lg_started) { 884 rw_enter(&port->lp_lock, RW_WRITER); 885 aggr_port_stop(port); 886 rw_exit(&port->lp_lock); 887 } 888 889 /* remove port from group */ 890 rc = aggr_grp_rem_port(grp, port, &grp_mac_addr_changed); 891 ASSERT(rc == 0); 892 notify = notify || grp_mac_addr_changed; 893 } 894 895 bail: 896 rw_exit(&grp->lg_lock); 897 AGGR_LACP_UNLOCK(grp); 898 if (notify && !grp->lg_closing) 899 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 900 if (rc == 0 && !grp->lg_closing) 901 mac_resource_update(&grp->lg_mac); 902 AGGR_GRP_REFRELE(grp); 903 904 return (rc); 905 } 906 907 int 908 aggr_grp_delete(uint32_t key) 909 { 910 int err; 911 ghte_t hte; 912 aggr_grp_t 
*grp; 913 aggr_port_t *port, *cport; 914 915 ght_lock(aggr_grp_hash, GHT_WRITE); 916 917 err = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key), &hte); 918 if (err == ENOENT) { 919 ght_unlock(aggr_grp_hash); 920 return (err); 921 } 922 ASSERT(err == 0); 923 924 grp = (aggr_grp_t *)GHT_VAL(hte); 925 926 AGGR_LACP_LOCK(grp); 927 rw_enter(&grp->lg_lock, RW_WRITER); 928 grp->lg_closing = B_TRUE; 929 930 /* 931 * Unregister from the MAC service module. Since this can 932 * fail if a client hasn't closed the MAC port, we gracefully 933 * fail the operation. 934 */ 935 if (mac_unregister(&grp->lg_mac)) { 936 rw_exit(&grp->lg_lock); 937 AGGR_LACP_UNLOCK(grp); 938 ght_unlock(aggr_grp_hash); 939 return (EBUSY); 940 } 941 942 /* detach and free MAC ports associated with group */ 943 port = grp->lg_ports; 944 while (port != NULL) { 945 cport = port->lp_next; 946 rw_enter(&port->lp_lock, RW_WRITER); 947 if (grp->lg_started) 948 aggr_port_stop(port); 949 (void) aggr_grp_detach_port(grp, port); 950 rw_exit(&port->lp_lock); 951 aggr_port_delete(port); 952 port = cport; 953 } 954 955 rw_exit(&grp->lg_lock); 956 AGGR_LACP_UNLOCK(grp); 957 958 ght_remove(hte); 959 ght_free(hte); 960 961 ght_unlock(aggr_grp_hash); 962 AGGR_GRP_REFRELE(grp); 963 964 return (0); 965 } 966 967 void 968 aggr_grp_free(aggr_grp_t *grp) 969 { 970 ASSERT(grp->lg_refs == 0); 971 kmem_cache_free(aggr_grp_cache, grp); 972 } 973 974 /* 975 * Walker invoked when building the list of configured groups and 976 * their ports that must be passed up to user-space. 
977 */ 978 979 static boolean_t 980 aggr_grp_info_walker(void *arg, ghte_t hte) 981 { 982 aggr_grp_t *grp; 983 aggr_port_t *port; 984 aggr_grp_info_state_t *state = arg; 985 986 if (state->ls_rc != 0) 987 return (B_FALSE); /* terminate walk */ 988 989 grp = (aggr_grp_t *)GHT_VAL(hte); 990 991 rw_enter(&grp->lg_lock, RW_READER); 992 993 if (state->ls_group_key != 0 && grp->lg_key != state->ls_group_key) 994 goto bail; 995 996 state->ls_group_found = B_TRUE; 997 998 state->ls_rc = state->ls_new_grp_fn(state->ls_fn_arg, grp->lg_key, 999 grp->lg_addr, grp->lg_addr_fixed, grp->lg_tx_policy, 1000 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 1001 1002 if (state->ls_rc != 0) 1003 goto bail; 1004 1005 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1006 1007 rw_enter(&port->lp_lock, RW_READER); 1008 1009 state->ls_rc = state->ls_new_port_fn(state->ls_fn_arg, 1010 port->lp_devname, port->lp_port, port->lp_addr, 1011 port->lp_state, &port->lp_lacp.ActorOperPortState); 1012 1013 rw_exit(&port->lp_lock); 1014 1015 if (state->ls_rc != 0) 1016 goto bail; 1017 } 1018 1019 bail: 1020 rw_exit(&grp->lg_lock); 1021 return (state->ls_rc == 0); 1022 } 1023 1024 int 1025 aggr_grp_info(uint_t *ngroups, uint32_t group_key, void *fn_arg, 1026 aggr_grp_info_new_grp_fn_t new_grp_fn, 1027 aggr_grp_info_new_port_fn_t new_port_fn) 1028 { 1029 aggr_grp_info_state_t state; 1030 int rc = 0; 1031 1032 ght_lock(aggr_grp_hash, GHT_READ); 1033 1034 *ngroups = ght_count(aggr_grp_hash); 1035 1036 bzero(&state, sizeof (state)); 1037 state.ls_group_key = group_key; 1038 state.ls_new_grp_fn = new_grp_fn; 1039 state.ls_new_port_fn = new_port_fn; 1040 state.ls_fn_arg = fn_arg; 1041 1042 ght_walk(aggr_grp_hash, aggr_grp_info_walker, &state); 1043 1044 if ((rc = state.ls_rc) == 0 && group_key != 0 && 1045 !state.ls_group_found) 1046 rc = ENOENT; 1047 1048 ght_unlock(aggr_grp_hash); 1049 return (rc); 1050 } 1051 1052 /* 1053 * Aggregation group walker. 
1054 */ 1055 1056 typedef struct aggr_grp_walker_state_s { 1057 aggr_grp_walker_fn_t ws_walker_fn; 1058 void *ws_arg; 1059 } aggr_grp_walker_state_t; 1060 1061 void 1062 aggr_grp_walker(void *arg, ghte_t hte) 1063 { 1064 aggr_grp_walker_state_t *state = arg; 1065 aggr_grp_t *grp; 1066 1067 grp = (aggr_grp_t *)GHT_VAL(hte); 1068 state->ws_walker_fn(grp, state->ws_arg); 1069 } 1070 1071 void 1072 aggr_grp_walk(aggr_grp_walker_fn_t walker, void *arg) 1073 { 1074 aggr_grp_walker_state_t state; 1075 1076 state.ws_walker_fn = walker; 1077 state.ws_arg = arg; 1078 1079 ght_lock(aggr_grp_hash, GHT_READ); 1080 ght_walk(aggr_grp_hash, aggr_grp_info_walker, &state); 1081 ght_unlock(aggr_grp_hash); 1082 } 1083 1084 static void 1085 aggr_m_resources(void *arg) 1086 { 1087 aggr_grp_t *grp = arg; 1088 aggr_port_t *port; 1089 1090 /* Call each port's m_resources function */ 1091 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 1092 mac_resources(port->lp_mh); 1093 } 1094 1095 /*ARGSUSED*/ 1096 static void 1097 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1098 { 1099 miocnak(q, mp, 0, ENOTSUP); 1100 } 1101 1102 static uint64_t 1103 aggr_m_stat(void *arg, enum mac_stat stat) 1104 { 1105 aggr_grp_t *grp = arg; 1106 aggr_port_t *port; 1107 uint64_t val; 1108 1109 rw_enter(&grp->lg_lock, RW_READER); 1110 1111 switch (stat) { 1112 case MAC_STAT_IFSPEED: 1113 val = grp->lg_ifspeed; 1114 break; 1115 case MAC_STAT_LINK_DUPLEX: 1116 val = grp->lg_link_duplex; 1117 break; 1118 default: 1119 /* 1120 * The remaining statistics are counters. They are computed 1121 * by aggregating the counters of the members MACs while they 1122 * were aggregated, plus the residual counter of the group 1123 * itself, which is updated each time a MAC is removed from 1124 * the group. 
1125 */ 1126 val = 0; 1127 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1128 /* actual port statistic */ 1129 val += aggr_port_stat(port, stat); 1130 /* minus the port stat when it was added */ 1131 val -= port->lp_stat[stat]; 1132 /* plus any residual amount for the group */ 1133 val += grp->lg_stat[stat]; 1134 } 1135 } 1136 1137 rw_exit(&grp->lg_lock); 1138 return (val); 1139 } 1140 1141 static int 1142 aggr_m_start(void *arg) 1143 { 1144 aggr_grp_t *grp = arg; 1145 aggr_port_t *port; 1146 1147 AGGR_LACP_LOCK(grp); 1148 rw_enter(&grp->lg_lock, RW_WRITER); 1149 1150 /* 1151 * Attempts to start all configured members of the group. 1152 * Group members will be attached when their link-up notification 1153 * is received. 1154 */ 1155 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1156 rw_enter(&port->lp_lock, RW_WRITER); 1157 if (aggr_port_start(port) != 0) { 1158 rw_exit(&port->lp_lock); 1159 continue; 1160 } 1161 1162 /* set port promiscuous mode */ 1163 if (aggr_port_promisc(port, grp->lg_promisc) != 0) 1164 aggr_port_stop(port); 1165 rw_exit(&port->lp_lock); 1166 } 1167 1168 grp->lg_started = B_TRUE; 1169 1170 rw_exit(&grp->lg_lock); 1171 AGGR_LACP_UNLOCK(grp); 1172 1173 return (0); 1174 } 1175 1176 static void 1177 aggr_m_stop(void *arg) 1178 { 1179 aggr_grp_t *grp = arg; 1180 aggr_port_t *port; 1181 1182 rw_enter(&grp->lg_lock, RW_WRITER); 1183 1184 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1185 rw_enter(&port->lp_lock, RW_WRITER); 1186 aggr_port_stop(port); 1187 rw_exit(&port->lp_lock); 1188 } 1189 1190 grp->lg_started = B_FALSE; 1191 1192 rw_exit(&grp->lg_lock); 1193 } 1194 1195 static int 1196 aggr_m_promisc(void *arg, boolean_t on) 1197 { 1198 aggr_grp_t *grp = arg; 1199 aggr_port_t *port; 1200 1201 rw_enter(&grp->lg_lock, RW_WRITER); 1202 AGGR_GRP_REFHOLD(grp); 1203 1204 if (on == grp->lg_promisc) 1205 goto bail; 1206 1207 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1208 
rw_enter(&port->lp_lock, RW_WRITER); 1209 AGGR_PORT_REFHOLD(port); 1210 if (port->lp_started) { 1211 if (aggr_port_promisc(port, on) != 0) 1212 (void) aggr_grp_detach_port(grp, port); 1213 } 1214 rw_exit(&port->lp_lock); 1215 AGGR_PORT_REFRELE(port); 1216 if (grp->lg_closing) 1217 break; 1218 } 1219 1220 grp->lg_promisc = on; 1221 1222 bail: 1223 rw_exit(&grp->lg_lock); 1224 AGGR_GRP_REFRELE(grp); 1225 1226 return (0); 1227 } 1228 1229 /* 1230 * Add or remove the multicast addresses that are defined for the group 1231 * to or from the specified port. 1232 * This function is called before stopping a port, before a port 1233 * is detached from a group, and when attaching a port to a group. 1234 */ 1235 void 1236 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add) 1237 { 1238 aggr_grp_t *grp = port->lp_grp; 1239 1240 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 1241 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 1242 1243 if (!port->lp_started) 1244 return; 1245 1246 mac_multicst_refresh(&grp->lg_mac, aggr_port_multicst, port, 1247 add); 1248 } 1249 1250 static int 1251 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1252 { 1253 aggr_grp_t *grp = arg; 1254 aggr_port_t *port = NULL; 1255 int err = 0, cerr; 1256 1257 rw_enter(&grp->lg_lock, RW_WRITER); 1258 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1259 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 1260 continue; 1261 cerr = aggr_port_multicst(port, add, addrp); 1262 if (cerr != 0 && err == 0) 1263 err = cerr; 1264 } 1265 rw_exit(&grp->lg_lock); 1266 return (err); 1267 } 1268 1269 static int 1270 aggr_m_unicst(void *arg, const uint8_t *macaddr) 1271 { 1272 aggr_grp_t *grp = arg; 1273 int rc; 1274 1275 AGGR_LACP_LOCK(grp); 1276 rw_enter(&grp->lg_lock, RW_WRITER); 1277 rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr, 1278 0, 0); 1279 rw_exit(&grp->lg_lock); 1280 AGGR_LACP_UNLOCK(grp); 1281 1282 return (rc); 1283 } 1284 1285 /* 1286 * Initialize 
the capabilities that are advertised for the group 1287 * according to the capabilities of the constituent ports. 1288 */ 1289 static void 1290 aggr_grp_capab_set(aggr_grp_t *grp) 1291 { 1292 uint32_t cksum = (uint32_t)-1; 1293 uint32_t poll = DL_CAPAB_POLL; 1294 aggr_port_t *port; 1295 const mac_info_t *port_mi; 1296 1297 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 1298 1299 ASSERT(grp->lg_ports != NULL); 1300 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1301 port_mi = mac_info(port->lp_mh); 1302 cksum &= port_mi->mi_cksum; 1303 poll &= port_mi->mi_poll; 1304 } 1305 1306 grp->lg_mac.m_info.mi_cksum = cksum; 1307 grp->lg_mac.m_info.mi_poll = poll; 1308 } 1309 1310 /* 1311 * Checks whether the capabilities of the ports being added are compatible 1312 * with the current capabilities of the aggregation. 1313 */ 1314 static boolean_t 1315 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 1316 { 1317 const mac_info_t *port_mi = mac_info(port->lp_mh); 1318 uint32_t grp_cksum = grp->lg_mac.m_info.mi_cksum; 1319 1320 ASSERT(grp->lg_ports != NULL); 1321 1322 return (((grp_cksum & port_mi->mi_cksum) == grp_cksum) && 1323 (grp->lg_mac.m_info.mi_poll == port_mi->mi_poll)); 1324 } 1325