/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
 *
 * An instance of the structure aggr_grp_t is allocated for each
 * link aggregation group. When created, aggr_grp_t objects are
 * entered into the aggr_grp_hash hash table maintained by the modhash
 * module. The hash key is the port number associated with the link
 * aggregation group. The port number associated with a group corresponds
 * to the key associated with the group.
 *
 * A set of MAC ports is associated with each aggregation group.
39 */ 40 41 #include <sys/types.h> 42 #include <sys/sysmacros.h> 43 #include <sys/conf.h> 44 #include <sys/cmn_err.h> 45 #include <sys/list.h> 46 #include <sys/ksynch.h> 47 #include <sys/kmem.h> 48 #include <sys/stream.h> 49 #include <sys/modctl.h> 50 #include <sys/ddi.h> 51 #include <sys/sunddi.h> 52 #include <sys/atomic.h> 53 #include <sys/stat.h> 54 #include <sys/modhash.h> 55 #include <sys/strsun.h> 56 #include <sys/dlpi.h> 57 58 #include <sys/aggr.h> 59 #include <sys/aggr_impl.h> 60 61 static void aggr_m_info(void *, mac_info_t *); 62 static int aggr_m_start(void *); 63 static void aggr_m_stop(void *); 64 static int aggr_m_promisc(void *, boolean_t); 65 static int aggr_m_multicst(void *, boolean_t, const uint8_t *); 66 static int aggr_m_unicst(void *, const uint8_t *); 67 static uint64_t aggr_m_stat(void *, enum mac_stat); 68 static void aggr_m_resources(void *); 69 static void aggr_m_ioctl(void *, queue_t *, mblk_t *); 70 71 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, const char *, uint32_t); 72 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *); 73 static void aggr_stats_op(enum mac_stat, uint64_t *, uint64_t *, boolean_t); 74 static void aggr_grp_capab_set(aggr_grp_t *); 75 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); 76 77 static kmem_cache_t *aggr_grp_cache; 78 static mod_hash_t *aggr_grp_hash; 79 static krwlock_t aggr_grp_lock; 80 static uint_t aggr_grp_cnt; 81 82 #define GRP_HASHSZ 64 83 #define GRP_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)key) 84 85 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; 86 static uchar_t aggr_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 87 88 /* used by grp_info_walker */ 89 typedef struct aggr_grp_info_state { 90 uint32_t ls_group_key; 91 boolean_t ls_group_found; 92 aggr_grp_info_new_grp_fn_t ls_new_grp_fn; 93 aggr_grp_info_new_port_fn_t ls_new_port_fn; 94 void *ls_fn_arg; 95 int ls_rc; 96 } aggr_grp_info_state_t; 97 98 /*ARGSUSED*/ 99 static int 100 
aggr_grp_constructor(void *buf, void *arg, int kmflag) 101 { 102 aggr_grp_t *grp = buf; 103 104 bzero(grp, sizeof (*grp)); 105 rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL); 106 mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL); 107 108 grp->lg_link_state = LINK_STATE_UNKNOWN; 109 110 return (0); 111 } 112 113 /*ARGSUSED*/ 114 static void 115 aggr_grp_destructor(void *buf, void *arg) 116 { 117 aggr_grp_t *grp = buf; 118 119 if (grp->lg_tx_ports != NULL) { 120 kmem_free(grp->lg_tx_ports, 121 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 122 } 123 124 mutex_destroy(&grp->aggr.gl_lock); 125 rw_destroy(&grp->lg_lock); 126 } 127 128 void 129 aggr_grp_init(void) 130 { 131 aggr_grp_cache = kmem_cache_create("aggr_grp_cache", 132 sizeof (aggr_grp_t), 0, aggr_grp_constructor, 133 aggr_grp_destructor, NULL, NULL, NULL, 0); 134 135 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash", 136 GRP_HASHSZ, mod_hash_null_valdtor); 137 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); 138 aggr_grp_cnt = 0; 139 } 140 141 void 142 aggr_grp_fini(void) 143 { 144 rw_destroy(&aggr_grp_lock); 145 mod_hash_destroy_idhash(aggr_grp_hash); 146 kmem_cache_destroy(aggr_grp_cache); 147 } 148 149 uint_t 150 aggr_grp_count(void) 151 { 152 uint_t count; 153 154 rw_enter(&aggr_grp_lock, RW_READER); 155 count = aggr_grp_cnt; 156 rw_exit(&aggr_grp_lock); 157 return (count); 158 } 159 160 /* 161 * Attach a port to a link aggregation group. 162 * 163 * A port is attached to a link aggregation group once its speed 164 * and link state have been verified. 165 * 166 * Returns B_TRUE if the group link state or speed has changed. If 167 * it's the case, the caller must notify the MAC layer via a call 168 * to mac_link(). 
169 */ 170 boolean_t 171 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) 172 { 173 boolean_t link_changed = B_FALSE; 174 175 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 176 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 177 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 178 179 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 180 return (B_FALSE); 181 182 /* 183 * Validate the MAC port link speed and update the group 184 * link speed if needed. 185 */ 186 if (port->lp_ifspeed == 0 || 187 port->lp_link_state != LINK_STATE_UP || 188 port->lp_link_duplex != LINK_DUPLEX_FULL) { 189 /* 190 * Can't attach a MAC port with unknown link speed, 191 * down link, or not in full duplex mode. 192 */ 193 return (B_FALSE); 194 } 195 196 if (grp->lg_ifspeed == 0) { 197 /* 198 * The group inherits the speed of the first link being 199 * attached. 200 */ 201 grp->lg_ifspeed = port->lp_ifspeed; 202 link_changed = B_TRUE; 203 } else if (grp->lg_ifspeed != port->lp_ifspeed) { 204 /* 205 * The link speed of the MAC port must be the same as 206 * the group link speed, as per 802.3ad. Since it is 207 * not, the attach is cancelled. 208 */ 209 return (B_FALSE); 210 } 211 212 grp->lg_nattached_ports++; 213 214 /* 215 * Update the group link state. 216 */ 217 if (grp->lg_link_state != LINK_STATE_UP) { 218 grp->lg_link_state = LINK_STATE_UP; 219 grp->lg_link_duplex = LINK_DUPLEX_FULL; 220 link_changed = B_TRUE; 221 } 222 223 aggr_grp_multicst_port(port, B_TRUE); 224 225 /* 226 * Update port's state. 227 */ 228 port->lp_state = AGGR_PORT_STATE_ATTACHED; 229 230 /* 231 * Set port's receive callback 232 */ 233 port->lp_mrh = mac_rx_add(port->lp_mh, aggr_recv_cb, (void *)port); 234 235 /* 236 * If LACP is OFF, the port can be used to send data as soon 237 * as its link is up and verified to be compatible with the 238 * aggregation. 239 * 240 * If LACP is active or passive, notify the LACP subsystem, which 241 * will enable sending on the port following the LACP protocol. 
242 */ 243 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 244 aggr_send_port_enable(port); 245 else 246 aggr_lacp_port_attached(port); 247 248 return (link_changed); 249 } 250 251 boolean_t 252 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) 253 { 254 boolean_t link_changed = B_FALSE; 255 256 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 257 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 258 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 259 260 /* update state */ 261 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 262 return (B_FALSE); 263 264 mac_rx_remove(port->lp_mh, port->lp_mrh); 265 port->lp_state = AGGR_PORT_STATE_STANDBY; 266 267 aggr_grp_multicst_port(port, B_FALSE); 268 269 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 270 aggr_send_port_disable(port); 271 else 272 aggr_lacp_port_detached(port); 273 274 grp->lg_nattached_ports--; 275 if (grp->lg_nattached_ports == 0) { 276 /* the last attached MAC port of the group is being detached */ 277 grp->lg_ifspeed = 0; 278 grp->lg_link_state = LINK_STATE_DOWN; 279 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 280 link_changed = B_TRUE; 281 } 282 283 return (link_changed); 284 } 285 286 /* 287 * Update the MAC addresses of the constituent ports of the specified 288 * group. This function is invoked: 289 * - after creating a new aggregation group. 290 * - after adding new ports to an aggregation group. 291 * - after removing a port from a group when the MAC address of 292 * that port was used for the MAC address of the group. 293 * - after the MAC address of a port changed when the MAC address 294 * of that port was used for the MAC address of the group. 
295 */ 296 void 297 aggr_grp_update_ports_mac(aggr_grp_t *grp) 298 { 299 aggr_port_t *cport; 300 301 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 302 303 for (cport = grp->lg_ports; cport != NULL; 304 cport = cport->lp_next) { 305 rw_enter(&cport->lp_lock, RW_WRITER); 306 if (aggr_port_unicst(cport, grp->lg_addr) != 0) 307 (void) aggr_grp_detach_port(grp, cport); 308 rw_exit(&cport->lp_lock); 309 if (grp->lg_closing) 310 break; 311 } 312 } 313 314 /* 315 * Invoked when the MAC address of a port has changed. If the port's 316 * MAC address was used for the group MAC address, returns B_TRUE. 317 * In that case, it is the responsibility of the caller to 318 * invoke aggr_grp_update_ports_mac() after releasing the 319 * the port lock, and aggr_grp_notify() after releasing the 320 * group lock. 321 */ 322 boolean_t 323 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port) 324 { 325 boolean_t grp_addr_changed = B_FALSE; 326 327 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 328 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 329 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 330 331 if (grp->lg_addr_fixed) { 332 /* 333 * The group is using a fixed MAC address or an automatic 334 * MAC address has not been set. 335 */ 336 return (B_FALSE); 337 } 338 339 if (grp->lg_mac_addr_port == port) { 340 /* 341 * The MAC address of the port was assigned to the group 342 * MAC address. Update the group MAC address. 343 */ 344 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 345 grp_addr_changed = B_TRUE; 346 } else { 347 /* 348 * Update the actual port MAC address to the MAC address 349 * of the group. 350 */ 351 if (aggr_port_unicst(port, grp->lg_addr) != 0) 352 (void) aggr_grp_detach_port(grp, port); 353 } 354 355 return (grp_addr_changed); 356 } 357 358 /* 359 * Add a port to a link aggregation group. 
360 */ 361 static int 362 aggr_grp_add_port(aggr_grp_t *grp, const char *name, uint_t portnum, 363 aggr_port_t **pp) 364 { 365 aggr_port_t *port, **cport; 366 int err; 367 368 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 369 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 370 371 /* create new port */ 372 err = aggr_port_create(name, portnum, &port); 373 if (err != 0) 374 return (err); 375 376 rw_enter(&port->lp_lock, RW_WRITER); 377 378 /* add port to list of group constituent ports */ 379 cport = &grp->lg_ports; 380 while (*cport != NULL) 381 cport = &((*cport)->lp_next); 382 *cport = port; 383 384 /* 385 * Back reference to the group it is member of. A port always 386 * holds a reference to its group to ensure that the back 387 * reference is always valid. 388 */ 389 port->lp_grp = grp; 390 AGGR_GRP_REFHOLD(grp); 391 grp->lg_nports++; 392 393 aggr_lacp_init_port(port); 394 395 rw_exit(&port->lp_lock); 396 397 if (pp != NULL) 398 *pp = port; 399 400 return (0); 401 } 402 403 /* 404 * Add one or more ports to an existing link aggregation group. 
405 */ 406 int 407 aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 408 { 409 int rc, i, nadded = 0; 410 aggr_grp_t *grp = NULL; 411 aggr_port_t *port; 412 413 /* get group corresponding to key */ 414 rw_enter(&aggr_grp_lock, RW_READER); 415 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 416 (mod_hash_val_t *)&grp) != 0) { 417 rw_exit(&aggr_grp_lock); 418 return (ENOENT); 419 } 420 AGGR_GRP_REFHOLD(grp); 421 rw_exit(&aggr_grp_lock); 422 423 AGGR_LACP_LOCK(grp); 424 rw_enter(&grp->lg_lock, RW_WRITER); 425 426 /* add the specified ports to group */ 427 for (i = 0; i < nports; i++) { 428 /* add port to group */ 429 if ((rc = aggr_grp_add_port(grp, ports[i].lp_devname, 430 ports[i].lp_port, &port)) != 0) 431 goto bail; 432 ASSERT(port != NULL); 433 nadded++; 434 435 /* check capabilities */ 436 if (!aggr_grp_capab_check(grp, port)) { 437 rc = ENOTSUP; 438 goto bail; 439 } 440 441 /* start port if group has already been started */ 442 if (grp->lg_started) { 443 rw_enter(&port->lp_lock, RW_WRITER); 444 rc = aggr_port_start(port); 445 if (rc != 0) { 446 rw_exit(&port->lp_lock); 447 goto bail; 448 } 449 450 /* set port promiscuous mode */ 451 rc = aggr_port_promisc(port, grp->lg_promisc); 452 if (rc != 0) { 453 rw_exit(&port->lp_lock); 454 goto bail; 455 } 456 rw_exit(&port->lp_lock); 457 } 458 } 459 460 /* update the MAC address of the constituent ports */ 461 aggr_grp_update_ports_mac(grp); 462 463 bail: 464 if (rc != 0) { 465 /* stop and remove ports that have been added */ 466 for (i = 0; i < nadded && !grp->lg_closing; i++) { 467 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 468 ports[i].lp_port); 469 ASSERT(port != NULL); 470 if (grp->lg_started) { 471 rw_enter(&port->lp_lock, RW_WRITER); 472 aggr_port_stop(port); 473 rw_exit(&port->lp_lock); 474 } 475 (void) aggr_grp_rem_port(grp, port, NULL); 476 } 477 } 478 479 rw_exit(&grp->lg_lock); 480 AGGR_LACP_UNLOCK(grp); 481 if (rc == 0 && !grp->lg_closing) 482 
mac_resource_update(&grp->lg_mac); 483 AGGR_GRP_REFRELE(grp); 484 return (rc); 485 } 486 487 /* 488 * Update properties of an existing link aggregation group. 489 */ 490 int 491 aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask, 492 uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr, 493 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 494 { 495 int rc = 0; 496 aggr_grp_t *grp = NULL; 497 boolean_t mac_addr_changed = B_FALSE; 498 499 if (grp_arg == NULL) { 500 /* get group corresponding to key */ 501 rw_enter(&aggr_grp_lock, RW_READER); 502 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 503 (mod_hash_val_t *)&grp) != 0) { 504 rc = ENOENT; 505 goto bail; 506 } 507 AGGR_LACP_LOCK(grp); 508 rw_enter(&grp->lg_lock, RW_WRITER); 509 } else { 510 grp = grp_arg; 511 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 512 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 513 } 514 515 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 516 AGGR_GRP_REFHOLD(grp); 517 518 /* validate fixed address if specified */ 519 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 520 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 521 (mac_addr[0] & 0x01))) { 522 rc = EINVAL; 523 goto bail; 524 } 525 526 /* update policy if requested */ 527 if (update_mask & AGGR_MODIFY_POLICY) 528 aggr_send_update_policy(grp, policy); 529 530 /* update unicast MAC address if requested */ 531 if (update_mask & AGGR_MODIFY_MAC) { 532 if (mac_fixed) { 533 /* user-supplied MAC address */ 534 grp->lg_mac_addr_port = NULL; 535 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 536 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 537 mac_addr_changed = B_TRUE; 538 } 539 } else if (grp->lg_addr_fixed) { 540 /* switch from user-supplied to automatic */ 541 aggr_port_t *port = grp->lg_ports; 542 543 rw_enter(&port->lp_lock, RW_WRITER); 544 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 545 grp->lg_mac_addr_port = port; 546 mac_addr_changed = B_TRUE; 547 rw_exit(&port->lp_lock); 
548 } 549 grp->lg_addr_fixed = mac_fixed; 550 } 551 552 if (mac_addr_changed) 553 aggr_grp_update_ports_mac(grp); 554 555 if (update_mask & AGGR_MODIFY_LACP_MODE) 556 aggr_lacp_update_mode(grp, lacp_mode); 557 558 if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing) 559 aggr_lacp_update_timer(grp, lacp_timer); 560 561 bail: 562 if (grp_arg == NULL) { 563 if (grp != NULL) { 564 rw_exit(&grp->lg_lock); 565 AGGR_LACP_UNLOCK(grp); 566 } 567 rw_exit(&aggr_grp_lock); 568 /* pass new unicast address up to MAC layer */ 569 if (grp != NULL && mac_addr_changed && !grp->lg_closing) 570 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 571 } 572 573 if (grp != NULL) 574 AGGR_GRP_REFRELE(grp); 575 576 return (rc); 577 } 578 579 /* 580 * Create a new link aggregation group upon request from administrator. 581 * Returns 0 on success, an errno on failure. 582 */ 583 int 584 aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports, 585 uint32_t policy, boolean_t mac_fixed, uchar_t *mac_addr, 586 aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 587 { 588 aggr_grp_t *grp = NULL; 589 aggr_port_t *port; 590 mac_t *mac; 591 mac_info_t *mip; 592 int err; 593 int i; 594 595 /* need at least one port */ 596 if (nports == 0) 597 return (EINVAL); 598 599 rw_enter(&aggr_grp_lock, RW_WRITER); 600 601 /* does a group with the same key already exist? 
*/ 602 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 603 (mod_hash_val_t *)&grp); 604 if (err == 0) { 605 rw_exit(&aggr_grp_lock); 606 return (EEXIST); 607 } 608 609 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 610 611 AGGR_LACP_LOCK(grp); 612 rw_enter(&grp->lg_lock, RW_WRITER); 613 614 grp->lg_refs = 1; 615 grp->lg_closing = 0; 616 grp->lg_key = key; 617 618 grp->lg_ifspeed = 0; 619 grp->lg_link_state = LINK_STATE_UNKNOWN; 620 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 621 grp->lg_started = B_FALSE; 622 grp->lg_promisc = B_FALSE; 623 aggr_lacp_init_grp(grp); 624 625 /* add MAC ports to group */ 626 grp->lg_ports = NULL; 627 grp->lg_nports = 0; 628 grp->lg_nattached_ports = 0; 629 grp->lg_ntx_ports = 0; 630 631 for (i = 0; i < nports; i++) { 632 err = aggr_grp_add_port(grp, ports[i].lp_devname, 633 ports[i].lp_port, NULL); 634 if (err != 0) 635 goto bail; 636 } 637 638 /* 639 * If no explicit MAC address was specified by the administrator, 640 * set it to the MAC address of the first port. 
641 */ 642 grp->lg_addr_fixed = mac_fixed; 643 if (grp->lg_addr_fixed) { 644 /* validate specified address */ 645 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 646 err = EINVAL; 647 goto bail; 648 } 649 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 650 } else { 651 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 652 grp->lg_mac_addr_port = grp->lg_ports; 653 } 654 655 /* update the MAC address of the constituent ports */ 656 aggr_grp_update_ports_mac(grp); 657 658 /* update outbound load balancing policy */ 659 aggr_send_update_policy(grp, policy); 660 661 /* register with the MAC module */ 662 mac = &grp->lg_mac; 663 bzero(mac, sizeof (*mac)); 664 665 mac->m_ident = MAC_IDENT; 666 667 mac->m_driver = grp; 668 mac->m_dip = aggr_dip; 669 mac->m_port = key; 670 671 mip = &(mac->m_info); 672 mip->mi_media = DL_ETHER; 673 mip->mi_sdu_min = 0; 674 mip->mi_sdu_max = ETHERMTU; 675 676 MAC_STAT_MIB(mip->mi_stat); 677 MAC_STAT_ETHER(mip->mi_stat); 678 mip->mi_stat[MAC_STAT_LINK_DUPLEX] = B_TRUE; 679 680 mip->mi_addr_length = ETHERADDRL; 681 bcopy(aggr_brdcst_mac, mip->mi_brdcst_addr, ETHERADDRL); 682 bcopy(grp->lg_addr, mip->mi_unicst_addr, ETHERADDRL); 683 684 mac->m_stat = aggr_m_stat; 685 mac->m_start = aggr_m_start; 686 mac->m_stop = aggr_m_stop; 687 mac->m_promisc = aggr_m_promisc; 688 mac->m_multicst = aggr_m_multicst; 689 mac->m_unicst = aggr_m_unicst; 690 mac->m_tx = aggr_m_tx; 691 mac->m_resources = aggr_m_resources; 692 mac->m_ioctl = aggr_m_ioctl; 693 694 /* set the initial group capabilities */ 695 aggr_grp_capab_set(grp); 696 697 if ((err = mac_register(mac)) != 0) 698 goto bail; 699 700 /* set LACP mode */ 701 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 702 703 /* add new group to hash table */ 704 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(key), 705 (mod_hash_val_t)grp); 706 ASSERT(err == 0); 707 aggr_grp_cnt++; 708 709 rw_exit(&grp->lg_lock); 710 AGGR_LACP_UNLOCK(grp); 711 rw_exit(&aggr_grp_lock); 712 return (0); 713 714 bail: 715 if 
(grp != NULL) { 716 aggr_port_t *cport; 717 718 atomic_add_32(&grp->lg_closing, 1); 719 720 port = grp->lg_ports; 721 while (port != NULL) { 722 cport = port->lp_next; 723 aggr_port_delete(port); 724 port = cport; 725 } 726 727 rw_exit(&grp->lg_lock); 728 AGGR_LACP_UNLOCK(grp); 729 730 kmem_cache_free(aggr_grp_cache, grp); 731 } 732 733 rw_exit(&aggr_grp_lock); 734 return (err); 735 } 736 737 /* 738 * Return a pointer to the member of a group with specified device name 739 * and port number. 740 */ 741 static aggr_port_t * 742 aggr_grp_port_lookup(aggr_grp_t *grp, const char *devname, uint32_t portnum) 743 { 744 aggr_port_t *port; 745 746 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 747 748 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 749 if ((strcmp(port->lp_devname, devname) == 0) && 750 (port->lp_port == portnum)) 751 break; 752 } 753 754 return (port); 755 } 756 757 /* 758 * Stop, detach and remove a port from a link aggregation group. 759 */ 760 static int 761 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, boolean_t *do_notify) 762 { 763 aggr_port_t **pport; 764 boolean_t grp_mac_addr_changed = B_FALSE; 765 uint64_t val; 766 uint_t i; 767 768 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 769 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 770 ASSERT(grp->lg_nports > 1); 771 772 if (do_notify != NULL) 773 *do_notify = B_FALSE; 774 775 /* unlink port */ 776 for (pport = &grp->lg_ports; *pport != port; 777 pport = &(*pport)->lp_next) { 778 if (*pport == NULL) 779 return (ENOENT); 780 } 781 *pport = port->lp_next; 782 783 atomic_add_32(&port->lp_closing, 1); 784 785 rw_enter(&port->lp_lock, RW_WRITER); 786 787 /* 788 * If the MAC address of the port being removed was assigned 789 * to the group, update the group MAC address 790 * using the MAC address of a different port. 791 */ 792 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 793 /* 794 * Set the MAC address of the group to the 795 * MAC address of its first port. 
796 */ 797 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 798 grp->lg_mac_addr_port = grp->lg_ports; 799 grp_mac_addr_changed = B_TRUE; 800 } 801 802 (void) aggr_grp_detach_port(grp, port); 803 804 /* 805 * Add the statistics of the ports while it was aggregated 806 * to the group's residual statistics. 807 */ 808 for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) { 809 /* avoid stats that are not counters */ 810 if (i == MAC_STAT_IFSPEED || i == MAC_STAT_LINK_DUPLEX) 811 continue; 812 813 /* get current value */ 814 val = aggr_port_stat(port, i); 815 /* subtract value at the point of aggregation */ 816 val -= port->lp_stat[i]; 817 /* add to the residual stat */ 818 grp->lg_stat[i] += val; 819 } 820 821 grp->lg_nports--; 822 823 rw_exit(&port->lp_lock); 824 825 aggr_port_delete(port); 826 827 /* 828 * If the group MAC address has changed, update the MAC address of 829 * the remaining consistuent ports according to the new MAC 830 * address of the group. 831 */ 832 if (grp->lg_closing) { 833 *do_notify = B_FALSE; 834 } else { 835 if (grp_mac_addr_changed) 836 aggr_grp_update_ports_mac(grp); 837 838 if (do_notify != NULL) 839 *do_notify = grp_mac_addr_changed; 840 } 841 842 return (0); 843 } 844 845 /* 846 * Remove one or more ports from an existing link aggregation group. 
847 */ 848 int 849 aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports) 850 { 851 int rc = 0, i; 852 aggr_grp_t *grp = NULL; 853 aggr_port_t *port; 854 boolean_t notify = B_FALSE, grp_mac_addr_changed; 855 856 /* get group corresponding to key */ 857 rw_enter(&aggr_grp_lock, RW_READER); 858 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 859 (mod_hash_val_t *)&grp) != 0) { 860 rw_exit(&aggr_grp_lock); 861 return (ENOENT); 862 } 863 AGGR_GRP_REFHOLD(grp); 864 rw_exit(&aggr_grp_lock); 865 866 AGGR_LACP_LOCK(grp); 867 rw_enter(&grp->lg_lock, RW_WRITER); 868 869 /* we need to keep at least one port per group */ 870 if (nports >= grp->lg_nports) { 871 rc = EINVAL; 872 goto bail; 873 } 874 875 /* first verify that all the groups are valid */ 876 for (i = 0; i < nports; i++) { 877 if (aggr_grp_port_lookup(grp, ports[i].lp_devname, 878 ports[i].lp_port) == NULL) { 879 /* port not found */ 880 rc = ENOENT; 881 goto bail; 882 } 883 } 884 885 /* remove the specified ports from group */ 886 for (i = 0; i < nports && !grp->lg_closing; i++) { 887 /* lookup port */ 888 port = aggr_grp_port_lookup(grp, ports[i].lp_devname, 889 ports[i].lp_port); 890 ASSERT(port != NULL); 891 892 /* stop port if group has already been started */ 893 if (grp->lg_started) { 894 rw_enter(&port->lp_lock, RW_WRITER); 895 aggr_port_stop(port); 896 rw_exit(&port->lp_lock); 897 } 898 899 /* remove port from group */ 900 rc = aggr_grp_rem_port(grp, port, &grp_mac_addr_changed); 901 ASSERT(rc == 0); 902 notify = notify || grp_mac_addr_changed; 903 } 904 905 bail: 906 rw_exit(&grp->lg_lock); 907 AGGR_LACP_UNLOCK(grp); 908 if (notify && !grp->lg_closing) 909 mac_unicst_update(&grp->lg_mac, grp->lg_addr); 910 if (rc == 0 && !grp->lg_closing) 911 mac_resource_update(&grp->lg_mac); 912 AGGR_GRP_REFRELE(grp); 913 914 return (rc); 915 } 916 917 int 918 aggr_grp_delete(uint32_t key) 919 { 920 aggr_grp_t *grp = NULL; 921 aggr_port_t *port, *cport; 922 mod_hash_val_t val; 923 924 
rw_enter(&aggr_grp_lock, RW_WRITER); 925 926 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key), 927 (mod_hash_val_t *)&grp) != 0) { 928 rw_exit(&aggr_grp_lock); 929 return (ENOENT); 930 } 931 932 atomic_add_32(&grp->lg_closing, 1); 933 934 AGGR_LACP_LOCK(grp); 935 rw_enter(&grp->lg_lock, RW_WRITER); 936 937 /* 938 * Unregister from the MAC service module. Since this can 939 * fail if a client hasn't closed the MAC port, we gracefully 940 * fail the operation. 941 */ 942 if (mac_unregister(&grp->lg_mac)) { 943 rw_exit(&grp->lg_lock); 944 AGGR_LACP_UNLOCK(grp); 945 rw_exit(&aggr_grp_lock); 946 return (EBUSY); 947 } 948 949 /* detach and free MAC ports associated with group */ 950 port = grp->lg_ports; 951 while (port != NULL) { 952 cport = port->lp_next; 953 rw_enter(&port->lp_lock, RW_WRITER); 954 if (grp->lg_started) 955 aggr_port_stop(port); 956 (void) aggr_grp_detach_port(grp, port); 957 rw_exit(&port->lp_lock); 958 aggr_port_delete(port); 959 port = cport; 960 } 961 962 rw_exit(&grp->lg_lock); 963 AGGR_LACP_UNLOCK(grp); 964 965 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(key), &val); 966 ASSERT(grp == (aggr_grp_t *)val); 967 968 ASSERT(aggr_grp_cnt > 0); 969 aggr_grp_cnt--; 970 971 rw_exit(&aggr_grp_lock); 972 AGGR_GRP_REFRELE(grp); 973 974 return (0); 975 } 976 977 void 978 aggr_grp_free(aggr_grp_t *grp) 979 { 980 ASSERT(grp->lg_refs == 0); 981 kmem_cache_free(aggr_grp_cache, grp); 982 } 983 984 /* 985 * Walker invoked when building the list of configured groups and 986 * their ports that must be passed up to user-space. 
987 */ 988 989 /*ARGSUSED*/ 990 static uint_t 991 aggr_grp_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 992 { 993 aggr_grp_t *grp; 994 aggr_port_t *port; 995 aggr_grp_info_state_t *state = arg; 996 997 if (state->ls_rc != 0) 998 return (MH_WALK_TERMINATE); /* terminate walk */ 999 1000 grp = (aggr_grp_t *)val; 1001 1002 rw_enter(&grp->lg_lock, RW_READER); 1003 1004 if (state->ls_group_key != 0 && grp->lg_key != state->ls_group_key) 1005 goto bail; 1006 1007 state->ls_group_found = B_TRUE; 1008 1009 state->ls_rc = state->ls_new_grp_fn(state->ls_fn_arg, grp->lg_key, 1010 grp->lg_addr, grp->lg_addr_fixed, grp->lg_tx_policy, 1011 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 1012 1013 if (state->ls_rc != 0) 1014 goto bail; 1015 1016 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1017 1018 rw_enter(&port->lp_lock, RW_READER); 1019 1020 state->ls_rc = state->ls_new_port_fn(state->ls_fn_arg, 1021 port->lp_devname, port->lp_port, port->lp_addr, 1022 port->lp_state, &port->lp_lacp.ActorOperPortState); 1023 1024 rw_exit(&port->lp_lock); 1025 1026 if (state->ls_rc != 0) 1027 goto bail; 1028 } 1029 1030 bail: 1031 rw_exit(&grp->lg_lock); 1032 return ((state->ls_rc == 0) ? 
MH_WALK_CONTINUE : MH_WALK_TERMINATE); 1033 } 1034 1035 int 1036 aggr_grp_info(uint_t *ngroups, uint32_t group_key, void *fn_arg, 1037 aggr_grp_info_new_grp_fn_t new_grp_fn, 1038 aggr_grp_info_new_port_fn_t new_port_fn) 1039 { 1040 aggr_grp_info_state_t state; 1041 int rc = 0; 1042 1043 rw_enter(&aggr_grp_lock, RW_READER); 1044 1045 *ngroups = aggr_grp_cnt; 1046 1047 bzero(&state, sizeof (state)); 1048 state.ls_group_key = group_key; 1049 state.ls_new_grp_fn = new_grp_fn; 1050 state.ls_new_port_fn = new_port_fn; 1051 state.ls_fn_arg = fn_arg; 1052 1053 mod_hash_walk(aggr_grp_hash, aggr_grp_info_walker, &state); 1054 1055 if ((rc = state.ls_rc) == 0 && group_key != 0 && 1056 !state.ls_group_found) 1057 rc = ENOENT; 1058 1059 rw_exit(&aggr_grp_lock); 1060 return (rc); 1061 } 1062 1063 static void 1064 aggr_m_resources(void *arg) 1065 { 1066 aggr_grp_t *grp = arg; 1067 aggr_port_t *port; 1068 1069 /* Call each port's m_resources function */ 1070 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 1071 mac_resources(port->lp_mh); 1072 } 1073 1074 /*ARGSUSED*/ 1075 static void 1076 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1077 { 1078 miocnak(q, mp, 0, ENOTSUP); 1079 } 1080 1081 static uint64_t 1082 aggr_m_stat(void *arg, enum mac_stat stat) 1083 { 1084 aggr_grp_t *grp = arg; 1085 aggr_port_t *port; 1086 uint64_t val; 1087 1088 rw_enter(&grp->lg_lock, RW_READER); 1089 1090 switch (stat) { 1091 case MAC_STAT_IFSPEED: 1092 val = grp->lg_ifspeed; 1093 break; 1094 case MAC_STAT_LINK_DUPLEX: 1095 val = grp->lg_link_duplex; 1096 break; 1097 default: 1098 /* 1099 * The remaining statistics are counters. They are computed 1100 * by aggregating the counters of the members MACs while they 1101 * were aggregated, plus the residual counter of the group 1102 * itself, which is updated each time a MAC is removed from 1103 * the group. 
1104 */ 1105 val = 0; 1106 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1107 /* actual port statistic */ 1108 val += aggr_port_stat(port, stat); 1109 /* minus the port stat when it was added */ 1110 val -= port->lp_stat[stat]; 1111 /* plus any residual amount for the group */ 1112 val += grp->lg_stat[stat]; 1113 } 1114 } 1115 1116 rw_exit(&grp->lg_lock); 1117 return (val); 1118 } 1119 1120 static int 1121 aggr_m_start(void *arg) 1122 { 1123 aggr_grp_t *grp = arg; 1124 aggr_port_t *port; 1125 1126 AGGR_LACP_LOCK(grp); 1127 rw_enter(&grp->lg_lock, RW_WRITER); 1128 1129 /* 1130 * Attempts to start all configured members of the group. 1131 * Group members will be attached when their link-up notification 1132 * is received. 1133 */ 1134 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1135 rw_enter(&port->lp_lock, RW_WRITER); 1136 if (aggr_port_start(port) != 0) { 1137 rw_exit(&port->lp_lock); 1138 continue; 1139 } 1140 1141 /* set port promiscuous mode */ 1142 if (aggr_port_promisc(port, grp->lg_promisc) != 0) 1143 aggr_port_stop(port); 1144 rw_exit(&port->lp_lock); 1145 } 1146 1147 grp->lg_started = B_TRUE; 1148 1149 rw_exit(&grp->lg_lock); 1150 AGGR_LACP_UNLOCK(grp); 1151 1152 return (0); 1153 } 1154 1155 static void 1156 aggr_m_stop(void *arg) 1157 { 1158 aggr_grp_t *grp = arg; 1159 aggr_port_t *port; 1160 1161 rw_enter(&grp->lg_lock, RW_WRITER); 1162 1163 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1164 rw_enter(&port->lp_lock, RW_WRITER); 1165 aggr_port_stop(port); 1166 rw_exit(&port->lp_lock); 1167 } 1168 1169 grp->lg_started = B_FALSE; 1170 1171 rw_exit(&grp->lg_lock); 1172 } 1173 1174 static int 1175 aggr_m_promisc(void *arg, boolean_t on) 1176 { 1177 aggr_grp_t *grp = arg; 1178 aggr_port_t *port; 1179 1180 AGGR_LACP_LOCK(grp); 1181 rw_enter(&grp->lg_lock, RW_WRITER); 1182 AGGR_GRP_REFHOLD(grp); 1183 1184 if (on == grp->lg_promisc) 1185 goto bail; 1186 1187 for (port = grp->lg_ports; port != NULL; port = 
port->lp_next) { 1188 rw_enter(&port->lp_lock, RW_WRITER); 1189 AGGR_PORT_REFHOLD(port); 1190 if (port->lp_started) { 1191 if (aggr_port_promisc(port, on) != 0) 1192 (void) aggr_grp_detach_port(grp, port); 1193 } 1194 rw_exit(&port->lp_lock); 1195 AGGR_PORT_REFRELE(port); 1196 if (grp->lg_closing) 1197 break; 1198 } 1199 1200 grp->lg_promisc = on; 1201 1202 bail: 1203 rw_exit(&grp->lg_lock); 1204 AGGR_LACP_UNLOCK(grp); 1205 AGGR_GRP_REFRELE(grp); 1206 1207 return (0); 1208 } 1209 1210 /* 1211 * Add or remove the multicast addresses that are defined for the group 1212 * to or from the specified port. 1213 * This function is called before stopping a port, before a port 1214 * is detached from a group, and when attaching a port to a group. 1215 */ 1216 void 1217 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add) 1218 { 1219 aggr_grp_t *grp = port->lp_grp; 1220 1221 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 1222 ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock)); 1223 1224 if (!port->lp_started) 1225 return; 1226 1227 mac_multicst_refresh(&grp->lg_mac, aggr_port_multicst, port, 1228 add); 1229 } 1230 1231 static int 1232 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 1233 { 1234 aggr_grp_t *grp = arg; 1235 aggr_port_t *port = NULL; 1236 int err = 0, cerr; 1237 1238 rw_enter(&grp->lg_lock, RW_WRITER); 1239 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1240 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 1241 continue; 1242 cerr = aggr_port_multicst(port, add, addrp); 1243 if (cerr != 0 && err == 0) 1244 err = cerr; 1245 } 1246 rw_exit(&grp->lg_lock); 1247 return (err); 1248 } 1249 1250 static int 1251 aggr_m_unicst(void *arg, const uint8_t *macaddr) 1252 { 1253 aggr_grp_t *grp = arg; 1254 int rc; 1255 1256 AGGR_LACP_LOCK(grp); 1257 rw_enter(&grp->lg_lock, RW_WRITER); 1258 rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr, 1259 0, 0); 1260 rw_exit(&grp->lg_lock); 1261 AGGR_LACP_UNLOCK(grp); 1262 1263 
return (rc); 1264 } 1265 1266 /* 1267 * Initialize the capabilities that are advertised for the group 1268 * according to the capabilities of the constituent ports. 1269 */ 1270 static void 1271 aggr_grp_capab_set(aggr_grp_t *grp) 1272 { 1273 uint32_t cksum = (uint32_t)-1; 1274 uint32_t poll = DL_CAPAB_POLL; 1275 aggr_port_t *port; 1276 const mac_info_t *port_mi; 1277 1278 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 1279 1280 ASSERT(grp->lg_ports != NULL); 1281 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1282 port_mi = mac_info(port->lp_mh); 1283 cksum &= port_mi->mi_cksum; 1284 poll &= port_mi->mi_poll; 1285 } 1286 1287 grp->lg_mac.m_info.mi_cksum = cksum; 1288 grp->lg_mac.m_info.mi_poll = poll; 1289 } 1290 1291 /* 1292 * Checks whether the capabilities of the ports being added are compatible 1293 * with the current capabilities of the aggregation. 1294 */ 1295 static boolean_t 1296 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 1297 { 1298 const mac_info_t *port_mi = mac_info(port->lp_mh); 1299 uint32_t grp_cksum = grp->lg_mac.m_info.mi_cksum; 1300 1301 ASSERT(grp->lg_ports != NULL); 1302 1303 return (((grp_cksum & port_mi->mi_cksum) == grp_cksum) && 1304 (grp->lg_mac.m_info.mi_poll == port_mi->mi_poll)); 1305 } 1306