1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups. 28 * 29 * An instance of the structure aggr_grp_t is allocated for each 30 * link aggregation group. When created, aggr_grp_t objects are 31 * entered into the aggr_grp_hash hash table maintained by the modhash 32 * module. The hash key is the linkid associated with the link 33 * aggregation group. 34 * 35 * A set of MAC ports are associated with each aggregation group. 
36 */ 37 38 #include <sys/types.h> 39 #include <sys/sysmacros.h> 40 #include <sys/conf.h> 41 #include <sys/cmn_err.h> 42 #include <sys/disp.h> 43 #include <sys/list.h> 44 #include <sys/ksynch.h> 45 #include <sys/kmem.h> 46 #include <sys/stream.h> 47 #include <sys/modctl.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/atomic.h> 51 #include <sys/stat.h> 52 #include <sys/modhash.h> 53 #include <sys/id_space.h> 54 #include <sys/strsun.h> 55 #include <sys/dlpi.h> 56 #include <sys/mac_provider.h> 57 #include <sys/dls.h> 58 #include <sys/vlan.h> 59 #include <sys/aggr.h> 60 #include <sys/aggr_impl.h> 61 62 static int aggr_m_start(void *); 63 static void aggr_m_stop(void *); 64 static int aggr_m_promisc(void *, boolean_t); 65 static int aggr_m_multicst(void *, boolean_t, const uint8_t *); 66 static int aggr_m_unicst(void *, const uint8_t *); 67 static int aggr_m_stat(void *, uint_t, uint64_t *); 68 static void aggr_m_ioctl(void *, queue_t *, mblk_t *); 69 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *); 70 static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t, 71 const void *); 72 static int aggr_m_getprop(void *, const char *, mac_prop_id_t, uint_t, 73 uint_t, void *, uint_t *); 74 75 76 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t); 77 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *, 78 boolean_t *); 79 80 static void aggr_grp_capab_set(aggr_grp_t *); 81 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); 82 static uint_t aggr_grp_max_sdu(aggr_grp_t *); 83 static uint32_t aggr_grp_max_margin(aggr_grp_t *); 84 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *); 85 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *); 86 87 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *); 88 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *); 89 static int 
aggr_pseudo_disable_intr(mac_intr_handle_t); 90 static int aggr_pseudo_enable_intr(mac_intr_handle_t); 91 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t); 92 static void aggr_pseudo_stop_ring(mac_ring_driver_t); 93 static int aggr_addmac(void *, const uint8_t *); 94 static int aggr_remmac(void *, const uint8_t *); 95 static mblk_t *aggr_rx_poll(void *, int); 96 static void aggr_fill_ring(void *, mac_ring_type_t, const int, 97 const int, mac_ring_info_t *, mac_ring_handle_t); 98 static void aggr_fill_group(void *, mac_ring_type_t, const int, 99 mac_group_info_t *, mac_group_handle_t); 100 101 static kmem_cache_t *aggr_grp_cache; 102 static mod_hash_t *aggr_grp_hash; 103 static krwlock_t aggr_grp_lock; 104 static uint_t aggr_grp_cnt; 105 static id_space_t *key_ids; 106 107 #define GRP_HASHSZ 64 108 #define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid) 109 #define AGGR_PORT_NAME_DELIMIT '-' 110 111 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; 112 113 #define AGGR_M_CALLBACK_FLAGS \ 114 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) 115 116 static mac_callbacks_t aggr_m_callbacks = { 117 AGGR_M_CALLBACK_FLAGS, 118 aggr_m_stat, 119 aggr_m_start, 120 aggr_m_stop, 121 aggr_m_promisc, 122 aggr_m_multicst, 123 NULL, 124 aggr_m_tx, 125 aggr_m_ioctl, 126 aggr_m_capab_get, 127 NULL, 128 NULL, 129 aggr_m_setprop, 130 aggr_m_getprop 131 }; 132 133 /*ARGSUSED*/ 134 static int 135 aggr_grp_constructor(void *buf, void *arg, int kmflag) 136 { 137 aggr_grp_t *grp = buf; 138 139 bzero(grp, sizeof (*grp)); 140 mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL); 141 cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL); 142 rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL); 143 mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL); 144 cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL); 145 grp->lg_link_state = LINK_STATE_UNKNOWN; 146 return (0); 147 } 148 149 /*ARGSUSED*/ 150 static void 151 aggr_grp_destructor(void *buf, void *arg) 152 { 153 
aggr_grp_t *grp = buf; 154 155 if (grp->lg_tx_ports != NULL) { 156 kmem_free(grp->lg_tx_ports, 157 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 158 } 159 160 mutex_destroy(&grp->lg_lacp_lock); 161 cv_destroy(&grp->lg_lacp_cv); 162 mutex_destroy(&grp->lg_port_lock); 163 cv_destroy(&grp->lg_port_cv); 164 rw_destroy(&grp->lg_tx_lock); 165 } 166 167 void 168 aggr_grp_init(void) 169 { 170 aggr_grp_cache = kmem_cache_create("aggr_grp_cache", 171 sizeof (aggr_grp_t), 0, aggr_grp_constructor, 172 aggr_grp_destructor, NULL, NULL, NULL, 0); 173 174 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash", 175 GRP_HASHSZ, mod_hash_null_valdtor); 176 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); 177 aggr_grp_cnt = 0; 178 179 /* 180 * Allocate an id space to manage key values (when key is not 181 * specified). The range of the id space will be from 182 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol 183 * uses a 16-bit key. 184 */ 185 key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX); 186 ASSERT(key_ids != NULL); 187 } 188 189 void 190 aggr_grp_fini(void) 191 { 192 id_space_destroy(key_ids); 193 rw_destroy(&aggr_grp_lock); 194 mod_hash_destroy_idhash(aggr_grp_hash); 195 kmem_cache_destroy(aggr_grp_cache); 196 } 197 198 uint_t 199 aggr_grp_count(void) 200 { 201 uint_t count; 202 203 rw_enter(&aggr_grp_lock, RW_READER); 204 count = aggr_grp_cnt; 205 rw_exit(&aggr_grp_lock); 206 return (count); 207 } 208 209 /* 210 * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions 211 * requires the mac perimeter, this function holds a reference of the aggr 212 * and aggr won't call mac_unregister() until this reference drops to 0. 
213 */ 214 void 215 aggr_grp_port_hold(aggr_port_t *port) 216 { 217 aggr_grp_t *grp = port->lp_grp; 218 219 AGGR_PORT_REFHOLD(port); 220 mutex_enter(&grp->lg_port_lock); 221 grp->lg_port_ref++; 222 mutex_exit(&grp->lg_port_lock); 223 } 224 225 /* 226 * Release the reference of the grp and inform aggr_grp_delete() calling 227 * mac_unregister() is now safe. 228 */ 229 void 230 aggr_grp_port_rele(aggr_port_t *port) 231 { 232 aggr_grp_t *grp = port->lp_grp; 233 234 mutex_enter(&grp->lg_port_lock); 235 if (--grp->lg_port_ref == 0) 236 cv_signal(&grp->lg_port_cv); 237 mutex_exit(&grp->lg_port_lock); 238 AGGR_PORT_REFRELE(port); 239 } 240 241 /* 242 * Wait for the port's lacp timer thread and the port's notification callback 243 * to exit. 244 */ 245 void 246 aggr_grp_port_wait(aggr_grp_t *grp) 247 { 248 mutex_enter(&grp->lg_port_lock); 249 if (grp->lg_port_ref != 0) 250 cv_wait(&grp->lg_port_cv, &grp->lg_port_lock); 251 mutex_exit(&grp->lg_port_lock); 252 } 253 254 /* 255 * Attach a port to a link aggregation group. 256 * 257 * A port is attached to a link aggregation group once its speed 258 * and link state have been verified. 259 * 260 * Returns B_TRUE if the group link state or speed has changed. If 261 * it's the case, the caller must notify the MAC layer via a call 262 * to mac_link(). 263 */ 264 boolean_t 265 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) 266 { 267 boolean_t link_state_changed = B_FALSE; 268 269 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 270 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 271 272 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 273 return (B_FALSE); 274 275 /* 276 * Validate the MAC port link speed and update the group 277 * link speed if needed. 278 */ 279 if (port->lp_ifspeed == 0 || 280 port->lp_link_state != LINK_STATE_UP || 281 port->lp_link_duplex != LINK_DUPLEX_FULL) { 282 /* 283 * Can't attach a MAC port with unknown link speed, 284 * down link, or not in full duplex mode. 
285 */ 286 return (B_FALSE); 287 } 288 289 if (grp->lg_ifspeed == 0) { 290 /* 291 * The group inherits the speed of the first link being 292 * attached. 293 */ 294 grp->lg_ifspeed = port->lp_ifspeed; 295 link_state_changed = B_TRUE; 296 } else if (grp->lg_ifspeed != port->lp_ifspeed) { 297 /* 298 * The link speed of the MAC port must be the same as 299 * the group link speed, as per 802.3ad. Since it is 300 * not, the attach is cancelled. 301 */ 302 return (B_FALSE); 303 } 304 305 grp->lg_nattached_ports++; 306 307 /* 308 * Update the group link state. 309 */ 310 if (grp->lg_link_state != LINK_STATE_UP) { 311 grp->lg_link_state = LINK_STATE_UP; 312 grp->lg_link_duplex = LINK_DUPLEX_FULL; 313 link_state_changed = B_TRUE; 314 } 315 316 /* 317 * Update port's state. 318 */ 319 port->lp_state = AGGR_PORT_STATE_ATTACHED; 320 321 aggr_grp_multicst_port(port, B_TRUE); 322 323 /* 324 * Set port's receive callback 325 */ 326 mac_rx_set(port->lp_mch, aggr_recv_cb, port); 327 328 /* 329 * If LACP is OFF, the port can be used to send data as soon 330 * as its link is up and verified to be compatible with the 331 * aggregation. 332 * 333 * If LACP is active or passive, notify the LACP subsystem, which 334 * will enable sending on the port following the LACP protocol. 
335 */ 336 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 337 aggr_send_port_enable(port); 338 else 339 aggr_lacp_port_attached(port); 340 341 return (link_state_changed); 342 } 343 344 boolean_t 345 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) 346 { 347 boolean_t link_state_changed = B_FALSE; 348 349 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 350 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 351 352 /* update state */ 353 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 354 return (B_FALSE); 355 356 mac_rx_clear(port->lp_mch); 357 358 aggr_grp_multicst_port(port, B_FALSE); 359 360 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 361 aggr_send_port_disable(port); 362 else 363 aggr_lacp_port_detached(port); 364 365 port->lp_state = AGGR_PORT_STATE_STANDBY; 366 367 grp->lg_nattached_ports--; 368 if (grp->lg_nattached_ports == 0) { 369 /* the last attached MAC port of the group is being detached */ 370 grp->lg_ifspeed = 0; 371 grp->lg_link_state = LINK_STATE_DOWN; 372 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 373 link_state_changed = B_TRUE; 374 } 375 376 return (link_state_changed); 377 } 378 379 /* 380 * Update the MAC addresses of the constituent ports of the specified 381 * group. This function is invoked: 382 * - after creating a new aggregation group. 383 * - after adding new ports to an aggregation group. 384 * - after removing a port from a group when the MAC address of 385 * that port was used for the MAC address of the group. 386 * - after the MAC address of a port changed when the MAC address 387 * of that port was used for the MAC address of the group. 388 * 389 * Return true if the link state of the aggregation changed, for example 390 * as a result of a failure changing the MAC address of one of the 391 * constituent ports. 
392 */ 393 boolean_t 394 aggr_grp_update_ports_mac(aggr_grp_t *grp) 395 { 396 aggr_port_t *cport; 397 boolean_t link_state_changed = B_FALSE; 398 mac_perim_handle_t mph; 399 400 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 401 402 for (cport = grp->lg_ports; cport != NULL; 403 cport = cport->lp_next) { 404 mac_perim_enter_by_mh(cport->lp_mh, &mph); 405 if (aggr_port_unicst(cport) != 0) { 406 if (aggr_grp_detach_port(grp, cport)) 407 link_state_changed = B_TRUE; 408 } else { 409 /* 410 * If a port was detached because of a previous 411 * failure changing the MAC address, the port is 412 * reattached when it successfully changes the MAC 413 * address now, and this might cause the link state 414 * of the aggregation to change. 415 */ 416 if (aggr_grp_attach_port(grp, cport)) 417 link_state_changed = B_TRUE; 418 } 419 mac_perim_exit(mph); 420 } 421 return (link_state_changed); 422 } 423 424 /* 425 * Invoked when the MAC address of a port has changed. If the port's 426 * MAC address was used for the group MAC address, set mac_addr_changedp 427 * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST 428 * notification. If the link state changes due to detach/attach of 429 * the constituent port, set link_state_changedp to B_TRUE to indicate 430 * to the caller that it should send a MAC_NOTE_LINK notification. In both 431 * cases, it is the responsibility of the caller to invoke notification 432 * functions after releasing the the port lock. 433 */ 434 void 435 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port, 436 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 437 { 438 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 439 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 440 ASSERT(mac_addr_changedp != NULL); 441 ASSERT(link_state_changedp != NULL); 442 443 *mac_addr_changedp = B_FALSE; 444 *link_state_changedp = B_FALSE; 445 446 if (grp->lg_addr_fixed) { 447 /* 448 * The group is using a fixed MAC address or an automatic 449 * MAC address has not been set. 
450 */ 451 return; 452 } 453 454 if (grp->lg_mac_addr_port == port) { 455 /* 456 * The MAC address of the port was assigned to the group 457 * MAC address. Update the group MAC address. 458 */ 459 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 460 *mac_addr_changedp = B_TRUE; 461 } else { 462 /* 463 * Update the actual port MAC address to the MAC address 464 * of the group. 465 */ 466 if (aggr_port_unicst(port) != 0) { 467 *link_state_changedp = aggr_grp_detach_port(grp, port); 468 } else { 469 /* 470 * If a port was detached because of a previous 471 * failure changing the MAC address, the port is 472 * reattached when it successfully changes the MAC 473 * address now, and this might cause the link state 474 * of the aggregation to change. 475 */ 476 *link_state_changedp = aggr_grp_attach_port(grp, port); 477 } 478 } 479 } 480 481 /* 482 * Add a port to a link aggregation group. 483 */ 484 static int 485 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force, 486 aggr_port_t **pp) 487 { 488 aggr_port_t *port, **cport; 489 mac_perim_handle_t mph; 490 int err; 491 492 /* 493 * lg_mh could be NULL when the function is called during the creation 494 * of the aggregation. 495 */ 496 ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh)); 497 498 /* create new port */ 499 err = aggr_port_create(grp, port_linkid, force, &port); 500 if (err != 0) 501 return (err); 502 503 mac_perim_enter_by_mh(port->lp_mh, &mph); 504 505 /* add port to list of group constituent ports */ 506 cport = &grp->lg_ports; 507 while (*cport != NULL) 508 cport = &((*cport)->lp_next); 509 *cport = port; 510 511 /* 512 * Back reference to the group it is member of. A port always 513 * holds a reference to its group to ensure that the back 514 * reference is always valid. 
515 */ 516 port->lp_grp = grp; 517 AGGR_GRP_REFHOLD(grp); 518 grp->lg_nports++; 519 520 aggr_lacp_init_port(port); 521 mac_perim_exit(mph); 522 523 if (pp != NULL) 524 *pp = port; 525 526 return (0); 527 } 528 529 /* 530 * Add a pseudo Rx ring for the given HW ring handle. 531 */ 532 static int 533 aggr_add_pseudo_rx_ring(aggr_port_t *port, 534 aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 535 { 536 aggr_pseudo_rx_ring_t *ring; 537 int err; 538 int j; 539 540 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 541 ring = rx_grp->arg_rings + j; 542 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE)) 543 break; 544 } 545 546 /* 547 * No slot for this new Rx ring. 548 */ 549 if (j == MAX_RINGS_PER_GROUP) 550 return (EIO); 551 552 ring->arr_flags |= MAC_PSEUDO_RING_INUSE; 553 ring->arr_hw_rh = hw_rh; 554 ring->arr_port = port; 555 rx_grp->arg_ring_cnt++; 556 557 /* 558 * The group is already registered, dynamically add a new ring to the 559 * mac group. 560 */ 561 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring); 562 if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) { 563 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 564 ring->arr_hw_rh = NULL; 565 ring->arr_port = NULL; 566 rx_grp->arg_ring_cnt--; 567 mac_hwring_teardown(hw_rh); 568 } 569 return (err); 570 } 571 572 /* 573 * Remove the pseudo Rx ring of the given HW ring handle. 
574 */ 575 static void 576 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 577 { 578 aggr_pseudo_rx_ring_t *ring; 579 int j; 580 581 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 582 ring = rx_grp->arg_rings + j; 583 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) || 584 ring->arr_hw_rh != hw_rh) { 585 continue; 586 } 587 588 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh); 589 590 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 591 ring->arr_hw_rh = NULL; 592 ring->arr_port = NULL; 593 rx_grp->arg_ring_cnt--; 594 mac_hwring_teardown(hw_rh); 595 break; 596 } 597 } 598 599 /* 600 * This function is called to create pseudo rings over the hardware rings of 601 * the underlying device. Note that there is a 1:1 mapping between the pseudo 602 * RX rings of the aggr and the hardware rings of the underlying port. 603 */ 604 static int 605 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 606 { 607 aggr_grp_t *grp = port->lp_grp; 608 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 609 aggr_unicst_addr_t *addr, *a; 610 mac_perim_handle_t pmph; 611 int hw_rh_cnt, i = 0, j; 612 int err = 0; 613 614 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 615 mac_perim_enter_by_mh(port->lp_mh, &pmph); 616 617 /* 618 * This function must be called after the aggr registers its mac 619 * and its RX group has been initialized. 620 */ 621 ASSERT(rx_grp->arg_gh != NULL); 622 623 /* 624 * Get the list the the underlying HW rings. 625 */ 626 hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh); 627 628 if (port->lp_hwgh != NULL) { 629 /* 630 * Quiesce the HW ring and the mac srs on the ring. Note 631 * that the HW ring will be restarted when the pseudo ring 632 * is started. At that time all the packets will be 633 * directly passed up to the pseudo RX ring and handled 634 * by mac srs created over the pseudo RX ring. 
635 */ 636 mac_rx_client_quiesce(port->lp_mch); 637 mac_srs_perm_quiesce(port->lp_mch, B_TRUE); 638 } 639 640 /* 641 * Add all the unicast addresses to the newly added port. 642 */ 643 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) { 644 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0) 645 break; 646 } 647 648 for (i = 0; err == 0 && i < hw_rh_cnt; i++) 649 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]); 650 651 if (err != 0) { 652 for (j = 0; j < i; j++) 653 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]); 654 655 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next) 656 aggr_port_remmac(port, a->aua_addr); 657 658 if (port->lp_hwgh != NULL) { 659 mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 660 mac_rx_client_restart(port->lp_mch); 661 port->lp_hwgh = NULL; 662 } 663 } else { 664 port->lp_grp_added = B_TRUE; 665 } 666 done: 667 mac_perim_exit(pmph); 668 return (err); 669 } 670 671 /* 672 * This function is called by aggr to remove pseudo RX rings over the 673 * HW rings of the underlying port. 674 */ 675 static void 676 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 677 { 678 aggr_grp_t *grp = port->lp_grp; 679 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 680 aggr_unicst_addr_t *addr; 681 mac_group_handle_t hwgh; 682 mac_perim_handle_t pmph; 683 int hw_rh_cnt, i; 684 685 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 686 mac_perim_enter_by_mh(port->lp_mh, &pmph); 687 688 if (!port->lp_grp_added) 689 goto done; 690 691 ASSERT(rx_grp->arg_gh != NULL); 692 hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh); 693 694 /* 695 * If hw_rh_cnt is 0, it means that the underlying port does not 696 * support RX rings. Directly return in this case. 
697 */ 698 for (i = 0; i < hw_rh_cnt; i++) 699 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]); 700 701 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) 702 aggr_port_remmac(port, addr->aua_addr); 703 704 if (port->lp_hwgh != NULL) { 705 port->lp_hwgh = NULL; 706 707 /* 708 * First clear the permanent-quiesced flag of the RX srs then 709 * restart the HW ring and the mac srs on the ring. Note that 710 * the HW ring and associated SRS will soon been removed when 711 * the port is removed from the aggr. 712 */ 713 mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 714 mac_rx_client_restart(port->lp_mch); 715 } 716 717 port->lp_grp_added = B_FALSE; 718 done: 719 mac_perim_exit(pmph); 720 } 721 722 static int 723 aggr_pseudo_disable_intr(mac_intr_handle_t ih) 724 { 725 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 726 return (mac_hwring_disable_intr(rr_ring->arr_hw_rh)); 727 } 728 729 static int 730 aggr_pseudo_enable_intr(mac_intr_handle_t ih) 731 { 732 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 733 return (mac_hwring_enable_intr(rr_ring->arr_hw_rh)); 734 } 735 736 static int 737 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen) 738 { 739 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg; 740 int err; 741 742 err = mac_hwring_start(rr_ring->arr_hw_rh); 743 if (err == 0) 744 rr_ring->arr_gen = mr_gen; 745 return (err); 746 } 747 748 static void 749 aggr_pseudo_stop_ring(mac_ring_driver_t arg) 750 { 751 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg; 752 mac_hwring_stop(rr_ring->arr_hw_rh); 753 } 754 755 /* 756 * Add one or more ports to an existing link aggregation group. 
757 */ 758 int 759 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force, 760 laioc_port_t *ports) 761 { 762 int rc, i, nadded = 0; 763 aggr_grp_t *grp = NULL; 764 aggr_port_t *port; 765 boolean_t link_state_changed = B_FALSE; 766 mac_perim_handle_t mph, pmph; 767 768 /* get group corresponding to linkid */ 769 rw_enter(&aggr_grp_lock, RW_READER); 770 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 771 (mod_hash_val_t *)&grp) != 0) { 772 rw_exit(&aggr_grp_lock); 773 return (ENOENT); 774 } 775 AGGR_GRP_REFHOLD(grp); 776 777 /* 778 * Hold the perimeter so that the aggregation won't be destroyed. 779 */ 780 mac_perim_enter_by_mh(grp->lg_mh, &mph); 781 rw_exit(&aggr_grp_lock); 782 783 /* add the specified ports to group */ 784 for (i = 0; i < nports; i++) { 785 /* add port to group */ 786 if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid, 787 force, &port)) != 0) { 788 goto bail; 789 } 790 ASSERT(port != NULL); 791 nadded++; 792 793 /* check capabilities */ 794 if (!aggr_grp_capab_check(grp, port) || 795 !aggr_grp_sdu_check(grp, port) || 796 !aggr_grp_margin_check(grp, port)) { 797 rc = ENOTSUP; 798 goto bail; 799 } 800 801 /* 802 * Create the pseudo ring for each HW ring of the underlying 803 * port. 804 */ 805 rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group); 806 if (rc != 0) 807 goto bail; 808 809 mac_perim_enter_by_mh(port->lp_mh, &pmph); 810 811 /* set LACP mode */ 812 aggr_port_lacp_set_mode(grp, port); 813 814 /* start port if group has already been started */ 815 if (grp->lg_started) { 816 rc = aggr_port_start(port); 817 if (rc != 0) { 818 mac_perim_exit(pmph); 819 goto bail; 820 } 821 822 /* 823 * Turn on the promiscuous mode over the port when it 824 * is requested to be turned on to receive the 825 * non-primary address over a port, or the promiscous 826 * mode is enabled over the aggr. 
827 */ 828 if (grp->lg_promisc || port->lp_prom_addr != NULL) { 829 rc = aggr_port_promisc(port, B_TRUE); 830 if (rc != 0) { 831 mac_perim_exit(pmph); 832 goto bail; 833 } 834 } 835 } 836 mac_perim_exit(pmph); 837 838 /* 839 * Attach each port if necessary. 840 */ 841 if (aggr_port_notify_link(grp, port)) 842 link_state_changed = B_TRUE; 843 844 /* 845 * Initialize the callback functions for this port. 846 */ 847 aggr_port_init_callbacks(port); 848 } 849 850 /* update the MAC address of the constituent ports */ 851 if (aggr_grp_update_ports_mac(grp)) 852 link_state_changed = B_TRUE; 853 854 if (link_state_changed) 855 mac_link_update(grp->lg_mh, grp->lg_link_state); 856 857 bail: 858 if (rc != 0) { 859 /* stop and remove ports that have been added */ 860 for (i = 0; i < nadded; i++) { 861 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 862 ASSERT(port != NULL); 863 if (grp->lg_started) { 864 mac_perim_enter_by_mh(port->lp_mh, &pmph); 865 (void) aggr_port_promisc(port, B_FALSE); 866 aggr_port_stop(port); 867 mac_perim_exit(pmph); 868 } 869 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 870 (void) aggr_grp_rem_port(grp, port, NULL, NULL); 871 } 872 } 873 874 if (rc == 0) 875 mac_resource_update(grp->lg_mh); 876 mac_perim_exit(mph); 877 AGGR_GRP_REFRELE(grp); 878 return (rc); 879 } 880 881 static int 882 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy, 883 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 884 aggr_lacp_timer_t lacp_timer) 885 { 886 boolean_t mac_addr_changed = B_FALSE; 887 boolean_t link_state_changed = B_FALSE; 888 mac_perim_handle_t pmph; 889 890 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 891 892 /* validate fixed address if specified */ 893 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 894 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 895 (mac_addr[0] & 0x01))) { 896 return (EINVAL); 897 } 898 899 /* update policy if requested */ 900 if (update_mask & AGGR_MODIFY_POLICY) 901 
aggr_send_update_policy(grp, policy); 902 903 /* update unicast MAC address if requested */ 904 if (update_mask & AGGR_MODIFY_MAC) { 905 if (mac_fixed) { 906 /* user-supplied MAC address */ 907 grp->lg_mac_addr_port = NULL; 908 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 909 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 910 mac_addr_changed = B_TRUE; 911 } 912 } else if (grp->lg_addr_fixed) { 913 /* switch from user-supplied to automatic */ 914 aggr_port_t *port = grp->lg_ports; 915 916 mac_perim_enter_by_mh(port->lp_mh, &pmph); 917 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 918 grp->lg_mac_addr_port = port; 919 mac_addr_changed = B_TRUE; 920 mac_perim_exit(pmph); 921 } 922 grp->lg_addr_fixed = mac_fixed; 923 } 924 925 if (mac_addr_changed) 926 link_state_changed = aggr_grp_update_ports_mac(grp); 927 928 if (update_mask & AGGR_MODIFY_LACP_MODE) 929 aggr_lacp_update_mode(grp, lacp_mode); 930 931 if (update_mask & AGGR_MODIFY_LACP_TIMER) 932 aggr_lacp_update_timer(grp, lacp_timer); 933 934 if (link_state_changed) 935 mac_link_update(grp->lg_mh, grp->lg_link_state); 936 937 if (mac_addr_changed) 938 mac_unicst_update(grp->lg_mh, grp->lg_addr); 939 940 return (0); 941 } 942 943 /* 944 * Update properties of an existing link aggregation group. 945 */ 946 int 947 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy, 948 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 949 aggr_lacp_timer_t lacp_timer) 950 { 951 aggr_grp_t *grp = NULL; 952 mac_perim_handle_t mph; 953 int err; 954 955 /* get group corresponding to linkid */ 956 rw_enter(&aggr_grp_lock, RW_READER); 957 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 958 (mod_hash_val_t *)&grp) != 0) { 959 rw_exit(&aggr_grp_lock); 960 return (ENOENT); 961 } 962 AGGR_GRP_REFHOLD(grp); 963 964 /* 965 * Hold the perimeter so that the aggregation won't be destroyed. 
966 */ 967 mac_perim_enter_by_mh(grp->lg_mh, &mph); 968 rw_exit(&aggr_grp_lock); 969 970 err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed, 971 mac_addr, lacp_mode, lacp_timer); 972 973 mac_perim_exit(mph); 974 AGGR_GRP_REFRELE(grp); 975 return (err); 976 } 977 978 /* 979 * Create a new link aggregation group upon request from administrator. 980 * Returns 0 on success, an errno on failure. 981 */ 982 int 983 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, 984 laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force, 985 uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer) 986 { 987 aggr_grp_t *grp = NULL; 988 aggr_port_t *port; 989 mac_register_t *mac; 990 boolean_t link_state_changed; 991 mac_perim_handle_t mph; 992 int err; 993 int i; 994 995 /* need at least one port */ 996 if (nports == 0) 997 return (EINVAL); 998 999 rw_enter(&aggr_grp_lock, RW_WRITER); 1000 1001 /* does a group with the same linkid already exist? 
*/ 1002 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1003 (mod_hash_val_t *)&grp); 1004 if (err == 0) { 1005 rw_exit(&aggr_grp_lock); 1006 return (EEXIST); 1007 } 1008 1009 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 1010 1011 grp->lg_refs = 1; 1012 grp->lg_closing = B_FALSE; 1013 grp->lg_force = force; 1014 grp->lg_linkid = linkid; 1015 grp->lg_ifspeed = 0; 1016 grp->lg_link_state = LINK_STATE_UNKNOWN; 1017 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 1018 grp->lg_started = B_FALSE; 1019 grp->lg_promisc = B_FALSE; 1020 grp->lg_lacp_done = B_FALSE; 1021 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 1022 grp->lg_lacp_rx_thread = thread_create(NULL, 0, 1023 aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri); 1024 bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t)); 1025 aggr_lacp_init_grp(grp); 1026 1027 /* add MAC ports to group */ 1028 grp->lg_ports = NULL; 1029 grp->lg_nports = 0; 1030 grp->lg_nattached_ports = 0; 1031 grp->lg_ntx_ports = 0; 1032 1033 /* 1034 * If key is not specified by the user, allocate the key. 1035 */ 1036 if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) { 1037 err = ENOMEM; 1038 goto bail; 1039 } 1040 grp->lg_key = key; 1041 1042 for (i = 0; i < nports; i++) { 1043 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL); 1044 if (err != 0) 1045 goto bail; 1046 } 1047 1048 /* 1049 * If no explicit MAC address was specified by the administrator, 1050 * set it to the MAC address of the first port. 
1051 */ 1052 grp->lg_addr_fixed = mac_fixed; 1053 if (grp->lg_addr_fixed) { 1054 /* validate specified address */ 1055 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 1056 err = EINVAL; 1057 goto bail; 1058 } 1059 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 1060 } else { 1061 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 1062 grp->lg_mac_addr_port = grp->lg_ports; 1063 } 1064 1065 /* set the initial group capabilities */ 1066 aggr_grp_capab_set(grp); 1067 1068 if ((mac = mac_alloc(MAC_VERSION)) == NULL) { 1069 err = ENOMEM; 1070 goto bail; 1071 } 1072 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1073 mac->m_driver = grp; 1074 mac->m_dip = aggr_dip; 1075 mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key; 1076 mac->m_src_addr = grp->lg_addr; 1077 mac->m_callbacks = &aggr_m_callbacks; 1078 mac->m_min_sdu = 0; 1079 mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp); 1080 mac->m_margin = aggr_grp_max_margin(grp); 1081 mac->m_v12n = MAC_VIRT_LEVEL1; 1082 err = mac_register(mac, &grp->lg_mh); 1083 mac_free(mac); 1084 if (err != 0) 1085 goto bail; 1086 1087 if ((err = dls_devnet_create(grp->lg_mh, grp->lg_linkid)) != 0) { 1088 (void) mac_unregister(grp->lg_mh); 1089 grp->lg_mh = NULL; 1090 goto bail; 1091 } 1092 1093 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1094 1095 /* 1096 * Update the MAC address of the constituent ports. 1097 * None of the port is attached at this time, the link state of the 1098 * aggregation will not change. 1099 */ 1100 link_state_changed = aggr_grp_update_ports_mac(grp); 1101 ASSERT(!link_state_changed); 1102 1103 /* update outbound load balancing policy */ 1104 aggr_send_update_policy(grp, policy); 1105 1106 /* set LACP mode */ 1107 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 1108 1109 /* 1110 * Attach each port if necessary. 1111 */ 1112 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1113 /* 1114 * Create the pseudo ring for each HW ring of the underlying 1115 * port. 
Note that this is done after the aggr registers the
		 * mac.
		 */
		VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
		if (aggr_port_notify_link(grp, port))
			link_state_changed = B_TRUE;

		/*
		 * Initialize the callback functions for this port.
		 */
		aggr_port_init_callbacks(port);
	}

	if (link_state_changed)
		mac_link_update(grp->lg_mh, grp->lg_link_state);

	/* add new group to hash table */
	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
	    (mod_hash_val_t)grp);
	ASSERT(err == 0);
	aggr_grp_cnt++;

	mac_perim_exit(mph);
	rw_exit(&aggr_grp_lock);
	return (0);

bail:
	/*
	 * Error path: mark the group as closing and delete every port that
	 * is on the group's port list.
	 */
	grp->lg_closing = B_TRUE;

	port = grp->lg_ports;
	while (port != NULL) {
		aggr_port_t *cport;

		/* save the successor before the port is deleted */
		cport = port->lp_next;
		aggr_port_delete(port);
		port = cport;
	}

	/*
	 * Inform the lacp_rx thread to exit, and wait until
	 * lg_lacp_rx_thread has been cleared.
	 */
	mutex_enter(&grp->lg_lacp_lock);
	grp->lg_lacp_done = B_TRUE;
	cv_signal(&grp->lg_lacp_cv);
	while (grp->lg_lacp_rx_thread != NULL)
		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
	mutex_exit(&grp->lg_lacp_lock);

	rw_exit(&aggr_grp_lock);
	/* drop the reference taken when lg_refs was initialized to 1 */
	AGGR_GRP_REFRELE(grp);
	return (err);
}

/*
 * Return a pointer to the member of a group with specified linkid,
 * or NULL if no port on the group's port list has that linkid.
 *
 * The caller must hold the MAC perimeter of the group (asserted below).
 */
static aggr_port_t *
aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
{
	aggr_port_t *port;

	ASSERT(MAC_PERIM_HELD(grp->lg_mh));

	/* linear walk; port is NULL on exit when nothing matched */
	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
		if (port->lp_linkid == linkid)
			break;
	}

	return (port);
}

/*
 * Stop, detach and remove a port from a link aggregation group.
 *
 * Returns ENOENT if the port is not on the group's port list, 0 otherwise.
 * When the pointers are non-NULL, *mac_addr_changedp and
 * *link_state_changedp report whether the MAC address or the link state
 * of the group changed as a result of the removal.
1189 */ 1190 static int 1191 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, 1192 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 1193 { 1194 int rc = 0; 1195 aggr_port_t **pport; 1196 boolean_t mac_addr_changed = B_FALSE; 1197 boolean_t link_state_changed = B_FALSE; 1198 mac_perim_handle_t mph; 1199 uint64_t val; 1200 uint_t i; 1201 uint_t stat; 1202 1203 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1204 ASSERT(grp->lg_nports > 1); 1205 ASSERT(!grp->lg_closing); 1206 1207 /* unlink port */ 1208 for (pport = &grp->lg_ports; *pport != port; 1209 pport = &(*pport)->lp_next) { 1210 if (*pport == NULL) { 1211 rc = ENOENT; 1212 goto done; 1213 } 1214 } 1215 *pport = port->lp_next; 1216 1217 mac_perim_enter_by_mh(port->lp_mh, &mph); 1218 1219 /* 1220 * If the MAC address of the port being removed was assigned 1221 * to the group, update the group MAC address 1222 * using the MAC address of a different port. 1223 */ 1224 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 1225 /* 1226 * Set the MAC address of the group to the 1227 * MAC address of its first port. 1228 */ 1229 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 1230 grp->lg_mac_addr_port = grp->lg_ports; 1231 mac_addr_changed = B_TRUE; 1232 } 1233 1234 link_state_changed = aggr_grp_detach_port(grp, port); 1235 1236 /* 1237 * Add the counter statistics of the ports while it was aggregated 1238 * to the group's residual statistics. This is done by obtaining 1239 * the current counter from the underlying MAC then subtracting the 1240 * value of the counter at the moment it was added to the 1241 * aggregation. 
1242 */ 1243 for (i = 0; i < MAC_NSTAT; i++) { 1244 stat = i + MAC_STAT_MIN; 1245 if (!MAC_STAT_ISACOUNTER(stat)) 1246 continue; 1247 val = aggr_port_stat(port, stat); 1248 val -= port->lp_stat[i]; 1249 grp->lg_stat[i] += val; 1250 } 1251 for (i = 0; i < ETHER_NSTAT; i++) { 1252 stat = i + MACTYPE_STAT_MIN; 1253 if (!ETHER_STAT_ISACOUNTER(stat)) 1254 continue; 1255 val = aggr_port_stat(port, stat); 1256 val -= port->lp_ether_stat[i]; 1257 grp->lg_ether_stat[i] += val; 1258 } 1259 1260 grp->lg_nports--; 1261 mac_perim_exit(mph); 1262 1263 aggr_port_delete(port); 1264 1265 /* 1266 * If the group MAC address has changed, update the MAC address of 1267 * the remaining constituent ports according to the new MAC 1268 * address of the group. 1269 */ 1270 if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) 1271 link_state_changed = B_TRUE; 1272 1273 done: 1274 if (mac_addr_changedp != NULL) 1275 *mac_addr_changedp = mac_addr_changed; 1276 if (link_state_changedp != NULL) 1277 *link_state_changedp = link_state_changed; 1278 1279 return (rc); 1280 } 1281 1282 /* 1283 * Remove one or more ports from an existing link aggregation group. 1284 */ 1285 int 1286 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports) 1287 { 1288 int rc = 0, i; 1289 aggr_grp_t *grp = NULL; 1290 aggr_port_t *port; 1291 boolean_t mac_addr_update = B_FALSE, mac_addr_changed; 1292 boolean_t link_state_update = B_FALSE, link_state_changed; 1293 mac_perim_handle_t mph, pmph; 1294 1295 /* get group corresponding to linkid */ 1296 rw_enter(&aggr_grp_lock, RW_READER); 1297 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1298 (mod_hash_val_t *)&grp) != 0) { 1299 rw_exit(&aggr_grp_lock); 1300 return (ENOENT); 1301 } 1302 AGGR_GRP_REFHOLD(grp); 1303 1304 /* 1305 * Hold the perimeter so that the aggregation won't be destroyed. 
1306 */ 1307 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1308 rw_exit(&aggr_grp_lock); 1309 1310 /* we need to keep at least one port per group */ 1311 if (nports >= grp->lg_nports) { 1312 rc = EINVAL; 1313 goto bail; 1314 } 1315 1316 /* first verify that all the groups are valid */ 1317 for (i = 0; i < nports; i++) { 1318 if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) { 1319 /* port not found */ 1320 rc = ENOENT; 1321 goto bail; 1322 } 1323 } 1324 1325 /* clear the promiscous mode for the specified ports */ 1326 for (i = 0; i < nports && rc == 0; i++) { 1327 /* lookup port */ 1328 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1329 ASSERT(port != NULL); 1330 1331 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1332 rc = aggr_port_promisc(port, B_FALSE); 1333 mac_perim_exit(pmph); 1334 } 1335 if (rc != 0) { 1336 for (i = 0; i < nports; i++) { 1337 port = aggr_grp_port_lookup(grp, 1338 ports[i].lp_linkid); 1339 ASSERT(port != NULL); 1340 1341 /* 1342 * Turn the promiscuous mode back on if it is required 1343 * to receive the non-primary address over a port, or 1344 * the promiscous mode is enabled over the aggr. 
1345 */ 1346 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1347 if (port->lp_started && (grp->lg_promisc || 1348 port->lp_prom_addr != NULL)) { 1349 (void) aggr_port_promisc(port, B_TRUE); 1350 } 1351 mac_perim_exit(pmph); 1352 } 1353 goto bail; 1354 } 1355 1356 /* remove the specified ports from group */ 1357 for (i = 0; i < nports; i++) { 1358 /* lookup port */ 1359 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1360 ASSERT(port != NULL); 1361 1362 /* stop port if group has already been started */ 1363 if (grp->lg_started) { 1364 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1365 aggr_port_stop(port); 1366 mac_perim_exit(pmph); 1367 } 1368 1369 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1370 /* remove port from group */ 1371 rc = aggr_grp_rem_port(grp, port, &mac_addr_changed, 1372 &link_state_changed); 1373 ASSERT(rc == 0); 1374 mac_addr_update = mac_addr_update || mac_addr_changed; 1375 link_state_update = link_state_update || link_state_changed; 1376 } 1377 1378 bail: 1379 if (mac_addr_update) 1380 mac_unicst_update(grp->lg_mh, grp->lg_addr); 1381 if (link_state_update) 1382 mac_link_update(grp->lg_mh, grp->lg_link_state); 1383 if (rc == 0) 1384 mac_resource_update(grp->lg_mh); 1385 1386 mac_perim_exit(mph); 1387 AGGR_GRP_REFRELE(grp); 1388 1389 return (rc); 1390 } 1391 1392 int 1393 aggr_grp_delete(datalink_id_t linkid) 1394 { 1395 aggr_grp_t *grp = NULL; 1396 aggr_port_t *port, *cport; 1397 datalink_id_t tmpid; 1398 mod_hash_val_t val; 1399 mac_perim_handle_t mph, pmph; 1400 int err; 1401 1402 rw_enter(&aggr_grp_lock, RW_WRITER); 1403 1404 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1405 (mod_hash_val_t *)&grp) != 0) { 1406 rw_exit(&aggr_grp_lock); 1407 return (ENOENT); 1408 } 1409 1410 /* 1411 * Note that dls_devnet_destroy() must be called before lg_lock is 1412 * held. Otherwise, it will deadlock if another thread is in 1413 * aggr_m_stat() and thus has a kstat_hold() on the kstats that 1414 * dls_devnet_destroy() needs to delete. 
1415 */ 1416 if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) { 1417 rw_exit(&aggr_grp_lock); 1418 return (err); 1419 } 1420 ASSERT(linkid == tmpid); 1421 1422 /* 1423 * Unregister from the MAC service module. Since this can 1424 * fail if a client hasn't closed the MAC port, we gracefully 1425 * fail the operation. 1426 */ 1427 if ((err = mac_disable(grp->lg_mh)) != 0) { 1428 (void) dls_devnet_create(grp->lg_mh, linkid); 1429 rw_exit(&aggr_grp_lock); 1430 return (err); 1431 } 1432 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val); 1433 ASSERT(grp == (aggr_grp_t *)val); 1434 1435 ASSERT(aggr_grp_cnt > 0); 1436 aggr_grp_cnt--; 1437 rw_exit(&aggr_grp_lock); 1438 1439 /* 1440 * Inform the lacp_rx thread to exit. 1441 */ 1442 mutex_enter(&grp->lg_lacp_lock); 1443 grp->lg_lacp_done = B_TRUE; 1444 cv_signal(&grp->lg_lacp_cv); 1445 while (grp->lg_lacp_rx_thread != NULL) 1446 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 1447 mutex_exit(&grp->lg_lacp_lock); 1448 1449 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1450 1451 grp->lg_closing = B_TRUE; 1452 /* detach and free MAC ports associated with group */ 1453 port = grp->lg_ports; 1454 while (port != NULL) { 1455 cport = port->lp_next; 1456 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1457 if (grp->lg_started) 1458 aggr_port_stop(port); 1459 (void) aggr_grp_detach_port(grp, port); 1460 mac_perim_exit(pmph); 1461 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1462 aggr_port_delete(port); 1463 port = cport; 1464 } 1465 1466 mac_perim_exit(mph); 1467 1468 /* 1469 * Wait for the port's lacp timer thread and its notification callback 1470 * to exit before calling mac_unregister() since both needs to access 1471 * the mac perimeter of the grp. 
1472 */ 1473 aggr_grp_port_wait(grp); 1474 1475 VERIFY(mac_unregister(grp->lg_mh) == 0); 1476 grp->lg_mh = NULL; 1477 1478 AGGR_GRP_REFRELE(grp); 1479 return (0); 1480 } 1481 1482 void 1483 aggr_grp_free(aggr_grp_t *grp) 1484 { 1485 ASSERT(grp->lg_refs == 0); 1486 ASSERT(grp->lg_port_ref == 0); 1487 if (grp->lg_key > AGGR_MAX_KEY) { 1488 id_free(key_ids, grp->lg_key); 1489 grp->lg_key = 0; 1490 } 1491 kmem_cache_free(aggr_grp_cache, grp); 1492 } 1493 1494 int 1495 aggr_grp_info(datalink_id_t linkid, void *fn_arg, 1496 aggr_grp_info_new_grp_fn_t new_grp_fn, 1497 aggr_grp_info_new_port_fn_t new_port_fn) 1498 { 1499 aggr_grp_t *grp; 1500 aggr_port_t *port; 1501 mac_perim_handle_t mph, pmph; 1502 int rc = 0; 1503 1504 rw_enter(&aggr_grp_lock, RW_READER); 1505 1506 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1507 (mod_hash_val_t *)&grp) != 0) { 1508 rw_exit(&aggr_grp_lock); 1509 return (ENOENT); 1510 } 1511 AGGR_GRP_REFHOLD(grp); 1512 1513 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1514 rw_exit(&aggr_grp_lock); 1515 1516 rc = new_grp_fn(fn_arg, grp->lg_linkid, 1517 (grp->lg_key > AGGR_MAX_KEY) ? 
0 : grp->lg_key, grp->lg_addr, 1518 grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy, 1519 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 1520 1521 if (rc != 0) 1522 goto bail; 1523 1524 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1525 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1526 rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr, 1527 port->lp_state, &port->lp_lacp.ActorOperPortState); 1528 mac_perim_exit(pmph); 1529 1530 if (rc != 0) 1531 goto bail; 1532 } 1533 1534 bail: 1535 mac_perim_exit(mph); 1536 AGGR_GRP_REFRELE(grp); 1537 return (rc); 1538 } 1539 1540 /*ARGSUSED*/ 1541 static void 1542 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1543 { 1544 miocnak(q, mp, 0, ENOTSUP); 1545 } 1546 1547 static int 1548 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) 1549 { 1550 aggr_port_t *port; 1551 uint_t stat_index; 1552 1553 /* We only aggregate counter statistics. */ 1554 if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) || 1555 IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) { 1556 return (ENOTSUP); 1557 } 1558 1559 /* 1560 * Counter statistics for a group are computed by aggregating the 1561 * counters of the members MACs while they were aggregated, plus 1562 * the residual counter of the group itself, which is updated each 1563 * time a MAC is removed from the group. 1564 */ 1565 *val = 0; 1566 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1567 /* actual port statistic */ 1568 *val += aggr_port_stat(port, stat); 1569 /* 1570 * minus the port stat when it was added, plus any residual 1571 * amount for the group. 
1572 */ 1573 if (IS_MAC_STAT(stat)) { 1574 stat_index = stat - MAC_STAT_MIN; 1575 *val -= port->lp_stat[stat_index]; 1576 *val += grp->lg_stat[stat_index]; 1577 } else if (IS_MACTYPE_STAT(stat)) { 1578 stat_index = stat - MACTYPE_STAT_MIN; 1579 *val -= port->lp_ether_stat[stat_index]; 1580 *val += grp->lg_ether_stat[stat_index]; 1581 } 1582 } 1583 return (0); 1584 } 1585 1586 static int 1587 aggr_m_stat(void *arg, uint_t stat, uint64_t *val) 1588 { 1589 aggr_grp_t *grp = arg; 1590 mac_perim_handle_t mph; 1591 int rval = 0; 1592 1593 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1594 1595 switch (stat) { 1596 case MAC_STAT_IFSPEED: 1597 *val = grp->lg_ifspeed; 1598 break; 1599 1600 case ETHER_STAT_LINK_DUPLEX: 1601 *val = grp->lg_link_duplex; 1602 break; 1603 1604 default: 1605 /* 1606 * For all other statistics, we return the aggregated stat 1607 * from the underlying ports. aggr_grp_stat() will set 1608 * rval appropriately if the statistic isn't a counter. 1609 */ 1610 rval = aggr_grp_stat(grp, stat, val); 1611 } 1612 1613 mac_perim_exit(mph); 1614 return (rval); 1615 } 1616 1617 static int 1618 aggr_m_start(void *arg) 1619 { 1620 aggr_grp_t *grp = arg; 1621 aggr_port_t *port; 1622 mac_perim_handle_t mph, pmph; 1623 1624 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1625 1626 /* 1627 * Attempts to start all configured members of the group. 1628 * Group members will be attached when their link-up notification 1629 * is received. 1630 */ 1631 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1632 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1633 if (aggr_port_start(port) != 0) { 1634 mac_perim_exit(pmph); 1635 continue; 1636 } 1637 1638 /* 1639 * Turn on the promiscuous mode if it is required to receive 1640 * the non-primary address over a port, or the promiscous 1641 * mode is enabled over the aggr. 
1642 */ 1643 if (grp->lg_promisc || port->lp_prom_addr != NULL) { 1644 if (aggr_port_promisc(port, B_TRUE) != 0) 1645 aggr_port_stop(port); 1646 } 1647 mac_perim_exit(pmph); 1648 } 1649 1650 grp->lg_started = B_TRUE; 1651 1652 mac_perim_exit(mph); 1653 return (0); 1654 } 1655 1656 static void 1657 aggr_m_stop(void *arg) 1658 { 1659 aggr_grp_t *grp = arg; 1660 aggr_port_t *port; 1661 mac_perim_handle_t mph, pmph; 1662 1663 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1664 1665 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1666 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1667 1668 /* reset port promiscuous mode */ 1669 (void) aggr_port_promisc(port, B_FALSE); 1670 1671 aggr_port_stop(port); 1672 mac_perim_exit(pmph); 1673 } 1674 1675 grp->lg_started = B_FALSE; 1676 mac_perim_exit(mph); 1677 } 1678 1679 static int 1680 aggr_m_promisc(void *arg, boolean_t on) 1681 { 1682 aggr_grp_t *grp = arg; 1683 aggr_port_t *port; 1684 boolean_t link_state_changed = B_FALSE; 1685 mac_perim_handle_t mph, pmph; 1686 1687 AGGR_GRP_REFHOLD(grp); 1688 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1689 1690 ASSERT(!grp->lg_closing); 1691 1692 if (on == grp->lg_promisc) 1693 goto bail; 1694 1695 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1696 int err = 0; 1697 1698 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1699 AGGR_PORT_REFHOLD(port); 1700 if (!on && (port->lp_prom_addr == NULL)) 1701 err = aggr_port_promisc(port, B_FALSE); 1702 else if (on && port->lp_started) 1703 err = aggr_port_promisc(port, B_TRUE); 1704 1705 if (err != 0) { 1706 if (aggr_grp_detach_port(grp, port)) 1707 link_state_changed = B_TRUE; 1708 } else { 1709 /* 1710 * If a port was detached because of a previous 1711 * failure changing the promiscuity, the port 1712 * is reattached when it successfully changes 1713 * the promiscuity now, and this might cause 1714 * the link state of the aggregation to change. 
1715 */ 1716 if (aggr_grp_attach_port(grp, port)) 1717 link_state_changed = B_TRUE; 1718 } 1719 mac_perim_exit(pmph); 1720 AGGR_PORT_REFRELE(port); 1721 } 1722 1723 grp->lg_promisc = on; 1724 1725 if (link_state_changed) 1726 mac_link_update(grp->lg_mh, grp->lg_link_state); 1727 1728 bail: 1729 mac_perim_exit(mph); 1730 AGGR_GRP_REFRELE(grp); 1731 1732 return (0); 1733 } 1734 1735 static void 1736 aggr_grp_port_rename(const char *new_name, void *arg) 1737 { 1738 /* 1739 * aggr port's mac client name is the format of "aggr link name" plus 1740 * AGGR_PORT_NAME_DELIMIT plus "underneath link name". 1741 */ 1742 int aggr_len, link_len, clnt_name_len, i; 1743 char *str_end, *str_st, *str_del; 1744 char aggr_name[MAXNAMELEN]; 1745 char link_name[MAXNAMELEN]; 1746 char *clnt_name; 1747 aggr_grp_t *aggr_grp = arg; 1748 aggr_port_t *aggr_port = aggr_grp->lg_ports; 1749 1750 for (i = 0; i < aggr_grp->lg_nports; i++) { 1751 clnt_name = mac_client_name(aggr_port->lp_mch); 1752 clnt_name_len = strlen(clnt_name); 1753 str_st = clnt_name; 1754 str_end = &(clnt_name[clnt_name_len]); 1755 str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT); 1756 ASSERT(str_del != NULL); 1757 aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st); 1758 link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del); 1759 bzero(aggr_name, MAXNAMELEN); 1760 bzero(link_name, MAXNAMELEN); 1761 bcopy(clnt_name, aggr_name, aggr_len); 1762 bcopy(str_del, link_name, link_len + 1); 1763 bzero(clnt_name, MAXNAMELEN); 1764 (void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name, 1765 link_name); 1766 1767 (void) mac_rename_primary(aggr_port->lp_mh, NULL); 1768 aggr_port = aggr_port->lp_next; 1769 } 1770 } 1771 1772 /* 1773 * Initialize the capabilities that are advertised for the group 1774 * according to the capabilities of the constituent ports. 
1775 */ 1776 static boolean_t 1777 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 1778 { 1779 aggr_grp_t *grp = arg; 1780 1781 switch (cap) { 1782 case MAC_CAPAB_HCKSUM: { 1783 uint32_t *hcksum_txflags = cap_data; 1784 *hcksum_txflags = grp->lg_hcksum_txflags; 1785 break; 1786 } 1787 case MAC_CAPAB_LSO: { 1788 mac_capab_lso_t *cap_lso = cap_data; 1789 1790 if (grp->lg_lso) { 1791 *cap_lso = grp->lg_cap_lso; 1792 break; 1793 } else { 1794 return (B_FALSE); 1795 } 1796 } 1797 case MAC_CAPAB_NO_NATIVEVLAN: 1798 return (!grp->lg_vlan); 1799 case MAC_CAPAB_NO_ZCOPY: 1800 return (!grp->lg_zcopy); 1801 case MAC_CAPAB_RINGS: { 1802 mac_capab_rings_t *cap_rings = cap_data; 1803 1804 if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 1805 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 1806 cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt; 1807 cap_rings->mr_rget = aggr_fill_ring; 1808 1809 /* 1810 * An aggregation advertises only one (pseudo) RX 1811 * group, which virtualizes the main/primary group of 1812 * the underlying devices. 1813 */ 1814 cap_rings->mr_gnum = 1; 1815 cap_rings->mr_gget = aggr_fill_group; 1816 cap_rings->mr_gaddring = NULL; 1817 cap_rings->mr_gremring = NULL; 1818 } else { 1819 return (B_FALSE); 1820 } 1821 break; 1822 } 1823 case MAC_CAPAB_AGGR: 1824 { 1825 mac_capab_aggr_t *aggr_cap; 1826 1827 if (cap_data != NULL) { 1828 aggr_cap = cap_data; 1829 aggr_cap->mca_rename_fn = aggr_grp_port_rename; 1830 aggr_cap->mca_unicst = aggr_m_unicst; 1831 } 1832 return (B_TRUE); 1833 } 1834 default: 1835 return (B_FALSE); 1836 } 1837 return (B_TRUE); 1838 } 1839 1840 /* 1841 * Callback funtion for MAC layer to register groups. 
*/
static void
aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
    mac_group_info_t *infop, mac_group_handle_t gh)
{
	aggr_grp_t *grp = arg;
	aggr_pseudo_rx_group_t *rx_group;

	/* only the single pseudo RX group (index 0) is ever registered */
	ASSERT(rtype == MAC_RING_TYPE_RX && index == 0);
	rx_group = &grp->lg_rx_group;
	rx_group->arg_gh = gh;
	rx_group->arg_grp = grp;

	infop->mgi_driver = (mac_group_driver_t)rx_group;
	infop->mgi_start = NULL;
	infop->mgi_stop = NULL;
	infop->mgi_addmac = aggr_addmac;
	infop->mgi_remmac = aggr_remmac;
	infop->mgi_count = rx_group->arg_ring_cnt;
}

/*
 * Callback function for MAC layer to register all rings.
 * Only RX rings are handled; any other ring type leaves infop untouched.
 */
static void
aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
    const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
{
	aggr_grp_t *grp = arg;

	switch (rtype) {
	case MAC_RING_TYPE_RX: {
		aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_group;
		aggr_pseudo_rx_ring_t *rx_ring;
		mac_intr_t aggr_mac_intr;

		ASSERT(rg_index == 0);

		ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
		rx_ring = rx_group->arg_rings + index;
		/* remember the MAC layer's handle for this pseudo ring */
		rx_ring->arr_rh = rh;

		/*
		 * Entrypoint to enable interrupt (disable poll) and
		 * disable interrupt (enable poll).
		 */
		aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
		aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
		aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;

		infop->mri_driver = (mac_ring_driver_t)rx_ring;
		infop->mri_start = aggr_pseudo_start_ring;
		infop->mri_stop = aggr_pseudo_stop_ring;

		infop->mri_intr = aggr_mac_intr;
		infop->mri_poll = aggr_rx_poll;
		break;
	}
	default:
		break;
	}
}

/*
 * Poll entry point of a pseudo RX ring (installed as mri_poll above).
 * Polls the underlying hardware ring and returns the resulting chain.
 * Unless LACP is off for the group, the chain is filtered first: slow
 * protocol frames are handed to aggr_recv_lacp(), and all other frames
 * are freed while the collector of the port is disabled.
 */
static mblk_t *
aggr_rx_poll(void *arg, int bytes_to_pickup)
{
	aggr_pseudo_rx_ring_t *rr_ring = arg;
	aggr_port_t *port = rr_ring->arr_port;
	aggr_grp_t *grp = port->lp_grp;
	mblk_t *mp_chain, *mp, **mpp;

	mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);

	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
		return (mp_chain);

	/*
	 * mpp always points at the link that refers to the current
	 * message, so a message can be unlinked from the chain in place.
	 */
	mpp = &mp_chain;
	while ((mp = *mpp) != NULL) {
		if (MBLKL(mp) >= sizeof (struct ether_header)) {
			struct ether_header *ehp;

			ehp = (struct ether_header *)mp->b_rptr;
			if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
				/* unlink the frame and pass it to LACP */
				*mpp = mp->b_next;
				mp->b_next = NULL;
				aggr_recv_lacp(port,
				    (mac_resource_handle_t)rr_ring, mp);
				continue;
			}
		}

		if (!port->lp_collector_enabled) {
			/* unlink and drop the frame */
			*mpp = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
			continue;
		}
		mpp = &mp->b_next;
	}
	return (mp_chain);
}

/*
 * Add a unicast address to the pseudo RX group (installed as mgi_addmac).
 * Returns 0 without doing anything if the address is the address of the
 * group itself, EEXIST if the address is already on the group's list, or
 * the error of the first port that refuses the address, in which case
 * the address is removed again from the ports that had accepted it.
 */
static int
aggr_addmac(void *arg, const uint8_t *mac_addr)
{
	aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg;
	aggr_unicst_addr_t *addr, **pprev;
	aggr_grp_t *grp = rx_group->arg_grp;
	aggr_port_t *port, *p;
	mac_perim_handle_t mph;
	int err = 0;

	mac_perim_enter_by_mh(grp->lg_mh, &mph);

	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
		mac_perim_exit(mph);
		return (0);
	}

	/*
	 * Insert this mac address into the list of mac addresses owned by
	 * the aggregation pseudo group.
	 */
	pprev = &rx_group->arg_macaddr;
	while ((addr = *pprev) != NULL) {
		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
			mac_perim_exit(mph);
			return (EEXIST);
		}
		pprev = &addr->aua_next;
	}
	/* pprev now refers to the terminating link: append the new entry */
	addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
	bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
	addr->aua_next = NULL;
	*pprev = addr;

	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
		if ((err = aggr_port_addmac(port, mac_addr)) != 0)
			break;

	if (err != 0) {
		/* undo the ports that precede the one that failed */
		for (p = grp->lg_ports; p != port; p = p->lp_next)
			aggr_port_remmac(p, mac_addr);

		/* the new entry is the last one, so this unlinks it */
		*pprev = NULL;
		kmem_free(addr, sizeof (aggr_unicst_addr_t));
	}

	mac_perim_exit(mph);
	return (err);
}

/*
 * Remove a unicast address from the pseudo RX group (installed as
 * mgi_remmac).  Returns 0 without doing anything if the address is the
 * address of the group itself, EINVAL if the address is not on the
 * group's list, and 0 after the address has been removed from every
 * port and from the list.
 */
static int
aggr_remmac(void *arg, const uint8_t *mac_addr)
{
	aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg;
	aggr_unicst_addr_t *addr, **pprev;
	aggr_grp_t *grp = rx_group->arg_grp;
	aggr_port_t *port;
	mac_perim_handle_t mph;
	int err = 0;

	mac_perim_enter_by_mh(grp->lg_mh, &mph);

	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
		mac_perim_exit(mph);
		return (0);
	}

	/*
	 * Look this mac address up in the list of mac addresses owned by
	 * the aggregation pseudo group, so that it can be unlinked below.
	 */
	pprev = &rx_group->arg_macaddr;
	while ((addr = *pprev) != NULL) {
		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
			pprev = &addr->aua_next;
			continue;
		}
		break;
	}
	if (addr == NULL) {
		mac_perim_exit(mph);
		return (EINVAL);
	}

	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
		aggr_port_remmac(port, mac_addr);

	/* unlink and free the list entry */
	*pprev = addr->aua_next;
	kmem_free(addr, sizeof (aggr_unicst_addr_t));

	mac_perim_exit(mph);
	return (err);
}

/*
 * Add or remove the multicast addresses that are defined for the group
 * to or from the specified port.
 *
 * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
 * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
 * called when the port is either stopped or detached.
 */
void
aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
{
	aggr_grp_t *grp = port->lp_grp;

	ASSERT(MAC_PERIM_HELD(port->lp_mh));
	ASSERT(MAC_PERIM_HELD(grp->lg_mh));

	/* nothing to do unless the port is both started and attached */
	if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
		return;

	mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
}

/*
 * Multicast entry point of the group: adds or removes the address on
 * every port that is started and attached.  All such ports are visited
 * even after a failure; the first error encountered is returned.
 */
static int
aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
{
	aggr_grp_t *grp = arg;
	aggr_port_t *port = NULL;
	mac_perim_handle_t mph;
	int err = 0, cerr;

	mac_perim_enter_by_mh(grp->lg_mh, &mph);
	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
		if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
		    !port->lp_started) {
			continue;
		}
		cerr = aggr_port_multicst(port, add, addrp);
		if (cerr != 0 && err == 0)
			err = cerr;
	}
	mac_perim_exit(mph);
	return (err);
}

/*
 * Unicast entry point of the group: sets the MAC address of the group
 * through aggr_grp_modify_common() (AGGR_MODIFY_MAC) and returns its
 * result.
 */
static int
aggr_m_unicst(void *arg, const uint8_t *macaddr)
{
	aggr_grp_t *grp = arg;
	mac_perim_handle_t mph;
	int err;

	mac_perim_enter_by_mh(grp->lg_mh, &mph);
	err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
	    0, 0);
	mac_perim_exit(mph);
	return (err);
}

/*
 * Initialize the capabilities that are advertised for the group
 * according to the capabilities of the constituent ports.
 */
static void
aggr_grp_capab_set(aggr_grp_t *grp)
{
	uint32_t cksum;
	aggr_port_t *port;
	mac_capab_lso_t cap_lso;

	ASSERT(grp->lg_mh == NULL);
	ASSERT(grp->lg_ports != NULL);

	/*
	 * Start from "everything supported" and narrow the capabilities
	 * down with each port in the loop below.
	 */
	grp->lg_hcksum_txflags = (uint32_t)-1;
	grp->lg_zcopy = B_TRUE;
	grp->lg_vlan = B_TRUE;

	grp->lg_lso = B_TRUE;
	grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
	grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;

	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
		/* a port without the capability contributes no flags */
		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
			cksum = 0;
		grp->lg_hcksum_txflags &= cksum;

		grp->lg_vlan &=
		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);

		grp->lg_zcopy &=
		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);

		grp->lg_lso &=
		    mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
		if (grp->lg_lso) {
			/* keep the common flags and the smallest lso_max */
			grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
			    cap_lso.lso_basic_tcp_ipv4.lso_max)
				grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
				    cap_lso.lso_basic_tcp_ipv4.lso_max;
		}
	}
}

/*
 * Checks whether the capabilities of the port being added are compatible
 * with the current capabilities of the aggregation.
2144 */ 2145 static boolean_t 2146 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 2147 { 2148 uint32_t hcksum_txflags; 2149 2150 ASSERT(grp->lg_ports != NULL); 2151 2152 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) & 2153 grp->lg_vlan) != grp->lg_vlan) { 2154 return (B_FALSE); 2155 } 2156 2157 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) & 2158 grp->lg_zcopy) != grp->lg_zcopy) { 2159 return (B_FALSE); 2160 } 2161 2162 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) { 2163 if (grp->lg_hcksum_txflags != 0) 2164 return (B_FALSE); 2165 } else if ((hcksum_txflags & grp->lg_hcksum_txflags) != 2166 grp->lg_hcksum_txflags) { 2167 return (B_FALSE); 2168 } 2169 2170 if (grp->lg_lso) { 2171 mac_capab_lso_t cap_lso; 2172 2173 if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) { 2174 if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) != 2175 grp->lg_cap_lso.lso_flags) 2176 return (B_FALSE); 2177 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max > 2178 cap_lso.lso_basic_tcp_ipv4.lso_max) 2179 return (B_FALSE); 2180 } else { 2181 return (B_FALSE); 2182 } 2183 } 2184 2185 return (B_TRUE); 2186 } 2187 2188 /* 2189 * Returns the maximum SDU according to the SDU of the constituent ports. 2190 */ 2191 static uint_t 2192 aggr_grp_max_sdu(aggr_grp_t *grp) 2193 { 2194 uint_t max_sdu = (uint_t)-1; 2195 aggr_port_t *port; 2196 2197 ASSERT(grp->lg_ports != NULL); 2198 2199 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2200 uint_t port_sdu_max; 2201 2202 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2203 if (max_sdu > port_sdu_max) 2204 max_sdu = port_sdu_max; 2205 } 2206 2207 return (max_sdu); 2208 } 2209 2210 /* 2211 * Checks if the maximum SDU of the specified port is compatible 2212 * with the maximum SDU of the specified aggregation group, returns 2213 * B_TRUE if it is, B_FALSE otherwise. 
2214 */ 2215 static boolean_t 2216 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port) 2217 { 2218 uint_t port_sdu_max; 2219 2220 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2221 return (port_sdu_max >= grp->lg_max_sdu); 2222 } 2223 2224 /* 2225 * Returns the maximum margin according to the margin of the constituent ports. 2226 */ 2227 static uint32_t 2228 aggr_grp_max_margin(aggr_grp_t *grp) 2229 { 2230 uint32_t margin = UINT32_MAX; 2231 aggr_port_t *port; 2232 2233 ASSERT(grp->lg_mh == NULL); 2234 ASSERT(grp->lg_ports != NULL); 2235 2236 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2237 if (margin > port->lp_margin) 2238 margin = port->lp_margin; 2239 } 2240 2241 grp->lg_margin = margin; 2242 return (margin); 2243 } 2244 2245 /* 2246 * Checks if the maximum margin of the specified port is compatible 2247 * with the maximum margin of the specified aggregation group, returns 2248 * B_TRUE if it is, B_FALSE otherwise. 2249 */ 2250 static boolean_t 2251 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port) 2252 { 2253 if (port->lp_margin >= grp->lg_margin) 2254 return (B_TRUE); 2255 2256 /* 2257 * See whether the current margin value is allowed to be changed to 2258 * the new value. 
2259 */ 2260 if (!mac_margin_update(grp->lg_mh, port->lp_margin)) 2261 return (B_FALSE); 2262 2263 grp->lg_margin = port->lp_margin; 2264 return (B_TRUE); 2265 } 2266 2267 /* 2268 * Set MTU on individual ports of an aggregation group 2269 */ 2270 static int 2271 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu, 2272 uint32_t *old_mtu) 2273 { 2274 boolean_t removed = B_FALSE; 2275 mac_perim_handle_t mph; 2276 mac_diag_t diag; 2277 int err, rv, retry = 0; 2278 2279 if (port->lp_mah != NULL) { 2280 (void) mac_unicast_remove(port->lp_mch, port->lp_mah); 2281 port->lp_mah = NULL; 2282 removed = B_TRUE; 2283 } 2284 err = mac_set_mtu(port->lp_mh, sdu, old_mtu); 2285 try_again: 2286 if (removed && (rv = mac_unicast_add(port->lp_mch, NULL, 2287 MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK, 2288 &port->lp_mah, 0, &diag)) != 0) { 2289 /* 2290 * following is a workaround for a bug in 'bge' driver. 2291 * See CR 6794654 for more information and this work around 2292 * will be removed once the CR is fixed. 2293 */ 2294 if (rv == EIO && retry++ < 3) { 2295 delay(2 * hz); 2296 goto try_again; 2297 } 2298 /* 2299 * if mac_unicast_add() failed while setting the MTU, 2300 * detach the port from the group. 2301 */ 2302 mac_perim_enter_by_mh(port->lp_mh, &mph); 2303 (void) aggr_grp_detach_port(grp, port); 2304 mac_perim_exit(mph); 2305 cmn_err(CE_WARN, "Unable to restart the port %s while " 2306 "setting MTU. 
Detaching the port from the aggregation.", 2307 mac_client_name(port->lp_mch)); 2308 } 2309 return (err); 2310 } 2311 2312 static int 2313 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu) 2314 { 2315 int err = 0, i, rv; 2316 aggr_port_t *port; 2317 uint32_t *mtu; 2318 2319 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2320 2321 /* 2322 * If the MTU being set is equal to aggr group's maximum 2323 * allowable value, then there is nothing to change 2324 */ 2325 if (sdu == grp->lg_max_sdu) 2326 return (0); 2327 2328 /* 0 is aggr group's min sdu */ 2329 if (sdu == 0) 2330 return (EINVAL); 2331 2332 mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP); 2333 for (port = grp->lg_ports, i = 0; port != NULL && err == 0; 2334 port = port->lp_next, i++) { 2335 err = aggr_set_port_sdu(grp, port, sdu, mtu + i); 2336 } 2337 if (err != 0) { 2338 /* recover from error: reset the mtus of the ports */ 2339 aggr_port_t *tmp; 2340 2341 for (tmp = grp->lg_ports, i = 0; tmp != port; 2342 tmp = tmp->lp_next, i++) { 2343 (void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL); 2344 } 2345 goto bail; 2346 } 2347 grp->lg_max_sdu = aggr_grp_max_sdu(grp); 2348 rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu); 2349 ASSERT(rv == 0); 2350 bail: 2351 kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports); 2352 return (err); 2353 } 2354 2355 /* 2356 * Callback functions for set/get of properties 2357 */ 2358 /*ARGSUSED*/ 2359 static int 2360 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 2361 uint_t pr_valsize, const void *pr_val) 2362 { 2363 int err = ENOTSUP; 2364 aggr_grp_t *grp = m_driver; 2365 2366 switch (pr_num) { 2367 case MAC_PROP_MTU: { 2368 uint32_t mtu; 2369 2370 if (pr_valsize < sizeof (mtu)) { 2371 err = EINVAL; 2372 break; 2373 } 2374 bcopy(pr_val, &mtu, sizeof (mtu)); 2375 err = aggr_sdu_update(grp, mtu); 2376 break; 2377 } 2378 default: 2379 break; 2380 } 2381 return (err); 2382 } 2383 2384 int 2385 aggr_grp_possible_mtu_range(aggr_grp_t *grp, 
mac_propval_range_t *range) 2386 { 2387 mac_propval_range_t *vals; 2388 mac_propval_uint32_range_t *ur; 2389 aggr_port_t *port; 2390 mac_perim_handle_t mph; 2391 mac_prop_t macprop; 2392 uint_t perm, i; 2393 uint32_t min = 0, max = (uint32_t)-1; 2394 int err = 0; 2395 2396 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2397 2398 vals = kmem_alloc(sizeof (mac_propval_range_t) * grp->lg_nports, 2399 KM_SLEEP); 2400 macprop.mp_id = MAC_PROP_MTU; 2401 macprop.mp_name = "mtu"; 2402 macprop.mp_flags = MAC_PROP_POSSIBLE; 2403 2404 for (port = grp->lg_ports, i = 0; port != NULL; 2405 port = port->lp_next, i++) { 2406 mac_perim_enter_by_mh(port->lp_mh, &mph); 2407 err = mac_get_prop(port->lp_mh, &macprop, vals + i, 2408 sizeof (mac_propval_range_t), &perm); 2409 mac_perim_exit(mph); 2410 if (err != 0) 2411 break; 2412 } 2413 /* 2414 * if any of the underlying ports does not support changing MTU then 2415 * just return ENOTSUP 2416 */ 2417 if (port != NULL) { 2418 ASSERT(err != 0); 2419 goto done; 2420 } 2421 range->mpr_count = 1; 2422 range->mpr_type = MAC_PROPVAL_UINT32; 2423 for (i = 0; i < grp->lg_nports; i++) { 2424 ur = &((vals + i)->range_uint32[0]); 2425 /* 2426 * Take max of the min, for range_min; that is the minimum 2427 * MTU value for an aggregation is the maximum of the 2428 * minimum values of all the underlying ports 2429 */ 2430 if (ur->mpur_min > min) 2431 min = ur->mpur_min; 2432 /* Take min of the max, for range_max */ 2433 if (ur->mpur_max < max) 2434 max = ur->mpur_max; 2435 } 2436 range->range_uint32[0].mpur_min = min; 2437 range->range_uint32[0].mpur_max = max; 2438 done: 2439 kmem_free(vals, sizeof (mac_propval_range_t) * grp->lg_nports); 2440 return (err); 2441 } 2442 2443 /*ARGSUSED*/ 2444 static int 2445 aggr_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 2446 uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) 2447 { 2448 mac_propval_range_t range; 2449 int err = ENOTSUP; 2450 aggr_grp_t *grp = m_driver; 2451 2452 switch 
(pr_num) { 2453 case MAC_PROP_MTU: 2454 if (!(pr_flags & MAC_PROP_POSSIBLE)) 2455 return (ENOTSUP); 2456 if (pr_valsize < sizeof (mac_propval_range_t)) 2457 return (EINVAL); 2458 if ((err = aggr_grp_possible_mtu_range(grp, &range)) != 0) 2459 return (err); 2460 bcopy(&range, pr_val, sizeof (range)); 2461 return (0); 2462 } 2463 return (err); 2464 } 2465