1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups. 28 * 29 * An instance of the structure aggr_grp_t is allocated for each 30 * link aggregation group. When created, aggr_grp_t objects are 31 * entered into the aggr_grp_hash hash table maintained by the modhash 32 * module. The hash key is the linkid associated with the link 33 * aggregation group. 34 * 35 * A set of MAC ports is associated with each aggregation group. 
36 */ 37 38 #include <sys/types.h> 39 #include <sys/sysmacros.h> 40 #include <sys/conf.h> 41 #include <sys/cmn_err.h> 42 #include <sys/disp.h> 43 #include <sys/list.h> 44 #include <sys/ksynch.h> 45 #include <sys/kmem.h> 46 #include <sys/stream.h> 47 #include <sys/modctl.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/atomic.h> 51 #include <sys/stat.h> 52 #include <sys/modhash.h> 53 #include <sys/id_space.h> 54 #include <sys/strsun.h> 55 #include <sys/cred.h> 56 #include <sys/dlpi.h> 57 #include <sys/zone.h> 58 #include <sys/mac_provider.h> 59 #include <sys/dls.h> 60 #include <sys/vlan.h> 61 #include <sys/aggr.h> 62 #include <sys/aggr_impl.h> 63 64 static int aggr_m_start(void *); 65 static void aggr_m_stop(void *); 66 static int aggr_m_promisc(void *, boolean_t); 67 static int aggr_m_multicst(void *, boolean_t, const uint8_t *); 68 static int aggr_m_unicst(void *, const uint8_t *); 69 static int aggr_m_stat(void *, uint_t, uint64_t *); 70 static void aggr_m_ioctl(void *, queue_t *, mblk_t *); 71 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *); 72 static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t, 73 const void *); 74 static int aggr_m_getprop(void *, const char *, mac_prop_id_t, uint_t, 75 uint_t, void *, uint_t *); 76 77 78 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t); 79 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *, 80 boolean_t *); 81 82 static void aggr_grp_capab_set(aggr_grp_t *); 83 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); 84 static uint_t aggr_grp_max_sdu(aggr_grp_t *); 85 static uint32_t aggr_grp_max_margin(aggr_grp_t *); 86 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *); 87 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *); 88 89 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *); 90 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *); 
91 static int aggr_pseudo_disable_intr(mac_intr_handle_t); 92 static int aggr_pseudo_enable_intr(mac_intr_handle_t); 93 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t); 94 static void aggr_pseudo_stop_ring(mac_ring_driver_t); 95 static int aggr_addmac(void *, const uint8_t *); 96 static int aggr_remmac(void *, const uint8_t *); 97 static mblk_t *aggr_rx_poll(void *, int); 98 static void aggr_fill_ring(void *, mac_ring_type_t, const int, 99 const int, mac_ring_info_t *, mac_ring_handle_t); 100 static void aggr_fill_group(void *, mac_ring_type_t, const int, 101 mac_group_info_t *, mac_group_handle_t); 102 103 static kmem_cache_t *aggr_grp_cache; 104 static mod_hash_t *aggr_grp_hash; 105 static krwlock_t aggr_grp_lock; 106 static uint_t aggr_grp_cnt; 107 static id_space_t *key_ids; 108 109 #define GRP_HASHSZ 64 110 #define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid) 111 #define AGGR_PORT_NAME_DELIMIT '-' 112 113 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; 114 115 #define AGGR_M_CALLBACK_FLAGS \ 116 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP) 117 118 static mac_callbacks_t aggr_m_callbacks = { 119 AGGR_M_CALLBACK_FLAGS, 120 aggr_m_stat, 121 aggr_m_start, 122 aggr_m_stop, 123 aggr_m_promisc, 124 aggr_m_multicst, 125 NULL, 126 aggr_m_tx, 127 aggr_m_ioctl, 128 aggr_m_capab_get, 129 NULL, 130 NULL, 131 aggr_m_setprop, 132 aggr_m_getprop 133 }; 134 135 /*ARGSUSED*/ 136 static int 137 aggr_grp_constructor(void *buf, void *arg, int kmflag) 138 { 139 aggr_grp_t *grp = buf; 140 141 bzero(grp, sizeof (*grp)); 142 mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL); 143 cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL); 144 rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL); 145 mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL); 146 cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL); 147 grp->lg_link_state = LINK_STATE_UNKNOWN; 148 return (0); 149 } 150 151 /*ARGSUSED*/ 152 static void 153 aggr_grp_destructor(void *buf, void 
*arg) 154 { 155 aggr_grp_t *grp = buf; 156 157 if (grp->lg_tx_ports != NULL) { 158 kmem_free(grp->lg_tx_ports, 159 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 160 } 161 162 mutex_destroy(&grp->lg_lacp_lock); 163 cv_destroy(&grp->lg_lacp_cv); 164 mutex_destroy(&grp->lg_port_lock); 165 cv_destroy(&grp->lg_port_cv); 166 rw_destroy(&grp->lg_tx_lock); 167 } 168 169 void 170 aggr_grp_init(void) 171 { 172 aggr_grp_cache = kmem_cache_create("aggr_grp_cache", 173 sizeof (aggr_grp_t), 0, aggr_grp_constructor, 174 aggr_grp_destructor, NULL, NULL, NULL, 0); 175 176 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash", 177 GRP_HASHSZ, mod_hash_null_valdtor); 178 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); 179 aggr_grp_cnt = 0; 180 181 /* 182 * Allocate an id space to manage key values (when key is not 183 * specified). The range of the id space will be from 184 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol 185 * uses a 16-bit key. 186 */ 187 key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX); 188 ASSERT(key_ids != NULL); 189 } 190 191 void 192 aggr_grp_fini(void) 193 { 194 id_space_destroy(key_ids); 195 rw_destroy(&aggr_grp_lock); 196 mod_hash_destroy_idhash(aggr_grp_hash); 197 kmem_cache_destroy(aggr_grp_cache); 198 } 199 200 uint_t 201 aggr_grp_count(void) 202 { 203 uint_t count; 204 205 rw_enter(&aggr_grp_lock, RW_READER); 206 count = aggr_grp_cnt; 207 rw_exit(&aggr_grp_lock); 208 return (count); 209 } 210 211 /* 212 * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions 213 * requires the mac perimeter, this function holds a reference of the aggr 214 * and aggr won't call mac_unregister() until this reference drops to 0. 
215 */ 216 void 217 aggr_grp_port_hold(aggr_port_t *port) 218 { 219 aggr_grp_t *grp = port->lp_grp; 220 221 AGGR_PORT_REFHOLD(port); 222 mutex_enter(&grp->lg_port_lock); 223 grp->lg_port_ref++; 224 mutex_exit(&grp->lg_port_lock); 225 } 226 227 /* 228 * Release the reference of the grp and inform aggr_grp_delete() calling 229 * mac_unregister() is now safe. 230 */ 231 void 232 aggr_grp_port_rele(aggr_port_t *port) 233 { 234 aggr_grp_t *grp = port->lp_grp; 235 236 mutex_enter(&grp->lg_port_lock); 237 if (--grp->lg_port_ref == 0) 238 cv_signal(&grp->lg_port_cv); 239 mutex_exit(&grp->lg_port_lock); 240 AGGR_PORT_REFRELE(port); 241 } 242 243 /* 244 * Wait for the port's lacp timer thread and the port's notification callback 245 * to exit. 246 */ 247 void 248 aggr_grp_port_wait(aggr_grp_t *grp) 249 { 250 mutex_enter(&grp->lg_port_lock); 251 if (grp->lg_port_ref != 0) 252 cv_wait(&grp->lg_port_cv, &grp->lg_port_lock); 253 mutex_exit(&grp->lg_port_lock); 254 } 255 256 /* 257 * Attach a port to a link aggregation group. 258 * 259 * A port is attached to a link aggregation group once its speed 260 * and link state have been verified. 261 * 262 * Returns B_TRUE if the group link state or speed has changed. If 263 * it's the case, the caller must notify the MAC layer via a call 264 * to mac_link(). 265 */ 266 boolean_t 267 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) 268 { 269 boolean_t link_state_changed = B_FALSE; 270 271 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 272 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 273 274 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 275 return (B_FALSE); 276 277 /* 278 * Validate the MAC port link speed and update the group 279 * link speed if needed. 280 */ 281 if (port->lp_ifspeed == 0 || 282 port->lp_link_state != LINK_STATE_UP || 283 port->lp_link_duplex != LINK_DUPLEX_FULL) { 284 /* 285 * Can't attach a MAC port with unknown link speed, 286 * down link, or not in full duplex mode. 
287 */ 288 return (B_FALSE); 289 } 290 291 if (grp->lg_ifspeed == 0) { 292 /* 293 * The group inherits the speed of the first link being 294 * attached. 295 */ 296 grp->lg_ifspeed = port->lp_ifspeed; 297 link_state_changed = B_TRUE; 298 } else if (grp->lg_ifspeed != port->lp_ifspeed) { 299 /* 300 * The link speed of the MAC port must be the same as 301 * the group link speed, as per 802.3ad. Since it is 302 * not, the attach is cancelled. 303 */ 304 return (B_FALSE); 305 } 306 307 grp->lg_nattached_ports++; 308 309 /* 310 * Update the group link state. 311 */ 312 if (grp->lg_link_state != LINK_STATE_UP) { 313 grp->lg_link_state = LINK_STATE_UP; 314 grp->lg_link_duplex = LINK_DUPLEX_FULL; 315 link_state_changed = B_TRUE; 316 } 317 318 /* 319 * Update port's state. 320 */ 321 port->lp_state = AGGR_PORT_STATE_ATTACHED; 322 323 aggr_grp_multicst_port(port, B_TRUE); 324 325 /* 326 * Set port's receive callback 327 */ 328 mac_rx_set(port->lp_mch, aggr_recv_cb, port); 329 330 /* 331 * If LACP is OFF, the port can be used to send data as soon 332 * as its link is up and verified to be compatible with the 333 * aggregation. 334 * 335 * If LACP is active or passive, notify the LACP subsystem, which 336 * will enable sending on the port following the LACP protocol. 
337 */ 338 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 339 aggr_send_port_enable(port); 340 else 341 aggr_lacp_port_attached(port); 342 343 return (link_state_changed); 344 } 345 346 boolean_t 347 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) 348 { 349 boolean_t link_state_changed = B_FALSE; 350 351 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 352 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 353 354 /* update state */ 355 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 356 return (B_FALSE); 357 358 mac_rx_clear(port->lp_mch); 359 360 aggr_grp_multicst_port(port, B_FALSE); 361 362 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 363 aggr_send_port_disable(port); 364 else 365 aggr_lacp_port_detached(port); 366 367 port->lp_state = AGGR_PORT_STATE_STANDBY; 368 369 grp->lg_nattached_ports--; 370 if (grp->lg_nattached_ports == 0) { 371 /* the last attached MAC port of the group is being detached */ 372 grp->lg_ifspeed = 0; 373 grp->lg_link_state = LINK_STATE_DOWN; 374 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 375 link_state_changed = B_TRUE; 376 } 377 378 return (link_state_changed); 379 } 380 381 /* 382 * Update the MAC addresses of the constituent ports of the specified 383 * group. This function is invoked: 384 * - after creating a new aggregation group. 385 * - after adding new ports to an aggregation group. 386 * - after removing a port from a group when the MAC address of 387 * that port was used for the MAC address of the group. 388 * - after the MAC address of a port changed when the MAC address 389 * of that port was used for the MAC address of the group. 390 * 391 * Return true if the link state of the aggregation changed, for example 392 * as a result of a failure changing the MAC address of one of the 393 * constituent ports. 
394 */ 395 boolean_t 396 aggr_grp_update_ports_mac(aggr_grp_t *grp) 397 { 398 aggr_port_t *cport; 399 boolean_t link_state_changed = B_FALSE; 400 mac_perim_handle_t mph; 401 402 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 403 404 for (cport = grp->lg_ports; cport != NULL; 405 cport = cport->lp_next) { 406 mac_perim_enter_by_mh(cport->lp_mh, &mph); 407 if (aggr_port_unicst(cport) != 0) { 408 if (aggr_grp_detach_port(grp, cport)) 409 link_state_changed = B_TRUE; 410 } else { 411 /* 412 * If a port was detached because of a previous 413 * failure changing the MAC address, the port is 414 * reattached when it successfully changes the MAC 415 * address now, and this might cause the link state 416 * of the aggregation to change. 417 */ 418 if (aggr_grp_attach_port(grp, cport)) 419 link_state_changed = B_TRUE; 420 } 421 mac_perim_exit(mph); 422 } 423 return (link_state_changed); 424 } 425 426 /* 427 * Invoked when the MAC address of a port has changed. If the port's 428 * MAC address was used for the group MAC address, set mac_addr_changedp 429 * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST 430 * notification. If the link state changes due to detach/attach of 431 * the constituent port, set link_state_changedp to B_TRUE to indicate 432 * to the caller that it should send a MAC_NOTE_LINK notification. In both 433 * cases, it is the responsibility of the caller to invoke notification 434 * functions after releasing the the port lock. 435 */ 436 void 437 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port, 438 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 439 { 440 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 441 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 442 ASSERT(mac_addr_changedp != NULL); 443 ASSERT(link_state_changedp != NULL); 444 445 *mac_addr_changedp = B_FALSE; 446 *link_state_changedp = B_FALSE; 447 448 if (grp->lg_addr_fixed) { 449 /* 450 * The group is using a fixed MAC address or an automatic 451 * MAC address has not been set. 
452 */ 453 return; 454 } 455 456 if (grp->lg_mac_addr_port == port) { 457 /* 458 * The MAC address of the port was assigned to the group 459 * MAC address. Update the group MAC address. 460 */ 461 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 462 *mac_addr_changedp = B_TRUE; 463 } else { 464 /* 465 * Update the actual port MAC address to the MAC address 466 * of the group. 467 */ 468 if (aggr_port_unicst(port) != 0) { 469 *link_state_changedp = aggr_grp_detach_port(grp, port); 470 } else { 471 /* 472 * If a port was detached because of a previous 473 * failure changing the MAC address, the port is 474 * reattached when it successfully changes the MAC 475 * address now, and this might cause the link state 476 * of the aggregation to change. 477 */ 478 *link_state_changedp = aggr_grp_attach_port(grp, port); 479 } 480 } 481 } 482 483 /* 484 * Add a port to a link aggregation group. 485 */ 486 static int 487 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force, 488 aggr_port_t **pp) 489 { 490 aggr_port_t *port, **cport; 491 mac_perim_handle_t mph; 492 zoneid_t port_zoneid = ALL_ZONES; 493 int err; 494 495 /* The port must be int the same zone as the aggregation. */ 496 if (zone_check_datalink(&port_zoneid, port_linkid) != 0) 497 port_zoneid = GLOBAL_ZONEID; 498 if (grp->lg_zoneid != port_zoneid) 499 return (EBUSY); 500 501 /* 502 * lg_mh could be NULL when the function is called during the creation 503 * of the aggregation. 504 */ 505 ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh)); 506 507 /* create new port */ 508 err = aggr_port_create(grp, port_linkid, force, &port); 509 if (err != 0) 510 return (err); 511 512 mac_perim_enter_by_mh(port->lp_mh, &mph); 513 514 /* add port to list of group constituent ports */ 515 cport = &grp->lg_ports; 516 while (*cport != NULL) 517 cport = &((*cport)->lp_next); 518 *cport = port; 519 520 /* 521 * Back reference to the group it is member of. 
A port always 522 * holds a reference to its group to ensure that the back 523 * reference is always valid. 524 */ 525 port->lp_grp = grp; 526 AGGR_GRP_REFHOLD(grp); 527 grp->lg_nports++; 528 529 aggr_lacp_init_port(port); 530 mac_perim_exit(mph); 531 532 if (pp != NULL) 533 *pp = port; 534 535 return (0); 536 } 537 538 /* 539 * Add a pseudo Rx ring for the given HW ring handle. 540 */ 541 static int 542 aggr_add_pseudo_rx_ring(aggr_port_t *port, 543 aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 544 { 545 aggr_pseudo_rx_ring_t *ring; 546 int err; 547 int j; 548 549 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 550 ring = rx_grp->arg_rings + j; 551 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE)) 552 break; 553 } 554 555 /* 556 * No slot for this new Rx ring. 557 */ 558 if (j == MAX_RINGS_PER_GROUP) 559 return (EIO); 560 561 ring->arr_flags |= MAC_PSEUDO_RING_INUSE; 562 ring->arr_hw_rh = hw_rh; 563 ring->arr_port = port; 564 rx_grp->arg_ring_cnt++; 565 566 /* 567 * The group is already registered, dynamically add a new ring to the 568 * mac group. 569 */ 570 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring); 571 if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) { 572 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 573 ring->arr_hw_rh = NULL; 574 ring->arr_port = NULL; 575 rx_grp->arg_ring_cnt--; 576 mac_hwring_teardown(hw_rh); 577 } 578 return (err); 579 } 580 581 /* 582 * Remove the pseudo Rx ring of the given HW ring handle. 
583 */ 584 static void 585 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 586 { 587 aggr_pseudo_rx_ring_t *ring; 588 int j; 589 590 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 591 ring = rx_grp->arg_rings + j; 592 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) || 593 ring->arr_hw_rh != hw_rh) { 594 continue; 595 } 596 597 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh); 598 599 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 600 ring->arr_hw_rh = NULL; 601 ring->arr_port = NULL; 602 rx_grp->arg_ring_cnt--; 603 mac_hwring_teardown(hw_rh); 604 break; 605 } 606 } 607 608 /* 609 * This function is called to create pseudo rings over the hardware rings of 610 * the underlying device. Note that there is a 1:1 mapping between the pseudo 611 * RX rings of the aggr and the hardware rings of the underlying port. 612 */ 613 static int 614 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 615 { 616 aggr_grp_t *grp = port->lp_grp; 617 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 618 aggr_unicst_addr_t *addr, *a; 619 mac_perim_handle_t pmph; 620 int hw_rh_cnt, i = 0, j; 621 int err = 0; 622 623 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 624 mac_perim_enter_by_mh(port->lp_mh, &pmph); 625 626 /* 627 * This function must be called after the aggr registers its mac 628 * and its RX group has been initialized. 629 */ 630 ASSERT(rx_grp->arg_gh != NULL); 631 632 /* 633 * Get the list the the underlying HW rings. 634 */ 635 hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh, 636 MAC_RING_TYPE_RX); 637 638 if (port->lp_hwgh != NULL) { 639 /* 640 * Quiesce the HW ring and the mac srs on the ring. Note 641 * that the HW ring will be restarted when the pseudo ring 642 * is started. At that time all the packets will be 643 * directly passed up to the pseudo RX ring and handled 644 * by mac srs created over the pseudo RX ring. 
645 */ 646 mac_rx_client_quiesce(port->lp_mch); 647 mac_srs_perm_quiesce(port->lp_mch, B_TRUE); 648 } 649 650 /* 651 * Add all the unicast addresses to the newly added port. 652 */ 653 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) { 654 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0) 655 break; 656 } 657 658 for (i = 0; err == 0 && i < hw_rh_cnt; i++) 659 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]); 660 661 if (err != 0) { 662 for (j = 0; j < i; j++) 663 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]); 664 665 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next) 666 aggr_port_remmac(port, a->aua_addr); 667 668 if (port->lp_hwgh != NULL) { 669 mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 670 mac_rx_client_restart(port->lp_mch); 671 port->lp_hwgh = NULL; 672 } 673 } else { 674 port->lp_grp_added = B_TRUE; 675 } 676 done: 677 mac_perim_exit(pmph); 678 return (err); 679 } 680 681 /* 682 * This function is called by aggr to remove pseudo RX rings over the 683 * HW rings of the underlying port. 684 */ 685 static void 686 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 687 { 688 aggr_grp_t *grp = port->lp_grp; 689 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 690 aggr_unicst_addr_t *addr; 691 mac_group_handle_t hwgh; 692 mac_perim_handle_t pmph; 693 int hw_rh_cnt, i; 694 695 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 696 mac_perim_enter_by_mh(port->lp_mh, &pmph); 697 698 if (!port->lp_grp_added) 699 goto done; 700 701 ASSERT(rx_grp->arg_gh != NULL); 702 hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh, 703 MAC_RING_TYPE_RX); 704 705 /* 706 * If hw_rh_cnt is 0, it means that the underlying port does not 707 * support RX rings. Directly return in this case. 
708 */ 709 for (i = 0; i < hw_rh_cnt; i++) 710 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]); 711 712 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) 713 aggr_port_remmac(port, addr->aua_addr); 714 715 if (port->lp_hwgh != NULL) { 716 port->lp_hwgh = NULL; 717 718 /* 719 * First clear the permanent-quiesced flag of the RX srs then 720 * restart the HW ring and the mac srs on the ring. Note that 721 * the HW ring and associated SRS will soon been removed when 722 * the port is removed from the aggr. 723 */ 724 mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 725 mac_rx_client_restart(port->lp_mch); 726 } 727 728 port->lp_grp_added = B_FALSE; 729 done: 730 mac_perim_exit(pmph); 731 } 732 733 static int 734 aggr_pseudo_disable_intr(mac_intr_handle_t ih) 735 { 736 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 737 return (mac_hwring_disable_intr(rr_ring->arr_hw_rh)); 738 } 739 740 static int 741 aggr_pseudo_enable_intr(mac_intr_handle_t ih) 742 { 743 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 744 return (mac_hwring_enable_intr(rr_ring->arr_hw_rh)); 745 } 746 747 static int 748 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen) 749 { 750 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg; 751 int err; 752 753 err = mac_hwring_start(rr_ring->arr_hw_rh); 754 if (err == 0) 755 rr_ring->arr_gen = mr_gen; 756 return (err); 757 } 758 759 static void 760 aggr_pseudo_stop_ring(mac_ring_driver_t arg) 761 { 762 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg; 763 mac_hwring_stop(rr_ring->arr_hw_rh); 764 } 765 766 /* 767 * Add one or more ports to an existing link aggregation group. 
768 */ 769 int 770 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force, 771 laioc_port_t *ports) 772 { 773 int rc, i, nadded = 0; 774 aggr_grp_t *grp = NULL; 775 aggr_port_t *port; 776 boolean_t link_state_changed = B_FALSE; 777 mac_perim_handle_t mph, pmph; 778 779 /* get group corresponding to linkid */ 780 rw_enter(&aggr_grp_lock, RW_READER); 781 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 782 (mod_hash_val_t *)&grp) != 0) { 783 rw_exit(&aggr_grp_lock); 784 return (ENOENT); 785 } 786 AGGR_GRP_REFHOLD(grp); 787 788 /* 789 * Hold the perimeter so that the aggregation won't be destroyed. 790 */ 791 mac_perim_enter_by_mh(grp->lg_mh, &mph); 792 rw_exit(&aggr_grp_lock); 793 794 /* add the specified ports to group */ 795 for (i = 0; i < nports; i++) { 796 /* add port to group */ 797 if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid, 798 force, &port)) != 0) { 799 goto bail; 800 } 801 ASSERT(port != NULL); 802 nadded++; 803 804 /* check capabilities */ 805 if (!aggr_grp_capab_check(grp, port) || 806 !aggr_grp_sdu_check(grp, port) || 807 !aggr_grp_margin_check(grp, port)) { 808 rc = ENOTSUP; 809 goto bail; 810 } 811 812 /* 813 * Create the pseudo ring for each HW ring of the underlying 814 * port. 815 */ 816 rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group); 817 if (rc != 0) 818 goto bail; 819 820 mac_perim_enter_by_mh(port->lp_mh, &pmph); 821 822 /* set LACP mode */ 823 aggr_port_lacp_set_mode(grp, port); 824 825 /* start port if group has already been started */ 826 if (grp->lg_started) { 827 rc = aggr_port_start(port); 828 if (rc != 0) { 829 mac_perim_exit(pmph); 830 goto bail; 831 } 832 833 /* 834 * Turn on the promiscuous mode over the port when it 835 * is requested to be turned on to receive the 836 * non-primary address over a port, or the promiscous 837 * mode is enabled over the aggr. 
838 */ 839 if (grp->lg_promisc || port->lp_prom_addr != NULL) { 840 rc = aggr_port_promisc(port, B_TRUE); 841 if (rc != 0) { 842 mac_perim_exit(pmph); 843 goto bail; 844 } 845 } 846 } 847 mac_perim_exit(pmph); 848 849 /* 850 * Attach each port if necessary. 851 */ 852 if (aggr_port_notify_link(grp, port)) 853 link_state_changed = B_TRUE; 854 855 /* 856 * Initialize the callback functions for this port. 857 */ 858 aggr_port_init_callbacks(port); 859 } 860 861 /* update the MAC address of the constituent ports */ 862 if (aggr_grp_update_ports_mac(grp)) 863 link_state_changed = B_TRUE; 864 865 if (link_state_changed) 866 mac_link_update(grp->lg_mh, grp->lg_link_state); 867 868 bail: 869 if (rc != 0) { 870 /* stop and remove ports that have been added */ 871 for (i = 0; i < nadded; i++) { 872 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 873 ASSERT(port != NULL); 874 if (grp->lg_started) { 875 mac_perim_enter_by_mh(port->lp_mh, &pmph); 876 (void) aggr_port_promisc(port, B_FALSE); 877 aggr_port_stop(port); 878 mac_perim_exit(pmph); 879 } 880 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 881 (void) aggr_grp_rem_port(grp, port, NULL, NULL); 882 } 883 } 884 885 mac_perim_exit(mph); 886 AGGR_GRP_REFRELE(grp); 887 return (rc); 888 } 889 890 static int 891 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy, 892 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 893 aggr_lacp_timer_t lacp_timer) 894 { 895 boolean_t mac_addr_changed = B_FALSE; 896 boolean_t link_state_changed = B_FALSE; 897 mac_perim_handle_t pmph; 898 899 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 900 901 /* validate fixed address if specified */ 902 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 903 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 904 (mac_addr[0] & 0x01))) { 905 return (EINVAL); 906 } 907 908 /* update policy if requested */ 909 if (update_mask & AGGR_MODIFY_POLICY) 910 aggr_send_update_policy(grp, policy); 911 912 /* update 
unicast MAC address if requested */ 913 if (update_mask & AGGR_MODIFY_MAC) { 914 if (mac_fixed) { 915 /* user-supplied MAC address */ 916 grp->lg_mac_addr_port = NULL; 917 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 918 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 919 mac_addr_changed = B_TRUE; 920 } 921 } else if (grp->lg_addr_fixed) { 922 /* switch from user-supplied to automatic */ 923 aggr_port_t *port = grp->lg_ports; 924 925 mac_perim_enter_by_mh(port->lp_mh, &pmph); 926 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 927 grp->lg_mac_addr_port = port; 928 mac_addr_changed = B_TRUE; 929 mac_perim_exit(pmph); 930 } 931 grp->lg_addr_fixed = mac_fixed; 932 } 933 934 if (mac_addr_changed) 935 link_state_changed = aggr_grp_update_ports_mac(grp); 936 937 if (update_mask & AGGR_MODIFY_LACP_MODE) 938 aggr_lacp_update_mode(grp, lacp_mode); 939 940 if (update_mask & AGGR_MODIFY_LACP_TIMER) 941 aggr_lacp_update_timer(grp, lacp_timer); 942 943 if (link_state_changed) 944 mac_link_update(grp->lg_mh, grp->lg_link_state); 945 946 if (mac_addr_changed) 947 mac_unicst_update(grp->lg_mh, grp->lg_addr); 948 949 return (0); 950 } 951 952 /* 953 * Update properties of an existing link aggregation group. 954 */ 955 int 956 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy, 957 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 958 aggr_lacp_timer_t lacp_timer) 959 { 960 aggr_grp_t *grp = NULL; 961 mac_perim_handle_t mph; 962 int err; 963 964 /* get group corresponding to linkid */ 965 rw_enter(&aggr_grp_lock, RW_READER); 966 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 967 (mod_hash_val_t *)&grp) != 0) { 968 rw_exit(&aggr_grp_lock); 969 return (ENOENT); 970 } 971 AGGR_GRP_REFHOLD(grp); 972 973 /* 974 * Hold the perimeter so that the aggregation won't be destroyed. 
975 */ 976 mac_perim_enter_by_mh(grp->lg_mh, &mph); 977 rw_exit(&aggr_grp_lock); 978 979 err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed, 980 mac_addr, lacp_mode, lacp_timer); 981 982 mac_perim_exit(mph); 983 AGGR_GRP_REFRELE(grp); 984 return (err); 985 } 986 987 /* 988 * Create a new link aggregation group upon request from administrator. 989 * Returns 0 on success, an errno on failure. 990 */ 991 int 992 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, 993 laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force, 994 uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer, 995 cred_t *credp) 996 { 997 aggr_grp_t *grp = NULL; 998 aggr_port_t *port; 999 mac_register_t *mac; 1000 boolean_t link_state_changed; 1001 mac_perim_handle_t mph; 1002 int err; 1003 int i; 1004 1005 /* need at least one port */ 1006 if (nports == 0) 1007 return (EINVAL); 1008 1009 rw_enter(&aggr_grp_lock, RW_WRITER); 1010 1011 /* does a group with the same linkid already exist? 
*/ 1012 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1013 (mod_hash_val_t *)&grp); 1014 if (err == 0) { 1015 rw_exit(&aggr_grp_lock); 1016 return (EEXIST); 1017 } 1018 1019 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 1020 1021 grp->lg_refs = 1; 1022 grp->lg_closing = B_FALSE; 1023 grp->lg_force = force; 1024 grp->lg_linkid = linkid; 1025 grp->lg_zoneid = crgetzoneid(credp); 1026 grp->lg_ifspeed = 0; 1027 grp->lg_link_state = LINK_STATE_UNKNOWN; 1028 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 1029 grp->lg_started = B_FALSE; 1030 grp->lg_promisc = B_FALSE; 1031 grp->lg_lacp_done = B_FALSE; 1032 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 1033 grp->lg_lacp_rx_thread = thread_create(NULL, 0, 1034 aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri); 1035 bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t)); 1036 aggr_lacp_init_grp(grp); 1037 1038 /* add MAC ports to group */ 1039 grp->lg_ports = NULL; 1040 grp->lg_nports = 0; 1041 grp->lg_nattached_ports = 0; 1042 grp->lg_ntx_ports = 0; 1043 1044 /* 1045 * If key is not specified by the user, allocate the key. 1046 */ 1047 if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) { 1048 err = ENOMEM; 1049 goto bail; 1050 } 1051 grp->lg_key = key; 1052 1053 for (i = 0; i < nports; i++) { 1054 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL); 1055 if (err != 0) 1056 goto bail; 1057 } 1058 1059 /* 1060 * If no explicit MAC address was specified by the administrator, 1061 * set it to the MAC address of the first port. 
1062 */ 1063 grp->lg_addr_fixed = mac_fixed; 1064 if (grp->lg_addr_fixed) { 1065 /* validate specified address */ 1066 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 1067 err = EINVAL; 1068 goto bail; 1069 } 1070 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 1071 } else { 1072 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 1073 grp->lg_mac_addr_port = grp->lg_ports; 1074 } 1075 1076 /* set the initial group capabilities */ 1077 aggr_grp_capab_set(grp); 1078 1079 if ((mac = mac_alloc(MAC_VERSION)) == NULL) { 1080 err = ENOMEM; 1081 goto bail; 1082 } 1083 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1084 mac->m_driver = grp; 1085 mac->m_dip = aggr_dip; 1086 mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key; 1087 mac->m_src_addr = grp->lg_addr; 1088 mac->m_callbacks = &aggr_m_callbacks; 1089 mac->m_min_sdu = 0; 1090 mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp); 1091 mac->m_margin = aggr_grp_max_margin(grp); 1092 mac->m_v12n = MAC_VIRT_LEVEL1; 1093 err = mac_register(mac, &grp->lg_mh); 1094 mac_free(mac); 1095 if (err != 0) 1096 goto bail; 1097 1098 err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp)); 1099 if (err != 0) { 1100 (void) mac_unregister(grp->lg_mh); 1101 grp->lg_mh = NULL; 1102 goto bail; 1103 } 1104 1105 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1106 1107 /* 1108 * Update the MAC address of the constituent ports. 1109 * None of the port is attached at this time, the link state of the 1110 * aggregation will not change. 1111 */ 1112 link_state_changed = aggr_grp_update_ports_mac(grp); 1113 ASSERT(!link_state_changed); 1114 1115 /* update outbound load balancing policy */ 1116 aggr_send_update_policy(grp, policy); 1117 1118 /* set LACP mode */ 1119 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 1120 1121 /* 1122 * Attach each port if necessary. 
1123 */ 1124 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1125 /* 1126 * Create the pseudo ring for each HW ring of the underlying 1127 * port. Note that this is done after the aggr registers the 1128 * mac. 1129 */ 1130 VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0); 1131 if (aggr_port_notify_link(grp, port)) 1132 link_state_changed = B_TRUE; 1133 1134 /* 1135 * Initialize the callback functions for this port. 1136 */ 1137 aggr_port_init_callbacks(port); 1138 } 1139 1140 if (link_state_changed) 1141 mac_link_update(grp->lg_mh, grp->lg_link_state); 1142 1143 /* add new group to hash table */ 1144 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid), 1145 (mod_hash_val_t)grp); 1146 ASSERT(err == 0); 1147 aggr_grp_cnt++; 1148 1149 mac_perim_exit(mph); 1150 rw_exit(&aggr_grp_lock); 1151 return (0); 1152 1153 bail: 1154 1155 grp->lg_closing = B_TRUE; 1156 1157 port = grp->lg_ports; 1158 while (port != NULL) { 1159 aggr_port_t *cport; 1160 1161 cport = port->lp_next; 1162 aggr_port_delete(port); 1163 port = cport; 1164 } 1165 1166 /* 1167 * Inform the lacp_rx thread to exit. 1168 */ 1169 mutex_enter(&grp->lg_lacp_lock); 1170 grp->lg_lacp_done = B_TRUE; 1171 cv_signal(&grp->lg_lacp_cv); 1172 while (grp->lg_lacp_rx_thread != NULL) 1173 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 1174 mutex_exit(&grp->lg_lacp_lock); 1175 1176 rw_exit(&aggr_grp_lock); 1177 AGGR_GRP_REFRELE(grp); 1178 return (err); 1179 } 1180 1181 /* 1182 * Return a pointer to the member of a group with specified linkid. 1183 */ 1184 static aggr_port_t * 1185 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid) 1186 { 1187 aggr_port_t *port; 1188 1189 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1190 1191 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1192 if (port->lp_linkid == linkid) 1193 break; 1194 } 1195 1196 return (port); 1197 } 1198 1199 /* 1200 * Stop, detach and remove a port from a link aggregation group. 
1201 */ 1202 static int 1203 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, 1204 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 1205 { 1206 int rc = 0; 1207 aggr_port_t **pport; 1208 boolean_t mac_addr_changed = B_FALSE; 1209 boolean_t link_state_changed = B_FALSE; 1210 mac_perim_handle_t mph; 1211 uint64_t val; 1212 uint_t i; 1213 uint_t stat; 1214 1215 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1216 ASSERT(grp->lg_nports > 1); 1217 ASSERT(!grp->lg_closing); 1218 1219 /* unlink port */ 1220 for (pport = &grp->lg_ports; *pport != port; 1221 pport = &(*pport)->lp_next) { 1222 if (*pport == NULL) { 1223 rc = ENOENT; 1224 goto done; 1225 } 1226 } 1227 *pport = port->lp_next; 1228 1229 mac_perim_enter_by_mh(port->lp_mh, &mph); 1230 1231 /* 1232 * If the MAC address of the port being removed was assigned 1233 * to the group, update the group MAC address 1234 * using the MAC address of a different port. 1235 */ 1236 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 1237 /* 1238 * Set the MAC address of the group to the 1239 * MAC address of its first port. 1240 */ 1241 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 1242 grp->lg_mac_addr_port = grp->lg_ports; 1243 mac_addr_changed = B_TRUE; 1244 } 1245 1246 link_state_changed = aggr_grp_detach_port(grp, port); 1247 1248 /* 1249 * Add the counter statistics of the ports while it was aggregated 1250 * to the group's residual statistics. This is done by obtaining 1251 * the current counter from the underlying MAC then subtracting the 1252 * value of the counter at the moment it was added to the 1253 * aggregation. 
1254 */ 1255 for (i = 0; i < MAC_NSTAT; i++) { 1256 stat = i + MAC_STAT_MIN; 1257 if (!MAC_STAT_ISACOUNTER(stat)) 1258 continue; 1259 val = aggr_port_stat(port, stat); 1260 val -= port->lp_stat[i]; 1261 grp->lg_stat[i] += val; 1262 } 1263 for (i = 0; i < ETHER_NSTAT; i++) { 1264 stat = i + MACTYPE_STAT_MIN; 1265 if (!ETHER_STAT_ISACOUNTER(stat)) 1266 continue; 1267 val = aggr_port_stat(port, stat); 1268 val -= port->lp_ether_stat[i]; 1269 grp->lg_ether_stat[i] += val; 1270 } 1271 1272 grp->lg_nports--; 1273 mac_perim_exit(mph); 1274 1275 aggr_port_delete(port); 1276 1277 /* 1278 * If the group MAC address has changed, update the MAC address of 1279 * the remaining constituent ports according to the new MAC 1280 * address of the group. 1281 */ 1282 if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) 1283 link_state_changed = B_TRUE; 1284 1285 done: 1286 if (mac_addr_changedp != NULL) 1287 *mac_addr_changedp = mac_addr_changed; 1288 if (link_state_changedp != NULL) 1289 *link_state_changedp = link_state_changed; 1290 1291 return (rc); 1292 } 1293 1294 /* 1295 * Remove one or more ports from an existing link aggregation group. 1296 */ 1297 int 1298 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports) 1299 { 1300 int rc = 0, i; 1301 aggr_grp_t *grp = NULL; 1302 aggr_port_t *port; 1303 boolean_t mac_addr_update = B_FALSE, mac_addr_changed; 1304 boolean_t link_state_update = B_FALSE, link_state_changed; 1305 mac_perim_handle_t mph, pmph; 1306 1307 /* get group corresponding to linkid */ 1308 rw_enter(&aggr_grp_lock, RW_READER); 1309 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1310 (mod_hash_val_t *)&grp) != 0) { 1311 rw_exit(&aggr_grp_lock); 1312 return (ENOENT); 1313 } 1314 AGGR_GRP_REFHOLD(grp); 1315 1316 /* 1317 * Hold the perimeter so that the aggregation won't be destroyed. 
1318 */ 1319 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1320 rw_exit(&aggr_grp_lock); 1321 1322 /* we need to keep at least one port per group */ 1323 if (nports >= grp->lg_nports) { 1324 rc = EINVAL; 1325 goto bail; 1326 } 1327 1328 /* first verify that all the groups are valid */ 1329 for (i = 0; i < nports; i++) { 1330 if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) { 1331 /* port not found */ 1332 rc = ENOENT; 1333 goto bail; 1334 } 1335 } 1336 1337 /* clear the promiscous mode for the specified ports */ 1338 for (i = 0; i < nports && rc == 0; i++) { 1339 /* lookup port */ 1340 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1341 ASSERT(port != NULL); 1342 1343 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1344 rc = aggr_port_promisc(port, B_FALSE); 1345 mac_perim_exit(pmph); 1346 } 1347 if (rc != 0) { 1348 for (i = 0; i < nports; i++) { 1349 port = aggr_grp_port_lookup(grp, 1350 ports[i].lp_linkid); 1351 ASSERT(port != NULL); 1352 1353 /* 1354 * Turn the promiscuous mode back on if it is required 1355 * to receive the non-primary address over a port, or 1356 * the promiscous mode is enabled over the aggr. 
1357 */ 1358 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1359 if (port->lp_started && (grp->lg_promisc || 1360 port->lp_prom_addr != NULL)) { 1361 (void) aggr_port_promisc(port, B_TRUE); 1362 } 1363 mac_perim_exit(pmph); 1364 } 1365 goto bail; 1366 } 1367 1368 /* remove the specified ports from group */ 1369 for (i = 0; i < nports; i++) { 1370 /* lookup port */ 1371 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1372 ASSERT(port != NULL); 1373 1374 /* stop port if group has already been started */ 1375 if (grp->lg_started) { 1376 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1377 aggr_port_stop(port); 1378 mac_perim_exit(pmph); 1379 } 1380 1381 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1382 /* remove port from group */ 1383 rc = aggr_grp_rem_port(grp, port, &mac_addr_changed, 1384 &link_state_changed); 1385 ASSERT(rc == 0); 1386 mac_addr_update = mac_addr_update || mac_addr_changed; 1387 link_state_update = link_state_update || link_state_changed; 1388 } 1389 1390 bail: 1391 if (mac_addr_update) 1392 mac_unicst_update(grp->lg_mh, grp->lg_addr); 1393 if (link_state_update) 1394 mac_link_update(grp->lg_mh, grp->lg_link_state); 1395 1396 mac_perim_exit(mph); 1397 AGGR_GRP_REFRELE(grp); 1398 1399 return (rc); 1400 } 1401 1402 int 1403 aggr_grp_delete(datalink_id_t linkid, cred_t *cred) 1404 { 1405 aggr_grp_t *grp = NULL; 1406 aggr_port_t *port, *cport; 1407 datalink_id_t tmpid; 1408 mod_hash_val_t val; 1409 mac_perim_handle_t mph, pmph; 1410 int err; 1411 1412 rw_enter(&aggr_grp_lock, RW_WRITER); 1413 1414 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1415 (mod_hash_val_t *)&grp) != 0) { 1416 rw_exit(&aggr_grp_lock); 1417 return (ENOENT); 1418 } 1419 1420 /* 1421 * Note that dls_devnet_destroy() must be called before lg_lock is 1422 * held. Otherwise, it will deadlock if another thread is in 1423 * aggr_m_stat() and thus has a kstat_hold() on the kstats that 1424 * dls_devnet_destroy() needs to delete. 
1425 */ 1426 if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) { 1427 rw_exit(&aggr_grp_lock); 1428 return (err); 1429 } 1430 ASSERT(linkid == tmpid); 1431 1432 /* 1433 * Unregister from the MAC service module. Since this can 1434 * fail if a client hasn't closed the MAC port, we gracefully 1435 * fail the operation. 1436 */ 1437 if ((err = mac_disable(grp->lg_mh)) != 0) { 1438 (void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred)); 1439 rw_exit(&aggr_grp_lock); 1440 return (err); 1441 } 1442 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val); 1443 ASSERT(grp == (aggr_grp_t *)val); 1444 1445 ASSERT(aggr_grp_cnt > 0); 1446 aggr_grp_cnt--; 1447 rw_exit(&aggr_grp_lock); 1448 1449 /* 1450 * Inform the lacp_rx thread to exit. 1451 */ 1452 mutex_enter(&grp->lg_lacp_lock); 1453 grp->lg_lacp_done = B_TRUE; 1454 cv_signal(&grp->lg_lacp_cv); 1455 while (grp->lg_lacp_rx_thread != NULL) 1456 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 1457 mutex_exit(&grp->lg_lacp_lock); 1458 1459 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1460 1461 grp->lg_closing = B_TRUE; 1462 /* detach and free MAC ports associated with group */ 1463 port = grp->lg_ports; 1464 while (port != NULL) { 1465 cport = port->lp_next; 1466 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1467 if (grp->lg_started) 1468 aggr_port_stop(port); 1469 (void) aggr_grp_detach_port(grp, port); 1470 mac_perim_exit(pmph); 1471 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1472 aggr_port_delete(port); 1473 port = cport; 1474 } 1475 1476 mac_perim_exit(mph); 1477 1478 /* 1479 * Wait for the port's lacp timer thread and its notification callback 1480 * to exit before calling mac_unregister() since both needs to access 1481 * the mac perimeter of the grp. 
1482 */ 1483 aggr_grp_port_wait(grp); 1484 1485 VERIFY(mac_unregister(grp->lg_mh) == 0); 1486 grp->lg_mh = NULL; 1487 1488 AGGR_GRP_REFRELE(grp); 1489 return (0); 1490 } 1491 1492 void 1493 aggr_grp_free(aggr_grp_t *grp) 1494 { 1495 ASSERT(grp->lg_refs == 0); 1496 ASSERT(grp->lg_port_ref == 0); 1497 if (grp->lg_key > AGGR_MAX_KEY) { 1498 id_free(key_ids, grp->lg_key); 1499 grp->lg_key = 0; 1500 } 1501 kmem_cache_free(aggr_grp_cache, grp); 1502 } 1503 1504 int 1505 aggr_grp_info(datalink_id_t linkid, void *fn_arg, 1506 aggr_grp_info_new_grp_fn_t new_grp_fn, 1507 aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred) 1508 { 1509 aggr_grp_t *grp; 1510 aggr_port_t *port; 1511 mac_perim_handle_t mph, pmph; 1512 int rc = 0; 1513 1514 /* 1515 * Make sure that the aggregation link is visible from the caller's 1516 * zone. 1517 */ 1518 if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred))) 1519 return (ENOENT); 1520 1521 rw_enter(&aggr_grp_lock, RW_READER); 1522 1523 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1524 (mod_hash_val_t *)&grp) != 0) { 1525 rw_exit(&aggr_grp_lock); 1526 return (ENOENT); 1527 } 1528 AGGR_GRP_REFHOLD(grp); 1529 1530 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1531 rw_exit(&aggr_grp_lock); 1532 1533 rc = new_grp_fn(fn_arg, grp->lg_linkid, 1534 (grp->lg_key > AGGR_MAX_KEY) ? 
0 : grp->lg_key, grp->lg_addr, 1535 grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy, 1536 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 1537 1538 if (rc != 0) 1539 goto bail; 1540 1541 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1542 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1543 rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr, 1544 port->lp_state, &port->lp_lacp.ActorOperPortState); 1545 mac_perim_exit(pmph); 1546 1547 if (rc != 0) 1548 goto bail; 1549 } 1550 1551 bail: 1552 mac_perim_exit(mph); 1553 AGGR_GRP_REFRELE(grp); 1554 return (rc); 1555 } 1556 1557 /*ARGSUSED*/ 1558 static void 1559 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1560 { 1561 miocnak(q, mp, 0, ENOTSUP); 1562 } 1563 1564 static int 1565 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) 1566 { 1567 aggr_port_t *port; 1568 uint_t stat_index; 1569 1570 /* We only aggregate counter statistics. */ 1571 if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) || 1572 IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) { 1573 return (ENOTSUP); 1574 } 1575 1576 /* 1577 * Counter statistics for a group are computed by aggregating the 1578 * counters of the members MACs while they were aggregated, plus 1579 * the residual counter of the group itself, which is updated each 1580 * time a MAC is removed from the group. 1581 */ 1582 *val = 0; 1583 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1584 /* actual port statistic */ 1585 *val += aggr_port_stat(port, stat); 1586 /* 1587 * minus the port stat when it was added, plus any residual 1588 * amount for the group. 
1589 */ 1590 if (IS_MAC_STAT(stat)) { 1591 stat_index = stat - MAC_STAT_MIN; 1592 *val -= port->lp_stat[stat_index]; 1593 *val += grp->lg_stat[stat_index]; 1594 } else if (IS_MACTYPE_STAT(stat)) { 1595 stat_index = stat - MACTYPE_STAT_MIN; 1596 *val -= port->lp_ether_stat[stat_index]; 1597 *val += grp->lg_ether_stat[stat_index]; 1598 } 1599 } 1600 return (0); 1601 } 1602 1603 static int 1604 aggr_m_stat(void *arg, uint_t stat, uint64_t *val) 1605 { 1606 aggr_grp_t *grp = arg; 1607 mac_perim_handle_t mph; 1608 int rval = 0; 1609 1610 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1611 1612 switch (stat) { 1613 case MAC_STAT_IFSPEED: 1614 *val = grp->lg_ifspeed; 1615 break; 1616 1617 case ETHER_STAT_LINK_DUPLEX: 1618 *val = grp->lg_link_duplex; 1619 break; 1620 1621 default: 1622 /* 1623 * For all other statistics, we return the aggregated stat 1624 * from the underlying ports. aggr_grp_stat() will set 1625 * rval appropriately if the statistic isn't a counter. 1626 */ 1627 rval = aggr_grp_stat(grp, stat, val); 1628 } 1629 1630 mac_perim_exit(mph); 1631 return (rval); 1632 } 1633 1634 static int 1635 aggr_m_start(void *arg) 1636 { 1637 aggr_grp_t *grp = arg; 1638 aggr_port_t *port; 1639 mac_perim_handle_t mph, pmph; 1640 1641 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1642 1643 /* 1644 * Attempts to start all configured members of the group. 1645 * Group members will be attached when their link-up notification 1646 * is received. 1647 */ 1648 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1649 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1650 if (aggr_port_start(port) != 0) { 1651 mac_perim_exit(pmph); 1652 continue; 1653 } 1654 1655 /* 1656 * Turn on the promiscuous mode if it is required to receive 1657 * the non-primary address over a port, or the promiscous 1658 * mode is enabled over the aggr. 
1659 */ 1660 if (grp->lg_promisc || port->lp_prom_addr != NULL) { 1661 if (aggr_port_promisc(port, B_TRUE) != 0) 1662 aggr_port_stop(port); 1663 } 1664 mac_perim_exit(pmph); 1665 } 1666 1667 grp->lg_started = B_TRUE; 1668 1669 mac_perim_exit(mph); 1670 return (0); 1671 } 1672 1673 static void 1674 aggr_m_stop(void *arg) 1675 { 1676 aggr_grp_t *grp = arg; 1677 aggr_port_t *port; 1678 mac_perim_handle_t mph, pmph; 1679 1680 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1681 1682 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1683 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1684 1685 /* reset port promiscuous mode */ 1686 (void) aggr_port_promisc(port, B_FALSE); 1687 1688 aggr_port_stop(port); 1689 mac_perim_exit(pmph); 1690 } 1691 1692 grp->lg_started = B_FALSE; 1693 mac_perim_exit(mph); 1694 } 1695 1696 static int 1697 aggr_m_promisc(void *arg, boolean_t on) 1698 { 1699 aggr_grp_t *grp = arg; 1700 aggr_port_t *port; 1701 boolean_t link_state_changed = B_FALSE; 1702 mac_perim_handle_t mph, pmph; 1703 1704 AGGR_GRP_REFHOLD(grp); 1705 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1706 1707 ASSERT(!grp->lg_closing); 1708 1709 if (on == grp->lg_promisc) 1710 goto bail; 1711 1712 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1713 int err = 0; 1714 1715 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1716 AGGR_PORT_REFHOLD(port); 1717 if (!on && (port->lp_prom_addr == NULL)) 1718 err = aggr_port_promisc(port, B_FALSE); 1719 else if (on && port->lp_started) 1720 err = aggr_port_promisc(port, B_TRUE); 1721 1722 if (err != 0) { 1723 if (aggr_grp_detach_port(grp, port)) 1724 link_state_changed = B_TRUE; 1725 } else { 1726 /* 1727 * If a port was detached because of a previous 1728 * failure changing the promiscuity, the port 1729 * is reattached when it successfully changes 1730 * the promiscuity now, and this might cause 1731 * the link state of the aggregation to change. 
1732 */ 1733 if (aggr_grp_attach_port(grp, port)) 1734 link_state_changed = B_TRUE; 1735 } 1736 mac_perim_exit(pmph); 1737 AGGR_PORT_REFRELE(port); 1738 } 1739 1740 grp->lg_promisc = on; 1741 1742 if (link_state_changed) 1743 mac_link_update(grp->lg_mh, grp->lg_link_state); 1744 1745 bail: 1746 mac_perim_exit(mph); 1747 AGGR_GRP_REFRELE(grp); 1748 1749 return (0); 1750 } 1751 1752 static void 1753 aggr_grp_port_rename(const char *new_name, void *arg) 1754 { 1755 /* 1756 * aggr port's mac client name is the format of "aggr link name" plus 1757 * AGGR_PORT_NAME_DELIMIT plus "underneath link name". 1758 */ 1759 int aggr_len, link_len, clnt_name_len, i; 1760 char *str_end, *str_st, *str_del; 1761 char aggr_name[MAXNAMELEN]; 1762 char link_name[MAXNAMELEN]; 1763 char *clnt_name; 1764 aggr_grp_t *aggr_grp = arg; 1765 aggr_port_t *aggr_port = aggr_grp->lg_ports; 1766 1767 for (i = 0; i < aggr_grp->lg_nports; i++) { 1768 clnt_name = mac_client_name(aggr_port->lp_mch); 1769 clnt_name_len = strlen(clnt_name); 1770 str_st = clnt_name; 1771 str_end = &(clnt_name[clnt_name_len]); 1772 str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT); 1773 ASSERT(str_del != NULL); 1774 aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st); 1775 link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del); 1776 bzero(aggr_name, MAXNAMELEN); 1777 bzero(link_name, MAXNAMELEN); 1778 bcopy(clnt_name, aggr_name, aggr_len); 1779 bcopy(str_del, link_name, link_len + 1); 1780 bzero(clnt_name, MAXNAMELEN); 1781 (void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name, 1782 link_name); 1783 1784 (void) mac_rename_primary(aggr_port->lp_mh, NULL); 1785 aggr_port = aggr_port->lp_next; 1786 } 1787 } 1788 1789 /* 1790 * Initialize the capabilities that are advertised for the group 1791 * according to the capabilities of the constituent ports. 
1792 */ 1793 static boolean_t 1794 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 1795 { 1796 aggr_grp_t *grp = arg; 1797 1798 switch (cap) { 1799 case MAC_CAPAB_HCKSUM: { 1800 uint32_t *hcksum_txflags = cap_data; 1801 *hcksum_txflags = grp->lg_hcksum_txflags; 1802 break; 1803 } 1804 case MAC_CAPAB_LSO: { 1805 mac_capab_lso_t *cap_lso = cap_data; 1806 1807 if (grp->lg_lso) { 1808 *cap_lso = grp->lg_cap_lso; 1809 break; 1810 } else { 1811 return (B_FALSE); 1812 } 1813 } 1814 case MAC_CAPAB_NO_NATIVEVLAN: 1815 return (!grp->lg_vlan); 1816 case MAC_CAPAB_NO_ZCOPY: 1817 return (!grp->lg_zcopy); 1818 case MAC_CAPAB_RINGS: { 1819 mac_capab_rings_t *cap_rings = cap_data; 1820 1821 if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 1822 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 1823 cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt; 1824 cap_rings->mr_rget = aggr_fill_ring; 1825 1826 /* 1827 * An aggregation advertises only one (pseudo) RX 1828 * group, which virtualizes the main/primary group of 1829 * the underlying devices. 1830 */ 1831 cap_rings->mr_gnum = 1; 1832 cap_rings->mr_gget = aggr_fill_group; 1833 cap_rings->mr_gaddring = NULL; 1834 cap_rings->mr_gremring = NULL; 1835 } else { 1836 return (B_FALSE); 1837 } 1838 break; 1839 } 1840 case MAC_CAPAB_AGGR: 1841 { 1842 mac_capab_aggr_t *aggr_cap; 1843 1844 if (cap_data != NULL) { 1845 aggr_cap = cap_data; 1846 aggr_cap->mca_rename_fn = aggr_grp_port_rename; 1847 aggr_cap->mca_unicst = aggr_m_unicst; 1848 } 1849 return (B_TRUE); 1850 } 1851 default: 1852 return (B_FALSE); 1853 } 1854 return (B_TRUE); 1855 } 1856 1857 /* 1858 * Callback funtion for MAC layer to register groups. 
1859 */ 1860 static void 1861 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, 1862 mac_group_info_t *infop, mac_group_handle_t gh) 1863 { 1864 aggr_grp_t *grp = arg; 1865 aggr_pseudo_rx_group_t *rx_group; 1866 1867 ASSERT(rtype == MAC_RING_TYPE_RX && index == 0); 1868 rx_group = &grp->lg_rx_group; 1869 rx_group->arg_gh = gh; 1870 rx_group->arg_grp = grp; 1871 1872 infop->mgi_driver = (mac_group_driver_t)rx_group; 1873 infop->mgi_start = NULL; 1874 infop->mgi_stop = NULL; 1875 infop->mgi_addmac = aggr_addmac; 1876 infop->mgi_remmac = aggr_remmac; 1877 infop->mgi_count = rx_group->arg_ring_cnt; 1878 } 1879 1880 /* 1881 * Callback funtion for MAC layer to register all rings. 1882 */ 1883 static void 1884 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 1885 const int index, mac_ring_info_t *infop, mac_ring_handle_t rh) 1886 { 1887 aggr_grp_t *grp = arg; 1888 1889 switch (rtype) { 1890 case MAC_RING_TYPE_RX: { 1891 aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_group; 1892 aggr_pseudo_rx_ring_t *rx_ring; 1893 mac_intr_t aggr_mac_intr; 1894 1895 ASSERT(rg_index == 0); 1896 1897 ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt)); 1898 rx_ring = rx_group->arg_rings + index; 1899 rx_ring->arr_rh = rh; 1900 1901 /* 1902 * Entrypoint to enable interrupt (disable poll) and 1903 * disable interrupt (enable poll). 
1904 */ 1905 aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring; 1906 aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr; 1907 aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr; 1908 1909 infop->mri_driver = (mac_ring_driver_t)rx_ring; 1910 infop->mri_start = aggr_pseudo_start_ring; 1911 infop->mri_stop = aggr_pseudo_stop_ring; 1912 1913 infop->mri_intr = aggr_mac_intr; 1914 infop->mri_poll = aggr_rx_poll; 1915 break; 1916 } 1917 default: 1918 break; 1919 } 1920 } 1921 1922 static mblk_t * 1923 aggr_rx_poll(void *arg, int bytes_to_pickup) 1924 { 1925 aggr_pseudo_rx_ring_t *rr_ring = arg; 1926 aggr_port_t *port = rr_ring->arr_port; 1927 aggr_grp_t *grp = port->lp_grp; 1928 mblk_t *mp_chain, *mp, **mpp; 1929 1930 mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup); 1931 1932 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 1933 return (mp_chain); 1934 1935 mpp = &mp_chain; 1936 while ((mp = *mpp) != NULL) { 1937 if (MBLKL(mp) >= sizeof (struct ether_header)) { 1938 struct ether_header *ehp; 1939 1940 ehp = (struct ether_header *)mp->b_rptr; 1941 if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) { 1942 *mpp = mp->b_next; 1943 mp->b_next = NULL; 1944 aggr_recv_lacp(port, 1945 (mac_resource_handle_t)rr_ring, mp); 1946 continue; 1947 } 1948 } 1949 1950 if (!port->lp_collector_enabled) { 1951 *mpp = mp->b_next; 1952 mp->b_next = NULL; 1953 freemsg(mp); 1954 continue; 1955 } 1956 mpp = &mp->b_next; 1957 } 1958 return (mp_chain); 1959 } 1960 1961 static int 1962 aggr_addmac(void *arg, const uint8_t *mac_addr) 1963 { 1964 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg; 1965 aggr_unicst_addr_t *addr, **pprev; 1966 aggr_grp_t *grp = rx_group->arg_grp; 1967 aggr_port_t *port, *p; 1968 mac_perim_handle_t mph; 1969 int err = 0; 1970 1971 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1972 1973 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 1974 mac_perim_exit(mph); 1975 return (0); 1976 } 1977 1978 /* 1979 * Insert this mac address into the list of mac 
addresses owned by 1980 * the aggregation pseudo group. 1981 */ 1982 pprev = &rx_group->arg_macaddr; 1983 while ((addr = *pprev) != NULL) { 1984 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) { 1985 mac_perim_exit(mph); 1986 return (EEXIST); 1987 } 1988 pprev = &addr->aua_next; 1989 } 1990 addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP); 1991 bcopy(mac_addr, addr->aua_addr, ETHERADDRL); 1992 addr->aua_next = NULL; 1993 *pprev = addr; 1994 1995 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 1996 if ((err = aggr_port_addmac(port, mac_addr)) != 0) 1997 break; 1998 1999 if (err != 0) { 2000 for (p = grp->lg_ports; p != port; p = p->lp_next) 2001 aggr_port_remmac(p, mac_addr); 2002 2003 *pprev = NULL; 2004 kmem_free(addr, sizeof (aggr_unicst_addr_t)); 2005 } 2006 2007 mac_perim_exit(mph); 2008 return (err); 2009 } 2010 2011 static int 2012 aggr_remmac(void *arg, const uint8_t *mac_addr) 2013 { 2014 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg; 2015 aggr_unicst_addr_t *addr, **pprev; 2016 aggr_grp_t *grp = rx_group->arg_grp; 2017 aggr_port_t *port; 2018 mac_perim_handle_t mph; 2019 int err = 0; 2020 2021 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2022 2023 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 2024 mac_perim_exit(mph); 2025 return (0); 2026 } 2027 2028 /* 2029 * Insert this mac address into the list of mac addresses owned by 2030 * the aggregation pseudo group. 
2031 */ 2032 pprev = &rx_group->arg_macaddr; 2033 while ((addr = *pprev) != NULL) { 2034 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) { 2035 pprev = &addr->aua_next; 2036 continue; 2037 } 2038 break; 2039 } 2040 if (addr == NULL) { 2041 mac_perim_exit(mph); 2042 return (EINVAL); 2043 } 2044 2045 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 2046 aggr_port_remmac(port, mac_addr); 2047 2048 *pprev = addr->aua_next; 2049 kmem_free(addr, sizeof (aggr_unicst_addr_t)); 2050 2051 mac_perim_exit(mph); 2052 return (err); 2053 } 2054 2055 /* 2056 * Add or remove the multicast addresses that are defined for the group 2057 * to or from the specified port. 2058 * 2059 * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port 2060 * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is 2061 * called when the port is either stopped or detached. 2062 */ 2063 void 2064 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add) 2065 { 2066 aggr_grp_t *grp = port->lp_grp; 2067 2068 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 2069 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2070 2071 if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED) 2072 return; 2073 2074 mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add); 2075 } 2076 2077 static int 2078 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 2079 { 2080 aggr_grp_t *grp = arg; 2081 aggr_port_t *port = NULL; 2082 mac_perim_handle_t mph; 2083 int err = 0, cerr; 2084 2085 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2086 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2087 if (port->lp_state != AGGR_PORT_STATE_ATTACHED || 2088 !port->lp_started) { 2089 continue; 2090 } 2091 cerr = aggr_port_multicst(port, add, addrp); 2092 if (cerr != 0 && err == 0) 2093 err = cerr; 2094 } 2095 mac_perim_exit(mph); 2096 return (err); 2097 } 2098 2099 static int 2100 aggr_m_unicst(void *arg, const uint8_t *macaddr) 2101 { 2102 aggr_grp_t *grp = arg; 2103 
mac_perim_handle_t mph; 2104 int err; 2105 2106 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2107 err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr, 2108 0, 0); 2109 mac_perim_exit(mph); 2110 return (err); 2111 } 2112 2113 /* 2114 * Initialize the capabilities that are advertised for the group 2115 * according to the capabilities of the constituent ports. 2116 */ 2117 static void 2118 aggr_grp_capab_set(aggr_grp_t *grp) 2119 { 2120 uint32_t cksum; 2121 aggr_port_t *port; 2122 mac_capab_lso_t cap_lso; 2123 2124 ASSERT(grp->lg_mh == NULL); 2125 ASSERT(grp->lg_ports != NULL); 2126 2127 grp->lg_hcksum_txflags = (uint32_t)-1; 2128 grp->lg_zcopy = B_TRUE; 2129 grp->lg_vlan = B_TRUE; 2130 2131 grp->lg_lso = B_TRUE; 2132 grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1; 2133 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1; 2134 2135 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2136 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum)) 2137 cksum = 0; 2138 grp->lg_hcksum_txflags &= cksum; 2139 2140 grp->lg_vlan &= 2141 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL); 2142 2143 grp->lg_zcopy &= 2144 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL); 2145 2146 grp->lg_lso &= 2147 mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso); 2148 if (grp->lg_lso) { 2149 grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags; 2150 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max > 2151 cap_lso.lso_basic_tcp_ipv4.lso_max) 2152 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = 2153 cap_lso.lso_basic_tcp_ipv4.lso_max; 2154 } 2155 } 2156 } 2157 2158 /* 2159 * Checks whether the capabilities of the port being added are compatible 2160 * with the current capabilities of the aggregation. 
2161 */ 2162 static boolean_t 2163 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 2164 { 2165 uint32_t hcksum_txflags; 2166 2167 ASSERT(grp->lg_ports != NULL); 2168 2169 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) & 2170 grp->lg_vlan) != grp->lg_vlan) { 2171 return (B_FALSE); 2172 } 2173 2174 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) & 2175 grp->lg_zcopy) != grp->lg_zcopy) { 2176 return (B_FALSE); 2177 } 2178 2179 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) { 2180 if (grp->lg_hcksum_txflags != 0) 2181 return (B_FALSE); 2182 } else if ((hcksum_txflags & grp->lg_hcksum_txflags) != 2183 grp->lg_hcksum_txflags) { 2184 return (B_FALSE); 2185 } 2186 2187 if (grp->lg_lso) { 2188 mac_capab_lso_t cap_lso; 2189 2190 if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) { 2191 if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) != 2192 grp->lg_cap_lso.lso_flags) 2193 return (B_FALSE); 2194 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max > 2195 cap_lso.lso_basic_tcp_ipv4.lso_max) 2196 return (B_FALSE); 2197 } else { 2198 return (B_FALSE); 2199 } 2200 } 2201 2202 return (B_TRUE); 2203 } 2204 2205 /* 2206 * Returns the maximum SDU according to the SDU of the constituent ports. 2207 */ 2208 static uint_t 2209 aggr_grp_max_sdu(aggr_grp_t *grp) 2210 { 2211 uint_t max_sdu = (uint_t)-1; 2212 aggr_port_t *port; 2213 2214 ASSERT(grp->lg_ports != NULL); 2215 2216 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2217 uint_t port_sdu_max; 2218 2219 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2220 if (max_sdu > port_sdu_max) 2221 max_sdu = port_sdu_max; 2222 } 2223 2224 return (max_sdu); 2225 } 2226 2227 /* 2228 * Checks if the maximum SDU of the specified port is compatible 2229 * with the maximum SDU of the specified aggregation group, returns 2230 * B_TRUE if it is, B_FALSE otherwise. 
2231 */ 2232 static boolean_t 2233 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port) 2234 { 2235 uint_t port_sdu_max; 2236 2237 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2238 return (port_sdu_max >= grp->lg_max_sdu); 2239 } 2240 2241 /* 2242 * Returns the maximum margin according to the margin of the constituent ports. 2243 */ 2244 static uint32_t 2245 aggr_grp_max_margin(aggr_grp_t *grp) 2246 { 2247 uint32_t margin = UINT32_MAX; 2248 aggr_port_t *port; 2249 2250 ASSERT(grp->lg_mh == NULL); 2251 ASSERT(grp->lg_ports != NULL); 2252 2253 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2254 if (margin > port->lp_margin) 2255 margin = port->lp_margin; 2256 } 2257 2258 grp->lg_margin = margin; 2259 return (margin); 2260 } 2261 2262 /* 2263 * Checks if the maximum margin of the specified port is compatible 2264 * with the maximum margin of the specified aggregation group, returns 2265 * B_TRUE if it is, B_FALSE otherwise. 2266 */ 2267 static boolean_t 2268 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port) 2269 { 2270 if (port->lp_margin >= grp->lg_margin) 2271 return (B_TRUE); 2272 2273 /* 2274 * See whether the current margin value is allowed to be changed to 2275 * the new value. 
2276 */ 2277 if (!mac_margin_update(grp->lg_mh, port->lp_margin)) 2278 return (B_FALSE); 2279 2280 grp->lg_margin = port->lp_margin; 2281 return (B_TRUE); 2282 } 2283 2284 /* 2285 * Set MTU on individual ports of an aggregation group 2286 */ 2287 static int 2288 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu, 2289 uint32_t *old_mtu) 2290 { 2291 boolean_t removed = B_FALSE; 2292 mac_perim_handle_t mph; 2293 mac_diag_t diag; 2294 int err, rv, retry = 0; 2295 2296 if (port->lp_mah != NULL) { 2297 (void) mac_unicast_remove(port->lp_mch, port->lp_mah); 2298 port->lp_mah = NULL; 2299 removed = B_TRUE; 2300 } 2301 err = mac_set_mtu(port->lp_mh, sdu, old_mtu); 2302 try_again: 2303 if (removed && (rv = mac_unicast_add(port->lp_mch, NULL, 2304 MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK, 2305 &port->lp_mah, 0, &diag)) != 0) { 2306 /* 2307 * following is a workaround for a bug in 'bge' driver. 2308 * See CR 6794654 for more information and this work around 2309 * will be removed once the CR is fixed. 2310 */ 2311 if (rv == EIO && retry++ < 3) { 2312 delay(2 * hz); 2313 goto try_again; 2314 } 2315 /* 2316 * if mac_unicast_add() failed while setting the MTU, 2317 * detach the port from the group. 2318 */ 2319 mac_perim_enter_by_mh(port->lp_mh, &mph); 2320 (void) aggr_grp_detach_port(grp, port); 2321 mac_perim_exit(mph); 2322 cmn_err(CE_WARN, "Unable to restart the port %s while " 2323 "setting MTU. 
Detaching the port from the aggregation.", 2324 mac_client_name(port->lp_mch)); 2325 } 2326 return (err); 2327 } 2328 2329 static int 2330 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu) 2331 { 2332 int err = 0, i, rv; 2333 aggr_port_t *port; 2334 uint32_t *mtu; 2335 2336 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2337 2338 /* 2339 * If the MTU being set is equal to aggr group's maximum 2340 * allowable value, then there is nothing to change 2341 */ 2342 if (sdu == grp->lg_max_sdu) 2343 return (0); 2344 2345 /* 0 is aggr group's min sdu */ 2346 if (sdu == 0) 2347 return (EINVAL); 2348 2349 mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP); 2350 for (port = grp->lg_ports, i = 0; port != NULL && err == 0; 2351 port = port->lp_next, i++) { 2352 err = aggr_set_port_sdu(grp, port, sdu, mtu + i); 2353 } 2354 if (err != 0) { 2355 /* recover from error: reset the mtus of the ports */ 2356 aggr_port_t *tmp; 2357 2358 for (tmp = grp->lg_ports, i = 0; tmp != port; 2359 tmp = tmp->lp_next, i++) { 2360 (void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL); 2361 } 2362 goto bail; 2363 } 2364 grp->lg_max_sdu = aggr_grp_max_sdu(grp); 2365 rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu); 2366 ASSERT(rv == 0); 2367 bail: 2368 kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports); 2369 return (err); 2370 } 2371 2372 /* 2373 * Callback functions for set/get of properties 2374 */ 2375 /*ARGSUSED*/ 2376 static int 2377 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 2378 uint_t pr_valsize, const void *pr_val) 2379 { 2380 int err = ENOTSUP; 2381 aggr_grp_t *grp = m_driver; 2382 2383 switch (pr_num) { 2384 case MAC_PROP_MTU: { 2385 uint32_t mtu; 2386 2387 if (pr_valsize < sizeof (mtu)) { 2388 err = EINVAL; 2389 break; 2390 } 2391 bcopy(pr_val, &mtu, sizeof (mtu)); 2392 err = aggr_sdu_update(grp, mtu); 2393 break; 2394 } 2395 default: 2396 break; 2397 } 2398 return (err); 2399 } 2400 2401 int 2402 aggr_grp_possible_mtu_range(aggr_grp_t *grp, 
mac_propval_range_t *range) 2403 { 2404 mac_propval_range_t *vals; 2405 mac_propval_uint32_range_t *ur; 2406 aggr_port_t *port; 2407 mac_perim_handle_t mph; 2408 mac_prop_t macprop; 2409 uint_t perm, i; 2410 uint32_t min = 0, max = (uint32_t)-1; 2411 int err = 0; 2412 2413 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2414 2415 vals = kmem_alloc(sizeof (mac_propval_range_t) * grp->lg_nports, 2416 KM_SLEEP); 2417 macprop.mp_id = MAC_PROP_MTU; 2418 macprop.mp_name = "mtu"; 2419 macprop.mp_flags = MAC_PROP_POSSIBLE; 2420 2421 for (port = grp->lg_ports, i = 0; port != NULL; 2422 port = port->lp_next, i++) { 2423 mac_perim_enter_by_mh(port->lp_mh, &mph); 2424 err = mac_get_prop(port->lp_mh, &macprop, vals + i, 2425 sizeof (mac_propval_range_t), &perm); 2426 mac_perim_exit(mph); 2427 if (err != 0) 2428 break; 2429 } 2430 /* 2431 * if any of the underlying ports does not support changing MTU then 2432 * just return ENOTSUP 2433 */ 2434 if (port != NULL) { 2435 ASSERT(err != 0); 2436 goto done; 2437 } 2438 range->mpr_count = 1; 2439 range->mpr_type = MAC_PROPVAL_UINT32; 2440 for (i = 0; i < grp->lg_nports; i++) { 2441 ur = &((vals + i)->range_uint32[0]); 2442 /* 2443 * Take max of the min, for range_min; that is the minimum 2444 * MTU value for an aggregation is the maximum of the 2445 * minimum values of all the underlying ports 2446 */ 2447 if (ur->mpur_min > min) 2448 min = ur->mpur_min; 2449 /* Take min of the max, for range_max */ 2450 if (ur->mpur_max < max) 2451 max = ur->mpur_max; 2452 } 2453 range->range_uint32[0].mpur_min = min; 2454 range->range_uint32[0].mpur_max = max; 2455 done: 2456 kmem_free(vals, sizeof (mac_propval_range_t) * grp->lg_nports); 2457 return (err); 2458 } 2459 2460 /*ARGSUSED*/ 2461 static int 2462 aggr_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 2463 uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) 2464 { 2465 mac_propval_range_t range; 2466 int err = ENOTSUP; 2467 aggr_grp_t *grp = m_driver; 2468 2469 switch 
(pr_num) { 2470 case MAC_PROP_MTU: 2471 if (!(pr_flags & MAC_PROP_POSSIBLE)) 2472 return (ENOTSUP); 2473 if (pr_valsize < sizeof (mac_propval_range_t)) 2474 return (EINVAL); 2475 if ((err = aggr_grp_possible_mtu_range(grp, &range)) != 0) 2476 return (err); 2477 bcopy(&range, pr_val, sizeof (range)); 2478 return (0); 2479 } 2480 return (err); 2481 } 2482