/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
 *
 * An instance of the structure aggr_grp_t is allocated for each
 * link aggregation group. When created, aggr_grp_t objects are
 * entered into the aggr_grp_hash hash table maintained by the modhash
 * module. The hash key is the linkid associated with the link
 * aggregation group.
 *
 * A set of MAC ports is associated with each aggregation group.
 *
 * Aggr pseudo TX rings
 * --------------------
 * The underlying ports (NICs) in an aggregation can have TX rings. To
 * enhance aggr's performance, these TX rings are made available to the
 * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
 * it is already present and implemented on the RX side, where they are
 * called pseudo RX rings. The same concept is extended to the TX side where
 * each TX ring of an underlying port is reflected in aggr as a pseudo
 * TX ring. Thus each pseudo TX ring will map to a specific hardware TX
 * ring.
 * Even in the case of a NIC that does not have a TX ring, a pseudo
 * TX ring is given to the aggregation layer.
 *
 * With this change, the outgoing stack depth looks much better:
 *
 * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
 * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx()
 *
 * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings:
 * SRS_TX_AGGR and SRS_TX_BW_AGGR.
 *
 * In SRS_TX_AGGR mode, the mac_tx_aggr_mode() routine is called. This routine
 * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX
 * ring belonging to a port on which the packet has to be sent.
 * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
 * policy and then uses the fanout_hint passed to it to pick a TX ring from
 * the selected port.
 *
 * In SRS_TX_BW_AGGR mode, the mac_tx_bw_mode() function is called. It first
 * applies the bandwidth limit to the outgoing packets; the packets that
 * are allowed to go out are then passed to mac_tx_aggr_mode(), which sends
 * each of them on a particular TX ring.
68 */ 69 70 #include <sys/types.h> 71 #include <sys/sysmacros.h> 72 #include <sys/conf.h> 73 #include <sys/cmn_err.h> 74 #include <sys/disp.h> 75 #include <sys/list.h> 76 #include <sys/ksynch.h> 77 #include <sys/kmem.h> 78 #include <sys/stream.h> 79 #include <sys/modctl.h> 80 #include <sys/ddi.h> 81 #include <sys/sunddi.h> 82 #include <sys/atomic.h> 83 #include <sys/stat.h> 84 #include <sys/modhash.h> 85 #include <sys/id_space.h> 86 #include <sys/strsun.h> 87 #include <sys/cred.h> 88 #include <sys/dlpi.h> 89 #include <sys/zone.h> 90 #include <sys/mac_provider.h> 91 #include <sys/dls.h> 92 #include <sys/vlan.h> 93 #include <sys/aggr.h> 94 #include <sys/aggr_impl.h> 95 96 static int aggr_m_start(void *); 97 static void aggr_m_stop(void *); 98 static int aggr_m_promisc(void *, boolean_t); 99 static int aggr_m_multicst(void *, boolean_t, const uint8_t *); 100 static int aggr_m_unicst(void *, const uint8_t *); 101 static int aggr_m_stat(void *, uint_t, uint64_t *); 102 static void aggr_m_ioctl(void *, queue_t *, mblk_t *); 103 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *); 104 static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t, 105 const void *); 106 static void aggr_m_propinfo(void *, const char *, mac_prop_id_t, 107 mac_prop_info_handle_t); 108 109 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t); 110 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *, 111 boolean_t *); 112 113 static void aggr_grp_capab_set(aggr_grp_t *); 114 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); 115 static uint_t aggr_grp_max_sdu(aggr_grp_t *); 116 static uint32_t aggr_grp_max_margin(aggr_grp_t *); 117 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *); 118 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *); 119 120 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *); 121 static void aggr_rem_pseudo_rx_group(aggr_port_t *, 
aggr_pseudo_rx_group_t *); 122 static int aggr_pseudo_disable_intr(mac_intr_handle_t); 123 static int aggr_pseudo_enable_intr(mac_intr_handle_t); 124 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t); 125 static void aggr_pseudo_stop_ring(mac_ring_driver_t); 126 static int aggr_addmac(void *, const uint8_t *); 127 static int aggr_remmac(void *, const uint8_t *); 128 static mblk_t *aggr_rx_poll(void *, int); 129 static void aggr_fill_ring(void *, mac_ring_type_t, const int, 130 const int, mac_ring_info_t *, mac_ring_handle_t); 131 static void aggr_fill_group(void *, mac_ring_type_t, const int, 132 mac_group_info_t *, mac_group_handle_t); 133 134 static kmem_cache_t *aggr_grp_cache; 135 static mod_hash_t *aggr_grp_hash; 136 static krwlock_t aggr_grp_lock; 137 static uint_t aggr_grp_cnt; 138 static id_space_t *key_ids; 139 140 #define GRP_HASHSZ 64 141 #define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid) 142 #define AGGR_PORT_NAME_DELIMIT '-' 143 144 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; 145 146 #define AGGR_M_CALLBACK_FLAGS \ 147 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO) 148 149 static mac_callbacks_t aggr_m_callbacks = { 150 AGGR_M_CALLBACK_FLAGS, 151 aggr_m_stat, 152 aggr_m_start, 153 aggr_m_stop, 154 aggr_m_promisc, 155 aggr_m_multicst, 156 NULL, 157 NULL, 158 NULL, 159 aggr_m_ioctl, 160 aggr_m_capab_get, 161 NULL, 162 NULL, 163 aggr_m_setprop, 164 NULL, 165 aggr_m_propinfo 166 }; 167 168 /*ARGSUSED*/ 169 static int 170 aggr_grp_constructor(void *buf, void *arg, int kmflag) 171 { 172 aggr_grp_t *grp = buf; 173 174 bzero(grp, sizeof (*grp)); 175 mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL); 176 cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL); 177 rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL); 178 mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL); 179 cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL); 180 mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL); 181 
cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL); 182 grp->lg_link_state = LINK_STATE_UNKNOWN; 183 return (0); 184 } 185 186 /*ARGSUSED*/ 187 static void 188 aggr_grp_destructor(void *buf, void *arg) 189 { 190 aggr_grp_t *grp = buf; 191 192 if (grp->lg_tx_ports != NULL) { 193 kmem_free(grp->lg_tx_ports, 194 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 195 } 196 197 mutex_destroy(&grp->lg_lacp_lock); 198 cv_destroy(&grp->lg_lacp_cv); 199 mutex_destroy(&grp->lg_port_lock); 200 cv_destroy(&grp->lg_port_cv); 201 rw_destroy(&grp->lg_tx_lock); 202 mutex_destroy(&grp->lg_tx_flowctl_lock); 203 cv_destroy(&grp->lg_tx_flowctl_cv); 204 } 205 206 void 207 aggr_grp_init(void) 208 { 209 aggr_grp_cache = kmem_cache_create("aggr_grp_cache", 210 sizeof (aggr_grp_t), 0, aggr_grp_constructor, 211 aggr_grp_destructor, NULL, NULL, NULL, 0); 212 213 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash", 214 GRP_HASHSZ, mod_hash_null_valdtor); 215 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); 216 aggr_grp_cnt = 0; 217 218 /* 219 * Allocate an id space to manage key values (when key is not 220 * specified). The range of the id space will be from 221 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol 222 * uses a 16-bit key. 223 */ 224 key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX); 225 ASSERT(key_ids != NULL); 226 } 227 228 void 229 aggr_grp_fini(void) 230 { 231 id_space_destroy(key_ids); 232 rw_destroy(&aggr_grp_lock); 233 mod_hash_destroy_idhash(aggr_grp_hash); 234 kmem_cache_destroy(aggr_grp_cache); 235 } 236 237 uint_t 238 aggr_grp_count(void) 239 { 240 uint_t count; 241 242 rw_enter(&aggr_grp_lock, RW_READER); 243 count = aggr_grp_cnt; 244 rw_exit(&aggr_grp_lock); 245 return (count); 246 } 247 248 /* 249 * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions 250 * requires the mac perimeter, this function holds a reference of the aggr 251 * and aggr won't call mac_unregister() until this reference drops to 0. 
252 */ 253 void 254 aggr_grp_port_hold(aggr_port_t *port) 255 { 256 aggr_grp_t *grp = port->lp_grp; 257 258 AGGR_PORT_REFHOLD(port); 259 mutex_enter(&grp->lg_port_lock); 260 grp->lg_port_ref++; 261 mutex_exit(&grp->lg_port_lock); 262 } 263 264 /* 265 * Release the reference of the grp and inform aggr_grp_delete() calling 266 * mac_unregister() is now safe. 267 */ 268 void 269 aggr_grp_port_rele(aggr_port_t *port) 270 { 271 aggr_grp_t *grp = port->lp_grp; 272 273 mutex_enter(&grp->lg_port_lock); 274 if (--grp->lg_port_ref == 0) 275 cv_signal(&grp->lg_port_cv); 276 mutex_exit(&grp->lg_port_lock); 277 AGGR_PORT_REFRELE(port); 278 } 279 280 /* 281 * Wait for the port's lacp timer thread and the port's notification callback 282 * to exit. 283 */ 284 void 285 aggr_grp_port_wait(aggr_grp_t *grp) 286 { 287 mutex_enter(&grp->lg_port_lock); 288 if (grp->lg_port_ref != 0) 289 cv_wait(&grp->lg_port_cv, &grp->lg_port_lock); 290 mutex_exit(&grp->lg_port_lock); 291 } 292 293 /* 294 * Attach a port to a link aggregation group. 295 * 296 * A port is attached to a link aggregation group once its speed 297 * and link state have been verified. 298 * 299 * Returns B_TRUE if the group link state or speed has changed. If 300 * it's the case, the caller must notify the MAC layer via a call 301 * to mac_link(). 302 */ 303 boolean_t 304 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) 305 { 306 boolean_t link_state_changed = B_FALSE; 307 308 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 309 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 310 311 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 312 return (B_FALSE); 313 314 /* 315 * Validate the MAC port link speed and update the group 316 * link speed if needed. 317 */ 318 if (port->lp_ifspeed == 0 || 319 port->lp_link_state != LINK_STATE_UP || 320 port->lp_link_duplex != LINK_DUPLEX_FULL) { 321 /* 322 * Can't attach a MAC port with unknown link speed, 323 * down link, or not in full duplex mode. 
324 */ 325 return (B_FALSE); 326 } 327 328 if (grp->lg_ifspeed == 0) { 329 /* 330 * The group inherits the speed of the first link being 331 * attached. 332 */ 333 grp->lg_ifspeed = port->lp_ifspeed; 334 link_state_changed = B_TRUE; 335 } else if (grp->lg_ifspeed != port->lp_ifspeed) { 336 /* 337 * The link speed of the MAC port must be the same as 338 * the group link speed, as per 802.3ad. Since it is 339 * not, the attach is cancelled. 340 */ 341 return (B_FALSE); 342 } 343 344 grp->lg_nattached_ports++; 345 346 /* 347 * Update the group link state. 348 */ 349 if (grp->lg_link_state != LINK_STATE_UP) { 350 grp->lg_link_state = LINK_STATE_UP; 351 grp->lg_link_duplex = LINK_DUPLEX_FULL; 352 link_state_changed = B_TRUE; 353 } 354 355 /* 356 * Update port's state. 357 */ 358 port->lp_state = AGGR_PORT_STATE_ATTACHED; 359 360 aggr_grp_multicst_port(port, B_TRUE); 361 362 /* 363 * Set port's receive callback 364 */ 365 mac_rx_set(port->lp_mch, aggr_recv_cb, port); 366 367 /* 368 * If LACP is OFF, the port can be used to send data as soon 369 * as its link is up and verified to be compatible with the 370 * aggregation. 371 * 372 * If LACP is active or passive, notify the LACP subsystem, which 373 * will enable sending on the port following the LACP protocol. 
374 */ 375 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 376 aggr_send_port_enable(port); 377 else 378 aggr_lacp_port_attached(port); 379 380 return (link_state_changed); 381 } 382 383 boolean_t 384 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) 385 { 386 boolean_t link_state_changed = B_FALSE; 387 388 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 389 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 390 391 /* update state */ 392 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 393 return (B_FALSE); 394 395 mac_rx_clear(port->lp_mch); 396 397 aggr_grp_multicst_port(port, B_FALSE); 398 399 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 400 aggr_send_port_disable(port); 401 else 402 aggr_lacp_port_detached(port); 403 404 port->lp_state = AGGR_PORT_STATE_STANDBY; 405 406 grp->lg_nattached_ports--; 407 if (grp->lg_nattached_ports == 0) { 408 /* the last attached MAC port of the group is being detached */ 409 grp->lg_ifspeed = 0; 410 grp->lg_link_state = LINK_STATE_DOWN; 411 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 412 link_state_changed = B_TRUE; 413 } 414 415 return (link_state_changed); 416 } 417 418 /* 419 * Update the MAC addresses of the constituent ports of the specified 420 * group. This function is invoked: 421 * - after creating a new aggregation group. 422 * - after adding new ports to an aggregation group. 423 * - after removing a port from a group when the MAC address of 424 * that port was used for the MAC address of the group. 425 * - after the MAC address of a port changed when the MAC address 426 * of that port was used for the MAC address of the group. 427 * 428 * Return true if the link state of the aggregation changed, for example 429 * as a result of a failure changing the MAC address of one of the 430 * constituent ports. 
431 */ 432 boolean_t 433 aggr_grp_update_ports_mac(aggr_grp_t *grp) 434 { 435 aggr_port_t *cport; 436 boolean_t link_state_changed = B_FALSE; 437 mac_perim_handle_t mph; 438 439 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 440 441 for (cport = grp->lg_ports; cport != NULL; 442 cport = cport->lp_next) { 443 mac_perim_enter_by_mh(cport->lp_mh, &mph); 444 if (aggr_port_unicst(cport) != 0) { 445 if (aggr_grp_detach_port(grp, cport)) 446 link_state_changed = B_TRUE; 447 } else { 448 /* 449 * If a port was detached because of a previous 450 * failure changing the MAC address, the port is 451 * reattached when it successfully changes the MAC 452 * address now, and this might cause the link state 453 * of the aggregation to change. 454 */ 455 if (aggr_grp_attach_port(grp, cport)) 456 link_state_changed = B_TRUE; 457 } 458 mac_perim_exit(mph); 459 } 460 return (link_state_changed); 461 } 462 463 /* 464 * Invoked when the MAC address of a port has changed. If the port's 465 * MAC address was used for the group MAC address, set mac_addr_changedp 466 * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST 467 * notification. If the link state changes due to detach/attach of 468 * the constituent port, set link_state_changedp to B_TRUE to indicate 469 * to the caller that it should send a MAC_NOTE_LINK notification. In both 470 * cases, it is the responsibility of the caller to invoke notification 471 * functions after releasing the the port lock. 472 */ 473 void 474 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port, 475 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 476 { 477 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 478 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 479 ASSERT(mac_addr_changedp != NULL); 480 ASSERT(link_state_changedp != NULL); 481 482 *mac_addr_changedp = B_FALSE; 483 *link_state_changedp = B_FALSE; 484 485 if (grp->lg_addr_fixed) { 486 /* 487 * The group is using a fixed MAC address or an automatic 488 * MAC address has not been set. 
489 */ 490 return; 491 } 492 493 if (grp->lg_mac_addr_port == port) { 494 /* 495 * The MAC address of the port was assigned to the group 496 * MAC address. Update the group MAC address. 497 */ 498 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 499 *mac_addr_changedp = B_TRUE; 500 } else { 501 /* 502 * Update the actual port MAC address to the MAC address 503 * of the group. 504 */ 505 if (aggr_port_unicst(port) != 0) { 506 *link_state_changedp = aggr_grp_detach_port(grp, port); 507 } else { 508 /* 509 * If a port was detached because of a previous 510 * failure changing the MAC address, the port is 511 * reattached when it successfully changes the MAC 512 * address now, and this might cause the link state 513 * of the aggregation to change. 514 */ 515 *link_state_changedp = aggr_grp_attach_port(grp, port); 516 } 517 } 518 } 519 520 /* 521 * Add a port to a link aggregation group. 522 */ 523 static int 524 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force, 525 aggr_port_t **pp) 526 { 527 aggr_port_t *port, **cport; 528 mac_perim_handle_t mph; 529 zoneid_t port_zoneid = ALL_ZONES; 530 int err; 531 532 /* The port must be int the same zone as the aggregation. */ 533 if (zone_check_datalink(&port_zoneid, port_linkid) != 0) 534 port_zoneid = GLOBAL_ZONEID; 535 if (grp->lg_zoneid != port_zoneid) 536 return (EBUSY); 537 538 /* 539 * lg_mh could be NULL when the function is called during the creation 540 * of the aggregation. 541 */ 542 ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh)); 543 544 /* create new port */ 545 err = aggr_port_create(grp, port_linkid, force, &port); 546 if (err != 0) 547 return (err); 548 549 mac_perim_enter_by_mh(port->lp_mh, &mph); 550 551 /* add port to list of group constituent ports */ 552 cport = &grp->lg_ports; 553 while (*cport != NULL) 554 cport = &((*cport)->lp_next); 555 *cport = port; 556 557 /* 558 * Back reference to the group it is member of. 
A port always 559 * holds a reference to its group to ensure that the back 560 * reference is always valid. 561 */ 562 port->lp_grp = grp; 563 AGGR_GRP_REFHOLD(grp); 564 grp->lg_nports++; 565 566 aggr_lacp_init_port(port); 567 mac_perim_exit(mph); 568 569 if (pp != NULL) 570 *pp = port; 571 572 return (0); 573 } 574 575 /* 576 * Add a pseudo RX ring for the given HW ring handle. 577 */ 578 static int 579 aggr_add_pseudo_rx_ring(aggr_port_t *port, 580 aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 581 { 582 aggr_pseudo_rx_ring_t *ring; 583 int err; 584 int j; 585 586 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 587 ring = rx_grp->arg_rings + j; 588 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE)) 589 break; 590 } 591 592 /* 593 * No slot for this new RX ring. 594 */ 595 if (j == MAX_RINGS_PER_GROUP) 596 return (EIO); 597 598 ring->arr_flags |= MAC_PSEUDO_RING_INUSE; 599 ring->arr_hw_rh = hw_rh; 600 ring->arr_port = port; 601 rx_grp->arg_ring_cnt++; 602 603 /* 604 * The group is already registered, dynamically add a new ring to the 605 * mac group. 606 */ 607 if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) { 608 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 609 ring->arr_hw_rh = NULL; 610 ring->arr_port = NULL; 611 rx_grp->arg_ring_cnt--; 612 } else { 613 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, 614 mac_find_ring(rx_grp->arg_gh, j)); 615 } 616 return (err); 617 } 618 619 /* 620 * Remove the pseudo RX ring of the given HW ring handle. 
621 */ 622 static void 623 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 624 { 625 aggr_pseudo_rx_ring_t *ring; 626 int j; 627 628 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 629 ring = rx_grp->arg_rings + j; 630 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) || 631 ring->arr_hw_rh != hw_rh) { 632 continue; 633 } 634 635 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh); 636 637 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 638 ring->arr_hw_rh = NULL; 639 ring->arr_port = NULL; 640 rx_grp->arg_ring_cnt--; 641 mac_hwring_teardown(hw_rh); 642 break; 643 } 644 } 645 646 /* 647 * This function is called to create pseudo rings over the hardware rings of 648 * the underlying device. Note that there is a 1:1 mapping between the pseudo 649 * RX rings of the aggr and the hardware rings of the underlying port. 650 */ 651 static int 652 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 653 { 654 aggr_grp_t *grp = port->lp_grp; 655 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 656 aggr_unicst_addr_t *addr, *a; 657 mac_perim_handle_t pmph; 658 int hw_rh_cnt, i = 0, j; 659 int err = 0; 660 661 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 662 mac_perim_enter_by_mh(port->lp_mh, &pmph); 663 664 /* 665 * This function must be called after the aggr registers its mac 666 * and its RX group has been initialized. 667 */ 668 ASSERT(rx_grp->arg_gh != NULL); 669 670 /* 671 * Get the list the the underlying HW rings. 672 */ 673 hw_rh_cnt = mac_hwrings_get(port->lp_mch, 674 &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX); 675 676 if (port->lp_hwgh != NULL) { 677 /* 678 * Quiesce the HW ring and the mac srs on the ring. Note 679 * that the HW ring will be restarted when the pseudo ring 680 * is started. At that time all the packets will be 681 * directly passed up to the pseudo RX ring and handled 682 * by mac srs created over the pseudo RX ring. 
683 */ 684 mac_rx_client_quiesce(port->lp_mch); 685 mac_srs_perm_quiesce(port->lp_mch, B_TRUE); 686 } 687 688 /* 689 * Add all the unicast addresses to the newly added port. 690 */ 691 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) { 692 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0) 693 break; 694 } 695 696 for (i = 0; err == 0 && i < hw_rh_cnt; i++) 697 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]); 698 699 if (err != 0) { 700 for (j = 0; j < i; j++) 701 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]); 702 703 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next) 704 aggr_port_remmac(port, a->aua_addr); 705 706 if (port->lp_hwgh != NULL) { 707 mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 708 mac_rx_client_restart(port->lp_mch); 709 port->lp_hwgh = NULL; 710 } 711 } else { 712 port->lp_rx_grp_added = B_TRUE; 713 } 714 done: 715 mac_perim_exit(pmph); 716 return (err); 717 } 718 719 /* 720 * This function is called by aggr to remove pseudo RX rings over the 721 * HW rings of the underlying port. 722 */ 723 static void 724 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 725 { 726 aggr_grp_t *grp = port->lp_grp; 727 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 728 aggr_unicst_addr_t *addr; 729 mac_group_handle_t hwgh; 730 mac_perim_handle_t pmph; 731 int hw_rh_cnt, i; 732 733 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 734 mac_perim_enter_by_mh(port->lp_mh, &pmph); 735 736 if (!port->lp_rx_grp_added) 737 goto done; 738 739 ASSERT(rx_grp->arg_gh != NULL); 740 hw_rh_cnt = mac_hwrings_get(port->lp_mch, 741 &hwgh, hw_rh, MAC_RING_TYPE_RX); 742 743 /* 744 * If hw_rh_cnt is 0, it means that the underlying port does not 745 * support RX rings. Directly return in this case. 
746 */ 747 for (i = 0; i < hw_rh_cnt; i++) 748 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]); 749 750 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) 751 aggr_port_remmac(port, addr->aua_addr); 752 753 if (port->lp_hwgh != NULL) { 754 port->lp_hwgh = NULL; 755 756 /* 757 * First clear the permanent-quiesced flag of the RX srs then 758 * restart the HW ring and the mac srs on the ring. Note that 759 * the HW ring and associated SRS will soon been removed when 760 * the port is removed from the aggr. 761 */ 762 mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 763 mac_rx_client_restart(port->lp_mch); 764 } 765 766 port->lp_rx_grp_added = B_FALSE; 767 done: 768 mac_perim_exit(pmph); 769 } 770 771 /* 772 * Add a pseudo TX ring for the given HW ring handle. 773 */ 774 static int 775 aggr_add_pseudo_tx_ring(aggr_port_t *port, 776 aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh, 777 mac_ring_handle_t *pseudo_rh) 778 { 779 aggr_pseudo_tx_ring_t *ring; 780 int err; 781 int i; 782 783 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 784 for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { 785 ring = tx_grp->atg_rings + i; 786 if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE)) 787 break; 788 } 789 /* 790 * No slot for this new TX ring. 791 */ 792 if (i == MAX_RINGS_PER_GROUP) 793 return (EIO); 794 /* 795 * The following 4 statements needs to be done before 796 * calling mac_group_add_ring(). Otherwise it will 797 * result in an assertion failure in mac_init_ring(). 798 */ 799 ring->atr_flags |= MAC_PSEUDO_RING_INUSE; 800 ring->atr_hw_rh = hw_rh; 801 ring->atr_port = port; 802 tx_grp->atg_ring_cnt++; 803 804 /* 805 * The TX side has no concept of ring groups unlike RX groups. 806 * There is just a single group which stores all the TX rings. 807 * This group will be used to store aggr's pseudo TX rings. 
808 */ 809 if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) { 810 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; 811 ring->atr_hw_rh = NULL; 812 ring->atr_port = NULL; 813 tx_grp->atg_ring_cnt--; 814 } else { 815 *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i); 816 if (hw_rh != NULL) { 817 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, 818 mac_find_ring(tx_grp->atg_gh, i)); 819 } 820 } 821 return (err); 822 } 823 824 /* 825 * Remove the pseudo TX ring of the given HW ring handle. 826 */ 827 static void 828 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp, 829 mac_ring_handle_t pseudo_hw_rh) 830 { 831 aggr_pseudo_tx_ring_t *ring; 832 int i; 833 834 for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { 835 ring = tx_grp->atg_rings + i; 836 if (ring->atr_rh != pseudo_hw_rh) 837 continue; 838 839 ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE); 840 mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh); 841 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; 842 mac_hwring_teardown(ring->atr_hw_rh); 843 ring->atr_hw_rh = NULL; 844 ring->atr_port = NULL; 845 tx_grp->atg_ring_cnt--; 846 break; 847 } 848 } 849 850 /* 851 * This function is called to create pseudo rings over hardware rings of 852 * the underlying device. There is a 1:1 mapping between the pseudo TX 853 * rings of the aggr and the hardware rings of the underlying port. 854 */ 855 static int 856 aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) 857 { 858 aggr_grp_t *grp = port->lp_grp; 859 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh; 860 mac_perim_handle_t pmph; 861 int hw_rh_cnt, i = 0, j; 862 int err = 0; 863 864 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 865 mac_perim_enter_by_mh(port->lp_mh, &pmph); 866 867 /* 868 * Get the list the the underlying HW rings. 
869 */ 870 hw_rh_cnt = mac_hwrings_get(port->lp_mch, 871 NULL, hw_rh, MAC_RING_TYPE_TX); 872 873 /* 874 * Even if the underlying NIC does not have TX rings, we 875 * still make a psuedo TX ring for that NIC with NULL as 876 * the ring handle. 877 */ 878 if (hw_rh_cnt == 0) 879 port->lp_tx_ring_cnt = 1; 880 else 881 port->lp_tx_ring_cnt = hw_rh_cnt; 882 883 port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * 884 port->lp_tx_ring_cnt), KM_SLEEP); 885 port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * 886 port->lp_tx_ring_cnt), KM_SLEEP); 887 888 if (hw_rh_cnt == 0) { 889 if ((err = aggr_add_pseudo_tx_ring(port, tx_grp, 890 NULL, &pseudo_rh)) == 0) { 891 port->lp_tx_rings[0] = NULL; 892 port->lp_pseudo_tx_rings[0] = pseudo_rh; 893 } 894 } else { 895 for (i = 0; err == 0 && i < hw_rh_cnt; i++) { 896 err = aggr_add_pseudo_tx_ring(port, 897 tx_grp, hw_rh[i], &pseudo_rh); 898 if (err != 0) 899 break; 900 port->lp_tx_rings[i] = hw_rh[i]; 901 port->lp_pseudo_tx_rings[i] = pseudo_rh; 902 } 903 } 904 905 if (err != 0) { 906 if (hw_rh_cnt != 0) { 907 for (j = 0; j < i; j++) { 908 aggr_rem_pseudo_tx_ring(tx_grp, 909 port->lp_pseudo_tx_rings[j]); 910 } 911 } 912 kmem_free(port->lp_tx_rings, 913 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 914 kmem_free(port->lp_pseudo_tx_rings, 915 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 916 port->lp_tx_ring_cnt = 0; 917 } else { 918 port->lp_tx_grp_added = B_TRUE; 919 port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch, 920 aggr_tx_ring_update, port); 921 } 922 mac_perim_exit(pmph); 923 return (err); 924 } 925 926 /* 927 * This function is called by aggr to remove pseudo TX rings over the 928 * HW rings of the underlying port. 
929 */ 930 static void 931 aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) 932 { 933 aggr_grp_t *grp = port->lp_grp; 934 mac_perim_handle_t pmph; 935 int i; 936 937 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 938 mac_perim_enter_by_mh(port->lp_mh, &pmph); 939 940 if (!port->lp_tx_grp_added) 941 goto done; 942 943 ASSERT(tx_grp->atg_gh != NULL); 944 945 for (i = 0; i < port->lp_tx_ring_cnt; i++) 946 aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]); 947 948 kmem_free(port->lp_tx_rings, 949 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 950 kmem_free(port->lp_pseudo_tx_rings, 951 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 952 953 port->lp_tx_ring_cnt = 0; 954 (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh); 955 port->lp_tx_grp_added = B_FALSE; 956 done: 957 mac_perim_exit(pmph); 958 } 959 960 static int 961 aggr_pseudo_disable_intr(mac_intr_handle_t ih) 962 { 963 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 964 return (mac_hwring_disable_intr(rr_ring->arr_hw_rh)); 965 } 966 967 static int 968 aggr_pseudo_enable_intr(mac_intr_handle_t ih) 969 { 970 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 971 return (mac_hwring_enable_intr(rr_ring->arr_hw_rh)); 972 } 973 974 static int 975 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen) 976 { 977 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg; 978 int err; 979 980 err = mac_hwring_start(rr_ring->arr_hw_rh); 981 if (err == 0) 982 rr_ring->arr_gen = mr_gen; 983 return (err); 984 } 985 986 static void 987 aggr_pseudo_stop_ring(mac_ring_driver_t arg) 988 { 989 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg; 990 mac_hwring_stop(rr_ring->arr_hw_rh); 991 } 992 993 /* 994 * Add one or more ports to an existing link aggregation group. 
995 */ 996 int 997 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force, 998 laioc_port_t *ports) 999 { 1000 int rc, i, nadded = 0; 1001 aggr_grp_t *grp = NULL; 1002 aggr_port_t *port; 1003 boolean_t link_state_changed = B_FALSE; 1004 mac_perim_handle_t mph, pmph; 1005 1006 /* get group corresponding to linkid */ 1007 rw_enter(&aggr_grp_lock, RW_READER); 1008 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1009 (mod_hash_val_t *)&grp) != 0) { 1010 rw_exit(&aggr_grp_lock); 1011 return (ENOENT); 1012 } 1013 AGGR_GRP_REFHOLD(grp); 1014 1015 /* 1016 * Hold the perimeter so that the aggregation won't be destroyed. 1017 */ 1018 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1019 rw_exit(&aggr_grp_lock); 1020 1021 /* add the specified ports to group */ 1022 for (i = 0; i < nports; i++) { 1023 /* add port to group */ 1024 if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid, 1025 force, &port)) != 0) { 1026 goto bail; 1027 } 1028 ASSERT(port != NULL); 1029 nadded++; 1030 1031 /* check capabilities */ 1032 if (!aggr_grp_capab_check(grp, port) || 1033 !aggr_grp_sdu_check(grp, port) || 1034 !aggr_grp_margin_check(grp, port)) { 1035 rc = ENOTSUP; 1036 goto bail; 1037 } 1038 1039 /* 1040 * Create the pseudo ring for each HW ring of the underlying 1041 * port. 
1042 */ 1043 rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group); 1044 if (rc != 0) 1045 goto bail; 1046 rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group); 1047 if (rc != 0) 1048 goto bail; 1049 1050 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1051 1052 /* set LACP mode */ 1053 aggr_port_lacp_set_mode(grp, port); 1054 1055 /* start port if group has already been started */ 1056 if (grp->lg_started) { 1057 rc = aggr_port_start(port); 1058 if (rc != 0) { 1059 mac_perim_exit(pmph); 1060 goto bail; 1061 } 1062 1063 /* 1064 * Turn on the promiscuous mode over the port when it 1065 * is requested to be turned on to receive the 1066 * non-primary address over a port, or the promiscous 1067 * mode is enabled over the aggr. 1068 */ 1069 if (grp->lg_promisc || port->lp_prom_addr != NULL) { 1070 rc = aggr_port_promisc(port, B_TRUE); 1071 if (rc != 0) { 1072 mac_perim_exit(pmph); 1073 goto bail; 1074 } 1075 } 1076 } 1077 mac_perim_exit(pmph); 1078 1079 /* 1080 * Attach each port if necessary. 1081 */ 1082 if (aggr_port_notify_link(grp, port)) 1083 link_state_changed = B_TRUE; 1084 1085 /* 1086 * Initialize the callback functions for this port. 
1087 */ 1088 aggr_port_init_callbacks(port); 1089 } 1090 1091 /* update the MAC address of the constituent ports */ 1092 if (aggr_grp_update_ports_mac(grp)) 1093 link_state_changed = B_TRUE; 1094 1095 if (link_state_changed) 1096 mac_link_update(grp->lg_mh, grp->lg_link_state); 1097 1098 bail: 1099 if (rc != 0) { 1100 /* stop and remove ports that have been added */ 1101 for (i = 0; i < nadded; i++) { 1102 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1103 ASSERT(port != NULL); 1104 if (grp->lg_started) { 1105 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1106 (void) aggr_port_promisc(port, B_FALSE); 1107 aggr_port_stop(port); 1108 mac_perim_exit(pmph); 1109 } 1110 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); 1111 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1112 (void) aggr_grp_rem_port(grp, port, NULL, NULL); 1113 } 1114 } 1115 1116 mac_perim_exit(mph); 1117 AGGR_GRP_REFRELE(grp); 1118 return (rc); 1119 } 1120 1121 static int 1122 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy, 1123 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 1124 aggr_lacp_timer_t lacp_timer) 1125 { 1126 boolean_t mac_addr_changed = B_FALSE; 1127 boolean_t link_state_changed = B_FALSE; 1128 mac_perim_handle_t pmph; 1129 1130 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1131 1132 /* validate fixed address if specified */ 1133 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 1134 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 1135 (mac_addr[0] & 0x01))) { 1136 return (EINVAL); 1137 } 1138 1139 /* update policy if requested */ 1140 if (update_mask & AGGR_MODIFY_POLICY) 1141 aggr_send_update_policy(grp, policy); 1142 1143 /* update unicast MAC address if requested */ 1144 if (update_mask & AGGR_MODIFY_MAC) { 1145 if (mac_fixed) { 1146 /* user-supplied MAC address */ 1147 grp->lg_mac_addr_port = NULL; 1148 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 1149 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 1150 
mac_addr_changed = B_TRUE; 1151 } 1152 } else if (grp->lg_addr_fixed) { 1153 /* switch from user-supplied to automatic */ 1154 aggr_port_t *port = grp->lg_ports; 1155 1156 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1157 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 1158 grp->lg_mac_addr_port = port; 1159 mac_addr_changed = B_TRUE; 1160 mac_perim_exit(pmph); 1161 } 1162 grp->lg_addr_fixed = mac_fixed; 1163 } 1164 1165 if (mac_addr_changed) 1166 link_state_changed = aggr_grp_update_ports_mac(grp); 1167 1168 if (update_mask & AGGR_MODIFY_LACP_MODE) 1169 aggr_lacp_update_mode(grp, lacp_mode); 1170 1171 if (update_mask & AGGR_MODIFY_LACP_TIMER) 1172 aggr_lacp_update_timer(grp, lacp_timer); 1173 1174 if (link_state_changed) 1175 mac_link_update(grp->lg_mh, grp->lg_link_state); 1176 1177 if (mac_addr_changed) 1178 mac_unicst_update(grp->lg_mh, grp->lg_addr); 1179 1180 return (0); 1181 } 1182 1183 /* 1184 * Update properties of an existing link aggregation group. 1185 */ 1186 int 1187 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy, 1188 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 1189 aggr_lacp_timer_t lacp_timer) 1190 { 1191 aggr_grp_t *grp = NULL; 1192 mac_perim_handle_t mph; 1193 int err; 1194 1195 /* get group corresponding to linkid */ 1196 rw_enter(&aggr_grp_lock, RW_READER); 1197 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1198 (mod_hash_val_t *)&grp) != 0) { 1199 rw_exit(&aggr_grp_lock); 1200 return (ENOENT); 1201 } 1202 AGGR_GRP_REFHOLD(grp); 1203 1204 /* 1205 * Hold the perimeter so that the aggregation won't be destroyed. 1206 */ 1207 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1208 rw_exit(&aggr_grp_lock); 1209 1210 err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed, 1211 mac_addr, lacp_mode, lacp_timer); 1212 1213 mac_perim_exit(mph); 1214 AGGR_GRP_REFRELE(grp); 1215 return (err); 1216 } 1217 1218 /* 1219 * Create a new link aggregation group upon request from administrator. 
1220 * Returns 0 on success, an errno on failure. 1221 */ 1222 int 1223 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, 1224 laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force, 1225 uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer, 1226 cred_t *credp) 1227 { 1228 aggr_grp_t *grp = NULL; 1229 aggr_port_t *port; 1230 mac_register_t *mac; 1231 boolean_t link_state_changed; 1232 mac_perim_handle_t mph; 1233 int err; 1234 int i; 1235 kt_did_t tid = 0; 1236 1237 /* need at least one port */ 1238 if (nports == 0) 1239 return (EINVAL); 1240 1241 rw_enter(&aggr_grp_lock, RW_WRITER); 1242 1243 /* does a group with the same linkid already exist? */ 1244 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1245 (mod_hash_val_t *)&grp); 1246 if (err == 0) { 1247 rw_exit(&aggr_grp_lock); 1248 return (EEXIST); 1249 } 1250 1251 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 1252 1253 grp->lg_refs = 1; 1254 grp->lg_closing = B_FALSE; 1255 grp->lg_force = force; 1256 grp->lg_linkid = linkid; 1257 grp->lg_zoneid = crgetzoneid(credp); 1258 grp->lg_ifspeed = 0; 1259 grp->lg_link_state = LINK_STATE_UNKNOWN; 1260 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 1261 grp->lg_started = B_FALSE; 1262 grp->lg_promisc = B_FALSE; 1263 grp->lg_lacp_done = B_FALSE; 1264 grp->lg_tx_notify_done = B_FALSE; 1265 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 1266 grp->lg_lacp_rx_thread = thread_create(NULL, 0, 1267 aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri); 1268 grp->lg_tx_notify_thread = thread_create(NULL, 0, 1269 aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri); 1270 grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * 1271 MAX_RINGS_PER_GROUP), KM_SLEEP); 1272 grp->lg_tx_blocked_cnt = 0; 1273 bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t)); 1274 bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t)); 1275 aggr_lacp_init_grp(grp); 1276 1277 /* add MAC ports to group */ 1278 
grp->lg_ports = NULL; 1279 grp->lg_nports = 0; 1280 grp->lg_nattached_ports = 0; 1281 grp->lg_ntx_ports = 0; 1282 1283 /* 1284 * If key is not specified by the user, allocate the key. 1285 */ 1286 if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) { 1287 err = ENOMEM; 1288 goto bail; 1289 } 1290 grp->lg_key = key; 1291 1292 for (i = 0; i < nports; i++) { 1293 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL); 1294 if (err != 0) 1295 goto bail; 1296 } 1297 1298 /* 1299 * If no explicit MAC address was specified by the administrator, 1300 * set it to the MAC address of the first port. 1301 */ 1302 grp->lg_addr_fixed = mac_fixed; 1303 if (grp->lg_addr_fixed) { 1304 /* validate specified address */ 1305 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 1306 err = EINVAL; 1307 goto bail; 1308 } 1309 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 1310 } else { 1311 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 1312 grp->lg_mac_addr_port = grp->lg_ports; 1313 } 1314 1315 /* set the initial group capabilities */ 1316 aggr_grp_capab_set(grp); 1317 1318 if ((mac = mac_alloc(MAC_VERSION)) == NULL) { 1319 err = ENOMEM; 1320 goto bail; 1321 } 1322 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1323 mac->m_driver = grp; 1324 mac->m_dip = aggr_dip; 1325 mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? 
(uint_t)-1 : grp->lg_key; 1326 mac->m_src_addr = grp->lg_addr; 1327 mac->m_callbacks = &aggr_m_callbacks; 1328 mac->m_min_sdu = 0; 1329 mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp); 1330 mac->m_margin = aggr_grp_max_margin(grp); 1331 mac->m_v12n = MAC_VIRT_LEVEL1; 1332 err = mac_register(mac, &grp->lg_mh); 1333 mac_free(mac); 1334 if (err != 0) 1335 goto bail; 1336 1337 err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp)); 1338 if (err != 0) { 1339 (void) mac_unregister(grp->lg_mh); 1340 grp->lg_mh = NULL; 1341 goto bail; 1342 } 1343 1344 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1345 1346 /* 1347 * Update the MAC address of the constituent ports. 1348 * None of the port is attached at this time, the link state of the 1349 * aggregation will not change. 1350 */ 1351 link_state_changed = aggr_grp_update_ports_mac(grp); 1352 ASSERT(!link_state_changed); 1353 1354 /* update outbound load balancing policy */ 1355 aggr_send_update_policy(grp, policy); 1356 1357 /* set LACP mode */ 1358 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 1359 1360 /* 1361 * Attach each port if necessary. 1362 */ 1363 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1364 /* 1365 * Create the pseudo ring for each HW ring of the underlying 1366 * port. Note that this is done after the aggr registers the 1367 * mac. 1368 */ 1369 VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0); 1370 VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0); 1371 if (aggr_port_notify_link(grp, port)) 1372 link_state_changed = B_TRUE; 1373 1374 /* 1375 * Initialize the callback functions for this port. 
1376 */ 1377 aggr_port_init_callbacks(port); 1378 } 1379 1380 if (link_state_changed) 1381 mac_link_update(grp->lg_mh, grp->lg_link_state); 1382 1383 /* add new group to hash table */ 1384 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid), 1385 (mod_hash_val_t)grp); 1386 ASSERT(err == 0); 1387 aggr_grp_cnt++; 1388 1389 mac_perim_exit(mph); 1390 rw_exit(&aggr_grp_lock); 1391 return (0); 1392 1393 bail: 1394 1395 grp->lg_closing = B_TRUE; 1396 1397 port = grp->lg_ports; 1398 while (port != NULL) { 1399 aggr_port_t *cport; 1400 1401 cport = port->lp_next; 1402 aggr_port_delete(port); 1403 port = cport; 1404 } 1405 1406 /* 1407 * Inform the lacp_rx thread to exit. 1408 */ 1409 mutex_enter(&grp->lg_lacp_lock); 1410 grp->lg_lacp_done = B_TRUE; 1411 cv_signal(&grp->lg_lacp_cv); 1412 while (grp->lg_lacp_rx_thread != NULL) 1413 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 1414 mutex_exit(&grp->lg_lacp_lock); 1415 /* 1416 * Inform the tx_notify thread to exit. 1417 */ 1418 mutex_enter(&grp->lg_tx_flowctl_lock); 1419 if (grp->lg_tx_notify_thread != NULL) { 1420 tid = grp->lg_tx_notify_thread->t_did; 1421 grp->lg_tx_notify_done = B_TRUE; 1422 cv_signal(&grp->lg_tx_flowctl_cv); 1423 } 1424 mutex_exit(&grp->lg_tx_flowctl_lock); 1425 if (tid != 0) 1426 thread_join(tid); 1427 1428 kmem_free(grp->lg_tx_blocked_rings, 1429 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); 1430 rw_exit(&aggr_grp_lock); 1431 AGGR_GRP_REFRELE(grp); 1432 return (err); 1433 } 1434 1435 /* 1436 * Return a pointer to the member of a group with specified linkid. 1437 */ 1438 static aggr_port_t * 1439 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid) 1440 { 1441 aggr_port_t *port; 1442 1443 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1444 1445 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1446 if (port->lp_linkid == linkid) 1447 break; 1448 } 1449 1450 return (port); 1451 } 1452 1453 /* 1454 * Stop, detach and remove a port from a link aggregation group. 
1455 */ 1456 static int 1457 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, 1458 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 1459 { 1460 int rc = 0; 1461 aggr_port_t **pport; 1462 boolean_t mac_addr_changed = B_FALSE; 1463 boolean_t link_state_changed = B_FALSE; 1464 mac_perim_handle_t mph; 1465 uint64_t val; 1466 uint_t i; 1467 uint_t stat; 1468 1469 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1470 ASSERT(grp->lg_nports > 1); 1471 ASSERT(!grp->lg_closing); 1472 1473 /* unlink port */ 1474 for (pport = &grp->lg_ports; *pport != port; 1475 pport = &(*pport)->lp_next) { 1476 if (*pport == NULL) { 1477 rc = ENOENT; 1478 goto done; 1479 } 1480 } 1481 *pport = port->lp_next; 1482 1483 mac_perim_enter_by_mh(port->lp_mh, &mph); 1484 1485 /* 1486 * If the MAC address of the port being removed was assigned 1487 * to the group, update the group MAC address 1488 * using the MAC address of a different port. 1489 */ 1490 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 1491 /* 1492 * Set the MAC address of the group to the 1493 * MAC address of its first port. 1494 */ 1495 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 1496 grp->lg_mac_addr_port = grp->lg_ports; 1497 mac_addr_changed = B_TRUE; 1498 } 1499 1500 link_state_changed = aggr_grp_detach_port(grp, port); 1501 1502 /* 1503 * Add the counter statistics of the ports while it was aggregated 1504 * to the group's residual statistics. This is done by obtaining 1505 * the current counter from the underlying MAC then subtracting the 1506 * value of the counter at the moment it was added to the 1507 * aggregation. 
1508 */ 1509 for (i = 0; i < MAC_NSTAT; i++) { 1510 stat = i + MAC_STAT_MIN; 1511 if (!MAC_STAT_ISACOUNTER(stat)) 1512 continue; 1513 val = aggr_port_stat(port, stat); 1514 val -= port->lp_stat[i]; 1515 grp->lg_stat[i] += val; 1516 } 1517 for (i = 0; i < ETHER_NSTAT; i++) { 1518 stat = i + MACTYPE_STAT_MIN; 1519 if (!ETHER_STAT_ISACOUNTER(stat)) 1520 continue; 1521 val = aggr_port_stat(port, stat); 1522 val -= port->lp_ether_stat[i]; 1523 grp->lg_ether_stat[i] += val; 1524 } 1525 1526 grp->lg_nports--; 1527 mac_perim_exit(mph); 1528 1529 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); 1530 aggr_port_delete(port); 1531 1532 /* 1533 * If the group MAC address has changed, update the MAC address of 1534 * the remaining constituent ports according to the new MAC 1535 * address of the group. 1536 */ 1537 if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) 1538 link_state_changed = B_TRUE; 1539 1540 done: 1541 if (mac_addr_changedp != NULL) 1542 *mac_addr_changedp = mac_addr_changed; 1543 if (link_state_changedp != NULL) 1544 *link_state_changedp = link_state_changed; 1545 1546 return (rc); 1547 } 1548 1549 /* 1550 * Remove one or more ports from an existing link aggregation group. 1551 */ 1552 int 1553 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports) 1554 { 1555 int rc = 0, i; 1556 aggr_grp_t *grp = NULL; 1557 aggr_port_t *port; 1558 boolean_t mac_addr_update = B_FALSE, mac_addr_changed; 1559 boolean_t link_state_update = B_FALSE, link_state_changed; 1560 mac_perim_handle_t mph, pmph; 1561 1562 /* get group corresponding to linkid */ 1563 rw_enter(&aggr_grp_lock, RW_READER); 1564 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1565 (mod_hash_val_t *)&grp) != 0) { 1566 rw_exit(&aggr_grp_lock); 1567 return (ENOENT); 1568 } 1569 AGGR_GRP_REFHOLD(grp); 1570 1571 /* 1572 * Hold the perimeter so that the aggregation won't be destroyed. 
1573 */ 1574 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1575 rw_exit(&aggr_grp_lock); 1576 1577 /* we need to keep at least one port per group */ 1578 if (nports >= grp->lg_nports) { 1579 rc = EINVAL; 1580 goto bail; 1581 } 1582 1583 /* first verify that all the groups are valid */ 1584 for (i = 0; i < nports; i++) { 1585 if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) { 1586 /* port not found */ 1587 rc = ENOENT; 1588 goto bail; 1589 } 1590 } 1591 1592 /* clear the promiscous mode for the specified ports */ 1593 for (i = 0; i < nports && rc == 0; i++) { 1594 /* lookup port */ 1595 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1596 ASSERT(port != NULL); 1597 1598 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1599 rc = aggr_port_promisc(port, B_FALSE); 1600 mac_perim_exit(pmph); 1601 } 1602 if (rc != 0) { 1603 for (i = 0; i < nports; i++) { 1604 port = aggr_grp_port_lookup(grp, 1605 ports[i].lp_linkid); 1606 ASSERT(port != NULL); 1607 1608 /* 1609 * Turn the promiscuous mode back on if it is required 1610 * to receive the non-primary address over a port, or 1611 * the promiscous mode is enabled over the aggr. 1612 */ 1613 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1614 if (port->lp_started && (grp->lg_promisc || 1615 port->lp_prom_addr != NULL)) { 1616 (void) aggr_port_promisc(port, B_TRUE); 1617 } 1618 mac_perim_exit(pmph); 1619 } 1620 goto bail; 1621 } 1622 1623 /* remove the specified ports from group */ 1624 for (i = 0; i < nports; i++) { 1625 /* lookup port */ 1626 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1627 ASSERT(port != NULL); 1628 1629 /* stop port if group has already been started */ 1630 if (grp->lg_started) { 1631 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1632 aggr_port_stop(port); 1633 mac_perim_exit(pmph); 1634 } 1635 1636 /* 1637 * aggr_rem_pseudo_tx_group() is not called here. Instead 1638 * it is called from inside aggr_grp_rem_port() after the 1639 * port has been detached. 
The reason is that 1640 * aggr_rem_pseudo_tx_group() removes one ring at a time 1641 * and if there is still traffic going on, then there 1642 * is the possibility of aggr_find_tx_ring() returning a 1643 * removed ring for transmission. Once the port has been 1644 * detached, that port will not be used and 1645 * aggr_find_tx_ring() will not return any rings 1646 * belonging to it. 1647 */ 1648 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1649 1650 /* remove port from group */ 1651 rc = aggr_grp_rem_port(grp, port, &mac_addr_changed, 1652 &link_state_changed); 1653 ASSERT(rc == 0); 1654 mac_addr_update = mac_addr_update || mac_addr_changed; 1655 link_state_update = link_state_update || link_state_changed; 1656 } 1657 1658 bail: 1659 if (mac_addr_update) 1660 mac_unicst_update(grp->lg_mh, grp->lg_addr); 1661 if (link_state_update) 1662 mac_link_update(grp->lg_mh, grp->lg_link_state); 1663 1664 mac_perim_exit(mph); 1665 AGGR_GRP_REFRELE(grp); 1666 1667 return (rc); 1668 } 1669 1670 int 1671 aggr_grp_delete(datalink_id_t linkid, cred_t *cred) 1672 { 1673 aggr_grp_t *grp = NULL; 1674 aggr_port_t *port, *cport; 1675 datalink_id_t tmpid; 1676 mod_hash_val_t val; 1677 mac_perim_handle_t mph, pmph; 1678 int err; 1679 kt_did_t tid = 0; 1680 1681 rw_enter(&aggr_grp_lock, RW_WRITER); 1682 1683 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1684 (mod_hash_val_t *)&grp) != 0) { 1685 rw_exit(&aggr_grp_lock); 1686 return (ENOENT); 1687 } 1688 1689 /* 1690 * Note that dls_devnet_destroy() must be called before lg_lock is 1691 * held. Otherwise, it will deadlock if another thread is in 1692 * aggr_m_stat() and thus has a kstat_hold() on the kstats that 1693 * dls_devnet_destroy() needs to delete. 1694 */ 1695 if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) { 1696 rw_exit(&aggr_grp_lock); 1697 return (err); 1698 } 1699 ASSERT(linkid == tmpid); 1700 1701 /* 1702 * Unregister from the MAC service module. 
Since this can 1703 * fail if a client hasn't closed the MAC port, we gracefully 1704 * fail the operation. 1705 */ 1706 if ((err = mac_disable(grp->lg_mh)) != 0) { 1707 (void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred)); 1708 rw_exit(&aggr_grp_lock); 1709 return (err); 1710 } 1711 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val); 1712 ASSERT(grp == (aggr_grp_t *)val); 1713 1714 ASSERT(aggr_grp_cnt > 0); 1715 aggr_grp_cnt--; 1716 rw_exit(&aggr_grp_lock); 1717 1718 /* 1719 * Inform the lacp_rx thread to exit. 1720 */ 1721 mutex_enter(&grp->lg_lacp_lock); 1722 grp->lg_lacp_done = B_TRUE; 1723 cv_signal(&grp->lg_lacp_cv); 1724 while (grp->lg_lacp_rx_thread != NULL) 1725 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 1726 mutex_exit(&grp->lg_lacp_lock); 1727 /* 1728 * Inform the tx_notify_thread to exit. 1729 */ 1730 mutex_enter(&grp->lg_tx_flowctl_lock); 1731 if (grp->lg_tx_notify_thread != NULL) { 1732 tid = grp->lg_tx_notify_thread->t_did; 1733 grp->lg_tx_notify_done = B_TRUE; 1734 cv_signal(&grp->lg_tx_flowctl_cv); 1735 } 1736 mutex_exit(&grp->lg_tx_flowctl_lock); 1737 if (tid != 0) 1738 thread_join(tid); 1739 1740 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1741 1742 grp->lg_closing = B_TRUE; 1743 /* detach and free MAC ports associated with group */ 1744 port = grp->lg_ports; 1745 while (port != NULL) { 1746 cport = port->lp_next; 1747 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1748 if (grp->lg_started) 1749 aggr_port_stop(port); 1750 (void) aggr_grp_detach_port(grp, port); 1751 mac_perim_exit(pmph); 1752 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); 1753 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1754 aggr_port_delete(port); 1755 port = cport; 1756 } 1757 1758 mac_perim_exit(mph); 1759 1760 kmem_free(grp->lg_tx_blocked_rings, 1761 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); 1762 /* 1763 * Wait for the port's lacp timer thread and its notification callback 1764 * to exit before calling mac_unregister() since both 
needs to access 1765 * the mac perimeter of the grp. 1766 */ 1767 aggr_grp_port_wait(grp); 1768 1769 VERIFY(mac_unregister(grp->lg_mh) == 0); 1770 grp->lg_mh = NULL; 1771 1772 AGGR_GRP_REFRELE(grp); 1773 return (0); 1774 } 1775 1776 void 1777 aggr_grp_free(aggr_grp_t *grp) 1778 { 1779 ASSERT(grp->lg_refs == 0); 1780 ASSERT(grp->lg_port_ref == 0); 1781 if (grp->lg_key > AGGR_MAX_KEY) { 1782 id_free(key_ids, grp->lg_key); 1783 grp->lg_key = 0; 1784 } 1785 kmem_cache_free(aggr_grp_cache, grp); 1786 } 1787 1788 int 1789 aggr_grp_info(datalink_id_t linkid, void *fn_arg, 1790 aggr_grp_info_new_grp_fn_t new_grp_fn, 1791 aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred) 1792 { 1793 aggr_grp_t *grp; 1794 aggr_port_t *port; 1795 mac_perim_handle_t mph, pmph; 1796 int rc = 0; 1797 1798 /* 1799 * Make sure that the aggregation link is visible from the caller's 1800 * zone. 1801 */ 1802 if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred))) 1803 return (ENOENT); 1804 1805 rw_enter(&aggr_grp_lock, RW_READER); 1806 1807 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1808 (mod_hash_val_t *)&grp) != 0) { 1809 rw_exit(&aggr_grp_lock); 1810 return (ENOENT); 1811 } 1812 AGGR_GRP_REFHOLD(grp); 1813 1814 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1815 rw_exit(&aggr_grp_lock); 1816 1817 rc = new_grp_fn(fn_arg, grp->lg_linkid, 1818 (grp->lg_key > AGGR_MAX_KEY) ? 
0 : grp->lg_key, grp->lg_addr, 1819 grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy, 1820 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 1821 1822 if (rc != 0) 1823 goto bail; 1824 1825 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1826 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1827 rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr, 1828 port->lp_state, &port->lp_lacp.ActorOperPortState); 1829 mac_perim_exit(pmph); 1830 1831 if (rc != 0) 1832 goto bail; 1833 } 1834 1835 bail: 1836 mac_perim_exit(mph); 1837 AGGR_GRP_REFRELE(grp); 1838 return (rc); 1839 } 1840 1841 /*ARGSUSED*/ 1842 static void 1843 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1844 { 1845 miocnak(q, mp, 0, ENOTSUP); 1846 } 1847 1848 static int 1849 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) 1850 { 1851 aggr_port_t *port; 1852 uint_t stat_index; 1853 1854 /* We only aggregate counter statistics. */ 1855 if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) || 1856 IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) { 1857 return (ENOTSUP); 1858 } 1859 1860 /* 1861 * Counter statistics for a group are computed by aggregating the 1862 * counters of the members MACs while they were aggregated, plus 1863 * the residual counter of the group itself, which is updated each 1864 * time a MAC is removed from the group. 1865 */ 1866 *val = 0; 1867 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1868 /* actual port statistic */ 1869 *val += aggr_port_stat(port, stat); 1870 /* 1871 * minus the port stat when it was added, plus any residual 1872 * amount for the group. 
1873 */ 1874 if (IS_MAC_STAT(stat)) { 1875 stat_index = stat - MAC_STAT_MIN; 1876 *val -= port->lp_stat[stat_index]; 1877 *val += grp->lg_stat[stat_index]; 1878 } else if (IS_MACTYPE_STAT(stat)) { 1879 stat_index = stat - MACTYPE_STAT_MIN; 1880 *val -= port->lp_ether_stat[stat_index]; 1881 *val += grp->lg_ether_stat[stat_index]; 1882 } 1883 } 1884 return (0); 1885 } 1886 1887 int 1888 aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 1889 { 1890 aggr_pseudo_rx_ring_t *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver; 1891 1892 if (rx_ring->arr_hw_rh != NULL) { 1893 *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat); 1894 } else { 1895 aggr_port_t *port = rx_ring->arr_port; 1896 1897 *val = mac_stat_get(port->lp_mh, stat); 1898 1899 } 1900 return (0); 1901 } 1902 1903 int 1904 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 1905 { 1906 aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver; 1907 1908 if (tx_ring->atr_hw_rh != NULL) { 1909 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat); 1910 } else { 1911 aggr_port_t *port = tx_ring->atr_port; 1912 1913 *val = mac_stat_get(port->lp_mh, stat); 1914 } 1915 return (0); 1916 } 1917 1918 static int 1919 aggr_m_stat(void *arg, uint_t stat, uint64_t *val) 1920 { 1921 aggr_grp_t *grp = arg; 1922 mac_perim_handle_t mph; 1923 int rval = 0; 1924 1925 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1926 1927 switch (stat) { 1928 case MAC_STAT_IFSPEED: 1929 *val = grp->lg_ifspeed; 1930 break; 1931 1932 case ETHER_STAT_LINK_DUPLEX: 1933 *val = grp->lg_link_duplex; 1934 break; 1935 1936 default: 1937 /* 1938 * For all other statistics, we return the aggregated stat 1939 * from the underlying ports. aggr_grp_stat() will set 1940 * rval appropriately if the statistic isn't a counter. 
1941 */ 1942 rval = aggr_grp_stat(grp, stat, val); 1943 } 1944 1945 mac_perim_exit(mph); 1946 return (rval); 1947 } 1948 1949 static int 1950 aggr_m_start(void *arg) 1951 { 1952 aggr_grp_t *grp = arg; 1953 aggr_port_t *port; 1954 mac_perim_handle_t mph, pmph; 1955 1956 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1957 1958 /* 1959 * Attempts to start all configured members of the group. 1960 * Group members will be attached when their link-up notification 1961 * is received. 1962 */ 1963 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1964 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1965 if (aggr_port_start(port) != 0) { 1966 mac_perim_exit(pmph); 1967 continue; 1968 } 1969 1970 /* 1971 * Turn on the promiscuous mode if it is required to receive 1972 * the non-primary address over a port, or the promiscous 1973 * mode is enabled over the aggr. 1974 */ 1975 if (grp->lg_promisc || port->lp_prom_addr != NULL) { 1976 if (aggr_port_promisc(port, B_TRUE) != 0) 1977 aggr_port_stop(port); 1978 } 1979 mac_perim_exit(pmph); 1980 } 1981 1982 grp->lg_started = B_TRUE; 1983 1984 mac_perim_exit(mph); 1985 return (0); 1986 } 1987 1988 static void 1989 aggr_m_stop(void *arg) 1990 { 1991 aggr_grp_t *grp = arg; 1992 aggr_port_t *port; 1993 mac_perim_handle_t mph, pmph; 1994 1995 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1996 1997 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1998 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1999 2000 /* reset port promiscuous mode */ 2001 (void) aggr_port_promisc(port, B_FALSE); 2002 2003 aggr_port_stop(port); 2004 mac_perim_exit(pmph); 2005 } 2006 2007 grp->lg_started = B_FALSE; 2008 mac_perim_exit(mph); 2009 } 2010 2011 static int 2012 aggr_m_promisc(void *arg, boolean_t on) 2013 { 2014 aggr_grp_t *grp = arg; 2015 aggr_port_t *port; 2016 boolean_t link_state_changed = B_FALSE; 2017 mac_perim_handle_t mph, pmph; 2018 2019 AGGR_GRP_REFHOLD(grp); 2020 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2021 2022 
ASSERT(!grp->lg_closing); 2023 2024 if (on == grp->lg_promisc) 2025 goto bail; 2026 2027 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2028 int err = 0; 2029 2030 mac_perim_enter_by_mh(port->lp_mh, &pmph); 2031 AGGR_PORT_REFHOLD(port); 2032 if (!on && (port->lp_prom_addr == NULL)) 2033 err = aggr_port_promisc(port, B_FALSE); 2034 else if (on && port->lp_started) 2035 err = aggr_port_promisc(port, B_TRUE); 2036 2037 if (err != 0) { 2038 if (aggr_grp_detach_port(grp, port)) 2039 link_state_changed = B_TRUE; 2040 } else { 2041 /* 2042 * If a port was detached because of a previous 2043 * failure changing the promiscuity, the port 2044 * is reattached when it successfully changes 2045 * the promiscuity now, and this might cause 2046 * the link state of the aggregation to change. 2047 */ 2048 if (aggr_grp_attach_port(grp, port)) 2049 link_state_changed = B_TRUE; 2050 } 2051 mac_perim_exit(pmph); 2052 AGGR_PORT_REFRELE(port); 2053 } 2054 2055 grp->lg_promisc = on; 2056 2057 if (link_state_changed) 2058 mac_link_update(grp->lg_mh, grp->lg_link_state); 2059 2060 bail: 2061 mac_perim_exit(mph); 2062 AGGR_GRP_REFRELE(grp); 2063 2064 return (0); 2065 } 2066 2067 static void 2068 aggr_grp_port_rename(const char *new_name, void *arg) 2069 { 2070 /* 2071 * aggr port's mac client name is the format of "aggr link name" plus 2072 * AGGR_PORT_NAME_DELIMIT plus "underneath link name". 
2073 */ 2074 int aggr_len, link_len, clnt_name_len, i; 2075 char *str_end, *str_st, *str_del; 2076 char aggr_name[MAXNAMELEN]; 2077 char link_name[MAXNAMELEN]; 2078 char *clnt_name; 2079 aggr_grp_t *aggr_grp = arg; 2080 aggr_port_t *aggr_port = aggr_grp->lg_ports; 2081 2082 for (i = 0; i < aggr_grp->lg_nports; i++) { 2083 clnt_name = mac_client_name(aggr_port->lp_mch); 2084 clnt_name_len = strlen(clnt_name); 2085 str_st = clnt_name; 2086 str_end = &(clnt_name[clnt_name_len]); 2087 str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT); 2088 ASSERT(str_del != NULL); 2089 aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st); 2090 link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del); 2091 bzero(aggr_name, MAXNAMELEN); 2092 bzero(link_name, MAXNAMELEN); 2093 bcopy(clnt_name, aggr_name, aggr_len); 2094 bcopy(str_del, link_name, link_len + 1); 2095 bzero(clnt_name, MAXNAMELEN); 2096 (void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name, 2097 link_name); 2098 2099 (void) mac_rename_primary(aggr_port->lp_mh, NULL); 2100 aggr_port = aggr_port->lp_next; 2101 } 2102 } 2103 2104 /* 2105 * Initialize the capabilities that are advertised for the group 2106 * according to the capabilities of the constituent ports. 
2107 */ 2108 static boolean_t 2109 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 2110 { 2111 aggr_grp_t *grp = arg; 2112 2113 switch (cap) { 2114 case MAC_CAPAB_HCKSUM: { 2115 uint32_t *hcksum_txflags = cap_data; 2116 *hcksum_txflags = grp->lg_hcksum_txflags; 2117 break; 2118 } 2119 case MAC_CAPAB_LSO: { 2120 mac_capab_lso_t *cap_lso = cap_data; 2121 2122 if (grp->lg_lso) { 2123 *cap_lso = grp->lg_cap_lso; 2124 break; 2125 } else { 2126 return (B_FALSE); 2127 } 2128 } 2129 case MAC_CAPAB_NO_NATIVEVLAN: 2130 return (!grp->lg_vlan); 2131 case MAC_CAPAB_NO_ZCOPY: 2132 return (!grp->lg_zcopy); 2133 case MAC_CAPAB_RINGS: { 2134 mac_capab_rings_t *cap_rings = cap_data; 2135 2136 if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 2137 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2138 cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt; 2139 2140 /* 2141 * An aggregation advertises only one (pseudo) RX 2142 * group, which virtualizes the main/primary group of 2143 * the underlying devices. 2144 */ 2145 cap_rings->mr_gnum = 1; 2146 cap_rings->mr_gaddring = NULL; 2147 cap_rings->mr_gremring = NULL; 2148 } else { 2149 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2150 cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt; 2151 cap_rings->mr_gnum = 0; 2152 } 2153 cap_rings->mr_rget = aggr_fill_ring; 2154 cap_rings->mr_gget = aggr_fill_group; 2155 break; 2156 } 2157 case MAC_CAPAB_AGGR: 2158 { 2159 mac_capab_aggr_t *aggr_cap; 2160 2161 if (cap_data != NULL) { 2162 aggr_cap = cap_data; 2163 aggr_cap->mca_rename_fn = aggr_grp_port_rename; 2164 aggr_cap->mca_unicst = aggr_m_unicst; 2165 aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring; 2166 aggr_cap->mca_arg = arg; 2167 } 2168 return (B_TRUE); 2169 } 2170 default: 2171 return (B_FALSE); 2172 } 2173 return (B_TRUE); 2174 } 2175 2176 /* 2177 * Callback funtion for MAC layer to register groups. 
2178 */ 2179 static void 2180 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, 2181 mac_group_info_t *infop, mac_group_handle_t gh) 2182 { 2183 aggr_grp_t *grp = arg; 2184 aggr_pseudo_rx_group_t *rx_group; 2185 aggr_pseudo_tx_group_t *tx_group; 2186 2187 ASSERT(index == 0); 2188 if (rtype == MAC_RING_TYPE_RX) { 2189 rx_group = &grp->lg_rx_group; 2190 rx_group->arg_gh = gh; 2191 rx_group->arg_grp = grp; 2192 2193 infop->mgi_driver = (mac_group_driver_t)rx_group; 2194 infop->mgi_start = NULL; 2195 infop->mgi_stop = NULL; 2196 infop->mgi_addmac = aggr_addmac; 2197 infop->mgi_remmac = aggr_remmac; 2198 infop->mgi_count = rx_group->arg_ring_cnt; 2199 } else { 2200 tx_group = &grp->lg_tx_group; 2201 tx_group->atg_gh = gh; 2202 } 2203 } 2204 2205 /* 2206 * Callback funtion for MAC layer to register all rings. 2207 */ 2208 static void 2209 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 2210 const int index, mac_ring_info_t *infop, mac_ring_handle_t rh) 2211 { 2212 aggr_grp_t *grp = arg; 2213 2214 switch (rtype) { 2215 case MAC_RING_TYPE_RX: { 2216 aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_group; 2217 aggr_pseudo_rx_ring_t *rx_ring; 2218 mac_intr_t aggr_mac_intr; 2219 2220 ASSERT(rg_index == 0); 2221 2222 ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt)); 2223 rx_ring = rx_group->arg_rings + index; 2224 rx_ring->arr_rh = rh; 2225 2226 /* 2227 * Entrypoint to enable interrupt (disable poll) and 2228 * disable interrupt (enable poll). 
2229 */ 2230 aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring; 2231 aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr; 2232 aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr; 2233 aggr_mac_intr.mi_ddi_handle = NULL; 2234 2235 infop->mri_driver = (mac_ring_driver_t)rx_ring; 2236 infop->mri_start = aggr_pseudo_start_ring; 2237 infop->mri_stop = aggr_pseudo_stop_ring; 2238 2239 infop->mri_intr = aggr_mac_intr; 2240 infop->mri_poll = aggr_rx_poll; 2241 2242 infop->mri_stat = aggr_rx_ring_stat; 2243 break; 2244 } 2245 case MAC_RING_TYPE_TX: { 2246 aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group; 2247 aggr_pseudo_tx_ring_t *tx_ring; 2248 2249 ASSERT(rg_index == -1); 2250 ASSERT(index < tx_group->atg_ring_cnt); 2251 2252 tx_ring = &tx_group->atg_rings[index]; 2253 tx_ring->atr_rh = rh; 2254 2255 infop->mri_driver = (mac_ring_driver_t)tx_ring; 2256 infop->mri_start = NULL; 2257 infop->mri_stop = NULL; 2258 infop->mri_tx = aggr_ring_tx; 2259 infop->mri_stat = aggr_tx_ring_stat; 2260 /* 2261 * Use the hw TX ring handle to find if the ring needs 2262 * serialization or not. For NICs that do not expose 2263 * Tx rings, atr_hw_rh will be NULL. 
2264 */ 2265 if (tx_ring->atr_hw_rh != NULL) { 2266 infop->mri_flags = 2267 mac_hwring_getinfo(tx_ring->atr_hw_rh); 2268 } 2269 break; 2270 } 2271 default: 2272 break; 2273 } 2274 } 2275 2276 static mblk_t * 2277 aggr_rx_poll(void *arg, int bytes_to_pickup) 2278 { 2279 aggr_pseudo_rx_ring_t *rr_ring = arg; 2280 aggr_port_t *port = rr_ring->arr_port; 2281 aggr_grp_t *grp = port->lp_grp; 2282 mblk_t *mp_chain, *mp, **mpp; 2283 2284 mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup); 2285 2286 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 2287 return (mp_chain); 2288 2289 mpp = &mp_chain; 2290 while ((mp = *mpp) != NULL) { 2291 if (MBLKL(mp) >= sizeof (struct ether_header)) { 2292 struct ether_header *ehp; 2293 2294 ehp = (struct ether_header *)mp->b_rptr; 2295 if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) { 2296 *mpp = mp->b_next; 2297 mp->b_next = NULL; 2298 aggr_recv_lacp(port, 2299 (mac_resource_handle_t)rr_ring, mp); 2300 continue; 2301 } 2302 } 2303 2304 if (!port->lp_collector_enabled) { 2305 *mpp = mp->b_next; 2306 mp->b_next = NULL; 2307 freemsg(mp); 2308 continue; 2309 } 2310 mpp = &mp->b_next; 2311 } 2312 return (mp_chain); 2313 } 2314 2315 static int 2316 aggr_addmac(void *arg, const uint8_t *mac_addr) 2317 { 2318 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg; 2319 aggr_unicst_addr_t *addr, **pprev; 2320 aggr_grp_t *grp = rx_group->arg_grp; 2321 aggr_port_t *port, *p; 2322 mac_perim_handle_t mph; 2323 int err = 0; 2324 2325 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2326 2327 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 2328 mac_perim_exit(mph); 2329 return (0); 2330 } 2331 2332 /* 2333 * Insert this mac address into the list of mac addresses owned by 2334 * the aggregation pseudo group. 
2335 */ 2336 pprev = &rx_group->arg_macaddr; 2337 while ((addr = *pprev) != NULL) { 2338 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) { 2339 mac_perim_exit(mph); 2340 return (EEXIST); 2341 } 2342 pprev = &addr->aua_next; 2343 } 2344 addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP); 2345 bcopy(mac_addr, addr->aua_addr, ETHERADDRL); 2346 addr->aua_next = NULL; 2347 *pprev = addr; 2348 2349 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 2350 if ((err = aggr_port_addmac(port, mac_addr)) != 0) 2351 break; 2352 2353 if (err != 0) { 2354 for (p = grp->lg_ports; p != port; p = p->lp_next) 2355 aggr_port_remmac(p, mac_addr); 2356 2357 *pprev = NULL; 2358 kmem_free(addr, sizeof (aggr_unicst_addr_t)); 2359 } 2360 2361 mac_perim_exit(mph); 2362 return (err); 2363 } 2364 2365 static int 2366 aggr_remmac(void *arg, const uint8_t *mac_addr) 2367 { 2368 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg; 2369 aggr_unicst_addr_t *addr, **pprev; 2370 aggr_grp_t *grp = rx_group->arg_grp; 2371 aggr_port_t *port; 2372 mac_perim_handle_t mph; 2373 int err = 0; 2374 2375 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2376 2377 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 2378 mac_perim_exit(mph); 2379 return (0); 2380 } 2381 2382 /* 2383 * Insert this mac address into the list of mac addresses owned by 2384 * the aggregation pseudo group. 
2385 */ 2386 pprev = &rx_group->arg_macaddr; 2387 while ((addr = *pprev) != NULL) { 2388 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) { 2389 pprev = &addr->aua_next; 2390 continue; 2391 } 2392 break; 2393 } 2394 if (addr == NULL) { 2395 mac_perim_exit(mph); 2396 return (EINVAL); 2397 } 2398 2399 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 2400 aggr_port_remmac(port, mac_addr); 2401 2402 *pprev = addr->aua_next; 2403 kmem_free(addr, sizeof (aggr_unicst_addr_t)); 2404 2405 mac_perim_exit(mph); 2406 return (err); 2407 } 2408 2409 /* 2410 * Add or remove the multicast addresses that are defined for the group 2411 * to or from the specified port. 2412 * 2413 * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port 2414 * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is 2415 * called when the port is either stopped or detached. 2416 */ 2417 void 2418 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add) 2419 { 2420 aggr_grp_t *grp = port->lp_grp; 2421 2422 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 2423 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2424 2425 if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED) 2426 return; 2427 2428 mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add); 2429 } 2430 2431 static int 2432 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 2433 { 2434 aggr_grp_t *grp = arg; 2435 aggr_port_t *port = NULL; 2436 mac_perim_handle_t mph; 2437 int err = 0, cerr; 2438 2439 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2440 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2441 if (port->lp_state != AGGR_PORT_STATE_ATTACHED || 2442 !port->lp_started) { 2443 continue; 2444 } 2445 cerr = aggr_port_multicst(port, add, addrp); 2446 if (cerr != 0 && err == 0) 2447 err = cerr; 2448 } 2449 mac_perim_exit(mph); 2450 return (err); 2451 } 2452 2453 static int 2454 aggr_m_unicst(void *arg, const uint8_t *macaddr) 2455 { 2456 aggr_grp_t *grp = arg; 2457 
mac_perim_handle_t mph; 2458 int err; 2459 2460 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2461 err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr, 2462 0, 0); 2463 mac_perim_exit(mph); 2464 return (err); 2465 } 2466 2467 /* 2468 * Initialize the capabilities that are advertised for the group 2469 * according to the capabilities of the constituent ports. 2470 */ 2471 static void 2472 aggr_grp_capab_set(aggr_grp_t *grp) 2473 { 2474 uint32_t cksum; 2475 aggr_port_t *port; 2476 mac_capab_lso_t cap_lso; 2477 2478 ASSERT(grp->lg_mh == NULL); 2479 ASSERT(grp->lg_ports != NULL); 2480 2481 grp->lg_hcksum_txflags = (uint32_t)-1; 2482 grp->lg_zcopy = B_TRUE; 2483 grp->lg_vlan = B_TRUE; 2484 2485 grp->lg_lso = B_TRUE; 2486 grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1; 2487 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1; 2488 2489 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2490 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum)) 2491 cksum = 0; 2492 grp->lg_hcksum_txflags &= cksum; 2493 2494 grp->lg_vlan &= 2495 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL); 2496 2497 grp->lg_zcopy &= 2498 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL); 2499 2500 grp->lg_lso &= 2501 mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso); 2502 if (grp->lg_lso) { 2503 grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags; 2504 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max > 2505 cap_lso.lso_basic_tcp_ipv4.lso_max) 2506 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = 2507 cap_lso.lso_basic_tcp_ipv4.lso_max; 2508 } 2509 } 2510 } 2511 2512 /* 2513 * Checks whether the capabilities of the port being added are compatible 2514 * with the current capabilities of the aggregation. 
2515 */ 2516 static boolean_t 2517 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 2518 { 2519 uint32_t hcksum_txflags; 2520 2521 ASSERT(grp->lg_ports != NULL); 2522 2523 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) & 2524 grp->lg_vlan) != grp->lg_vlan) { 2525 return (B_FALSE); 2526 } 2527 2528 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) & 2529 grp->lg_zcopy) != grp->lg_zcopy) { 2530 return (B_FALSE); 2531 } 2532 2533 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) { 2534 if (grp->lg_hcksum_txflags != 0) 2535 return (B_FALSE); 2536 } else if ((hcksum_txflags & grp->lg_hcksum_txflags) != 2537 grp->lg_hcksum_txflags) { 2538 return (B_FALSE); 2539 } 2540 2541 if (grp->lg_lso) { 2542 mac_capab_lso_t cap_lso; 2543 2544 if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) { 2545 if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) != 2546 grp->lg_cap_lso.lso_flags) 2547 return (B_FALSE); 2548 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max > 2549 cap_lso.lso_basic_tcp_ipv4.lso_max) 2550 return (B_FALSE); 2551 } else { 2552 return (B_FALSE); 2553 } 2554 } 2555 2556 return (B_TRUE); 2557 } 2558 2559 /* 2560 * Returns the maximum SDU according to the SDU of the constituent ports. 2561 */ 2562 static uint_t 2563 aggr_grp_max_sdu(aggr_grp_t *grp) 2564 { 2565 uint_t max_sdu = (uint_t)-1; 2566 aggr_port_t *port; 2567 2568 ASSERT(grp->lg_ports != NULL); 2569 2570 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2571 uint_t port_sdu_max; 2572 2573 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2574 if (max_sdu > port_sdu_max) 2575 max_sdu = port_sdu_max; 2576 } 2577 2578 return (max_sdu); 2579 } 2580 2581 /* 2582 * Checks if the maximum SDU of the specified port is compatible 2583 * with the maximum SDU of the specified aggregation group, returns 2584 * B_TRUE if it is, B_FALSE otherwise. 
2585 */ 2586 static boolean_t 2587 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port) 2588 { 2589 uint_t port_sdu_max; 2590 2591 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2592 return (port_sdu_max >= grp->lg_max_sdu); 2593 } 2594 2595 /* 2596 * Returns the maximum margin according to the margin of the constituent ports. 2597 */ 2598 static uint32_t 2599 aggr_grp_max_margin(aggr_grp_t *grp) 2600 { 2601 uint32_t margin = UINT32_MAX; 2602 aggr_port_t *port; 2603 2604 ASSERT(grp->lg_mh == NULL); 2605 ASSERT(grp->lg_ports != NULL); 2606 2607 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2608 if (margin > port->lp_margin) 2609 margin = port->lp_margin; 2610 } 2611 2612 grp->lg_margin = margin; 2613 return (margin); 2614 } 2615 2616 /* 2617 * Checks if the maximum margin of the specified port is compatible 2618 * with the maximum margin of the specified aggregation group, returns 2619 * B_TRUE if it is, B_FALSE otherwise. 2620 */ 2621 static boolean_t 2622 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port) 2623 { 2624 if (port->lp_margin >= grp->lg_margin) 2625 return (B_TRUE); 2626 2627 /* 2628 * See whether the current margin value is allowed to be changed to 2629 * the new value. 
2630 */ 2631 if (!mac_margin_update(grp->lg_mh, port->lp_margin)) 2632 return (B_FALSE); 2633 2634 grp->lg_margin = port->lp_margin; 2635 return (B_TRUE); 2636 } 2637 2638 /* 2639 * Set MTU on individual ports of an aggregation group 2640 */ 2641 static int 2642 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu, 2643 uint32_t *old_mtu) 2644 { 2645 boolean_t removed = B_FALSE; 2646 mac_perim_handle_t mph; 2647 mac_diag_t diag; 2648 int err, rv, retry = 0; 2649 2650 if (port->lp_mah != NULL) { 2651 (void) mac_unicast_remove(port->lp_mch, port->lp_mah); 2652 port->lp_mah = NULL; 2653 removed = B_TRUE; 2654 } 2655 err = mac_set_mtu(port->lp_mh, sdu, old_mtu); 2656 try_again: 2657 if (removed && (rv = mac_unicast_add(port->lp_mch, NULL, 2658 MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK, 2659 &port->lp_mah, 0, &diag)) != 0) { 2660 /* 2661 * following is a workaround for a bug in 'bge' driver. 2662 * See CR 6794654 for more information and this work around 2663 * will be removed once the CR is fixed. 2664 */ 2665 if (rv == EIO && retry++ < 3) { 2666 delay(2 * hz); 2667 goto try_again; 2668 } 2669 /* 2670 * if mac_unicast_add() failed while setting the MTU, 2671 * detach the port from the group. 2672 */ 2673 mac_perim_enter_by_mh(port->lp_mh, &mph); 2674 (void) aggr_grp_detach_port(grp, port); 2675 mac_perim_exit(mph); 2676 cmn_err(CE_WARN, "Unable to restart the port %s while " 2677 "setting MTU. 
Detaching the port from the aggregation.", 2678 mac_client_name(port->lp_mch)); 2679 } 2680 return (err); 2681 } 2682 2683 static int 2684 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu) 2685 { 2686 int err = 0, i, rv; 2687 aggr_port_t *port; 2688 uint32_t *mtu; 2689 2690 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2691 2692 /* 2693 * If the MTU being set is equal to aggr group's maximum 2694 * allowable value, then there is nothing to change 2695 */ 2696 if (sdu == grp->lg_max_sdu) 2697 return (0); 2698 2699 /* 0 is aggr group's min sdu */ 2700 if (sdu == 0) 2701 return (EINVAL); 2702 2703 mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP); 2704 for (port = grp->lg_ports, i = 0; port != NULL && err == 0; 2705 port = port->lp_next, i++) { 2706 err = aggr_set_port_sdu(grp, port, sdu, mtu + i); 2707 } 2708 if (err != 0) { 2709 /* recover from error: reset the mtus of the ports */ 2710 aggr_port_t *tmp; 2711 2712 for (tmp = grp->lg_ports, i = 0; tmp != port; 2713 tmp = tmp->lp_next, i++) { 2714 (void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL); 2715 } 2716 goto bail; 2717 } 2718 grp->lg_max_sdu = aggr_grp_max_sdu(grp); 2719 rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu); 2720 ASSERT(rv == 0); 2721 bail: 2722 kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports); 2723 return (err); 2724 } 2725 2726 /* 2727 * Callback functions for set/get of properties 2728 */ 2729 /*ARGSUSED*/ 2730 static int 2731 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 2732 uint_t pr_valsize, const void *pr_val) 2733 { 2734 int err = ENOTSUP; 2735 aggr_grp_t *grp = m_driver; 2736 2737 switch (pr_num) { 2738 case MAC_PROP_MTU: { 2739 uint32_t mtu; 2740 2741 if (pr_valsize < sizeof (mtu)) { 2742 err = EINVAL; 2743 break; 2744 } 2745 bcopy(pr_val, &mtu, sizeof (mtu)); 2746 err = aggr_sdu_update(grp, mtu); 2747 break; 2748 } 2749 default: 2750 break; 2751 } 2752 return (err); 2753 } 2754 2755 int 2756 aggr_grp_possible_mtu_range(aggr_grp_t *grp, uint32_t *min, 
uint32_t *max) 2757 { 2758 mac_propval_range_t *vals; 2759 mac_propval_uint32_range_t *ur; 2760 aggr_port_t *port; 2761 mac_perim_handle_t mph; 2762 uint_t i; 2763 int err = 0; 2764 2765 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2766 2767 *min = 0; 2768 *max = (uint32_t)-1; 2769 2770 vals = kmem_alloc(sizeof (mac_propval_range_t) * grp->lg_nports, 2771 KM_SLEEP); 2772 2773 for (port = grp->lg_ports, i = 0; port != NULL; 2774 port = port->lp_next, i++) { 2775 mac_perim_enter_by_mh(port->lp_mh, &mph); 2776 err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL, 2777 NULL, 0, vals + i, NULL); 2778 mac_perim_exit(mph); 2779 if (err != 0) 2780 break; 2781 } 2782 2783 /* 2784 * if any of the underlying ports does not support changing MTU then 2785 * just return ENOTSUP 2786 */ 2787 if (port != NULL) { 2788 ASSERT(err != 0); 2789 goto done; 2790 } 2791 2792 for (i = 0; i < grp->lg_nports; i++) { 2793 ur = &((vals + i)->mpr_range_uint32[0]); 2794 /* 2795 * Take max of the min, for range_min; that is the minimum 2796 * MTU value for an aggregation is the maximum of the 2797 * minimum values of all the underlying ports 2798 */ 2799 if (ur->mpur_min > *min) 2800 *min = ur->mpur_min; 2801 /* Take min of the max, for range_max */ 2802 if (ur->mpur_max < *max) 2803 *max = ur->mpur_max; 2804 } 2805 done: 2806 kmem_free(vals, sizeof (mac_propval_range_t) * grp->lg_nports); 2807 2808 return (err); 2809 } 2810 2811 static void 2812 aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 2813 mac_prop_info_handle_t prh) 2814 { 2815 aggr_grp_t *grp = m_driver; 2816 2817 _NOTE(ARGUNUSED(pr_name)); 2818 2819 switch (pr_num) { 2820 case MAC_PROP_MTU: { 2821 uint32_t min, max; 2822 2823 if (aggr_grp_possible_mtu_range(grp, &min, &max) != 0) 2824 return; 2825 mac_prop_info_set_range_uint32(prh, min, max); 2826 break; 2827 } 2828 } 2829 } 2830