1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * IEEE 802.3ad Link Aggregation - Link Aggregation MAC ports. 28 * 29 * Implements the functions needed to manage the MAC ports that are 30 * part of Link Aggregation groups. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/sysmacros.h> 35 #include <sys/conf.h> 36 #include <sys/cmn_err.h> 37 #include <sys/id_space.h> 38 #include <sys/list.h> 39 #include <sys/ksynch.h> 40 #include <sys/kmem.h> 41 #include <sys/stream.h> 42 #include <sys/modctl.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/atomic.h> 46 #include <sys/stat.h> 47 #include <sys/sdt.h> 48 #include <sys/dlpi.h> 49 #include <sys/dls.h> 50 #include <sys/aggr.h> 51 #include <sys/aggr_impl.h> 52 53 static kmem_cache_t *aggr_port_cache; 54 static id_space_t *aggr_portids; 55 56 static void aggr_port_notify_cb(void *, mac_notify_type_t); 57 58 /*ARGSUSED*/ 59 static int 60 aggr_port_constructor(void *buf, void *arg, int kmflag) 61 { 62 bzero(buf, sizeof (aggr_port_t)); 63 return (0); 64 } 65 66 /*ARGSUSED*/ 67 static void 68 aggr_port_destructor(void *buf, void *arg) 69 { 70 aggr_port_t *port = buf; 71 72 ASSERT(port->lp_mnh == NULL); 73 ASSERT(port->lp_mphp == NULL); 74 ASSERT(!port->lp_grp_added); 75 ASSERT(port->lp_hwgh == NULL); 76 } 77 78 void 79 aggr_port_init(void) 80 { 81 aggr_port_cache = kmem_cache_create("aggr_port_cache", 82 sizeof (aggr_port_t), 0, aggr_port_constructor, 83 aggr_port_destructor, NULL, NULL, NULL, 0); 84 85 /* 86 * Allocate a id space to manage port identification. The range of 87 * the arena will be from 1 to UINT16_MAX, because the LACP protocol 88 * specifies 16-bit unique identification. 89 */ 90 aggr_portids = id_space_create("aggr_portids", 1, UINT16_MAX); 91 ASSERT(aggr_portids != NULL); 92 } 93 94 void 95 aggr_port_fini(void) 96 { 97 /* 98 * This function is called only after all groups have been 99 * freed. This ensures that there are no remaining allocated 100 * ports when this function is invoked. 101 */ 102 kmem_cache_destroy(aggr_port_cache); 103 id_space_destroy(aggr_portids); 104 } 105 106 /* ARGSUSED */ 107 void 108 aggr_port_init_callbacks(aggr_port_t *port) 109 { 110 /* add the port's receive callback */ 111 port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb, port); 112 /* 113 * Hold a reference of the grp and the port and this reference will 114 * be release when the thread exits. 115 * 116 * The reference on the port is used for aggr_port_delete() to 117 * continue without waiting for the thread to exit; the reference 118 * on the grp is used for aggr_grp_delete() to wait for the thread 119 * to exit before calling mac_unregister(). 120 * 121 * Note that these references will be released either in 122 * aggr_port_delete() when mac_notify_remove() succeeds, or in 123 * the aggr_port_notify_cb() callback when the port is deleted 124 * (lp_closing is set). 125 */ 126 aggr_grp_port_hold(port); 127 } 128 129 /* ARGSUSED */ 130 int 131 aggr_port_create(aggr_grp_t *grp, const datalink_id_t linkid, boolean_t force, 132 aggr_port_t **pp) 133 { 134 int err; 135 mac_handle_t mh; 136 mac_client_handle_t mch = NULL; 137 aggr_port_t *port; 138 uint16_t portid; 139 uint_t i; 140 boolean_t no_link_update = B_FALSE; 141 const mac_info_t *mip; 142 uint32_t note; 143 uint32_t margin; 144 char client_name[MAXNAMELEN]; 145 char aggr_name[MAXNAMELEN]; 146 char port_name[MAXNAMELEN]; 147 mac_diag_t diag; 148 mac_unicast_handle_t mah; 149 150 *pp = NULL; 151 152 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 153 return (err); 154 155 mip = mac_info(mh); 156 if (mip->mi_media != DL_ETHER || mip->mi_nativemedia != DL_ETHER) { 157 err = EINVAL; 158 goto fail; 159 } 160 161 /* 162 * If the underlying MAC does not support link update notification, it 163 * can only be aggregated if `force' is set. This is because aggr 164 * depends on link notifications to attach ports whose link is up. 165 */ 166 note = mac_no_notification(mh); 167 if ((note & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN)) != 0) { 168 no_link_update = B_TRUE; 169 if (!force) { 170 /* 171 * We borrow this error code to indicate that link 172 * notification is not supported. 173 */ 174 err = ENETDOWN; 175 goto fail; 176 } 177 } 178 179 if (((err = dls_mgmt_get_linkinfo(grp->lg_linkid, 180 aggr_name, NULL, NULL, NULL)) != 0) || 181 ((err = dls_mgmt_get_linkinfo(linkid, port_name, 182 NULL, NULL, NULL)) != 0)) { 183 goto fail; 184 } 185 186 (void) snprintf(client_name, MAXNAMELEN, "%s-%s", aggr_name, port_name); 187 if ((err = mac_client_open(mh, &mch, client_name, 188 MAC_OPEN_FLAGS_IS_AGGR_PORT | MAC_OPEN_FLAGS_EXCLUSIVE | 189 MAC_OPEN_FLAGS_DISABLE_TX_VID_CHECK)) != 0) { 190 goto fail; 191 } 192 193 if ((portid = (uint16_t)id_alloc(aggr_portids)) == 0) { 194 err = ENOMEM; 195 goto fail; 196 } 197 198 /* 199 * As the underlying mac's current margin size is used to determine 200 * the margin size of the aggregation itself, request the underlying 201 * mac not to change to a smaller size. 202 */ 203 if ((err = mac_margin_add(mh, &margin, B_TRUE)) != 0) { 204 id_free(aggr_portids, portid); 205 goto fail; 206 } 207 208 if ((err = mac_unicast_primary_add(mch, &mah, &diag)) != 0) { 209 VERIFY(mac_margin_remove(mh, margin) == 0); 210 id_free(aggr_portids, portid); 211 goto fail; 212 } 213 214 port = kmem_cache_alloc(aggr_port_cache, KM_SLEEP); 215 216 port->lp_refs = 1; 217 port->lp_next = NULL; 218 port->lp_mh = mh; 219 port->lp_mch = mch; 220 port->lp_mip = mip; 221 port->lp_linkid = linkid; 222 port->lp_closing = B_FALSE; 223 port->lp_mah = mah; 224 225 /* get the port's original MAC address */ 226 mac_unicast_primary_get(port->lp_mh, port->lp_addr); 227 228 /* initialize state */ 229 port->lp_state = AGGR_PORT_STATE_STANDBY; 230 port->lp_link_state = LINK_STATE_UNKNOWN; 231 port->lp_ifspeed = 0; 232 port->lp_link_duplex = LINK_DUPLEX_UNKNOWN; 233 port->lp_started = B_FALSE; 234 port->lp_tx_enabled = B_FALSE; 235 port->lp_promisc_on = B_FALSE; 236 port->lp_no_link_update = no_link_update; 237 port->lp_portid = portid; 238 port->lp_margin = margin; 239 port->lp_prom_addr = NULL; 240 241 /* 242 * Save the current statistics of the port. They will be used 243 * later by aggr_m_stats() when aggregating the statistics of 244 * the constituent ports. 245 */ 246 for (i = 0; i < MAC_NSTAT; i++) { 247 port->lp_stat[i] = 248 aggr_port_stat(port, i + MAC_STAT_MIN); 249 } 250 for (i = 0; i < ETHER_NSTAT; i++) { 251 port->lp_ether_stat[i] = 252 aggr_port_stat(port, i + MACTYPE_STAT_MIN); 253 } 254 255 /* LACP related state */ 256 port->lp_collector_enabled = B_FALSE; 257 258 *pp = port; 259 return (0); 260 261 fail: 262 if (mch != NULL) 263 mac_client_close(mch, MAC_CLOSE_FLAGS_EXCLUSIVE); 264 mac_close(mh); 265 return (err); 266 } 267 268 void 269 aggr_port_delete(aggr_port_t *port) 270 { 271 aggr_lacp_port_t *pl = &port->lp_lacp; 272 273 ASSERT(port->lp_mphp == NULL); 274 ASSERT(!port->lp_promisc_on); 275 276 port->lp_closing = B_TRUE; 277 278 VERIFY(mac_margin_remove(port->lp_mh, port->lp_margin) == 0); 279 mac_rx_clear(port->lp_mch); 280 /* 281 * If the notification callback is already in process and waiting for 282 * the aggr grp's mac perimeter, don't wait (otherwise there would be 283 * deadlock). Otherwise, if mac_notify_remove() succeeds, we can 284 * release the reference held when mac_notify_add() is called. 285 */ 286 if ((port->lp_mnh != NULL) && 287 (mac_notify_remove(port->lp_mnh, B_FALSE) == 0)) { 288 aggr_grp_port_rele(port); 289 } 290 port->lp_mnh = NULL; 291 292 /* 293 * Inform the the port lacp timer thread to exit. Note that waiting 294 * for the thread to exit may cause deadlock since that thread may 295 * need to enter into the mac perimeter which we are currently in. 296 * It is fine to continue without waiting though since that thread 297 * is holding a reference of the port. 298 */ 299 mutex_enter(&pl->lacp_timer_lock); 300 pl->lacp_timer_bits |= LACP_THREAD_EXIT; 301 cv_broadcast(&pl->lacp_timer_cv); 302 mutex_exit(&pl->lacp_timer_lock); 303 304 /* 305 * Restore the port MAC address. Note it is called after the 306 * port's notification callback being removed. This prevent 307 * port's MAC_NOTE_UNICST notify callback function being called. 308 */ 309 (void) mac_unicast_primary_set(port->lp_mh, port->lp_addr); 310 if (port->lp_mah != NULL) 311 (void) mac_unicast_remove(port->lp_mch, port->lp_mah); 312 mac_client_close(port->lp_mch, MAC_CLOSE_FLAGS_EXCLUSIVE); 313 mac_close(port->lp_mh); 314 AGGR_PORT_REFRELE(port); 315 } 316 317 void 318 aggr_port_free(aggr_port_t *port) 319 { 320 ASSERT(port->lp_refs == 0); 321 if (port->lp_grp != NULL) 322 AGGR_GRP_REFRELE(port->lp_grp); 323 port->lp_grp = NULL; 324 id_free(aggr_portids, port->lp_portid); 325 port->lp_portid = 0; 326 mutex_destroy(&port->lp_lacp.lacp_timer_lock); 327 cv_destroy(&port->lp_lacp.lacp_timer_cv); 328 kmem_cache_free(aggr_port_cache, port); 329 } 330 331 /* 332 * Invoked upon receiving a MAC_NOTE_LINK notification for 333 * one of the constituent ports. 334 */ 335 boolean_t 336 aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port) 337 { 338 boolean_t do_attach = B_FALSE; 339 boolean_t do_detach = B_FALSE; 340 boolean_t link_state_changed = B_TRUE; 341 uint64_t ifspeed; 342 link_state_t link_state; 343 link_duplex_t link_duplex; 344 mac_perim_handle_t mph; 345 346 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 347 mac_perim_enter_by_mh(port->lp_mh, &mph); 348 349 /* 350 * link state change? For links that do not support link state 351 * notification, always assume the link is up. 352 */ 353 link_state = port->lp_no_link_update ? LINK_STATE_UP : 354 mac_link_get(port->lp_mh); 355 if (port->lp_link_state != link_state) { 356 if (link_state == LINK_STATE_UP) 357 do_attach = (port->lp_link_state != LINK_STATE_UP); 358 else 359 do_detach = (port->lp_link_state == LINK_STATE_UP); 360 } 361 port->lp_link_state = link_state; 362 363 /* link duplex change? */ 364 link_duplex = aggr_port_stat(port, ETHER_STAT_LINK_DUPLEX); 365 if (port->lp_link_duplex != link_duplex) { 366 if (link_duplex == LINK_DUPLEX_FULL) 367 do_attach |= (port->lp_link_duplex != LINK_DUPLEX_FULL); 368 else 369 do_detach |= (port->lp_link_duplex == LINK_DUPLEX_FULL); 370 } 371 port->lp_link_duplex = link_duplex; 372 373 /* link speed changes? */ 374 ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED); 375 if (port->lp_ifspeed != ifspeed) { 376 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 377 do_detach |= (ifspeed != grp->lg_ifspeed); 378 else 379 do_attach |= (ifspeed == grp->lg_ifspeed); 380 } 381 port->lp_ifspeed = ifspeed; 382 383 if (do_attach) { 384 /* attempt to attach the port to the aggregation */ 385 link_state_changed = aggr_grp_attach_port(grp, port); 386 } else if (do_detach) { 387 /* detach the port from the aggregation */ 388 link_state_changed = aggr_grp_detach_port(grp, port); 389 } 390 391 mac_perim_exit(mph); 392 return (link_state_changed); 393 } 394 395 /* 396 * Invoked upon receiving a MAC_NOTE_UNICST for one of the constituent 397 * ports of a group. 398 */ 399 static void 400 aggr_port_notify_unicst(aggr_grp_t *grp, aggr_port_t *port, 401 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 402 { 403 boolean_t mac_addr_changed = B_FALSE; 404 boolean_t link_state_changed = B_FALSE; 405 uint8_t mac_addr[ETHERADDRL]; 406 mac_perim_handle_t mph; 407 408 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 409 ASSERT(mac_addr_changedp != NULL); 410 ASSERT(link_state_changedp != NULL); 411 mac_perim_enter_by_mh(port->lp_mh, &mph); 412 413 /* 414 * If it is called when setting the MAC address to the 415 * aggregation group MAC address, do nothing. 416 */ 417 mac_unicast_primary_get(port->lp_mh, mac_addr); 418 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 419 mac_perim_exit(mph); 420 goto done; 421 } 422 423 /* save the new port MAC address */ 424 bcopy(mac_addr, port->lp_addr, ETHERADDRL); 425 426 aggr_grp_port_mac_changed(grp, port, &mac_addr_changed, 427 &link_state_changed); 428 429 mac_perim_exit(mph); 430 431 /* 432 * If this port was used to determine the MAC address of 433 * the group, update the MAC address of the constituent 434 * ports. 435 */ 436 if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) 437 link_state_changed = B_TRUE; 438 439 done: 440 *mac_addr_changedp = mac_addr_changed; 441 *link_state_changedp = link_state_changed; 442 } 443 444 /* 445 * Notification callback invoked by the MAC service module for 446 * a particular MAC port. 447 */ 448 static void 449 aggr_port_notify_cb(void *arg, mac_notify_type_t type) 450 { 451 aggr_port_t *port = arg; 452 aggr_grp_t *grp = port->lp_grp; 453 boolean_t mac_addr_changed, link_state_changed; 454 mac_perim_handle_t mph; 455 456 mac_perim_enter_by_mh(grp->lg_mh, &mph); 457 if (port->lp_closing) { 458 mac_perim_exit(mph); 459 460 /* 461 * Release the reference so it is safe for aggr to call 462 * mac_unregister() now. 463 */ 464 aggr_grp_port_rele(port); 465 return; 466 } 467 468 switch (type) { 469 case MAC_NOTE_TX: 470 mac_tx_update(grp->lg_mh); 471 break; 472 case MAC_NOTE_LINK: 473 if (aggr_port_notify_link(grp, port)) 474 mac_link_update(grp->lg_mh, grp->lg_link_state); 475 break; 476 case MAC_NOTE_UNICST: 477 aggr_port_notify_unicst(grp, port, &mac_addr_changed, 478 &link_state_changed); 479 if (mac_addr_changed) 480 mac_unicst_update(grp->lg_mh, grp->lg_addr); 481 if (link_state_changed) 482 mac_link_update(grp->lg_mh, grp->lg_link_state); 483 break; 484 default: 485 break; 486 } 487 488 mac_perim_exit(mph); 489 } 490 491 int 492 aggr_port_start(aggr_port_t *port) 493 { 494 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 495 496 if (port->lp_started) 497 return (0); 498 499 port->lp_started = B_TRUE; 500 aggr_grp_multicst_port(port, B_TRUE); 501 return (0); 502 } 503 504 void 505 aggr_port_stop(aggr_port_t *port) 506 { 507 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 508 509 if (!port->lp_started) 510 return; 511 512 aggr_grp_multicst_port(port, B_FALSE); 513 514 /* update the port state */ 515 port->lp_started = B_FALSE; 516 } 517 518 int 519 aggr_port_promisc(aggr_port_t *port, boolean_t on) 520 { 521 int rc; 522 523 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 524 525 if (on == port->lp_promisc_on) 526 /* already in desired promiscous mode */ 527 return (0); 528 529 if (on) { 530 mac_rx_clear(port->lp_mch); 531 rc = mac_promisc_add(port->lp_mch, MAC_CLIENT_PROMISC_ALL, 532 aggr_recv_cb, port, &port->lp_mphp, 533 MAC_PROMISC_FLAGS_NO_TX_LOOP); 534 if (rc != 0) { 535 mac_rx_set(port->lp_mch, aggr_recv_cb, port); 536 return (rc); 537 } 538 } else { 539 rc = mac_promisc_remove(port->lp_mphp); 540 if (rc != 0) 541 return (rc); 542 port->lp_mphp = NULL; 543 mac_rx_set(port->lp_mch, aggr_recv_cb, port); 544 } 545 546 port->lp_promisc_on = on; 547 548 return (0); 549 } 550 551 /* 552 * Set the MAC address of a port. 553 */ 554 int 555 aggr_port_unicst(aggr_port_t *port) 556 { 557 aggr_grp_t *grp = port->lp_grp; 558 559 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 560 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 561 562 return (mac_unicast_primary_set(port->lp_mh, grp->lg_addr)); 563 } 564 565 /* 566 * Add or remove a multicast address to/from a port. 567 */ 568 int 569 aggr_port_multicst(void *arg, boolean_t add, const uint8_t *addrp) 570 { 571 aggr_port_t *port = arg; 572 573 if (add) { 574 return (mac_multicast_add(port->lp_mch, addrp)); 575 } else { 576 mac_multicast_remove(port->lp_mch, addrp); 577 return (0); 578 } 579 } 580 581 uint64_t 582 aggr_port_stat(aggr_port_t *port, uint_t stat) 583 { 584 return (mac_stat_get(port->lp_mh, stat)); 585 } 586 587 /* 588 * Add a non-primary unicast address to the underlying port. If the port 589 * supports HW Rx group, try to add the address into the HW Rx group of 590 * the port first. If that fails, or if the port does not support HW Rx 591 * group, enable the port's promiscous mode. 592 */ 593 int 594 aggr_port_addmac(aggr_port_t *port, const uint8_t *mac_addr) 595 { 596 aggr_unicst_addr_t *addr, **pprev; 597 mac_perim_handle_t pmph; 598 int err; 599 600 ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); 601 mac_perim_enter_by_mh(port->lp_mh, &pmph); 602 603 /* 604 * If the underlying port support HW Rx group, add the mac to its 605 * RX group directly. 606 */ 607 if ((port->lp_hwgh != NULL) && 608 ((mac_hwgroup_addmac(port->lp_hwgh, mac_addr)) == 0)) { 609 mac_perim_exit(pmph); 610 return (0); 611 } 612 613 /* 614 * If that fails, or if the port does not support HW Rx group, enable 615 * the port's promiscous mode. (Note that we turn on the promiscous 616 * mode only if the port is already started. 617 */ 618 if (port->lp_started && 619 ((err = aggr_port_promisc(port, B_TRUE)) != 0)) { 620 mac_perim_exit(pmph); 621 return (err); 622 } 623 624 /* 625 * Walk through the unicast addresses that requires promiscous mode 626 * enabled on this port, and add this address to the end of the list. 627 */ 628 pprev = &port->lp_prom_addr; 629 while ((addr = *pprev) != NULL) { 630 ASSERT(bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0); 631 pprev = &addr->aua_next; 632 } 633 addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP); 634 bcopy(mac_addr, addr->aua_addr, ETHERADDRL); 635 addr->aua_next = NULL; 636 *pprev = addr; 637 mac_perim_exit(pmph); 638 return (0); 639 } 640 641 /* 642 * Remove a non-primary unicast address from the underlying port. This address 643 * must has been added by aggr_port_addmac(). As a result, we probably need to 644 * remove the address from the port's HW Rx group, or to disable the port's 645 * promiscous mode. 646 */ 647 void 648 aggr_port_remmac(aggr_port_t *port, const uint8_t *mac_addr) 649 { 650 aggr_grp_t *grp = port->lp_grp; 651 aggr_unicst_addr_t *addr, **pprev; 652 mac_perim_handle_t pmph; 653 654 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 655 mac_perim_enter_by_mh(port->lp_mh, &pmph); 656 657 /* 658 * See whether this address is in the list of addresses that requires 659 * the port being promiscous mode. 660 */ 661 pprev = &port->lp_prom_addr; 662 while ((addr = *pprev) != NULL) { 663 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) 664 break; 665 pprev = &addr->aua_next; 666 } 667 if (addr != NULL) { 668 /* 669 * This unicast address put the port into the promiscous mode, 670 * delete this address from the lp_prom_addr list. If this is 671 * the last address in that list, disable the promiscous mode 672 * if the aggregation is not in promiscous mode. 673 */ 674 *pprev = addr->aua_next; 675 kmem_free(addr, sizeof (aggr_unicst_addr_t)); 676 if (port->lp_prom_addr == NULL && !grp->lg_promisc) 677 (void) aggr_port_promisc(port, B_FALSE); 678 } else { 679 ASSERT(port->lp_hwgh != NULL); 680 (void) mac_hwgroup_remmac(port->lp_hwgh, mac_addr); 681 } 682 mac_perim_exit(pmph); 683 } 684