1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * IEEE 802.3ad Link Aggregation - Link Aggregation MAC ports. 30 * 31 * Implements the functions needed to manage the MAC ports that are 32 * part of Link Aggregation groups. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/sysmacros.h> 37 #include <sys/conf.h> 38 #include <sys/cmn_err.h> 39 #include <sys/id_space.h> 40 #include <sys/list.h> 41 #include <sys/ksynch.h> 42 #include <sys/kmem.h> 43 #include <sys/stream.h> 44 #include <sys/modctl.h> 45 #include <sys/ddi.h> 46 #include <sys/sunddi.h> 47 #include <sys/atomic.h> 48 #include <sys/stat.h> 49 #include <sys/sdt.h> 50 #include <sys/dlpi.h> 51 #include <sys/aggr.h> 52 #include <sys/aggr_impl.h> 53 54 static kmem_cache_t *aggr_port_cache; 55 static id_space_t *aggr_portids; 56 57 static void aggr_port_notify_cb(void *, mac_notify_type_t); 58 59 /*ARGSUSED*/ 60 static int 61 aggr_port_constructor(void *buf, void *arg, int kmflag) 62 { 63 aggr_port_t *port = buf; 64 65 bzero(buf, sizeof (aggr_port_t)); 66 rw_init(&port->lp_lock, NULL, RW_DRIVER, NULL); 67 68 return (0); 69 } 70 71 /*ARGSUSED*/ 72 static void 73 aggr_port_destructor(void *buf, void *arg) 74 { 75 aggr_port_t *port = buf; 76 77 rw_destroy(&port->lp_lock); 78 } 79 80 void 81 aggr_port_init(void) 82 { 83 aggr_port_cache = kmem_cache_create("aggr_port_cache", 84 sizeof (aggr_port_t), 0, aggr_port_constructor, 85 aggr_port_destructor, NULL, NULL, NULL, 0); 86 87 /* 88 * Allocate a id space to manage port identification. The range of 89 * the arena will be from 1 to UINT16_MAX, because the LACP protocol 90 * specifies 16-bit unique identification. 91 */ 92 aggr_portids = id_space_create("aggr_portids", 1, UINT16_MAX); 93 ASSERT(aggr_portids != NULL); 94 } 95 96 void 97 aggr_port_fini(void) 98 { 99 /* 100 * This function is called only after all groups have been 101 * freed. This ensures that there are no remaining allocated 102 * ports when this function is invoked. 103 */ 104 kmem_cache_destroy(aggr_port_cache); 105 id_space_destroy(aggr_portids); 106 } 107 108 mac_resource_handle_t 109 aggr_port_resource_add(void *arg, mac_resource_t *mrp) 110 { 111 aggr_port_t *port = (aggr_port_t *)arg; 112 aggr_grp_t *grp = port->lp_grp; 113 114 return (mac_resource_add(grp->lg_mh, mrp)); 115 } 116 117 void 118 aggr_port_init_callbacks(aggr_port_t *port) 119 { 120 /* add the port's receive callback */ 121 port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb, 122 (void *)port); 123 124 /* set port's resource_add callback */ 125 mac_resource_set(port->lp_mh, aggr_port_resource_add, (void *)port); 126 } 127 128 int 129 aggr_port_create(const datalink_id_t linkid, boolean_t force, aggr_port_t **pp) 130 { 131 int err; 132 mac_handle_t mh; 133 aggr_port_t *port; 134 uint16_t portid; 135 uint_t i; 136 boolean_t no_link_update = B_FALSE; 137 const mac_info_t *mip; 138 uint32_t note; 139 uint32_t margin; 140 141 *pp = NULL; 142 143 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 144 return (err); 145 146 mip = mac_info(mh); 147 if (mip->mi_media != DL_ETHER || mip->mi_nativemedia != DL_ETHER) { 148 err = EINVAL; 149 goto fail; 150 } 151 152 /* 153 * If the underlying MAC does not support link update notification, it 154 * can only be aggregated if `force' is set. This is because aggr 155 * depends on link notifications to attach ports whose link is up. 156 */ 157 note = mac_no_notification(mh); 158 if ((note & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN)) != 0) { 159 no_link_update = B_TRUE; 160 if (!force) { 161 /* 162 * We borrow this error code to indicate that link 163 * notification is not supported. 164 */ 165 err = ENETDOWN; 166 goto fail; 167 } 168 } 169 170 if ((portid = (uint16_t)id_alloc(aggr_portids)) == 0) { 171 err = ENOMEM; 172 goto fail; 173 } 174 175 /* 176 * As the underlying mac's current margin size is used to determine 177 * the margin size of the aggregation itself, request the underlying 178 * mac not to change to a smaller size. 179 */ 180 if ((err = mac_margin_add(mh, &margin, B_TRUE)) != 0) { 181 id_free(aggr_portids, portid); 182 goto fail; 183 } 184 185 if (!mac_active_set(mh)) { 186 VERIFY(mac_margin_remove(mh, margin) == 0); 187 id_free(aggr_portids, portid); 188 err = EBUSY; 189 goto fail; 190 } 191 192 port = kmem_cache_alloc(aggr_port_cache, KM_SLEEP); 193 194 port->lp_refs = 1; 195 port->lp_next = NULL; 196 port->lp_mh = mh; 197 port->lp_mip = mip; 198 port->lp_linkid = linkid; 199 port->lp_closing = 0; 200 201 /* get the port's original MAC address */ 202 mac_unicst_get(port->lp_mh, port->lp_addr); 203 204 /* set port's transmit information */ 205 port->lp_txinfo = mac_tx_get(port->lp_mh); 206 207 /* initialize state */ 208 port->lp_state = AGGR_PORT_STATE_STANDBY; 209 port->lp_link_state = LINK_STATE_UNKNOWN; 210 port->lp_ifspeed = 0; 211 port->lp_link_duplex = LINK_DUPLEX_UNKNOWN; 212 port->lp_started = B_FALSE; 213 port->lp_tx_enabled = B_FALSE; 214 port->lp_promisc_on = B_FALSE; 215 port->lp_no_link_update = no_link_update; 216 port->lp_portid = portid; 217 port->lp_margin = margin; 218 219 /* 220 * Save the current statistics of the port. They will be used 221 * later by aggr_m_stats() when aggregating the statistics of 222 * the constituent ports. 223 */ 224 for (i = 0; i < MAC_NSTAT; i++) { 225 port->lp_stat[i] = 226 aggr_port_stat(port, i + MAC_STAT_MIN); 227 } 228 for (i = 0; i < ETHER_NSTAT; i++) { 229 port->lp_ether_stat[i] = 230 aggr_port_stat(port, i + MACTYPE_STAT_MIN); 231 } 232 233 /* LACP related state */ 234 port->lp_collector_enabled = B_FALSE; 235 236 *pp = port; 237 return (0); 238 239 fail: 240 mac_close(mh); 241 return (err); 242 } 243 244 void 245 aggr_port_delete(aggr_port_t *port) 246 { 247 VERIFY(mac_margin_remove(port->lp_mh, port->lp_margin) == 0); 248 mac_rx_remove_wait(port->lp_mh); 249 mac_resource_set(port->lp_mh, NULL, NULL); 250 mac_notify_remove(port->lp_mh, port->lp_mnh); 251 mac_active_clear(port->lp_mh); 252 253 /* 254 * Restore the port MAC address. Note it is called after the 255 * port's notification callback being removed. This prevent 256 * port's MAC_NOTE_UNICST notify callback function being called. 257 */ 258 (void) mac_unicst_set(port->lp_mh, port->lp_addr); 259 260 mac_close(port->lp_mh); 261 AGGR_PORT_REFRELE(port); 262 } 263 264 void 265 aggr_port_free(aggr_port_t *port) 266 { 267 ASSERT(port->lp_refs == 0); 268 if (port->lp_grp != NULL) 269 AGGR_GRP_REFRELE(port->lp_grp); 270 port->lp_grp = NULL; 271 id_free(aggr_portids, port->lp_portid); 272 port->lp_portid = 0; 273 kmem_cache_free(aggr_port_cache, port); 274 } 275 276 /* 277 * Invoked upon receiving a MAC_NOTE_LINK notification for 278 * one of the constituent ports. 279 */ 280 boolean_t 281 aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port, boolean_t dolock) 282 { 283 boolean_t do_attach = B_FALSE; 284 boolean_t do_detach = B_FALSE; 285 boolean_t link_state_changed = B_TRUE; 286 uint64_t ifspeed; 287 link_state_t link_state; 288 link_duplex_t link_duplex; 289 290 if (dolock) { 291 AGGR_LACP_LOCK(grp); 292 rw_enter(&grp->lg_lock, RW_WRITER); 293 } else { 294 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 295 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 296 } 297 298 rw_enter(&port->lp_lock, RW_WRITER); 299 300 /* 301 * link state change? For links that do not support link state 302 * notification, always assume the link is up. 303 */ 304 link_state = port->lp_no_link_update ? LINK_STATE_UP : 305 mac_link_get(port->lp_mh); 306 if (port->lp_link_state != link_state) { 307 if (link_state == LINK_STATE_UP) 308 do_attach = (port->lp_link_state != LINK_STATE_UP); 309 else 310 do_detach = (port->lp_link_state == LINK_STATE_UP); 311 } 312 port->lp_link_state = link_state; 313 314 /* link duplex change? */ 315 link_duplex = aggr_port_stat(port, ETHER_STAT_LINK_DUPLEX); 316 if (port->lp_link_duplex != link_duplex) { 317 if (link_duplex == LINK_DUPLEX_FULL) 318 do_attach |= (port->lp_link_duplex != LINK_DUPLEX_FULL); 319 else 320 do_detach |= (port->lp_link_duplex == LINK_DUPLEX_FULL); 321 } 322 port->lp_link_duplex = link_duplex; 323 324 /* link speed changes? */ 325 ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED); 326 if (port->lp_ifspeed != ifspeed) { 327 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 328 do_detach |= (ifspeed != grp->lg_ifspeed); 329 else 330 do_attach |= (ifspeed == grp->lg_ifspeed); 331 } 332 port->lp_ifspeed = ifspeed; 333 334 if (do_attach) { 335 /* attempt to attach the port to the aggregation */ 336 link_state_changed = aggr_grp_attach_port(grp, port); 337 } else if (do_detach) { 338 /* detach the port from the aggregation */ 339 link_state_changed = aggr_grp_detach_port(grp, port); 340 } 341 342 rw_exit(&port->lp_lock); 343 344 if (dolock) { 345 rw_exit(&grp->lg_lock); 346 AGGR_LACP_UNLOCK(grp); 347 } 348 return (link_state_changed); 349 } 350 351 /* 352 * Invoked upon receiving a MAC_NOTE_UNICST for one of the constituent 353 * ports of a group. 354 */ 355 static void 356 aggr_port_notify_unicst(aggr_grp_t *grp, aggr_port_t *port, 357 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 358 { 359 boolean_t mac_addr_changed = B_FALSE; 360 boolean_t link_state_changed = B_FALSE; 361 uint8_t mac_addr[ETHERADDRL]; 362 363 ASSERT(mac_addr_changedp != NULL); 364 ASSERT(link_state_changedp != NULL); 365 AGGR_LACP_LOCK(grp); 366 rw_enter(&grp->lg_lock, RW_WRITER); 367 368 rw_enter(&port->lp_lock, RW_WRITER); 369 370 /* 371 * If it is called when setting the MAC address to the 372 * aggregation group MAC address, do nothing. 373 */ 374 mac_unicst_get(port->lp_mh, mac_addr); 375 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 376 rw_exit(&port->lp_lock); 377 goto done; 378 } 379 380 /* save the new port MAC address */ 381 bcopy(mac_addr, port->lp_addr, ETHERADDRL); 382 383 aggr_grp_port_mac_changed(grp, port, &mac_addr_changed, 384 &link_state_changed); 385 386 rw_exit(&port->lp_lock); 387 388 if (grp->lg_closing) 389 goto done; 390 391 /* 392 * If this port was used to determine the MAC address of 393 * the group, update the MAC address of the constituent 394 * ports. 395 */ 396 if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) 397 link_state_changed = B_TRUE; 398 399 done: 400 *mac_addr_changedp = mac_addr_changed; 401 *link_state_changedp = link_state_changed; 402 rw_exit(&grp->lg_lock); 403 AGGR_LACP_UNLOCK(grp); 404 } 405 406 /* 407 * Notification callback invoked by the MAC service module for 408 * a particular MAC port. 409 */ 410 static void 411 aggr_port_notify_cb(void *arg, mac_notify_type_t type) 412 { 413 aggr_port_t *port = arg; 414 aggr_grp_t *grp = port->lp_grp; 415 boolean_t mac_addr_changed, link_state_changed; 416 417 /* 418 * Do nothing if the aggregation or the port is in the deletion 419 * process. Note that this is necessary to avoid deadlock. 420 */ 421 if ((grp->lg_closing) || (port->lp_closing)) 422 return; 423 424 AGGR_PORT_REFHOLD(port); 425 426 switch (type) { 427 case MAC_NOTE_TX: 428 mac_tx_update(grp->lg_mh); 429 break; 430 case MAC_NOTE_LINK: 431 if (aggr_port_notify_link(grp, port, B_TRUE)) 432 mac_link_update(grp->lg_mh, grp->lg_link_state); 433 break; 434 case MAC_NOTE_UNICST: 435 aggr_port_notify_unicst(grp, port, &mac_addr_changed, 436 &link_state_changed); 437 if (mac_addr_changed) 438 mac_unicst_update(grp->lg_mh, grp->lg_addr); 439 if (link_state_changed) 440 mac_link_update(grp->lg_mh, grp->lg_link_state); 441 break; 442 case MAC_NOTE_PROMISC: 443 port->lp_txinfo = mac_tx_get(port->lp_mh); 444 break; 445 default: 446 break; 447 } 448 449 AGGR_PORT_REFRELE(port); 450 } 451 452 int 453 aggr_port_start(aggr_port_t *port) 454 { 455 int rc; 456 457 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 458 459 if (port->lp_started) 460 return (0); 461 462 if ((rc = mac_start(port->lp_mh)) != 0) 463 return (rc); 464 465 /* update the port state */ 466 port->lp_started = B_TRUE; 467 468 return (rc); 469 } 470 471 void 472 aggr_port_stop(aggr_port_t *port) 473 { 474 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 475 476 if (!port->lp_started) 477 return; 478 479 aggr_grp_multicst_port(port, B_FALSE); 480 481 mac_stop(port->lp_mh); 482 483 /* update the port state */ 484 port->lp_started = B_FALSE; 485 } 486 487 int 488 aggr_port_promisc(aggr_port_t *port, boolean_t on) 489 { 490 int rc; 491 492 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 493 494 if (on == port->lp_promisc_on) 495 /* already in desired promiscous mode */ 496 return (0); 497 498 rc = mac_promisc_set(port->lp_mh, on, MAC_DEVPROMISC); 499 500 if (rc == 0) 501 port->lp_promisc_on = on; 502 503 return (rc); 504 } 505 506 /* 507 * Set the MAC address of a port. 508 */ 509 int 510 aggr_port_unicst(aggr_port_t *port, uint8_t *macaddr) 511 { 512 int rc; 513 514 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 515 516 rc = mac_unicst_set(port->lp_mh, macaddr); 517 518 return (rc); 519 } 520 521 /* 522 * Add or remove a multicast address to/from a port. 523 */ 524 int 525 aggr_port_multicst(void *arg, boolean_t add, const uint8_t *addrp) 526 { 527 aggr_port_t *port = arg; 528 529 return (add ? mac_multicst_add(port->lp_mh, addrp) : 530 mac_multicst_remove(port->lp_mh, addrp)); 531 } 532 533 uint64_t 534 aggr_port_stat(aggr_port_t *port, uint_t stat) 535 { 536 return (mac_stat_get(port->lp_mh, stat)); 537 } 538