1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * IEEE 802.3ad Link Aggregation - Link Aggregation MAC ports. 28 * 29 * Implements the functions needed to manage the MAC ports that are 30 * part of Link Aggregation groups. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/sysmacros.h> 35 #include <sys/conf.h> 36 #include <sys/cmn_err.h> 37 #include <sys/id_space.h> 38 #include <sys/list.h> 39 #include <sys/ksynch.h> 40 #include <sys/kmem.h> 41 #include <sys/stream.h> 42 #include <sys/modctl.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/atomic.h> 46 #include <sys/stat.h> 47 #include <sys/sdt.h> 48 #include <sys/dlpi.h> 49 #include <sys/aggr.h> 50 #include <sys/aggr_impl.h> 51 52 static kmem_cache_t *aggr_port_cache; 53 static id_space_t *aggr_portids; 54 55 static void aggr_port_notify_cb(void *, mac_notify_type_t); 56 57 /*ARGSUSED*/ 58 static int 59 aggr_port_constructor(void *buf, void *arg, int kmflag) 60 { 61 aggr_port_t *port = buf; 62 63 bzero(buf, sizeof (aggr_port_t)); 64 rw_init(&port->lp_lock, NULL, RW_DRIVER, NULL); 65 66 return (0); 67 } 68 69 /*ARGSUSED*/ 70 static void 71 aggr_port_destructor(void *buf, void *arg) 72 { 73 aggr_port_t *port = buf; 74 75 rw_destroy(&port->lp_lock); 76 } 77 78 void 79 aggr_port_init(void) 80 { 81 aggr_port_cache = kmem_cache_create("aggr_port_cache", 82 sizeof (aggr_port_t), 0, aggr_port_constructor, 83 aggr_port_destructor, NULL, NULL, NULL, 0); 84 85 /* 86 * Allocate a id space to manage port identification. The range of 87 * the arena will be from 1 to UINT16_MAX, because the LACP protocol 88 * specifies 16-bit unique identification. 89 */ 90 aggr_portids = id_space_create("aggr_portids", 1, UINT16_MAX); 91 ASSERT(aggr_portids != NULL); 92 } 93 94 void 95 aggr_port_fini(void) 96 { 97 /* 98 * This function is called only after all groups have been 99 * freed. This ensures that there are no remaining allocated 100 * ports when this function is invoked. 101 */ 102 kmem_cache_destroy(aggr_port_cache); 103 id_space_destroy(aggr_portids); 104 } 105 106 mac_resource_handle_t 107 aggr_port_resource_add(void *arg, mac_resource_t *mrp) 108 { 109 aggr_port_t *port = (aggr_port_t *)arg; 110 aggr_grp_t *grp = port->lp_grp; 111 112 return (mac_resource_add(grp->lg_mh, mrp)); 113 } 114 115 void 116 aggr_port_init_callbacks(aggr_port_t *port) 117 { 118 /* add the port's receive callback */ 119 port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb, 120 (void *)port); 121 122 /* set port's resource_add callback */ 123 mac_resource_set(port->lp_mh, aggr_port_resource_add, (void *)port); 124 } 125 126 int 127 aggr_port_create(const datalink_id_t linkid, boolean_t force, aggr_port_t **pp) 128 { 129 int err; 130 mac_handle_t mh; 131 aggr_port_t *port; 132 uint16_t portid; 133 uint_t i; 134 boolean_t no_link_update = B_FALSE; 135 const mac_info_t *mip; 136 uint32_t note; 137 uint32_t margin; 138 139 *pp = NULL; 140 141 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 142 return (err); 143 144 mip = mac_info(mh); 145 if (mip->mi_media != DL_ETHER || mip->mi_nativemedia != DL_ETHER) { 146 err = EINVAL; 147 goto fail; 148 } 149 150 /* 151 * If the underlying MAC does not support link update notification, it 152 * can only be aggregated if `force' is set. This is because aggr 153 * depends on link notifications to attach ports whose link is up. 154 */ 155 note = mac_no_notification(mh); 156 if ((note & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN)) != 0) { 157 no_link_update = B_TRUE; 158 if (!force) { 159 /* 160 * We borrow this error code to indicate that link 161 * notification is not supported. 162 */ 163 err = ENETDOWN; 164 goto fail; 165 } 166 } 167 168 if ((portid = (uint16_t)id_alloc(aggr_portids)) == 0) { 169 err = ENOMEM; 170 goto fail; 171 } 172 173 /* 174 * As the underlying mac's current margin size is used to determine 175 * the margin size of the aggregation itself, request the underlying 176 * mac not to change to a smaller size. 177 */ 178 if ((err = mac_margin_add(mh, &margin, B_TRUE)) != 0) { 179 id_free(aggr_portids, portid); 180 goto fail; 181 } 182 183 if (!mac_active_set(mh)) { 184 VERIFY(mac_margin_remove(mh, margin) == 0); 185 id_free(aggr_portids, portid); 186 err = EBUSY; 187 goto fail; 188 } 189 190 port = kmem_cache_alloc(aggr_port_cache, KM_SLEEP); 191 192 port->lp_refs = 1; 193 port->lp_next = NULL; 194 port->lp_mh = mh; 195 port->lp_mip = mip; 196 port->lp_linkid = linkid; 197 port->lp_closing = 0; 198 199 /* get the port's original MAC address */ 200 mac_unicst_get(port->lp_mh, port->lp_addr); 201 202 /* set port's transmit information */ 203 port->lp_txinfo = mac_tx_get(port->lp_mh); 204 205 /* initialize state */ 206 port->lp_state = AGGR_PORT_STATE_STANDBY; 207 port->lp_link_state = LINK_STATE_UNKNOWN; 208 port->lp_ifspeed = 0; 209 port->lp_link_duplex = LINK_DUPLEX_UNKNOWN; 210 port->lp_started = B_FALSE; 211 port->lp_tx_enabled = B_FALSE; 212 port->lp_promisc_on = B_FALSE; 213 port->lp_no_link_update = no_link_update; 214 port->lp_portid = portid; 215 port->lp_margin = margin; 216 217 /* 218 * Save the current statistics of the port. They will be used 219 * later by aggr_m_stats() when aggregating the statistics of 220 * the constituent ports. 221 */ 222 for (i = 0; i < MAC_NSTAT; i++) { 223 port->lp_stat[i] = 224 aggr_port_stat(port, i + MAC_STAT_MIN); 225 } 226 for (i = 0; i < ETHER_NSTAT; i++) { 227 port->lp_ether_stat[i] = 228 aggr_port_stat(port, i + MACTYPE_STAT_MIN); 229 } 230 231 /* LACP related state */ 232 port->lp_collector_enabled = B_FALSE; 233 234 *pp = port; 235 return (0); 236 237 fail: 238 mac_close(mh); 239 return (err); 240 } 241 242 void 243 aggr_port_delete(aggr_port_t *port) 244 { 245 VERIFY(mac_margin_remove(port->lp_mh, port->lp_margin) == 0); 246 mac_rx_remove_wait(port->lp_mh); 247 mac_resource_set(port->lp_mh, NULL, NULL); 248 mac_notify_remove(port->lp_mh, port->lp_mnh); 249 mac_active_clear(port->lp_mh); 250 251 /* 252 * Restore the port MAC address. Note it is called after the 253 * port's notification callback being removed. This prevent 254 * port's MAC_NOTE_UNICST notify callback function being called. 255 */ 256 (void) mac_unicst_set(port->lp_mh, port->lp_addr); 257 258 mac_close(port->lp_mh); 259 AGGR_PORT_REFRELE(port); 260 } 261 262 void 263 aggr_port_free(aggr_port_t *port) 264 { 265 ASSERT(port->lp_refs == 0); 266 if (port->lp_grp != NULL) 267 AGGR_GRP_REFRELE(port->lp_grp); 268 port->lp_grp = NULL; 269 id_free(aggr_portids, port->lp_portid); 270 port->lp_portid = 0; 271 kmem_cache_free(aggr_port_cache, port); 272 } 273 274 /* 275 * Invoked upon receiving a MAC_NOTE_LINK notification for 276 * one of the constituent ports. 277 */ 278 boolean_t 279 aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port, boolean_t dolock) 280 { 281 boolean_t do_attach = B_FALSE; 282 boolean_t do_detach = B_FALSE; 283 boolean_t link_state_changed = B_TRUE; 284 uint64_t ifspeed; 285 link_state_t link_state; 286 link_duplex_t link_duplex; 287 288 if (dolock) { 289 AGGR_LACP_LOCK_WRITER(grp); 290 rw_enter(&grp->lg_lock, RW_WRITER); 291 } else { 292 ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp)); 293 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 294 } 295 296 rw_enter(&port->lp_lock, RW_WRITER); 297 298 /* 299 * link state change? For links that do not support link state 300 * notification, always assume the link is up. 301 */ 302 link_state = port->lp_no_link_update ? LINK_STATE_UP : 303 mac_link_get(port->lp_mh); 304 if (port->lp_link_state != link_state) { 305 if (link_state == LINK_STATE_UP) 306 do_attach = (port->lp_link_state != LINK_STATE_UP); 307 else 308 do_detach = (port->lp_link_state == LINK_STATE_UP); 309 } 310 port->lp_link_state = link_state; 311 312 /* link duplex change? */ 313 link_duplex = aggr_port_stat(port, ETHER_STAT_LINK_DUPLEX); 314 if (port->lp_link_duplex != link_duplex) { 315 if (link_duplex == LINK_DUPLEX_FULL) 316 do_attach |= (port->lp_link_duplex != LINK_DUPLEX_FULL); 317 else 318 do_detach |= (port->lp_link_duplex == LINK_DUPLEX_FULL); 319 } 320 port->lp_link_duplex = link_duplex; 321 322 /* link speed changes? */ 323 ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED); 324 if (port->lp_ifspeed != ifspeed) { 325 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 326 do_detach |= (ifspeed != grp->lg_ifspeed); 327 else 328 do_attach |= (ifspeed == grp->lg_ifspeed); 329 } 330 port->lp_ifspeed = ifspeed; 331 332 if (do_attach) { 333 /* attempt to attach the port to the aggregation */ 334 link_state_changed = aggr_grp_attach_port(grp, port); 335 } else if (do_detach) { 336 /* detach the port from the aggregation */ 337 link_state_changed = aggr_grp_detach_port(grp, port, B_TRUE); 338 } 339 340 rw_exit(&port->lp_lock); 341 342 if (dolock) { 343 rw_exit(&grp->lg_lock); 344 AGGR_LACP_UNLOCK(grp); 345 } 346 return (link_state_changed); 347 } 348 349 /* 350 * Invoked upon receiving a MAC_NOTE_UNICST for one of the constituent 351 * ports of a group. 352 */ 353 static void 354 aggr_port_notify_unicst(aggr_grp_t *grp, aggr_port_t *port, 355 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 356 { 357 boolean_t mac_addr_changed = B_FALSE; 358 boolean_t link_state_changed = B_FALSE; 359 uint8_t mac_addr[ETHERADDRL]; 360 361 ASSERT(mac_addr_changedp != NULL); 362 ASSERT(link_state_changedp != NULL); 363 AGGR_LACP_LOCK_WRITER(grp); 364 rw_enter(&grp->lg_lock, RW_WRITER); 365 366 rw_enter(&port->lp_lock, RW_WRITER); 367 368 /* 369 * If it is called when setting the MAC address to the 370 * aggregation group MAC address, do nothing. 371 */ 372 mac_unicst_get(port->lp_mh, mac_addr); 373 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 374 rw_exit(&port->lp_lock); 375 goto done; 376 } 377 378 /* save the new port MAC address */ 379 bcopy(mac_addr, port->lp_addr, ETHERADDRL); 380 381 aggr_grp_port_mac_changed(grp, port, &mac_addr_changed, 382 &link_state_changed); 383 384 rw_exit(&port->lp_lock); 385 386 if (grp->lg_closing) 387 goto done; 388 389 /* 390 * If this port was used to determine the MAC address of 391 * the group, update the MAC address of the constituent 392 * ports. 393 */ 394 if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) 395 link_state_changed = B_TRUE; 396 397 done: 398 *mac_addr_changedp = mac_addr_changed; 399 *link_state_changedp = link_state_changed; 400 rw_exit(&grp->lg_lock); 401 AGGR_LACP_UNLOCK(grp); 402 } 403 404 /* 405 * Notification callback invoked by the MAC service module for 406 * a particular MAC port. 407 */ 408 static void 409 aggr_port_notify_cb(void *arg, mac_notify_type_t type) 410 { 411 aggr_port_t *port = arg; 412 aggr_grp_t *grp = port->lp_grp; 413 boolean_t mac_addr_changed, link_state_changed; 414 415 /* 416 * Do nothing if the aggregation or the port is in the deletion 417 * process. Note that this is necessary to avoid deadlock. 418 */ 419 if ((grp->lg_closing) || (port->lp_closing)) 420 return; 421 422 AGGR_PORT_REFHOLD(port); 423 424 switch (type) { 425 case MAC_NOTE_TX: 426 mac_tx_update(grp->lg_mh); 427 break; 428 case MAC_NOTE_LINK: 429 if (aggr_port_notify_link(grp, port, B_TRUE)) 430 mac_link_update(grp->lg_mh, grp->lg_link_state); 431 break; 432 case MAC_NOTE_UNICST: 433 aggr_port_notify_unicst(grp, port, &mac_addr_changed, 434 &link_state_changed); 435 if (mac_addr_changed) 436 mac_unicst_update(grp->lg_mh, grp->lg_addr); 437 if (link_state_changed) 438 mac_link_update(grp->lg_mh, grp->lg_link_state); 439 break; 440 case MAC_NOTE_PROMISC: 441 port->lp_txinfo = mac_tx_get(port->lp_mh); 442 break; 443 default: 444 break; 445 } 446 447 AGGR_PORT_REFRELE(port); 448 } 449 450 int 451 aggr_port_start(aggr_port_t *port) 452 { 453 int rc; 454 455 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 456 457 if (port->lp_started) 458 return (0); 459 460 if ((rc = mac_start(port->lp_mh)) != 0) 461 return (rc); 462 463 /* update the port state */ 464 port->lp_started = B_TRUE; 465 466 return (rc); 467 } 468 469 void 470 aggr_port_stop(aggr_port_t *port) 471 { 472 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 473 474 if (!port->lp_started) 475 return; 476 477 aggr_grp_multicst_port(port, B_FALSE); 478 479 mac_stop(port->lp_mh); 480 481 /* update the port state */ 482 port->lp_started = B_FALSE; 483 } 484 485 int 486 aggr_port_promisc(aggr_port_t *port, boolean_t on) 487 { 488 int rc; 489 490 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 491 492 if (on == port->lp_promisc_on) 493 /* already in desired promiscous mode */ 494 return (0); 495 496 rc = mac_promisc_set(port->lp_mh, on, MAC_DEVPROMISC); 497 498 if (rc == 0) 499 port->lp_promisc_on = on; 500 501 return (rc); 502 } 503 504 /* 505 * Set the MAC address of a port. 506 */ 507 int 508 aggr_port_unicst(aggr_port_t *port, uint8_t *macaddr) 509 { 510 int rc; 511 512 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 513 514 rc = mac_unicst_set(port->lp_mh, macaddr); 515 516 return (rc); 517 } 518 519 /* 520 * Add or remove a multicast address to/from a port. 521 */ 522 int 523 aggr_port_multicst(void *arg, boolean_t add, const uint8_t *addrp) 524 { 525 aggr_port_t *port = arg; 526 527 return (add ? mac_multicst_add(port->lp_mh, addrp) : 528 mac_multicst_remove(port->lp_mh, addrp)); 529 } 530 531 uint64_t 532 aggr_port_stat(aggr_port_t *port, uint_t stat) 533 { 534 return (mac_stat_get(port->lp_mh, stat)); 535 } 536