1 /* $NetBSD: ieee8023ad_lacp.c,v 1.3 2005/12/11 12:24:54 christos Exp $ */ 2 3 /*- 4 * Copyright (c)2005 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/callout.h> 34 #include <sys/mbuf.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/kernel.h> /* hz */ 38 #include <sys/socket.h> /* for net/if.h */ 39 #include <sys/sockio.h> 40 #include <machine/stdarg.h> 41 #include <sys/lock.h> 42 #include <sys/rwlock.h> 43 #include <sys/taskqueue.h> 44 45 #include <net/if.h> 46 #include <net/if_dl.h> 47 #include <net/ethernet.h> 48 #include <net/if_media.h> 49 #include <net/if_types.h> 50 51 #include <net/if_lagg.h> 52 #include <net/ieee8023ad_lacp.h> 53 54 /* 55 * actor system priority and port priority. 56 * XXX should be configurable. 57 */ 58 59 #define LACP_SYSTEM_PRIO 0x8000 60 #define LACP_PORT_PRIO 0x8000 61 62 const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] = 63 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }; 64 65 static const struct tlv_template lacp_info_tlv_template[] = { 66 { LACP_TYPE_ACTORINFO, 67 sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, 68 { LACP_TYPE_PARTNERINFO, 69 sizeof(struct tlvhdr) + sizeof(struct lacp_peerinfo) }, 70 { LACP_TYPE_COLLECTORINFO, 71 sizeof(struct tlvhdr) + sizeof(struct lacp_collectorinfo) }, 72 { 0, 0 }, 73 }; 74 75 typedef void (*lacp_timer_func_t)(struct lacp_port *); 76 77 static const struct tlv_template marker_info_tlv_template[] = { 78 { MARKER_TYPE_INFO, 16 }, 79 { 0, 0 }, 80 }; 81 82 static const struct tlv_template marker_response_tlv_template[] = { 83 { MARKER_TYPE_RESPONSE, 16 }, 84 { 0, 0 }, 85 }; 86 87 static void lacp_fill_actorinfo(struct lacp_port *, struct lacp_peerinfo *); 88 89 static uint64_t lacp_aggregator_bandwidth(struct lacp_aggregator *); 90 static void lacp_suppress_distributing(struct lacp_softc *, 91 struct lacp_aggregator *); 92 static void lacp_transit_expire(void *); 93 static void lacp_select_active_aggregator(struct lacp_softc *); 94 static uint16_t lacp_compose_key(struct lacp_port *); 95 static int tlv_check(const void *, size_t, const struct tlvhdr *, 96 const struct tlv_template *, boolean_t); 97 static void lacp_tick(void *); 98 99 static void lacp_fill_aggregator_id(struct lacp_aggregator *, 100 const struct lacp_port *); 101 static void lacp_fill_aggregator_id_peer(struct lacp_peerinfo *, 102 const struct lacp_peerinfo *); 103 static int lacp_aggregator_is_compatible(const struct lacp_aggregator *, 104 const struct lacp_port *); 105 static int lacp_peerinfo_is_compatible(const struct lacp_peerinfo *, 106 const struct lacp_peerinfo *); 107 108 static struct lacp_aggregator *lacp_aggregator_get(struct lacp_softc *, 109 struct lacp_port *); 110 static void lacp_aggregator_addref(struct lacp_softc *, 111 struct lacp_aggregator *); 112 static void lacp_aggregator_delref(struct lacp_softc *, 113 struct lacp_aggregator *); 114 115 /* receive machine */ 116 117 static void lacp_dequeue(void *, int); 118 static int lacp_pdu_input(struct lagg_port *, struct mbuf *); 119 static int lacp_marker_input(struct lagg_port *, struct mbuf *); 120 static void lacp_sm_rx(struct lacp_port *, const struct lacpdu *); 121 static void lacp_sm_rx_timer(struct lacp_port *); 122 static void lacp_sm_rx_set_expired(struct lacp_port *); 123 static void lacp_sm_rx_update_ntt(struct lacp_port *, 124 const struct lacpdu *); 125 static void lacp_sm_rx_record_pdu(struct lacp_port *, 126 const struct lacpdu *); 127 static void lacp_sm_rx_update_selected(struct lacp_port *, 128 const struct lacpdu *); 129 static void lacp_sm_rx_record_default(struct lacp_port *); 130 static void lacp_sm_rx_update_default_selected(struct lacp_port *); 131 static void lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *, 132 const struct lacp_peerinfo *); 133 134 /* mux machine */ 135 136 static void lacp_sm_mux(struct lacp_port *); 137 static void lacp_set_mux(struct lacp_port *, enum lacp_mux_state); 138 static void lacp_sm_mux_timer(struct lacp_port *); 139 140 /* periodic transmit machine */ 141 142 static void lacp_sm_ptx_update_timeout(struct lacp_port *, uint8_t); 143 static void lacp_sm_ptx_tx_schedule(struct lacp_port *); 144 static void lacp_sm_ptx_timer(struct lacp_port *); 145 146 /* transmit machine */ 147 148 static void lacp_sm_tx(struct lacp_port *); 149 static void lacp_sm_assert_ntt(struct lacp_port *); 150 151 static void lacp_run_timers(struct lacp_port *); 152 static int lacp_compare_peerinfo(const struct lacp_peerinfo *, 153 const struct lacp_peerinfo *); 154 static int lacp_compare_systemid(const struct lacp_systemid *, 155 const struct lacp_systemid *); 156 static void lacp_port_enable(struct lacp_port *); 157 static void lacp_port_disable(struct lacp_port *); 158 static void lacp_select(struct lacp_port *); 159 static void lacp_unselect(struct lacp_port *); 160 static void lacp_disable_collecting(struct lacp_port *); 161 static void lacp_enable_collecting(struct lacp_port *); 162 static void lacp_disable_distributing(struct lacp_port *); 163 static void lacp_enable_distributing(struct lacp_port *); 164 static int lacp_xmit_lacpdu(struct lacp_port *); 165 166 #if defined(LACP_DEBUG) 167 static void lacp_dump_lacpdu(const struct lacpdu *); 168 static const char *lacp_format_partner(const struct lacp_peerinfo *, char *, 169 size_t); 170 static const char *lacp_format_lagid(const struct lacp_peerinfo *, 171 const struct lacp_peerinfo *, char *, size_t); 172 static const char *lacp_format_lagid_aggregator(const struct lacp_aggregator *, 173 char *, size_t); 174 static const char *lacp_format_state(uint8_t, char *, size_t); 175 static const char *lacp_format_mac(const uint8_t *, char *, size_t); 176 static const char *lacp_format_systemid(const struct lacp_systemid *, char *, 177 size_t); 178 static const char *lacp_format_portid(const struct lacp_portid *, char *, 179 size_t); 180 static void lacp_dprintf(const struct lacp_port *, const char *, ...) 181 __attribute__((__format__(__printf__, 2, 3))); 182 #define LACP_DPRINTF(a) lacp_dprintf a 183 #else 184 #define LACP_DPRINTF(a) /* nothing */ 185 #endif 186 187 /* 188 * partner administration variables. 189 * XXX should be configurable. 190 */ 191 192 static const struct lacp_peerinfo lacp_partner_admin = { 193 .lip_systemid = { .lsi_prio = 0xffff }, 194 .lip_portid = { .lpi_prio = 0xffff }, 195 #if 1 196 /* optimistic */ 197 .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION | 198 LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING, 199 #else 200 /* pessimistic */ 201 .lip_state = 0, 202 #endif 203 }; 204 205 static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = { 206 [LACP_TIMER_CURRENT_WHILE] = lacp_sm_rx_timer, 207 [LACP_TIMER_PERIODIC] = lacp_sm_ptx_timer, 208 [LACP_TIMER_WAIT_WHILE] = lacp_sm_mux_timer, 209 }; 210 211 void 212 lacp_input(struct lagg_port *lgp, struct mbuf *m) 213 { 214 struct lagg_softc *lgs = lgp->lp_lagg; 215 struct lacp_softc *lsc = LACP_SOFTC(lgs); 216 uint8_t subtype; 217 218 if (m->m_pkthdr.len < sizeof(struct ether_header) + sizeof(subtype)) { 219 m_freem(m); 220 return; 221 } 222 223 m_copydata(m, sizeof(struct ether_header), sizeof(subtype), &subtype); 224 switch (subtype) { 225 case SLOWPROTOCOLS_SUBTYPE_LACP: 226 IF_HANDOFF(&lsc->lsc_queue, m, NULL); 227 taskqueue_enqueue(taskqueue_swi, &lsc->lsc_qtask); 228 break; 229 230 case SLOWPROTOCOLS_SUBTYPE_MARKER: 231 lacp_marker_input(lgp, m); 232 break; 233 234 default: 235 /* Unknown LACP packet type */ 236 m_freem(m); 237 break; 238 } 239 } 240 241 static void 242 lacp_dequeue(void *arg, int pending) 243 { 244 struct lacp_softc *lsc = (struct lacp_softc *)arg; 245 struct lagg_softc *sc = lsc->lsc_lagg; 246 struct lagg_port *lgp; 247 struct mbuf *m; 248 249 LAGG_WLOCK(sc); 250 for (;;) { 251 IF_DEQUEUE(&lsc->lsc_queue, m); 252 if (m == NULL) 253 break; 254 lgp = m->m_pkthdr.rcvif->if_lagg; 255 lacp_pdu_input(lgp, m); 256 } 257 LAGG_WUNLOCK(sc); 258 } 259 260 /* 261 * lacp_pdu_input: process lacpdu 262 */ 263 static int 264 lacp_pdu_input(struct lagg_port *lgp, struct mbuf *m) 265 { 266 struct lacp_port *lp = LACP_PORT(lgp); 267 struct lacpdu *du; 268 int error = 0; 269 270 LAGG_WLOCK_ASSERT(lgp->lp_lagg); 271 272 if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) { 273 goto bad; 274 } 275 276 if (m->m_pkthdr.len != sizeof(*du)) { 277 goto bad; 278 } 279 280 if ((m->m_flags & M_MCAST) == 0) { 281 goto bad; 282 } 283 284 if (m->m_len < sizeof(*du)) { 285 m = m_pullup(m, sizeof(*du)); 286 if (m == NULL) { 287 return (ENOMEM); 288 } 289 } 290 291 du = mtod(m, struct lacpdu *); 292 293 if (memcmp(&du->ldu_eh.ether_dhost, 294 ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { 295 goto bad; 296 } 297 298 /* XXX 299 KASSERT(du->ldu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_LACP, 300 ("a very bad kassert!")); 301 */ 302 303 /* 304 * ignore the version for compatibility with 305 * the future protocol revisions. 306 */ 307 308 #if 0 309 if (du->ldu_sph.sph_version != 1) { 310 goto bad; 311 } 312 #endif 313 314 /* 315 * ignore tlv types for compatibility with 316 * the future protocol revisions. 317 */ 318 319 if (tlv_check(du, sizeof(*du), &du->ldu_tlv_actor, 320 lacp_info_tlv_template, FALSE)) { 321 goto bad; 322 } 323 324 #if defined(LACP_DEBUG) 325 LACP_DPRINTF((lp, "lacpdu receive\n")); 326 lacp_dump_lacpdu(du); 327 #endif /* defined(LACP_DEBUG) */ 328 lacp_sm_rx(lp, du); 329 330 m_freem(m); 331 332 return (error); 333 334 bad: 335 m_freem(m); 336 return (EINVAL); 337 } 338 339 static void 340 lacp_fill_actorinfo(struct lacp_port *lp, struct lacp_peerinfo *info) 341 { 342 struct lagg_port *lgp = lp->lp_lagg; 343 struct lagg_softc *lgs = lgp->lp_lagg; 344 345 info->lip_systemid.lsi_prio = htons(LACP_SYSTEM_PRIO); 346 memcpy(&info->lip_systemid.lsi_mac, 347 IF_LLADDR(lgs->sc_ifp), ETHER_ADDR_LEN); 348 info->lip_portid.lpi_prio = htons(LACP_PORT_PRIO); 349 info->lip_portid.lpi_portno = htons(lp->lp_ifp->if_index); 350 info->lip_state = lp->lp_state; 351 } 352 353 static int 354 lacp_xmit_lacpdu(struct lacp_port *lp) 355 { 356 struct lagg_port *lgp = lp->lp_lagg; 357 struct mbuf *m; 358 struct lacpdu *du; 359 int error; 360 361 LAGG_WLOCK_ASSERT(lgp->lp_lagg); 362 363 m = m_gethdr(M_DONTWAIT, MT_DATA); 364 if (m == NULL) { 365 return (ENOMEM); 366 } 367 m->m_len = m->m_pkthdr.len = sizeof(*du); 368 369 du = mtod(m, struct lacpdu *); 370 memset(du, 0, sizeof(*du)); 371 372 memcpy(&du->ldu_eh.ether_dhost, ethermulticastaddr_slowprotocols, 373 ETHER_ADDR_LEN); 374 memcpy(&du->ldu_eh.ether_shost, lgp->lp_lladdr, ETHER_ADDR_LEN); 375 du->ldu_eh.ether_type = htons(ETHERTYPE_SLOW); 376 377 du->ldu_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP; 378 du->ldu_sph.sph_version = 1; 379 380 TLV_SET(&du->ldu_tlv_actor, LACP_TYPE_ACTORINFO, sizeof(du->ldu_actor)); 381 du->ldu_actor = lp->lp_actor; 382 383 TLV_SET(&du->ldu_tlv_partner, LACP_TYPE_PARTNERINFO, 384 sizeof(du->ldu_partner)); 385 du->ldu_partner = lp->lp_partner; 386 387 TLV_SET(&du->ldu_tlv_collector, LACP_TYPE_COLLECTORINFO, 388 sizeof(du->ldu_collector)); 389 du->ldu_collector.lci_maxdelay = 0; 390 391 #if defined(LACP_DEBUG) 392 LACP_DPRINTF((lp, "lacpdu transmit\n")); 393 lacp_dump_lacpdu(du); 394 #endif /* defined(LACP_DEBUG) */ 395 396 m->m_flags |= M_MCAST; 397 398 /* 399 * XXX should use higher priority queue. 400 * otherwise network congestion can break aggregation. 401 */ 402 403 error = lagg_enqueue(lp->lp_ifp, m); 404 return (error); 405 } 406 407 void 408 lacp_linkstate(struct lagg_port *lgp) 409 { 410 struct lacp_port *lp = LACP_PORT(lgp); 411 struct ifnet *ifp = lgp->lp_ifp; 412 struct ifmediareq ifmr; 413 int error = 0; 414 u_int media; 415 uint8_t old_state; 416 uint16_t old_key; 417 418 LAGG_WLOCK_ASSERT(lgp->lp_lagg); 419 420 bzero((char *)&ifmr, sizeof(ifmr)); 421 error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr); 422 if (error != 0) 423 return; 424 425 media = ifmr.ifm_active; 426 LACP_DPRINTF((lp, "media changed 0x%x -> 0x%x, ether = %d, fdx = %d, " 427 "link = %d\n", lp->lp_media, media, IFM_TYPE(media) == IFM_ETHER, 428 (media & IFM_FDX) != 0, ifp->if_link_state == LINK_STATE_UP)); 429 old_state = lp->lp_state; 430 old_key = lp->lp_key; 431 432 lp->lp_media = media; 433 /* 434 * If the port is not an active full duplex Ethernet link then it can 435 * not be aggregated. 436 */ 437 if (IFM_TYPE(media) != IFM_ETHER || (media & IFM_FDX) == 0 || 438 ifp->if_link_state != LINK_STATE_UP) { 439 lacp_port_disable(lp); 440 } else { 441 lacp_port_enable(lp); 442 } 443 lp->lp_key = lacp_compose_key(lp); 444 445 if (old_state != lp->lp_state || old_key != lp->lp_key) { 446 LACP_DPRINTF((lp, "-> UNSELECTED\n")); 447 lp->lp_selected = LACP_UNSELECTED; 448 } 449 } 450 451 static void 452 lacp_tick(void *arg) 453 { 454 struct lacp_softc *lsc = arg; 455 struct lagg_softc *sc = lsc->lsc_lagg; 456 struct lacp_port *lp; 457 458 LAGG_WLOCK(sc); 459 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) { 460 if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) 461 continue; 462 463 lacp_run_timers(lp); 464 465 lacp_select(lp); 466 lacp_sm_mux(lp); 467 lacp_sm_tx(lp); 468 lacp_sm_ptx_tx_schedule(lp); 469 } 470 LAGG_WUNLOCK(sc); 471 callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); 472 } 473 474 int 475 lacp_port_create(struct lagg_port *lgp) 476 { 477 struct lagg_softc *lgs = lgp->lp_lagg; 478 struct lacp_softc *lsc = LACP_SOFTC(lgs); 479 struct lacp_port *lp; 480 struct ifnet *ifp = lgp->lp_ifp; 481 struct sockaddr_dl sdl; 482 struct ifmultiaddr *rifma = NULL; 483 int error; 484 485 boolean_t active = TRUE; /* XXX should be configurable */ 486 boolean_t fast = FALSE; /* XXX should be configurable */ 487 488 LAGG_WLOCK_ASSERT(lgs); 489 490 bzero((char *)&sdl, sizeof(sdl)); 491 sdl.sdl_len = sizeof(sdl); 492 sdl.sdl_family = AF_LINK; 493 sdl.sdl_index = ifp->if_index; 494 sdl.sdl_type = IFT_ETHER; 495 sdl.sdl_alen = ETHER_ADDR_LEN; 496 497 bcopy(ðermulticastaddr_slowprotocols, 498 LLADDR(&sdl), ETHER_ADDR_LEN); 499 error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma); 500 if (error) { 501 printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname); 502 return (error); 503 } 504 505 lp = malloc(sizeof(struct lacp_port), 506 M_DEVBUF, M_NOWAIT|M_ZERO); 507 if (lp == NULL) 508 return (ENOMEM); 509 510 lgp->lp_psc = (caddr_t)lp; 511 lp->lp_ifp = ifp; 512 lp->lp_lagg = lgp; 513 lp->lp_lsc = lsc; 514 lp->lp_ifma = rifma; 515 516 LIST_INSERT_HEAD(&lsc->lsc_ports, lp, lp_next); 517 518 lacp_fill_actorinfo(lp, &lp->lp_actor); 519 lp->lp_state = 520 (active ? LACP_STATE_ACTIVITY : 0) | 521 (fast ? LACP_STATE_TIMEOUT : 0); 522 lp->lp_aggregator = NULL; 523 lacp_linkstate(lgp); 524 lacp_sm_rx_set_expired(lp); 525 526 return (0); 527 } 528 529 void 530 lacp_port_destroy(struct lagg_port *lgp) 531 { 532 struct lacp_port *lp = LACP_PORT(lgp); 533 int i; 534 535 LAGG_WLOCK_ASSERT(lgp->lp_lagg); 536 537 for (i = 0; i < LACP_NTIMER; i++) { 538 LACP_TIMER_DISARM(lp, i); 539 } 540 541 lacp_disable_collecting(lp); 542 lacp_disable_distributing(lp); 543 lacp_unselect(lp); 544 lgp->lp_flags &= ~LAGG_PORT_DISABLED; 545 546 /* The address may have already been removed by if_purgemaddrs() */ 547 if (!lgp->lp_detaching) 548 if_delmulti_ifma(lp->lp_ifma); 549 550 LIST_REMOVE(lp, lp_next); 551 free(lp, M_DEVBUF); 552 } 553 554 int 555 lacp_port_isactive(struct lagg_port *lgp) 556 { 557 struct lacp_port *lp = LACP_PORT(lgp); 558 struct lacp_softc *lsc = lp->lp_lsc; 559 struct lacp_aggregator *la = lp->lp_aggregator; 560 561 /* This port is joined to the active aggregator */ 562 if (la != NULL && la == lsc->lsc_active_aggregator) 563 return (1); 564 565 return (0); 566 } 567 568 static void 569 lacp_disable_collecting(struct lacp_port *lp) 570 { 571 struct lagg_port *lgp = lp->lp_lagg; 572 573 LACP_DPRINTF((lp, "collecting disabled\n")); 574 575 lp->lp_state &= ~LACP_STATE_COLLECTING; 576 lgp->lp_flags &= ~LAGG_PORT_COLLECTING; 577 } 578 579 static void 580 lacp_enable_collecting(struct lacp_port *lp) 581 { 582 struct lagg_port *lgp = lp->lp_lagg; 583 584 LACP_DPRINTF((lp, "collecting enabled\n")); 585 586 lp->lp_state |= LACP_STATE_COLLECTING; 587 lgp->lp_flags |= LAGG_PORT_COLLECTING; 588 } 589 590 static void 591 lacp_disable_distributing(struct lacp_port *lp) 592 { 593 struct lacp_aggregator *la = lp->lp_aggregator; 594 struct lacp_softc *lsc = lp->lp_lsc; 595 struct lagg_port *lgp = lp->lp_lagg; 596 #if defined(LACP_DEBUG) 597 char buf[LACP_LAGIDSTR_MAX+1]; 598 #endif /* defined(LACP_DEBUG) */ 599 600 LAGG_WLOCK_ASSERT(lgp->lp_lagg); 601 602 if (la == NULL || (lp->lp_state & LACP_STATE_DISTRIBUTING) == 0) { 603 return; 604 } 605 606 KASSERT(!TAILQ_EMPTY(&la->la_ports), ("no aggregator ports")); 607 KASSERT(la->la_nports > 0, ("nports invalid (%d)", la->la_nports)); 608 KASSERT(la->la_refcnt >= la->la_nports, ("aggregator refcnt invalid")); 609 610 LACP_DPRINTF((lp, "disable distributing on aggregator %s, " 611 "nports %d -> %d\n", 612 lacp_format_lagid_aggregator(la, buf, sizeof(buf)), 613 la->la_nports, la->la_nports - 1)); 614 615 TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q); 616 la->la_nports--; 617 618 lacp_suppress_distributing(lsc, la); 619 620 lp->lp_state &= ~LACP_STATE_DISTRIBUTING; 621 lgp->lp_flags &= ~LAGG_PORT_DISTRIBUTING; 622 623 if (lsc->lsc_active_aggregator == la) { 624 lacp_select_active_aggregator(lsc); 625 } 626 } 627 628 static void 629 lacp_enable_distributing(struct lacp_port *lp) 630 { 631 struct lacp_aggregator *la = lp->lp_aggregator; 632 struct lacp_softc *lsc = lp->lp_lsc; 633 struct lagg_port *lgp = lp->lp_lagg; 634 #if defined(LACP_DEBUG) 635 char buf[LACP_LAGIDSTR_MAX+1]; 636 #endif /* defined(LACP_DEBUG) */ 637 638 LAGG_WLOCK_ASSERT(lgp->lp_lagg); 639 640 if ((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0) { 641 return; 642 } 643 644 LACP_DPRINTF((lp, "enable distributing on aggregator %s, " 645 "nports %d -> %d\n", 646 lacp_format_lagid_aggregator(la, buf, sizeof(buf)), 647 la->la_nports, la->la_nports + 1)); 648 649 KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid")); 650 TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q); 651 la->la_nports++; 652 653 lacp_suppress_distributing(lsc, la); 654 655 lp->lp_state |= LACP_STATE_DISTRIBUTING; 656 lgp->lp_flags |= LAGG_PORT_DISTRIBUTING; 657 658 if (lsc->lsc_active_aggregator != la) { 659 lacp_select_active_aggregator(lsc); 660 } 661 } 662 663 static void 664 lacp_transit_expire(void *vp) 665 { 666 struct lacp_softc *lsc = vp; 667 668 LACP_DPRINTF((NULL, "%s\n", __func__)); 669 lsc->lsc_suppress_distributing = FALSE; 670 } 671 672 int 673 lacp_attach(struct lagg_softc *lgs) 674 { 675 struct lacp_softc *lsc; 676 677 LAGG_WLOCK_ASSERT(lgs); 678 679 lsc = malloc(sizeof(struct lacp_softc), 680 M_DEVBUF, M_NOWAIT|M_ZERO); 681 if (lsc == NULL) 682 return (ENOMEM); 683 684 lgs->sc_psc = (caddr_t)lsc; 685 lsc->lsc_lagg = lgs; 686 687 lsc->lsc_hashkey = arc4random(); 688 lsc->lsc_active_aggregator = NULL; 689 TAILQ_INIT(&lsc->lsc_aggregators); 690 LIST_INIT(&lsc->lsc_ports); 691 692 TASK_INIT(&lsc->lsc_qtask, 0, lacp_dequeue, lsc); 693 mtx_init(&lsc->lsc_queue.ifq_mtx, "lacp queue", NULL, MTX_DEF); 694 lsc->lsc_queue.ifq_maxlen = ifqmaxlen; 695 696 callout_init(&lsc->lsc_transit_callout, CALLOUT_MPSAFE); 697 callout_init(&lsc->lsc_callout, CALLOUT_MPSAFE); 698 699 /* if the lagg is already up then do the same */ 700 if (lgs->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) 701 lacp_init(lgs); 702 703 return (0); 704 } 705 706 int 707 lacp_detach(struct lagg_softc *lgs) 708 { 709 struct lacp_softc *lsc = LACP_SOFTC(lgs); 710 711 KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators), 712 ("aggregators still active")); 713 KASSERT(lsc->lsc_active_aggregator == NULL, 714 ("aggregator still attached")); 715 716 lgs->sc_psc = NULL; 717 callout_drain(&lsc->lsc_transit_callout); 718 callout_drain(&lsc->lsc_callout); 719 taskqueue_drain(taskqueue_swi, &lsc->lsc_qtask); 720 IF_DRAIN(&lsc->lsc_queue); 721 mtx_destroy(&lsc->lsc_queue.ifq_mtx); 722 723 free(lsc, M_DEVBUF); 724 return (0); 725 } 726 727 void 728 lacp_init(struct lagg_softc *lgs) 729 { 730 struct lacp_softc *lsc = LACP_SOFTC(lgs); 731 732 callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); 733 } 734 735 void 736 lacp_stop(struct lagg_softc *lgs) 737 { 738 struct lacp_softc *lsc = LACP_SOFTC(lgs); 739 740 callout_stop(&lsc->lsc_transit_callout); 741 callout_stop(&lsc->lsc_callout); 742 } 743 744 struct lagg_port * 745 lacp_select_tx_port(struct lagg_softc *lgs, struct mbuf *m) 746 { 747 struct lacp_softc *lsc = LACP_SOFTC(lgs); 748 struct lacp_aggregator *la; 749 struct lacp_port *lp; 750 uint32_t hash; 751 int nports; 752 753 LAGG_RLOCK_ASSERT(lgs); 754 755 if (__predict_false(lsc->lsc_suppress_distributing)) { 756 LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__)); 757 return (NULL); 758 } 759 760 la = lsc->lsc_active_aggregator; 761 if (__predict_false(la == NULL)) { 762 LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__)); 763 return (NULL); 764 } 765 766 nports = la->la_nports; 767 KASSERT(nports > 0, ("no ports available")); 768 769 hash = lagg_hashmbuf(m, lsc->lsc_hashkey); 770 hash %= nports; 771 lp = TAILQ_FIRST(&la->la_ports); 772 while (hash--) { 773 lp = TAILQ_NEXT(lp, lp_dist_q); 774 } 775 776 KASSERT((lp->lp_state & LACP_STATE_DISTRIBUTING) != 0, 777 ("aggregated port is not distributing")); 778 779 return (lp->lp_lagg); 780 } 781 /* 782 * lacp_suppress_distributing: drop transmit packets for a while 783 * to preserve packet ordering. 784 */ 785 786 static void 787 lacp_suppress_distributing(struct lacp_softc *lsc, struct lacp_aggregator *la) 788 { 789 if (lsc->lsc_active_aggregator != la) { 790 return; 791 } 792 793 LACP_DPRINTF((NULL, "%s\n", __func__)); 794 lsc->lsc_suppress_distributing = TRUE; 795 /* XXX should consider collector max delay */ 796 callout_reset(&lsc->lsc_transit_callout, 797 LACP_TRANSIT_DELAY * hz / 1000, lacp_transit_expire, lsc); 798 } 799 800 static int 801 lacp_compare_peerinfo(const struct lacp_peerinfo *a, 802 const struct lacp_peerinfo *b) 803 { 804 return (memcmp(a, b, offsetof(struct lacp_peerinfo, lip_state))); 805 } 806 807 static int 808 lacp_compare_systemid(const struct lacp_systemid *a, 809 const struct lacp_systemid *b) 810 { 811 return (memcmp(a, b, sizeof(*a))); 812 } 813 814 #if 0 /* unused */ 815 static int 816 lacp_compare_portid(const struct lacp_portid *a, 817 const struct lacp_portid *b) 818 { 819 return (memcmp(a, b, sizeof(*a))); 820 } 821 #endif 822 823 static uint64_t 824 lacp_aggregator_bandwidth(struct lacp_aggregator *la) 825 { 826 struct lacp_port *lp; 827 uint64_t speed; 828 829 lp = TAILQ_FIRST(&la->la_ports); 830 if (lp == NULL) { 831 return (0); 832 } 833 834 speed = ifmedia_baudrate(lp->lp_media); 835 speed *= la->la_nports; 836 if (speed == 0) { 837 LACP_DPRINTF((lp, "speed 0? media=0x%x nports=%d\n", 838 lp->lp_media, la->la_nports)); 839 } 840 841 return (speed); 842 } 843 844 /* 845 * lacp_select_active_aggregator: select an aggregator to be used to transmit 846 * packets from lagg(4) interface. 847 */ 848 849 static void 850 lacp_select_active_aggregator(struct lacp_softc *lsc) 851 { 852 struct lacp_aggregator *la; 853 struct lacp_aggregator *best_la = NULL; 854 uint64_t best_speed = 0; 855 #if defined(LACP_DEBUG) 856 char buf[LACP_LAGIDSTR_MAX+1]; 857 #endif /* defined(LACP_DEBUG) */ 858 859 LACP_DPRINTF((NULL, "%s:\n", __func__)); 860 861 TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { 862 uint64_t speed; 863 864 if (la->la_nports == 0) { 865 continue; 866 } 867 868 speed = lacp_aggregator_bandwidth(la); 869 LACP_DPRINTF((NULL, "%s, speed=%jd, nports=%d\n", 870 lacp_format_lagid_aggregator(la, buf, sizeof(buf)), 871 speed, la->la_nports)); 872 if (speed > best_speed || 873 (speed == best_speed && 874 la == lsc->lsc_active_aggregator)) { 875 best_la = la; 876 best_speed = speed; 877 } 878 } 879 880 KASSERT(best_la == NULL || best_la->la_nports > 0, 881 ("invalid aggregator refcnt")); 882 KASSERT(best_la == NULL || !TAILQ_EMPTY(&best_la->la_ports), 883 ("invalid aggregator list")); 884 885 #if defined(LACP_DEBUG) 886 if (lsc->lsc_active_aggregator != best_la) { 887 LACP_DPRINTF((NULL, "active aggregator changed\n")); 888 LACP_DPRINTF((NULL, "old %s\n", 889 lacp_format_lagid_aggregator(lsc->lsc_active_aggregator, 890 buf, sizeof(buf)))); 891 } else { 892 LACP_DPRINTF((NULL, "active aggregator not changed\n")); 893 } 894 LACP_DPRINTF((NULL, "new %s\n", 895 lacp_format_lagid_aggregator(best_la, buf, sizeof(buf)))); 896 #endif /* defined(LACP_DEBUG) */ 897 898 if (lsc->lsc_active_aggregator != best_la) { 899 lsc->lsc_active_aggregator = best_la; 900 if (best_la) { 901 lacp_suppress_distributing(lsc, best_la); 902 } 903 } 904 } 905 906 static uint16_t 907 lacp_compose_key(struct lacp_port *lp) 908 { 909 struct lagg_port *lgp = lp->lp_lagg; 910 struct lagg_softc *lgs = lgp->lp_lagg; 911 u_int media = lp->lp_media; 912 uint16_t key; 913 914 if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) { 915 916 /* 917 * non-aggregatable links should have unique keys. 918 * 919 * XXX this isn't really unique as if_index is 16 bit. 920 */ 921 922 /* bit 0..14: (some bits of) if_index of this port */ 923 key = lp->lp_ifp->if_index; 924 /* bit 15: 1 */ 925 key |= 0x8000; 926 } else { 927 u_int subtype = IFM_SUBTYPE(media); 928 929 KASSERT(IFM_TYPE(media) == IFM_ETHER, ("invalid media type")); 930 KASSERT((media & IFM_FDX) != 0, ("aggregating HDX interface")); 931 932 /* bit 0..4: IFM_SUBTYPE */ 933 key = subtype; 934 /* bit 5..14: (some bits of) if_index of lagg device */ 935 key |= 0x7fe0 & ((lgs->sc_ifp->if_index) << 5); 936 /* bit 15: 0 */ 937 } 938 return (htons(key)); 939 } 940 941 static void 942 lacp_aggregator_addref(struct lacp_softc *lsc, struct lacp_aggregator *la) 943 { 944 #if defined(LACP_DEBUG) 945 char buf[LACP_LAGIDSTR_MAX+1]; 946 #endif 947 948 LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", 949 __func__, 950 lacp_format_lagid(&la->la_actor, &la->la_partner, 951 buf, sizeof(buf)), 952 la->la_refcnt, la->la_refcnt + 1)); 953 954 KASSERT(la->la_refcnt > 0, ("refcount <= 0")); 955 la->la_refcnt++; 956 KASSERT(la->la_refcnt > la->la_nports, ("invalid refcount")); 957 } 958 959 static void 960 lacp_aggregator_delref(struct lacp_softc *lsc, struct lacp_aggregator *la) 961 { 962 #if defined(LACP_DEBUG) 963 char buf[LACP_LAGIDSTR_MAX+1]; 964 #endif 965 966 LACP_DPRINTF((NULL, "%s: lagid=%s, refcnt %d -> %d\n", 967 __func__, 968 lacp_format_lagid(&la->la_actor, &la->la_partner, 969 buf, sizeof(buf)), 970 la->la_refcnt, la->la_refcnt - 1)); 971 972 KASSERT(la->la_refcnt > la->la_nports, ("invalid refcnt")); 973 la->la_refcnt--; 974 if (la->la_refcnt > 0) { 975 return; 976 } 977 978 KASSERT(la->la_refcnt == 0, ("refcount not zero")); 979 KASSERT(lsc->lsc_active_aggregator != la, ("aggregator active")); 980 981 TAILQ_REMOVE(&lsc->lsc_aggregators, la, la_q); 982 983 free(la, M_DEVBUF); 984 } 985 986 /* 987 * lacp_aggregator_get: allocate an aggregator. 988 */ 989 990 static struct lacp_aggregator * 991 lacp_aggregator_get(struct lacp_softc *lsc, struct lacp_port *lp) 992 { 993 struct lacp_aggregator *la; 994 995 la = malloc(sizeof(*la), M_DEVBUF, M_NOWAIT); 996 if (la) { 997 la->la_refcnt = 1; 998 la->la_nports = 0; 999 TAILQ_INIT(&la->la_ports); 1000 la->la_pending = 0; 1001 TAILQ_INSERT_TAIL(&lsc->lsc_aggregators, la, la_q); 1002 } 1003 1004 return (la); 1005 } 1006 1007 /* 1008 * lacp_fill_aggregator_id: setup a newly allocated aggregator from a port. 1009 */ 1010 1011 static void 1012 lacp_fill_aggregator_id(struct lacp_aggregator *la, const struct lacp_port *lp) 1013 { 1014 lacp_fill_aggregator_id_peer(&la->la_partner, &lp->lp_partner); 1015 lacp_fill_aggregator_id_peer(&la->la_actor, &lp->lp_actor); 1016 1017 la->la_actor.lip_state = lp->lp_state & LACP_STATE_AGGREGATION; 1018 } 1019 1020 static void 1021 lacp_fill_aggregator_id_peer(struct lacp_peerinfo *lpi_aggr, 1022 const struct lacp_peerinfo *lpi_port) 1023 { 1024 memset(lpi_aggr, 0, sizeof(*lpi_aggr)); 1025 lpi_aggr->lip_systemid = lpi_port->lip_systemid; 1026 lpi_aggr->lip_key = lpi_port->lip_key; 1027 } 1028 1029 /* 1030 * lacp_aggregator_is_compatible: check if a port can join to an aggregator. 1031 */ 1032 1033 static int 1034 lacp_aggregator_is_compatible(const struct lacp_aggregator *la, 1035 const struct lacp_port *lp) 1036 { 1037 if (!(lp->lp_state & LACP_STATE_AGGREGATION) || 1038 !(lp->lp_partner.lip_state & LACP_STATE_AGGREGATION)) { 1039 return (0); 1040 } 1041 1042 if (!(la->la_actor.lip_state & LACP_STATE_AGGREGATION)) { 1043 return (0); 1044 } 1045 1046 if (!lacp_peerinfo_is_compatible(&la->la_partner, &lp->lp_partner)) { 1047 return (0); 1048 } 1049 1050 if (!lacp_peerinfo_is_compatible(&la->la_actor, &lp->lp_actor)) { 1051 return (0); 1052 } 1053 1054 return (1); 1055 } 1056 1057 static int 1058 lacp_peerinfo_is_compatible(const struct lacp_peerinfo *a, 1059 const struct lacp_peerinfo *b) 1060 { 1061 if (memcmp(&a->lip_systemid, &b->lip_systemid, 1062 sizeof(a->lip_systemid))) { 1063 return (0); 1064 } 1065 1066 if (memcmp(&a->lip_key, &b->lip_key, sizeof(a->lip_key))) { 1067 return (0); 1068 } 1069 1070 return (1); 1071 } 1072 1073 static void 1074 lacp_port_enable(struct lacp_port *lp) 1075 { 1076 struct lagg_port *lgp = lp->lp_lagg; 1077 1078 lp->lp_state |= LACP_STATE_AGGREGATION; 1079 lgp->lp_flags &= ~LAGG_PORT_DISABLED; 1080 } 1081 1082 static void 1083 lacp_port_disable(struct lacp_port *lp) 1084 { 1085 struct lagg_port *lgp = lp->lp_lagg; 1086 1087 lacp_set_mux(lp, LACP_MUX_DETACHED); 1088 1089 lp->lp_state &= ~LACP_STATE_AGGREGATION; 1090 lp->lp_selected = LACP_UNSELECTED; 1091 lacp_sm_rx_record_default(lp); 1092 lp->lp_partner.lip_state &= ~LACP_STATE_AGGREGATION; 1093 lp->lp_state &= ~LACP_STATE_EXPIRED; 1094 lgp->lp_flags |= LAGG_PORT_DISABLED; 1095 } 1096 1097 /* 1098 * lacp_select: select an aggregator. create one if necessary. 1099 */ 1100 static void 1101 lacp_select(struct lacp_port *lp) 1102 { 1103 struct lacp_softc *lsc = lp->lp_lsc; 1104 struct lacp_aggregator *la; 1105 #if defined(LACP_DEBUG) 1106 char buf[LACP_LAGIDSTR_MAX+1]; 1107 #endif 1108 1109 if (lp->lp_aggregator) { 1110 return; 1111 } 1112 1113 KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), 1114 ("timer_wait_while still active")); 1115 1116 LACP_DPRINTF((lp, "port lagid=%s\n", 1117 lacp_format_lagid(&lp->lp_actor, &lp->lp_partner, 1118 buf, sizeof(buf)))); 1119 1120 TAILQ_FOREACH(la, &lsc->lsc_aggregators, la_q) { 1121 if (lacp_aggregator_is_compatible(la, lp)) { 1122 break; 1123 } 1124 } 1125 1126 if (la == NULL) { 1127 la = lacp_aggregator_get(lsc, lp); 1128 if (la == NULL) { 1129 LACP_DPRINTF((lp, "aggregator creation failed\n")); 1130 1131 /* 1132 * will retry on the next tick. 1133 */ 1134 1135 return; 1136 } 1137 lacp_fill_aggregator_id(la, lp); 1138 LACP_DPRINTF((lp, "aggregator created\n")); 1139 } else { 1140 LACP_DPRINTF((lp, "compatible aggregator found\n")); 1141 lacp_aggregator_addref(lsc, la); 1142 } 1143 1144 LACP_DPRINTF((lp, "aggregator lagid=%s\n", 1145 lacp_format_lagid(&la->la_actor, &la->la_partner, 1146 buf, sizeof(buf)))); 1147 1148 lp->lp_aggregator = la; 1149 lp->lp_selected = LACP_SELECTED; 1150 } 1151 1152 /* 1153 * lacp_unselect: finish unselect/detach process. 1154 */ 1155 1156 static void 1157 lacp_unselect(struct lacp_port *lp) 1158 { 1159 struct lacp_softc *lsc = lp->lp_lsc; 1160 struct lacp_aggregator *la = lp->lp_aggregator; 1161 1162 KASSERT(!LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), 1163 ("timer_wait_while still active")); 1164 1165 if (la == NULL) { 1166 return; 1167 } 1168 1169 lp->lp_aggregator = NULL; 1170 lacp_aggregator_delref(lsc, la); 1171 } 1172 1173 /* mux machine */ 1174 1175 static void 1176 lacp_sm_mux(struct lacp_port *lp) 1177 { 1178 enum lacp_mux_state new_state; 1179 boolean_t p_sync = 1180 (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0; 1181 boolean_t p_collecting = 1182 (lp->lp_partner.lip_state & LACP_STATE_COLLECTING) != 0; 1183 enum lacp_selected selected = lp->lp_selected; 1184 struct lacp_aggregator *la; 1185 1186 /* LACP_DPRINTF((lp, "%s: state %d\n", __func__, lp->lp_mux_state)); */ 1187 1188 re_eval: 1189 la = lp->lp_aggregator; 1190 KASSERT(lp->lp_mux_state == LACP_MUX_DETACHED || la != NULL, 1191 ("MUX not detached")); 1192 new_state = lp->lp_mux_state; 1193 switch (lp->lp_mux_state) { 1194 case LACP_MUX_DETACHED: 1195 if (selected != LACP_UNSELECTED) { 1196 new_state = LACP_MUX_WAITING; 1197 } 1198 break; 1199 case LACP_MUX_WAITING: 1200 KASSERT(la->la_pending > 0 || 1201 !LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE), 1202 ("timer_wait_while still active")); 1203 if (selected == LACP_SELECTED && la->la_pending == 0) { 1204 new_state = LACP_MUX_ATTACHED; 1205 } else if (selected == LACP_UNSELECTED) { 1206 new_state = LACP_MUX_DETACHED; 1207 } 1208 break; 1209 case LACP_MUX_ATTACHED: 1210 if (selected == LACP_SELECTED && p_sync) { 1211 new_state = LACP_MUX_COLLECTING; 1212 } else if (selected != LACP_SELECTED) { 1213 new_state = LACP_MUX_DETACHED; 1214 } 1215 break; 1216 case LACP_MUX_COLLECTING: 1217 if (selected == LACP_SELECTED && p_sync && p_collecting) { 1218 new_state = LACP_MUX_DISTRIBUTING; 1219 } else if (selected != LACP_SELECTED || !p_sync) { 1220 new_state = LACP_MUX_ATTACHED; 1221 } 1222 break; 1223 case LACP_MUX_DISTRIBUTING: 1224 if (selected != LACP_SELECTED || !p_sync || !p_collecting) { 1225 new_state = LACP_MUX_COLLECTING; 1226 } 1227 break; 1228 default: 1229 panic("%s: unknown state", __func__); 1230 } 1231 1232 if (lp->lp_mux_state == new_state) { 1233 return; 1234 } 1235 1236 lacp_set_mux(lp, new_state); 1237 goto re_eval; 1238 } 1239 1240 static void 1241 lacp_set_mux(struct lacp_port *lp, enum lacp_mux_state new_state) 1242 { 1243 struct lacp_aggregator *la = lp->lp_aggregator; 1244 1245 if (lp->lp_mux_state == new_state) { 1246 return; 1247 } 1248 1249 switch (new_state) { 1250 case LACP_MUX_DETACHED: 1251 lp->lp_state &= ~LACP_STATE_SYNC; 1252 lacp_disable_distributing(lp); 1253 lacp_disable_collecting(lp); 1254 lacp_sm_assert_ntt(lp); 1255 /* cancel timer */ 1256 if (LACP_TIMER_ISARMED(lp, LACP_TIMER_WAIT_WHILE)) { 1257 KASSERT(la->la_pending > 0, 1258 ("timer_wait_while not active")); 1259 la->la_pending--; 1260 } 1261 LACP_TIMER_DISARM(lp, LACP_TIMER_WAIT_WHILE); 1262 lacp_unselect(lp); 1263 break; 1264 case LACP_MUX_WAITING: 1265 LACP_TIMER_ARM(lp, LACP_TIMER_WAIT_WHILE, 1266 LACP_AGGREGATE_WAIT_TIME); 1267 la->la_pending++; 1268 break; 1269 case LACP_MUX_ATTACHED: 1270 lp->lp_state |= LACP_STATE_SYNC; 1271 lacp_disable_collecting(lp); 1272 lacp_sm_assert_ntt(lp); 1273 break; 1274 case LACP_MUX_COLLECTING: 1275 lacp_enable_collecting(lp); 1276 lacp_disable_distributing(lp); 1277 lacp_sm_assert_ntt(lp); 1278 break; 1279 case LACP_MUX_DISTRIBUTING: 1280 lacp_enable_distributing(lp); 1281 break; 1282 default: 1283 panic("%s: unknown state", __func__); 1284 } 1285 1286 LACP_DPRINTF((lp, "mux_state %d -> %d\n", lp->lp_mux_state, new_state)); 1287 1288 lp->lp_mux_state = new_state; 1289 } 1290 1291 static void 1292 lacp_sm_mux_timer(struct lacp_port *lp) 1293 { 1294 struct lacp_aggregator *la = lp->lp_aggregator; 1295 #if defined(LACP_DEBUG) 1296 char buf[LACP_LAGIDSTR_MAX+1]; 1297 #endif 1298 1299 KASSERT(la->la_pending > 0, ("no pending event")); 1300 1301 LACP_DPRINTF((lp, "%s: aggregator %s, pending %d -> %d\n", __func__, 1302 lacp_format_lagid(&la->la_actor, &la->la_partner, 1303 buf, sizeof(buf)), 1304 la->la_pending, la->la_pending - 1)); 1305 1306 la->la_pending--; 1307 } 1308 1309 /* periodic transmit machine */ 1310 1311 static void 1312 lacp_sm_ptx_update_timeout(struct lacp_port *lp, uint8_t oldpstate) 1313 { 1314 if (LACP_STATE_EQ(oldpstate, lp->lp_partner.lip_state, 1315 LACP_STATE_TIMEOUT)) { 1316 return; 1317 } 1318 1319 LACP_DPRINTF((lp, "partner timeout changed\n")); 1320 1321 /* 1322 * FAST_PERIODIC -> SLOW_PERIODIC 1323 * or 1324 * SLOW_PERIODIC (-> PERIODIC_TX) -> FAST_PERIODIC 1325 * 1326 * let lacp_sm_ptx_tx_schedule to update timeout. 1327 */ 1328 1329 LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); 1330 1331 /* 1332 * if timeout has been shortened, assert NTT. 1333 */ 1334 1335 if ((lp->lp_partner.lip_state & LACP_STATE_TIMEOUT)) { 1336 lacp_sm_assert_ntt(lp); 1337 } 1338 } 1339 1340 static void 1341 lacp_sm_ptx_tx_schedule(struct lacp_port *lp) 1342 { 1343 int timeout; 1344 1345 if (!(lp->lp_state & LACP_STATE_ACTIVITY) && 1346 !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) { 1347 1348 /* 1349 * NO_PERIODIC 1350 */ 1351 1352 LACP_TIMER_DISARM(lp, LACP_TIMER_PERIODIC); 1353 return; 1354 } 1355 1356 if (LACP_TIMER_ISARMED(lp, LACP_TIMER_PERIODIC)) { 1357 return; 1358 } 1359 1360 timeout = (lp->lp_partner.lip_state & LACP_STATE_TIMEOUT) ? 1361 LACP_FAST_PERIODIC_TIME : LACP_SLOW_PERIODIC_TIME; 1362 1363 LACP_TIMER_ARM(lp, LACP_TIMER_PERIODIC, timeout); 1364 } 1365 1366 static void 1367 lacp_sm_ptx_timer(struct lacp_port *lp) 1368 { 1369 lacp_sm_assert_ntt(lp); 1370 } 1371 1372 static void 1373 lacp_sm_rx(struct lacp_port *lp, const struct lacpdu *du) 1374 { 1375 int timeout; 1376 1377 /* 1378 * check LACP_DISABLED first 1379 */ 1380 1381 if (!(lp->lp_state & LACP_STATE_AGGREGATION)) { 1382 return; 1383 } 1384 1385 /* 1386 * check loopback condition. 1387 */ 1388 1389 if (!lacp_compare_systemid(&du->ldu_actor.lip_systemid, 1390 &lp->lp_actor.lip_systemid)) { 1391 return; 1392 } 1393 1394 /* 1395 * EXPIRED, DEFAULTED, CURRENT -> CURRENT 1396 */ 1397 1398 lacp_sm_rx_update_selected(lp, du); 1399 lacp_sm_rx_update_ntt(lp, du); 1400 lacp_sm_rx_record_pdu(lp, du); 1401 1402 timeout = (lp->lp_state & LACP_STATE_TIMEOUT) ? 1403 LACP_SHORT_TIMEOUT_TIME : LACP_LONG_TIMEOUT_TIME; 1404 LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, timeout); 1405 1406 lp->lp_state &= ~LACP_STATE_EXPIRED; 1407 1408 /* 1409 * kick transmit machine without waiting the next tick. 1410 */ 1411 1412 lacp_sm_tx(lp); 1413 } 1414 1415 static void 1416 lacp_sm_rx_set_expired(struct lacp_port *lp) 1417 { 1418 lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; 1419 lp->lp_partner.lip_state |= LACP_STATE_TIMEOUT; 1420 LACP_TIMER_ARM(lp, LACP_TIMER_CURRENT_WHILE, LACP_SHORT_TIMEOUT_TIME); 1421 lp->lp_state |= LACP_STATE_EXPIRED; 1422 } 1423 1424 static void 1425 lacp_sm_rx_timer(struct lacp_port *lp) 1426 { 1427 if ((lp->lp_state & LACP_STATE_EXPIRED) == 0) { 1428 /* CURRENT -> EXPIRED */ 1429 LACP_DPRINTF((lp, "%s: CURRENT -> EXPIRED\n", __func__)); 1430 lacp_sm_rx_set_expired(lp); 1431 } else { 1432 /* EXPIRED -> DEFAULTED */ 1433 LACP_DPRINTF((lp, "%s: EXPIRED -> DEFAULTED\n", __func__)); 1434 lacp_sm_rx_update_default_selected(lp); 1435 lacp_sm_rx_record_default(lp); 1436 lp->lp_state &= ~LACP_STATE_EXPIRED; 1437 } 1438 } 1439 1440 static void 1441 lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du) 1442 { 1443 boolean_t active; 1444 uint8_t oldpstate; 1445 #if defined(LACP_DEBUG) 1446 char buf[LACP_STATESTR_MAX+1]; 1447 #endif 1448 1449 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1450 1451 oldpstate = lp->lp_partner.lip_state; 1452 1453 active = (du->ldu_actor.lip_state & LACP_STATE_ACTIVITY) 1454 || ((lp->lp_state & LACP_STATE_ACTIVITY) && 1455 (du->ldu_partner.lip_state & LACP_STATE_ACTIVITY)); 1456 1457 lp->lp_partner = du->ldu_actor; 1458 if (active && 1459 ((LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, 1460 LACP_STATE_AGGREGATION) && 1461 !lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner)) 1462 || (du->ldu_partner.lip_state & LACP_STATE_AGGREGATION) == 0)) { 1463 /* XXX nothing? */ 1464 } else { 1465 lp->lp_partner.lip_state &= ~LACP_STATE_SYNC; 1466 } 1467 1468 lp->lp_state &= ~LACP_STATE_DEFAULTED; 1469 1470 if (oldpstate != lp->lp_partner.lip_state) { 1471 LACP_DPRINTF((lp, "old pstate %s\n", 1472 lacp_format_state(oldpstate, buf, sizeof(buf)))); 1473 LACP_DPRINTF((lp, "new pstate %s\n", 1474 lacp_format_state(lp->lp_partner.lip_state, buf, 1475 sizeof(buf)))); 1476 } 1477 1478 lacp_sm_ptx_update_timeout(lp, oldpstate); 1479 } 1480 1481 static void 1482 lacp_sm_rx_update_ntt(struct lacp_port *lp, const struct lacpdu *du) 1483 { 1484 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1485 1486 if (lacp_compare_peerinfo(&lp->lp_actor, &du->ldu_partner) || 1487 !LACP_STATE_EQ(lp->lp_state, du->ldu_partner.lip_state, 1488 LACP_STATE_ACTIVITY | LACP_STATE_SYNC | LACP_STATE_AGGREGATION)) { 1489 LACP_DPRINTF((lp, "%s: assert ntt\n", __func__)); 1490 lacp_sm_assert_ntt(lp); 1491 } 1492 } 1493 1494 static void 1495 lacp_sm_rx_record_default(struct lacp_port *lp) 1496 { 1497 uint8_t oldpstate; 1498 1499 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1500 1501 oldpstate = lp->lp_partner.lip_state; 1502 lp->lp_partner = lacp_partner_admin; 1503 lp->lp_state |= LACP_STATE_DEFAULTED; 1504 lacp_sm_ptx_update_timeout(lp, oldpstate); 1505 } 1506 1507 static void 1508 lacp_sm_rx_update_selected_from_peerinfo(struct lacp_port *lp, 1509 const struct lacp_peerinfo *info) 1510 { 1511 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1512 1513 if (lacp_compare_peerinfo(&lp->lp_partner, info) || 1514 !LACP_STATE_EQ(lp->lp_partner.lip_state, info->lip_state, 1515 LACP_STATE_AGGREGATION)) { 1516 lp->lp_selected = LACP_UNSELECTED; 1517 /* mux machine will clean up lp->lp_aggregator */ 1518 } 1519 } 1520 1521 static void 1522 lacp_sm_rx_update_selected(struct lacp_port *lp, const struct lacpdu *du) 1523 { 1524 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1525 1526 lacp_sm_rx_update_selected_from_peerinfo(lp, &du->ldu_actor); 1527 } 1528 1529 static void 1530 lacp_sm_rx_update_default_selected(struct lacp_port *lp) 1531 { 1532 /* LACP_DPRINTF((lp, "%s\n", __func__)); */ 1533 1534 lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin); 1535 } 1536 1537 /* transmit machine */ 1538 1539 static void 1540 lacp_sm_tx(struct lacp_port *lp) 1541 { 1542 int error; 1543 1544 if (!(lp->lp_state & LACP_STATE_AGGREGATION) 1545 #if 1 1546 || (!(lp->lp_state & LACP_STATE_ACTIVITY) 1547 && !(lp->lp_partner.lip_state & LACP_STATE_ACTIVITY)) 1548 #endif 1549 ) { 1550 lp->lp_flags &= ~LACP_PORT_NTT; 1551 } 1552 1553 if (!(lp->lp_flags & LACP_PORT_NTT)) { 1554 return; 1555 } 1556 1557 /* Rate limit to 3 PDUs per LACP_FAST_PERIODIC_TIME */ 1558 if (ppsratecheck(&lp->lp_last_lacpdu, &lp->lp_lacpdu_sent, 1559 (3 / LACP_FAST_PERIODIC_TIME)) == 0) { 1560 LACP_DPRINTF((lp, "rate limited pdu\n")); 1561 return; 1562 } 1563 1564 error = lacp_xmit_lacpdu(lp); 1565 1566 if (error == 0) { 1567 lp->lp_flags &= ~LACP_PORT_NTT; 1568 } else { 1569 LACP_DPRINTF((lp, "lacpdu transmit failure, error %d\n", 1570 error)); 1571 } 1572 } 1573 1574 static void 1575 lacp_sm_assert_ntt(struct lacp_port *lp) 1576 { 1577 1578 lp->lp_flags |= LACP_PORT_NTT; 1579 } 1580 1581 static void 1582 lacp_run_timers(struct lacp_port *lp) 1583 { 1584 int i; 1585 1586 for (i = 0; i < LACP_NTIMER; i++) { 1587 KASSERT(lp->lp_timer[i] >= 0, 1588 ("invalid timer value %d", lp->lp_timer[i])); 1589 if (lp->lp_timer[i] == 0) { 1590 continue; 1591 } else if (--lp->lp_timer[i] <= 0) { 1592 if (lacp_timer_funcs[i]) { 1593 (*lacp_timer_funcs[i])(lp); 1594 } 1595 } 1596 } 1597 } 1598 1599 int 1600 lacp_marker_input(struct lagg_port *lgp, struct mbuf *m) 1601 { 1602 struct lacp_port *lp = LACP_PORT(lgp); 1603 struct markerdu *mdu; 1604 int error = 0; 1605 1606 LAGG_RLOCK_ASSERT(lgp->lp_lagg); 1607 1608 if (__predict_false(lp->lp_flags & LACP_PORT_DETACHING)) { 1609 goto bad; 1610 } 1611 1612 if (m->m_pkthdr.len != sizeof(*mdu)) { 1613 goto bad; 1614 } 1615 1616 if ((m->m_flags & M_MCAST) == 0) { 1617 goto bad; 1618 } 1619 1620 if (m->m_len < sizeof(*mdu)) { 1621 m = m_pullup(m, sizeof(*mdu)); 1622 if (m == NULL) { 1623 return (ENOMEM); 1624 } 1625 } 1626 1627 mdu = mtod(m, struct markerdu *); 1628 1629 if (memcmp(&mdu->mdu_eh.ether_dhost, 1630 ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN)) { 1631 goto bad; 1632 } 1633 1634 /* XXX 1635 KASSERT(mdu->mdu_sph.sph_subtype == SLOWPROTOCOLS_SUBTYPE_MARKER, 1636 ("a very bad kassert!")); 1637 */ 1638 1639 if (mdu->mdu_sph.sph_version != 1) { 1640 goto bad; 1641 } 1642 1643 switch (mdu->mdu_tlv.tlv_type) { 1644 case MARKER_TYPE_INFO: 1645 if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, 1646 marker_info_tlv_template, TRUE)) { 1647 goto bad; 1648 } 1649 mdu->mdu_tlv.tlv_type = MARKER_TYPE_RESPONSE; 1650 memcpy(&mdu->mdu_eh.ether_dhost, 1651 ðermulticastaddr_slowprotocols, ETHER_ADDR_LEN); 1652 memcpy(&mdu->mdu_eh.ether_shost, 1653 lgp->lp_lladdr, ETHER_ADDR_LEN); 1654 error = lagg_enqueue(lp->lp_ifp, m); 1655 break; 1656 1657 case MARKER_TYPE_RESPONSE: 1658 if (tlv_check(mdu, sizeof(*mdu), &mdu->mdu_tlv, 1659 marker_response_tlv_template, TRUE)) { 1660 goto bad; 1661 } 1662 /* 1663 * we are not interested in responses as 1664 * we don't have a marker sender. 1665 */ 1666 /* FALLTHROUGH */ 1667 default: 1668 goto bad; 1669 } 1670 1671 return (error); 1672 1673 bad: 1674 m_freem(m); 1675 return (EINVAL); 1676 } 1677 1678 static int 1679 tlv_check(const void *p, size_t size, const struct tlvhdr *tlv, 1680 const struct tlv_template *tmpl, boolean_t check_type) 1681 { 1682 while (/* CONSTCOND */ 1) { 1683 if ((const char *)tlv - (const char *)p + sizeof(*tlv) > size) { 1684 return (EINVAL); 1685 } 1686 if ((check_type && tlv->tlv_type != tmpl->tmpl_type) || 1687 tlv->tlv_length != tmpl->tmpl_length) { 1688 return (EINVAL); 1689 } 1690 if (tmpl->tmpl_type == 0) { 1691 break; 1692 } 1693 tlv = (const struct tlvhdr *) 1694 ((const char *)tlv + tlv->tlv_length); 1695 tmpl++; 1696 } 1697 1698 return (0); 1699 } 1700 1701 #if defined(LACP_DEBUG) 1702 const char * 1703 lacp_format_mac(const uint8_t *mac, char *buf, size_t buflen) 1704 { 1705 snprintf(buf, buflen, "%02X-%02X-%02X-%02X-%02X-%02X", 1706 (int)mac[0], 1707 (int)mac[1], 1708 (int)mac[2], 1709 (int)mac[3], 1710 (int)mac[4], 1711 (int)mac[5]); 1712 1713 return (buf); 1714 } 1715 1716 const char * 1717 lacp_format_systemid(const struct lacp_systemid *sysid, 1718 char *buf, size_t buflen) 1719 { 1720 char macbuf[LACP_MACSTR_MAX+1]; 1721 1722 snprintf(buf, buflen, "%04X,%s", 1723 ntohs(sysid->lsi_prio), 1724 lacp_format_mac(sysid->lsi_mac, macbuf, sizeof(macbuf))); 1725 1726 return (buf); 1727 } 1728 1729 const char * 1730 lacp_format_portid(const struct lacp_portid *portid, char *buf, size_t buflen) 1731 { 1732 snprintf(buf, buflen, "%04X,%04X", 1733 ntohs(portid->lpi_prio), 1734 ntohs(portid->lpi_portno)); 1735 1736 return (buf); 1737 } 1738 1739 const char * 1740 lacp_format_partner(const struct lacp_peerinfo *peer, char *buf, size_t buflen) 1741 { 1742 char sysid[LACP_SYSTEMIDSTR_MAX+1]; 1743 char portid[LACP_PORTIDSTR_MAX+1]; 1744 1745 snprintf(buf, buflen, "(%s,%04X,%s)", 1746 lacp_format_systemid(&peer->lip_systemid, sysid, sizeof(sysid)), 1747 ntohs(peer->lip_key), 1748 lacp_format_portid(&peer->lip_portid, portid, sizeof(portid))); 1749 1750 return (buf); 1751 } 1752 1753 const char * 1754 lacp_format_lagid(const struct lacp_peerinfo *a, 1755 const struct lacp_peerinfo *b, char *buf, size_t buflen) 1756 { 1757 char astr[LACP_PARTNERSTR_MAX+1]; 1758 char bstr[LACP_PARTNERSTR_MAX+1]; 1759 1760 #if 0 1761 /* 1762 * there's a convention to display small numbered peer 1763 * in the left. 1764 */ 1765 1766 if (lacp_compare_peerinfo(a, b) > 0) { 1767 const struct lacp_peerinfo *t; 1768 1769 t = a; 1770 a = b; 1771 b = t; 1772 } 1773 #endif 1774 1775 snprintf(buf, buflen, "[%s,%s]", 1776 lacp_format_partner(a, astr, sizeof(astr)), 1777 lacp_format_partner(b, bstr, sizeof(bstr))); 1778 1779 return (buf); 1780 } 1781 1782 const char * 1783 lacp_format_lagid_aggregator(const struct lacp_aggregator *la, 1784 char *buf, size_t buflen) 1785 { 1786 if (la == NULL) { 1787 return ("(none)"); 1788 } 1789 1790 return (lacp_format_lagid(&la->la_actor, &la->la_partner, buf, buflen)); 1791 } 1792 1793 const char * 1794 lacp_format_state(uint8_t state, char *buf, size_t buflen) 1795 { 1796 snprintf(buf, buflen, "%b", state, LACP_STATE_BITS); 1797 return (buf); 1798 } 1799 1800 static void 1801 lacp_dump_lacpdu(const struct lacpdu *du) 1802 { 1803 char buf[LACP_PARTNERSTR_MAX+1]; 1804 char buf2[LACP_STATESTR_MAX+1]; 1805 1806 printf("actor=%s\n", 1807 lacp_format_partner(&du->ldu_actor, buf, sizeof(buf))); 1808 printf("actor.state=%s\n", 1809 lacp_format_state(du->ldu_actor.lip_state, buf2, sizeof(buf2))); 1810 printf("partner=%s\n", 1811 lacp_format_partner(&du->ldu_partner, buf, sizeof(buf))); 1812 printf("partner.state=%s\n", 1813 lacp_format_state(du->ldu_partner.lip_state, buf2, sizeof(buf2))); 1814 1815 printf("maxdelay=%d\n", ntohs(du->ldu_collector.lci_maxdelay)); 1816 } 1817 1818 static void 1819 lacp_dprintf(const struct lacp_port *lp, const char *fmt, ...) 1820 { 1821 va_list va; 1822 1823 if (lp) { 1824 printf("%s: ", lp->lp_ifp->if_xname); 1825 } 1826 1827 va_start(va, fmt); 1828 vprintf(fmt, va); 1829 va_end(va); 1830 } 1831 #endif 1832