/*	$OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $	*/

/*
 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
 * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/hash.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_llc.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#endif
#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#endif

#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <net/if_vlan_var.h>
#include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h>
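/*
 * For reference, a lagg interface is normally driven from userland with
 * ifconfig(8).  An illustrative example in the style of the lagg(4)
 * manual page (interface names are placeholders):
 *
 *	ifconfig lagg0 create
 *	ifconfig lagg0 up laggproto lacp laggport bge0 laggport bge1 \
 *	    192.168.1.1 netmask 255.255.255.0
 */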
/* Special flags we should propagate to the lagg ports. */
static struct {
	int flag;
	int (*func)(struct ifnet *, int);
} lagg_pflags[] = {
	{IFF_PROMISC, ifpromisc},
	{IFF_ALLMULTI, if_allmulti},
	{0, NULL}
};

SLIST_HEAD(__trhead, lagg_softc) lagg_list;	/* list of laggs */
static struct mtx	lagg_list_mtx;
eventhandler_tag	lagg_detach_cookie = NULL;

static int	lagg_clone_create(struct if_clone *, int, caddr_t);
static void	lagg_clone_destroy(struct ifnet *);
static void	lagg_lladdr(struct lagg_softc *, uint8_t *);
static void	lagg_capabilities(struct lagg_softc *);
static void	lagg_port_lladdr(struct lagg_port *, uint8_t *);
static void	lagg_port_setlladdr(void *, int);
static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
static int	lagg_port_destroy(struct lagg_port *, int);
static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
static void	lagg_linkstate(struct lagg_softc *);
static void	lagg_port_state(struct ifnet *, int);
static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
static int	lagg_port_output(struct ifnet *, struct mbuf *,
		    struct sockaddr *, struct route *);
static void	lagg_port_ifdetach(void *arg __unused, struct ifnet *);
#ifdef LAGG_PORT_STACKING
static int	lagg_port_checkstacking(struct lagg_softc *);
#endif
static void	lagg_port2req(struct lagg_port *, struct lagg_reqport *);
static void	lagg_init(void *);
static void	lagg_stop(struct lagg_softc *);
static int	lagg_ioctl(struct ifnet *, u_long, caddr_t);
static int	lagg_ether_setmulti(struct lagg_softc *);
static int	lagg_ether_cmdmulti(struct lagg_port *, int);
static int	lagg_setflag(struct lagg_port *, int, int,
		    int (*func)(struct ifnet *, int));
static int	lagg_setflags(struct lagg_port *, int status);
static void	lagg_start(struct ifnet *);
static int	lagg_media_change(struct ifnet *);
static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
static struct lagg_port *lagg_link_active(struct lagg_softc *,
		    struct lagg_port *);
static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);

IFC_SIMPLE_DECLARE(lagg, 0);

/* Simple round robin */
static int	lagg_rr_attach(struct lagg_softc *);
static int	lagg_rr_detach(struct lagg_softc *);
static int	lagg_rr_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);

/* Active failover */
static int	lagg_fail_attach(struct lagg_softc *);
static int	lagg_fail_detach(struct lagg_softc *);
static int	lagg_fail_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);

/* Load balancing */
static int	lagg_lb_attach(struct lagg_softc *);
static int	lagg_lb_detach(struct lagg_softc *);
static int	lagg_lb_port_create(struct lagg_port *);
static void	lagg_lb_port_destroy(struct lagg_port *);
static int	lagg_lb_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);
static int	lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);

/* 802.3ad LACP */
static int	lagg_lacp_attach(struct lagg_softc *);
static int	lagg_lacp_detach(struct lagg_softc *);
static int	lagg_lacp_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);
static void	lagg_lacp_lladdr(struct lagg_softc *);

/* lagg protocol table */
static const struct {
	int			ti_proto;
	int			(*ti_attach)(struct lagg_softc *);
} lagg_protos[] = {
	{ LAGG_PROTO_ROUNDROBIN,	lagg_rr_attach },
	{ LAGG_PROTO_FAILOVER,		lagg_fail_attach },
	{ LAGG_PROTO_LOADBALANCE,	lagg_lb_attach },
	{ LAGG_PROTO_ETHERCHANNEL,	lagg_lb_attach },
	{ LAGG_PROTO_LACP,		lagg_lacp_attach },
	{ LAGG_PROTO_NONE,		NULL }
};

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation");

static int lagg_failover_rx_all = 0;	/* Allow input on any failover links */
SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
    &lagg_failover_rx_all, 0,
    "Accept input from any interface in a failover lagg");
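/*
 * Each entry in lagg_protos[] maps a protocol number to an attach routine
 * that fills in the per-protocol dispatch hooks on the softc.  As a rough
 * sketch, a hypothetical minimal protocol ("example" is not a protocol in
 * this driver) would only need to set the mandatory hooks and could leave
 * the optional ones NULL:
 *
 *	static int
 *	lagg_example_attach(struct lagg_softc *sc)
 *	{
 *		sc->sc_detach = lagg_example_detach;
 *		sc->sc_start = lagg_example_start;	(Tx port selection)
 *		sc->sc_input = lagg_example_input;	(Rx acceptance)
 *		return (0);
 *	}
 */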
static int
lagg_modevent(module_t mod, int type, void *data)
{

	switch (type) {
	case MOD_LOAD:
		mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
		SLIST_INIT(&lagg_list);
		if_clone_attach(&lagg_cloner);
		lagg_input_p = lagg_input;
		lagg_linkstate_p = lagg_port_state;
		lagg_detach_cookie = EVENTHANDLER_REGISTER(
		    ifnet_departure_event, lagg_port_ifdetach, NULL,
		    EVENTHANDLER_PRI_ANY);
		break;
	case MOD_UNLOAD:
		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
		    lagg_detach_cookie);
		if_clone_detach(&lagg_cloner);
		lagg_input_p = NULL;
		lagg_linkstate_p = NULL;
		mtx_destroy(&lagg_list_mtx);
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t lagg_mod = {
	"if_lagg",
	lagg_modevent,
	0
};

DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);

#if __FreeBSD_version >= 800000
/*
 * This routine is run via a vlan
 * config EVENT
 */
static void
lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct lagg_softc *sc = ifp->if_softc;
	struct lagg_port *lp;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	LAGG_RLOCK(sc);
	if (!SLIST_EMPTY(&sc->sc_ports)) {
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
	}
	LAGG_RUNLOCK(sc);
}

/*
 * This routine is run via a vlan
 * unconfig EVENT
 */
static void
lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct lagg_softc *sc = ifp->if_softc;
	struct lagg_port *lp;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	LAGG_RLOCK(sc);
	if (!SLIST_EMPTY(&sc->sc_ports)) {
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
	}
	LAGG_RUNLOCK(sc);
}
#endif

static int
lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
	struct lagg_softc *sc;
	struct ifnet *ifp;
	int i, error = 0;
	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		free(sc, M_DEVBUF);
		return (ENOSPC);
	}

	sc->sc_proto = LAGG_PROTO_NONE;
	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
			sc->sc_proto = lagg_protos[i].ti_proto;
			if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
				if_free_type(ifp, IFT_ETHER);
				free(sc, M_DEVBUF);
				return (error);
			}
			break;
		}
	}
	LAGG_LOCK_INIT(sc);
	SLIST_INIT(&sc->sc_ports);
	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);

	/* Initialise pseudo media types */
	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
	    lagg_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_type = IFT_ETHER;
	ifp->if_softc = sc;
	ifp->if_start = lagg_start;
	ifp->if_init = lagg_init;
	ifp->if_ioctl = lagg_ioctl;
	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;

	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
	IFQ_SET_READY(&ifp->if_snd);

	/*
	 * Attach as an ordinary ethernet device, children will be attached
	 * as special device IFT_IEEE8023ADLAG.
	 */
	ether_ifattach(ifp, eaddr);

#if __FreeBSD_version >= 800000
	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
	sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
#endif

	/* Insert into the global list of laggs */
	mtx_lock(&lagg_list_mtx);
	SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
	mtx_unlock(&lagg_list_mtx);

	return (0);
}
static void
lagg_clone_destroy(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_port *lp;

	LAGG_WLOCK(sc);

	lagg_stop(sc);
	ifp->if_flags &= ~IFF_UP;

#if __FreeBSD_version >= 800000
	EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
	EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
#endif

	/* Shutdown and remove lagg ports */
	while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
		lagg_port_destroy(lp, 1);
	/* Unhook the aggregation protocol */
	if (sc->sc_detach != NULL)
		(*sc->sc_detach)(sc);

	LAGG_WUNLOCK(sc);

	ifmedia_removeall(&sc->sc_media);
	ether_ifdetach(ifp);
	if_free_type(ifp, IFT_ETHER);

	mtx_lock(&lagg_list_mtx);
	SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
	mtx_unlock(&lagg_list_mtx);

	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
	LAGG_LOCK_DESTROY(sc);
	free(sc, M_DEVBUF);
}

static void
lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
{
	struct ifnet *ifp = sc->sc_ifp;

	if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
		return;

	bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
	/* Let the protocol know the MAC has changed */
	if (sc->sc_lladdr != NULL)
		(*sc->sc_lladdr)(sc);
	EVENTHANDLER_INVOKE(iflladdr_event, ifp);
}

static void
lagg_capabilities(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int cap = ~0, ena = ~0;
	u_long hwa = ~0UL;

	LAGG_WLOCK_ASSERT(sc);

	/* Get capabilities from the lagg ports */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		cap &= lp->lp_ifp->if_capabilities;
		ena &= lp->lp_ifp->if_capenable;
		hwa &= lp->lp_ifp->if_hwassist;
	}
	cap = (cap == ~0 ? 0 : cap);
	ena = (ena == ~0 ? 0 : ena);
	hwa = (hwa == ~0 ? 0 : hwa);

	if (sc->sc_ifp->if_capabilities != cap ||
	    sc->sc_ifp->if_capenable != ena ||
	    sc->sc_ifp->if_hwassist != hwa) {
		sc->sc_ifp->if_capabilities = cap;
		sc->sc_ifp->if_capenable = ena;
		sc->sc_ifp->if_hwassist = hwa;
		getmicrotime(&sc->sc_ifp->if_lastchange);

		if (sc->sc_ifflags & IFF_DEBUG)
			if_printf(sc->sc_ifp,
			    "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
	}
}
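/*
 * Example of the bitwise-AND intersection above: if one port advertises
 * (IFCAP_TXCSUM | IFCAP_RXCSUM) and another only IFCAP_RXCSUM, the lagg
 * interface advertises just IFCAP_RXCSUM, the subset common to all
 * member ports.
 */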
static void
lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *ifp = lp->lp_ifp;
	struct lagg_llq *llq;
	int pending = 0;

	LAGG_WLOCK_ASSERT(sc);

	if (lp->lp_detaching ||
	    memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
		return;

	/* Check to make sure it's not already queued to be changed */
	SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
		if (llq->llq_ifp == ifp) {
			pending = 1;
			break;
		}
	}

	if (!pending) {
		llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
		if (llq == NULL)	/* XXX what to do */
			return;
	}

	/* Update the lladdr even if pending, it may have changed */
	llq->llq_ifp = ifp;
	bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);

	if (!pending)
		SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);

	taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
}

/*
 * Set the interface MAC address from a taskqueue to avoid a LOR.
 */
static void
lagg_port_setlladdr(void *arg, int pending)
{
	struct lagg_softc *sc = (struct lagg_softc *)arg;
	struct lagg_llq *llq, *head;
	struct ifnet *ifp;
	int error;

	/* Grab a local reference of the queue and remove it from the softc */
	LAGG_WLOCK(sc);
	head = SLIST_FIRST(&sc->sc_llq_head);
	SLIST_FIRST(&sc->sc_llq_head) = NULL;
	LAGG_WUNLOCK(sc);

	/*
	 * Traverse the queue and set the lladdr on each ifp. It is safe to do
	 * unlocked as we have the only reference to it.
	 */
	for (llq = head; llq != NULL; llq = head) {
		ifp = llq->llq_ifp;

		/* Set the link layer address */
		error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
		if (error)
			printf("%s: setlladdr failed on %s\n", __func__,
			    ifp->if_xname);

		head = SLIST_NEXT(llq, llq_entries);
		free(llq, M_DEVBUF);
	}
}
static int
lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
{
	struct lagg_softc *sc_ptr;
	struct lagg_port *lp;
	int error = 0;

	LAGG_WLOCK_ASSERT(sc);

	/* Limit the maximum number of lagg ports */
	if (sc->sc_count >= LAGG_MAX_PORTS)
		return (ENOSPC);

	/* Check if port has already been associated with a lagg */
	if (ifp->if_lagg != NULL)
		return (EBUSY);

	/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
	if (ifp->if_type != IFT_ETHER)
		return (EPROTONOSUPPORT);

	/* Allow the first Ethernet member to define the MTU */
	if (SLIST_EMPTY(&sc->sc_ports))
		sc->sc_ifp->if_mtu = ifp->if_mtu;
	else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
		if_printf(sc->sc_ifp, "invalid MTU for %s\n",
		    ifp->if_xname);
		return (EINVAL);
	}

	if ((lp = malloc(sizeof(struct lagg_port),
	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	/* Check if port is a stacked lagg */
	mtx_lock(&lagg_list_mtx);
	SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
		if (ifp == sc_ptr->sc_ifp) {
			mtx_unlock(&lagg_list_mtx);
			free(lp, M_DEVBUF);
			return (EINVAL);
			/* XXX disable stacking for the moment, it's untested */
#ifdef LAGG_PORT_STACKING
			lp->lp_flags |= LAGG_PORT_STACK;
			if (lagg_port_checkstacking(sc_ptr) >=
			    LAGG_MAX_STACKING) {
				mtx_unlock(&lagg_list_mtx);
				free(lp, M_DEVBUF);
				return (E2BIG);
			}
#endif
		}
	}
	mtx_unlock(&lagg_list_mtx);

	/* Change the interface type */
	lp->lp_iftype = ifp->if_type;
	ifp->if_type = IFT_IEEE8023ADLAG;
	ifp->if_lagg = lp;
	lp->lp_ioctl = ifp->if_ioctl;
	ifp->if_ioctl = lagg_port_ioctl;
	lp->lp_output = ifp->if_output;
	ifp->if_output = lagg_port_output;

	lp->lp_ifp = ifp;
	lp->lp_softc = sc;

	/* Save port link layer address */
	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);

	if (SLIST_EMPTY(&sc->sc_ports)) {
		sc->sc_primary = lp;
		lagg_lladdr(sc, IF_LLADDR(ifp));
	} else {
		/* Update link layer address for this port */
		lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
	}

	/* Insert into the list of ports */
	SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
	sc->sc_count++;

	/* Update lagg capabilities */
	lagg_capabilities(sc);
	lagg_linkstate(sc);

	/* Add multicast addresses and interface flags to this port */
	lagg_ether_cmdmulti(lp, 1);
	lagg_setflags(lp, 1);

	if (sc->sc_port_create != NULL)
		error = (*sc->sc_port_create)(lp);
	if (error) {
		/* remove the port again, without calling sc_port_destroy */
		lagg_port_destroy(lp, 0);
		return (error);
	}

	return (error);
}

#ifdef LAGG_PORT_STACKING
static int
lagg_port_checkstacking(struct lagg_softc *sc)
{
	struct lagg_softc *sc_ptr;
	struct lagg_port *lp;
	int m = 0;

	LAGG_WLOCK_ASSERT(sc);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (lp->lp_flags & LAGG_PORT_STACK) {
			sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
			m = MAX(m, lagg_port_checkstacking(sc_ptr));
		}
	}

	return (m + 1);
}
#endif
static int
lagg_port_destroy(struct lagg_port *lp, int runpd)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct lagg_port *lp_ptr;
	struct lagg_llq *llq;
	struct ifnet *ifp = lp->lp_ifp;

	LAGG_WLOCK_ASSERT(sc);

	if (runpd && sc->sc_port_destroy != NULL)
		(*sc->sc_port_destroy)(lp);

	/*
	 * Remove multicast addresses and interface flags from this port and
	 * reset the MAC address, skip if the interface is being detached.
	 */
	if (!lp->lp_detaching) {
		lagg_ether_cmdmulti(lp, 0);
		lagg_setflags(lp, 0);
		lagg_port_lladdr(lp, lp->lp_lladdr);
	}

	/* Restore interface */
	ifp->if_type = lp->lp_iftype;
	ifp->if_ioctl = lp->lp_ioctl;
	ifp->if_output = lp->lp_output;
	ifp->if_lagg = NULL;

	/* Finally, remove the port from the lagg */
	SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
	sc->sc_count--;

	/* Update the primary interface */
	if (lp == sc->sc_primary) {
		uint8_t lladdr[ETHER_ADDR_LEN];

		if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
			bzero(&lladdr, ETHER_ADDR_LEN);
		} else {
			bcopy(lp_ptr->lp_lladdr,
			    lladdr, ETHER_ADDR_LEN);
		}
		lagg_lladdr(sc, lladdr);
		sc->sc_primary = lp_ptr;

		/* Update link layer address for each port */
		SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
			lagg_port_lladdr(lp_ptr, lladdr);
	}

	/* Remove any pending lladdr changes from the queue */
	if (lp->lp_detaching) {
		SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
			if (llq->llq_ifp == ifp) {
				SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
				    llq_entries);
				free(llq, M_DEVBUF);
				break;	/* Only appears once */
			}
		}
	}

	if (lp->lp_ifflags)
		if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);

	free(lp, M_DEVBUF);

	/* Update lagg capabilities */
	lagg_capabilities(sc);
	lagg_linkstate(sc);

	return (0);
}

static int
lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct lagg_reqport *rp = (struct lagg_reqport *)data;
	struct lagg_softc *sc;
	struct lagg_port *lp = NULL;
	int error = 0;

	/* Should be checked by the caller */
	if (ifp->if_type != IFT_IEEE8023ADLAG ||
	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
		goto fallback;

	switch (cmd) {
	case SIOCGLAGGPORT:
		if (rp->rp_portname[0] == '\0' ||
		    ifunit(rp->rp_portname) != ifp) {
			error = EINVAL;
			break;
		}

		LAGG_RLOCK(sc);
		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_RUNLOCK(sc);
			break;
		}

		lagg_port2req(lp, rp);
		LAGG_RUNLOCK(sc);
		break;

	case SIOCSIFCAP:
		if (lp->lp_ioctl == NULL) {
			error = EINVAL;
			break;
		}
		error = (*lp->lp_ioctl)(ifp, cmd, data);
		if (error)
			break;

		/* Update lagg interface capabilities */
		LAGG_WLOCK(sc);
		lagg_capabilities(sc);
		LAGG_WUNLOCK(sc);
		break;

	case SIOCSIFMTU:
		/* Do not allow the MTU to be changed once joined */
		error = EINVAL;
		break;

	default:
		goto fallback;
	}

	return (error);

fallback:
	/* lp may be NULL here if the port check above failed */
	if (lp != NULL && lp->lp_ioctl != NULL)
		return ((*lp->lp_ioctl)(ifp, cmd, data));

	return (EINVAL);
}
static int
lagg_port_output(struct ifnet *ifp, struct mbuf *m,
	struct sockaddr *dst, struct route *ro)
{
	struct lagg_port *lp = ifp->if_lagg;
	struct ether_header *eh;
	short type = 0;

	switch (dst->sa_family) {
	case pseudo_AF_HDRCMPLT:
	case AF_UNSPEC:
		eh = (struct ether_header *)dst->sa_data;
		type = eh->ether_type;
		break;
	}

	/*
	 * Only allow ethernet types required to initiate or maintain the link,
	 * aggregated frames take a different path.
	 */
	switch (ntohs(type)) {
	case ETHERTYPE_PAE:	/* EAPOL PAE/802.1x */
		return ((*lp->lp_output)(ifp, m, dst, ro));
	}

	/* drop any other frames */
	m_freem(m);
	return (EBUSY);
}

static void
lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct lagg_port *lp;
	struct lagg_softc *sc;

	if ((lp = ifp->if_lagg) == NULL)
		return;

	sc = lp->lp_softc;

	LAGG_WLOCK(sc);
	lp->lp_detaching = 1;
	lagg_port_destroy(lp, 1);
	LAGG_WUNLOCK(sc);
}

static void
lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
{
	struct lagg_softc *sc = lp->lp_softc;

	strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
	strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
	rp->rp_prio = lp->lp_prio;
	rp->rp_flags = lp->lp_flags;
	if (sc->sc_portreq != NULL)
		(*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);

	/* Add protocol specific flags */
	switch (sc->sc_proto) {
	case LAGG_PROTO_FAILOVER:
		if (lp == sc->sc_primary)
			rp->rp_flags |= LAGG_PORT_MASTER;
		if (lp == lagg_link_active(sc, sc->sc_primary))
			rp->rp_flags |= LAGG_PORT_ACTIVE;
		break;

	case LAGG_PROTO_ROUNDROBIN:
	case LAGG_PROTO_LOADBALANCE:
	case LAGG_PROTO_ETHERCHANNEL:
		if (LAGG_PORTACTIVE(lp))
			rp->rp_flags |= LAGG_PORT_ACTIVE;
		break;

	case LAGG_PROTO_LACP:
		/* LACP has a different definition of active */
		if (lacp_isactive(lp))
			rp->rp_flags |= LAGG_PORT_ACTIVE;
		if (lacp_iscollecting(lp))
			rp->rp_flags |= LAGG_PORT_COLLECTING;
		if (lacp_isdistributing(lp))
			rp->rp_flags |= LAGG_PORT_DISTRIBUTING;
		break;
	}
}

static void
lagg_init(void *xsc)
{
	struct lagg_softc *sc = (struct lagg_softc *)xsc;
	struct lagg_port *lp;
	struct ifnet *ifp = sc->sc_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return;

	LAGG_WLOCK(sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	/* Update the port lladdrs */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_port_lladdr(lp, IF_LLADDR(ifp));

	if (sc->sc_init != NULL)
		(*sc->sc_init)(sc);

	LAGG_WUNLOCK(sc);
}

static void
lagg_stop(struct lagg_softc *sc)
{
	struct ifnet *ifp = sc->sc_ifp;

	LAGG_WLOCK_ASSERT(sc);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;

	if (sc->sc_stop != NULL)
		(*sc->sc_stop)(sc);
}
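/*
 * The ioctl handler below is the kernel side of the lagg(4) management
 * interface.  For illustration, userland retrieves the port list roughly
 * as follows (a sketch of the pattern ifconfig(8) uses; error handling
 * omitted, field names assumed to match net/if_lagg.h):
 *
 *	struct lagg_reqport rpbuf[LAGG_MAX_PORTS];
 *	struct lagg_reqall ra;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ra, 0, sizeof(ra));
 *	strlcpy(ra.ra_ifname, "lagg0", sizeof(ra.ra_ifname));
 *	ra.ra_size = sizeof(rpbuf);
 *	ra.ra_port = rpbuf;
 *	ioctl(s, SIOCGLAGG, &ra);	(ra.ra_ports now holds the count)
 */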
static int
lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_reqall *ra = (struct lagg_reqall *)data;
	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
	struct ifreq *ifr = (struct ifreq *)data;
	struct lagg_port *lp;
	struct ifnet *tpif;
	struct thread *td = curthread;
	char *buf, *outbuf;
	int count, buflen, len, error = 0;

	bzero(&rpbuf, sizeof(rpbuf));

	switch (cmd) {
	case SIOCGLAGG:
		LAGG_RLOCK(sc);
		count = 0;
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			count++;
		buflen = count * sizeof(struct lagg_reqport);
		LAGG_RUNLOCK(sc);

		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);

		LAGG_RLOCK(sc);
		ra->ra_proto = sc->sc_proto;
		if (sc->sc_req != NULL)
			(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);

		count = 0;
		buf = outbuf;
		len = min(ra->ra_size, buflen);
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
			if (len < sizeof(rpbuf))
				break;

			lagg_port2req(lp, &rpbuf);
			memcpy(buf, &rpbuf, sizeof(rpbuf));
			count++;
			buf += sizeof(rpbuf);
			len -= sizeof(rpbuf);
		}
		LAGG_RUNLOCK(sc);
		ra->ra_ports = count;
		ra->ra_size = count * sizeof(rpbuf);
		error = copyout(outbuf, ra->ra_port, ra->ra_size);
		free(outbuf, M_TEMP);
		break;
	case SIOCSLAGG:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (ra->ra_proto >= LAGG_PROTO_MAX) {
			error = EPROTONOSUPPORT;
			break;
		}
		if (sc->sc_proto != LAGG_PROTO_NONE) {
			LAGG_WLOCK(sc);
			error = sc->sc_detach(sc);
			/* Reset protocol and pointers */
			sc->sc_proto = LAGG_PROTO_NONE;
			sc->sc_detach = NULL;
			sc->sc_start = NULL;
			sc->sc_input = NULL;
			sc->sc_port_create = NULL;
			sc->sc_port_destroy = NULL;
			sc->sc_linkstate = NULL;
			sc->sc_init = NULL;
			sc->sc_stop = NULL;
			sc->sc_lladdr = NULL;
			sc->sc_req = NULL;
			sc->sc_portreq = NULL;
			LAGG_WUNLOCK(sc);
		}
		if (error != 0)
			break;
		for (int i = 0; i < (sizeof(lagg_protos) /
		    sizeof(lagg_protos[0])); i++) {
			if (lagg_protos[i].ti_proto == ra->ra_proto) {
				if (sc->sc_ifflags & IFF_DEBUG)
					printf("%s: using proto %u\n",
					    sc->sc_ifname,
					    lagg_protos[i].ti_proto);
				LAGG_WLOCK(sc);
				sc->sc_proto = lagg_protos[i].ti_proto;
				if (sc->sc_proto != LAGG_PROTO_NONE)
					error = lagg_protos[i].ti_attach(sc);
				LAGG_WUNLOCK(sc);
				return (error);
			}
		}
		error = EPROTONOSUPPORT;
		break;
	case SIOCGLAGGPORT:
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}

		LAGG_RLOCK(sc);
		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
		    lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_RUNLOCK(sc);
			break;
		}

		lagg_port2req(lp, rp);
		LAGG_RUNLOCK(sc);
		break;
	case SIOCSLAGGPORT:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}
		LAGG_WLOCK(sc);
		error = lagg_port_create(sc, tpif);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSLAGGDELPORT:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}

		LAGG_WLOCK(sc);
		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
		    lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_WUNLOCK(sc);
			break;
		}

		error = lagg_port_destroy(lp, 1);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFFLAGS:
		/* Set flags on ports too */
		LAGG_WLOCK(sc);
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
			lagg_setflags(lp, 1);
		}
		LAGG_WUNLOCK(sc);

		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			LAGG_WLOCK(sc);
			lagg_stop(sc);
			LAGG_WUNLOCK(sc);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.
			 */
			(*ifp->if_init)(sc);
		}
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		LAGG_WLOCK(sc);
		error = lagg_ether_setmulti(sc);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCSIFCAP:
	case SIOCSIFMTU:
		/* Do not allow the MTU or caps to be directly changed */
		error = EINVAL;
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return (error);
}
static int
lagg_ether_setmulti(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	LAGG_WLOCK_ASSERT(sc);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		/* First, remove any existing filter entries. */
		lagg_ether_cmdmulti(lp, 0);
		/* copy all addresses from the lagg interface to the port */
		lagg_ether_cmdmulti(lp, 1);
	}
	return (0);
}

static int
lagg_ether_cmdmulti(struct lagg_port *lp, int set)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *ifp = lp->lp_ifp;
	struct ifnet *scifp = sc->sc_ifp;
	struct lagg_mc *mc;
	struct ifmultiaddr *ifma, *rifma = NULL;
	struct sockaddr_dl sdl;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	bzero((char *)&sdl, sizeof(sdl));
	sdl.sdl_len = sizeof(sdl);
	sdl.sdl_family = AF_LINK;
	sdl.sdl_type = IFT_ETHER;
	sdl.sdl_alen = ETHER_ADDR_LEN;
	sdl.sdl_index = ifp->if_index;

	if (set) {
		TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
			    LLADDR(&sdl), ETHER_ADDR_LEN);

			error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
			if (error)
				return (error);
			mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
			if (mc == NULL)
				return (ENOMEM);
			mc->mc_ifma = rifma;
			SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
		}
	} else {
		while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
			SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
			if_delmulti_ifma(mc->mc_ifma);
			free(mc, M_DEVBUF);
		}
	}
	return (0);
}

/* Handle a ref counted flag that should be set on the lagg port as well */
static int
lagg_setflag(struct lagg_port *lp, int flag, int status,
	int (*func)(struct ifnet *, int))
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;
	struct ifnet *ifp = lp->lp_ifp;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	status = status ? (scifp->if_flags & flag) : 0;
	/* Now "status" contains the flag value or 0 */

	/*
	 * See if the recorded port status is different from what we want
	 * it to be. If it is, flip it. We record the port's status in
	 * lp_ifflags so that we won't clear a flag on the port that we
	 * haven't set. In fact, we don't clear or set the port's flags
	 * directly; we get or release references to them instead. That's
	 * why we can be sure that the recorded flags still agree with the
	 * actual port flags.
	 */
	if (status != (lp->lp_ifflags & flag)) {
		error = (*func)(ifp, status);
		if (error)
			return (error);
		lp->lp_ifflags &= ~flag;
		lp->lp_ifflags |= status;
	}
	return (0);
}
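/*
 * Example of the reference counting above: if the lagg interface has
 * IFF_PROMISC set, each member port takes one ifpromisc() reference via
 * lagg_setflag(); when the flag is cleared or the port leaves the lagg,
 * that reference is released, so any promiscuous references the port's
 * other users hold remain untouched.
 */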
/*
 * Handle IFF_* flags that require certain changes on the lagg port:
 * if "status" is true, update the port's flags to match the lagg;
 * if "status" is false, forcibly clear the flags set on the port.
 */
static int
lagg_setflags(struct lagg_port *lp, int status)
{
	int error, i;

	for (i = 0; lagg_pflags[i].flag; i++) {
		error = lagg_setflag(lp, lagg_pflags[i].flag,
		    status, lagg_pflags[i].func);
		if (error)
			return (error);
	}
	return (0);
}

static void
lagg_start(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct mbuf *m;
	int error = 0;

	LAGG_RLOCK(sc);
	/* We need a Tx algorithm and at least one port */
	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
		IF_DRAIN(&ifp->if_snd);
		LAGG_RUNLOCK(sc);
		return;
	}

	for (;; error = 0) {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		ETHER_BPF_MTAP(ifp, m);

		error = (*sc->sc_start)(sc, m);
		if (error == 0)
			ifp->if_opackets++;
		else
			ifp->if_oerrors++;
	}
	LAGG_RUNLOCK(sc);
}

static struct mbuf *
lagg_input(struct ifnet *ifp, struct mbuf *m)
{
	struct lagg_port *lp = ifp->if_lagg;
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;

	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
	    sc->sc_proto == LAGG_PROTO_NONE) {
		m_freem(m);
		return (NULL);
	}

	LAGG_RLOCK(sc);
	ETHER_BPF_MTAP(scifp, m);

	m = (*sc->sc_input)(sc, lp, m);

	if (m != NULL) {
		scifp->if_ipackets++;
		scifp->if_ibytes += m->m_pkthdr.len;

		if (scifp->if_flags & IFF_MONITOR) {
			m_freem(m);
			m = NULL;
		}
	}

	LAGG_RUNLOCK(sc);
	return (m);
}

static int
lagg_media_change(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;

	if (sc->sc_ifflags & IFF_DEBUG)
		printf("%s\n", __func__);

	/* Ignore */
	return (0);
}

static void
lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_port *lp;

	imr->ifm_status = IFM_AVALID;
	imr->ifm_active = IFM_ETHER | IFM_AUTO;

	LAGG_RLOCK(sc);
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp))
			imr->ifm_status |= IFM_ACTIVE;
	}
	LAGG_RUNLOCK(sc);
}

static void
lagg_linkstate(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int new_link = LINK_STATE_DOWN;
	uint64_t speed;

	/* Our link is considered up if at least one of our ports is active */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (lp->lp_link_state == LINK_STATE_UP) {
			new_link = LINK_STATE_UP;
			break;
		}
	}
	if_link_state_change(sc->sc_ifp, new_link);

	/* Update if_baudrate to reflect the max possible speed */
	switch (sc->sc_proto) {
	case LAGG_PROTO_FAILOVER:
		sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ?
		    sc->sc_primary->lp_ifp->if_baudrate : 0;
		break;
	case LAGG_PROTO_ROUNDROBIN:
	case LAGG_PROTO_LOADBALANCE:
	case LAGG_PROTO_ETHERCHANNEL:
		speed = 0;
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			speed += lp->lp_ifp->if_baudrate;
		sc->sc_ifp->if_baudrate = speed;
		break;
	case LAGG_PROTO_LACP:
		/* LACP updates if_baudrate itself */
		break;
	}
}
static void
lagg_port_state(struct ifnet *ifp, int state)
{
	struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
	struct lagg_softc *sc = NULL;

	if (lp != NULL)
		sc = lp->lp_softc;
	if (sc == NULL)
		return;

	LAGG_WLOCK(sc);
	lagg_linkstate(sc);
	if (sc->sc_linkstate != NULL)
		(*sc->sc_linkstate)(lp);
	LAGG_WUNLOCK(sc);
}

struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_port *lp_next, *rval = NULL;
	// int new_link = LINK_STATE_DOWN;

	LAGG_RLOCK_ASSERT(sc);
	/*
	 * Search for a port which reports an active link state.
	 */

	if (lp == NULL)
		goto search;
	if (LAGG_PORTACTIVE(lp)) {
		rval = lp;
		goto found;
	}
	if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
	    LAGG_PORTACTIVE(lp_next)) {
		rval = lp_next;
		goto found;
	}

search:
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp_next)) {
			rval = lp_next;
			goto found;
		}
	}

found:
	if (rval != NULL) {
		/*
		 * The IEEE 802.1D standard assumes that a lagg with
		 * multiple ports is always full duplex. This is valid
		 * for load sharing laggs and if at least two links
		 * are active. Unfortunately, checking the latter would
		 * be too expensive at this point.
		 XXX
		if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
		    (sc->sc_count > 1))
			new_link = LINK_STATE_FULL_DUPLEX;
		else
			new_link = rval->lp_link_state;
		 */
	}

	return (rval);
}
static const void *
lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
{
	if (m->m_pkthdr.len < (off + len)) {
		return (NULL);
	} else if (m->m_len < (off + len)) {
		m_copydata(m, off, len, buf);
		return (buf);
	}
	return (mtod(m, char *) + off);
}

uint32_t
lagg_hashmbuf(struct mbuf *m, uint32_t key)
{
	uint16_t etype;
	uint32_t p = 0;
	int off;
	struct ether_header *eh;
	struct ether_vlan_header vlanbuf;
	const struct ether_vlan_header *vlan;
#ifdef INET
	const struct ip *ip;
	struct ip ipbuf;
#endif
#ifdef INET6
	const struct ip6_hdr *ip6;
	struct ip6_hdr ip6buf;
	uint32_t flow;
#endif

	off = sizeof(*eh);
	if (m->m_len < off)
		goto out;
	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
	p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);

	/* Special handling for encapsulating VLAN frames */
	if (m->m_flags & M_VLANTAG) {
		p = hash32_buf(&m->m_pkthdr.ether_vtag,
		    sizeof(m->m_pkthdr.ether_vtag), p);
	} else if (etype == ETHERTYPE_VLAN) {
		vlan = lagg_gethdr(m, off, sizeof(*vlan), &vlanbuf);
		if (vlan == NULL)
			goto out;

		p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
		etype = ntohs(vlan->evl_proto);
		off += sizeof(*vlan) - sizeof(*eh);
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = lagg_gethdr(m, off, sizeof(*ip), &ipbuf);
		if (ip == NULL)
			goto out;

		p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
		p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &ip6buf);
		if (ip6 == NULL)
			goto out;

		p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
		p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
		flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
		p = hash32_buf(&flow, sizeof(flow), p);	/* IPv6 flow label */
		break;
#endif
	}
out:
	return (p);
}
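/*
 * Because lagg_hashmbuf() folds only flow-invariant fields (MAC
 * addresses, VLAN tag, IP addresses, IPv6 flow label) into the hash,
 * every packet of a given flow yields the same 32-bit value for a given
 * key.  Callers that reduce the hash modulo the port count therefore
 * get stable per-flow port affinity as long as the port set is stable.
 */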
int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{

	return (ifp->if_transmit)(ifp, m);
}

/*
 * Simple round robin aggregation
 */

static int
lagg_rr_attach(struct lagg_softc *sc)
{
	sc->sc_detach = lagg_rr_detach;
	sc->sc_start = lagg_rr_start;
	sc->sc_input = lagg_rr_input;
	sc->sc_port_create = NULL;
	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
	sc->sc_seq = 0;

	return (0);
}

static int
lagg_rr_detach(struct lagg_softc *sc)
{
	return (0);
}

static int
lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;
	uint32_t p;

	p = atomic_fetchadd_32(&sc->sc_seq, 1);
	p %= sc->sc_count;
	lp = SLIST_FIRST(&sc->sc_ports);
	while (p--)
		lp = SLIST_NEXT(lp, lp_entries);

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
	 */
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENOENT);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}
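/*
 * Round-robin selection example: with three ports A, B, C attached,
 * successive values of sc_seq (0, 1, 2, 3, ...) select A, B, C, A, ...
 * via the modulo and list walk in lagg_rr_start() above, spreading
 * packets (not flows) evenly across all active ports.
 */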
/*
 * Active failover
 */

static int
lagg_fail_attach(struct lagg_softc *sc)
{
	sc->sc_detach = lagg_fail_detach;
	sc->sc_start = lagg_fail_start;
	sc->sc_input = lagg_fail_input;
	sc->sc_port_create = NULL;
	sc->sc_port_destroy = NULL;

	return (0);
}

static int
lagg_fail_detach(struct lagg_softc *sc)
{
	return (0);
}

static int
lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	/* Use the master port if active or the next available port */
	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
		m_freem(m);
		return (ENOENT);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct lagg_port *tmp_tp;

	if (lp == sc->sc_primary || lagg_failover_rx_all) {
		m->m_pkthdr.rcvif = ifp;
		return (m);
	}

	if (!LAGG_PORTACTIVE(sc->sc_primary)) {
		tmp_tp = lagg_link_active(sc, sc->sc_primary);
		/*
		 * If tmp_tp is null, we've received a packet when all
		 * our links are down. Weird, but process it anyway.
		 */
		if ((tmp_tp == NULL || tmp_tp == lp)) {
			m->m_pkthdr.rcvif = ifp;
			return (m);
		}
	}

	m_freem(m);
	return (NULL);
}

/*
 * Load balancing
 */

static int
lagg_lb_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	struct lagg_lb *lb;

	if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	sc->sc_detach = lagg_lb_detach;
	sc->sc_start = lagg_lb_start;
	sc->sc_input = lagg_lb_input;
	sc->sc_port_create = lagg_lb_port_create;
	sc->sc_port_destroy = lagg_lb_port_destroy;
	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;

	lb->lb_key = arc4random();
	sc->sc_psc = (caddr_t)lb;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_lb_port_create(lp);

	return (0);
}

static int
lagg_lb_detach(struct lagg_softc *sc)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	if (lb != NULL)
		free(lb, M_DEVBUF);
	return (0);
}

static int
lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp_next;
	int i = 0;

	bzero(&lb->lb_ports, sizeof(lb->lb_ports));
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (lp_next == lp)
			continue;
		if (i >= LAGG_MAX_PORTS)
			return (EINVAL);
		if (sc->sc_ifflags & IFF_DEBUG)
			printf("%s: port %s at index %d\n",
			    sc->sc_ifname, lp_next->lp_ifname, i);
		lb->lb_ports[i++] = lp_next;
	}

	return (0);
}

static int
lagg_lb_port_create(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;
	return (lagg_lb_porttable(sc, NULL));
}

static void
lagg_lb_port_destroy(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;
	lagg_lb_porttable(sc, lp);
}

static int
lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp = NULL;
	uint32_t p = 0;

	if (m->m_flags & M_FLOWID)
		p = m->m_pkthdr.flowid;
	else
		p = lagg_hashmbuf(m, lb->lb_key);
	p %= sc->sc_count;
	lp = lb->lb_ports[p];

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
	 */
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENOENT);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}
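/*
 * Note on the load-balance transmit path above: when the NIC has already
 * computed a flow hash (M_FLOWID set and m_pkthdr.flowid valid),
 * lagg_lb_start() uses it directly and skips lagg_hashmbuf(), saving a
 * software hash per packet while preserving per-flow port affinity.
 */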
/*
 * 802.3ad LACP
 */

static int
lagg_lacp_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int error;

	sc->sc_detach = lagg_lacp_detach;
	sc->sc_port_create = lacp_port_create;
	sc->sc_port_destroy = lacp_port_destroy;
	sc->sc_linkstate = lacp_linkstate;
	sc->sc_start = lagg_lacp_start;
	sc->sc_input = lagg_lacp_input;
	sc->sc_init = lacp_init;
	sc->sc_stop = lacp_stop;
	sc->sc_lladdr = lagg_lacp_lladdr;
	sc->sc_req = lacp_req;
	sc->sc_portreq = lacp_portreq;

	error = lacp_attach(sc);
	if (error)
		return (error);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);

	return (error);
}

static int
lagg_lacp_detach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int error;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	/* unlocking is safe here */
	LAGG_WUNLOCK(sc);
	error = lacp_detach(sc);
	LAGG_WLOCK(sc);

	return (error);
}

static void
lagg_lacp_lladdr(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	/* purge all the lacp ports */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	/* add them back in */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);
}

static int
lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	lp = lacp_select_tx_port(sc, m);
	if (lp == NULL) {
		m_freem(m);
		return (EBUSY);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct ether_header *eh;
	u_short etype;

	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);

	/* Tap off LACP control messages */
	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) {
		m = lacp_input(lp, m);
		if (m == NULL)
			return (NULL);
	}

	/*
	 * If the port is not collecting or not in the active aggregator then
	 * free and return.
	 */
	if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) {
		m_freem(m);
		return (NULL);
	}

	m->m_pkthdr.rcvif = ifp;
	return (m);
}