/*	$OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $	*/

/*
 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/hash.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/taskqueue.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_llc.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>

#ifdef INET
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#endif

#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <net/if_vlan_var.h>
#include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h>

/* Special flags we should propagate to the lagg ports. */
static struct {
	int flag;
	int (*func)(struct ifnet *, int);
} lagg_pflags[] = {
	{IFF_PROMISC, ifpromisc},
	{IFF_ALLMULTI, if_allmulti},
	{0, NULL}
};

SLIST_HEAD(__trhead, lagg_softc) lagg_list;	/* list of laggs */
static struct mtx	lagg_list_mtx;
eventhandler_tag	lagg_detach_cookie = NULL;

static int	lagg_clone_create(struct if_clone *, int, caddr_t);
static void	lagg_clone_destroy(struct ifnet *);
static void	lagg_lladdr(struct lagg_softc *, uint8_t *);
static void	lagg_capabilities(struct lagg_softc *);
static void	lagg_port_lladdr(struct lagg_port *, uint8_t *);
static void	lagg_port_setlladdr(void *, int);
static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
static int	lagg_port_destroy(struct lagg_port *, int);
static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
static void	lagg_port_state(struct ifnet *, int);
static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
static int	lagg_port_output(struct ifnet *, struct mbuf *,
		    struct sockaddr *, struct rtentry *);
static void	lagg_port_ifdetach(void *arg __unused, struct ifnet *);
static int	lagg_port_checkstacking(struct lagg_softc *);
static void	lagg_port2req(struct lagg_port *, struct lagg_reqport *);
static void	lagg_init(void *);
static void	lagg_stop(struct lagg_softc *);
static int	lagg_ioctl(struct ifnet *, u_long, caddr_t);
static int	lagg_ether_setmulti(struct lagg_softc *);
static int	lagg_ether_cmdmulti(struct lagg_port *, int);
static int	lagg_setflag(struct lagg_port *, int, int,
		    int (*func)(struct ifnet *, int));
static int	lagg_setflags(struct lagg_port *, int status);
static void	lagg_start(struct ifnet *);
static int	lagg_media_change(struct ifnet *);
static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
static struct lagg_port *lagg_link_active(struct lagg_softc *,
		    struct lagg_port *);
static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *);

IFC_SIMPLE_DECLARE(lagg, 0);

/* Simple round robin */
static int	lagg_rr_attach(struct lagg_softc *);
static int	lagg_rr_detach(struct lagg_softc *);
static int	lagg_rr_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);

/* Active failover */
static int	lagg_fail_attach(struct lagg_softc *);
static int	lagg_fail_detach(struct lagg_softc *);
static int	lagg_fail_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);

/* Load balancing */
static int	lagg_lb_attach(struct lagg_softc *);
static int	lagg_lb_detach(struct lagg_softc *);
static int	lagg_lb_port_create(struct lagg_port *);
static void	lagg_lb_port_destroy(struct lagg_port *);
static int	lagg_lb_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);
static int	lagg_lb_porttable(struct lagg_softc *, struct lagg_port *);

/* 802.3ad LACP */
static int	lagg_lacp_attach(struct lagg_softc *);
static int	lagg_lacp_detach(struct lagg_softc *);
static int	lagg_lacp_start(struct lagg_softc *, struct mbuf *);
static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *,
		    struct mbuf *);
static void	lagg_lacp_lladdr(struct lagg_softc *);

/* lagg protocol table */
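/*
 * Each attach routine is expected to fill in the protocol hooks in the
 * softc (sc_detach, sc_start, sc_input and friends); the SIOCSLAGG case
 * in lagg_ioctl() walks this table to dispatch a protocol change to the
 * matching attach routine.
 */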
static const struct {
	int			ti_proto;
	int			(*ti_attach)(struct lagg_softc *);
} lagg_protos[] = {
	{ LAGG_PROTO_ROUNDROBIN,	lagg_rr_attach },
	{ LAGG_PROTO_FAILOVER,		lagg_fail_attach },
	{ LAGG_PROTO_LOADBALANCE,	lagg_lb_attach },
	{ LAGG_PROTO_ETHERCHANNEL,	lagg_lb_attach },
	{ LAGG_PROTO_LACP,		lagg_lacp_attach },
	{ LAGG_PROTO_NONE,		NULL }
};

static int
lagg_modevent(module_t mod, int type, void *data)
{

	switch (type) {
	case MOD_LOAD:
		mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF);
		SLIST_INIT(&lagg_list);
		if_clone_attach(&lagg_cloner);
		lagg_input_p = lagg_input;
		lagg_linkstate_p = lagg_port_state;
		lagg_detach_cookie = EVENTHANDLER_REGISTER(
		    ifnet_departure_event, lagg_port_ifdetach, NULL,
		    EVENTHANDLER_PRI_ANY);
		break;
	case MOD_UNLOAD:
		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
		    lagg_detach_cookie);
		if_clone_detach(&lagg_cloner);
		lagg_input_p = NULL;
		lagg_linkstate_p = NULL;
		mtx_destroy(&lagg_list_mtx);
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}

static moduledata_t lagg_mod = {
	"if_lagg",
	lagg_modevent,
	0
};

DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);

static int
lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
	struct lagg_softc *sc;
	struct ifnet *ifp;
	int i, error = 0;
	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		free(sc, M_DEVBUF);
		return (ENOSPC);
	}

	sc->sc_proto = LAGG_PROTO_NONE;
	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
			sc->sc_proto = lagg_protos[i].ti_proto;
			if ((error = lagg_protos[i].ti_attach(sc)) != 0) {
				if_free_type(ifp, IFT_ETHER);
				free(sc, M_DEVBUF);
				return (error);
			}
			break;
		}
	}
	LAGG_LOCK_INIT(sc);
	SLIST_INIT(&sc->sc_ports);
	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);

	/* Initialise pseudo media types */
	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
	    lagg_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_type = IFT_ETHER;
	ifp->if_softc = sc;
	ifp->if_start = lagg_start;
	ifp->if_init = lagg_init;
	ifp->if_ioctl = lagg_ioctl;
	ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;

	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_snd.ifq_drv_maxlen = ifqmaxlen;
	IFQ_SET_READY(&ifp->if_snd);

	/*
	 * Attach as an ordinary ethernet device, children will be attached
	 * as special device IFT_IEEE8023ADLAG.
	 */
	ether_ifattach(ifp, eaddr);

	/* Insert into the global list of laggs */
	mtx_lock(&lagg_list_mtx);
	SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries);
	mtx_unlock(&lagg_list_mtx);

	return (0);
}

static void
lagg_clone_destroy(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_port *lp;

	LAGG_WLOCK(sc);

	lagg_stop(sc);
	ifp->if_flags &= ~IFF_UP;

	/* Shutdown and remove lagg ports */
	while ((lp = SLIST_FIRST(&sc->sc_ports)) != NULL)
		lagg_port_destroy(lp, 1);
	/* Unhook the aggregation protocol */
	(*sc->sc_detach)(sc);

	LAGG_WUNLOCK(sc);

	ifmedia_removeall(&sc->sc_media);
	ether_ifdetach(ifp);
	if_free_type(ifp, IFT_ETHER);

	mtx_lock(&lagg_list_mtx);
	SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries);
	mtx_unlock(&lagg_list_mtx);

	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
	LAGG_LOCK_DESTROY(sc);
	free(sc, M_DEVBUF);
}

static void
lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr)
{
	struct ifnet *ifp = sc->sc_ifp;

	if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
		return;

	bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN);
	/* Let the protocol know the MAC has changed */
	if (sc->sc_lladdr != NULL)
		(*sc->sc_lladdr)(sc);
}

static void
lagg_capabilities(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int cap = ~0, ena = ~0;

	LAGG_WLOCK_ASSERT(sc);

	/* Get capabilities from the lagg ports */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		cap &= lp->lp_ifp->if_capabilities;
		ena &= lp->lp_ifp->if_capenable;
	}
	cap = (cap == ~0 ? 0 : cap);
	ena = (ena == ~0 ? 0 : ena);

	if (sc->sc_ifp->if_capabilities != cap ||
	    sc->sc_ifp->if_capenable != ena) {
		sc->sc_ifp->if_capabilities = cap;
		sc->sc_ifp->if_capenable = ena;
		getmicrotime(&sc->sc_ifp->if_lastchange);

		if (sc->sc_ifflags & IFF_DEBUG)
			if_printf(sc->sc_ifp,
			    "capabilities 0x%08x enabled 0x%08x\n", cap, ena);
	}
}

static void
lagg_port_lladdr(struct lagg_port *lp, uint8_t *lladdr)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *ifp = lp->lp_ifp;
	struct lagg_llq *llq;
	int pending = 0;

	LAGG_WLOCK_ASSERT(sc);

	if (lp->lp_detaching ||
	    memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)
		return;

	/* Check to make sure it's not already queued to be changed */
	SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
		if (llq->llq_ifp == ifp) {
			pending = 1;
			break;
		}
	}

	if (!pending) {
		llq = malloc(sizeof(struct lagg_llq), M_DEVBUF, M_NOWAIT);
		if (llq == NULL)	/* XXX what to do */
			return;
	}

	/* Update the lladdr even if pending, it may have changed */
	llq->llq_ifp = ifp;
	bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN);

	if (!pending)
		SLIST_INSERT_HEAD(&sc->sc_llq_head, llq, llq_entries);

	taskqueue_enqueue(taskqueue_swi, &sc->sc_lladdr_task);
}
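/*
 * The llq entries queued by lagg_port_lladdr() above are serviced by
 * lagg_port_setlladdr() below on taskqueue_swi; lagg_clone_destroy()
 * drains the task before the softc is freed.
 */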
/*
 * Set the interface MAC address from a taskqueue to avoid a LOR.
 */
static void
lagg_port_setlladdr(void *arg, int pending)
{
	struct lagg_softc *sc = (struct lagg_softc *)arg;
	struct lagg_llq *llq, *head;
	struct ifnet *ifp;
	int error;

	/* Grab a local reference of the queue and remove it from the softc */
	LAGG_WLOCK(sc);
	head = SLIST_FIRST(&sc->sc_llq_head);
	SLIST_FIRST(&sc->sc_llq_head) = NULL;
	LAGG_WUNLOCK(sc);

	/*
	 * Traverse the queue and set the lladdr on each ifp. It is safe to
	 * do this unlocked as we have the only reference to it.
	 */
	for (llq = head; llq != NULL; llq = head) {
		ifp = llq->llq_ifp;

		/* Set the link layer address */
		error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN);
		if (error)
			printf("%s: setlladdr failed on %s\n", __func__,
			    ifp->if_xname);

		head = SLIST_NEXT(llq, llq_entries);
		free(llq, M_DEVBUF);
	}
}
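/*
 * Attach an interface as a member port.  The first port donates its MAC
 * address to the lagg interface; later ports are reprogrammed with the
 * lagg's address instead.
 */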
static int
lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
{
	struct lagg_softc *sc_ptr;
	struct lagg_port *lp;
	int error = 0;

	LAGG_WLOCK_ASSERT(sc);

	/* Limit the maximum number of lagg ports */
	if (sc->sc_count >= LAGG_MAX_PORTS)
		return (ENOSPC);

	/* New lagg port has to be in an idle state */
	if (ifp->if_drv_flags & IFF_DRV_OACTIVE)
		return (EBUSY);

	/* Check if port has already been associated with a lagg */
	if (ifp->if_lagg != NULL)
		return (EBUSY);

	/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
	if (ifp->if_type != IFT_ETHER)
		return (EPROTONOSUPPORT);

	/* Allow the first Ethernet member to define the MTU */
	if (SLIST_EMPTY(&sc->sc_ports))
		sc->sc_ifp->if_mtu = ifp->if_mtu;
	else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
		if_printf(sc->sc_ifp, "invalid MTU for %s\n",
		    ifp->if_xname);
		return (EINVAL);
	}

	if ((lp = malloc(sizeof(struct lagg_port),
	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	/* Check if port is a stacked lagg */
	mtx_lock(&lagg_list_mtx);
	SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) {
		if (ifp == sc_ptr->sc_ifp) {
			mtx_unlock(&lagg_list_mtx);
			free(lp, M_DEVBUF);
			return (EINVAL);
			/* XXX disable stacking for the moment, it's untested
			lp->lp_flags |= LAGG_PORT_STACK;
			if (lagg_port_checkstacking(sc_ptr) >=
			    LAGG_MAX_STACKING) {
				mtx_unlock(&lagg_list_mtx);
				free(lp, M_DEVBUF);
				return (E2BIG);
			}
			*/
		}
	}
	mtx_unlock(&lagg_list_mtx);

	/* Change the interface type */
	lp->lp_iftype = ifp->if_type;
	ifp->if_type = IFT_IEEE8023ADLAG;
	ifp->if_lagg = lp;
	lp->lp_ioctl = ifp->if_ioctl;
	ifp->if_ioctl = lagg_port_ioctl;
	lp->lp_output = ifp->if_output;
	ifp->if_output = lagg_port_output;

	lp->lp_ifp = ifp;
	lp->lp_softc = sc;

	/* Save port link layer address */
	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);

	if (SLIST_EMPTY(&sc->sc_ports)) {
		sc->sc_primary = lp;
		lagg_lladdr(sc, IF_LLADDR(ifp));
	} else {
		/* Update link layer address for this port */
		lagg_port_lladdr(lp, IF_LLADDR(sc->sc_ifp));
	}

	/* Insert into the list of ports */
	SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries);
	sc->sc_count++;

	/* Update lagg capabilities */
	lagg_capabilities(sc);

	/* Add multicast addresses and interface flags to this port */
	lagg_ether_cmdmulti(lp, 1);
	lagg_setflags(lp, 1);

	if (sc->sc_port_create != NULL)
		error = (*sc->sc_port_create)(lp);
	if (error) {
		/* Remove the port again, without calling sc_port_destroy */
		lagg_port_destroy(lp, 0);
		return (error);
	}

	return (error);
}

static int
lagg_port_checkstacking(struct lagg_softc *sc)
{
	struct lagg_softc *sc_ptr;
	struct lagg_port *lp;
	int m = 0;

	LAGG_WLOCK_ASSERT(sc);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (lp->lp_flags & LAGG_PORT_STACK) {
			sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc;
			m = MAX(m, lagg_port_checkstacking(sc_ptr));
		}
	}

	return (m + 1);
}

static int
lagg_port_destroy(struct lagg_port *lp, int runpd)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct lagg_port *lp_ptr;
	struct lagg_llq *llq;
	struct ifnet *ifp = lp->lp_ifp;

	LAGG_WLOCK_ASSERT(sc);

	if (runpd && sc->sc_port_destroy != NULL)
		(*sc->sc_port_destroy)(lp);

	/*
	 * Remove multicast addresses and interface flags from this port and
	 * reset the MAC address, skip if the interface is being detached.
	 */
	if (!lp->lp_detaching) {
		lagg_ether_cmdmulti(lp, 0);
		lagg_setflags(lp, 0);
		lagg_port_lladdr(lp, lp->lp_lladdr);
	}

	/* Restore interface */
	ifp->if_type = lp->lp_iftype;
	ifp->if_ioctl = lp->lp_ioctl;
	ifp->if_output = lp->lp_output;
	ifp->if_lagg = NULL;

	/* Finally, remove the port from the lagg */
	SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries);
	sc->sc_count--;

	/* Update the primary interface */
	if (lp == sc->sc_primary) {
		uint8_t lladdr[ETHER_ADDR_LEN];

		if ((lp_ptr = SLIST_FIRST(&sc->sc_ports)) == NULL) {
			bzero(&lladdr, ETHER_ADDR_LEN);
		} else {
			bcopy(lp_ptr->lp_lladdr,
			    lladdr, ETHER_ADDR_LEN);
		}
		lagg_lladdr(sc, lladdr);
		sc->sc_primary = lp_ptr;

		/* Update link layer address for each port */
		SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
			lagg_port_lladdr(lp_ptr, lladdr);
	}

	/* Remove any pending lladdr changes from the queue */
	if (lp->lp_detaching) {
		SLIST_FOREACH(llq, &sc->sc_llq_head, llq_entries) {
			if (llq->llq_ifp == ifp) {
				SLIST_REMOVE(&sc->sc_llq_head, llq, lagg_llq,
				    llq_entries);
				free(llq, M_DEVBUF);
				break;	/* Only appears once */
			}
		}
	}

	if (lp->lp_ifflags)
		if_printf(ifp, "%s: lp_ifflags unclean\n", __func__);

	free(lp, M_DEVBUF);

	/* Update lagg capabilities */
	lagg_capabilities(sc);

	return (0);
}
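/*
 * Ioctl handler interposed on the member ports' if_ioctl by
 * lagg_port_create().  Requests not handled here fall through to the
 * port's original handler saved in lp_ioctl.
 */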
static int
lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct lagg_reqport *rp = (struct lagg_reqport *)data;
	struct lagg_softc *sc;
	struct lagg_port *lp = NULL;
	int error = 0;

	/* Should be checked by the caller */
	if (ifp->if_type != IFT_IEEE8023ADLAG ||
	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
		goto fallback;

	switch (cmd) {
	case SIOCGLAGGPORT:
		if (rp->rp_portname[0] == '\0' ||
		    ifunit(rp->rp_portname) != ifp) {
			error = EINVAL;
			break;
		}

		LAGG_RLOCK(sc);
		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_RUNLOCK(sc);
			break;
		}

		lagg_port2req(lp, rp);
		LAGG_RUNLOCK(sc);
		break;

	case SIOCSIFCAP:
		if (lp->lp_ioctl == NULL) {
			error = EINVAL;
			break;
		}
		error = (*lp->lp_ioctl)(ifp, cmd, data);
		if (error)
			break;

		/* Update lagg interface capabilities */
		LAGG_WLOCK(sc);
		lagg_capabilities(sc);
		LAGG_WUNLOCK(sc);
		break;

	case SIOCSIFMTU:
		/* Do not allow the MTU to be changed once joined */
		error = EINVAL;
		break;

	default:
		goto fallback;
	}

	return (error);

fallback:
	if (lp != NULL && lp->lp_ioctl != NULL)
		return ((*lp->lp_ioctl)(ifp, cmd, data));

	return (EINVAL);
}

static int
lagg_port_output(struct ifnet *ifp, struct mbuf *m,
	struct sockaddr *dst, struct rtentry *rt0)
{
	struct lagg_port *lp = ifp->if_lagg;
	struct ether_header *eh;
	short type = 0;

	switch (dst->sa_family) {
	case pseudo_AF_HDRCMPLT:
	case AF_UNSPEC:
		eh = (struct ether_header *)dst->sa_data;
		type = eh->ether_type;
		break;
	}

	/*
	 * Only allow ethernet types required to initiate or maintain the
	 * link; aggregated frames take a different path.
	 */
	switch (ntohs(type)) {
	case ETHERTYPE_PAE:	/* EAPOL PAE/802.1x */
		return ((*lp->lp_output)(ifp, m, dst, rt0));
	}

	/* drop any other frames */
	m_freem(m);
	return (EBUSY);
}

static void
lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct lagg_port *lp;
	struct lagg_softc *sc;

	if ((lp = ifp->if_lagg) == NULL)
		return;

	sc = lp->lp_softc;

	LAGG_WLOCK(sc);
	lp->lp_detaching = 1;
	lagg_port_destroy(lp, 1);
	LAGG_WUNLOCK(sc);
}

static void
lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp)
{
	struct lagg_softc *sc = lp->lp_softc;

	strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname));
	strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname));
	rp->rp_prio = lp->lp_prio;
	rp->rp_flags = lp->lp_flags;
	if (sc->sc_portreq != NULL)
		(*sc->sc_portreq)(lp, (caddr_t)&rp->rp_psc);

	/* Add protocol specific flags */
	switch (sc->sc_proto) {
	case LAGG_PROTO_FAILOVER:
		if (lp == sc->sc_primary)
			rp->rp_flags |= LAGG_PORT_MASTER;
		if (lp == lagg_link_active(sc, sc->sc_primary))
			rp->rp_flags |= LAGG_PORT_ACTIVE;
		break;

	case LAGG_PROTO_ROUNDROBIN:
	case LAGG_PROTO_LOADBALANCE:
	case LAGG_PROTO_ETHERCHANNEL:
		if (LAGG_PORTACTIVE(lp))
			rp->rp_flags |= LAGG_PORT_ACTIVE;
		break;

	case LAGG_PROTO_LACP:
		/* LACP has a different definition of active */
		if (lacp_port_isactive(lp))
			rp->rp_flags |= LAGG_PORT_ACTIVE;
		break;
	}
}

static void
lagg_init(void *xsc)
{
	struct lagg_softc *sc = (struct lagg_softc *)xsc;
	struct lagg_port *lp;
	struct ifnet *ifp = sc->sc_ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return;

	LAGG_WLOCK(sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	/* Update the port lladdrs */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_port_lladdr(lp, IF_LLADDR(ifp));

	if (sc->sc_init != NULL)
		(*sc->sc_init)(sc);

	LAGG_WUNLOCK(sc);
}

static void
lagg_stop(struct lagg_softc *sc)
{
	struct ifnet *ifp = sc->sc_ifp;

	LAGG_WLOCK_ASSERT(sc);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;

	if (sc->sc_stop != NULL)
		(*sc->sc_stop)(sc);
}
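/*
 * Ioctl handler for the lagg interface itself.  This is the interface
 * that ifconfig(8) drives; for example, "ifconfig lagg0 laggproto lacp
 * laggport em0" should result in a SIOCSLAGG followed by a SIOCSLAGGPORT
 * arriving here (assuming the usual ifconfig lagg backend).
 */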
static int
lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_reqall *ra = (struct lagg_reqall *)data;
	struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
	struct ifreq *ifr = (struct ifreq *)data;
	struct lagg_port *lp;
	struct ifnet *tpif;
	struct thread *td = curthread;
	char *buf, *outbuf;
	int count, buflen, len, error = 0;

	bzero(&rpbuf, sizeof(rpbuf));

	switch (cmd) {
	case SIOCGLAGG:
		LAGG_RLOCK(sc);
		count = 0;
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
			count++;
		buflen = count * sizeof(struct lagg_reqport);
		LAGG_RUNLOCK(sc);

		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);

		LAGG_RLOCK(sc);
		ra->ra_proto = sc->sc_proto;
		if (sc->sc_req != NULL)
			(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);

		count = 0;
		buf = outbuf;
		len = min(ra->ra_size, buflen);
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
			if (len < sizeof(rpbuf))
				break;

			lagg_port2req(lp, &rpbuf);
			memcpy(buf, &rpbuf, sizeof(rpbuf));
			count++;
			buf += sizeof(rpbuf);
			len -= sizeof(rpbuf);
		}
		LAGG_RUNLOCK(sc);
		ra->ra_ports = count;
		ra->ra_size = count * sizeof(rpbuf);
		error = copyout(outbuf, ra->ra_port, ra->ra_size);
		free(outbuf, M_TEMP);
		break;
	case SIOCSLAGG:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (ra->ra_proto >= LAGG_PROTO_MAX) {
			error = EPROTONOSUPPORT;
			break;
		}
		if (sc->sc_proto != LAGG_PROTO_NONE) {
			LAGG_WLOCK(sc);
			error = sc->sc_detach(sc);
			/* Reset protocol and pointers */
			sc->sc_proto = LAGG_PROTO_NONE;
			sc->sc_detach = NULL;
			sc->sc_start = NULL;
			sc->sc_input = NULL;
			sc->sc_port_create = NULL;
			sc->sc_port_destroy = NULL;
			sc->sc_linkstate = NULL;
			sc->sc_init = NULL;
			sc->sc_stop = NULL;
			sc->sc_lladdr = NULL;
			sc->sc_req = NULL;
			sc->sc_portreq = NULL;
			LAGG_WUNLOCK(sc);
		}
		if (error != 0)
			break;
		for (int i = 0; i < (sizeof(lagg_protos) /
		    sizeof(lagg_protos[0])); i++) {
			if (lagg_protos[i].ti_proto == ra->ra_proto) {
				if (sc->sc_ifflags & IFF_DEBUG)
					printf("%s: using proto %u\n",
					    sc->sc_ifname,
					    lagg_protos[i].ti_proto);
				LAGG_WLOCK(sc);
				sc->sc_proto = lagg_protos[i].ti_proto;
				if (sc->sc_proto != LAGG_PROTO_NONE)
					error = lagg_protos[i].ti_attach(sc);
				LAGG_WUNLOCK(sc);
				return (error);
			}
		}
		error = EPROTONOSUPPORT;
		break;
	case SIOCGLAGGPORT:
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}

		LAGG_RLOCK(sc);
		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
		    lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_RUNLOCK(sc);
			break;
		}

		lagg_port2req(lp, rp);
		LAGG_RUNLOCK(sc);
		break;
	case SIOCSLAGGPORT:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}
		LAGG_WLOCK(sc);
		error = lagg_port_create(sc, tpif);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSLAGGDELPORT:
		error = priv_check(td, PRIV_NET_LAGG);
		if (error)
			break;
		if (rp->rp_portname[0] == '\0' ||
		    (tpif = ifunit(rp->rp_portname)) == NULL) {
			error = EINVAL;
			break;
		}

		LAGG_WLOCK(sc);
		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
		    lp->lp_softc != sc) {
			error = ENOENT;
			LAGG_WUNLOCK(sc);
			break;
		}

		error = lagg_port_destroy(lp, 1);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFFLAGS:
		/* Set flags on ports too */
		LAGG_WLOCK(sc);
		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
			lagg_setflags(lp, 1);
		}
		LAGG_WUNLOCK(sc);

		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			LAGG_WLOCK(sc);
			lagg_stop(sc);
			LAGG_WUNLOCK(sc);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.
			 */
			(*ifp->if_init)(sc);
		}
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		LAGG_WLOCK(sc);
		error = lagg_ether_setmulti(sc);
		LAGG_WUNLOCK(sc);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCSIFCAP:
	case SIOCSIFMTU:
		/* Do not allow the MTU or caps to be directly changed */
		error = EINVAL;
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return (error);
}

static int
lagg_ether_setmulti(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	LAGG_WLOCK_ASSERT(sc);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		/* First, remove any existing filter entries. */
		lagg_ether_cmdmulti(lp, 0);
		/* copy all addresses from the lagg interface to the port */
		lagg_ether_cmdmulti(lp, 1);
	}
	return (0);
}

static int
lagg_ether_cmdmulti(struct lagg_port *lp, int set)
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *ifp = lp->lp_ifp;
	struct ifnet *scifp = sc->sc_ifp;
	struct lagg_mc *mc;
	struct ifmultiaddr *ifma, *rifma = NULL;
	struct sockaddr_dl sdl;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	bzero((char *)&sdl, sizeof(sdl));
	sdl.sdl_len = sizeof(sdl);
	sdl.sdl_family = AF_LINK;
	sdl.sdl_type = IFT_ETHER;
	sdl.sdl_alen = ETHER_ADDR_LEN;
	sdl.sdl_index = ifp->if_index;

	if (set) {
		TAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
			    LLADDR(&sdl), ETHER_ADDR_LEN);

			error = if_addmulti(ifp, (struct sockaddr *)&sdl,
			    &rifma);
			if (error)
				return (error);
			mc = malloc(sizeof(struct lagg_mc), M_DEVBUF,
			    M_NOWAIT);
			if (mc == NULL)
				return (ENOMEM);
			mc->mc_ifma = rifma;
			SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries);
		}
	} else {
		while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) {
			SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
			if_delmulti_ifma(mc->mc_ifma);
			free(mc, M_DEVBUF);
		}
	}
	return (0);
}

/* Handle a ref counted flag that should be set on the lagg port as well */
static int
lagg_setflag(struct lagg_port *lp, int flag, int status,
    int (*func)(struct ifnet *, int))
{
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;
	struct ifnet *ifp = lp->lp_ifp;
	int error;

	LAGG_WLOCK_ASSERT(sc);

	status = status ? (scifp->if_flags & flag) : 0;
	/* Now "status" contains the flag value or 0 */

	/*
	 * See if the recorded port status differs from what we want it
	 * to be. If it does, flip it. We record the port status in
	 * lp_ifflags so that we won't clear a flag we haven't set. In
	 * fact, we don't clear or set the port flags directly, but get
	 * or release references to them. That's why we can be sure that
	 * the recorded flags still agree with the actual port flags.
	 */
	if (status != (lp->lp_ifflags & flag)) {
		error = (*func)(ifp, status);
		if (error)
			return (error);
		lp->lp_ifflags &= ~flag;
		lp->lp_ifflags |= status;
	}
	return (0);
}

/*
 * Handle IFF_* flags that require certain changes on the lagg port.
 * If "status" is true, update the port flags to match the lagg;
 * if "status" is false, forcibly clear the flags set on the port.
 */
static int
lagg_setflags(struct lagg_port *lp, int status)
{
	int error, i;

	for (i = 0; lagg_pflags[i].flag; i++) {
		error = lagg_setflag(lp, lagg_pflags[i].flag,
		    status, lagg_pflags[i].func);
		if (error)
			return (error);
	}
	return (0);
}

static void
lagg_start(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct mbuf *m;
	int error = 0;

	LAGG_RLOCK(sc);
	for (;; error = 0) {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		ETHER_BPF_MTAP(ifp, m);

		if (sc->sc_proto != LAGG_PROTO_NONE)
			error = (*sc->sc_start)(sc, m);
		else
			m_freem(m);

		if (error == 0)
			ifp->if_opackets++;
		else
			ifp->if_oerrors++;
	}
	LAGG_RUNLOCK(sc);
}

static struct mbuf *
lagg_input(struct ifnet *ifp, struct mbuf *m)
{
	struct lagg_port *lp = ifp->if_lagg;
	struct lagg_softc *sc = lp->lp_softc;
	struct ifnet *scifp = sc->sc_ifp;

	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
	    sc->sc_proto == LAGG_PROTO_NONE) {
		m_freem(m);
		return (NULL);
	}

	LAGG_RLOCK(sc);
	ETHER_BPF_MTAP(scifp, m);

	m = (*sc->sc_input)(sc, lp, m);

	if (m != NULL) {
		scifp->if_ipackets++;
		scifp->if_ibytes += m->m_pkthdr.len;
	}

	LAGG_RUNLOCK(sc);
	return (m);
}

static int
lagg_media_change(struct ifnet *ifp)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;

	if (sc->sc_ifflags & IFF_DEBUG)
		printf("%s\n", __func__);

	/* Ignore */
	return (0);
}

static void
lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
	struct lagg_port *lp;

	imr->ifm_status = IFM_AVALID;
	imr->ifm_active = IFM_ETHER | IFM_AUTO;

	LAGG_RLOCK(sc);
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp))
			imr->ifm_status |= IFM_ACTIVE;
	}
	LAGG_RUNLOCK(sc);
}

static void
lagg_port_state(struct ifnet *ifp, int state)
{
	struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
	struct lagg_softc *sc = NULL;

	if (lp != NULL)
		sc = lp->lp_softc;
	if (sc == NULL)
		return;

	LAGG_WLOCK(sc);
	if (sc->sc_linkstate != NULL)
		(*sc->sc_linkstate)(lp);
	LAGG_WUNLOCK(sc);
}
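/*
 * Return a usable port: "lp" itself if it is active, otherwise the next
 * active port.  The failover protocol, for example, calls this with
 * sc_primary so that transmission falls back to the next active port
 * while the master link is down.
 */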
struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_port *lp_next, *rval = NULL;
	/* int new_link = LINK_STATE_DOWN; */

	LAGG_RLOCK_ASSERT(sc);

	/* Search for a port which reports an active link state. */
	if (lp == NULL)
		goto search;
	if (LAGG_PORTACTIVE(lp)) {
		rval = lp;
		goto found;
	}
	if ((lp_next = SLIST_NEXT(lp, lp_entries)) != NULL &&
	    LAGG_PORTACTIVE(lp_next)) {
		rval = lp_next;
		goto found;
	}

search:
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (LAGG_PORTACTIVE(lp_next)) {
			rval = lp_next;
			goto found;
		}
	}

found:
	if (rval != NULL) {
		/*
		 * The IEEE 802.1D standard assumes that a lagg with
		 * multiple ports is always full duplex. This is valid
		 * for load sharing laggs and if at least two links
		 * are active. Unfortunately, checking the latter would
		 * be too expensive at this point.
		 XXX
		if ((sc->sc_capabilities & IFCAP_LAGG_FULLDUPLEX) &&
		    (sc->sc_count > 1))
			new_link = LINK_STATE_FULL_DUPLEX;
		else
			new_link = rval->lp_link_state;
		 */
	}

	return (rval);
}

static const void *
lagg_gethdr(struct mbuf *m, u_int off, u_int len, void *buf)
{
	if (m->m_pkthdr.len < (off + len)) {
		return (NULL);
	} else if (m->m_len < (off + len)) {
		m_copydata(m, off, len, buf);
		return (buf);
	}
	return (mtod(m, char *) + off);
}

uint32_t
lagg_hashmbuf(struct mbuf *m, uint32_t key)
{
	uint16_t etype;
	uint32_t p = 0;
	int off;
	struct ether_header *eh;
	struct ether_vlan_header vlanbuf;
	const struct ether_vlan_header *vlan;
#ifdef INET
	const struct ip *ip;
	struct ip ipbuf;
#endif
#ifdef INET6
	const struct ip6_hdr *ip6;
	struct ip6_hdr ip6buf;
	uint32_t flow;
#endif

	off = sizeof(*eh);
	if (m->m_len < off)
		goto out;
	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key);
	p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);

	/* Special handling for encapsulating VLAN frames */
	if (m->m_flags & M_VLANTAG) {
		p = hash32_buf(&m->m_pkthdr.ether_vtag,
		    sizeof(m->m_pkthdr.ether_vtag), p);
	} else if (etype == ETHERTYPE_VLAN) {
		vlan = lagg_gethdr(m, off, sizeof(*vlan), &vlanbuf);
		if (vlan == NULL)
			goto out;

		p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p);
		etype = ntohs(vlan->evl_proto);
		off += sizeof(*vlan) - sizeof(*eh);
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = lagg_gethdr(m, off, sizeof(*ip), &ipbuf);
		if (ip == NULL)
			goto out;

		p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p);
		p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p);
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = lagg_gethdr(m, off, sizeof(*ip6), &ip6buf);
		if (ip6 == NULL)
			goto out;

		p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p);
		p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p);
		flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK;
		p = hash32_buf(&flow, sizeof(flow), p);	/* IPv6 flow label */
		break;
#endif
	}
out:
	return (p);
}
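/*
 * Example of how the hash is consumed: the load balance protocol reduces
 * it to a port index with "p % sc->sc_count" (see lagg_lb_start()), so
 * frames of one flow (same MACs, VLAN tag and, where present, IP
 * addresses or IPv6 flow label) keep mapping to the same port as long as
 * the port set is unchanged.
 */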
int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	int error = 0;

	IFQ_HANDOFF(ifp, m, error);
	if (error)
		ifp->if_oerrors++;
	return (error);
}

/*
 * Simple round robin aggregation
 */

static int
lagg_rr_attach(struct lagg_softc *sc)
{
	sc->sc_detach = lagg_rr_detach;
	sc->sc_start = lagg_rr_start;
	sc->sc_input = lagg_rr_input;
	sc->sc_port_create = NULL;
	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;
	sc->sc_seq = 0;

	return (0);
}

static int
lagg_rr_detach(struct lagg_softc *sc)
{
	return (0);
}

static int
lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;
	uint32_t p;

	p = atomic_fetchadd_32(&sc->sc_seq, 1);
	p %= sc->sc_count;
	lp = SLIST_FIRST(&sc->sc_ports);
	while (p--)
		lp = SLIST_NEXT(lp, lp_entries);

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
	 */
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENOENT);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}

/*
 * Active failover
 */

static int
lagg_fail_attach(struct lagg_softc *sc)
{
	sc->sc_detach = lagg_fail_detach;
	sc->sc_start = lagg_fail_start;
	sc->sc_input = lagg_fail_input;
	sc->sc_port_create = NULL;
	sc->sc_port_destroy = NULL;

	return (0);
}

static int
lagg_fail_detach(struct lagg_softc *sc)
{
	return (0);
}

static int
lagg_fail_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	/* Use the master port if active or the next available port */
	if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) {
		m_freem(m);
		return (ENOENT);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct lagg_port *tmp_tp;

	if (lp == sc->sc_primary) {
		m->m_pkthdr.rcvif = ifp;
		return (m);
	}

	if (sc->sc_primary->lp_link_state == LINK_STATE_DOWN) {
		tmp_tp = lagg_link_active(sc, NULL);
		/*
		 * If tmp_tp is NULL, we've received a packet when all
		 * our links are down. Weird, but process it anyway.
		 */
		if ((tmp_tp == NULL || tmp_tp == lp)) {
			m->m_pkthdr.rcvif = ifp;
			return (m);
		}
	}

	m_freem(m);
	return (NULL);
}
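/*
 * Note that in failover mode only traffic received on the master (or,
 * with the master link down, on the first active port) is accepted;
 * frames arriving on a backup port are dropped rather than duplicated.
 */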
/*
 * Load balancing
 */

static int
lagg_lb_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	struct lagg_lb *lb;

	if ((lb = (struct lagg_lb *)malloc(sizeof(struct lagg_lb),
	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	sc->sc_detach = lagg_lb_detach;
	sc->sc_start = lagg_lb_start;
	sc->sc_input = lagg_lb_input;
	sc->sc_port_create = lagg_lb_port_create;
	sc->sc_port_destroy = lagg_lb_port_destroy;
	sc->sc_capabilities = IFCAP_LAGG_FULLDUPLEX;

	lb->lb_key = arc4random();
	sc->sc_psc = (caddr_t)lb;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lagg_lb_port_create(lp);

	return (0);
}

static int
lagg_lb_detach(struct lagg_softc *sc)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;

	if (lb != NULL)
		free(lb, M_DEVBUF);
	return (0);
}

static int
lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp_next;
	int i = 0;

	bzero(&lb->lb_ports, sizeof(lb->lb_ports));
	SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
		if (lp_next == lp)
			continue;
		if (i >= LAGG_MAX_PORTS)
			return (EINVAL);
		if (sc->sc_ifflags & IFF_DEBUG)
			printf("%s: port %s at index %d\n",
			    sc->sc_ifname, lp_next->lp_ifname, i);
		lb->lb_ports[i++] = lp_next;
	}

	return (0);
}

static int
lagg_lb_port_create(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;

	return (lagg_lb_porttable(sc, NULL));
}

static void
lagg_lb_port_destroy(struct lagg_port *lp)
{
	struct lagg_softc *sc = lp->lp_softc;

	lagg_lb_porttable(sc, lp);
}

static int
lagg_lb_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc;
	struct lagg_port *lp = NULL;
	uint32_t p = 0;
	int idx;

	p = lagg_hashmbuf(m, lb->lb_key);
	if ((idx = p % sc->sc_count) >= LAGG_MAX_PORTS) {
		m_freem(m);
		return (EINVAL);
	}
	lp = lb->lb_ports[idx];

	/*
	 * Check the port's link state. This will return the next active
	 * port if the link is down or the port is NULL.
	 */
	if ((lp = lagg_link_active(sc, lp)) == NULL) {
		m_freem(m);
		return (ENOENT);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}
static struct mbuf *
lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;

	/* Just pass in the packet to our lagg device */
	m->m_pkthdr.rcvif = ifp;

	return (m);
}

/*
 * 802.3ad LACP
 */

static int
lagg_lacp_attach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int error;

	sc->sc_detach = lagg_lacp_detach;
	sc->sc_port_create = lacp_port_create;
	sc->sc_port_destroy = lacp_port_destroy;
	sc->sc_linkstate = lacp_linkstate;
	sc->sc_start = lagg_lacp_start;
	sc->sc_input = lagg_lacp_input;
	sc->sc_init = lacp_init;
	sc->sc_stop = lacp_stop;
	sc->sc_lladdr = lagg_lacp_lladdr;
	sc->sc_req = lacp_req;
	sc->sc_portreq = lacp_portreq;

	error = lacp_attach(sc);
	if (error)
		return (error);

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);

	return (error);
}

static int
lagg_lacp_detach(struct lagg_softc *sc)
{
	struct lagg_port *lp;
	int error;

	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	/* unlocking is safe here */
	LAGG_WUNLOCK(sc);
	error = lacp_detach(sc);
	LAGG_WLOCK(sc);

	return (error);
}

static void
lagg_lacp_lladdr(struct lagg_softc *sc)
{
	struct lagg_port *lp;

	/* purge all the lacp ports */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_destroy(lp);

	/* add them back in */
	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
		lacp_port_create(lp);
}

static int
lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
{
	struct lagg_port *lp;

	lp = lacp_select_tx_port(sc, m);
	if (lp == NULL) {
		m_freem(m);
		return (EBUSY);
	}

	/* Send mbuf */
	return (lagg_enqueue(lp->lp_ifp, m));
}

static struct mbuf *
lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
{
	struct ifnet *ifp = sc->sc_ifp;
	struct ether_header *eh;
	u_short etype;

	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);

	/* Tap off LACP control messages */
	if (etype == ETHERTYPE_SLOW) {
		lacp_input(lp, m);
		return (NULL);
	}

	/*
	 * If the port is not collecting or not in the active aggregator then
	 * free and return.
	 */
	if ((lp->lp_flags & LAGG_PORT_COLLECTING) == 0 ||
	    lacp_port_isactive(lp) == 0) {
		m_freem(m);
		return (NULL);
	}

	m->m_pkthdr.rcvif = ifp;
	return (m);
}