/*	$OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $	*/

/*
 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org>
 * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org>
 * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_kern_tls.h"
#include "opt_ratelimit.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_clone.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/vnet.h>
#include <net/infiniband.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/ip.h>
#endif
#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif

#include <net/if_vlan_var.h>
#include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h>

#ifdef INET6
/*
 * XXX: declare here to avoid to include many inet6 related files..
 * should be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif

#ifdef DEV_NETMAP
MODULE_DEPEND(if_lagg, netmap, 1, 1, 1);
#endif

#define	LAGG_SX_INIT(_sc)	sx_init(&(_sc)->sc_sx, "if_lagg sx")
#define	LAGG_SX_DESTROY(_sc)	sx_destroy(&(_sc)->sc_sx)
#define	LAGG_XLOCK(_sc)		sx_xlock(&(_sc)->sc_sx)
#define	LAGG_XUNLOCK(_sc)	sx_xunlock(&(_sc)->sc_sx)
#define	LAGG_SXLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SA_LOCKED)
#define	LAGG_XLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SA_XLOCKED)

/* Special flags we should propagate to the lagg ports.
*/ 100 static struct { 101 int flag; 102 int (*func)(struct ifnet *, int); 103 } lagg_pflags[] = { 104 {IFF_PROMISC, ifpromisc}, 105 {IFF_ALLMULTI, if_allmulti}, 106 {0, NULL} 107 }; 108 109 struct lagg_snd_tag { 110 struct m_snd_tag com; 111 struct m_snd_tag *tag; 112 }; 113 114 VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ 115 #define V_lagg_list VNET(lagg_list) 116 VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx); 117 #define V_lagg_list_mtx VNET(lagg_list_mtx) 118 #define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \ 119 "if_lagg list", NULL, MTX_DEF) 120 #define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx) 121 #define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx) 122 #define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx) 123 eventhandler_tag lagg_detach_cookie = NULL; 124 125 static int lagg_clone_create(struct if_clone *, char *, size_t, 126 struct ifc_data *, struct ifnet **); 127 static int lagg_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); 128 VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner); 129 #define V_lagg_cloner VNET(lagg_cloner) 130 static const char laggname[] = "lagg"; 131 static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface"); 132 133 static void lagg_capabilities(struct lagg_softc *); 134 static int lagg_port_create(struct lagg_softc *, struct ifnet *); 135 static int lagg_port_destroy(struct lagg_port *, int); 136 static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *); 137 static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *); 138 static void lagg_linkstate(struct lagg_softc *); 139 static void lagg_port_state(struct ifnet *, int); 140 static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t); 141 static int lagg_port_output(struct ifnet *, struct mbuf *, 142 const struct sockaddr *, struct route *); 143 static void lagg_port_ifdetach(void *arg __unused, struct ifnet *); 144 #ifdef LAGG_PORT_STACKING 145 static int lagg_port_checkstacking(struct lagg_softc *); 146 #endif 147 static void lagg_port2req(struct lagg_port *, struct lagg_reqport *); 148 static void lagg_init(void *); 149 static void lagg_stop(struct lagg_softc *); 150 static int lagg_ioctl(struct ifnet *, u_long, caddr_t); 151 #if defined(KERN_TLS) || defined(RATELIMIT) 152 static int lagg_snd_tag_alloc(struct ifnet *, 153 union if_snd_tag_alloc_params *, 154 struct m_snd_tag **); 155 static int lagg_snd_tag_modify(struct m_snd_tag *, 156 union if_snd_tag_modify_params *); 157 static int lagg_snd_tag_query(struct m_snd_tag *, 158 union if_snd_tag_query_params *); 159 static void lagg_snd_tag_free(struct m_snd_tag *); 160 static struct m_snd_tag *lagg_next_snd_tag(struct m_snd_tag *); 161 static void lagg_ratelimit_query(struct ifnet *, 162 struct if_ratelimit_query_results *); 163 #endif 164 static int lagg_setmulti(struct lagg_port *); 165 static int lagg_clrmulti(struct lagg_port *); 166 static void lagg_setcaps(struct lagg_port *, int cap, int cap2); 167 static int lagg_setflag(struct lagg_port *, int, int, 168 int (*func)(struct ifnet *, int)); 169 static int lagg_setflags(struct lagg_port *, int status); 170 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt); 171 static int lagg_transmit_ethernet(struct ifnet *, struct mbuf *); 172 static int lagg_transmit_infiniband(struct ifnet *, struct mbuf *); 173 static void lagg_qflush(struct ifnet *); 174 static int lagg_media_change(struct ifnet *); 175 static void lagg_media_status(struct ifnet *, struct ifmediareq *); 176 
static struct lagg_port *lagg_link_active(struct lagg_softc *, 177 struct lagg_port *); 178 179 /* Simple round robin */ 180 static void lagg_rr_attach(struct lagg_softc *); 181 static int lagg_rr_start(struct lagg_softc *, struct mbuf *); 182 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *, 183 struct mbuf *); 184 185 /* Active failover */ 186 static int lagg_fail_start(struct lagg_softc *, struct mbuf *); 187 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *, 188 struct mbuf *); 189 190 /* Loadbalancing */ 191 static void lagg_lb_attach(struct lagg_softc *); 192 static void lagg_lb_detach(struct lagg_softc *); 193 static int lagg_lb_port_create(struct lagg_port *); 194 static void lagg_lb_port_destroy(struct lagg_port *); 195 static int lagg_lb_start(struct lagg_softc *, struct mbuf *); 196 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *, 197 struct mbuf *); 198 static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *); 199 200 /* Broadcast */ 201 static int lagg_bcast_start(struct lagg_softc *, struct mbuf *); 202 static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *, 203 struct mbuf *); 204 205 /* 802.3ad LACP */ 206 static void lagg_lacp_attach(struct lagg_softc *); 207 static void lagg_lacp_detach(struct lagg_softc *); 208 static int lagg_lacp_start(struct lagg_softc *, struct mbuf *); 209 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *, 210 struct mbuf *); 211 static void lagg_lacp_lladdr(struct lagg_softc *); 212 213 /* lagg protocol table */ 214 static const struct lagg_proto { 215 lagg_proto pr_num; 216 void (*pr_attach)(struct lagg_softc *); 217 void (*pr_detach)(struct lagg_softc *); 218 int (*pr_start)(struct lagg_softc *, struct mbuf *); 219 struct mbuf * (*pr_input)(struct lagg_softc *, struct lagg_port *, 220 struct mbuf *); 221 int (*pr_addport)(struct lagg_port *); 222 void (*pr_delport)(struct lagg_port *); 223 void (*pr_linkstate)(struct lagg_port *); 224 void (*pr_init)(struct lagg_softc *); 225 void (*pr_stop)(struct lagg_softc *); 226 void (*pr_lladdr)(struct lagg_softc *); 227 void (*pr_request)(struct lagg_softc *, void *); 228 void (*pr_portreq)(struct lagg_port *, void *); 229 } lagg_protos[] = { 230 { 231 .pr_num = LAGG_PROTO_NONE 232 }, 233 { 234 .pr_num = LAGG_PROTO_ROUNDROBIN, 235 .pr_attach = lagg_rr_attach, 236 .pr_start = lagg_rr_start, 237 .pr_input = lagg_rr_input, 238 }, 239 { 240 .pr_num = LAGG_PROTO_FAILOVER, 241 .pr_start = lagg_fail_start, 242 .pr_input = lagg_fail_input, 243 }, 244 { 245 .pr_num = LAGG_PROTO_LOADBALANCE, 246 .pr_attach = lagg_lb_attach, 247 .pr_detach = lagg_lb_detach, 248 .pr_start = lagg_lb_start, 249 .pr_input = lagg_lb_input, 250 .pr_addport = lagg_lb_port_create, 251 .pr_delport = lagg_lb_port_destroy, 252 }, 253 { 254 .pr_num = LAGG_PROTO_LACP, 255 .pr_attach = lagg_lacp_attach, 256 .pr_detach = lagg_lacp_detach, 257 .pr_start = lagg_lacp_start, 258 .pr_input = lagg_lacp_input, 259 .pr_addport = lacp_port_create, 260 .pr_delport = lacp_port_destroy, 261 .pr_linkstate = lacp_linkstate, 262 .pr_init = lacp_init, 263 .pr_stop = lacp_stop, 264 .pr_lladdr = lagg_lacp_lladdr, 265 .pr_request = lacp_req, 266 .pr_portreq = lacp_portreq, 267 }, 268 { 269 .pr_num = LAGG_PROTO_BROADCAST, 270 .pr_start = lagg_bcast_start, 271 .pr_input = lagg_bcast_input, 272 }, 273 }; 274 275 SYSCTL_DECL(_net_link); 276 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 277 "Link Aggregation"); 
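/*
 * The tunables declared below appear under the net.link.lagg sysctl tree
 * created above, e.g. net.link.lagg.failover_rx_all and
 * net.link.lagg.default_use_flowid.  The default_* knobs are only consulted
 * by lagg_clone_create(): they seed sc_opts and flowid_shift of a newly
 * created lagg interface and have no effect on interfaces that already
 * exist.
 */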
278 279 /* Allow input on any failover links */ 280 VNET_DEFINE_STATIC(int, lagg_failover_rx_all); 281 #define V_lagg_failover_rx_all VNET(lagg_failover_rx_all) 282 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET, 283 &VNET_NAME(lagg_failover_rx_all), 0, 284 "Accept input from any interface in a failover lagg"); 285 286 /* Default value for using flowid */ 287 VNET_DEFINE_STATIC(int, def_use_flowid) = 0; 288 #define V_def_use_flowid VNET(def_use_flowid) 289 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, 290 &VNET_NAME(def_use_flowid), 0, 291 "Default setting for using flow id for load sharing"); 292 293 /* Default value for using numa */ 294 VNET_DEFINE_STATIC(int, def_use_numa) = 1; 295 #define V_def_use_numa VNET(def_use_numa) 296 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN, 297 &VNET_NAME(def_use_numa), 0, 298 "Use numa to steer flows"); 299 300 /* Default value for flowid shift */ 301 VNET_DEFINE_STATIC(int, def_flowid_shift) = 16; 302 #define V_def_flowid_shift VNET(def_flowid_shift) 303 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN, 304 &VNET_NAME(def_flowid_shift), 0, 305 "Default setting for flowid shift for load sharing"); 306 307 static void 308 vnet_lagg_init(const void *unused __unused) 309 { 310 311 LAGG_LIST_LOCK_INIT(); 312 SLIST_INIT(&V_lagg_list); 313 struct if_clone_addreq req = { 314 .create_f = lagg_clone_create, 315 .destroy_f = lagg_clone_destroy, 316 .flags = IFC_F_AUTOUNIT, 317 }; 318 V_lagg_cloner = ifc_attach_cloner(laggname, &req); 319 } 320 VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 321 vnet_lagg_init, NULL); 322 323 static void 324 vnet_lagg_uninit(const void *unused __unused) 325 { 326 327 ifc_detach_cloner(V_lagg_cloner); 328 LAGG_LIST_LOCK_DESTROY(); 329 } 330 VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 331 vnet_lagg_uninit, NULL); 332 333 static int 334 lagg_modevent(module_t mod, int type, void *data) 335 { 336 337 switch (type) { 338 case MOD_LOAD: 339 lagg_input_ethernet_p = lagg_input_ethernet; 340 lagg_input_infiniband_p = lagg_input_infiniband; 341 lagg_linkstate_p = lagg_port_state; 342 lagg_detach_cookie = EVENTHANDLER_REGISTER( 343 ifnet_departure_event, lagg_port_ifdetach, NULL, 344 EVENTHANDLER_PRI_ANY); 345 break; 346 case MOD_UNLOAD: 347 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 348 lagg_detach_cookie); 349 lagg_input_ethernet_p = NULL; 350 lagg_input_infiniband_p = NULL; 351 lagg_linkstate_p = NULL; 352 break; 353 default: 354 return (EOPNOTSUPP); 355 } 356 return (0); 357 } 358 359 static moduledata_t lagg_mod = { 360 "if_lagg", 361 lagg_modevent, 362 0 363 }; 364 365 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 366 MODULE_VERSION(if_lagg, 1); 367 MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1); 368 369 static void 370 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr) 371 { 372 373 LAGG_XLOCK_ASSERT(sc); 374 KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto", 375 __func__, sc)); 376 377 if (sc->sc_ifflags & IFF_DEBUG) 378 if_printf(sc->sc_ifp, "using proto %u\n", pr); 379 380 if (lagg_protos[pr].pr_attach != NULL) 381 lagg_protos[pr].pr_attach(sc); 382 sc->sc_proto = pr; 383 } 384 385 static void 386 lagg_proto_detach(struct lagg_softc *sc) 387 { 388 lagg_proto pr; 389 390 LAGG_XLOCK_ASSERT(sc); 391 pr = sc->sc_proto; 392 sc->sc_proto = LAGG_PROTO_NONE; 393 394 if (lagg_protos[pr].pr_detach != NULL) 395 lagg_protos[pr].pr_detach(sc); 396 } 397 398 
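/*
 * The remaining lagg_proto_*() helpers below are thin dispatchers into
 * lagg_protos[], indexed by sc->sc_proto.  All handlers except pr_start
 * and pr_input are checked for NULL before being called; pr_start and
 * pr_input are invoked unconditionally, and the transmit/input paths
 * guard against LAGG_PROTO_NONE themselves.
 *
 * As an illustrative sketch only (the names are hypothetical and not part
 * of this driver), a new protocol would be wired up by adding a table
 * entry such as:
 *
 *	{
 *		.pr_num    = LAGG_PROTO_EXAMPLE,
 *		.pr_attach = lagg_example_attach,
 *		.pr_start  = lagg_example_start,
 *		.pr_input  = lagg_example_input,
 *	},
 *
 * plus a matching LAGG_PROTO_EXAMPLE value in the lagg_proto enumeration.
 */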
static int 399 lagg_proto_start(struct lagg_softc *sc, struct mbuf *m) 400 { 401 402 return (lagg_protos[sc->sc_proto].pr_start(sc, m)); 403 } 404 405 static struct mbuf * 406 lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 407 { 408 409 return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m)); 410 } 411 412 static int 413 lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp) 414 { 415 416 if (lagg_protos[sc->sc_proto].pr_addport == NULL) 417 return (0); 418 else 419 return (lagg_protos[sc->sc_proto].pr_addport(lp)); 420 } 421 422 static void 423 lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp) 424 { 425 426 if (lagg_protos[sc->sc_proto].pr_delport != NULL) 427 lagg_protos[sc->sc_proto].pr_delport(lp); 428 } 429 430 static void 431 lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp) 432 { 433 434 if (lagg_protos[sc->sc_proto].pr_linkstate != NULL) 435 lagg_protos[sc->sc_proto].pr_linkstate(lp); 436 } 437 438 static void 439 lagg_proto_init(struct lagg_softc *sc) 440 { 441 442 if (lagg_protos[sc->sc_proto].pr_init != NULL) 443 lagg_protos[sc->sc_proto].pr_init(sc); 444 } 445 446 static void 447 lagg_proto_stop(struct lagg_softc *sc) 448 { 449 450 if (lagg_protos[sc->sc_proto].pr_stop != NULL) 451 lagg_protos[sc->sc_proto].pr_stop(sc); 452 } 453 454 static void 455 lagg_proto_lladdr(struct lagg_softc *sc) 456 { 457 458 if (lagg_protos[sc->sc_proto].pr_lladdr != NULL) 459 lagg_protos[sc->sc_proto].pr_lladdr(sc); 460 } 461 462 static void 463 lagg_proto_request(struct lagg_softc *sc, void *v) 464 { 465 466 if (lagg_protos[sc->sc_proto].pr_request != NULL) 467 lagg_protos[sc->sc_proto].pr_request(sc, v); 468 } 469 470 static void 471 lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v) 472 { 473 474 if (lagg_protos[sc->sc_proto].pr_portreq != NULL) 475 lagg_protos[sc->sc_proto].pr_portreq(lp, v); 476 } 477 478 /* 479 * This routine is run via an vlan 480 * config EVENT 481 */ 482 static void 483 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) 484 { 485 struct lagg_softc *sc = ifp->if_softc; 486 struct lagg_port *lp; 487 488 if (ifp->if_softc != arg) /* Not our event */ 489 return; 490 491 LAGG_XLOCK(sc); 492 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 493 EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag); 494 LAGG_XUNLOCK(sc); 495 } 496 497 /* 498 * This routine is run via an vlan 499 * unconfig EVENT 500 */ 501 static void 502 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag) 503 { 504 struct lagg_softc *sc = ifp->if_softc; 505 struct lagg_port *lp; 506 507 if (ifp->if_softc != arg) /* Not our event */ 508 return; 509 510 LAGG_XLOCK(sc); 511 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 512 EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag); 513 LAGG_XUNLOCK(sc); 514 } 515 516 static int 517 lagg_clone_create(struct if_clone *ifc, char *name, size_t len, 518 struct ifc_data *ifd, struct ifnet **ifpp) 519 { 520 struct iflaggparam iflp; 521 struct lagg_softc *sc; 522 struct ifnet *ifp; 523 int if_type; 524 int error; 525 static const uint8_t eaddr[LAGG_ADDR_LEN]; 526 527 if (ifd->params != NULL) { 528 error = ifc_copyin(ifd, &iflp, sizeof(iflp)); 529 if (error) 530 return (error); 531 532 switch (iflp.lagg_type) { 533 case LAGG_TYPE_ETHERNET: 534 if_type = IFT_ETHER; 535 break; 536 case LAGG_TYPE_INFINIBAND: 537 if_type = IFT_INFINIBAND; 538 break; 539 default: 540 return (EINVAL); 541 } 542 } else { 543 if_type = IFT_ETHER; 544 } 545 546 sc = malloc(sizeof(*sc), 
M_LAGG, M_WAITOK|M_ZERO); 547 ifp = sc->sc_ifp = if_alloc(if_type); 548 if (ifp == NULL) { 549 free(sc, M_LAGG); 550 return (ENOSPC); 551 } 552 LAGG_SX_INIT(sc); 553 554 mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF); 555 callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0); 556 557 LAGG_XLOCK(sc); 558 if (V_def_use_flowid) 559 sc->sc_opts |= LAGG_OPT_USE_FLOWID; 560 if (V_def_use_numa) 561 sc->sc_opts |= LAGG_OPT_USE_NUMA; 562 sc->flowid_shift = V_def_flowid_shift; 563 564 /* Hash all layers by default */ 565 sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4; 566 567 lagg_proto_attach(sc, LAGG_PROTO_DEFAULT); 568 569 CK_SLIST_INIT(&sc->sc_ports); 570 571 switch (if_type) { 572 case IFT_ETHER: 573 /* Initialise pseudo media types */ 574 ifmedia_init(&sc->sc_media, 0, lagg_media_change, 575 lagg_media_status); 576 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 577 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 578 579 if_initname(ifp, laggname, ifd->unit); 580 ifp->if_transmit = lagg_transmit_ethernet; 581 break; 582 case IFT_INFINIBAND: 583 if_initname(ifp, laggname, ifd->unit); 584 ifp->if_transmit = lagg_transmit_infiniband; 585 break; 586 default: 587 break; 588 } 589 ifp->if_softc = sc; 590 ifp->if_qflush = lagg_qflush; 591 ifp->if_init = lagg_init; 592 ifp->if_ioctl = lagg_ioctl; 593 ifp->if_get_counter = lagg_get_counter; 594 ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; 595 #if defined(KERN_TLS) || defined(RATELIMIT) 596 ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; 597 ifp->if_ratelimit_query = lagg_ratelimit_query; 598 #endif 599 ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; 600 601 /* 602 * Attach as an ordinary ethernet device, children will be attached 603 * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG. 
604 */ 605 switch (if_type) { 606 case IFT_ETHER: 607 ether_ifattach(ifp, eaddr); 608 break; 609 case IFT_INFINIBAND: 610 infiniband_ifattach(ifp, eaddr, sc->sc_bcast_addr); 611 break; 612 default: 613 break; 614 } 615 616 sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, 617 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST); 618 sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, 619 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); 620 621 /* Insert into the global list of laggs */ 622 LAGG_LIST_LOCK(); 623 SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries); 624 LAGG_LIST_UNLOCK(); 625 LAGG_XUNLOCK(sc); 626 *ifpp = ifp; 627 628 return (0); 629 } 630 631 static int 632 lagg_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 633 { 634 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; 635 struct lagg_port *lp; 636 637 LAGG_XLOCK(sc); 638 sc->sc_destroying = 1; 639 lagg_stop(sc); 640 ifp->if_flags &= ~IFF_UP; 641 642 EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); 643 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); 644 645 /* Shutdown and remove lagg ports */ 646 while ((lp = CK_SLIST_FIRST(&sc->sc_ports)) != NULL) 647 lagg_port_destroy(lp, 1); 648 649 /* Unhook the aggregation protocol */ 650 lagg_proto_detach(sc); 651 LAGG_XUNLOCK(sc); 652 653 switch (ifp->if_type) { 654 case IFT_ETHER: 655 ifmedia_removeall(&sc->sc_media); 656 ether_ifdetach(ifp); 657 break; 658 case IFT_INFINIBAND: 659 infiniband_ifdetach(ifp); 660 break; 661 default: 662 break; 663 } 664 if_free(ifp); 665 666 LAGG_LIST_LOCK(); 667 SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); 668 LAGG_LIST_UNLOCK(); 669 670 mtx_destroy(&sc->sc_mtx); 671 LAGG_SX_DESTROY(sc); 672 free(sc, M_LAGG); 673 674 return (0); 675 } 676 677 static void 678 lagg_capabilities(struct lagg_softc *sc) 679 { 680 struct lagg_port *lp; 681 int cap, cap2, ena, ena2, pena, pena2; 682 uint64_t hwa; 683 struct ifnet_hw_tsomax hw_tsomax; 684 685 LAGG_XLOCK_ASSERT(sc); 686 687 /* Get common enabled capabilities for the lagg ports */ 688 ena = ena2 = ~0; 689 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 690 ena &= lp->lp_ifp->if_capenable; 691 ena2 &= lp->lp_ifp->if_capenable2; 692 } 693 if (CK_SLIST_FIRST(&sc->sc_ports) == NULL) 694 ena = ena2 = 0; 695 696 /* 697 * Apply common enabled capabilities back to the lagg ports. 698 * May require several iterations if they are dependent. 
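 * (For example, clearing IFCAP_TXCSUM on one port may cause that port's
 * driver to drop IFCAP_TSO4 as well, and the narrower common set then has
 * to be re-applied to the remaining ports on the next pass.)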
699 */ 700 do { 701 pena = ena; 702 pena2 = ena2; 703 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 704 lagg_setcaps(lp, ena, ena2); 705 ena &= lp->lp_ifp->if_capenable; 706 ena2 &= lp->lp_ifp->if_capenable2; 707 } 708 } while (pena != ena || pena2 != ena2); 709 710 /* Get other capabilities from the lagg ports */ 711 cap = cap2 = ~0; 712 hwa = ~(uint64_t)0; 713 memset(&hw_tsomax, 0, sizeof(hw_tsomax)); 714 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 715 cap &= lp->lp_ifp->if_capabilities; 716 cap2 &= lp->lp_ifp->if_capabilities2; 717 hwa &= lp->lp_ifp->if_hwassist; 718 if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax); 719 } 720 if (CK_SLIST_FIRST(&sc->sc_ports) == NULL) 721 cap = cap2 = hwa = 0; 722 723 if (sc->sc_ifp->if_capabilities != cap || 724 sc->sc_ifp->if_capenable != ena || 725 sc->sc_ifp->if_capenable2 != ena2 || 726 sc->sc_ifp->if_hwassist != hwa || 727 if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) { 728 sc->sc_ifp->if_capabilities = cap; 729 sc->sc_ifp->if_capabilities2 = cap2; 730 sc->sc_ifp->if_capenable = ena; 731 sc->sc_ifp->if_capenable2 = ena2; 732 sc->sc_ifp->if_hwassist = hwa; 733 getmicrotime(&sc->sc_ifp->if_lastchange); 734 735 if (sc->sc_ifflags & IFF_DEBUG) 736 if_printf(sc->sc_ifp, 737 "capabilities 0x%08x enabled 0x%08x\n", cap, ena); 738 } 739 } 740 741 static int 742 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) 743 { 744 struct lagg_softc *sc_ptr; 745 struct lagg_port *lp, *tlp; 746 struct ifreq ifr; 747 int error, i, oldmtu; 748 int if_type; 749 uint64_t *pval; 750 751 LAGG_XLOCK_ASSERT(sc); 752 753 if (sc->sc_ifp == ifp) { 754 if_printf(sc->sc_ifp, 755 "cannot add a lagg to itself as a port\n"); 756 return (EINVAL); 757 } 758 759 if (sc->sc_destroying == 1) 760 return (ENXIO); 761 762 /* Limit the maximal number of lagg ports */ 763 if (sc->sc_count >= LAGG_MAX_PORTS) 764 return (ENOSPC); 765 766 /* Check if port has already been associated to a lagg */ 767 if (ifp->if_lagg != NULL) { 768 /* Port is already in the current lagg? 
*/ 769 lp = (struct lagg_port *)ifp->if_lagg; 770 if (lp->lp_softc == sc) 771 return (EEXIST); 772 return (EBUSY); 773 } 774 775 switch (sc->sc_ifp->if_type) { 776 case IFT_ETHER: 777 /* XXX Disallow non-ethernet interfaces (this should be any of 802) */ 778 if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) 779 return (EPROTONOSUPPORT); 780 if_type = IFT_IEEE8023ADLAG; 781 break; 782 case IFT_INFINIBAND: 783 /* XXX Disallow non-infiniband interfaces */ 784 if (ifp->if_type != IFT_INFINIBAND) 785 return (EPROTONOSUPPORT); 786 if_type = IFT_INFINIBANDLAG; 787 break; 788 default: 789 break; 790 } 791 792 /* Allow the first Ethernet member to define the MTU */ 793 oldmtu = -1; 794 if (CK_SLIST_EMPTY(&sc->sc_ports)) { 795 sc->sc_ifp->if_mtu = ifp->if_mtu; 796 } else if (sc->sc_ifp->if_mtu != ifp->if_mtu) { 797 if (ifp->if_ioctl == NULL) { 798 if_printf(sc->sc_ifp, "cannot change MTU for %s\n", 799 ifp->if_xname); 800 return (EINVAL); 801 } 802 oldmtu = ifp->if_mtu; 803 strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)); 804 ifr.ifr_mtu = sc->sc_ifp->if_mtu; 805 error = (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); 806 if (error != 0) { 807 if_printf(sc->sc_ifp, "invalid MTU for %s\n", 808 ifp->if_xname); 809 return (error); 810 } 811 ifr.ifr_mtu = oldmtu; 812 } 813 814 lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO); 815 lp->lp_softc = sc; 816 817 /* Check if port is a stacked lagg */ 818 LAGG_LIST_LOCK(); 819 SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) { 820 if (ifp == sc_ptr->sc_ifp) { 821 LAGG_LIST_UNLOCK(); 822 free(lp, M_LAGG); 823 if (oldmtu != -1) 824 (*ifp->if_ioctl)(ifp, SIOCSIFMTU, 825 (caddr_t)&ifr); 826 return (EINVAL); 827 /* XXX disable stacking for the moment, its untested */ 828 #ifdef LAGG_PORT_STACKING 829 lp->lp_flags |= LAGG_PORT_STACK; 830 if (lagg_port_checkstacking(sc_ptr) >= 831 LAGG_MAX_STACKING) { 832 LAGG_LIST_UNLOCK(); 833 free(lp, M_LAGG); 834 if (oldmtu != -1) 835 (*ifp->if_ioctl)(ifp, SIOCSIFMTU, 836 (caddr_t)&ifr); 837 return (E2BIG); 838 } 839 #endif 840 } 841 } 842 LAGG_LIST_UNLOCK(); 843 844 if_ref(ifp); 845 lp->lp_ifp = ifp; 846 847 bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen); 848 lp->lp_ifcapenable = ifp->if_capenable; 849 if (CK_SLIST_EMPTY(&sc->sc_ports)) { 850 bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen); 851 lagg_proto_lladdr(sc); 852 EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); 853 } else { 854 if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen); 855 } 856 lagg_setflags(lp, 1); 857 858 if (CK_SLIST_EMPTY(&sc->sc_ports)) 859 sc->sc_primary = lp; 860 861 /* Change the interface type */ 862 lp->lp_iftype = ifp->if_type; 863 ifp->if_type = if_type; 864 ifp->if_lagg = lp; 865 lp->lp_ioctl = ifp->if_ioctl; 866 ifp->if_ioctl = lagg_port_ioctl; 867 lp->lp_output = ifp->if_output; 868 ifp->if_output = lagg_port_output; 869 870 /* Read port counters */ 871 pval = lp->port_counters.val; 872 for (i = 0; i < IFCOUNTERS; i++, pval++) 873 *pval = ifp->if_get_counter(ifp, i); 874 875 /* 876 * Insert into the list of ports. 877 * Keep ports sorted by if_index. It is handy, when configuration 878 * is predictable and `ifconfig laggN create ...` command 879 * will lead to the same result each time. 
880 */ 881 CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) { 882 if (tlp->lp_ifp->if_index < ifp->if_index && ( 883 CK_SLIST_NEXT(tlp, lp_entries) == NULL || 884 ((struct lagg_port*)CK_SLIST_NEXT(tlp, lp_entries))->lp_ifp->if_index > 885 ifp->if_index)) 886 break; 887 } 888 if (tlp != NULL) 889 CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries); 890 else 891 CK_SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries); 892 sc->sc_count++; 893 894 lagg_setmulti(lp); 895 896 if ((error = lagg_proto_addport(sc, lp)) != 0) { 897 /* Remove the port, without calling pr_delport. */ 898 lagg_port_destroy(lp, 0); 899 if (oldmtu != -1) 900 (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); 901 return (error); 902 } 903 904 /* Update lagg capabilities */ 905 lagg_capabilities(sc); 906 lagg_linkstate(sc); 907 908 return (0); 909 } 910 911 #ifdef LAGG_PORT_STACKING 912 static int 913 lagg_port_checkstacking(struct lagg_softc *sc) 914 { 915 struct lagg_softc *sc_ptr; 916 struct lagg_port *lp; 917 int m = 0; 918 919 LAGG_SXLOCK_ASSERT(sc); 920 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 921 if (lp->lp_flags & LAGG_PORT_STACK) { 922 sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc; 923 m = MAX(m, lagg_port_checkstacking(sc_ptr)); 924 } 925 } 926 927 return (m + 1); 928 } 929 #endif 930 931 static void 932 lagg_port_destroy_cb(epoch_context_t ec) 933 { 934 struct lagg_port *lp; 935 struct ifnet *ifp; 936 937 lp = __containerof(ec, struct lagg_port, lp_epoch_ctx); 938 ifp = lp->lp_ifp; 939 940 if_rele(ifp); 941 free(lp, M_LAGG); 942 } 943 944 static int 945 lagg_port_destroy(struct lagg_port *lp, int rundelport) 946 { 947 struct lagg_softc *sc = lp->lp_softc; 948 struct lagg_port *lp_ptr, *lp0; 949 struct ifnet *ifp = lp->lp_ifp; 950 uint64_t *pval, vdiff; 951 int i; 952 953 LAGG_XLOCK_ASSERT(sc); 954 955 if (rundelport) 956 lagg_proto_delport(sc, lp); 957 958 if (lp->lp_detaching == 0) 959 lagg_clrmulti(lp); 960 961 /* Restore interface */ 962 ifp->if_type = lp->lp_iftype; 963 ifp->if_ioctl = lp->lp_ioctl; 964 ifp->if_output = lp->lp_output; 965 ifp->if_lagg = NULL; 966 967 /* Update detached port counters */ 968 pval = lp->port_counters.val; 969 for (i = 0; i < IFCOUNTERS; i++, pval++) { 970 vdiff = ifp->if_get_counter(ifp, i) - *pval; 971 sc->detached_counters.val[i] += vdiff; 972 } 973 974 /* Finally, remove the port from the lagg */ 975 CK_SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries); 976 sc->sc_count--; 977 978 /* Update the primary interface */ 979 if (lp == sc->sc_primary) { 980 uint8_t lladdr[LAGG_ADDR_LEN]; 981 982 if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL) 983 bzero(&lladdr, LAGG_ADDR_LEN); 984 else 985 bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN); 986 sc->sc_primary = lp0; 987 if (sc->sc_destroying == 0) { 988 bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen); 989 lagg_proto_lladdr(sc); 990 EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); 991 992 /* 993 * Update lladdr for each port (new primary needs update 994 * as well, to switch from old lladdr to its 'real' one). 995 * We can skip this if the lagg is being destroyed. 
996 */ 997 CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) 998 if_setlladdr(lp_ptr->lp_ifp, lladdr, 999 lp_ptr->lp_ifp->if_addrlen); 1000 } 1001 } 1002 1003 if (lp->lp_ifflags) 1004 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__); 1005 1006 if (lp->lp_detaching == 0) { 1007 lagg_setflags(lp, 0); 1008 lagg_setcaps(lp, lp->lp_ifcapenable, lp->lp_ifcapenable2); 1009 if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen); 1010 } 1011 1012 /* 1013 * free port and release it's ifnet reference after a grace period has 1014 * elapsed. 1015 */ 1016 NET_EPOCH_CALL(lagg_port_destroy_cb, &lp->lp_epoch_ctx); 1017 /* Update lagg capabilities */ 1018 lagg_capabilities(sc); 1019 lagg_linkstate(sc); 1020 1021 return (0); 1022 } 1023 1024 static int 1025 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1026 { 1027 struct epoch_tracker et; 1028 struct lagg_reqport *rp = (struct lagg_reqport *)data; 1029 struct lagg_softc *sc; 1030 struct lagg_port *lp = NULL; 1031 int error = 0; 1032 1033 /* Should be checked by the caller */ 1034 switch (ifp->if_type) { 1035 case IFT_IEEE8023ADLAG: 1036 case IFT_INFINIBANDLAG: 1037 if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL) 1038 goto fallback; 1039 break; 1040 default: 1041 goto fallback; 1042 } 1043 1044 switch (cmd) { 1045 case SIOCGLAGGPORT: 1046 if (rp->rp_portname[0] == '\0' || 1047 ifunit(rp->rp_portname) != ifp) { 1048 error = EINVAL; 1049 break; 1050 } 1051 1052 NET_EPOCH_ENTER(et); 1053 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) { 1054 error = ENOENT; 1055 NET_EPOCH_EXIT(et); 1056 break; 1057 } 1058 1059 lagg_port2req(lp, rp); 1060 NET_EPOCH_EXIT(et); 1061 break; 1062 1063 case SIOCSIFCAP: 1064 case SIOCSIFCAPNV: 1065 if (lp->lp_ioctl == NULL) { 1066 error = EINVAL; 1067 break; 1068 } 1069 error = (*lp->lp_ioctl)(ifp, cmd, data); 1070 if (error) 1071 break; 1072 1073 /* Update lagg interface capabilities */ 1074 LAGG_XLOCK(sc); 1075 lagg_capabilities(sc); 1076 LAGG_XUNLOCK(sc); 1077 VLAN_CAPABILITIES(sc->sc_ifp); 1078 break; 1079 1080 case SIOCSIFMTU: 1081 /* Do not allow the MTU to be changed once joined */ 1082 error = EINVAL; 1083 break; 1084 1085 default: 1086 goto fallback; 1087 } 1088 1089 return (error); 1090 1091 fallback: 1092 if (lp != NULL && lp->lp_ioctl != NULL) 1093 return ((*lp->lp_ioctl)(ifp, cmd, data)); 1094 1095 return (EINVAL); 1096 } 1097 1098 /* 1099 * Requests counter @cnt data. 1100 * 1101 * Counter value is calculated the following way: 1102 * 1) for each port, sum difference between current and "initial" measurements. 1103 * 2) add lagg logical interface counters. 1104 * 3) add data from detached_counters array. 1105 * 1106 * We also do the following things on ports attach/detach: 1107 * 1) On port attach we store all counters it has into port_counter array. 1108 * 2) On port detach we add the different between "initial" and 1109 * current counters data to detached_counters array. 1110 */ 1111 static uint64_t 1112 lagg_get_counter(struct ifnet *ifp, ift_counter cnt) 1113 { 1114 struct epoch_tracker et; 1115 struct lagg_softc *sc; 1116 struct lagg_port *lp; 1117 struct ifnet *lpifp; 1118 uint64_t newval, oldval, vsum; 1119 1120 /* Revise this when we've got non-generic counters. 
*/ 1121 KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); 1122 1123 sc = (struct lagg_softc *)ifp->if_softc; 1124 1125 vsum = 0; 1126 NET_EPOCH_ENTER(et); 1127 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1128 /* Saved attached value */ 1129 oldval = lp->port_counters.val[cnt]; 1130 /* current value */ 1131 lpifp = lp->lp_ifp; 1132 newval = lpifp->if_get_counter(lpifp, cnt); 1133 /* Calculate diff and save new */ 1134 vsum += newval - oldval; 1135 } 1136 NET_EPOCH_EXIT(et); 1137 1138 /* 1139 * Add counter data which might be added by upper 1140 * layer protocols operating on logical interface. 1141 */ 1142 vsum += if_get_counter_default(ifp, cnt); 1143 1144 /* 1145 * Add counter data from detached ports counters 1146 */ 1147 vsum += sc->detached_counters.val[cnt]; 1148 1149 return (vsum); 1150 } 1151 1152 /* 1153 * For direct output to child ports. 1154 */ 1155 static int 1156 lagg_port_output(struct ifnet *ifp, struct mbuf *m, 1157 const struct sockaddr *dst, struct route *ro) 1158 { 1159 struct lagg_port *lp = ifp->if_lagg; 1160 1161 switch (dst->sa_family) { 1162 case pseudo_AF_HDRCMPLT: 1163 case AF_UNSPEC: 1164 if (lp != NULL) 1165 return ((*lp->lp_output)(ifp, m, dst, ro)); 1166 } 1167 1168 /* drop any other frames */ 1169 m_freem(m); 1170 return (ENETDOWN); 1171 } 1172 1173 static void 1174 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp) 1175 { 1176 struct lagg_port *lp; 1177 struct lagg_softc *sc; 1178 1179 if ((lp = ifp->if_lagg) == NULL) 1180 return; 1181 /* If the ifnet is just being renamed, don't do anything. */ 1182 if (ifp->if_flags & IFF_RENAMING) 1183 return; 1184 1185 sc = lp->lp_softc; 1186 1187 LAGG_XLOCK(sc); 1188 lp->lp_detaching = 1; 1189 lagg_port_destroy(lp, 1); 1190 LAGG_XUNLOCK(sc); 1191 VLAN_CAPABILITIES(sc->sc_ifp); 1192 } 1193 1194 static void 1195 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp) 1196 { 1197 struct lagg_softc *sc = lp->lp_softc; 1198 1199 strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname)); 1200 strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname)); 1201 rp->rp_prio = lp->lp_prio; 1202 rp->rp_flags = lp->lp_flags; 1203 lagg_proto_portreq(sc, lp, &rp->rp_psc); 1204 1205 /* Add protocol specific flags */ 1206 switch (sc->sc_proto) { 1207 case LAGG_PROTO_FAILOVER: 1208 if (lp == sc->sc_primary) 1209 rp->rp_flags |= LAGG_PORT_MASTER; 1210 if (lp == lagg_link_active(sc, sc->sc_primary)) 1211 rp->rp_flags |= LAGG_PORT_ACTIVE; 1212 break; 1213 1214 case LAGG_PROTO_ROUNDROBIN: 1215 case LAGG_PROTO_LOADBALANCE: 1216 case LAGG_PROTO_BROADCAST: 1217 if (LAGG_PORTACTIVE(lp)) 1218 rp->rp_flags |= LAGG_PORT_ACTIVE; 1219 break; 1220 1221 case LAGG_PROTO_LACP: 1222 /* LACP has a different definition of active */ 1223 if (lacp_isactive(lp)) 1224 rp->rp_flags |= LAGG_PORT_ACTIVE; 1225 if (lacp_iscollecting(lp)) 1226 rp->rp_flags |= LAGG_PORT_COLLECTING; 1227 if (lacp_isdistributing(lp)) 1228 rp->rp_flags |= LAGG_PORT_DISTRIBUTING; 1229 break; 1230 } 1231 1232 } 1233 1234 static void 1235 lagg_watchdog_infiniband(void *arg) 1236 { 1237 struct epoch_tracker et; 1238 struct lagg_softc *sc; 1239 struct lagg_port *lp; 1240 struct ifnet *ifp; 1241 struct ifnet *lp_ifp; 1242 1243 sc = arg; 1244 1245 /* 1246 * Because infiniband nodes have a fixed MAC address, which is 1247 * generated by the so-called GID, we need to regularly update 1248 * the link level address of the parent lagg<N> device when 1249 * the active port changes. 
Possibly we could piggy-back on 1250 * link up/down events aswell, but using a timer also provides 1251 * a guarantee against too frequent events. This operation 1252 * does not have to be atomic. 1253 */ 1254 NET_EPOCH_ENTER(et); 1255 lp = lagg_link_active(sc, sc->sc_primary); 1256 if (lp != NULL) { 1257 ifp = sc->sc_ifp; 1258 lp_ifp = lp->lp_ifp; 1259 1260 if (ifp != NULL && lp_ifp != NULL && 1261 (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0 || 1262 memcmp(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen) != 0)) { 1263 memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen); 1264 memcpy(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen); 1265 1266 CURVNET_SET(ifp->if_vnet); 1267 EVENTHANDLER_INVOKE(iflladdr_event, ifp); 1268 CURVNET_RESTORE(); 1269 } 1270 } 1271 NET_EPOCH_EXIT(et); 1272 1273 callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg); 1274 } 1275 1276 static void 1277 lagg_init(void *xsc) 1278 { 1279 struct lagg_softc *sc = (struct lagg_softc *)xsc; 1280 struct ifnet *ifp = sc->sc_ifp; 1281 struct lagg_port *lp; 1282 1283 LAGG_XLOCK(sc); 1284 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1285 LAGG_XUNLOCK(sc); 1286 return; 1287 } 1288 1289 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1290 1291 /* 1292 * Update the port lladdrs if needed. 1293 * This might be if_setlladdr() notification 1294 * that lladdr has been changed. 1295 */ 1296 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1297 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp), 1298 ifp->if_addrlen) != 0) 1299 if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen); 1300 } 1301 1302 lagg_proto_init(sc); 1303 1304 if (ifp->if_type == IFT_INFINIBAND) { 1305 mtx_lock(&sc->sc_mtx); 1306 lagg_watchdog_infiniband(sc); 1307 mtx_unlock(&sc->sc_mtx); 1308 } 1309 1310 LAGG_XUNLOCK(sc); 1311 } 1312 1313 static void 1314 lagg_stop(struct lagg_softc *sc) 1315 { 1316 struct ifnet *ifp = sc->sc_ifp; 1317 1318 LAGG_XLOCK_ASSERT(sc); 1319 1320 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1321 return; 1322 1323 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1324 1325 lagg_proto_stop(sc); 1326 1327 mtx_lock(&sc->sc_mtx); 1328 callout_stop(&sc->sc_watchdog); 1329 mtx_unlock(&sc->sc_mtx); 1330 1331 callout_drain(&sc->sc_watchdog); 1332 } 1333 1334 static int 1335 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1336 { 1337 struct epoch_tracker et; 1338 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; 1339 struct lagg_reqall *ra = (struct lagg_reqall *)data; 1340 struct lagg_reqopts *ro = (struct lagg_reqopts *)data; 1341 struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf; 1342 struct lagg_reqflags *rf = (struct lagg_reqflags *)data; 1343 struct ifreq *ifr = (struct ifreq *)data; 1344 struct lagg_port *lp; 1345 struct ifnet *tpif; 1346 struct thread *td = curthread; 1347 char *buf, *outbuf; 1348 int count, buflen, len, error = 0, oldmtu; 1349 1350 bzero(&rpbuf, sizeof(rpbuf)); 1351 1352 /* XXX: This can race with lagg_clone_destroy. 
*/ 1353 1354 switch (cmd) { 1355 case SIOCGLAGG: 1356 LAGG_XLOCK(sc); 1357 buflen = sc->sc_count * sizeof(struct lagg_reqport); 1358 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); 1359 ra->ra_proto = sc->sc_proto; 1360 lagg_proto_request(sc, &ra->ra_psc); 1361 count = 0; 1362 buf = outbuf; 1363 len = min(ra->ra_size, buflen); 1364 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1365 if (len < sizeof(rpbuf)) 1366 break; 1367 1368 lagg_port2req(lp, &rpbuf); 1369 memcpy(buf, &rpbuf, sizeof(rpbuf)); 1370 count++; 1371 buf += sizeof(rpbuf); 1372 len -= sizeof(rpbuf); 1373 } 1374 LAGG_XUNLOCK(sc); 1375 ra->ra_ports = count; 1376 ra->ra_size = count * sizeof(rpbuf); 1377 error = copyout(outbuf, ra->ra_port, ra->ra_size); 1378 free(outbuf, M_TEMP); 1379 break; 1380 case SIOCSLAGG: 1381 error = priv_check(td, PRIV_NET_LAGG); 1382 if (error) 1383 break; 1384 if (ra->ra_proto >= LAGG_PROTO_MAX) { 1385 error = EPROTONOSUPPORT; 1386 break; 1387 } 1388 /* Infiniband only supports the failover protocol. */ 1389 if (ra->ra_proto != LAGG_PROTO_FAILOVER && 1390 ifp->if_type == IFT_INFINIBAND) { 1391 error = EPROTONOSUPPORT; 1392 break; 1393 } 1394 LAGG_XLOCK(sc); 1395 lagg_proto_detach(sc); 1396 lagg_proto_attach(sc, ra->ra_proto); 1397 LAGG_XUNLOCK(sc); 1398 break; 1399 case SIOCGLAGGOPTS: 1400 LAGG_XLOCK(sc); 1401 ro->ro_opts = sc->sc_opts; 1402 if (sc->sc_proto == LAGG_PROTO_LACP) { 1403 struct lacp_softc *lsc; 1404 1405 lsc = (struct lacp_softc *)sc->sc_psc; 1406 if (lsc->lsc_debug.lsc_tx_test != 0) 1407 ro->ro_opts |= LAGG_OPT_LACP_TXTEST; 1408 if (lsc->lsc_debug.lsc_rx_test != 0) 1409 ro->ro_opts |= LAGG_OPT_LACP_RXTEST; 1410 if (lsc->lsc_strict_mode != 0) 1411 ro->ro_opts |= LAGG_OPT_LACP_STRICT; 1412 if (lsc->lsc_fast_timeout != 0) 1413 ro->ro_opts |= LAGG_OPT_LACP_FAST_TIMO; 1414 1415 ro->ro_active = sc->sc_active; 1416 } else { 1417 ro->ro_active = 0; 1418 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 1419 ro->ro_active += LAGG_PORTACTIVE(lp); 1420 } 1421 ro->ro_bkt = sc->sc_stride; 1422 ro->ro_flapping = sc->sc_flapping; 1423 ro->ro_flowid_shift = sc->flowid_shift; 1424 LAGG_XUNLOCK(sc); 1425 break; 1426 case SIOCSLAGGOPTS: 1427 error = priv_check(td, PRIV_NET_LAGG); 1428 if (error) 1429 break; 1430 1431 /* 1432 * The stride option was added without defining a corresponding 1433 * LAGG_OPT flag, so handle a non-zero value before checking 1434 * anything else to preserve compatibility. 1435 */ 1436 LAGG_XLOCK(sc); 1437 if (ro->ro_opts == 0 && ro->ro_bkt != 0) { 1438 if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN) { 1439 LAGG_XUNLOCK(sc); 1440 error = EINVAL; 1441 break; 1442 } 1443 sc->sc_stride = ro->ro_bkt; 1444 } 1445 if (ro->ro_opts == 0) { 1446 LAGG_XUNLOCK(sc); 1447 break; 1448 } 1449 1450 /* 1451 * Set options. LACP options are stored in sc->sc_psc, 1452 * not in sc_opts. 
1453 */ 1454 int valid, lacp; 1455 1456 switch (ro->ro_opts) { 1457 case LAGG_OPT_USE_FLOWID: 1458 case -LAGG_OPT_USE_FLOWID: 1459 case LAGG_OPT_USE_NUMA: 1460 case -LAGG_OPT_USE_NUMA: 1461 case LAGG_OPT_FLOWIDSHIFT: 1462 case LAGG_OPT_RR_LIMIT: 1463 valid = 1; 1464 lacp = 0; 1465 break; 1466 case LAGG_OPT_LACP_TXTEST: 1467 case -LAGG_OPT_LACP_TXTEST: 1468 case LAGG_OPT_LACP_RXTEST: 1469 case -LAGG_OPT_LACP_RXTEST: 1470 case LAGG_OPT_LACP_STRICT: 1471 case -LAGG_OPT_LACP_STRICT: 1472 case LAGG_OPT_LACP_FAST_TIMO: 1473 case -LAGG_OPT_LACP_FAST_TIMO: 1474 valid = lacp = 1; 1475 break; 1476 default: 1477 valid = lacp = 0; 1478 break; 1479 } 1480 1481 if (valid == 0 || 1482 (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) { 1483 /* Invalid combination of options specified. */ 1484 error = EINVAL; 1485 LAGG_XUNLOCK(sc); 1486 break; /* Return from SIOCSLAGGOPTS. */ 1487 } 1488 1489 /* 1490 * Store new options into sc->sc_opts except for 1491 * FLOWIDSHIFT, RR and LACP options. 1492 */ 1493 if (lacp == 0) { 1494 if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT) 1495 sc->flowid_shift = ro->ro_flowid_shift; 1496 else if (ro->ro_opts == LAGG_OPT_RR_LIMIT) { 1497 if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN || 1498 ro->ro_bkt == 0) { 1499 error = EINVAL; 1500 LAGG_XUNLOCK(sc); 1501 break; 1502 } 1503 sc->sc_stride = ro->ro_bkt; 1504 } else if (ro->ro_opts > 0) 1505 sc->sc_opts |= ro->ro_opts; 1506 else 1507 sc->sc_opts &= ~ro->ro_opts; 1508 } else { 1509 struct lacp_softc *lsc; 1510 struct lacp_port *lp; 1511 1512 lsc = (struct lacp_softc *)sc->sc_psc; 1513 1514 switch (ro->ro_opts) { 1515 case LAGG_OPT_LACP_TXTEST: 1516 lsc->lsc_debug.lsc_tx_test = 1; 1517 break; 1518 case -LAGG_OPT_LACP_TXTEST: 1519 lsc->lsc_debug.lsc_tx_test = 0; 1520 break; 1521 case LAGG_OPT_LACP_RXTEST: 1522 lsc->lsc_debug.lsc_rx_test = 1; 1523 break; 1524 case -LAGG_OPT_LACP_RXTEST: 1525 lsc->lsc_debug.lsc_rx_test = 0; 1526 break; 1527 case LAGG_OPT_LACP_STRICT: 1528 lsc->lsc_strict_mode = 1; 1529 break; 1530 case -LAGG_OPT_LACP_STRICT: 1531 lsc->lsc_strict_mode = 0; 1532 break; 1533 case LAGG_OPT_LACP_FAST_TIMO: 1534 LACP_LOCK(lsc); 1535 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) 1536 lp->lp_state |= LACP_STATE_TIMEOUT; 1537 LACP_UNLOCK(lsc); 1538 lsc->lsc_fast_timeout = 1; 1539 break; 1540 case -LAGG_OPT_LACP_FAST_TIMO: 1541 LACP_LOCK(lsc); 1542 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) 1543 lp->lp_state &= ~LACP_STATE_TIMEOUT; 1544 LACP_UNLOCK(lsc); 1545 lsc->lsc_fast_timeout = 0; 1546 break; 1547 } 1548 } 1549 LAGG_XUNLOCK(sc); 1550 break; 1551 case SIOCGLAGGFLAGS: 1552 rf->rf_flags = 0; 1553 LAGG_XLOCK(sc); 1554 if (sc->sc_flags & MBUF_HASHFLAG_L2) 1555 rf->rf_flags |= LAGG_F_HASHL2; 1556 if (sc->sc_flags & MBUF_HASHFLAG_L3) 1557 rf->rf_flags |= LAGG_F_HASHL3; 1558 if (sc->sc_flags & MBUF_HASHFLAG_L4) 1559 rf->rf_flags |= LAGG_F_HASHL4; 1560 LAGG_XUNLOCK(sc); 1561 break; 1562 case SIOCSLAGGHASH: 1563 error = priv_check(td, PRIV_NET_LAGG); 1564 if (error) 1565 break; 1566 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) { 1567 error = EINVAL; 1568 break; 1569 } 1570 LAGG_XLOCK(sc); 1571 sc->sc_flags = 0; 1572 if (rf->rf_flags & LAGG_F_HASHL2) 1573 sc->sc_flags |= MBUF_HASHFLAG_L2; 1574 if (rf->rf_flags & LAGG_F_HASHL3) 1575 sc->sc_flags |= MBUF_HASHFLAG_L3; 1576 if (rf->rf_flags & LAGG_F_HASHL4) 1577 sc->sc_flags |= MBUF_HASHFLAG_L4; 1578 LAGG_XUNLOCK(sc); 1579 break; 1580 case SIOCGLAGGPORT: 1581 if (rp->rp_portname[0] == '\0' || 1582 (tpif = ifunit_ref(rp->rp_portname)) == NULL) { 1583 error = EINVAL; 1584 break; 1585 } 1586 1587 
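		/*
		 * The name resolved to an interface; verify under the net
		 * epoch that it is still one of this lagg's ports and copy
		 * its state out via lagg_port2req().  The ifunit_ref()
		 * reference on tpif is released on every path.
		 */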
NET_EPOCH_ENTER(et); 1588 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || 1589 lp->lp_softc != sc) { 1590 error = ENOENT; 1591 NET_EPOCH_EXIT(et); 1592 if_rele(tpif); 1593 break; 1594 } 1595 1596 lagg_port2req(lp, rp); 1597 NET_EPOCH_EXIT(et); 1598 if_rele(tpif); 1599 break; 1600 case SIOCSLAGGPORT: 1601 error = priv_check(td, PRIV_NET_LAGG); 1602 if (error) 1603 break; 1604 if (rp->rp_portname[0] == '\0' || 1605 (tpif = ifunit_ref(rp->rp_portname)) == NULL) { 1606 error = EINVAL; 1607 break; 1608 } 1609 #ifdef INET6 1610 /* 1611 * A laggport interface should not have inet6 address 1612 * because two interfaces with a valid link-local 1613 * scope zone must not be merged in any form. This 1614 * restriction is needed to prevent violation of 1615 * link-local scope zone. Attempts to add a laggport 1616 * interface which has inet6 addresses triggers 1617 * removal of all inet6 addresses on the member 1618 * interface. 1619 */ 1620 if (in6ifa_llaonifp(tpif)) { 1621 in6_ifdetach(tpif); 1622 if_printf(sc->sc_ifp, 1623 "IPv6 addresses on %s have been removed " 1624 "before adding it as a member to prevent " 1625 "IPv6 address scope violation.\n", 1626 tpif->if_xname); 1627 } 1628 #endif 1629 oldmtu = ifp->if_mtu; 1630 LAGG_XLOCK(sc); 1631 error = lagg_port_create(sc, tpif); 1632 LAGG_XUNLOCK(sc); 1633 if_rele(tpif); 1634 1635 /* 1636 * LAGG MTU may change during addition of the first port. 1637 * If it did, do network layer specific procedure. 1638 */ 1639 if (ifp->if_mtu != oldmtu) { 1640 #ifdef INET6 1641 nd6_setmtu(ifp); 1642 #endif 1643 rt_updatemtu(ifp); 1644 } 1645 1646 VLAN_CAPABILITIES(ifp); 1647 break; 1648 case SIOCSLAGGDELPORT: 1649 error = priv_check(td, PRIV_NET_LAGG); 1650 if (error) 1651 break; 1652 if (rp->rp_portname[0] == '\0' || 1653 (tpif = ifunit_ref(rp->rp_portname)) == NULL) { 1654 error = EINVAL; 1655 break; 1656 } 1657 1658 LAGG_XLOCK(sc); 1659 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || 1660 lp->lp_softc != sc) { 1661 error = ENOENT; 1662 LAGG_XUNLOCK(sc); 1663 if_rele(tpif); 1664 break; 1665 } 1666 1667 error = lagg_port_destroy(lp, 1); 1668 LAGG_XUNLOCK(sc); 1669 if_rele(tpif); 1670 VLAN_CAPABILITIES(ifp); 1671 break; 1672 case SIOCSIFFLAGS: 1673 /* Set flags on ports too */ 1674 LAGG_XLOCK(sc); 1675 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1676 lagg_setflags(lp, 1); 1677 } 1678 1679 if (!(ifp->if_flags & IFF_UP) && 1680 (ifp->if_drv_flags & IFF_DRV_RUNNING)) { 1681 /* 1682 * If interface is marked down and it is running, 1683 * then stop and disable it. 1684 */ 1685 lagg_stop(sc); 1686 LAGG_XUNLOCK(sc); 1687 } else if ((ifp->if_flags & IFF_UP) && 1688 !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 1689 /* 1690 * If interface is marked up and it is stopped, then 1691 * start it. 
1692 */ 1693 LAGG_XUNLOCK(sc); 1694 (*ifp->if_init)(sc); 1695 } else 1696 LAGG_XUNLOCK(sc); 1697 break; 1698 case SIOCADDMULTI: 1699 case SIOCDELMULTI: 1700 LAGG_XLOCK(sc); 1701 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1702 lagg_clrmulti(lp); 1703 lagg_setmulti(lp); 1704 } 1705 LAGG_XUNLOCK(sc); 1706 error = 0; 1707 break; 1708 case SIOCSIFMEDIA: 1709 case SIOCGIFMEDIA: 1710 if (ifp->if_type == IFT_INFINIBAND) 1711 error = EINVAL; 1712 else 1713 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1714 break; 1715 1716 case SIOCSIFCAP: 1717 case SIOCSIFCAPNV: 1718 LAGG_XLOCK(sc); 1719 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1720 if (lp->lp_ioctl != NULL) 1721 (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); 1722 } 1723 lagg_capabilities(sc); 1724 LAGG_XUNLOCK(sc); 1725 VLAN_CAPABILITIES(ifp); 1726 error = 0; 1727 break; 1728 1729 case SIOCGIFCAPNV: 1730 error = 0; 1731 break; 1732 1733 case SIOCSIFMTU: 1734 LAGG_XLOCK(sc); 1735 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1736 if (lp->lp_ioctl != NULL) 1737 error = (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); 1738 else 1739 error = EINVAL; 1740 if (error != 0) { 1741 if_printf(ifp, 1742 "failed to change MTU to %d on port %s, " 1743 "reverting all ports to original MTU (%d)\n", 1744 ifr->ifr_mtu, lp->lp_ifp->if_xname, ifp->if_mtu); 1745 break; 1746 } 1747 } 1748 if (error == 0) { 1749 ifp->if_mtu = ifr->ifr_mtu; 1750 } else { 1751 /* set every port back to the original MTU */ 1752 ifr->ifr_mtu = ifp->if_mtu; 1753 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1754 if (lp->lp_ioctl != NULL) 1755 (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); 1756 } 1757 } 1758 lagg_capabilities(sc); 1759 LAGG_XUNLOCK(sc); 1760 VLAN_CAPABILITIES(ifp); 1761 break; 1762 1763 default: 1764 error = ether_ioctl(ifp, cmd, data); 1765 break; 1766 } 1767 return (error); 1768 } 1769 1770 #if defined(KERN_TLS) || defined(RATELIMIT) 1771 #ifdef RATELIMIT 1772 static const struct if_snd_tag_sw lagg_snd_tag_ul_sw = { 1773 .snd_tag_modify = lagg_snd_tag_modify, 1774 .snd_tag_query = lagg_snd_tag_query, 1775 .snd_tag_free = lagg_snd_tag_free, 1776 .next_snd_tag = lagg_next_snd_tag, 1777 .type = IF_SND_TAG_TYPE_UNLIMITED 1778 }; 1779 1780 static const struct if_snd_tag_sw lagg_snd_tag_rl_sw = { 1781 .snd_tag_modify = lagg_snd_tag_modify, 1782 .snd_tag_query = lagg_snd_tag_query, 1783 .snd_tag_free = lagg_snd_tag_free, 1784 .next_snd_tag = lagg_next_snd_tag, 1785 .type = IF_SND_TAG_TYPE_RATE_LIMIT 1786 }; 1787 #endif 1788 1789 #ifdef KERN_TLS 1790 static const struct if_snd_tag_sw lagg_snd_tag_tls_sw = { 1791 .snd_tag_modify = lagg_snd_tag_modify, 1792 .snd_tag_query = lagg_snd_tag_query, 1793 .snd_tag_free = lagg_snd_tag_free, 1794 .next_snd_tag = lagg_next_snd_tag, 1795 .type = IF_SND_TAG_TYPE_TLS 1796 }; 1797 1798 #ifdef RATELIMIT 1799 static const struct if_snd_tag_sw lagg_snd_tag_tls_rl_sw = { 1800 .snd_tag_modify = lagg_snd_tag_modify, 1801 .snd_tag_query = lagg_snd_tag_query, 1802 .snd_tag_free = lagg_snd_tag_free, 1803 .next_snd_tag = lagg_next_snd_tag, 1804 .type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT 1805 }; 1806 #endif 1807 #endif 1808 1809 static inline struct lagg_snd_tag * 1810 mst_to_lst(struct m_snd_tag *mst) 1811 { 1812 1813 return (__containerof(mst, struct lagg_snd_tag, com)); 1814 } 1815 1816 /* 1817 * Look up the port used by a specific flow. This only works for lagg 1818 * protocols with deterministic port mappings (e.g. not roundrobin). 
1819 * In addition protocols which use a hash to map flows to ports must 1820 * be configured to use the mbuf flowid rather than hashing packet 1821 * contents. 1822 */ 1823 static struct lagg_port * 1824 lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype, 1825 uint8_t numa_domain) 1826 { 1827 struct lagg_softc *sc; 1828 struct lagg_port *lp; 1829 struct lagg_lb *lb; 1830 uint32_t hash, p; 1831 int err; 1832 1833 sc = ifp->if_softc; 1834 1835 switch (sc->sc_proto) { 1836 case LAGG_PROTO_FAILOVER: 1837 return (lagg_link_active(sc, sc->sc_primary)); 1838 case LAGG_PROTO_LOADBALANCE: 1839 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || 1840 flowtype == M_HASHTYPE_NONE) 1841 return (NULL); 1842 p = flowid >> sc->flowid_shift; 1843 p %= sc->sc_count; 1844 lb = (struct lagg_lb *)sc->sc_psc; 1845 lp = lb->lb_ports[p]; 1846 return (lagg_link_active(sc, lp)); 1847 case LAGG_PROTO_LACP: 1848 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || 1849 flowtype == M_HASHTYPE_NONE) 1850 return (NULL); 1851 hash = flowid >> sc->flowid_shift; 1852 return (lacp_select_tx_port_by_hash(sc, hash, numa_domain, &err)); 1853 default: 1854 return (NULL); 1855 } 1856 } 1857 1858 static int 1859 lagg_snd_tag_alloc(struct ifnet *ifp, 1860 union if_snd_tag_alloc_params *params, 1861 struct m_snd_tag **ppmt) 1862 { 1863 struct epoch_tracker et; 1864 const struct if_snd_tag_sw *sw; 1865 struct lagg_snd_tag *lst; 1866 struct lagg_port *lp; 1867 struct ifnet *lp_ifp; 1868 struct m_snd_tag *mst; 1869 int error; 1870 1871 switch (params->hdr.type) { 1872 #ifdef RATELIMIT 1873 case IF_SND_TAG_TYPE_UNLIMITED: 1874 sw = &lagg_snd_tag_ul_sw; 1875 break; 1876 case IF_SND_TAG_TYPE_RATE_LIMIT: 1877 sw = &lagg_snd_tag_rl_sw; 1878 break; 1879 #endif 1880 #ifdef KERN_TLS 1881 case IF_SND_TAG_TYPE_TLS: 1882 sw = &lagg_snd_tag_tls_sw; 1883 break; 1884 case IF_SND_TAG_TYPE_TLS_RX: 1885 /* Return tag from port interface directly. 
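 * TLS RX tags are not wrapped in a struct lagg_snd_tag: sw is left NULL
 * and the tag allocated from the selected port is handed back to the
 * caller unchanged further below.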
*/ 1886 sw = NULL; 1887 break; 1888 #ifdef RATELIMIT 1889 case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: 1890 sw = &lagg_snd_tag_tls_rl_sw; 1891 break; 1892 #endif 1893 #endif 1894 default: 1895 return (EOPNOTSUPP); 1896 } 1897 1898 NET_EPOCH_ENTER(et); 1899 lp = lookup_snd_tag_port(ifp, params->hdr.flowid, 1900 params->hdr.flowtype, params->hdr.numa_domain); 1901 if (lp == NULL) { 1902 NET_EPOCH_EXIT(et); 1903 return (EOPNOTSUPP); 1904 } 1905 if (lp->lp_ifp == NULL) { 1906 NET_EPOCH_EXIT(et); 1907 return (EOPNOTSUPP); 1908 } 1909 lp_ifp = lp->lp_ifp; 1910 if_ref(lp_ifp); 1911 NET_EPOCH_EXIT(et); 1912 1913 if (sw != NULL) { 1914 lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT); 1915 if (lst == NULL) { 1916 if_rele(lp_ifp); 1917 return (ENOMEM); 1918 } 1919 } else 1920 lst = NULL; 1921 1922 error = m_snd_tag_alloc(lp_ifp, params, &mst); 1923 if_rele(lp_ifp); 1924 if (error) { 1925 free(lst, M_LAGG); 1926 return (error); 1927 } 1928 1929 if (sw != NULL) { 1930 m_snd_tag_init(&lst->com, ifp, sw); 1931 lst->tag = mst; 1932 1933 *ppmt = &lst->com; 1934 } else 1935 *ppmt = mst; 1936 1937 return (0); 1938 } 1939 1940 static struct m_snd_tag * 1941 lagg_next_snd_tag(struct m_snd_tag *mst) 1942 { 1943 struct lagg_snd_tag *lst; 1944 1945 lst = mst_to_lst(mst); 1946 return (lst->tag); 1947 } 1948 1949 static int 1950 lagg_snd_tag_modify(struct m_snd_tag *mst, 1951 union if_snd_tag_modify_params *params) 1952 { 1953 struct lagg_snd_tag *lst; 1954 1955 lst = mst_to_lst(mst); 1956 return (lst->tag->sw->snd_tag_modify(lst->tag, params)); 1957 } 1958 1959 static int 1960 lagg_snd_tag_query(struct m_snd_tag *mst, 1961 union if_snd_tag_query_params *params) 1962 { 1963 struct lagg_snd_tag *lst; 1964 1965 lst = mst_to_lst(mst); 1966 return (lst->tag->sw->snd_tag_query(lst->tag, params)); 1967 } 1968 1969 static void 1970 lagg_snd_tag_free(struct m_snd_tag *mst) 1971 { 1972 struct lagg_snd_tag *lst; 1973 1974 lst = mst_to_lst(mst); 1975 m_snd_tag_rele(lst->tag); 1976 free(lst, M_LAGG); 1977 } 1978 1979 static void 1980 lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q) 1981 { 1982 /* 1983 * For lagg, we have an indirect 1984 * interface. The caller needs to 1985 * get a ratelimit tag on the actual 1986 * interface the flow will go on. 
1987 */ 1988 q->rate_table = NULL; 1989 q->flags = RT_IS_INDIRECT; 1990 q->max_flows = 0; 1991 q->number_of_rates = 0; 1992 } 1993 #endif 1994 1995 static int 1996 lagg_setmulti(struct lagg_port *lp) 1997 { 1998 struct lagg_softc *sc = lp->lp_softc; 1999 struct ifnet *ifp = lp->lp_ifp; 2000 struct ifnet *scifp = sc->sc_ifp; 2001 struct lagg_mc *mc; 2002 struct ifmultiaddr *ifma; 2003 int error; 2004 2005 IF_ADDR_WLOCK(scifp); 2006 CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) { 2007 if (ifma->ifma_addr->sa_family != AF_LINK) 2008 continue; 2009 mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT); 2010 if (mc == NULL) { 2011 IF_ADDR_WUNLOCK(scifp); 2012 return (ENOMEM); 2013 } 2014 bcopy(ifma->ifma_addr, &mc->mc_addr, 2015 ifma->ifma_addr->sa_len); 2016 mc->mc_addr.sdl_index = ifp->if_index; 2017 mc->mc_ifma = NULL; 2018 SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries); 2019 } 2020 IF_ADDR_WUNLOCK(scifp); 2021 SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) { 2022 error = if_addmulti(ifp, 2023 (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma); 2024 if (error) 2025 return (error); 2026 } 2027 return (0); 2028 } 2029 2030 static int 2031 lagg_clrmulti(struct lagg_port *lp) 2032 { 2033 struct lagg_mc *mc; 2034 2035 LAGG_XLOCK_ASSERT(lp->lp_softc); 2036 while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) { 2037 SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries); 2038 if (mc->mc_ifma && lp->lp_detaching == 0) 2039 if_delmulti_ifma(mc->mc_ifma); 2040 free(mc, M_LAGG); 2041 } 2042 return (0); 2043 } 2044 2045 static void 2046 lagg_setcaps(struct lagg_port *lp, int cap, int cap2) 2047 { 2048 struct ifreq ifr; 2049 struct siocsifcapnv_driver_data drv_ioctl_data; 2050 2051 if (lp->lp_ifp->if_capenable == cap && 2052 lp->lp_ifp->if_capenable2 == cap2) 2053 return; 2054 if (lp->lp_ioctl == NULL) 2055 return; 2056 /* XXX */ 2057 if ((lp->lp_ifp->if_capabilities & IFCAP_NV) != 0) { 2058 drv_ioctl_data.reqcap = cap; 2059 drv_ioctl_data.reqcap2 = cap2; 2060 drv_ioctl_data.nvcap = NULL; 2061 (*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAPNV, 2062 (caddr_t)&drv_ioctl_data); 2063 } else { 2064 ifr.ifr_reqcap = cap; 2065 (*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAP, (caddr_t)&ifr); 2066 } 2067 } 2068 2069 /* Handle a ref counted flag that should be set on the lagg port as well */ 2070 static int 2071 lagg_setflag(struct lagg_port *lp, int flag, int status, 2072 int (*func)(struct ifnet *, int)) 2073 { 2074 struct lagg_softc *sc = lp->lp_softc; 2075 struct ifnet *scifp = sc->sc_ifp; 2076 struct ifnet *ifp = lp->lp_ifp; 2077 int error; 2078 2079 LAGG_XLOCK_ASSERT(sc); 2080 2081 status = status ? (scifp->if_flags & flag) : 0; 2082 /* Now "status" contains the flag value or 0 */ 2083 2084 /* 2085 * See if recorded ports status is different from what 2086 * we want it to be. If it is, flip it. We record ports 2087 * status in lp_ifflags so that we won't clear ports flag 2088 * we haven't set. In fact, we don't clear or set ports 2089 * flags directly, but get or release references to them. 2090 * That's why we can be sure that recorded flags still are 2091 * in accord with actual ports flags. 
2092 */
2093 if (status != (lp->lp_ifflags & flag)) {
2094 error = (*func)(ifp, status);
2095 if (error)
2096 return (error);
2097 lp->lp_ifflags &= ~flag;
2098 lp->lp_ifflags |= status;
2099 }
2100 return (0);
2101 }
2102
2103 /*
2104 * Handle IFF_* flags that require certain changes on the lagg port:
2105 * if "status" is true, update the port's flags to match the lagg's;
2106 * if "status" is false, forcibly clear the flags set on the port.
2107 */
2108 static int
2109 lagg_setflags(struct lagg_port *lp, int status)
2110 {
2111 int error, i;
2112
2113 for (i = 0; lagg_pflags[i].flag; i++) {
2114 error = lagg_setflag(lp, lagg_pflags[i].flag,
2115 status, lagg_pflags[i].func);
2116 if (error)
2117 return (error);
2118 }
2119 return (0);
2120 }
2121
2122 static int
2123 lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
2124 {
2125 struct epoch_tracker et;
2126 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
2127 int error;
2128
2129 #if defined(KERN_TLS) || defined(RATELIMIT)
2130 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
2131 MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
2132 #endif
2133 NET_EPOCH_ENTER(et);
2134 /* We need a Tx algorithm and at least one port */
2135 if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
2136 NET_EPOCH_EXIT(et);
2137 m_freem(m);
2138 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2139 return (ENXIO);
2140 }
2141
2142 ETHER_BPF_MTAP(ifp, m);
2143
2144 error = lagg_proto_start(sc, m);
2145 NET_EPOCH_EXIT(et);
2146 return (error);
2147 }
2148
2149 static int
2150 lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
2151 {
2152 struct epoch_tracker et;
2153 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
2154 int error;
2155
2156 #if defined(KERN_TLS) || defined(RATELIMIT)
2157 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
2158 MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
2159 #endif
2160 NET_EPOCH_ENTER(et);
2161 /* We need a Tx algorithm and at least one port */
2162 if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
2163 NET_EPOCH_EXIT(et);
2164 m_freem(m);
2165 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2166 return (ENXIO);
2167 }
2168
2169 INFINIBAND_BPF_MTAP(ifp, m);
2170
2171 error = lagg_proto_start(sc, m);
2172 NET_EPOCH_EXIT(et);
2173 return (error);
2174 }
2175
2176 /*
2177 * The ifp->if_qflush entry point for lagg(4) is a no-op.
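 *
 * lagg(4) keeps no transmit queue of its own: lagg_transmit_ethernet()
 * and lagg_transmit_infiniband() hand every mbuf straight to the
 * protocol's start routine, which in turn passes it to a member port
 * via lagg_enqueue(), so there is nothing buffered at this layer that
 * a flush could discard.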
2178 */ 2179 static void 2180 lagg_qflush(struct ifnet *ifp __unused) 2181 { 2182 } 2183 2184 static struct mbuf * 2185 lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m) 2186 { 2187 struct epoch_tracker et; 2188 struct lagg_port *lp = ifp->if_lagg; 2189 struct lagg_softc *sc = lp->lp_softc; 2190 struct ifnet *scifp = sc->sc_ifp; 2191 2192 NET_EPOCH_ENTER(et); 2193 if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2194 lp->lp_detaching != 0 || 2195 sc->sc_proto == LAGG_PROTO_NONE) { 2196 NET_EPOCH_EXIT(et); 2197 m_freem(m); 2198 return (NULL); 2199 } 2200 2201 ETHER_BPF_MTAP(scifp, m); 2202 2203 m = lagg_proto_input(sc, lp, m); 2204 if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) { 2205 m_freem(m); 2206 m = NULL; 2207 } 2208 2209 #ifdef DEV_NETMAP 2210 if (m != NULL && scifp->if_capenable & IFCAP_NETMAP) { 2211 scifp->if_input(scifp, m); 2212 m = NULL; 2213 } 2214 #endif /* DEV_NETMAP */ 2215 2216 NET_EPOCH_EXIT(et); 2217 return (m); 2218 } 2219 2220 static struct mbuf * 2221 lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m) 2222 { 2223 struct epoch_tracker et; 2224 struct lagg_port *lp = ifp->if_lagg; 2225 struct lagg_softc *sc = lp->lp_softc; 2226 struct ifnet *scifp = sc->sc_ifp; 2227 2228 NET_EPOCH_ENTER(et); 2229 if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2230 lp->lp_detaching != 0 || 2231 sc->sc_proto == LAGG_PROTO_NONE) { 2232 NET_EPOCH_EXIT(et); 2233 m_freem(m); 2234 return (NULL); 2235 } 2236 2237 INFINIBAND_BPF_MTAP(scifp, m); 2238 2239 m = lagg_proto_input(sc, lp, m); 2240 if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) { 2241 m_freem(m); 2242 m = NULL; 2243 } 2244 2245 NET_EPOCH_EXIT(et); 2246 return (m); 2247 } 2248 2249 static int 2250 lagg_media_change(struct ifnet *ifp) 2251 { 2252 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; 2253 2254 if (sc->sc_ifflags & IFF_DEBUG) 2255 printf("%s\n", __func__); 2256 2257 /* Ignore */ 2258 return (0); 2259 } 2260 2261 static void 2262 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr) 2263 { 2264 struct epoch_tracker et; 2265 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; 2266 struct lagg_port *lp; 2267 2268 imr->ifm_status = IFM_AVALID; 2269 imr->ifm_active = IFM_ETHER | IFM_AUTO; 2270 2271 NET_EPOCH_ENTER(et); 2272 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 2273 if (LAGG_PORTACTIVE(lp)) 2274 imr->ifm_status |= IFM_ACTIVE; 2275 } 2276 NET_EPOCH_EXIT(et); 2277 } 2278 2279 static void 2280 lagg_linkstate(struct lagg_softc *sc) 2281 { 2282 struct epoch_tracker et; 2283 struct lagg_port *lp; 2284 int new_link = LINK_STATE_DOWN; 2285 uint64_t speed; 2286 2287 LAGG_XLOCK_ASSERT(sc); 2288 2289 /* LACP handles link state itself */ 2290 if (sc->sc_proto == LAGG_PROTO_LACP) 2291 return; 2292 2293 /* Our link is considered up if at least one of our ports is active */ 2294 NET_EPOCH_ENTER(et); 2295 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 2296 if (lp->lp_ifp->if_link_state == LINK_STATE_UP) { 2297 new_link = LINK_STATE_UP; 2298 break; 2299 } 2300 } 2301 NET_EPOCH_EXIT(et); 2302 if_link_state_change(sc->sc_ifp, new_link); 2303 2304 /* Update if_baudrate to reflect the max possible speed */ 2305 switch (sc->sc_proto) { 2306 case LAGG_PROTO_FAILOVER: 2307 sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ? 
2308 sc->sc_primary->lp_ifp->if_baudrate : 0; 2309 break; 2310 case LAGG_PROTO_ROUNDROBIN: 2311 case LAGG_PROTO_LOADBALANCE: 2312 case LAGG_PROTO_BROADCAST: 2313 speed = 0; 2314 NET_EPOCH_ENTER(et); 2315 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2316 speed += lp->lp_ifp->if_baudrate; 2317 NET_EPOCH_EXIT(et); 2318 sc->sc_ifp->if_baudrate = speed; 2319 break; 2320 case LAGG_PROTO_LACP: 2321 /* LACP updates if_baudrate itself */ 2322 break; 2323 } 2324 } 2325 2326 static void 2327 lagg_port_state(struct ifnet *ifp, int state) 2328 { 2329 struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg; 2330 struct lagg_softc *sc = NULL; 2331 2332 if (lp != NULL) 2333 sc = lp->lp_softc; 2334 if (sc == NULL) 2335 return; 2336 2337 LAGG_XLOCK(sc); 2338 lagg_linkstate(sc); 2339 lagg_proto_linkstate(sc, lp); 2340 LAGG_XUNLOCK(sc); 2341 } 2342 2343 struct lagg_port * 2344 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp) 2345 { 2346 struct lagg_port *lp_next, *rval = NULL; 2347 2348 /* 2349 * Search a port which reports an active link state. 2350 */ 2351 2352 #ifdef INVARIANTS 2353 /* 2354 * This is called with either in the network epoch 2355 * or with LAGG_XLOCK(sc) held. 2356 */ 2357 if (!in_epoch(net_epoch_preempt)) 2358 LAGG_XLOCK_ASSERT(sc); 2359 #endif 2360 2361 if (lp == NULL) 2362 goto search; 2363 if (LAGG_PORTACTIVE(lp)) { 2364 rval = lp; 2365 goto found; 2366 } 2367 if ((lp_next = CK_SLIST_NEXT(lp, lp_entries)) != NULL && 2368 LAGG_PORTACTIVE(lp_next)) { 2369 rval = lp_next; 2370 goto found; 2371 } 2372 2373 search: 2374 CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { 2375 if (LAGG_PORTACTIVE(lp_next)) { 2376 return (lp_next); 2377 } 2378 } 2379 found: 2380 return (rval); 2381 } 2382 2383 int 2384 lagg_enqueue(struct ifnet *ifp, struct mbuf *m) 2385 { 2386 2387 #if defined(KERN_TLS) || defined(RATELIMIT) 2388 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { 2389 struct lagg_snd_tag *lst; 2390 struct m_snd_tag *mst; 2391 2392 mst = m->m_pkthdr.snd_tag; 2393 lst = mst_to_lst(mst); 2394 if (lst->tag->ifp != ifp) { 2395 m_freem(m); 2396 return (EAGAIN); 2397 } 2398 m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag); 2399 m_snd_tag_rele(mst); 2400 } 2401 #endif 2402 return (ifp->if_transmit)(ifp, m); 2403 } 2404 2405 /* 2406 * Simple round robin aggregation 2407 */ 2408 static void 2409 lagg_rr_attach(struct lagg_softc *sc) 2410 { 2411 sc->sc_seq = 0; 2412 sc->sc_stride = 1; 2413 } 2414 2415 static int 2416 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m) 2417 { 2418 struct lagg_port *lp; 2419 uint32_t p; 2420 2421 p = atomic_fetchadd_32(&sc->sc_seq, 1); 2422 p /= sc->sc_stride; 2423 p %= sc->sc_count; 2424 lp = CK_SLIST_FIRST(&sc->sc_ports); 2425 2426 while (p--) 2427 lp = CK_SLIST_NEXT(lp, lp_entries); 2428 2429 /* 2430 * Check the port's link state. This will return the next active 2431 * port if the link is down or the port is NULL. 
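 *
 * (Illustrative note on the arithmetic above: with, say,
 * sc_stride == 2 and sc_count == 3, successive sc_seq values
 * 0,1,2,3,4,5,6,... select port indices 0,0,1,1,2,2,0,... so each
 * port carries a burst of sc_stride frames before the rotation
 * advances.  lagg_link_active() below only overrides that choice
 * when the selected port has no active link.)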
2432 */ 2433 if ((lp = lagg_link_active(sc, lp)) == NULL) { 2434 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2435 m_freem(m); 2436 return (ENETDOWN); 2437 } 2438 2439 /* Send mbuf */ 2440 return (lagg_enqueue(lp->lp_ifp, m)); 2441 } 2442 2443 static struct mbuf * 2444 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2445 { 2446 struct ifnet *ifp = sc->sc_ifp; 2447 2448 /* Just pass in the packet to our lagg device */ 2449 m->m_pkthdr.rcvif = ifp; 2450 2451 return (m); 2452 } 2453 2454 /* 2455 * Broadcast mode 2456 */ 2457 static int 2458 lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m) 2459 { 2460 int errors = 0; 2461 int ret; 2462 struct lagg_port *lp, *last = NULL; 2463 struct mbuf *m0; 2464 2465 NET_EPOCH_ASSERT(); 2466 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 2467 if (!LAGG_PORTACTIVE(lp)) 2468 continue; 2469 2470 if (last != NULL) { 2471 m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT); 2472 if (m0 == NULL) { 2473 ret = ENOBUFS; 2474 errors++; 2475 break; 2476 } 2477 lagg_enqueue(last->lp_ifp, m0); 2478 } 2479 last = lp; 2480 } 2481 2482 if (last == NULL) { 2483 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2484 m_freem(m); 2485 return (ENOENT); 2486 } 2487 if ((last = lagg_link_active(sc, last)) == NULL) { 2488 errors++; 2489 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors); 2490 m_freem(m); 2491 return (ENETDOWN); 2492 } 2493 2494 ret = lagg_enqueue(last->lp_ifp, m); 2495 if (errors != 0) 2496 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors); 2497 2498 return (ret); 2499 } 2500 2501 static struct mbuf* 2502 lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2503 { 2504 struct ifnet *ifp = sc->sc_ifp; 2505 2506 /* Just pass in the packet to our lagg device */ 2507 m->m_pkthdr.rcvif = ifp; 2508 return (m); 2509 } 2510 2511 /* 2512 * Active failover 2513 */ 2514 static int 2515 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m) 2516 { 2517 struct lagg_port *lp; 2518 2519 /* Use the master port if active or the next available port */ 2520 if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) { 2521 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2522 m_freem(m); 2523 return (ENETDOWN); 2524 } 2525 2526 /* Send mbuf */ 2527 return (lagg_enqueue(lp->lp_ifp, m)); 2528 } 2529 2530 static struct mbuf * 2531 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2532 { 2533 struct ifnet *ifp = sc->sc_ifp; 2534 struct lagg_port *tmp_tp; 2535 2536 if (lp == sc->sc_primary || V_lagg_failover_rx_all) { 2537 m->m_pkthdr.rcvif = ifp; 2538 return (m); 2539 } 2540 2541 if (!LAGG_PORTACTIVE(sc->sc_primary)) { 2542 tmp_tp = lagg_link_active(sc, sc->sc_primary); 2543 /* 2544 * If tmp_tp is null, we've received a packet when all 2545 * our links are down. Weird, but process it anyways. 
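 *
 * Otherwise the failover receive policy is strict: only frames
 * arriving on the primary port, or on the port currently standing in
 * for a down primary, are passed up; anything received on the
 * remaining ports is dropped below.  Setting the
 * net.link.lagg.failover_rx_all sysctl (checked above via
 * V_lagg_failover_rx_all) relaxes this and accepts traffic from every
 * member port.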
2546 */ 2547 if ((tmp_tp == NULL || tmp_tp == lp)) { 2548 m->m_pkthdr.rcvif = ifp; 2549 return (m); 2550 } 2551 } 2552 2553 m_freem(m); 2554 return (NULL); 2555 } 2556 2557 /* 2558 * Loadbalancing 2559 */ 2560 static void 2561 lagg_lb_attach(struct lagg_softc *sc) 2562 { 2563 struct lagg_port *lp; 2564 struct lagg_lb *lb; 2565 2566 LAGG_XLOCK_ASSERT(sc); 2567 lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO); 2568 lb->lb_key = m_ether_tcpip_hash_init(); 2569 sc->sc_psc = lb; 2570 2571 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2572 lagg_lb_port_create(lp); 2573 } 2574 2575 static void 2576 lagg_lb_detach(struct lagg_softc *sc) 2577 { 2578 struct lagg_lb *lb; 2579 2580 lb = (struct lagg_lb *)sc->sc_psc; 2581 if (lb != NULL) 2582 free(lb, M_LAGG); 2583 } 2584 2585 static int 2586 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp) 2587 { 2588 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; 2589 struct lagg_port *lp_next; 2590 int i = 0, rv; 2591 2592 rv = 0; 2593 bzero(&lb->lb_ports, sizeof(lb->lb_ports)); 2594 LAGG_XLOCK_ASSERT(sc); 2595 CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { 2596 if (lp_next == lp) 2597 continue; 2598 if (i >= LAGG_MAX_PORTS) { 2599 rv = EINVAL; 2600 break; 2601 } 2602 if (sc->sc_ifflags & IFF_DEBUG) 2603 printf("%s: port %s at index %d\n", 2604 sc->sc_ifname, lp_next->lp_ifp->if_xname, i); 2605 lb->lb_ports[i++] = lp_next; 2606 } 2607 2608 return (rv); 2609 } 2610 2611 static int 2612 lagg_lb_port_create(struct lagg_port *lp) 2613 { 2614 struct lagg_softc *sc = lp->lp_softc; 2615 return (lagg_lb_porttable(sc, NULL)); 2616 } 2617 2618 static void 2619 lagg_lb_port_destroy(struct lagg_port *lp) 2620 { 2621 struct lagg_softc *sc = lp->lp_softc; 2622 lagg_lb_porttable(sc, lp); 2623 } 2624 2625 static int 2626 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m) 2627 { 2628 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; 2629 struct lagg_port *lp = NULL; 2630 uint32_t p = 0; 2631 2632 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && 2633 M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2634 p = m->m_pkthdr.flowid >> sc->flowid_shift; 2635 else 2636 p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key); 2637 p %= sc->sc_count; 2638 lp = lb->lb_ports[p]; 2639 2640 /* 2641 * Check the port's link state. This will return the next active 2642 * port if the link is down or the port is NULL. 
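 *
 * (Illustrative note on the selection above: the index is sticky per
 * flow.  A packet whose flowid or computed hash works out to 5 on a
 * four-port aggregate is reduced to 5 % 4 == 1 and sent via
 * lb->lb_ports[1], and later packets of the same flow keep taking
 * that path while the port set is unchanged.  lagg_link_active()
 * below only redirects the flow when its chosen port has no link.)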
2643 */ 2644 if ((lp = lagg_link_active(sc, lp)) == NULL) { 2645 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2646 m_freem(m); 2647 return (ENETDOWN); 2648 } 2649 2650 /* Send mbuf */ 2651 return (lagg_enqueue(lp->lp_ifp, m)); 2652 } 2653 2654 static struct mbuf * 2655 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2656 { 2657 struct ifnet *ifp = sc->sc_ifp; 2658 2659 /* Just pass in the packet to our lagg device */ 2660 m->m_pkthdr.rcvif = ifp; 2661 2662 return (m); 2663 } 2664 2665 /* 2666 * 802.3ad LACP 2667 */ 2668 static void 2669 lagg_lacp_attach(struct lagg_softc *sc) 2670 { 2671 struct lagg_port *lp; 2672 2673 lacp_attach(sc); 2674 LAGG_XLOCK_ASSERT(sc); 2675 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2676 lacp_port_create(lp); 2677 } 2678 2679 static void 2680 lagg_lacp_detach(struct lagg_softc *sc) 2681 { 2682 struct lagg_port *lp; 2683 void *psc; 2684 2685 LAGG_XLOCK_ASSERT(sc); 2686 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2687 lacp_port_destroy(lp); 2688 2689 psc = sc->sc_psc; 2690 sc->sc_psc = NULL; 2691 lacp_detach(psc); 2692 } 2693 2694 static void 2695 lagg_lacp_lladdr(struct lagg_softc *sc) 2696 { 2697 struct lagg_port *lp; 2698 2699 LAGG_SXLOCK_ASSERT(sc); 2700 2701 /* purge all the lacp ports */ 2702 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2703 lacp_port_destroy(lp); 2704 2705 /* add them back in */ 2706 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2707 lacp_port_create(lp); 2708 } 2709 2710 static int 2711 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m) 2712 { 2713 struct lagg_port *lp; 2714 int err; 2715 2716 lp = lacp_select_tx_port(sc, m, &err); 2717 if (lp == NULL) { 2718 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2719 m_freem(m); 2720 return (err); 2721 } 2722 2723 /* Send mbuf */ 2724 return (lagg_enqueue(lp->lp_ifp, m)); 2725 } 2726 2727 static struct mbuf * 2728 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2729 { 2730 struct ifnet *ifp = sc->sc_ifp; 2731 struct ether_header *eh; 2732 u_short etype; 2733 2734 eh = mtod(m, struct ether_header *); 2735 etype = ntohs(eh->ether_type); 2736 2737 /* Tap off LACP control messages */ 2738 if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) { 2739 m = lacp_input(lp, m); 2740 if (m == NULL) 2741 return (NULL); 2742 } 2743 2744 /* 2745 * If the port is not collecting or not in the active aggregator then 2746 * free and return. 2747 */ 2748 if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) { 2749 m_freem(m); 2750 return (NULL); 2751 } 2752 2753 m->m_pkthdr.rcvif = ifp; 2754 return (m); 2755 } 2756
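/*
 * Example (administrative usage, not kernel code): a two-port LACP
 * aggregate would typically be set up from userland roughly like this,
 * assuming ifconfig(8) and hypothetical member interfaces ix0 and ix1:
 *
 *	ifconfig lagg0 create
 *	ifconfig lagg0 laggproto lacp laggport ix0 laggport ix1 \
 *	    192.0.2.10/24 up
 *
 * With that configuration, ETHERTYPE_SLOW control frames received on a
 * member port are consumed by lacp_input() in lagg_lacp_input() above,
 * and data frames are only passed up to lagg0 once the receiving port
 * is both collecting and part of the active aggregator.
 */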