1 /* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */ 2 3 /* 4 * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org> 5 * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org> 6 * Copyright (c) 2014, 2016 Marcelo Araujo <araujo@FreeBSD.org> 7 * 8 * Permission to use, copy, modify, and distribute this software for any 9 * purpose with or without fee is hereby granted, provided that the above 10 * copyright notice and this permission notice appear in all copies. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 19 */ 20 21 #include <sys/cdefs.h> 22 __FBSDID("$FreeBSD$"); 23 24 #include "opt_inet.h" 25 #include "opt_inet6.h" 26 #include "opt_kern_tls.h" 27 #include "opt_ratelimit.h" 28 29 #include <sys/param.h> 30 #include <sys/kernel.h> 31 #include <sys/malloc.h> 32 #include <sys/mbuf.h> 33 #include <sys/queue.h> 34 #include <sys/socket.h> 35 #include <sys/sockio.h> 36 #include <sys/sysctl.h> 37 #include <sys/module.h> 38 #include <sys/priv.h> 39 #include <sys/systm.h> 40 #include <sys/proc.h> 41 #include <sys/lock.h> 42 #include <sys/rmlock.h> 43 #include <sys/sx.h> 44 #include <sys/taskqueue.h> 45 #include <sys/eventhandler.h> 46 47 #include <net/ethernet.h> 48 #include <net/if.h> 49 #include <net/if_clone.h> 50 #include <net/if_arp.h> 51 #include <net/if_dl.h> 52 #include <net/if_media.h> 53 #include <net/if_types.h> 54 #include <net/if_var.h> 55 #include <net/bpf.h> 56 #include <net/route.h> 57 #include <net/vnet.h> 58 #include <net/infiniband.h> 59 60 #if defined(INET) || defined(INET6) 61 #include <netinet/in.h> 62 #include <netinet/ip.h> 63 #endif 64 #ifdef INET 65 #include <netinet/in_systm.h> 66 #include <netinet/if_ether.h> 67 #endif 68 69 #ifdef INET6 70 #include <netinet/ip6.h> 71 #include <netinet6/in6_var.h> 72 #include <netinet6/in6_ifattach.h> 73 #endif 74 75 #include <net/if_vlan_var.h> 76 #include <net/if_lagg.h> 77 #include <net/ieee8023ad_lacp.h> 78 79 #ifdef INET6 80 /* 81 * XXX: declare here to avoid to include many inet6 related files.. 82 * should be more generalized? 83 */ 84 extern void nd6_setmtu(struct ifnet *); 85 #endif 86 87 #ifdef DEV_NETMAP 88 MODULE_DEPEND(if_lagg, netmap, 1, 1, 1); 89 #endif 90 91 #define LAGG_SX_INIT(_sc) sx_init(&(_sc)->sc_sx, "if_lagg sx") 92 #define LAGG_SX_DESTROY(_sc) sx_destroy(&(_sc)->sc_sx) 93 #define LAGG_XLOCK(_sc) sx_xlock(&(_sc)->sc_sx) 94 #define LAGG_XUNLOCK(_sc) sx_xunlock(&(_sc)->sc_sx) 95 #define LAGG_SXLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SA_LOCKED) 96 #define LAGG_XLOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_sx, SA_XLOCKED) 97 98 /* Special flags we should propagate to the lagg ports. 
*/ 99 static struct { 100 int flag; 101 int (*func)(struct ifnet *, int); 102 } lagg_pflags[] = { 103 {IFF_PROMISC, ifpromisc}, 104 {IFF_ALLMULTI, if_allmulti}, 105 {0, NULL} 106 }; 107 108 struct lagg_snd_tag { 109 struct m_snd_tag com; 110 struct m_snd_tag *tag; 111 }; 112 113 VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ 114 #define V_lagg_list VNET(lagg_list) 115 VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx); 116 #define V_lagg_list_mtx VNET(lagg_list_mtx) 117 #define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \ 118 "if_lagg list", NULL, MTX_DEF) 119 #define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx) 120 #define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx) 121 #define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx) 122 eventhandler_tag lagg_detach_cookie = NULL; 123 124 static int lagg_clone_create(struct if_clone *, char *, size_t, 125 struct ifc_data *, struct ifnet **); 126 static int lagg_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); 127 VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner); 128 #define V_lagg_cloner VNET(lagg_cloner) 129 static const char laggname[] = "lagg"; 130 static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface"); 131 132 static void lagg_capabilities(struct lagg_softc *); 133 static int lagg_port_create(struct lagg_softc *, struct ifnet *); 134 static int lagg_port_destroy(struct lagg_port *, int); 135 static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *); 136 static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *); 137 static void lagg_linkstate(struct lagg_softc *); 138 static void lagg_port_state(struct ifnet *, int); 139 static int lagg_port_ioctl(struct ifnet *, u_long, caddr_t); 140 static int lagg_port_output(struct ifnet *, struct mbuf *, 141 const struct sockaddr *, struct route *); 142 static void lagg_port_ifdetach(void *arg __unused, struct ifnet *); 143 #ifdef LAGG_PORT_STACKING 144 static int lagg_port_checkstacking(struct lagg_softc *); 145 #endif 146 static void lagg_port2req(struct lagg_port *, struct lagg_reqport *); 147 static void lagg_init(void *); 148 static void lagg_stop(struct lagg_softc *); 149 static int lagg_ioctl(struct ifnet *, u_long, caddr_t); 150 #if defined(KERN_TLS) || defined(RATELIMIT) 151 static int lagg_snd_tag_alloc(struct ifnet *, 152 union if_snd_tag_alloc_params *, 153 struct m_snd_tag **); 154 static int lagg_snd_tag_modify(struct m_snd_tag *, 155 union if_snd_tag_modify_params *); 156 static int lagg_snd_tag_query(struct m_snd_tag *, 157 union if_snd_tag_query_params *); 158 static void lagg_snd_tag_free(struct m_snd_tag *); 159 static struct m_snd_tag *lagg_next_snd_tag(struct m_snd_tag *); 160 static void lagg_ratelimit_query(struct ifnet *, 161 struct if_ratelimit_query_results *); 162 #endif 163 static int lagg_setmulti(struct lagg_port *); 164 static int lagg_clrmulti(struct lagg_port *); 165 static void lagg_setcaps(struct lagg_port *, int cap, int cap2); 166 static int lagg_setflag(struct lagg_port *, int, int, 167 int (*func)(struct ifnet *, int)); 168 static int lagg_setflags(struct lagg_port *, int status); 169 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt); 170 static int lagg_transmit_ethernet(struct ifnet *, struct mbuf *); 171 static int lagg_transmit_infiniband(struct ifnet *, struct mbuf *); 172 static void lagg_qflush(struct ifnet *); 173 static int lagg_media_change(struct ifnet *); 174 static void lagg_media_status(struct ifnet *, struct ifmediareq *); 175 
static struct lagg_port *lagg_link_active(struct lagg_softc *, 176 struct lagg_port *); 177 178 /* Simple round robin */ 179 static void lagg_rr_attach(struct lagg_softc *); 180 static int lagg_rr_start(struct lagg_softc *, struct mbuf *); 181 static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *, 182 struct mbuf *); 183 184 /* Active failover */ 185 static int lagg_fail_start(struct lagg_softc *, struct mbuf *); 186 static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *, 187 struct mbuf *); 188 189 /* Loadbalancing */ 190 static void lagg_lb_attach(struct lagg_softc *); 191 static void lagg_lb_detach(struct lagg_softc *); 192 static int lagg_lb_port_create(struct lagg_port *); 193 static void lagg_lb_port_destroy(struct lagg_port *); 194 static int lagg_lb_start(struct lagg_softc *, struct mbuf *); 195 static struct mbuf *lagg_lb_input(struct lagg_softc *, struct lagg_port *, 196 struct mbuf *); 197 static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *); 198 199 /* Broadcast */ 200 static int lagg_bcast_start(struct lagg_softc *, struct mbuf *); 201 static struct mbuf *lagg_bcast_input(struct lagg_softc *, struct lagg_port *, 202 struct mbuf *); 203 204 /* 802.3ad LACP */ 205 static void lagg_lacp_attach(struct lagg_softc *); 206 static void lagg_lacp_detach(struct lagg_softc *); 207 static int lagg_lacp_start(struct lagg_softc *, struct mbuf *); 208 static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *, 209 struct mbuf *); 210 static void lagg_lacp_lladdr(struct lagg_softc *); 211 212 /* lagg protocol table */ 213 static const struct lagg_proto { 214 lagg_proto pr_num; 215 void (*pr_attach)(struct lagg_softc *); 216 void (*pr_detach)(struct lagg_softc *); 217 int (*pr_start)(struct lagg_softc *, struct mbuf *); 218 struct mbuf * (*pr_input)(struct lagg_softc *, struct lagg_port *, 219 struct mbuf *); 220 int (*pr_addport)(struct lagg_port *); 221 void (*pr_delport)(struct lagg_port *); 222 void (*pr_linkstate)(struct lagg_port *); 223 void (*pr_init)(struct lagg_softc *); 224 void (*pr_stop)(struct lagg_softc *); 225 void (*pr_lladdr)(struct lagg_softc *); 226 void (*pr_request)(struct lagg_softc *, void *); 227 void (*pr_portreq)(struct lagg_port *, void *); 228 } lagg_protos[] = { 229 { 230 .pr_num = LAGG_PROTO_NONE 231 }, 232 { 233 .pr_num = LAGG_PROTO_ROUNDROBIN, 234 .pr_attach = lagg_rr_attach, 235 .pr_start = lagg_rr_start, 236 .pr_input = lagg_rr_input, 237 }, 238 { 239 .pr_num = LAGG_PROTO_FAILOVER, 240 .pr_start = lagg_fail_start, 241 .pr_input = lagg_fail_input, 242 }, 243 { 244 .pr_num = LAGG_PROTO_LOADBALANCE, 245 .pr_attach = lagg_lb_attach, 246 .pr_detach = lagg_lb_detach, 247 .pr_start = lagg_lb_start, 248 .pr_input = lagg_lb_input, 249 .pr_addport = lagg_lb_port_create, 250 .pr_delport = lagg_lb_port_destroy, 251 }, 252 { 253 .pr_num = LAGG_PROTO_LACP, 254 .pr_attach = lagg_lacp_attach, 255 .pr_detach = lagg_lacp_detach, 256 .pr_start = lagg_lacp_start, 257 .pr_input = lagg_lacp_input, 258 .pr_addport = lacp_port_create, 259 .pr_delport = lacp_port_destroy, 260 .pr_linkstate = lacp_linkstate, 261 .pr_init = lacp_init, 262 .pr_stop = lacp_stop, 263 .pr_lladdr = lagg_lacp_lladdr, 264 .pr_request = lacp_req, 265 .pr_portreq = lacp_portreq, 266 }, 267 { 268 .pr_num = LAGG_PROTO_BROADCAST, 269 .pr_start = lagg_bcast_start, 270 .pr_input = lagg_bcast_input, 271 }, 272 }; 273 274 SYSCTL_DECL(_net_link); 275 SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 276 "Link Aggregation"); 
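/*
 * The knobs declared below live under this net.link.lagg node; for
 * example (illustrative values only, settable as loader tunables or
 * at runtime):
 *
 *	# sysctl net.link.lagg.default_use_flowid=1
 *	# sysctl net.link.lagg.default_flowid_shift=16
 */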
277 278 /* Allow input on any failover links */ 279 VNET_DEFINE_STATIC(int, lagg_failover_rx_all); 280 #define V_lagg_failover_rx_all VNET(lagg_failover_rx_all) 281 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET, 282 &VNET_NAME(lagg_failover_rx_all), 0, 283 "Accept input from any interface in a failover lagg"); 284 285 /* Default value for using flowid */ 286 VNET_DEFINE_STATIC(int, def_use_flowid) = 0; 287 #define V_def_use_flowid VNET(def_use_flowid) 288 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, 289 &VNET_NAME(def_use_flowid), 0, 290 "Default setting for using flow id for load sharing"); 291 292 /* Default value for using numa */ 293 VNET_DEFINE_STATIC(int, def_use_numa) = 1; 294 #define V_def_use_numa VNET(def_use_numa) 295 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN, 296 &VNET_NAME(def_use_numa), 0, 297 "Use numa to steer flows"); 298 299 /* Default value for flowid shift */ 300 VNET_DEFINE_STATIC(int, def_flowid_shift) = 16; 301 #define V_def_flowid_shift VNET(def_flowid_shift) 302 SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN, 303 &VNET_NAME(def_flowid_shift), 0, 304 "Default setting for flowid shift for load sharing"); 305 306 static void 307 vnet_lagg_init(const void *unused __unused) 308 { 309 310 LAGG_LIST_LOCK_INIT(); 311 SLIST_INIT(&V_lagg_list); 312 struct if_clone_addreq req = { 313 .create_f = lagg_clone_create, 314 .destroy_f = lagg_clone_destroy, 315 .flags = IFC_F_AUTOUNIT, 316 }; 317 V_lagg_cloner = ifc_attach_cloner(laggname, &req); 318 } 319 VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 320 vnet_lagg_init, NULL); 321 322 static void 323 vnet_lagg_uninit(const void *unused __unused) 324 { 325 326 ifc_detach_cloner(V_lagg_cloner); 327 LAGG_LIST_LOCK_DESTROY(); 328 } 329 VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 330 vnet_lagg_uninit, NULL); 331 332 static int 333 lagg_modevent(module_t mod, int type, void *data) 334 { 335 336 switch (type) { 337 case MOD_LOAD: 338 lagg_input_ethernet_p = lagg_input_ethernet; 339 lagg_input_infiniband_p = lagg_input_infiniband; 340 lagg_linkstate_p = lagg_port_state; 341 lagg_detach_cookie = EVENTHANDLER_REGISTER( 342 ifnet_departure_event, lagg_port_ifdetach, NULL, 343 EVENTHANDLER_PRI_ANY); 344 break; 345 case MOD_UNLOAD: 346 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 347 lagg_detach_cookie); 348 lagg_input_ethernet_p = NULL; 349 lagg_input_infiniband_p = NULL; 350 lagg_linkstate_p = NULL; 351 break; 352 default: 353 return (EOPNOTSUPP); 354 } 355 return (0); 356 } 357 358 static moduledata_t lagg_mod = { 359 "if_lagg", 360 lagg_modevent, 361 0 362 }; 363 364 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 365 MODULE_VERSION(if_lagg, 1); 366 MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1); 367 368 static void 369 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr) 370 { 371 372 LAGG_XLOCK_ASSERT(sc); 373 KASSERT(sc->sc_proto == LAGG_PROTO_NONE, ("%s: sc %p has proto", 374 __func__, sc)); 375 376 if (sc->sc_ifflags & IFF_DEBUG) 377 if_printf(sc->sc_ifp, "using proto %u\n", pr); 378 379 if (lagg_protos[pr].pr_attach != NULL) 380 lagg_protos[pr].pr_attach(sc); 381 sc->sc_proto = pr; 382 } 383 384 static void 385 lagg_proto_detach(struct lagg_softc *sc) 386 { 387 lagg_proto pr; 388 389 LAGG_XLOCK_ASSERT(sc); 390 pr = sc->sc_proto; 391 sc->sc_proto = LAGG_PROTO_NONE; 392 393 if (lagg_protos[pr].pr_detach != NULL) 394 lagg_protos[pr].pr_detach(sc); 395 } 396 397 
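/*
 * Thin dispatch wrappers around the lagg_protos[] table above.
 * pr_start and pr_input are called unconditionally, so every
 * selectable protocol provides them; the remaining handlers are
 * optional and are skipped when NULL.  Switching protocols, as the
 * SIOCSLAGG handler further down does, is then just (sketch, with
 * new_proto standing in for the requested protocol):
 *
 *	LAGG_XLOCK(sc);
 *	lagg_proto_detach(sc);
 *	lagg_proto_attach(sc, new_proto);
 *	LAGG_XUNLOCK(sc);
 */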
static int
398 lagg_proto_start(struct lagg_softc *sc, struct mbuf *m)
399 {
400
401 return (lagg_protos[sc->sc_proto].pr_start(sc, m));
402 }
403
404 static struct mbuf *
405 lagg_proto_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m)
406 {
407
408 return (lagg_protos[sc->sc_proto].pr_input(sc, lp, m));
409 }
410
411 static int
412 lagg_proto_addport(struct lagg_softc *sc, struct lagg_port *lp)
413 {
414
415 if (lagg_protos[sc->sc_proto].pr_addport == NULL)
416 return (0);
417 else
418 return (lagg_protos[sc->sc_proto].pr_addport(lp));
419 }
420
421 static void
422 lagg_proto_delport(struct lagg_softc *sc, struct lagg_port *lp)
423 {
424
425 if (lagg_protos[sc->sc_proto].pr_delport != NULL)
426 lagg_protos[sc->sc_proto].pr_delport(lp);
427 }
428
429 static void
430 lagg_proto_linkstate(struct lagg_softc *sc, struct lagg_port *lp)
431 {
432
433 if (lagg_protos[sc->sc_proto].pr_linkstate != NULL)
434 lagg_protos[sc->sc_proto].pr_linkstate(lp);
435 }
436
437 static void
438 lagg_proto_init(struct lagg_softc *sc)
439 {
440
441 if (lagg_protos[sc->sc_proto].pr_init != NULL)
442 lagg_protos[sc->sc_proto].pr_init(sc);
443 }
444
445 static void
446 lagg_proto_stop(struct lagg_softc *sc)
447 {
448
449 if (lagg_protos[sc->sc_proto].pr_stop != NULL)
450 lagg_protos[sc->sc_proto].pr_stop(sc);
451 }
452
453 static void
454 lagg_proto_lladdr(struct lagg_softc *sc)
455 {
456
457 if (lagg_protos[sc->sc_proto].pr_lladdr != NULL)
458 lagg_protos[sc->sc_proto].pr_lladdr(sc);
459 }
460
461 static void
462 lagg_proto_request(struct lagg_softc *sc, void *v)
463 {
464
465 if (lagg_protos[sc->sc_proto].pr_request != NULL)
466 lagg_protos[sc->sc_proto].pr_request(sc, v);
467 }
468
469 static void
470 lagg_proto_portreq(struct lagg_softc *sc, struct lagg_port *lp, void *v)
471 {
472
473 if (lagg_protos[sc->sc_proto].pr_portreq != NULL)
474 lagg_protos[sc->sc_proto].pr_portreq(lp, v);
475 }
476
477 /*
478 * This routine is run via a vlan
479 * config EVENT
480 */
481 static void
482 lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
483 {
484 struct lagg_softc *sc = ifp->if_softc;
485 struct lagg_port *lp;
486
487 if (ifp->if_softc != arg) /* Not our event */
488 return;
489
490 LAGG_XLOCK(sc);
491 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
492 EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
493 LAGG_XUNLOCK(sc);
494 }
495
496 /*
497 * This routine is run via a vlan
498 * unconfig EVENT
499 */
500 static void
501 lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
502 {
503 struct lagg_softc *sc = ifp->if_softc;
504 struct lagg_port *lp;
505
506 if (ifp->if_softc != arg) /* Not our event */
507 return;
508
509 LAGG_XLOCK(sc);
510 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
511 EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
512 LAGG_XUNLOCK(sc);
513 }
514
515 static int
516 lagg_clone_create(struct if_clone *ifc, char *name, size_t len,
517 struct ifc_data *ifd, struct ifnet **ifpp)
518 {
519 struct iflaggparam iflp;
520 struct lagg_softc *sc;
521 struct ifnet *ifp;
522 int if_type;
523 int error;
524 static const uint8_t eaddr[LAGG_ADDR_LEN];
525
526 if (ifd->params != NULL) {
527 error = ifc_copyin(ifd, &iflp, sizeof(iflp));
528 if (error)
529 return (error);
530
531 switch (iflp.lagg_type) {
532 case LAGG_TYPE_ETHERNET:
533 if_type = IFT_ETHER;
534 break;
535 case LAGG_TYPE_INFINIBAND:
536 if_type = IFT_INFINIBAND;
537 break;
538 default:
539 return (EINVAL);
540 }
541 } else {
542 if_type = IFT_ETHER;
543 }
544
545 sc = malloc(sizeof(*sc),
M_LAGG, M_WAITOK|M_ZERO); 546 ifp = sc->sc_ifp = if_alloc(if_type); 547 if (ifp == NULL) { 548 free(sc, M_LAGG); 549 return (ENOSPC); 550 } 551 LAGG_SX_INIT(sc); 552 553 mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF); 554 callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0); 555 556 LAGG_XLOCK(sc); 557 if (V_def_use_flowid) 558 sc->sc_opts |= LAGG_OPT_USE_FLOWID; 559 if (V_def_use_numa) 560 sc->sc_opts |= LAGG_OPT_USE_NUMA; 561 sc->flowid_shift = V_def_flowid_shift; 562 563 /* Hash all layers by default */ 564 sc->sc_flags = MBUF_HASHFLAG_L2|MBUF_HASHFLAG_L3|MBUF_HASHFLAG_L4; 565 566 lagg_proto_attach(sc, LAGG_PROTO_DEFAULT); 567 568 CK_SLIST_INIT(&sc->sc_ports); 569 570 switch (if_type) { 571 case IFT_ETHER: 572 /* Initialise pseudo media types */ 573 ifmedia_init(&sc->sc_media, 0, lagg_media_change, 574 lagg_media_status); 575 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 576 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 577 578 if_initname(ifp, laggname, ifd->unit); 579 ifp->if_transmit = lagg_transmit_ethernet; 580 break; 581 case IFT_INFINIBAND: 582 if_initname(ifp, laggname, ifd->unit); 583 ifp->if_transmit = lagg_transmit_infiniband; 584 break; 585 default: 586 break; 587 } 588 ifp->if_softc = sc; 589 ifp->if_qflush = lagg_qflush; 590 ifp->if_init = lagg_init; 591 ifp->if_ioctl = lagg_ioctl; 592 ifp->if_get_counter = lagg_get_counter; 593 ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; 594 #if defined(KERN_TLS) || defined(RATELIMIT) 595 ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; 596 ifp->if_ratelimit_query = lagg_ratelimit_query; 597 #endif 598 ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; 599 600 /* 601 * Attach as an ordinary ethernet device, children will be attached 602 * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG. 
603 */ 604 switch (if_type) { 605 case IFT_ETHER: 606 ether_ifattach(ifp, eaddr); 607 break; 608 case IFT_INFINIBAND: 609 infiniband_ifattach(ifp, eaddr, sc->sc_bcast_addr); 610 break; 611 default: 612 break; 613 } 614 615 sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, 616 lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST); 617 sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, 618 lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); 619 620 /* Insert into the global list of laggs */ 621 LAGG_LIST_LOCK(); 622 SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries); 623 LAGG_LIST_UNLOCK(); 624 LAGG_XUNLOCK(sc); 625 *ifpp = ifp; 626 627 return (0); 628 } 629 630 static int 631 lagg_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 632 { 633 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; 634 struct lagg_port *lp; 635 636 LAGG_XLOCK(sc); 637 sc->sc_destroying = 1; 638 lagg_stop(sc); 639 ifp->if_flags &= ~IFF_UP; 640 641 EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); 642 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); 643 644 /* Shutdown and remove lagg ports */ 645 while ((lp = CK_SLIST_FIRST(&sc->sc_ports)) != NULL) 646 lagg_port_destroy(lp, 1); 647 648 /* Unhook the aggregation protocol */ 649 lagg_proto_detach(sc); 650 LAGG_XUNLOCK(sc); 651 652 switch (ifp->if_type) { 653 case IFT_ETHER: 654 ifmedia_removeall(&sc->sc_media); 655 ether_ifdetach(ifp); 656 break; 657 case IFT_INFINIBAND: 658 infiniband_ifdetach(ifp); 659 break; 660 default: 661 break; 662 } 663 if_free(ifp); 664 665 LAGG_LIST_LOCK(); 666 SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); 667 LAGG_LIST_UNLOCK(); 668 669 mtx_destroy(&sc->sc_mtx); 670 LAGG_SX_DESTROY(sc); 671 free(sc, M_LAGG); 672 673 return (0); 674 } 675 676 static void 677 lagg_capabilities(struct lagg_softc *sc) 678 { 679 struct lagg_port *lp; 680 int cap, cap2, ena, ena2, pena, pena2; 681 uint64_t hwa; 682 struct ifnet_hw_tsomax hw_tsomax; 683 684 LAGG_XLOCK_ASSERT(sc); 685 686 /* Get common enabled capabilities for the lagg ports */ 687 ena = ena2 = ~0; 688 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 689 ena &= lp->lp_ifp->if_capenable; 690 ena2 &= lp->lp_ifp->if_capenable2; 691 } 692 if (CK_SLIST_FIRST(&sc->sc_ports) == NULL) 693 ena = ena2 = 0; 694 695 /* 696 * Apply common enabled capabilities back to the lagg ports. 697 * May require several iterations if they are dependent. 
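 * For example, disabling IFCAP_TXCSUM on one port typically makes its
 * driver drop IFCAP_TSO4 as well; the smaller enabled set must then be
 * applied to the other ports, so the loop below runs again until the
 * enabled sets stop changing.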
698 */ 699 do { 700 pena = ena; 701 pena2 = ena2; 702 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 703 lagg_setcaps(lp, ena, ena2); 704 ena &= lp->lp_ifp->if_capenable; 705 ena2 &= lp->lp_ifp->if_capenable2; 706 } 707 } while (pena != ena || pena2 != ena2); 708 709 /* Get other capabilities from the lagg ports */ 710 cap = cap2 = ~0; 711 hwa = ~(uint64_t)0; 712 memset(&hw_tsomax, 0, sizeof(hw_tsomax)); 713 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 714 cap &= lp->lp_ifp->if_capabilities; 715 cap2 &= lp->lp_ifp->if_capabilities2; 716 hwa &= lp->lp_ifp->if_hwassist; 717 if_hw_tsomax_common(lp->lp_ifp, &hw_tsomax); 718 } 719 if (CK_SLIST_FIRST(&sc->sc_ports) == NULL) 720 cap = cap2 = hwa = 0; 721 722 if (sc->sc_ifp->if_capabilities != cap || 723 sc->sc_ifp->if_capenable != ena || 724 sc->sc_ifp->if_capenable2 != ena2 || 725 sc->sc_ifp->if_hwassist != hwa || 726 if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax) != 0) { 727 sc->sc_ifp->if_capabilities = cap; 728 sc->sc_ifp->if_capabilities2 = cap2; 729 sc->sc_ifp->if_capenable = ena; 730 sc->sc_ifp->if_capenable2 = ena2; 731 sc->sc_ifp->if_hwassist = hwa; 732 getmicrotime(&sc->sc_ifp->if_lastchange); 733 734 if (sc->sc_ifflags & IFF_DEBUG) 735 if_printf(sc->sc_ifp, 736 "capabilities 0x%08x enabled 0x%08x\n", cap, ena); 737 } 738 } 739 740 static int 741 lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) 742 { 743 struct lagg_softc *sc_ptr; 744 struct lagg_port *lp, *tlp; 745 struct ifreq ifr; 746 int error, i, oldmtu; 747 int if_type; 748 uint64_t *pval; 749 750 LAGG_XLOCK_ASSERT(sc); 751 752 if (sc->sc_ifp == ifp) { 753 if_printf(sc->sc_ifp, 754 "cannot add a lagg to itself as a port\n"); 755 return (EINVAL); 756 } 757 758 if (sc->sc_destroying == 1) 759 return (ENXIO); 760 761 /* Limit the maximal number of lagg ports */ 762 if (sc->sc_count >= LAGG_MAX_PORTS) 763 return (ENOSPC); 764 765 /* Check if port has already been associated to a lagg */ 766 if (ifp->if_lagg != NULL) { 767 /* Port is already in the current lagg? 
*/
768 lp = (struct lagg_port *)ifp->if_lagg;
769 if (lp->lp_softc == sc)
770 return (EEXIST);
771 return (EBUSY);
772 }
773
774 switch (sc->sc_ifp->if_type) {
775 case IFT_ETHER:
776 /* XXX Disallow non-ethernet interfaces (this should be any of 802) */
777 if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
778 return (EPROTONOSUPPORT);
779 if_type = IFT_IEEE8023ADLAG;
780 break;
781 case IFT_INFINIBAND:
782 /* XXX Disallow non-infiniband interfaces */
783 if (ifp->if_type != IFT_INFINIBAND)
784 return (EPROTONOSUPPORT);
785 if_type = IFT_INFINIBANDLAG;
786 break;
787 default:
788 break;
789 }
790
791 /* Allow the first Ethernet member to define the MTU */
792 oldmtu = -1;
793 if (CK_SLIST_EMPTY(&sc->sc_ports)) {
794 sc->sc_ifp->if_mtu = ifp->if_mtu;
795 } else if (sc->sc_ifp->if_mtu != ifp->if_mtu) {
796 if (ifp->if_ioctl == NULL) {
797 if_printf(sc->sc_ifp, "cannot change MTU for %s\n",
798 ifp->if_xname);
799 return (EINVAL);
800 }
801 oldmtu = ifp->if_mtu;
802 strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name));
803 ifr.ifr_mtu = sc->sc_ifp->if_mtu;
804 error = (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr);
805 if (error != 0) {
806 if_printf(sc->sc_ifp, "invalid MTU for %s\n",
807 ifp->if_xname);
808 return (error);
809 }
810 ifr.ifr_mtu = oldmtu;
811 }
812
813 lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO);
814 lp->lp_softc = sc;
815
816 /* Check if port is a stacked lagg */
817 LAGG_LIST_LOCK();
818 SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
819 if (ifp == sc_ptr->sc_ifp) {
820 LAGG_LIST_UNLOCK();
821 free(lp, M_LAGG);
822 if (oldmtu != -1)
823 (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
824 (caddr_t)&ifr);
825 return (EINVAL);
826 /* XXX disable stacking for the moment, it's untested */
827 #ifdef LAGG_PORT_STACKING
828 lp->lp_flags |= LAGG_PORT_STACK;
829 if (lagg_port_checkstacking(sc_ptr) >=
830 LAGG_MAX_STACKING) {
831 LAGG_LIST_UNLOCK();
832 free(lp, M_LAGG);
833 if (oldmtu != -1)
834 (*ifp->if_ioctl)(ifp, SIOCSIFMTU,
835 (caddr_t)&ifr);
836 return (E2BIG);
837 }
838 #endif
839 }
840 }
841 LAGG_LIST_UNLOCK();
842
843 if_ref(ifp);
844 lp->lp_ifp = ifp;
845
846 bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen);
847 lp->lp_ifcapenable = ifp->if_capenable;
848 if (CK_SLIST_EMPTY(&sc->sc_ports)) {
849 bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
850 lagg_proto_lladdr(sc);
851 EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
852 } else {
853 if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
854 }
855 lagg_setflags(lp, 1);
856
857 if (CK_SLIST_EMPTY(&sc->sc_ports))
858 sc->sc_primary = lp;
859
860 /* Change the interface type */
861 lp->lp_iftype = ifp->if_type;
862 ifp->if_type = if_type;
863 ifp->if_lagg = lp;
864 lp->lp_ioctl = ifp->if_ioctl;
865 ifp->if_ioctl = lagg_port_ioctl;
866 lp->lp_output = ifp->if_output;
867 ifp->if_output = lagg_port_output;
868
869 /* Read port counters */
870 pval = lp->port_counters.val;
871 for (i = 0; i < IFCOUNTERS; i++, pval++)
872 *pval = ifp->if_get_counter(ifp, i);
873
874 /*
875 * Insert into the list of ports.
876 * Keep ports sorted by if_index so that the configuration is
877 * predictable and the same `ifconfig laggN create ...` command
878 * leads to the same result each time.
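 * For example, adding a port with if_index 3 to a lagg that already
 * has ports with if_index 1 and 5 places the new port between them.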
879 */ 880 CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) { 881 if (tlp->lp_ifp->if_index < ifp->if_index && ( 882 CK_SLIST_NEXT(tlp, lp_entries) == NULL || 883 ((struct lagg_port*)CK_SLIST_NEXT(tlp, lp_entries))->lp_ifp->if_index > 884 ifp->if_index)) 885 break; 886 } 887 if (tlp != NULL) 888 CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries); 889 else 890 CK_SLIST_INSERT_HEAD(&sc->sc_ports, lp, lp_entries); 891 sc->sc_count++; 892 893 lagg_setmulti(lp); 894 895 if ((error = lagg_proto_addport(sc, lp)) != 0) { 896 /* Remove the port, without calling pr_delport. */ 897 lagg_port_destroy(lp, 0); 898 if (oldmtu != -1) 899 (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); 900 return (error); 901 } 902 903 /* Update lagg capabilities */ 904 lagg_capabilities(sc); 905 lagg_linkstate(sc); 906 907 return (0); 908 } 909 910 #ifdef LAGG_PORT_STACKING 911 static int 912 lagg_port_checkstacking(struct lagg_softc *sc) 913 { 914 struct lagg_softc *sc_ptr; 915 struct lagg_port *lp; 916 int m = 0; 917 918 LAGG_SXLOCK_ASSERT(sc); 919 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 920 if (lp->lp_flags & LAGG_PORT_STACK) { 921 sc_ptr = (struct lagg_softc *)lp->lp_ifp->if_softc; 922 m = MAX(m, lagg_port_checkstacking(sc_ptr)); 923 } 924 } 925 926 return (m + 1); 927 } 928 #endif 929 930 static void 931 lagg_port_destroy_cb(epoch_context_t ec) 932 { 933 struct lagg_port *lp; 934 struct ifnet *ifp; 935 936 lp = __containerof(ec, struct lagg_port, lp_epoch_ctx); 937 ifp = lp->lp_ifp; 938 939 if_rele(ifp); 940 free(lp, M_LAGG); 941 } 942 943 static int 944 lagg_port_destroy(struct lagg_port *lp, int rundelport) 945 { 946 struct lagg_softc *sc = lp->lp_softc; 947 struct lagg_port *lp_ptr, *lp0; 948 struct ifnet *ifp = lp->lp_ifp; 949 uint64_t *pval, vdiff; 950 int i; 951 952 LAGG_XLOCK_ASSERT(sc); 953 954 if (rundelport) 955 lagg_proto_delport(sc, lp); 956 957 if (lp->lp_detaching == 0) 958 lagg_clrmulti(lp); 959 960 /* Restore interface */ 961 ifp->if_type = lp->lp_iftype; 962 ifp->if_ioctl = lp->lp_ioctl; 963 ifp->if_output = lp->lp_output; 964 ifp->if_lagg = NULL; 965 966 /* Update detached port counters */ 967 pval = lp->port_counters.val; 968 for (i = 0; i < IFCOUNTERS; i++, pval++) { 969 vdiff = ifp->if_get_counter(ifp, i) - *pval; 970 sc->detached_counters.val[i] += vdiff; 971 } 972 973 /* Finally, remove the port from the lagg */ 974 CK_SLIST_REMOVE(&sc->sc_ports, lp, lagg_port, lp_entries); 975 sc->sc_count--; 976 977 /* Update the primary interface */ 978 if (lp == sc->sc_primary) { 979 uint8_t lladdr[LAGG_ADDR_LEN]; 980 981 if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL) 982 bzero(&lladdr, LAGG_ADDR_LEN); 983 else 984 bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN); 985 sc->sc_primary = lp0; 986 if (sc->sc_destroying == 0) { 987 bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen); 988 lagg_proto_lladdr(sc); 989 EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp); 990 991 /* 992 * Update lladdr for each port (new primary needs update 993 * as well, to switch from old lladdr to its 'real' one). 994 * We can skip this if the lagg is being destroyed. 
995 */ 996 CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) 997 if_setlladdr(lp_ptr->lp_ifp, lladdr, 998 lp_ptr->lp_ifp->if_addrlen); 999 } 1000 } 1001 1002 if (lp->lp_ifflags) 1003 if_printf(ifp, "%s: lp_ifflags unclean\n", __func__); 1004 1005 if (lp->lp_detaching == 0) { 1006 lagg_setflags(lp, 0); 1007 lagg_setcaps(lp, lp->lp_ifcapenable, lp->lp_ifcapenable2); 1008 if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen); 1009 } 1010 1011 /* 1012 * free port and release it's ifnet reference after a grace period has 1013 * elapsed. 1014 */ 1015 NET_EPOCH_CALL(lagg_port_destroy_cb, &lp->lp_epoch_ctx); 1016 /* Update lagg capabilities */ 1017 lagg_capabilities(sc); 1018 lagg_linkstate(sc); 1019 1020 return (0); 1021 } 1022 1023 static int 1024 lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1025 { 1026 struct epoch_tracker et; 1027 struct lagg_reqport *rp = (struct lagg_reqport *)data; 1028 struct lagg_softc *sc; 1029 struct lagg_port *lp = NULL; 1030 int error = 0; 1031 1032 /* Should be checked by the caller */ 1033 switch (ifp->if_type) { 1034 case IFT_IEEE8023ADLAG: 1035 case IFT_INFINIBANDLAG: 1036 if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL) 1037 goto fallback; 1038 break; 1039 default: 1040 goto fallback; 1041 } 1042 1043 switch (cmd) { 1044 case SIOCGLAGGPORT: 1045 if (rp->rp_portname[0] == '\0' || 1046 ifunit(rp->rp_portname) != ifp) { 1047 error = EINVAL; 1048 break; 1049 } 1050 1051 NET_EPOCH_ENTER(et); 1052 if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) { 1053 error = ENOENT; 1054 NET_EPOCH_EXIT(et); 1055 break; 1056 } 1057 1058 lagg_port2req(lp, rp); 1059 NET_EPOCH_EXIT(et); 1060 break; 1061 1062 case SIOCSIFCAP: 1063 case SIOCSIFCAPNV: 1064 if (lp->lp_ioctl == NULL) { 1065 error = EINVAL; 1066 break; 1067 } 1068 error = (*lp->lp_ioctl)(ifp, cmd, data); 1069 if (error) 1070 break; 1071 1072 /* Update lagg interface capabilities */ 1073 LAGG_XLOCK(sc); 1074 lagg_capabilities(sc); 1075 LAGG_XUNLOCK(sc); 1076 VLAN_CAPABILITIES(sc->sc_ifp); 1077 break; 1078 1079 case SIOCSIFMTU: 1080 /* Do not allow the MTU to be changed once joined */ 1081 error = EINVAL; 1082 break; 1083 1084 default: 1085 goto fallback; 1086 } 1087 1088 return (error); 1089 1090 fallback: 1091 if (lp != NULL && lp->lp_ioctl != NULL) 1092 return ((*lp->lp_ioctl)(ifp, cmd, data)); 1093 1094 return (EINVAL); 1095 } 1096 1097 /* 1098 * Requests counter @cnt data. 1099 * 1100 * Counter value is calculated the following way: 1101 * 1) for each port, sum difference between current and "initial" measurements. 1102 * 2) add lagg logical interface counters. 1103 * 3) add data from detached_counters array. 1104 * 1105 * We also do the following things on ports attach/detach: 1106 * 1) On port attach we store all counters it has into port_counter array. 1107 * 2) On port detach we add the different between "initial" and 1108 * current counters data to detached_counters array. 1109 */ 1110 static uint64_t 1111 lagg_get_counter(struct ifnet *ifp, ift_counter cnt) 1112 { 1113 struct epoch_tracker et; 1114 struct lagg_softc *sc; 1115 struct lagg_port *lp; 1116 struct ifnet *lpifp; 1117 uint64_t newval, oldval, vsum; 1118 1119 /* Revise this when we've got non-generic counters. 
*/ 1120 KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); 1121 1122 sc = (struct lagg_softc *)ifp->if_softc; 1123 1124 vsum = 0; 1125 NET_EPOCH_ENTER(et); 1126 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1127 /* Saved attached value */ 1128 oldval = lp->port_counters.val[cnt]; 1129 /* current value */ 1130 lpifp = lp->lp_ifp; 1131 newval = lpifp->if_get_counter(lpifp, cnt); 1132 /* Calculate diff and save new */ 1133 vsum += newval - oldval; 1134 } 1135 NET_EPOCH_EXIT(et); 1136 1137 /* 1138 * Add counter data which might be added by upper 1139 * layer protocols operating on logical interface. 1140 */ 1141 vsum += if_get_counter_default(ifp, cnt); 1142 1143 /* 1144 * Add counter data from detached ports counters 1145 */ 1146 vsum += sc->detached_counters.val[cnt]; 1147 1148 return (vsum); 1149 } 1150 1151 /* 1152 * For direct output to child ports. 1153 */ 1154 static int 1155 lagg_port_output(struct ifnet *ifp, struct mbuf *m, 1156 const struct sockaddr *dst, struct route *ro) 1157 { 1158 struct lagg_port *lp = ifp->if_lagg; 1159 1160 switch (dst->sa_family) { 1161 case pseudo_AF_HDRCMPLT: 1162 case AF_UNSPEC: 1163 if (lp != NULL) 1164 return ((*lp->lp_output)(ifp, m, dst, ro)); 1165 } 1166 1167 /* drop any other frames */ 1168 m_freem(m); 1169 return (ENETDOWN); 1170 } 1171 1172 static void 1173 lagg_port_ifdetach(void *arg __unused, struct ifnet *ifp) 1174 { 1175 struct lagg_port *lp; 1176 struct lagg_softc *sc; 1177 1178 if ((lp = ifp->if_lagg) == NULL) 1179 return; 1180 /* If the ifnet is just being renamed, don't do anything. */ 1181 if (ifp->if_flags & IFF_RENAMING) 1182 return; 1183 1184 sc = lp->lp_softc; 1185 1186 LAGG_XLOCK(sc); 1187 lp->lp_detaching = 1; 1188 lagg_port_destroy(lp, 1); 1189 LAGG_XUNLOCK(sc); 1190 VLAN_CAPABILITIES(sc->sc_ifp); 1191 } 1192 1193 static void 1194 lagg_port2req(struct lagg_port *lp, struct lagg_reqport *rp) 1195 { 1196 struct lagg_softc *sc = lp->lp_softc; 1197 1198 strlcpy(rp->rp_ifname, sc->sc_ifname, sizeof(rp->rp_ifname)); 1199 strlcpy(rp->rp_portname, lp->lp_ifp->if_xname, sizeof(rp->rp_portname)); 1200 rp->rp_prio = lp->lp_prio; 1201 rp->rp_flags = lp->lp_flags; 1202 lagg_proto_portreq(sc, lp, &rp->rp_psc); 1203 1204 /* Add protocol specific flags */ 1205 switch (sc->sc_proto) { 1206 case LAGG_PROTO_FAILOVER: 1207 if (lp == sc->sc_primary) 1208 rp->rp_flags |= LAGG_PORT_MASTER; 1209 if (lp == lagg_link_active(sc, sc->sc_primary)) 1210 rp->rp_flags |= LAGG_PORT_ACTIVE; 1211 break; 1212 1213 case LAGG_PROTO_ROUNDROBIN: 1214 case LAGG_PROTO_LOADBALANCE: 1215 case LAGG_PROTO_BROADCAST: 1216 if (LAGG_PORTACTIVE(lp)) 1217 rp->rp_flags |= LAGG_PORT_ACTIVE; 1218 break; 1219 1220 case LAGG_PROTO_LACP: 1221 /* LACP has a different definition of active */ 1222 if (lacp_isactive(lp)) 1223 rp->rp_flags |= LAGG_PORT_ACTIVE; 1224 if (lacp_iscollecting(lp)) 1225 rp->rp_flags |= LAGG_PORT_COLLECTING; 1226 if (lacp_isdistributing(lp)) 1227 rp->rp_flags |= LAGG_PORT_DISTRIBUTING; 1228 break; 1229 } 1230 1231 } 1232 1233 static void 1234 lagg_watchdog_infiniband(void *arg) 1235 { 1236 struct epoch_tracker et; 1237 struct lagg_softc *sc; 1238 struct lagg_port *lp; 1239 struct ifnet *ifp; 1240 struct ifnet *lp_ifp; 1241 1242 sc = arg; 1243 1244 /* 1245 * Because infiniband nodes have a fixed MAC address, which is 1246 * generated by the so-called GID, we need to regularly update 1247 * the link level address of the parent lagg<N> device when 1248 * the active port changes. 
Possibly we could piggy-back on
1249 * link up/down events as well, but using a timer also provides
1250 * a guarantee against too frequent events. This operation
1251 * does not have to be atomic.
1252 */
1253 NET_EPOCH_ENTER(et);
1254 lp = lagg_link_active(sc, sc->sc_primary);
1255 if (lp != NULL) {
1256 ifp = sc->sc_ifp;
1257 lp_ifp = lp->lp_ifp;
1258
1259 if (ifp != NULL && lp_ifp != NULL &&
1260 (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0 ||
1261 memcmp(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen) != 0)) {
1262 memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen);
1263 memcpy(sc->sc_bcast_addr, lp_ifp->if_broadcastaddr, ifp->if_addrlen);
1264
1265 CURVNET_SET(ifp->if_vnet);
1266 EVENTHANDLER_INVOKE(iflladdr_event, ifp);
1267 CURVNET_RESTORE();
1268 }
1269 }
1270 NET_EPOCH_EXIT(et);
1271
1272 callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg);
1273 }
1274
1275 static void
1276 lagg_init(void *xsc)
1277 {
1278 struct lagg_softc *sc = (struct lagg_softc *)xsc;
1279 struct ifnet *ifp = sc->sc_ifp;
1280 struct lagg_port *lp;
1281
1282 LAGG_XLOCK(sc);
1283 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1284 LAGG_XUNLOCK(sc);
1285 return;
1286 }
1287
1288 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1289
1290 /*
1291 * Update the port lladdrs if needed.
1292 * This might be an if_setlladdr() notification
1293 * that the lladdr has changed.
1294 */
1295 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
1296 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
1297 ifp->if_addrlen) != 0)
1298 if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen);
1299 }
1300
1301 lagg_proto_init(sc);
1302
1303 if (ifp->if_type == IFT_INFINIBAND) {
1304 mtx_lock(&sc->sc_mtx);
1305 lagg_watchdog_infiniband(sc);
1306 mtx_unlock(&sc->sc_mtx);
1307 }
1308
1309 LAGG_XUNLOCK(sc);
1310 }
1311
1312 static void
1313 lagg_stop(struct lagg_softc *sc)
1314 {
1315 struct ifnet *ifp = sc->sc_ifp;
1316
1317 LAGG_XLOCK_ASSERT(sc);
1318
1319 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1320 return;
1321
1322 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1323
1324 lagg_proto_stop(sc);
1325
1326 mtx_lock(&sc->sc_mtx);
1327 callout_stop(&sc->sc_watchdog);
1328 mtx_unlock(&sc->sc_mtx);
1329
1330 callout_drain(&sc->sc_watchdog);
1331 }
1332
1333 static int
1334 lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1335 {
1336 struct epoch_tracker et;
1337 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
1338 struct lagg_reqall *ra = (struct lagg_reqall *)data;
1339 struct lagg_reqopts *ro = (struct lagg_reqopts *)data;
1340 struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf;
1341 struct lagg_reqflags *rf = (struct lagg_reqflags *)data;
1342 struct ifreq *ifr = (struct ifreq *)data;
1343 struct lagg_port *lp;
1344 struct ifnet *tpif;
1345 struct thread *td = curthread;
1346 char *buf, *outbuf;
1347 int count, buflen, len, error = 0, oldmtu;
1348
1349 bzero(&rpbuf, sizeof(rpbuf));
1350
1351 /* XXX: This can race with lagg_clone_destroy.
*/ 1352 1353 switch (cmd) { 1354 case SIOCGLAGG: 1355 LAGG_XLOCK(sc); 1356 buflen = sc->sc_count * sizeof(struct lagg_reqport); 1357 outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); 1358 ra->ra_proto = sc->sc_proto; 1359 lagg_proto_request(sc, &ra->ra_psc); 1360 count = 0; 1361 buf = outbuf; 1362 len = min(ra->ra_size, buflen); 1363 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1364 if (len < sizeof(rpbuf)) 1365 break; 1366 1367 lagg_port2req(lp, &rpbuf); 1368 memcpy(buf, &rpbuf, sizeof(rpbuf)); 1369 count++; 1370 buf += sizeof(rpbuf); 1371 len -= sizeof(rpbuf); 1372 } 1373 LAGG_XUNLOCK(sc); 1374 ra->ra_ports = count; 1375 ra->ra_size = count * sizeof(rpbuf); 1376 error = copyout(outbuf, ra->ra_port, ra->ra_size); 1377 free(outbuf, M_TEMP); 1378 break; 1379 case SIOCSLAGG: 1380 error = priv_check(td, PRIV_NET_LAGG); 1381 if (error) 1382 break; 1383 if (ra->ra_proto >= LAGG_PROTO_MAX) { 1384 error = EPROTONOSUPPORT; 1385 break; 1386 } 1387 /* Infiniband only supports the failover protocol. */ 1388 if (ra->ra_proto != LAGG_PROTO_FAILOVER && 1389 ifp->if_type == IFT_INFINIBAND) { 1390 error = EPROTONOSUPPORT; 1391 break; 1392 } 1393 LAGG_XLOCK(sc); 1394 lagg_proto_detach(sc); 1395 lagg_proto_attach(sc, ra->ra_proto); 1396 LAGG_XUNLOCK(sc); 1397 break; 1398 case SIOCGLAGGOPTS: 1399 LAGG_XLOCK(sc); 1400 ro->ro_opts = sc->sc_opts; 1401 if (sc->sc_proto == LAGG_PROTO_LACP) { 1402 struct lacp_softc *lsc; 1403 1404 lsc = (struct lacp_softc *)sc->sc_psc; 1405 if (lsc->lsc_debug.lsc_tx_test != 0) 1406 ro->ro_opts |= LAGG_OPT_LACP_TXTEST; 1407 if (lsc->lsc_debug.lsc_rx_test != 0) 1408 ro->ro_opts |= LAGG_OPT_LACP_RXTEST; 1409 if (lsc->lsc_strict_mode != 0) 1410 ro->ro_opts |= LAGG_OPT_LACP_STRICT; 1411 if (lsc->lsc_fast_timeout != 0) 1412 ro->ro_opts |= LAGG_OPT_LACP_FAST_TIMO; 1413 1414 ro->ro_active = sc->sc_active; 1415 } else { 1416 ro->ro_active = 0; 1417 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 1418 ro->ro_active += LAGG_PORTACTIVE(lp); 1419 } 1420 ro->ro_bkt = sc->sc_stride; 1421 ro->ro_flapping = sc->sc_flapping; 1422 ro->ro_flowid_shift = sc->flowid_shift; 1423 LAGG_XUNLOCK(sc); 1424 break; 1425 case SIOCSLAGGOPTS: 1426 error = priv_check(td, PRIV_NET_LAGG); 1427 if (error) 1428 break; 1429 1430 /* 1431 * The stride option was added without defining a corresponding 1432 * LAGG_OPT flag, so handle a non-zero value before checking 1433 * anything else to preserve compatibility. 1434 */ 1435 LAGG_XLOCK(sc); 1436 if (ro->ro_opts == 0 && ro->ro_bkt != 0) { 1437 if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN) { 1438 LAGG_XUNLOCK(sc); 1439 error = EINVAL; 1440 break; 1441 } 1442 sc->sc_stride = ro->ro_bkt; 1443 } 1444 if (ro->ro_opts == 0) { 1445 LAGG_XUNLOCK(sc); 1446 break; 1447 } 1448 1449 /* 1450 * Set options. LACP options are stored in sc->sc_psc, 1451 * not in sc_opts. 
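 * A single option constant is passed in ro_opts; a positive value
 * enables the option and its arithmetic negative disables it, which
 * is why both LAGG_OPT_USE_FLOWID and -LAGG_OPT_USE_FLOWID are
 * matched below.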
1452 */ 1453 int valid, lacp; 1454 1455 switch (ro->ro_opts) { 1456 case LAGG_OPT_USE_FLOWID: 1457 case -LAGG_OPT_USE_FLOWID: 1458 case LAGG_OPT_USE_NUMA: 1459 case -LAGG_OPT_USE_NUMA: 1460 case LAGG_OPT_FLOWIDSHIFT: 1461 case LAGG_OPT_RR_LIMIT: 1462 valid = 1; 1463 lacp = 0; 1464 break; 1465 case LAGG_OPT_LACP_TXTEST: 1466 case -LAGG_OPT_LACP_TXTEST: 1467 case LAGG_OPT_LACP_RXTEST: 1468 case -LAGG_OPT_LACP_RXTEST: 1469 case LAGG_OPT_LACP_STRICT: 1470 case -LAGG_OPT_LACP_STRICT: 1471 case LAGG_OPT_LACP_FAST_TIMO: 1472 case -LAGG_OPT_LACP_FAST_TIMO: 1473 valid = lacp = 1; 1474 break; 1475 default: 1476 valid = lacp = 0; 1477 break; 1478 } 1479 1480 if (valid == 0 || 1481 (lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) { 1482 /* Invalid combination of options specified. */ 1483 error = EINVAL; 1484 LAGG_XUNLOCK(sc); 1485 break; /* Return from SIOCSLAGGOPTS. */ 1486 } 1487 1488 /* 1489 * Store new options into sc->sc_opts except for 1490 * FLOWIDSHIFT, RR and LACP options. 1491 */ 1492 if (lacp == 0) { 1493 if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT) 1494 sc->flowid_shift = ro->ro_flowid_shift; 1495 else if (ro->ro_opts == LAGG_OPT_RR_LIMIT) { 1496 if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN || 1497 ro->ro_bkt == 0) { 1498 error = EINVAL; 1499 LAGG_XUNLOCK(sc); 1500 break; 1501 } 1502 sc->sc_stride = ro->ro_bkt; 1503 } else if (ro->ro_opts > 0) 1504 sc->sc_opts |= ro->ro_opts; 1505 else 1506 sc->sc_opts &= ~ro->ro_opts; 1507 } else { 1508 struct lacp_softc *lsc; 1509 struct lacp_port *lp; 1510 1511 lsc = (struct lacp_softc *)sc->sc_psc; 1512 1513 switch (ro->ro_opts) { 1514 case LAGG_OPT_LACP_TXTEST: 1515 lsc->lsc_debug.lsc_tx_test = 1; 1516 break; 1517 case -LAGG_OPT_LACP_TXTEST: 1518 lsc->lsc_debug.lsc_tx_test = 0; 1519 break; 1520 case LAGG_OPT_LACP_RXTEST: 1521 lsc->lsc_debug.lsc_rx_test = 1; 1522 break; 1523 case -LAGG_OPT_LACP_RXTEST: 1524 lsc->lsc_debug.lsc_rx_test = 0; 1525 break; 1526 case LAGG_OPT_LACP_STRICT: 1527 lsc->lsc_strict_mode = 1; 1528 break; 1529 case -LAGG_OPT_LACP_STRICT: 1530 lsc->lsc_strict_mode = 0; 1531 break; 1532 case LAGG_OPT_LACP_FAST_TIMO: 1533 LACP_LOCK(lsc); 1534 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) 1535 lp->lp_state |= LACP_STATE_TIMEOUT; 1536 LACP_UNLOCK(lsc); 1537 lsc->lsc_fast_timeout = 1; 1538 break; 1539 case -LAGG_OPT_LACP_FAST_TIMO: 1540 LACP_LOCK(lsc); 1541 LIST_FOREACH(lp, &lsc->lsc_ports, lp_next) 1542 lp->lp_state &= ~LACP_STATE_TIMEOUT; 1543 LACP_UNLOCK(lsc); 1544 lsc->lsc_fast_timeout = 0; 1545 break; 1546 } 1547 } 1548 LAGG_XUNLOCK(sc); 1549 break; 1550 case SIOCGLAGGFLAGS: 1551 rf->rf_flags = 0; 1552 LAGG_XLOCK(sc); 1553 if (sc->sc_flags & MBUF_HASHFLAG_L2) 1554 rf->rf_flags |= LAGG_F_HASHL2; 1555 if (sc->sc_flags & MBUF_HASHFLAG_L3) 1556 rf->rf_flags |= LAGG_F_HASHL3; 1557 if (sc->sc_flags & MBUF_HASHFLAG_L4) 1558 rf->rf_flags |= LAGG_F_HASHL4; 1559 LAGG_XUNLOCK(sc); 1560 break; 1561 case SIOCSLAGGHASH: 1562 error = priv_check(td, PRIV_NET_LAGG); 1563 if (error) 1564 break; 1565 if ((rf->rf_flags & LAGG_F_HASHMASK) == 0) { 1566 error = EINVAL; 1567 break; 1568 } 1569 LAGG_XLOCK(sc); 1570 sc->sc_flags = 0; 1571 if (rf->rf_flags & LAGG_F_HASHL2) 1572 sc->sc_flags |= MBUF_HASHFLAG_L2; 1573 if (rf->rf_flags & LAGG_F_HASHL3) 1574 sc->sc_flags |= MBUF_HASHFLAG_L3; 1575 if (rf->rf_flags & LAGG_F_HASHL4) 1576 sc->sc_flags |= MBUF_HASHFLAG_L4; 1577 LAGG_XUNLOCK(sc); 1578 break; 1579 case SIOCGLAGGPORT: 1580 if (rp->rp_portname[0] == '\0' || 1581 (tpif = ifunit_ref(rp->rp_portname)) == NULL) { 1582 error = EINVAL; 1583 break; 1584 } 1585 1586 
NET_EPOCH_ENTER(et); 1587 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || 1588 lp->lp_softc != sc) { 1589 error = ENOENT; 1590 NET_EPOCH_EXIT(et); 1591 if_rele(tpif); 1592 break; 1593 } 1594 1595 lagg_port2req(lp, rp); 1596 NET_EPOCH_EXIT(et); 1597 if_rele(tpif); 1598 break; 1599 case SIOCSLAGGPORT: 1600 error = priv_check(td, PRIV_NET_LAGG); 1601 if (error) 1602 break; 1603 if (rp->rp_portname[0] == '\0' || 1604 (tpif = ifunit_ref(rp->rp_portname)) == NULL) { 1605 error = EINVAL; 1606 break; 1607 } 1608 #ifdef INET6 1609 /* 1610 * A laggport interface should not have inet6 address 1611 * because two interfaces with a valid link-local 1612 * scope zone must not be merged in any form. This 1613 * restriction is needed to prevent violation of 1614 * link-local scope zone. Attempts to add a laggport 1615 * interface which has inet6 addresses triggers 1616 * removal of all inet6 addresses on the member 1617 * interface. 1618 */ 1619 if (in6ifa_llaonifp(tpif)) { 1620 in6_ifdetach(tpif); 1621 if_printf(sc->sc_ifp, 1622 "IPv6 addresses on %s have been removed " 1623 "before adding it as a member to prevent " 1624 "IPv6 address scope violation.\n", 1625 tpif->if_xname); 1626 } 1627 #endif 1628 oldmtu = ifp->if_mtu; 1629 LAGG_XLOCK(sc); 1630 error = lagg_port_create(sc, tpif); 1631 LAGG_XUNLOCK(sc); 1632 if_rele(tpif); 1633 1634 /* 1635 * LAGG MTU may change during addition of the first port. 1636 * If it did, do network layer specific procedure. 1637 */ 1638 if (ifp->if_mtu != oldmtu) { 1639 #ifdef INET6 1640 nd6_setmtu(ifp); 1641 #endif 1642 rt_updatemtu(ifp); 1643 } 1644 1645 VLAN_CAPABILITIES(ifp); 1646 break; 1647 case SIOCSLAGGDELPORT: 1648 error = priv_check(td, PRIV_NET_LAGG); 1649 if (error) 1650 break; 1651 if (rp->rp_portname[0] == '\0' || 1652 (tpif = ifunit_ref(rp->rp_portname)) == NULL) { 1653 error = EINVAL; 1654 break; 1655 } 1656 1657 LAGG_XLOCK(sc); 1658 if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL || 1659 lp->lp_softc != sc) { 1660 error = ENOENT; 1661 LAGG_XUNLOCK(sc); 1662 if_rele(tpif); 1663 break; 1664 } 1665 1666 error = lagg_port_destroy(lp, 1); 1667 LAGG_XUNLOCK(sc); 1668 if_rele(tpif); 1669 VLAN_CAPABILITIES(ifp); 1670 break; 1671 case SIOCSIFFLAGS: 1672 /* Set flags on ports too */ 1673 LAGG_XLOCK(sc); 1674 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1675 lagg_setflags(lp, 1); 1676 } 1677 1678 if (!(ifp->if_flags & IFF_UP) && 1679 (ifp->if_drv_flags & IFF_DRV_RUNNING)) { 1680 /* 1681 * If interface is marked down and it is running, 1682 * then stop and disable it. 1683 */ 1684 lagg_stop(sc); 1685 LAGG_XUNLOCK(sc); 1686 } else if ((ifp->if_flags & IFF_UP) && 1687 !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 1688 /* 1689 * If interface is marked up and it is stopped, then 1690 * start it. 
1691 */ 1692 LAGG_XUNLOCK(sc); 1693 (*ifp->if_init)(sc); 1694 } else 1695 LAGG_XUNLOCK(sc); 1696 break; 1697 case SIOCADDMULTI: 1698 case SIOCDELMULTI: 1699 LAGG_XLOCK(sc); 1700 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1701 lagg_clrmulti(lp); 1702 lagg_setmulti(lp); 1703 } 1704 LAGG_XUNLOCK(sc); 1705 error = 0; 1706 break; 1707 case SIOCSIFMEDIA: 1708 case SIOCGIFMEDIA: 1709 if (ifp->if_type == IFT_INFINIBAND) 1710 error = EINVAL; 1711 else 1712 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 1713 break; 1714 1715 case SIOCSIFCAP: 1716 case SIOCSIFCAPNV: 1717 LAGG_XLOCK(sc); 1718 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1719 if (lp->lp_ioctl != NULL) 1720 (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); 1721 } 1722 lagg_capabilities(sc); 1723 LAGG_XUNLOCK(sc); 1724 VLAN_CAPABILITIES(ifp); 1725 error = 0; 1726 break; 1727 1728 case SIOCGIFCAPNV: 1729 error = 0; 1730 break; 1731 1732 case SIOCSIFMTU: 1733 LAGG_XLOCK(sc); 1734 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1735 if (lp->lp_ioctl != NULL) 1736 error = (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); 1737 else 1738 error = EINVAL; 1739 if (error != 0) { 1740 if_printf(ifp, 1741 "failed to change MTU to %d on port %s, " 1742 "reverting all ports to original MTU (%d)\n", 1743 ifr->ifr_mtu, lp->lp_ifp->if_xname, ifp->if_mtu); 1744 break; 1745 } 1746 } 1747 if (error == 0) { 1748 ifp->if_mtu = ifr->ifr_mtu; 1749 } else { 1750 /* set every port back to the original MTU */ 1751 ifr->ifr_mtu = ifp->if_mtu; 1752 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 1753 if (lp->lp_ioctl != NULL) 1754 (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); 1755 } 1756 } 1757 lagg_capabilities(sc); 1758 LAGG_XUNLOCK(sc); 1759 VLAN_CAPABILITIES(ifp); 1760 break; 1761 1762 default: 1763 error = ether_ioctl(ifp, cmd, data); 1764 break; 1765 } 1766 return (error); 1767 } 1768 1769 #if defined(KERN_TLS) || defined(RATELIMIT) 1770 #ifdef RATELIMIT 1771 static const struct if_snd_tag_sw lagg_snd_tag_ul_sw = { 1772 .snd_tag_modify = lagg_snd_tag_modify, 1773 .snd_tag_query = lagg_snd_tag_query, 1774 .snd_tag_free = lagg_snd_tag_free, 1775 .next_snd_tag = lagg_next_snd_tag, 1776 .type = IF_SND_TAG_TYPE_UNLIMITED 1777 }; 1778 1779 static const struct if_snd_tag_sw lagg_snd_tag_rl_sw = { 1780 .snd_tag_modify = lagg_snd_tag_modify, 1781 .snd_tag_query = lagg_snd_tag_query, 1782 .snd_tag_free = lagg_snd_tag_free, 1783 .next_snd_tag = lagg_next_snd_tag, 1784 .type = IF_SND_TAG_TYPE_RATE_LIMIT 1785 }; 1786 #endif 1787 1788 #ifdef KERN_TLS 1789 static const struct if_snd_tag_sw lagg_snd_tag_tls_sw = { 1790 .snd_tag_modify = lagg_snd_tag_modify, 1791 .snd_tag_query = lagg_snd_tag_query, 1792 .snd_tag_free = lagg_snd_tag_free, 1793 .next_snd_tag = lagg_next_snd_tag, 1794 .type = IF_SND_TAG_TYPE_TLS 1795 }; 1796 1797 #ifdef RATELIMIT 1798 static const struct if_snd_tag_sw lagg_snd_tag_tls_rl_sw = { 1799 .snd_tag_modify = lagg_snd_tag_modify, 1800 .snd_tag_query = lagg_snd_tag_query, 1801 .snd_tag_free = lagg_snd_tag_free, 1802 .next_snd_tag = lagg_next_snd_tag, 1803 .type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT 1804 }; 1805 #endif 1806 #endif 1807 1808 static inline struct lagg_snd_tag * 1809 mst_to_lst(struct m_snd_tag *mst) 1810 { 1811 1812 return (__containerof(mst, struct lagg_snd_tag, com)); 1813 } 1814 1815 /* 1816 * Look up the port used by a specific flow. This only works for lagg 1817 * protocols with deterministic port mappings (e.g. not roundrobin). 
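 * (With LOADBALANCE the port index is (flowid >> flowid_shift) %
 * sc_count; with LACP the shifted flowid is handed to
 * lacp_select_tx_port_by_hash().)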
1818 * In addition protocols which use a hash to map flows to ports must 1819 * be configured to use the mbuf flowid rather than hashing packet 1820 * contents. 1821 */ 1822 static struct lagg_port * 1823 lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype, 1824 uint8_t numa_domain) 1825 { 1826 struct lagg_softc *sc; 1827 struct lagg_port *lp; 1828 struct lagg_lb *lb; 1829 uint32_t hash, p; 1830 int err; 1831 1832 sc = ifp->if_softc; 1833 1834 switch (sc->sc_proto) { 1835 case LAGG_PROTO_FAILOVER: 1836 return (lagg_link_active(sc, sc->sc_primary)); 1837 case LAGG_PROTO_LOADBALANCE: 1838 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || 1839 flowtype == M_HASHTYPE_NONE) 1840 return (NULL); 1841 p = flowid >> sc->flowid_shift; 1842 p %= sc->sc_count; 1843 lb = (struct lagg_lb *)sc->sc_psc; 1844 lp = lb->lb_ports[p]; 1845 return (lagg_link_active(sc, lp)); 1846 case LAGG_PROTO_LACP: 1847 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || 1848 flowtype == M_HASHTYPE_NONE) 1849 return (NULL); 1850 hash = flowid >> sc->flowid_shift; 1851 return (lacp_select_tx_port_by_hash(sc, hash, numa_domain, &err)); 1852 default: 1853 return (NULL); 1854 } 1855 } 1856 1857 static int 1858 lagg_snd_tag_alloc(struct ifnet *ifp, 1859 union if_snd_tag_alloc_params *params, 1860 struct m_snd_tag **ppmt) 1861 { 1862 struct epoch_tracker et; 1863 const struct if_snd_tag_sw *sw; 1864 struct lagg_snd_tag *lst; 1865 struct lagg_port *lp; 1866 struct ifnet *lp_ifp; 1867 struct m_snd_tag *mst; 1868 int error; 1869 1870 switch (params->hdr.type) { 1871 #ifdef RATELIMIT 1872 case IF_SND_TAG_TYPE_UNLIMITED: 1873 sw = &lagg_snd_tag_ul_sw; 1874 break; 1875 case IF_SND_TAG_TYPE_RATE_LIMIT: 1876 sw = &lagg_snd_tag_rl_sw; 1877 break; 1878 #endif 1879 #ifdef KERN_TLS 1880 case IF_SND_TAG_TYPE_TLS: 1881 sw = &lagg_snd_tag_tls_sw; 1882 break; 1883 case IF_SND_TAG_TYPE_TLS_RX: 1884 /* Return tag from port interface directly. 
*/ 1885 sw = NULL; 1886 break; 1887 #ifdef RATELIMIT 1888 case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: 1889 sw = &lagg_snd_tag_tls_rl_sw; 1890 break; 1891 #endif 1892 #endif 1893 default: 1894 return (EOPNOTSUPP); 1895 } 1896 1897 NET_EPOCH_ENTER(et); 1898 lp = lookup_snd_tag_port(ifp, params->hdr.flowid, 1899 params->hdr.flowtype, params->hdr.numa_domain); 1900 if (lp == NULL) { 1901 NET_EPOCH_EXIT(et); 1902 return (EOPNOTSUPP); 1903 } 1904 if (lp->lp_ifp == NULL) { 1905 NET_EPOCH_EXIT(et); 1906 return (EOPNOTSUPP); 1907 } 1908 lp_ifp = lp->lp_ifp; 1909 if_ref(lp_ifp); 1910 NET_EPOCH_EXIT(et); 1911 1912 if (sw != NULL) { 1913 lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT); 1914 if (lst == NULL) { 1915 if_rele(lp_ifp); 1916 return (ENOMEM); 1917 } 1918 } else 1919 lst = NULL; 1920 1921 error = m_snd_tag_alloc(lp_ifp, params, &mst); 1922 if_rele(lp_ifp); 1923 if (error) { 1924 free(lst, M_LAGG); 1925 return (error); 1926 } 1927 1928 if (sw != NULL) { 1929 m_snd_tag_init(&lst->com, ifp, sw); 1930 lst->tag = mst; 1931 1932 *ppmt = &lst->com; 1933 } else 1934 *ppmt = mst; 1935 1936 return (0); 1937 } 1938 1939 static struct m_snd_tag * 1940 lagg_next_snd_tag(struct m_snd_tag *mst) 1941 { 1942 struct lagg_snd_tag *lst; 1943 1944 lst = mst_to_lst(mst); 1945 return (lst->tag); 1946 } 1947 1948 static int 1949 lagg_snd_tag_modify(struct m_snd_tag *mst, 1950 union if_snd_tag_modify_params *params) 1951 { 1952 struct lagg_snd_tag *lst; 1953 1954 lst = mst_to_lst(mst); 1955 return (lst->tag->sw->snd_tag_modify(lst->tag, params)); 1956 } 1957 1958 static int 1959 lagg_snd_tag_query(struct m_snd_tag *mst, 1960 union if_snd_tag_query_params *params) 1961 { 1962 struct lagg_snd_tag *lst; 1963 1964 lst = mst_to_lst(mst); 1965 return (lst->tag->sw->snd_tag_query(lst->tag, params)); 1966 } 1967 1968 static void 1969 lagg_snd_tag_free(struct m_snd_tag *mst) 1970 { 1971 struct lagg_snd_tag *lst; 1972 1973 lst = mst_to_lst(mst); 1974 m_snd_tag_rele(lst->tag); 1975 free(lst, M_LAGG); 1976 } 1977 1978 static void 1979 lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q) 1980 { 1981 /* 1982 * For lagg, we have an indirect 1983 * interface. The caller needs to 1984 * get a ratelimit tag on the actual 1985 * interface the flow will go on. 
1986 */ 1987 q->rate_table = NULL; 1988 q->flags = RT_IS_INDIRECT; 1989 q->max_flows = 0; 1990 q->number_of_rates = 0; 1991 } 1992 #endif 1993 1994 static int 1995 lagg_setmulti(struct lagg_port *lp) 1996 { 1997 struct lagg_softc *sc = lp->lp_softc; 1998 struct ifnet *ifp = lp->lp_ifp; 1999 struct ifnet *scifp = sc->sc_ifp; 2000 struct lagg_mc *mc; 2001 struct ifmultiaddr *ifma; 2002 int error; 2003 2004 IF_ADDR_WLOCK(scifp); 2005 CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) { 2006 if (ifma->ifma_addr->sa_family != AF_LINK) 2007 continue; 2008 mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT); 2009 if (mc == NULL) { 2010 IF_ADDR_WUNLOCK(scifp); 2011 return (ENOMEM); 2012 } 2013 bcopy(ifma->ifma_addr, &mc->mc_addr, 2014 ifma->ifma_addr->sa_len); 2015 mc->mc_addr.sdl_index = ifp->if_index; 2016 mc->mc_ifma = NULL; 2017 SLIST_INSERT_HEAD(&lp->lp_mc_head, mc, mc_entries); 2018 } 2019 IF_ADDR_WUNLOCK(scifp); 2020 SLIST_FOREACH (mc, &lp->lp_mc_head, mc_entries) { 2021 error = if_addmulti(ifp, 2022 (struct sockaddr *)&mc->mc_addr, &mc->mc_ifma); 2023 if (error) 2024 return (error); 2025 } 2026 return (0); 2027 } 2028 2029 static int 2030 lagg_clrmulti(struct lagg_port *lp) 2031 { 2032 struct lagg_mc *mc; 2033 2034 LAGG_XLOCK_ASSERT(lp->lp_softc); 2035 while ((mc = SLIST_FIRST(&lp->lp_mc_head)) != NULL) { 2036 SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries); 2037 if (mc->mc_ifma && lp->lp_detaching == 0) 2038 if_delmulti_ifma(mc->mc_ifma); 2039 free(mc, M_LAGG); 2040 } 2041 return (0); 2042 } 2043 2044 static void 2045 lagg_setcaps(struct lagg_port *lp, int cap, int cap2) 2046 { 2047 struct ifreq ifr; 2048 struct siocsifcapnv_driver_data drv_ioctl_data; 2049 2050 if (lp->lp_ifp->if_capenable == cap && 2051 lp->lp_ifp->if_capenable2 == cap2) 2052 return; 2053 if (lp->lp_ioctl == NULL) 2054 return; 2055 /* XXX */ 2056 if ((lp->lp_ifp->if_capabilities & IFCAP_NV) != 0) { 2057 drv_ioctl_data.reqcap = cap; 2058 drv_ioctl_data.reqcap2 = cap2; 2059 drv_ioctl_data.nvcap = NULL; 2060 (*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAPNV, 2061 (caddr_t)&drv_ioctl_data); 2062 } else { 2063 ifr.ifr_reqcap = cap; 2064 (*lp->lp_ioctl)(lp->lp_ifp, SIOCSIFCAP, (caddr_t)&ifr); 2065 } 2066 } 2067 2068 /* Handle a ref counted flag that should be set on the lagg port as well */ 2069 static int 2070 lagg_setflag(struct lagg_port *lp, int flag, int status, 2071 int (*func)(struct ifnet *, int)) 2072 { 2073 struct lagg_softc *sc = lp->lp_softc; 2074 struct ifnet *scifp = sc->sc_ifp; 2075 struct ifnet *ifp = lp->lp_ifp; 2076 int error; 2077 2078 LAGG_XLOCK_ASSERT(sc); 2079 2080 status = status ? (scifp->if_flags & flag) : 0; 2081 /* Now "status" contains the flag value or 0 */ 2082 2083 /* 2084 * See if recorded ports status is different from what 2085 * we want it to be. If it is, flip it. We record ports 2086 * status in lp_ifflags so that we won't clear ports flag 2087 * we haven't set. In fact, we don't clear or set ports 2088 * flags directly, but get or release references to them. 2089 * That's why we can be sure that recorded flags still are 2090 * in accord with actual ports flags. 
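 * For example, when IFF_PROMISC is set on the lagg, each port gains
 * one ifpromisc() reference and the flag is recorded in lp_ifflags;
 * clearing it later releases exactly that reference, leaving any
 * promiscuous references held by other consumers of the port intact.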
2091 */
2092 if (status != (lp->lp_ifflags & flag)) {
2093 error = (*func)(ifp, status);
2094 if (error)
2095 return (error);
2096 lp->lp_ifflags &= ~flag;
2097 lp->lp_ifflags |= status;
2098 }
2099 return (0);
2100 }
2101 
2102 /*
2103 * Handle IFF_* flags that require certain changes on the lagg port:
2104 * if "status" is true, update the port's flags to match those of the lagg;
2105 * if "status" is false, forcibly clear the flags set on the port.
2106 */
2107 static int
2108 lagg_setflags(struct lagg_port *lp, int status)
2109 {
2110 int error, i;
2111 
2112 for (i = 0; lagg_pflags[i].flag; i++) {
2113 error = lagg_setflag(lp, lagg_pflags[i].flag,
2114 status, lagg_pflags[i].func);
2115 if (error)
2116 return (error);
2117 }
2118 return (0);
2119 }
2120 
2121 static int
2122 lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
2123 {
2124 struct epoch_tracker et;
2125 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
2126 int error;
2127 
2128 #if defined(KERN_TLS) || defined(RATELIMIT)
2129 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
2130 MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
2131 #endif
2132 NET_EPOCH_ENTER(et);
2133 /* We need a Tx algorithm and at least one port */
2134 if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
2135 NET_EPOCH_EXIT(et);
2136 m_freem(m);
2137 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2138 return (ENXIO);
2139 }
2140 
2141 ETHER_BPF_MTAP(ifp, m);
2142 
2143 error = lagg_proto_start(sc, m);
2144 NET_EPOCH_EXIT(et);
2145 return (error);
2146 }
2147 
2148 static int
2149 lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
2150 {
2151 struct epoch_tracker et;
2152 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
2153 int error;
2154 
2155 #if defined(KERN_TLS) || defined(RATELIMIT)
2156 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
2157 MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
2158 #endif
2159 NET_EPOCH_ENTER(et);
2160 /* We need a Tx algorithm and at least one port */
2161 if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
2162 NET_EPOCH_EXIT(et);
2163 m_freem(m);
2164 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2165 return (ENXIO);
2166 }
2167 
2168 INFINIBAND_BPF_MTAP(ifp, m);
2169 
2170 error = lagg_proto_start(sc, m);
2171 NET_EPOCH_EXIT(et);
2172 return (error);
2173 }
2174 
2175 /*
2176 * The ifp->if_qflush entry point for lagg(4) is a no-op.
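 * Nothing is ever queued on the lagg interface itself: lagg_transmit_ethernet()
 * and lagg_transmit_infiniband() hand each mbuf straight to a port's
 * if_transmit via lagg_enqueue(), so there is nothing to flush.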
2177 */ 2178 static void 2179 lagg_qflush(struct ifnet *ifp __unused) 2180 { 2181 } 2182 2183 static struct mbuf * 2184 lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m) 2185 { 2186 struct epoch_tracker et; 2187 struct lagg_port *lp = ifp->if_lagg; 2188 struct lagg_softc *sc = lp->lp_softc; 2189 struct ifnet *scifp = sc->sc_ifp; 2190 2191 NET_EPOCH_ENTER(et); 2192 if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2193 lp->lp_detaching != 0 || 2194 sc->sc_proto == LAGG_PROTO_NONE) { 2195 NET_EPOCH_EXIT(et); 2196 m_freem(m); 2197 return (NULL); 2198 } 2199 2200 ETHER_BPF_MTAP(scifp, m); 2201 2202 m = lagg_proto_input(sc, lp, m); 2203 if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) { 2204 m_freem(m); 2205 m = NULL; 2206 } 2207 2208 #ifdef DEV_NETMAP 2209 if (m != NULL && scifp->if_capenable & IFCAP_NETMAP) { 2210 scifp->if_input(scifp, m); 2211 m = NULL; 2212 } 2213 #endif /* DEV_NETMAP */ 2214 2215 NET_EPOCH_EXIT(et); 2216 return (m); 2217 } 2218 2219 static struct mbuf * 2220 lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m) 2221 { 2222 struct epoch_tracker et; 2223 struct lagg_port *lp = ifp->if_lagg; 2224 struct lagg_softc *sc = lp->lp_softc; 2225 struct ifnet *scifp = sc->sc_ifp; 2226 2227 NET_EPOCH_ENTER(et); 2228 if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2229 lp->lp_detaching != 0 || 2230 sc->sc_proto == LAGG_PROTO_NONE) { 2231 NET_EPOCH_EXIT(et); 2232 m_freem(m); 2233 return (NULL); 2234 } 2235 2236 INFINIBAND_BPF_MTAP(scifp, m); 2237 2238 m = lagg_proto_input(sc, lp, m); 2239 if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) { 2240 m_freem(m); 2241 m = NULL; 2242 } 2243 2244 NET_EPOCH_EXIT(et); 2245 return (m); 2246 } 2247 2248 static int 2249 lagg_media_change(struct ifnet *ifp) 2250 { 2251 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; 2252 2253 if (sc->sc_ifflags & IFF_DEBUG) 2254 printf("%s\n", __func__); 2255 2256 /* Ignore */ 2257 return (0); 2258 } 2259 2260 static void 2261 lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr) 2262 { 2263 struct epoch_tracker et; 2264 struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; 2265 struct lagg_port *lp; 2266 2267 imr->ifm_status = IFM_AVALID; 2268 imr->ifm_active = IFM_ETHER | IFM_AUTO; 2269 2270 NET_EPOCH_ENTER(et); 2271 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 2272 if (LAGG_PORTACTIVE(lp)) 2273 imr->ifm_status |= IFM_ACTIVE; 2274 } 2275 NET_EPOCH_EXIT(et); 2276 } 2277 2278 static void 2279 lagg_linkstate(struct lagg_softc *sc) 2280 { 2281 struct epoch_tracker et; 2282 struct lagg_port *lp; 2283 int new_link = LINK_STATE_DOWN; 2284 uint64_t speed; 2285 2286 LAGG_XLOCK_ASSERT(sc); 2287 2288 /* LACP handles link state itself */ 2289 if (sc->sc_proto == LAGG_PROTO_LACP) 2290 return; 2291 2292 /* Our link is considered up if at least one of our ports is active */ 2293 NET_EPOCH_ENTER(et); 2294 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 2295 if (lp->lp_ifp->if_link_state == LINK_STATE_UP) { 2296 new_link = LINK_STATE_UP; 2297 break; 2298 } 2299 } 2300 NET_EPOCH_EXIT(et); 2301 if_link_state_change(sc->sc_ifp, new_link); 2302 2303 /* Update if_baudrate to reflect the max possible speed */ 2304 switch (sc->sc_proto) { 2305 case LAGG_PROTO_FAILOVER: 2306 sc->sc_ifp->if_baudrate = sc->sc_primary != NULL ? 
2307 sc->sc_primary->lp_ifp->if_baudrate : 0;
2308 break;
2309 case LAGG_PROTO_ROUNDROBIN:
2310 case LAGG_PROTO_LOADBALANCE:
2311 case LAGG_PROTO_BROADCAST:
2312 speed = 0;
2313 NET_EPOCH_ENTER(et);
2314 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
2315 speed += lp->lp_ifp->if_baudrate;
2316 NET_EPOCH_EXIT(et);
2317 sc->sc_ifp->if_baudrate = speed;
2318 break;
2319 case LAGG_PROTO_LACP:
2320 /* LACP updates if_baudrate itself */
2321 break;
2322 }
2323 }
2324 
2325 static void
2326 lagg_port_state(struct ifnet *ifp, int state)
2327 {
2328 struct lagg_port *lp = (struct lagg_port *)ifp->if_lagg;
2329 struct lagg_softc *sc = NULL;
2330 
2331 if (lp != NULL)
2332 sc = lp->lp_softc;
2333 if (sc == NULL)
2334 return;
2335 
2336 LAGG_XLOCK(sc);
2337 lagg_linkstate(sc);
2338 lagg_proto_linkstate(sc, lp);
2339 LAGG_XUNLOCK(sc);
2340 }
2341 
2342 struct lagg_port *
2343 lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
2344 {
2345 struct lagg_port *lp_next, *rval = NULL;
2346 
2347 /*
2348 * Search for a port that reports an active link state.
2349 */
2350 
2351 #ifdef INVARIANTS
2352 /*
2353 * This is called either in the network epoch
2354 * or with LAGG_XLOCK(sc) held.
2355 */
2356 if (!in_epoch(net_epoch_preempt))
2357 LAGG_XLOCK_ASSERT(sc);
2358 #endif
2359 
2360 if (lp == NULL)
2361 goto search;
2362 if (LAGG_PORTACTIVE(lp)) {
2363 rval = lp;
2364 goto found;
2365 }
2366 if ((lp_next = CK_SLIST_NEXT(lp, lp_entries)) != NULL &&
2367 LAGG_PORTACTIVE(lp_next)) {
2368 rval = lp_next;
2369 goto found;
2370 }
2371 
2372 search:
2373 CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
2374 if (LAGG_PORTACTIVE(lp_next)) {
2375 return (lp_next);
2376 }
2377 }
2378 found:
2379 return (rval);
2380 }
2381 
2382 int
2383 lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
2384 {
2385 
2386 #if defined(KERN_TLS) || defined(RATELIMIT)
2387 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
2388 struct lagg_snd_tag *lst;
2389 struct m_snd_tag *mst;
2390 
2391 mst = m->m_pkthdr.snd_tag;
2392 lst = mst_to_lst(mst);
2393 if (lst->tag->ifp != ifp) {
2394 m_freem(m);
2395 return (EAGAIN);
2396 }
2397 m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag);
2398 m_snd_tag_rele(mst);
2399 }
2400 #endif
2401 return (ifp->if_transmit)(ifp, m);
2402 }
2403 
2404 /*
2405 * Simple round robin aggregation
2406 */
2407 static void
2408 lagg_rr_attach(struct lagg_softc *sc)
2409 {
2410 sc->sc_seq = 0;
2411 sc->sc_stride = 1;
2412 }
2413 
2414 static int
2415 lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
2416 {
2417 struct lagg_port *lp;
2418 uint32_t p;
2419 
2420 p = atomic_fetchadd_32(&sc->sc_seq, 1);
2421 p /= sc->sc_stride;
2422 p %= sc->sc_count;
2423 lp = CK_SLIST_FIRST(&sc->sc_ports);
2424 
2425 while (p--)
2426 lp = CK_SLIST_NEXT(lp, lp_entries);
2427 
2428 /*
2429 * Check the port's link state. This will return the next active
2430 * port if the link is down or the port is NULL.
2431 */ 2432 if ((lp = lagg_link_active(sc, lp)) == NULL) { 2433 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2434 m_freem(m); 2435 return (ENETDOWN); 2436 } 2437 2438 /* Send mbuf */ 2439 return (lagg_enqueue(lp->lp_ifp, m)); 2440 } 2441 2442 static struct mbuf * 2443 lagg_rr_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2444 { 2445 struct ifnet *ifp = sc->sc_ifp; 2446 2447 /* Just pass in the packet to our lagg device */ 2448 m->m_pkthdr.rcvif = ifp; 2449 2450 return (m); 2451 } 2452 2453 /* 2454 * Broadcast mode 2455 */ 2456 static int 2457 lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m) 2458 { 2459 int errors = 0; 2460 int ret; 2461 struct lagg_port *lp, *last = NULL; 2462 struct mbuf *m0; 2463 2464 NET_EPOCH_ASSERT(); 2465 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { 2466 if (!LAGG_PORTACTIVE(lp)) 2467 continue; 2468 2469 if (last != NULL) { 2470 m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT); 2471 if (m0 == NULL) { 2472 ret = ENOBUFS; 2473 errors++; 2474 break; 2475 } 2476 lagg_enqueue(last->lp_ifp, m0); 2477 } 2478 last = lp; 2479 } 2480 2481 if (last == NULL) { 2482 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2483 m_freem(m); 2484 return (ENOENT); 2485 } 2486 if ((last = lagg_link_active(sc, last)) == NULL) { 2487 errors++; 2488 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors); 2489 m_freem(m); 2490 return (ENETDOWN); 2491 } 2492 2493 ret = lagg_enqueue(last->lp_ifp, m); 2494 if (errors != 0) 2495 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, errors); 2496 2497 return (ret); 2498 } 2499 2500 static struct mbuf* 2501 lagg_bcast_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2502 { 2503 struct ifnet *ifp = sc->sc_ifp; 2504 2505 /* Just pass in the packet to our lagg device */ 2506 m->m_pkthdr.rcvif = ifp; 2507 return (m); 2508 } 2509 2510 /* 2511 * Active failover 2512 */ 2513 static int 2514 lagg_fail_start(struct lagg_softc *sc, struct mbuf *m) 2515 { 2516 struct lagg_port *lp; 2517 2518 /* Use the master port if active or the next available port */ 2519 if ((lp = lagg_link_active(sc, sc->sc_primary)) == NULL) { 2520 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2521 m_freem(m); 2522 return (ENETDOWN); 2523 } 2524 2525 /* Send mbuf */ 2526 return (lagg_enqueue(lp->lp_ifp, m)); 2527 } 2528 2529 static struct mbuf * 2530 lagg_fail_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2531 { 2532 struct ifnet *ifp = sc->sc_ifp; 2533 struct lagg_port *tmp_tp; 2534 2535 if (lp == sc->sc_primary || V_lagg_failover_rx_all) { 2536 m->m_pkthdr.rcvif = ifp; 2537 return (m); 2538 } 2539 2540 if (!LAGG_PORTACTIVE(sc->sc_primary)) { 2541 tmp_tp = lagg_link_active(sc, sc->sc_primary); 2542 /* 2543 * If tmp_tp is null, we've received a packet when all 2544 * our links are down. Weird, but process it anyways. 
2545 */ 2546 if ((tmp_tp == NULL || tmp_tp == lp)) { 2547 m->m_pkthdr.rcvif = ifp; 2548 return (m); 2549 } 2550 } 2551 2552 m_freem(m); 2553 return (NULL); 2554 } 2555 2556 /* 2557 * Loadbalancing 2558 */ 2559 static void 2560 lagg_lb_attach(struct lagg_softc *sc) 2561 { 2562 struct lagg_port *lp; 2563 struct lagg_lb *lb; 2564 2565 LAGG_XLOCK_ASSERT(sc); 2566 lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO); 2567 lb->lb_key = m_ether_tcpip_hash_init(); 2568 sc->sc_psc = lb; 2569 2570 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2571 lagg_lb_port_create(lp); 2572 } 2573 2574 static void 2575 lagg_lb_detach(struct lagg_softc *sc) 2576 { 2577 struct lagg_lb *lb; 2578 2579 lb = (struct lagg_lb *)sc->sc_psc; 2580 if (lb != NULL) 2581 free(lb, M_LAGG); 2582 } 2583 2584 static int 2585 lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp) 2586 { 2587 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; 2588 struct lagg_port *lp_next; 2589 int i = 0, rv; 2590 2591 rv = 0; 2592 bzero(&lb->lb_ports, sizeof(lb->lb_ports)); 2593 LAGG_XLOCK_ASSERT(sc); 2594 CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { 2595 if (lp_next == lp) 2596 continue; 2597 if (i >= LAGG_MAX_PORTS) { 2598 rv = EINVAL; 2599 break; 2600 } 2601 if (sc->sc_ifflags & IFF_DEBUG) 2602 printf("%s: port %s at index %d\n", 2603 sc->sc_ifname, lp_next->lp_ifp->if_xname, i); 2604 lb->lb_ports[i++] = lp_next; 2605 } 2606 2607 return (rv); 2608 } 2609 2610 static int 2611 lagg_lb_port_create(struct lagg_port *lp) 2612 { 2613 struct lagg_softc *sc = lp->lp_softc; 2614 return (lagg_lb_porttable(sc, NULL)); 2615 } 2616 2617 static void 2618 lagg_lb_port_destroy(struct lagg_port *lp) 2619 { 2620 struct lagg_softc *sc = lp->lp_softc; 2621 lagg_lb_porttable(sc, lp); 2622 } 2623 2624 static int 2625 lagg_lb_start(struct lagg_softc *sc, struct mbuf *m) 2626 { 2627 struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; 2628 struct lagg_port *lp = NULL; 2629 uint32_t p = 0; 2630 2631 if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && 2632 M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2633 p = m->m_pkthdr.flowid >> sc->flowid_shift; 2634 else 2635 p = m_ether_tcpip_hash(sc->sc_flags, m, lb->lb_key); 2636 p %= sc->sc_count; 2637 lp = lb->lb_ports[p]; 2638 2639 /* 2640 * Check the port's link state. This will return the next active 2641 * port if the link is down or the port is NULL. 
2642 */ 2643 if ((lp = lagg_link_active(sc, lp)) == NULL) { 2644 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2645 m_freem(m); 2646 return (ENETDOWN); 2647 } 2648 2649 /* Send mbuf */ 2650 return (lagg_enqueue(lp->lp_ifp, m)); 2651 } 2652 2653 static struct mbuf * 2654 lagg_lb_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2655 { 2656 struct ifnet *ifp = sc->sc_ifp; 2657 2658 /* Just pass in the packet to our lagg device */ 2659 m->m_pkthdr.rcvif = ifp; 2660 2661 return (m); 2662 } 2663 2664 /* 2665 * 802.3ad LACP 2666 */ 2667 static void 2668 lagg_lacp_attach(struct lagg_softc *sc) 2669 { 2670 struct lagg_port *lp; 2671 2672 lacp_attach(sc); 2673 LAGG_XLOCK_ASSERT(sc); 2674 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2675 lacp_port_create(lp); 2676 } 2677 2678 static void 2679 lagg_lacp_detach(struct lagg_softc *sc) 2680 { 2681 struct lagg_port *lp; 2682 void *psc; 2683 2684 LAGG_XLOCK_ASSERT(sc); 2685 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2686 lacp_port_destroy(lp); 2687 2688 psc = sc->sc_psc; 2689 sc->sc_psc = NULL; 2690 lacp_detach(psc); 2691 } 2692 2693 static void 2694 lagg_lacp_lladdr(struct lagg_softc *sc) 2695 { 2696 struct lagg_port *lp; 2697 2698 LAGG_SXLOCK_ASSERT(sc); 2699 2700 /* purge all the lacp ports */ 2701 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2702 lacp_port_destroy(lp); 2703 2704 /* add them back in */ 2705 CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) 2706 lacp_port_create(lp); 2707 } 2708 2709 static int 2710 lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m) 2711 { 2712 struct lagg_port *lp; 2713 int err; 2714 2715 lp = lacp_select_tx_port(sc, m, &err); 2716 if (lp == NULL) { 2717 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 2718 m_freem(m); 2719 return (err); 2720 } 2721 2722 /* Send mbuf */ 2723 return (lagg_enqueue(lp->lp_ifp, m)); 2724 } 2725 2726 static struct mbuf * 2727 lagg_lacp_input(struct lagg_softc *sc, struct lagg_port *lp, struct mbuf *m) 2728 { 2729 struct ifnet *ifp = sc->sc_ifp; 2730 struct ether_header *eh; 2731 u_short etype; 2732 2733 eh = mtod(m, struct ether_header *); 2734 etype = ntohs(eh->ether_type); 2735 2736 /* Tap off LACP control messages */ 2737 if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_SLOW) { 2738 m = lacp_input(lp, m); 2739 if (m == NULL) 2740 return (NULL); 2741 } 2742 2743 /* 2744 * If the port is not collecting or not in the active aggregator then 2745 * free and return. 2746 */ 2747 if (lacp_iscollecting(lp) == 0 || lacp_isactive(lp) == 0) { 2748 m_freem(m); 2749 return (NULL); 2750 } 2751 2752 m->m_pkthdr.rcvif = ifp; 2753 return (m); 2754 } 2755
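/*
 * Example (illustrative only; the interface names and address below are
 * placeholders, not part of the driver): a lagg(4) interface driven by the
 * LACP protocol code above is typically assembled from userland with
 * ifconfig(8) along these lines:
 *
 *	ifconfig ix0 up
 *	ifconfig ix1 up
 *	ifconfig lagg0 create
 *	ifconfig lagg0 up laggproto lacp laggport ix0 laggport ix1 \
 *	    192.0.2.10 netmask 255.255.255.0
 */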