1 /*- 2 * Copyright 1998 Massachusetts Institute of Technology 3 * Copyright 2012 ADARA Networks, Inc. 4 * 5 * Portions of this software were developed by Robert N. M. Watson under 6 * contract to ADARA Networks, Inc. 7 * 8 * Permission to use, copy, modify, and distribute this software and 9 * its documentation for any purpose and without fee is hereby 10 * granted, provided that both the above copyright notice and this 11 * permission notice appear in all copies, that both the above 12 * copyright notice and this permission notice appear in all 13 * supporting documentation, and that the name of M.I.T. not be used 14 * in advertising or publicity pertaining to distribution of the 15 * software without specific, written prior permission. M.I.T. makes 16 * no representations about the suitability of this software for any 17 * purpose. It is provided "as is" without express or implied 18 * warranty. 19 * 20 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 21 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 22 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 23 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 24 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 27 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs. 36 * This is sort of sneaky in the implementation, since 37 * we need to pretend to be enough of an Ethernet implementation 38 * to make arp work. The way we do this is by telling everyone 39 * that we are an Ethernet, and then catch the packets that 40 * ether_output() sends to us via if_transmit(), rewrite them for 41 * use by the real outgoing interface, and ask it to send them. 42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include "opt_inet.h" 48 #include "opt_vlan.h" 49 #include "opt_ratelimit.h" 50 51 #include <sys/param.h> 52 #include <sys/eventhandler.h> 53 #include <sys/kernel.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/mbuf.h> 57 #include <sys/module.h> 58 #include <sys/rmlock.h> 59 #include <sys/priv.h> 60 #include <sys/queue.h> 61 #include <sys/socket.h> 62 #include <sys/sockio.h> 63 #include <sys/sysctl.h> 64 #include <sys/systm.h> 65 #include <sys/sx.h> 66 67 #include <net/bpf.h> 68 #include <net/ethernet.h> 69 #include <net/if.h> 70 #include <net/if_var.h> 71 #include <net/if_clone.h> 72 #include <net/if_dl.h> 73 #include <net/if_types.h> 74 #include <net/if_vlan_var.h> 75 #include <net/vnet.h> 76 77 #ifdef INET 78 #include <netinet/in.h> 79 #include <netinet/if_ether.h> 80 #endif 81 82 #define VLAN_DEF_HWIDTH 4 83 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) 84 85 #define UP_AND_RUNNING(ifp) \ 86 ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING) 87 88 LIST_HEAD(ifvlanhead, ifvlan); 89 90 struct ifvlantrunk { 91 struct ifnet *parent; /* parent interface of this trunk */ 92 struct rmlock lock; 93 #ifdef VLAN_ARRAY 94 #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) 95 struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ 96 #else 97 struct ifvlanhead *hash; /* dynamic hash-list table */ 98 uint16_t hmask; 99 uint16_t hwidth; 100 #endif 101 int refcnt; 102 }; 103 104 struct vlan_mc_entry { 105 struct sockaddr_dl mc_addr; 106 SLIST_ENTRY(vlan_mc_entry) mc_entries; 107 }; 108 109 struct ifvlan { 110 struct ifvlantrunk *ifv_trunk; 111 struct ifnet *ifv_ifp; 112 #define TRUNK(ifv) ((ifv)->ifv_trunk) 113 #define PARENT(ifv) ((ifv)->ifv_trunk->parent) 114 void *ifv_cookie; 115 int ifv_pflags; /* special flags we have set on parent */ 116 int ifv_capenable; 117 struct ifv_linkmib { 118 int ifvm_encaplen; /* encapsulation length */ 119 int ifvm_mtufudge; /* MTU fudged by this much */ 120 int ifvm_mintu; /* min transmission unit */ 121 uint16_t ifvm_proto; /* encapsulation ethertype */ 122 uint16_t ifvm_tag; /* tag to apply on packets leaving if */ 123 uint16_t ifvm_vid; /* VLAN ID */ 124 uint8_t ifvm_pcp; /* Priority Code Point (PCP). */ 125 } ifv_mib; 126 SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; 127 #ifndef VLAN_ARRAY 128 LIST_ENTRY(ifvlan) ifv_list; 129 #endif 130 }; 131 #define ifv_proto ifv_mib.ifvm_proto 132 #define ifv_tag ifv_mib.ifvm_tag 133 #define ifv_vid ifv_mib.ifvm_vid 134 #define ifv_pcp ifv_mib.ifvm_pcp 135 #define ifv_encaplen ifv_mib.ifvm_encaplen 136 #define ifv_mtufudge ifv_mib.ifvm_mtufudge 137 #define ifv_mintu ifv_mib.ifvm_mintu 138 139 /* Special flags we should propagate to parent. */ 140 static struct { 141 int flag; 142 int (*func)(struct ifnet *, int); 143 } vlan_pflags[] = { 144 {IFF_PROMISC, ifpromisc}, 145 {IFF_ALLMULTI, if_allmulti}, 146 {0, NULL} 147 }; 148 149 SYSCTL_DECL(_net_link); 150 static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, 151 "IEEE 802.1Q VLAN"); 152 static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, 153 "for consistency"); 154 155 static VNET_DEFINE(int, soft_pad); 156 #define V_soft_pad VNET(soft_pad) 157 SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET, 158 &VNET_NAME(soft_pad), 0, "pad short frames before tagging"); 159 160 /* 161 * For now, make preserving PCP via an mbuf tag optional, as it increases 162 * per-packet memory allocations and frees. In the future, it would be 163 * preferable to reuse ether_vtag for this, or similar. 164 */ 165 static int vlan_mtag_pcp = 0; 166 SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW, &vlan_mtag_pcp, 0, 167 "Retain VLAN PCP information as packets are passed up the stack"); 168 169 static const char vlanname[] = "vlan"; 170 static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface"); 171 172 static eventhandler_tag ifdetach_tag; 173 static eventhandler_tag iflladdr_tag; 174 175 /* 176 * We have a global mutex, that is used to serialize configuration 177 * changes and isn't used in normal packet delivery. 178 * 179 * We also have a per-trunk rmlock(9), that is locked shared on packet 180 * processing and exclusive when configuration is changed. 181 * 182 * The VLAN_ARRAY substitutes the dynamic hash with a static array 183 * with 4096 entries. In theory this can give a boost in processing, 184 * however on practice it does not. Probably this is because array 185 * is too big to fit into CPU cache. 186 */ 187 static struct sx ifv_lock; 188 #define VLAN_LOCK_INIT() sx_init(&ifv_lock, "vlan_global") 189 #define VLAN_LOCK_DESTROY() sx_destroy(&ifv_lock) 190 #define VLAN_LOCK_ASSERT() sx_assert(&ifv_lock, SA_LOCKED) 191 #define VLAN_LOCK() sx_xlock(&ifv_lock) 192 #define VLAN_UNLOCK() sx_xunlock(&ifv_lock) 193 #define TRUNK_LOCK_INIT(trunk) rm_init(&(trunk)->lock, vlanname) 194 #define TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock) 195 #define TRUNK_LOCK(trunk) rm_wlock(&(trunk)->lock) 196 #define TRUNK_UNLOCK(trunk) rm_wunlock(&(trunk)->lock) 197 #define TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED) 198 #define TRUNK_RLOCK(trunk) rm_rlock(&(trunk)->lock, &tracker) 199 #define TRUNK_RUNLOCK(trunk) rm_runlock(&(trunk)->lock, &tracker) 200 #define TRUNK_LOCK_RASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED) 201 #define TRUNK_LOCK_READER struct rm_priotracker tracker 202 203 #ifndef VLAN_ARRAY 204 static void vlan_inithash(struct ifvlantrunk *trunk); 205 static void vlan_freehash(struct ifvlantrunk *trunk); 206 static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); 207 static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); 208 static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); 209 static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, 210 uint16_t vid); 211 #endif 212 static void trunk_destroy(struct ifvlantrunk *trunk); 213 214 static void vlan_init(void *foo); 215 static void vlan_input(struct ifnet *ifp, struct mbuf *m); 216 static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); 217 #ifdef RATELIMIT 218 static int vlan_snd_tag_alloc(struct ifnet *, 219 union if_snd_tag_alloc_params *, struct m_snd_tag **); 220 #endif 221 static void vlan_qflush(struct ifnet *ifp); 222 static int vlan_setflag(struct ifnet *ifp, int flag, int status, 223 int (*func)(struct ifnet *, int)); 224 static int vlan_setflags(struct ifnet *ifp, int status); 225 static int vlan_setmulti(struct ifnet *ifp); 226 static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); 227 static void vlan_unconfig(struct ifnet *ifp); 228 static void vlan_unconfig_locked(struct ifnet *ifp, int departing); 229 static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag); 230 static void vlan_link_state(struct ifnet *ifp); 231 static void vlan_capabilities(struct ifvlan *ifv); 232 static void vlan_trunk_capabilities(struct ifnet *ifp); 233 234 static struct ifnet *vlan_clone_match_ethervid(const char *, int *); 235 static int vlan_clone_match(struct if_clone *, const char *); 236 static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t); 237 static int vlan_clone_destroy(struct if_clone *, struct ifnet *); 238 239 static void vlan_ifdetach(void *arg, struct ifnet *ifp); 240 static void vlan_iflladdr(void *arg, struct ifnet *ifp); 241 242 static struct if_clone *vlan_cloner; 243 244 #ifdef VIMAGE 245 static VNET_DEFINE(struct if_clone *, vlan_cloner); 246 #define V_vlan_cloner VNET(vlan_cloner) 247 #endif 248 249 #ifndef VLAN_ARRAY 250 #define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) 251 252 static void 253 vlan_inithash(struct ifvlantrunk *trunk) 254 { 255 int i, n; 256 257 /* 258 * The trunk must not be locked here since we call malloc(M_WAITOK). 259 * It is OK in case this function is called before the trunk struct 260 * gets hooked up and becomes visible from other threads. 261 */ 262 263 KASSERT(trunk->hwidth == 0 && trunk->hash == NULL, 264 ("%s: hash already initialized", __func__)); 265 266 trunk->hwidth = VLAN_DEF_HWIDTH; 267 n = 1 << trunk->hwidth; 268 trunk->hmask = n - 1; 269 trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); 270 for (i = 0; i < n; i++) 271 LIST_INIT(&trunk->hash[i]); 272 } 273 274 static void 275 vlan_freehash(struct ifvlantrunk *trunk) 276 { 277 #ifdef INVARIANTS 278 int i; 279 280 KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); 281 for (i = 0; i < (1 << trunk->hwidth); i++) 282 KASSERT(LIST_EMPTY(&trunk->hash[i]), 283 ("%s: hash table not empty", __func__)); 284 #endif 285 free(trunk->hash, M_VLAN); 286 trunk->hash = NULL; 287 trunk->hwidth = trunk->hmask = 0; 288 } 289 290 static int 291 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) 292 { 293 int i, b; 294 struct ifvlan *ifv2; 295 296 TRUNK_LOCK_ASSERT(trunk); 297 KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); 298 299 b = 1 << trunk->hwidth; 300 i = HASH(ifv->ifv_vid, trunk->hmask); 301 LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) 302 if (ifv->ifv_vid == ifv2->ifv_vid) 303 return (EEXIST); 304 305 /* 306 * Grow the hash when the number of vlans exceeds half of the number of 307 * hash buckets squared. This will make the average linked-list length 308 * buckets/2. 309 */ 310 if (trunk->refcnt > (b * b) / 2) { 311 vlan_growhash(trunk, 1); 312 i = HASH(ifv->ifv_vid, trunk->hmask); 313 } 314 LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); 315 trunk->refcnt++; 316 317 return (0); 318 } 319 320 static int 321 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) 322 { 323 int i, b; 324 struct ifvlan *ifv2; 325 326 TRUNK_LOCK_ASSERT(trunk); 327 KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); 328 329 b = 1 << trunk->hwidth; 330 i = HASH(ifv->ifv_vid, trunk->hmask); 331 LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) 332 if (ifv2 == ifv) { 333 trunk->refcnt--; 334 LIST_REMOVE(ifv2, ifv_list); 335 if (trunk->refcnt < (b * b) / 2) 336 vlan_growhash(trunk, -1); 337 return (0); 338 } 339 340 panic("%s: vlan not found\n", __func__); 341 return (ENOENT); /*NOTREACHED*/ 342 } 343 344 /* 345 * Grow the hash larger or smaller if memory permits. 346 */ 347 static void 348 vlan_growhash(struct ifvlantrunk *trunk, int howmuch) 349 { 350 struct ifvlan *ifv; 351 struct ifvlanhead *hash2; 352 int hwidth2, i, j, n, n2; 353 354 TRUNK_LOCK_ASSERT(trunk); 355 KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); 356 357 if (howmuch == 0) { 358 /* Harmless yet obvious coding error */ 359 printf("%s: howmuch is 0\n", __func__); 360 return; 361 } 362 363 hwidth2 = trunk->hwidth + howmuch; 364 n = 1 << trunk->hwidth; 365 n2 = 1 << hwidth2; 366 /* Do not shrink the table below the default */ 367 if (hwidth2 < VLAN_DEF_HWIDTH) 368 return; 369 370 /* M_NOWAIT because we're called with trunk mutex held */ 371 hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT); 372 if (hash2 == NULL) { 373 printf("%s: out of memory -- hash size not changed\n", 374 __func__); 375 return; /* We can live with the old hash table */ 376 } 377 for (j = 0; j < n2; j++) 378 LIST_INIT(&hash2[j]); 379 for (i = 0; i < n; i++) 380 while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) { 381 LIST_REMOVE(ifv, ifv_list); 382 j = HASH(ifv->ifv_vid, n2 - 1); 383 LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); 384 } 385 free(trunk->hash, M_VLAN); 386 trunk->hash = hash2; 387 trunk->hwidth = hwidth2; 388 trunk->hmask = n2 - 1; 389 390 if (bootverbose) 391 if_printf(trunk->parent, 392 "VLAN hash table resized from %d to %d buckets\n", n, n2); 393 } 394 395 static __inline struct ifvlan * 396 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) 397 { 398 struct ifvlan *ifv; 399 400 TRUNK_LOCK_RASSERT(trunk); 401 402 LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list) 403 if (ifv->ifv_vid == vid) 404 return (ifv); 405 return (NULL); 406 } 407 408 #if 0 409 /* Debugging code to view the hashtables. */ 410 static void 411 vlan_dumphash(struct ifvlantrunk *trunk) 412 { 413 int i; 414 struct ifvlan *ifv; 415 416 for (i = 0; i < (1 << trunk->hwidth); i++) { 417 printf("%d: ", i); 418 LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) 419 printf("%s ", ifv->ifv_ifp->if_xname); 420 printf("\n"); 421 } 422 } 423 #endif /* 0 */ 424 #else 425 426 static __inline struct ifvlan * 427 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) 428 { 429 430 return trunk->vlans[vid]; 431 } 432 433 static __inline int 434 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) 435 { 436 437 if (trunk->vlans[ifv->ifv_vid] != NULL) 438 return EEXIST; 439 trunk->vlans[ifv->ifv_vid] = ifv; 440 trunk->refcnt++; 441 442 return (0); 443 } 444 445 static __inline int 446 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) 447 { 448 449 trunk->vlans[ifv->ifv_vid] = NULL; 450 trunk->refcnt--; 451 452 return (0); 453 } 454 455 static __inline void 456 vlan_freehash(struct ifvlantrunk *trunk) 457 { 458 } 459 460 static __inline void 461 vlan_inithash(struct ifvlantrunk *trunk) 462 { 463 } 464 465 #endif /* !VLAN_ARRAY */ 466 467 static void 468 trunk_destroy(struct ifvlantrunk *trunk) 469 { 470 VLAN_LOCK_ASSERT(); 471 472 TRUNK_LOCK(trunk); 473 vlan_freehash(trunk); 474 trunk->parent->if_vlantrunk = NULL; 475 TRUNK_UNLOCK(trunk); 476 TRUNK_LOCK_DESTROY(trunk); 477 free(trunk, M_VLAN); 478 } 479 480 /* 481 * Program our multicast filter. What we're actually doing is 482 * programming the multicast filter of the parent. This has the 483 * side effect of causing the parent interface to receive multicast 484 * traffic that it doesn't really want, which ends up being discarded 485 * later by the upper protocol layers. Unfortunately, there's no way 486 * to avoid this: there really is only one physical interface. 487 */ 488 static int 489 vlan_setmulti(struct ifnet *ifp) 490 { 491 struct ifnet *ifp_p; 492 struct ifmultiaddr *ifma; 493 struct ifvlan *sc; 494 struct vlan_mc_entry *mc; 495 int error; 496 497 /* Find the parent. */ 498 sc = ifp->if_softc; 499 TRUNK_LOCK_ASSERT(TRUNK(sc)); 500 ifp_p = PARENT(sc); 501 502 CURVNET_SET_QUIET(ifp_p->if_vnet); 503 504 /* First, remove any existing filter entries. */ 505 while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { 506 SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); 507 (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); 508 free(mc, M_VLAN); 509 } 510 511 /* Now program new ones. */ 512 IF_ADDR_WLOCK(ifp); 513 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 514 if (ifma->ifma_addr->sa_family != AF_LINK) 515 continue; 516 mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT); 517 if (mc == NULL) { 518 IF_ADDR_WUNLOCK(ifp); 519 return (ENOMEM); 520 } 521 bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); 522 mc->mc_addr.sdl_index = ifp_p->if_index; 523 SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); 524 } 525 IF_ADDR_WUNLOCK(ifp); 526 SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) { 527 error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr, 528 NULL); 529 if (error) 530 return (error); 531 } 532 533 CURVNET_RESTORE(); 534 return (0); 535 } 536 537 /* 538 * A handler for parent interface link layer address changes. 539 * If the parent interface link layer address is changed we 540 * should also change it on all children vlans. 541 */ 542 static void 543 vlan_iflladdr(void *arg __unused, struct ifnet *ifp) 544 { 545 struct ifvlan *ifv; 546 #ifndef VLAN_ARRAY 547 struct ifvlan *next; 548 #endif 549 int i; 550 551 /* 552 * Check if it's a trunk interface first of all 553 * to avoid needless locking. 554 */ 555 if (ifp->if_vlantrunk == NULL) 556 return; 557 558 VLAN_LOCK(); 559 /* 560 * OK, it's a trunk. Loop over and change all vlan's lladdrs on it. 561 */ 562 #ifdef VLAN_ARRAY 563 for (i = 0; i < VLAN_ARRAY_SIZE; i++) 564 if ((ifv = ifp->if_vlantrunk->vlans[i])) { 565 #else /* VLAN_ARRAY */ 566 for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++) 567 LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) { 568 #endif /* VLAN_ARRAY */ 569 VLAN_UNLOCK(); 570 if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp), 571 ifp->if_addrlen); 572 VLAN_LOCK(); 573 } 574 VLAN_UNLOCK(); 575 576 } 577 578 /* 579 * A handler for network interface departure events. 580 * Track departure of trunks here so that we don't access invalid 581 * pointers or whatever if a trunk is ripped from under us, e.g., 582 * by ejecting its hot-plug card. However, if an ifnet is simply 583 * being renamed, then there's no need to tear down the state. 584 */ 585 static void 586 vlan_ifdetach(void *arg __unused, struct ifnet *ifp) 587 { 588 struct ifvlan *ifv; 589 int i; 590 591 /* 592 * Check if it's a trunk interface first of all 593 * to avoid needless locking. 594 */ 595 if (ifp->if_vlantrunk == NULL) 596 return; 597 598 /* If the ifnet is just being renamed, don't do anything. */ 599 if (ifp->if_flags & IFF_RENAMING) 600 return; 601 602 VLAN_LOCK(); 603 /* 604 * OK, it's a trunk. Loop over and detach all vlan's on it. 605 * Check trunk pointer after each vlan_unconfig() as it will 606 * free it and set to NULL after the last vlan was detached. 607 */ 608 #ifdef VLAN_ARRAY 609 for (i = 0; i < VLAN_ARRAY_SIZE; i++) 610 if ((ifv = ifp->if_vlantrunk->vlans[i])) { 611 vlan_unconfig_locked(ifv->ifv_ifp, 1); 612 if (ifp->if_vlantrunk == NULL) 613 break; 614 } 615 #else /* VLAN_ARRAY */ 616 restart: 617 for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++) 618 if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) { 619 vlan_unconfig_locked(ifv->ifv_ifp, 1); 620 if (ifp->if_vlantrunk) 621 goto restart; /* trunk->hwidth can change */ 622 else 623 break; 624 } 625 #endif /* VLAN_ARRAY */ 626 /* Trunk should have been destroyed in vlan_unconfig(). */ 627 KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__)); 628 VLAN_UNLOCK(); 629 } 630 631 /* 632 * Return the trunk device for a virtual interface. 633 */ 634 static struct ifnet * 635 vlan_trunkdev(struct ifnet *ifp) 636 { 637 struct ifvlan *ifv; 638 639 if (ifp->if_type != IFT_L2VLAN) 640 return (NULL); 641 ifv = ifp->if_softc; 642 ifp = NULL; 643 VLAN_LOCK(); 644 if (ifv->ifv_trunk) 645 ifp = PARENT(ifv); 646 VLAN_UNLOCK(); 647 return (ifp); 648 } 649 650 /* 651 * Return the 12-bit VLAN VID for this interface, for use by external 652 * components such as Infiniband. 653 * 654 * XXXRW: Note that the function name here is historical; it should be named 655 * vlan_vid(). 656 */ 657 static int 658 vlan_tag(struct ifnet *ifp, uint16_t *vidp) 659 { 660 struct ifvlan *ifv; 661 662 if (ifp->if_type != IFT_L2VLAN) 663 return (EINVAL); 664 ifv = ifp->if_softc; 665 *vidp = ifv->ifv_vid; 666 return (0); 667 } 668 669 /* 670 * Return a driver specific cookie for this interface. Synchronization 671 * with setcookie must be provided by the driver. 672 */ 673 static void * 674 vlan_cookie(struct ifnet *ifp) 675 { 676 struct ifvlan *ifv; 677 678 if (ifp->if_type != IFT_L2VLAN) 679 return (NULL); 680 ifv = ifp->if_softc; 681 return (ifv->ifv_cookie); 682 } 683 684 /* 685 * Store a cookie in our softc that drivers can use to store driver 686 * private per-instance data in. 687 */ 688 static int 689 vlan_setcookie(struct ifnet *ifp, void *cookie) 690 { 691 struct ifvlan *ifv; 692 693 if (ifp->if_type != IFT_L2VLAN) 694 return (EINVAL); 695 ifv = ifp->if_softc; 696 ifv->ifv_cookie = cookie; 697 return (0); 698 } 699 700 /* 701 * Return the vlan device present at the specific VID. 702 */ 703 static struct ifnet * 704 vlan_devat(struct ifnet *ifp, uint16_t vid) 705 { 706 struct ifvlantrunk *trunk; 707 struct ifvlan *ifv; 708 TRUNK_LOCK_READER; 709 710 trunk = ifp->if_vlantrunk; 711 if (trunk == NULL) 712 return (NULL); 713 ifp = NULL; 714 TRUNK_RLOCK(trunk); 715 ifv = vlan_gethash(trunk, vid); 716 if (ifv) 717 ifp = ifv->ifv_ifp; 718 TRUNK_RUNLOCK(trunk); 719 return (ifp); 720 } 721 722 /* 723 * Recalculate the cached VLAN tag exposed via the MIB. 724 */ 725 static void 726 vlan_tag_recalculate(struct ifvlan *ifv) 727 { 728 729 ifv->ifv_tag = EVL_MAKETAG(ifv->ifv_vid, ifv->ifv_pcp, 0); 730 } 731 732 /* 733 * VLAN support can be loaded as a module. The only place in the 734 * system that's intimately aware of this is ether_input. We hook 735 * into this code through vlan_input_p which is defined there and 736 * set here. No one else in the system should be aware of this so 737 * we use an explicit reference here. 738 */ 739 extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); 740 741 /* For if_link_state_change() eyes only... */ 742 extern void (*vlan_link_state_p)(struct ifnet *); 743 744 static int 745 vlan_modevent(module_t mod, int type, void *data) 746 { 747 748 switch (type) { 749 case MOD_LOAD: 750 ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 751 vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY); 752 if (ifdetach_tag == NULL) 753 return (ENOMEM); 754 iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, 755 vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY); 756 if (iflladdr_tag == NULL) 757 return (ENOMEM); 758 VLAN_LOCK_INIT(); 759 vlan_input_p = vlan_input; 760 vlan_link_state_p = vlan_link_state; 761 vlan_trunk_cap_p = vlan_trunk_capabilities; 762 vlan_trunkdev_p = vlan_trunkdev; 763 vlan_cookie_p = vlan_cookie; 764 vlan_setcookie_p = vlan_setcookie; 765 vlan_tag_p = vlan_tag; 766 vlan_devat_p = vlan_devat; 767 #ifndef VIMAGE 768 vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, 769 vlan_clone_create, vlan_clone_destroy); 770 #endif 771 if (bootverbose) 772 printf("vlan: initialized, using " 773 #ifdef VLAN_ARRAY 774 "full-size arrays" 775 #else 776 "hash tables with chaining" 777 #endif 778 779 "\n"); 780 break; 781 case MOD_UNLOAD: 782 #ifndef VIMAGE 783 if_clone_detach(vlan_cloner); 784 #endif 785 EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag); 786 EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag); 787 vlan_input_p = NULL; 788 vlan_link_state_p = NULL; 789 vlan_trunk_cap_p = NULL; 790 vlan_trunkdev_p = NULL; 791 vlan_tag_p = NULL; 792 vlan_cookie_p = NULL; 793 vlan_setcookie_p = NULL; 794 vlan_devat_p = NULL; 795 VLAN_LOCK_DESTROY(); 796 if (bootverbose) 797 printf("vlan: unloaded\n"); 798 break; 799 default: 800 return (EOPNOTSUPP); 801 } 802 return (0); 803 } 804 805 static moduledata_t vlan_mod = { 806 "if_vlan", 807 vlan_modevent, 808 0 809 }; 810 811 DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 812 MODULE_VERSION(if_vlan, 3); 813 814 #ifdef VIMAGE 815 static void 816 vnet_vlan_init(const void *unused __unused) 817 { 818 819 vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, 820 vlan_clone_create, vlan_clone_destroy); 821 V_vlan_cloner = vlan_cloner; 822 } 823 VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 824 vnet_vlan_init, NULL); 825 826 static void 827 vnet_vlan_uninit(const void *unused __unused) 828 { 829 830 if_clone_detach(V_vlan_cloner); 831 } 832 VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, 833 vnet_vlan_uninit, NULL); 834 #endif 835 836 /* 837 * Check for <etherif>.<vlan> style interface names. 838 */ 839 static struct ifnet * 840 vlan_clone_match_ethervid(const char *name, int *vidp) 841 { 842 char ifname[IFNAMSIZ]; 843 char *cp; 844 struct ifnet *ifp; 845 int vid; 846 847 strlcpy(ifname, name, IFNAMSIZ); 848 if ((cp = strchr(ifname, '.')) == NULL) 849 return (NULL); 850 *cp = '\0'; 851 if ((ifp = ifunit(ifname)) == NULL) 852 return (NULL); 853 /* Parse VID. */ 854 if (*++cp == '\0') 855 return (NULL); 856 vid = 0; 857 for(; *cp >= '0' && *cp <= '9'; cp++) 858 vid = (vid * 10) + (*cp - '0'); 859 if (*cp != '\0') 860 return (NULL); 861 if (vidp != NULL) 862 *vidp = vid; 863 864 return (ifp); 865 } 866 867 static int 868 vlan_clone_match(struct if_clone *ifc, const char *name) 869 { 870 const char *cp; 871 872 if (vlan_clone_match_ethervid(name, NULL) != NULL) 873 return (1); 874 875 if (strncmp(vlanname, name, strlen(vlanname)) != 0) 876 return (0); 877 for (cp = name + 4; *cp != '\0'; cp++) { 878 if (*cp < '0' || *cp > '9') 879 return (0); 880 } 881 882 return (1); 883 } 884 885 static int 886 vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 887 { 888 char *dp; 889 int wildcard; 890 int unit; 891 int error; 892 int vid; 893 int ethertag; 894 struct ifvlan *ifv; 895 struct ifnet *ifp; 896 struct ifnet *p; 897 struct ifaddr *ifa; 898 struct sockaddr_dl *sdl; 899 struct vlanreq vlr; 900 static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 901 902 /* 903 * There are 3 (ugh) ways to specify the cloned device: 904 * o pass a parameter block with the clone request. 905 * o specify parameters in the text of the clone device name 906 * o specify no parameters and get an unattached device that 907 * must be configured separately. 908 * The first technique is preferred; the latter two are 909 * supported for backwards compatibility. 910 * 911 * XXXRW: Note historic use of the word "tag" here. New ioctls may be 912 * called for. 913 */ 914 if (params) { 915 error = copyin(params, &vlr, sizeof(vlr)); 916 if (error) 917 return error; 918 p = ifunit(vlr.vlr_parent); 919 if (p == NULL) 920 return (ENXIO); 921 error = ifc_name2unit(name, &unit); 922 if (error != 0) 923 return (error); 924 925 ethertag = 1; 926 vid = vlr.vlr_tag; 927 wildcard = (unit < 0); 928 } else if ((p = vlan_clone_match_ethervid(name, &vid)) != NULL) { 929 ethertag = 1; 930 unit = -1; 931 wildcard = 0; 932 } else { 933 ethertag = 0; 934 935 error = ifc_name2unit(name, &unit); 936 if (error != 0) 937 return (error); 938 939 wildcard = (unit < 0); 940 } 941 942 error = ifc_alloc_unit(ifc, &unit); 943 if (error != 0) 944 return (error); 945 946 /* In the wildcard case, we need to update the name. */ 947 if (wildcard) { 948 for (dp = name; *dp != '\0'; dp++); 949 if (snprintf(dp, len - (dp-name), "%d", unit) > 950 len - (dp-name) - 1) { 951 panic("%s: interface name too long", __func__); 952 } 953 } 954 955 ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); 956 ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER); 957 if (ifp == NULL) { 958 ifc_free_unit(ifc, unit); 959 free(ifv, M_VLAN); 960 return (ENOSPC); 961 } 962 SLIST_INIT(&ifv->vlan_mc_listhead); 963 ifp->if_softc = ifv; 964 /* 965 * Set the name manually rather than using if_initname because 966 * we don't conform to the default naming convention for interfaces. 967 */ 968 strlcpy(ifp->if_xname, name, IFNAMSIZ); 969 ifp->if_dname = vlanname; 970 ifp->if_dunit = unit; 971 /* NB: flags are not set here */ 972 ifp->if_linkmib = &ifv->ifv_mib; 973 ifp->if_linkmiblen = sizeof(ifv->ifv_mib); 974 /* NB: mtu is not set here */ 975 976 ifp->if_init = vlan_init; 977 ifp->if_transmit = vlan_transmit; 978 ifp->if_qflush = vlan_qflush; 979 ifp->if_ioctl = vlan_ioctl; 980 #ifdef RATELIMIT 981 ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; 982 #endif 983 ifp->if_flags = VLAN_IFFLAGS; 984 ether_ifattach(ifp, eaddr); 985 /* Now undo some of the damage... */ 986 ifp->if_baudrate = 0; 987 ifp->if_type = IFT_L2VLAN; 988 ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; 989 ifa = ifp->if_addr; 990 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 991 sdl->sdl_type = IFT_L2VLAN; 992 993 if (ethertag) { 994 error = vlan_config(ifv, p, vid); 995 if (error != 0) { 996 /* 997 * Since we've partially failed, we need to back 998 * out all the way, otherwise userland could get 999 * confused. Thus, we destroy the interface. 1000 */ 1001 ether_ifdetach(ifp); 1002 vlan_unconfig(ifp); 1003 if_free(ifp); 1004 ifc_free_unit(ifc, unit); 1005 free(ifv, M_VLAN); 1006 1007 return (error); 1008 } 1009 1010 /* Update flags on the parent, if necessary. */ 1011 vlan_setflags(ifp, 1); 1012 } 1013 1014 return (0); 1015 } 1016 1017 static int 1018 vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) 1019 { 1020 struct ifvlan *ifv = ifp->if_softc; 1021 int unit = ifp->if_dunit; 1022 1023 ether_ifdetach(ifp); /* first, remove it from system-wide lists */ 1024 vlan_unconfig(ifp); /* now it can be unconfigured and freed */ 1025 if_free(ifp); 1026 free(ifv, M_VLAN); 1027 ifc_free_unit(ifc, unit); 1028 1029 return (0); 1030 } 1031 1032 /* 1033 * The ifp->if_init entry point for vlan(4) is a no-op. 1034 */ 1035 static void 1036 vlan_init(void *foo __unused) 1037 { 1038 } 1039 1040 /* 1041 * The if_transmit method for vlan(4) interface. 1042 */ 1043 static int 1044 vlan_transmit(struct ifnet *ifp, struct mbuf *m) 1045 { 1046 struct ifvlan *ifv; 1047 struct ifnet *p; 1048 struct m_tag *mtag; 1049 uint16_t tag; 1050 int error, len, mcast; 1051 1052 ifv = ifp->if_softc; 1053 p = PARENT(ifv); 1054 len = m->m_pkthdr.len; 1055 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 1056 1057 BPF_MTAP(ifp, m); 1058 1059 /* 1060 * Do not run parent's if_transmit() if the parent is not up, 1061 * or parent's driver will cause a system crash. 1062 */ 1063 if (!UP_AND_RUNNING(p)) { 1064 m_freem(m); 1065 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1066 return (ENETDOWN); 1067 } 1068 1069 /* 1070 * Pad the frame to the minimum size allowed if told to. 1071 * This option is in accord with IEEE Std 802.1Q, 2003 Ed., 1072 * paragraph C.4.4.3.b. It can help to work around buggy 1073 * bridges that violate paragraph C.4.4.3.a from the same 1074 * document, i.e., fail to pad short frames after untagging. 1075 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but 1076 * untagging it will produce a 62-byte frame, which is a runt 1077 * and requires padding. There are VLAN-enabled network 1078 * devices that just discard such runts instead or mishandle 1079 * them somehow. 1080 */ 1081 if (V_soft_pad && p->if_type == IFT_ETHER) { 1082 static char pad[8]; /* just zeros */ 1083 int n; 1084 1085 for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len; 1086 n > 0; n -= sizeof(pad)) 1087 if (!m_append(m, min(n, sizeof(pad)), pad)) 1088 break; 1089 1090 if (n > 0) { 1091 if_printf(ifp, "cannot pad short frame\n"); 1092 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1093 m_freem(m); 1094 return (0); 1095 } 1096 } 1097 1098 /* 1099 * If underlying interface can do VLAN tag insertion itself, 1100 * just pass the packet along. However, we need some way to 1101 * tell the interface where the packet came from so that it 1102 * knows how to find the VLAN tag to use, so we attach a 1103 * packet tag that holds it. 1104 */ 1105 if (vlan_mtag_pcp && (mtag = m_tag_locate(m, MTAG_8021Q, 1106 MTAG_8021Q_PCP_OUT, NULL)) != NULL) 1107 tag = EVL_MAKETAG(ifv->ifv_vid, *(uint8_t *)(mtag + 1), 0); 1108 else 1109 tag = ifv->ifv_tag; 1110 if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { 1111 m->m_pkthdr.ether_vtag = tag; 1112 m->m_flags |= M_VLANTAG; 1113 } else { 1114 m = ether_vlanencap(m, tag); 1115 if (m == NULL) { 1116 if_printf(ifp, "unable to prepend VLAN header\n"); 1117 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1118 return (0); 1119 } 1120 } 1121 1122 /* 1123 * Send it, precisely as ether_output() would have. 1124 */ 1125 error = (p->if_transmit)(p, m); 1126 if (error == 0) { 1127 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1128 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 1129 if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast); 1130 } else 1131 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1132 return (error); 1133 } 1134 1135 /* 1136 * The ifp->if_qflush entry point for vlan(4) is a no-op. 1137 */ 1138 static void 1139 vlan_qflush(struct ifnet *ifp __unused) 1140 { 1141 } 1142 1143 static void 1144 vlan_input(struct ifnet *ifp, struct mbuf *m) 1145 { 1146 struct ifvlantrunk *trunk = ifp->if_vlantrunk; 1147 struct ifvlan *ifv; 1148 TRUNK_LOCK_READER; 1149 struct m_tag *mtag; 1150 uint16_t vid, tag; 1151 1152 KASSERT(trunk != NULL, ("%s: no trunk", __func__)); 1153 1154 if (m->m_flags & M_VLANTAG) { 1155 /* 1156 * Packet is tagged, but m contains a normal 1157 * Ethernet frame; the tag is stored out-of-band. 1158 */ 1159 tag = m->m_pkthdr.ether_vtag; 1160 m->m_flags &= ~M_VLANTAG; 1161 } else { 1162 struct ether_vlan_header *evl; 1163 1164 /* 1165 * Packet is tagged in-band as specified by 802.1q. 1166 */ 1167 switch (ifp->if_type) { 1168 case IFT_ETHER: 1169 if (m->m_len < sizeof(*evl) && 1170 (m = m_pullup(m, sizeof(*evl))) == NULL) { 1171 if_printf(ifp, "cannot pullup VLAN header\n"); 1172 return; 1173 } 1174 evl = mtod(m, struct ether_vlan_header *); 1175 tag = ntohs(evl->evl_tag); 1176 1177 /* 1178 * Remove the 802.1q header by copying the Ethernet 1179 * addresses over it and adjusting the beginning of 1180 * the data in the mbuf. The encapsulated Ethernet 1181 * type field is already in place. 1182 */ 1183 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 1184 ETHER_HDR_LEN - ETHER_TYPE_LEN); 1185 m_adj(m, ETHER_VLAN_ENCAP_LEN); 1186 break; 1187 1188 default: 1189 #ifdef INVARIANTS 1190 panic("%s: %s has unsupported if_type %u", 1191 __func__, ifp->if_xname, ifp->if_type); 1192 #endif 1193 m_freem(m); 1194 if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); 1195 return; 1196 } 1197 } 1198 1199 vid = EVL_VLANOFTAG(tag); 1200 1201 TRUNK_RLOCK(trunk); 1202 ifv = vlan_gethash(trunk, vid); 1203 if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { 1204 TRUNK_RUNLOCK(trunk); 1205 m_freem(m); 1206 if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); 1207 return; 1208 } 1209 TRUNK_RUNLOCK(trunk); 1210 1211 if (vlan_mtag_pcp) { 1212 /* 1213 * While uncommon, it is possible that we will find a 802.1q 1214 * packet encapsulated inside another packet that also had an 1215 * 802.1q header. For example, ethernet tunneled over IPSEC 1216 * arriving over ethernet. In that case, we replace the 1217 * existing 802.1q PCP m_tag value. 1218 */ 1219 mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); 1220 if (mtag == NULL) { 1221 mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN, 1222 sizeof(uint8_t), M_NOWAIT); 1223 if (mtag == NULL) { 1224 m_freem(m); 1225 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 1226 return; 1227 } 1228 m_tag_prepend(m, mtag); 1229 } 1230 *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag); 1231 } 1232 1233 m->m_pkthdr.rcvif = ifv->ifv_ifp; 1234 if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1); 1235 1236 /* Pass it back through the parent's input routine. */ 1237 (*ifp->if_input)(ifv->ifv_ifp, m); 1238 } 1239 1240 static int 1241 vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) 1242 { 1243 struct ifvlantrunk *trunk; 1244 struct ifnet *ifp; 1245 int error = 0; 1246 1247 /* 1248 * We can handle non-ethernet hardware types as long as 1249 * they handle the tagging and headers themselves. 1250 */ 1251 if (p->if_type != IFT_ETHER && 1252 (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) 1253 return (EPROTONOSUPPORT); 1254 if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) 1255 return (EPROTONOSUPPORT); 1256 /* 1257 * Don't let the caller set up a VLAN VID with 1258 * anything except VLID bits. 1259 * VID numbers 0x0 and 0xFFF are reserved. 1260 */ 1261 if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK)) 1262 return (EINVAL); 1263 if (ifv->ifv_trunk) 1264 return (EBUSY); 1265 1266 if (p->if_vlantrunk == NULL) { 1267 trunk = malloc(sizeof(struct ifvlantrunk), 1268 M_VLAN, M_WAITOK | M_ZERO); 1269 vlan_inithash(trunk); 1270 VLAN_LOCK(); 1271 if (p->if_vlantrunk != NULL) { 1272 /* A race that is very unlikely to be hit. */ 1273 vlan_freehash(trunk); 1274 free(trunk, M_VLAN); 1275 goto exists; 1276 } 1277 TRUNK_LOCK_INIT(trunk); 1278 TRUNK_LOCK(trunk); 1279 p->if_vlantrunk = trunk; 1280 trunk->parent = p; 1281 } else { 1282 VLAN_LOCK(); 1283 exists: 1284 trunk = p->if_vlantrunk; 1285 TRUNK_LOCK(trunk); 1286 } 1287 1288 ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ 1289 ifv->ifv_pcp = 0; /* Default: best effort delivery. */ 1290 vlan_tag_recalculate(ifv); 1291 error = vlan_inshash(trunk, ifv); 1292 if (error) 1293 goto done; 1294 ifv->ifv_proto = ETHERTYPE_VLAN; 1295 ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; 1296 ifv->ifv_mintu = ETHERMIN; 1297 ifv->ifv_pflags = 0; 1298 ifv->ifv_capenable = -1; 1299 1300 /* 1301 * If the parent supports the VLAN_MTU capability, 1302 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, 1303 * use it. 1304 */ 1305 if (p->if_capenable & IFCAP_VLAN_MTU) { 1306 /* 1307 * No need to fudge the MTU since the parent can 1308 * handle extended frames. 1309 */ 1310 ifv->ifv_mtufudge = 0; 1311 } else { 1312 /* 1313 * Fudge the MTU by the encapsulation size. This 1314 * makes us incompatible with strictly compliant 1315 * 802.1Q implementations, but allows us to use 1316 * the feature with other NetBSD implementations, 1317 * which might still be useful. 1318 */ 1319 ifv->ifv_mtufudge = ifv->ifv_encaplen; 1320 } 1321 1322 ifv->ifv_trunk = trunk; 1323 ifp = ifv->ifv_ifp; 1324 /* 1325 * Initialize fields from our parent. This duplicates some 1326 * work with ether_ifattach() but allows for non-ethernet 1327 * interfaces to also work. 1328 */ 1329 ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; 1330 ifp->if_baudrate = p->if_baudrate; 1331 ifp->if_output = p->if_output; 1332 ifp->if_input = p->if_input; 1333 ifp->if_resolvemulti = p->if_resolvemulti; 1334 ifp->if_addrlen = p->if_addrlen; 1335 ifp->if_broadcastaddr = p->if_broadcastaddr; 1336 1337 /* 1338 * Copy only a selected subset of flags from the parent. 1339 * Other flags are none of our business. 1340 */ 1341 #define VLAN_COPY_FLAGS (IFF_SIMPLEX) 1342 ifp->if_flags &= ~VLAN_COPY_FLAGS; 1343 ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS; 1344 #undef VLAN_COPY_FLAGS 1345 1346 ifp->if_link_state = p->if_link_state; 1347 1348 vlan_capabilities(ifv); 1349 1350 /* 1351 * Set up our interface address to reflect the underlying 1352 * physical interface's. 1353 */ 1354 bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen); 1355 ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen = 1356 p->if_addrlen; 1357 1358 /* 1359 * Configure multicast addresses that may already be 1360 * joined on the vlan device. 1361 */ 1362 (void)vlan_setmulti(ifp); /* XXX: VLAN lock held */ 1363 1364 /* We are ready for operation now. */ 1365 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1366 done: 1367 TRUNK_UNLOCK(trunk); 1368 if (error == 0) 1369 EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); 1370 VLAN_UNLOCK(); 1371 1372 return (error); 1373 } 1374 1375 static void 1376 vlan_unconfig(struct ifnet *ifp) 1377 { 1378 1379 VLAN_LOCK(); 1380 vlan_unconfig_locked(ifp, 0); 1381 VLAN_UNLOCK(); 1382 } 1383 1384 static void 1385 vlan_unconfig_locked(struct ifnet *ifp, int departing) 1386 { 1387 struct ifvlantrunk *trunk; 1388 struct vlan_mc_entry *mc; 1389 struct ifvlan *ifv; 1390 struct ifnet *parent; 1391 int error; 1392 1393 VLAN_LOCK_ASSERT(); 1394 1395 ifv = ifp->if_softc; 1396 trunk = ifv->ifv_trunk; 1397 parent = NULL; 1398 1399 if (trunk != NULL) { 1400 1401 TRUNK_LOCK(trunk); 1402 parent = trunk->parent; 1403 1404 /* 1405 * Since the interface is being unconfigured, we need to 1406 * empty the list of multicast groups that we may have joined 1407 * while we were alive from the parent's list. 1408 */ 1409 while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { 1410 /* 1411 * If the parent interface is being detached, 1412 * all its multicast addresses have already 1413 * been removed. Warn about errors if 1414 * if_delmulti() does fail, but don't abort as 1415 * all callers expect vlan destruction to 1416 * succeed. 1417 */ 1418 if (!departing) { 1419 error = if_delmulti(parent, 1420 (struct sockaddr *)&mc->mc_addr); 1421 if (error) 1422 if_printf(ifp, 1423 "Failed to delete multicast address from parent: %d\n", 1424 error); 1425 } 1426 SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); 1427 free(mc, M_VLAN); 1428 } 1429 1430 vlan_setflags(ifp, 0); /* clear special flags on parent */ 1431 vlan_remhash(trunk, ifv); 1432 ifv->ifv_trunk = NULL; 1433 1434 /* 1435 * Check if we were the last. 1436 */ 1437 if (trunk->refcnt == 0) { 1438 parent->if_vlantrunk = NULL; 1439 /* 1440 * XXXGL: If some ithread has already entered 1441 * vlan_input() and is now blocked on the trunk 1442 * lock, then it should preempt us right after 1443 * unlock and finish its work. Then we will acquire 1444 * lock again in trunk_destroy(). 1445 */ 1446 TRUNK_UNLOCK(trunk); 1447 trunk_destroy(trunk); 1448 } else 1449 TRUNK_UNLOCK(trunk); 1450 } 1451 1452 /* Disconnect from parent. */ 1453 if (ifv->ifv_pflags) 1454 if_printf(ifp, "%s: ifv_pflags unclean\n", __func__); 1455 ifp->if_mtu = ETHERMTU; 1456 ifp->if_link_state = LINK_STATE_UNKNOWN; 1457 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1458 1459 /* 1460 * Only dispatch an event if vlan was 1461 * attached, otherwise there is nothing 1462 * to cleanup anyway. 1463 */ 1464 if (parent != NULL) 1465 EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid); 1466 } 1467 1468 /* Handle a reference counted flag that should be set on the parent as well */ 1469 static int 1470 vlan_setflag(struct ifnet *ifp, int flag, int status, 1471 int (*func)(struct ifnet *, int)) 1472 { 1473 struct ifvlan *ifv; 1474 int error; 1475 1476 /* XXX VLAN_LOCK_ASSERT(); */ 1477 1478 ifv = ifp->if_softc; 1479 status = status ? (ifp->if_flags & flag) : 0; 1480 /* Now "status" contains the flag value or 0 */ 1481 1482 /* 1483 * See if recorded parent's status is different from what 1484 * we want it to be. If it is, flip it. We record parent's 1485 * status in ifv_pflags so that we won't clear parent's flag 1486 * we haven't set. In fact, we don't clear or set parent's 1487 * flags directly, but get or release references to them. 1488 * That's why we can be sure that recorded flags still are 1489 * in accord with actual parent's flags. 1490 */ 1491 if (status != (ifv->ifv_pflags & flag)) { 1492 error = (*func)(PARENT(ifv), status); 1493 if (error) 1494 return (error); 1495 ifv->ifv_pflags &= ~flag; 1496 ifv->ifv_pflags |= status; 1497 } 1498 return (0); 1499 } 1500 1501 /* 1502 * Handle IFF_* flags that require certain changes on the parent: 1503 * if "status" is true, update parent's flags respective to our if_flags; 1504 * if "status" is false, forcedly clear the flags set on parent. 1505 */ 1506 static int 1507 vlan_setflags(struct ifnet *ifp, int status) 1508 { 1509 int error, i; 1510 1511 for (i = 0; vlan_pflags[i].flag; i++) { 1512 error = vlan_setflag(ifp, vlan_pflags[i].flag, 1513 status, vlan_pflags[i].func); 1514 if (error) 1515 return (error); 1516 } 1517 return (0); 1518 } 1519 1520 /* Inform all vlans that their parent has changed link state */ 1521 static void 1522 vlan_link_state(struct ifnet *ifp) 1523 { 1524 struct ifvlantrunk *trunk = ifp->if_vlantrunk; 1525 struct ifvlan *ifv; 1526 int i; 1527 1528 TRUNK_LOCK(trunk); 1529 #ifdef VLAN_ARRAY 1530 for (i = 0; i < VLAN_ARRAY_SIZE; i++) 1531 if (trunk->vlans[i] != NULL) { 1532 ifv = trunk->vlans[i]; 1533 #else 1534 for (i = 0; i < (1 << trunk->hwidth); i++) 1535 LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) { 1536 #endif 1537 ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate; 1538 if_link_state_change(ifv->ifv_ifp, 1539 trunk->parent->if_link_state); 1540 } 1541 TRUNK_UNLOCK(trunk); 1542 } 1543 1544 static void 1545 vlan_capabilities(struct ifvlan *ifv) 1546 { 1547 struct ifnet *p = PARENT(ifv); 1548 struct ifnet *ifp = ifv->ifv_ifp; 1549 struct ifnet_hw_tsomax hw_tsomax; 1550 int cap = 0, ena = 0, mena; 1551 u_long hwa = 0; 1552 1553 TRUNK_LOCK_ASSERT(TRUNK(ifv)); 1554 1555 /* Mask parent interface enabled capabilities disabled by user. */ 1556 mena = p->if_capenable & ifv->ifv_capenable; 1557 1558 /* 1559 * If the parent interface can do checksum offloading 1560 * on VLANs, then propagate its hardware-assisted 1561 * checksumming flags. Also assert that checksum 1562 * offloading requires hardware VLAN tagging. 1563 */ 1564 if (p->if_capabilities & IFCAP_VLAN_HWCSUM) 1565 cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 1566 if (p->if_capenable & IFCAP_VLAN_HWCSUM && 1567 p->if_capenable & IFCAP_VLAN_HWTAGGING) { 1568 ena |= mena & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 1569 if (ena & IFCAP_TXCSUM) 1570 hwa |= p->if_hwassist & (CSUM_IP | CSUM_TCP | 1571 CSUM_UDP | CSUM_SCTP); 1572 if (ena & IFCAP_TXCSUM_IPV6) 1573 hwa |= p->if_hwassist & (CSUM_TCP_IPV6 | 1574 CSUM_UDP_IPV6 | CSUM_SCTP_IPV6); 1575 } 1576 1577 /* 1578 * If the parent interface can do TSO on VLANs then 1579 * propagate the hardware-assisted flag. TSO on VLANs 1580 * does not necessarily require hardware VLAN tagging. 1581 */ 1582 memset(&hw_tsomax, 0, sizeof(hw_tsomax)); 1583 if_hw_tsomax_common(p, &hw_tsomax); 1584 if_hw_tsomax_update(ifp, &hw_tsomax); 1585 if (p->if_capabilities & IFCAP_VLAN_HWTSO) 1586 cap |= p->if_capabilities & IFCAP_TSO; 1587 if (p->if_capenable & IFCAP_VLAN_HWTSO) { 1588 ena |= mena & IFCAP_TSO; 1589 if (ena & IFCAP_TSO) 1590 hwa |= p->if_hwassist & CSUM_TSO; 1591 } 1592 1593 /* 1594 * If the parent interface can do LRO and checksum offloading on 1595 * VLANs, then guess it may do LRO on VLANs. False positive here 1596 * cost nothing, while false negative may lead to some confusions. 1597 */ 1598 if (p->if_capabilities & IFCAP_VLAN_HWCSUM) 1599 cap |= p->if_capabilities & IFCAP_LRO; 1600 if (p->if_capenable & IFCAP_VLAN_HWCSUM) 1601 ena |= p->if_capenable & IFCAP_LRO; 1602 1603 /* 1604 * If the parent interface can offload TCP connections over VLANs then 1605 * propagate its TOE capability to the VLAN interface. 1606 * 1607 * All TOE drivers in the tree today can deal with VLANs. If this 1608 * changes then IFCAP_VLAN_TOE should be promoted to a full capability 1609 * with its own bit. 1610 */ 1611 #define IFCAP_VLAN_TOE IFCAP_TOE 1612 if (p->if_capabilities & IFCAP_VLAN_TOE) 1613 cap |= p->if_capabilities & IFCAP_TOE; 1614 if (p->if_capenable & IFCAP_VLAN_TOE) { 1615 TOEDEV(ifp) = TOEDEV(p); 1616 ena |= mena & IFCAP_TOE; 1617 } 1618 1619 /* 1620 * If the parent interface supports dynamic link state, so does the 1621 * VLAN interface. 1622 */ 1623 cap |= (p->if_capabilities & IFCAP_LINKSTATE); 1624 ena |= (mena & IFCAP_LINKSTATE); 1625 1626 #ifdef RATELIMIT 1627 /* 1628 * If the parent interface supports ratelimiting, so does the 1629 * VLAN interface. 1630 */ 1631 cap |= (p->if_capabilities & IFCAP_TXRTLMT); 1632 ena |= (mena & IFCAP_TXRTLMT); 1633 #endif 1634 1635 ifp->if_capabilities = cap; 1636 ifp->if_capenable = ena; 1637 ifp->if_hwassist = hwa; 1638 } 1639 1640 static void 1641 vlan_trunk_capabilities(struct ifnet *ifp) 1642 { 1643 struct ifvlantrunk *trunk = ifp->if_vlantrunk; 1644 struct ifvlan *ifv; 1645 int i; 1646 1647 TRUNK_LOCK(trunk); 1648 #ifdef VLAN_ARRAY 1649 for (i = 0; i < VLAN_ARRAY_SIZE; i++) 1650 if (trunk->vlans[i] != NULL) { 1651 ifv = trunk->vlans[i]; 1652 #else 1653 for (i = 0; i < (1 << trunk->hwidth); i++) { 1654 LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) 1655 #endif 1656 vlan_capabilities(ifv); 1657 } 1658 TRUNK_UNLOCK(trunk); 1659 } 1660 1661 static int 1662 vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1663 { 1664 struct ifnet *p; 1665 struct ifreq *ifr; 1666 struct ifaddr *ifa; 1667 struct ifvlan *ifv; 1668 struct ifvlantrunk *trunk; 1669 struct vlanreq vlr; 1670 int error = 0; 1671 1672 ifr = (struct ifreq *)data; 1673 ifa = (struct ifaddr *) data; 1674 ifv = ifp->if_softc; 1675 1676 switch (cmd) { 1677 case SIOCSIFADDR: 1678 ifp->if_flags |= IFF_UP; 1679 #ifdef INET 1680 if (ifa->ifa_addr->sa_family == AF_INET) 1681 arp_ifinit(ifp, ifa); 1682 #endif 1683 break; 1684 case SIOCGIFADDR: 1685 { 1686 struct sockaddr *sa; 1687 1688 sa = (struct sockaddr *)&ifr->ifr_data; 1689 bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen); 1690 } 1691 break; 1692 case SIOCGIFMEDIA: 1693 VLAN_LOCK(); 1694 if (TRUNK(ifv) != NULL) { 1695 p = PARENT(ifv); 1696 VLAN_UNLOCK(); 1697 error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data); 1698 /* Limit the result to the parent's current config. */ 1699 if (error == 0) { 1700 struct ifmediareq *ifmr; 1701 1702 ifmr = (struct ifmediareq *)data; 1703 if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) { 1704 ifmr->ifm_count = 1; 1705 error = copyout(&ifmr->ifm_current, 1706 ifmr->ifm_ulist, 1707 sizeof(int)); 1708 } 1709 } 1710 } else { 1711 VLAN_UNLOCK(); 1712 error = EINVAL; 1713 } 1714 break; 1715 1716 case SIOCSIFMEDIA: 1717 error = EINVAL; 1718 break; 1719 1720 case SIOCSIFMTU: 1721 /* 1722 * Set the interface MTU. 1723 */ 1724 VLAN_LOCK(); 1725 if (TRUNK(ifv) != NULL) { 1726 if (ifr->ifr_mtu > 1727 (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || 1728 ifr->ifr_mtu < 1729 (ifv->ifv_mintu - ifv->ifv_mtufudge)) 1730 error = EINVAL; 1731 else 1732 ifp->if_mtu = ifr->ifr_mtu; 1733 } else 1734 error = EINVAL; 1735 VLAN_UNLOCK(); 1736 break; 1737 1738 case SIOCSETVLAN: 1739 #ifdef VIMAGE 1740 /* 1741 * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN 1742 * interface to be delegated to a jail without allowing the 1743 * jail to change what underlying interface/VID it is 1744 * associated with. We are not entirely convinced that this 1745 * is the right way to accomplish that policy goal. 1746 */ 1747 if (ifp->if_vnet != ifp->if_home_vnet) { 1748 error = EPERM; 1749 break; 1750 } 1751 #endif 1752 error = copyin(ifr->ifr_data, &vlr, sizeof(vlr)); 1753 if (error) 1754 break; 1755 if (vlr.vlr_parent[0] == '\0') { 1756 vlan_unconfig(ifp); 1757 break; 1758 } 1759 p = ifunit(vlr.vlr_parent); 1760 if (p == NULL) { 1761 error = ENOENT; 1762 break; 1763 } 1764 error = vlan_config(ifv, p, vlr.vlr_tag); 1765 if (error) 1766 break; 1767 1768 /* Update flags on the parent, if necessary. */ 1769 vlan_setflags(ifp, 1); 1770 break; 1771 1772 case SIOCGETVLAN: 1773 #ifdef VIMAGE 1774 if (ifp->if_vnet != ifp->if_home_vnet) { 1775 error = EPERM; 1776 break; 1777 } 1778 #endif 1779 bzero(&vlr, sizeof(vlr)); 1780 VLAN_LOCK(); 1781 if (TRUNK(ifv) != NULL) { 1782 strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, 1783 sizeof(vlr.vlr_parent)); 1784 vlr.vlr_tag = ifv->ifv_vid; 1785 } 1786 VLAN_UNLOCK(); 1787 error = copyout(&vlr, ifr->ifr_data, sizeof(vlr)); 1788 break; 1789 1790 case SIOCSIFFLAGS: 1791 /* 1792 * We should propagate selected flags to the parent, 1793 * e.g., promiscuous mode. 1794 */ 1795 if (TRUNK(ifv) != NULL) 1796 error = vlan_setflags(ifp, 1); 1797 break; 1798 1799 case SIOCADDMULTI: 1800 case SIOCDELMULTI: 1801 /* 1802 * If we don't have a parent, just remember the membership for 1803 * when we do. 1804 */ 1805 trunk = TRUNK(ifv); 1806 if (trunk != NULL) { 1807 TRUNK_LOCK(trunk); 1808 error = vlan_setmulti(ifp); 1809 TRUNK_UNLOCK(trunk); 1810 } 1811 break; 1812 1813 case SIOCGVLANPCP: 1814 #ifdef VIMAGE 1815 if (ifp->if_vnet != ifp->if_home_vnet) { 1816 error = EPERM; 1817 break; 1818 } 1819 #endif 1820 ifr->ifr_vlan_pcp = ifv->ifv_pcp; 1821 break; 1822 1823 case SIOCSVLANPCP: 1824 #ifdef VIMAGE 1825 if (ifp->if_vnet != ifp->if_home_vnet) { 1826 error = EPERM; 1827 break; 1828 } 1829 #endif 1830 error = priv_check(curthread, PRIV_NET_SETVLANPCP); 1831 if (error) 1832 break; 1833 if (ifr->ifr_vlan_pcp > 7) { 1834 error = EINVAL; 1835 break; 1836 } 1837 ifv->ifv_pcp = ifr->ifr_vlan_pcp; 1838 vlan_tag_recalculate(ifv); 1839 break; 1840 1841 case SIOCSIFCAP: 1842 VLAN_LOCK(); 1843 ifv->ifv_capenable = ifr->ifr_reqcap; 1844 trunk = TRUNK(ifv); 1845 if (trunk != NULL) { 1846 TRUNK_LOCK(trunk); 1847 vlan_capabilities(ifv); 1848 TRUNK_UNLOCK(trunk); 1849 } 1850 VLAN_UNLOCK(); 1851 break; 1852 1853 default: 1854 error = EINVAL; 1855 break; 1856 } 1857 1858 return (error); 1859 } 1860 1861 #ifdef RATELIMIT 1862 static int 1863 vlan_snd_tag_alloc(struct ifnet *ifp, 1864 union if_snd_tag_alloc_params *params, 1865 struct m_snd_tag **ppmt) 1866 { 1867 1868 /* get trunk device */ 1869 ifp = vlan_trunkdev(ifp); 1870 if (ifp == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0) 1871 return (EOPNOTSUPP); 1872 /* forward allocation request */ 1873 return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); 1874 } 1875 #endif 1876