1 /*- 2 * Copyright 1998 Massachusetts Institute of Technology 3 * Copyright 2012 ADARA Networks, Inc. 4 * Copyright 2017 Dell EMC Isilon 5 * 6 * Portions of this software were developed by Robert N. M. Watson under 7 * contract to ADARA Networks, Inc. 8 * 9 * Permission to use, copy, modify, and distribute this software and 10 * its documentation for any purpose and without fee is hereby 11 * granted, provided that both the above copyright notice and this 12 * permission notice appear in all copies, that both the above 13 * copyright notice and this permission notice appear in all 14 * supporting documentation, and that the name of M.I.T. not be used 15 * in advertising or publicity pertaining to distribution of the 16 * software without specific, written prior permission. M.I.T. makes 17 * no representations about the suitability of this software for any 18 * purpose. It is provided "as is" without express or implied 19 * warranty. 20 * 21 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 22 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 23 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 25 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 28 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 29 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /* 36 * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs. 37 * This is sort of sneaky in the implementation, since 38 * we need to pretend to be enough of an Ethernet implementation 39 * to make arp work. The way we do this is by telling everyone 40 * that we are an Ethernet, and then catch the packets that 41 * ether_output() sends to us via if_transmit(), rewrite them for 42 * use by the real outgoing interface, and ask it to send them. 43 */ 44 45 #include <sys/cdefs.h> 46 #include "opt_inet.h" 47 #include "opt_inet6.h" 48 #include "opt_kern_tls.h" 49 #include "opt_netlink.h" 50 #include "opt_vlan.h" 51 #include "opt_ratelimit.h" 52 53 #include <sys/param.h> 54 #include <sys/eventhandler.h> 55 #include <sys/kernel.h> 56 #include <sys/lock.h> 57 #include <sys/malloc.h> 58 #include <sys/mbuf.h> 59 #include <sys/module.h> 60 #include <sys/rmlock.h> 61 #include <sys/priv.h> 62 #include <sys/queue.h> 63 #include <sys/socket.h> 64 #include <sys/sockio.h> 65 #include <sys/sysctl.h> 66 #include <sys/systm.h> 67 #include <sys/sx.h> 68 #include <sys/taskqueue.h> 69 70 #include <net/bpf.h> 71 #include <net/ethernet.h> 72 #include <net/if.h> 73 #include <net/if_var.h> 74 #include <net/if_private.h> 75 #include <net/if_clone.h> 76 #include <net/if_dl.h> 77 #include <net/if_types.h> 78 #include <net/if_vlan_var.h> 79 #include <net/route.h> 80 #include <net/vnet.h> 81 82 #ifdef INET 83 #include <netinet/in.h> 84 #include <netinet/if_ether.h> 85 #endif 86 87 #include <netlink/netlink.h> 88 #include <netlink/netlink_ctl.h> 89 #include <netlink/netlink_route.h> 90 #include <netlink/route/route_var.h> 91 92 #define VLAN_DEF_HWIDTH 4 93 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) 94 95 #define UP_AND_RUNNING(ifp) \ 96 ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING) 97 98 CK_SLIST_HEAD(ifvlanhead, ifvlan); 99 100 struct ifvlantrunk { 101 struct ifnet *parent; /* parent interface of this trunk */ 102 struct mtx lock; 103 #ifdef VLAN_ARRAY 104 #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) 105 struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ 106 #else 107 struct ifvlanhead *hash; /* dynamic hash-list table */ 108 uint16_t hmask; 109 uint16_t hwidth; 110 #endif 111 int refcnt; 112 }; 113 114 #if defined(KERN_TLS) || defined(RATELIMIT) 115 struct vlan_snd_tag { 116 struct m_snd_tag com; 117 struct m_snd_tag *tag; 118 }; 119 120 static inline struct vlan_snd_tag * 121 mst_to_vst(struct m_snd_tag *mst) 122 { 123 124 return (__containerof(mst, struct vlan_snd_tag, com)); 125 } 126 #endif 127 128 /* 129 * This macro provides a facility to iterate over every vlan on a trunk with 130 * the assumption that none will be added/removed during iteration. 131 */ 132 #ifdef VLAN_ARRAY 133 #define VLAN_FOREACH(_ifv, _trunk) \ 134 size_t _i; \ 135 for (_i = 0; _i < VLAN_ARRAY_SIZE; _i++) \ 136 if (((_ifv) = (_trunk)->vlans[_i]) != NULL) 137 #else /* VLAN_ARRAY */ 138 #define VLAN_FOREACH(_ifv, _trunk) \ 139 struct ifvlan *_next; \ 140 size_t _i; \ 141 for (_i = 0; _i < (1 << (_trunk)->hwidth); _i++) \ 142 CK_SLIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next) 143 #endif /* VLAN_ARRAY */ 144 145 /* 146 * This macro provides a facility to iterate over every vlan on a trunk while 147 * also modifying the number of vlans on the trunk. The iteration continues 148 * until some condition is met or there are no more vlans on the trunk. 149 */ 150 #ifdef VLAN_ARRAY 151 /* The VLAN_ARRAY case is simple -- just a for loop using the condition. */ 152 #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ 153 size_t _i; \ 154 for (_i = 0; !(_cond) && _i < VLAN_ARRAY_SIZE; _i++) \ 155 if (((_ifv) = (_trunk)->vlans[_i])) 156 #else /* VLAN_ARRAY */ 157 /* 158 * The hash table case is more complicated. We allow for the hash table to be 159 * modified (i.e. vlans removed) while we are iterating over it. To allow for 160 * this we must restart the iteration every time we "touch" something during 161 * the iteration, since removal will resize the hash table and invalidate our 162 * current position. If acting on the touched element causes the trunk to be 163 * emptied, then iteration also stops. 164 */ 165 #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ 166 size_t _i; \ 167 bool _touch = false; \ 168 for (_i = 0; \ 169 !(_cond) && _i < (1 << (_trunk)->hwidth); \ 170 _i = (_touch && ((_trunk) != NULL) ? 0 : _i + 1), _touch = false) \ 171 if (((_ifv) = CK_SLIST_FIRST(&(_trunk)->hash[_i])) != NULL && \ 172 (_touch = true)) 173 #endif /* VLAN_ARRAY */ 174 175 struct vlan_mc_entry { 176 struct sockaddr_dl mc_addr; 177 CK_SLIST_ENTRY(vlan_mc_entry) mc_entries; 178 struct epoch_context mc_epoch_ctx; 179 }; 180 181 struct ifvlan { 182 struct ifvlantrunk *ifv_trunk; 183 struct ifnet *ifv_ifp; 184 #define TRUNK(ifv) ((ifv)->ifv_trunk) 185 #define PARENT(ifv) (TRUNK(ifv)->parent) 186 void *ifv_cookie; 187 int ifv_pflags; /* special flags we have set on parent */ 188 int ifv_capenable; 189 int ifv_encaplen; /* encapsulation length */ 190 int ifv_mtufudge; /* MTU fudged by this much */ 191 int ifv_mintu; /* min transmission unit */ 192 struct ether_8021q_tag ifv_qtag; 193 #define ifv_proto ifv_qtag.proto 194 #define ifv_vid ifv_qtag.vid 195 #define ifv_pcp ifv_qtag.pcp 196 struct task lladdr_task; 197 CK_SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; 198 #ifndef VLAN_ARRAY 199 CK_SLIST_ENTRY(ifvlan) ifv_list; 200 #endif 201 }; 202 203 /* Special flags we should propagate to parent. */ 204 static struct { 205 int flag; 206 int (*func)(struct ifnet *, int); 207 } vlan_pflags[] = { 208 {IFF_PROMISC, ifpromisc}, 209 {IFF_ALLMULTI, if_allmulti}, 210 {0, NULL} 211 }; 212 213 VNET_DECLARE(int, vlan_mtag_pcp); 214 #define V_vlan_mtag_pcp VNET(vlan_mtag_pcp) 215 216 static const char vlanname[] = "vlan"; 217 static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface"); 218 219 static eventhandler_tag ifdetach_tag; 220 static eventhandler_tag iflladdr_tag; 221 static eventhandler_tag ifevent_tag; 222 223 /* 224 * if_vlan uses two module-level synchronizations primitives to allow concurrent 225 * modification of vlan interfaces and (mostly) allow for vlans to be destroyed 226 * while they are being used for tx/rx. To accomplish this in a way that has 227 * acceptable performance and cooperation with other parts of the network stack 228 * there is a non-sleepable epoch(9) and an sx(9). 229 * 230 * The performance-sensitive paths that warrant using the epoch(9) are 231 * vlan_transmit and vlan_input. Both have to check for the vlan interface's 232 * existence using if_vlantrunk, and being in the network tx/rx paths the use 233 * of an epoch(9) gives a measureable improvement in performance. 234 * 235 * The reason for having an sx(9) is mostly because there are still areas that 236 * must be sleepable and also have safe concurrent access to a vlan interface. 237 * Since the sx(9) exists, it is used by default in most paths unless sleeping 238 * is not permitted, or if it is not clear whether sleeping is permitted. 239 * 240 */ 241 #define _VLAN_SX_ID ifv_sx 242 243 static struct sx _VLAN_SX_ID; 244 245 #define VLAN_LOCKING_INIT() \ 246 sx_init_flags(&_VLAN_SX_ID, "vlan_sx", SX_RECURSE) 247 248 #define VLAN_LOCKING_DESTROY() \ 249 sx_destroy(&_VLAN_SX_ID) 250 251 #define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID) 252 #define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID) 253 #define VLAN_XLOCK() sx_xlock(&_VLAN_SX_ID) 254 #define VLAN_XUNLOCK() sx_xunlock(&_VLAN_SX_ID) 255 #define VLAN_SLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_SLOCKED) 256 #define VLAN_XLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_XLOCKED) 257 #define VLAN_SXLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_LOCKED) 258 259 /* 260 * We also have a per-trunk mutex that should be acquired when changing 261 * its state. 262 */ 263 #define TRUNK_LOCK_INIT(trunk) mtx_init(&(trunk)->lock, vlanname, NULL, MTX_DEF) 264 #define TRUNK_LOCK_DESTROY(trunk) mtx_destroy(&(trunk)->lock) 265 #define TRUNK_WLOCK(trunk) mtx_lock(&(trunk)->lock) 266 #define TRUNK_WUNLOCK(trunk) mtx_unlock(&(trunk)->lock) 267 #define TRUNK_WLOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED); 268 269 /* 270 * The VLAN_ARRAY substitutes the dynamic hash with a static array 271 * with 4096 entries. In theory this can give a boost in processing, 272 * however in practice it does not. Probably this is because the array 273 * is too big to fit into CPU cache. 274 */ 275 #ifndef VLAN_ARRAY 276 static void vlan_inithash(struct ifvlantrunk *trunk); 277 static void vlan_freehash(struct ifvlantrunk *trunk); 278 static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); 279 static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); 280 static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); 281 static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, 282 uint16_t vid); 283 #endif 284 static void trunk_destroy(struct ifvlantrunk *trunk); 285 286 static void vlan_init(void *foo); 287 static void vlan_input(struct ifnet *ifp, struct mbuf *m); 288 static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); 289 #if defined(KERN_TLS) || defined(RATELIMIT) 290 static int vlan_snd_tag_alloc(struct ifnet *, 291 union if_snd_tag_alloc_params *, struct m_snd_tag **); 292 static int vlan_snd_tag_modify(struct m_snd_tag *, 293 union if_snd_tag_modify_params *); 294 static int vlan_snd_tag_query(struct m_snd_tag *, 295 union if_snd_tag_query_params *); 296 static void vlan_snd_tag_free(struct m_snd_tag *); 297 static struct m_snd_tag *vlan_next_snd_tag(struct m_snd_tag *); 298 static void vlan_ratelimit_query(struct ifnet *, 299 struct if_ratelimit_query_results *); 300 #endif 301 static void vlan_qflush(struct ifnet *ifp); 302 static int vlan_setflag(struct ifnet *ifp, int flag, int status, 303 int (*func)(struct ifnet *, int)); 304 static int vlan_setflags(struct ifnet *ifp, int status); 305 static int vlan_setmulti(struct ifnet *ifp); 306 static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); 307 #ifdef ALTQ 308 static void vlan_altq_start(struct ifnet *ifp); 309 static int vlan_altq_transmit(struct ifnet *ifp, struct mbuf *m); 310 #endif 311 static int vlan_output(struct ifnet *ifp, struct mbuf *m, 312 const struct sockaddr *dst, struct route *ro); 313 static void vlan_unconfig(struct ifnet *ifp); 314 static void vlan_unconfig_locked(struct ifnet *ifp, int departing); 315 static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag, 316 uint16_t proto); 317 static void vlan_link_state(struct ifnet *ifp); 318 static void vlan_capabilities(struct ifvlan *ifv); 319 static void vlan_trunk_capabilities(struct ifnet *ifp); 320 321 static struct ifnet *vlan_clone_match_ethervid(const char *, int *); 322 static int vlan_clone_match(struct if_clone *, const char *); 323 static int vlan_clone_create(struct if_clone *, char *, size_t, 324 struct ifc_data *, struct ifnet **); 325 static int vlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); 326 327 static int vlan_clone_create_nl(struct if_clone *ifc, char *name, size_t len, 328 struct ifc_data_nl *ifd); 329 static int vlan_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd); 330 static void vlan_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw); 331 332 static void vlan_ifdetach(void *arg, struct ifnet *ifp); 333 static void vlan_iflladdr(void *arg, struct ifnet *ifp); 334 static void vlan_ifevent(void *arg, struct ifnet *ifp, int event); 335 336 static void vlan_lladdr_fn(void *arg, int pending); 337 338 static struct if_clone *vlan_cloner; 339 340 #ifdef VIMAGE 341 VNET_DEFINE_STATIC(struct if_clone *, vlan_cloner); 342 #define V_vlan_cloner VNET(vlan_cloner) 343 #endif 344 345 #ifdef RATELIMIT 346 static const struct if_snd_tag_sw vlan_snd_tag_ul_sw = { 347 .snd_tag_modify = vlan_snd_tag_modify, 348 .snd_tag_query = vlan_snd_tag_query, 349 .snd_tag_free = vlan_snd_tag_free, 350 .next_snd_tag = vlan_next_snd_tag, 351 .type = IF_SND_TAG_TYPE_UNLIMITED 352 }; 353 354 static const struct if_snd_tag_sw vlan_snd_tag_rl_sw = { 355 .snd_tag_modify = vlan_snd_tag_modify, 356 .snd_tag_query = vlan_snd_tag_query, 357 .snd_tag_free = vlan_snd_tag_free, 358 .next_snd_tag = vlan_next_snd_tag, 359 .type = IF_SND_TAG_TYPE_RATE_LIMIT 360 }; 361 #endif 362 363 #ifdef KERN_TLS 364 static const struct if_snd_tag_sw vlan_snd_tag_tls_sw = { 365 .snd_tag_modify = vlan_snd_tag_modify, 366 .snd_tag_query = vlan_snd_tag_query, 367 .snd_tag_free = vlan_snd_tag_free, 368 .next_snd_tag = vlan_next_snd_tag, 369 .type = IF_SND_TAG_TYPE_TLS 370 }; 371 372 #ifdef RATELIMIT 373 static const struct if_snd_tag_sw vlan_snd_tag_tls_rl_sw = { 374 .snd_tag_modify = vlan_snd_tag_modify, 375 .snd_tag_query = vlan_snd_tag_query, 376 .snd_tag_free = vlan_snd_tag_free, 377 .next_snd_tag = vlan_next_snd_tag, 378 .type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT 379 }; 380 #endif 381 #endif 382 383 static void 384 vlan_mc_free(struct epoch_context *ctx) 385 { 386 struct vlan_mc_entry *mc = __containerof(ctx, struct vlan_mc_entry, mc_epoch_ctx); 387 free(mc, M_VLAN); 388 } 389 390 #ifndef VLAN_ARRAY 391 #define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) 392 393 static void 394 vlan_inithash(struct ifvlantrunk *trunk) 395 { 396 int i, n; 397 398 /* 399 * The trunk must not be locked here since we call malloc(M_WAITOK). 400 * It is OK in case this function is called before the trunk struct 401 * gets hooked up and becomes visible from other threads. 402 */ 403 404 KASSERT(trunk->hwidth == 0 && trunk->hash == NULL, 405 ("%s: hash already initialized", __func__)); 406 407 trunk->hwidth = VLAN_DEF_HWIDTH; 408 n = 1 << trunk->hwidth; 409 trunk->hmask = n - 1; 410 trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); 411 for (i = 0; i < n; i++) 412 CK_SLIST_INIT(&trunk->hash[i]); 413 } 414 415 static void 416 vlan_freehash(struct ifvlantrunk *trunk) 417 { 418 #ifdef INVARIANTS 419 int i; 420 421 KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); 422 for (i = 0; i < (1 << trunk->hwidth); i++) 423 KASSERT(CK_SLIST_EMPTY(&trunk->hash[i]), 424 ("%s: hash table not empty", __func__)); 425 #endif 426 free(trunk->hash, M_VLAN); 427 trunk->hash = NULL; 428 trunk->hwidth = trunk->hmask = 0; 429 } 430 431 static int 432 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) 433 { 434 int i, b; 435 struct ifvlan *ifv2; 436 437 VLAN_XLOCK_ASSERT(); 438 KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); 439 440 b = 1 << trunk->hwidth; 441 i = HASH(ifv->ifv_vid, trunk->hmask); 442 CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) 443 if (ifv->ifv_vid == ifv2->ifv_vid) 444 return (EEXIST); 445 446 /* 447 * Grow the hash when the number of vlans exceeds half of the number of 448 * hash buckets squared. This will make the average linked-list length 449 * buckets/2. 450 */ 451 if (trunk->refcnt > (b * b) / 2) { 452 vlan_growhash(trunk, 1); 453 i = HASH(ifv->ifv_vid, trunk->hmask); 454 } 455 CK_SLIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); 456 trunk->refcnt++; 457 458 return (0); 459 } 460 461 static int 462 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) 463 { 464 int i, b; 465 struct ifvlan *ifv2; 466 467 VLAN_XLOCK_ASSERT(); 468 KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); 469 470 b = 1 << (trunk->hwidth - 1); 471 i = HASH(ifv->ifv_vid, trunk->hmask); 472 CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) 473 if (ifv2 == ifv) { 474 trunk->refcnt--; 475 CK_SLIST_REMOVE(&trunk->hash[i], ifv2, ifvlan, ifv_list); 476 if (trunk->refcnt < (b * b) / 2) 477 vlan_growhash(trunk, -1); 478 return (0); 479 } 480 481 panic("%s: vlan not found\n", __func__); 482 return (ENOENT); /*NOTREACHED*/ 483 } 484 485 /* 486 * Grow the hash larger or smaller if memory permits. 487 */ 488 static void 489 vlan_growhash(struct ifvlantrunk *trunk, int howmuch) 490 { 491 struct ifvlan *ifv; 492 struct ifvlanhead *hash2; 493 int hwidth2, i, j, n, n2; 494 495 VLAN_XLOCK_ASSERT(); 496 KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); 497 498 if (howmuch == 0) { 499 /* Harmless yet obvious coding error */ 500 printf("%s: howmuch is 0\n", __func__); 501 return; 502 } 503 504 hwidth2 = trunk->hwidth + howmuch; 505 n = 1 << trunk->hwidth; 506 n2 = 1 << hwidth2; 507 /* Do not shrink the table below the default */ 508 if (hwidth2 < VLAN_DEF_HWIDTH) 509 return; 510 511 hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_WAITOK); 512 if (hash2 == NULL) { 513 printf("%s: out of memory -- hash size not changed\n", 514 __func__); 515 return; /* We can live with the old hash table */ 516 } 517 for (j = 0; j < n2; j++) 518 CK_SLIST_INIT(&hash2[j]); 519 for (i = 0; i < n; i++) 520 while ((ifv = CK_SLIST_FIRST(&trunk->hash[i])) != NULL) { 521 CK_SLIST_REMOVE(&trunk->hash[i], ifv, ifvlan, ifv_list); 522 j = HASH(ifv->ifv_vid, n2 - 1); 523 CK_SLIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); 524 } 525 NET_EPOCH_WAIT(); 526 free(trunk->hash, M_VLAN); 527 trunk->hash = hash2; 528 trunk->hwidth = hwidth2; 529 trunk->hmask = n2 - 1; 530 531 if (bootverbose) 532 if_printf(trunk->parent, 533 "VLAN hash table resized from %d to %d buckets\n", n, n2); 534 } 535 536 static __inline struct ifvlan * 537 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) 538 { 539 struct ifvlan *ifv; 540 541 NET_EPOCH_ASSERT(); 542 543 CK_SLIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list) 544 if (ifv->ifv_vid == vid) 545 return (ifv); 546 return (NULL); 547 } 548 549 #if 0 550 /* Debugging code to view the hashtables. */ 551 static void 552 vlan_dumphash(struct ifvlantrunk *trunk) 553 { 554 int i; 555 struct ifvlan *ifv; 556 557 for (i = 0; i < (1 << trunk->hwidth); i++) { 558 printf("%d: ", i); 559 CK_SLIST_FOREACH(ifv, &trunk->hash[i], ifv_list) 560 printf("%s ", ifv->ifv_ifp->if_xname); 561 printf("\n"); 562 } 563 } 564 #endif /* 0 */ 565 #else 566 567 static __inline struct ifvlan * 568 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) 569 { 570 571 return trunk->vlans[vid]; 572 } 573 574 static __inline int 575 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) 576 { 577 578 if (trunk->vlans[ifv->ifv_vid] != NULL) 579 return EEXIST; 580 trunk->vlans[ifv->ifv_vid] = ifv; 581 trunk->refcnt++; 582 583 return (0); 584 } 585 586 static __inline int 587 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) 588 { 589 590 trunk->vlans[ifv->ifv_vid] = NULL; 591 trunk->refcnt--; 592 593 return (0); 594 } 595 596 static __inline void 597 vlan_freehash(struct ifvlantrunk *trunk) 598 { 599 } 600 601 static __inline void 602 vlan_inithash(struct ifvlantrunk *trunk) 603 { 604 } 605 606 #endif /* !VLAN_ARRAY */ 607 608 static void 609 trunk_destroy(struct ifvlantrunk *trunk) 610 { 611 VLAN_XLOCK_ASSERT(); 612 613 vlan_freehash(trunk); 614 trunk->parent->if_vlantrunk = NULL; 615 TRUNK_LOCK_DESTROY(trunk); 616 if_rele(trunk->parent); 617 free(trunk, M_VLAN); 618 } 619 620 /* 621 * Program our multicast filter. What we're actually doing is 622 * programming the multicast filter of the parent. This has the 623 * side effect of causing the parent interface to receive multicast 624 * traffic that it doesn't really want, which ends up being discarded 625 * later by the upper protocol layers. Unfortunately, there's no way 626 * to avoid this: there really is only one physical interface. 627 */ 628 static int 629 vlan_setmulti(struct ifnet *ifp) 630 { 631 struct ifnet *ifp_p; 632 struct ifmultiaddr *ifma; 633 struct ifvlan *sc; 634 struct vlan_mc_entry *mc; 635 int error; 636 637 VLAN_XLOCK_ASSERT(); 638 639 /* Find the parent. */ 640 sc = ifp->if_softc; 641 ifp_p = PARENT(sc); 642 643 CURVNET_SET_QUIET(ifp_p->if_vnet); 644 645 /* First, remove any existing filter entries. */ 646 while ((mc = CK_SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { 647 CK_SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); 648 (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); 649 NET_EPOCH_CALL(vlan_mc_free, &mc->mc_epoch_ctx); 650 } 651 652 /* Now program new ones. */ 653 IF_ADDR_WLOCK(ifp); 654 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 655 if (ifma->ifma_addr->sa_family != AF_LINK) 656 continue; 657 mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT); 658 if (mc == NULL) { 659 IF_ADDR_WUNLOCK(ifp); 660 CURVNET_RESTORE(); 661 return (ENOMEM); 662 } 663 bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); 664 mc->mc_addr.sdl_index = ifp_p->if_index; 665 CK_SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); 666 } 667 IF_ADDR_WUNLOCK(ifp); 668 CK_SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) { 669 error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr, 670 NULL); 671 if (error) { 672 CURVNET_RESTORE(); 673 return (error); 674 } 675 } 676 677 CURVNET_RESTORE(); 678 return (0); 679 } 680 681 /* 682 * A handler for interface ifnet events. 683 */ 684 static void 685 vlan_ifevent(void *arg __unused, struct ifnet *ifp, int event) 686 { 687 struct epoch_tracker et; 688 struct ifvlan *ifv; 689 struct ifvlantrunk *trunk; 690 691 if (event != IFNET_EVENT_UPDATE_BAUDRATE) 692 return; 693 694 NET_EPOCH_ENTER(et); 695 trunk = ifp->if_vlantrunk; 696 if (trunk == NULL) { 697 NET_EPOCH_EXIT(et); 698 return; 699 } 700 701 TRUNK_WLOCK(trunk); 702 VLAN_FOREACH(ifv, trunk) { 703 ifv->ifv_ifp->if_baudrate = ifp->if_baudrate; 704 } 705 TRUNK_WUNLOCK(trunk); 706 NET_EPOCH_EXIT(et); 707 } 708 709 /* 710 * A handler for parent interface link layer address changes. 711 * If the parent interface link layer address is changed we 712 * should also change it on all children vlans. 713 */ 714 static void 715 vlan_iflladdr(void *arg __unused, struct ifnet *ifp) 716 { 717 struct epoch_tracker et; 718 struct ifvlan *ifv; 719 struct ifnet *ifv_ifp; 720 struct ifvlantrunk *trunk; 721 struct sockaddr_dl *sdl; 722 723 /* Need the epoch since this is run on taskqueue_swi. */ 724 NET_EPOCH_ENTER(et); 725 trunk = ifp->if_vlantrunk; 726 if (trunk == NULL) { 727 NET_EPOCH_EXIT(et); 728 return; 729 } 730 731 /* 732 * OK, it's a trunk. Loop over and change all vlan's lladdrs on it. 733 * We need an exclusive lock here to prevent concurrent SIOCSIFLLADDR 734 * ioctl calls on the parent garbling the lladdr of the child vlan. 735 */ 736 TRUNK_WLOCK(trunk); 737 VLAN_FOREACH(ifv, trunk) { 738 /* 739 * Copy new new lladdr into the ifv_ifp, enqueue a task 740 * to actually call if_setlladdr. if_setlladdr needs to 741 * be deferred to a taskqueue because it will call into 742 * the if_vlan ioctl path and try to acquire the global 743 * lock. 744 */ 745 ifv_ifp = ifv->ifv_ifp; 746 bcopy(IF_LLADDR(ifp), IF_LLADDR(ifv_ifp), 747 ifp->if_addrlen); 748 sdl = (struct sockaddr_dl *)ifv_ifp->if_addr->ifa_addr; 749 sdl->sdl_alen = ifp->if_addrlen; 750 taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); 751 } 752 TRUNK_WUNLOCK(trunk); 753 NET_EPOCH_EXIT(et); 754 } 755 756 /* 757 * A handler for network interface departure events. 758 * Track departure of trunks here so that we don't access invalid 759 * pointers or whatever if a trunk is ripped from under us, e.g., 760 * by ejecting its hot-plug card. However, if an ifnet is simply 761 * being renamed, then there's no need to tear down the state. 762 */ 763 static void 764 vlan_ifdetach(void *arg __unused, struct ifnet *ifp) 765 { 766 struct ifvlan *ifv; 767 struct ifvlantrunk *trunk; 768 769 /* If the ifnet is just being renamed, don't do anything. */ 770 if (ifp->if_flags & IFF_RENAMING) 771 return; 772 VLAN_XLOCK(); 773 trunk = ifp->if_vlantrunk; 774 if (trunk == NULL) { 775 VLAN_XUNLOCK(); 776 return; 777 } 778 779 /* 780 * OK, it's a trunk. Loop over and detach all vlan's on it. 781 * Check trunk pointer after each vlan_unconfig() as it will 782 * free it and set to NULL after the last vlan was detached. 783 */ 784 VLAN_FOREACH_UNTIL_SAFE(ifv, ifp->if_vlantrunk, 785 ifp->if_vlantrunk == NULL) 786 vlan_unconfig_locked(ifv->ifv_ifp, 1); 787 788 /* Trunk should have been destroyed in vlan_unconfig(). */ 789 KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__)); 790 VLAN_XUNLOCK(); 791 } 792 793 /* 794 * Return the trunk device for a virtual interface. 795 */ 796 static struct ifnet * 797 vlan_trunkdev(struct ifnet *ifp) 798 { 799 struct ifvlan *ifv; 800 801 NET_EPOCH_ASSERT(); 802 803 if (ifp->if_type != IFT_L2VLAN) 804 return (NULL); 805 806 ifv = ifp->if_softc; 807 ifp = NULL; 808 if (ifv->ifv_trunk) 809 ifp = PARENT(ifv); 810 return (ifp); 811 } 812 813 /* 814 * Return the 12-bit VLAN VID for this interface, for use by external 815 * components such as Infiniband. 816 * 817 * XXXRW: Note that the function name here is historical; it should be named 818 * vlan_vid(). 819 */ 820 static int 821 vlan_tag(struct ifnet *ifp, uint16_t *vidp) 822 { 823 struct ifvlan *ifv; 824 825 if (ifp->if_type != IFT_L2VLAN) 826 return (EINVAL); 827 ifv = ifp->if_softc; 828 *vidp = ifv->ifv_vid; 829 return (0); 830 } 831 832 static int 833 vlan_pcp(struct ifnet *ifp, uint16_t *pcpp) 834 { 835 struct ifvlan *ifv; 836 837 if (ifp->if_type != IFT_L2VLAN) 838 return (EINVAL); 839 ifv = ifp->if_softc; 840 *pcpp = ifv->ifv_pcp; 841 return (0); 842 } 843 844 /* 845 * Return a driver specific cookie for this interface. Synchronization 846 * with setcookie must be provided by the driver. 847 */ 848 static void * 849 vlan_cookie(struct ifnet *ifp) 850 { 851 struct ifvlan *ifv; 852 853 if (ifp->if_type != IFT_L2VLAN) 854 return (NULL); 855 ifv = ifp->if_softc; 856 return (ifv->ifv_cookie); 857 } 858 859 /* 860 * Store a cookie in our softc that drivers can use to store driver 861 * private per-instance data in. 862 */ 863 static int 864 vlan_setcookie(struct ifnet *ifp, void *cookie) 865 { 866 struct ifvlan *ifv; 867 868 if (ifp->if_type != IFT_L2VLAN) 869 return (EINVAL); 870 ifv = ifp->if_softc; 871 ifv->ifv_cookie = cookie; 872 return (0); 873 } 874 875 /* 876 * Return the vlan device present at the specific VID. 877 */ 878 static struct ifnet * 879 vlan_devat(struct ifnet *ifp, uint16_t vid) 880 { 881 struct ifvlantrunk *trunk; 882 struct ifvlan *ifv; 883 884 NET_EPOCH_ASSERT(); 885 886 trunk = ifp->if_vlantrunk; 887 if (trunk == NULL) 888 return (NULL); 889 ifp = NULL; 890 ifv = vlan_gethash(trunk, vid); 891 if (ifv) 892 ifp = ifv->ifv_ifp; 893 return (ifp); 894 } 895 896 /* 897 * VLAN support can be loaded as a module. The only place in the 898 * system that's intimately aware of this is ether_input. We hook 899 * into this code through vlan_input_p which is defined there and 900 * set here. No one else in the system should be aware of this so 901 * we use an explicit reference here. 902 */ 903 extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); 904 905 /* For if_link_state_change() eyes only... */ 906 extern void (*vlan_link_state_p)(struct ifnet *); 907 908 static struct if_clone_addreq_v2 vlan_addreq = { 909 .version = 2, 910 .match_f = vlan_clone_match, 911 .create_f = vlan_clone_create, 912 .destroy_f = vlan_clone_destroy, 913 .create_nl_f = vlan_clone_create_nl, 914 .modify_nl_f = vlan_clone_modify_nl, 915 .dump_nl_f = vlan_clone_dump_nl, 916 }; 917 918 static int 919 vlan_modevent(module_t mod, int type, void *data) 920 { 921 922 switch (type) { 923 case MOD_LOAD: 924 ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 925 vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY); 926 if (ifdetach_tag == NULL) 927 return (ENOMEM); 928 iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, 929 vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY); 930 if (iflladdr_tag == NULL) 931 return (ENOMEM); 932 ifevent_tag = EVENTHANDLER_REGISTER(ifnet_event, 933 vlan_ifevent, NULL, EVENTHANDLER_PRI_ANY); 934 if (ifevent_tag == NULL) 935 return (ENOMEM); 936 VLAN_LOCKING_INIT(); 937 vlan_input_p = vlan_input; 938 vlan_link_state_p = vlan_link_state; 939 vlan_trunk_cap_p = vlan_trunk_capabilities; 940 vlan_trunkdev_p = vlan_trunkdev; 941 vlan_cookie_p = vlan_cookie; 942 vlan_setcookie_p = vlan_setcookie; 943 vlan_tag_p = vlan_tag; 944 vlan_pcp_p = vlan_pcp; 945 vlan_devat_p = vlan_devat; 946 #ifndef VIMAGE 947 vlan_cloner = ifc_attach_cloner(vlanname, (struct if_clone_addreq *)&vlan_addreq); 948 #endif 949 if (bootverbose) 950 printf("vlan: initialized, using " 951 #ifdef VLAN_ARRAY 952 "full-size arrays" 953 #else 954 "hash tables with chaining" 955 #endif 956 957 "\n"); 958 break; 959 case MOD_UNLOAD: 960 #ifndef VIMAGE 961 ifc_detach_cloner(vlan_cloner); 962 #endif 963 EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag); 964 EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag); 965 EVENTHANDLER_DEREGISTER(ifnet_event, ifevent_tag); 966 vlan_input_p = NULL; 967 vlan_link_state_p = NULL; 968 vlan_trunk_cap_p = NULL; 969 vlan_trunkdev_p = NULL; 970 vlan_tag_p = NULL; 971 vlan_cookie_p = NULL; 972 vlan_setcookie_p = NULL; 973 vlan_devat_p = NULL; 974 VLAN_LOCKING_DESTROY(); 975 if (bootverbose) 976 printf("vlan: unloaded\n"); 977 break; 978 default: 979 return (EOPNOTSUPP); 980 } 981 return (0); 982 } 983 984 static moduledata_t vlan_mod = { 985 "if_vlan", 986 vlan_modevent, 987 0 988 }; 989 990 DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 991 MODULE_VERSION(if_vlan, 3); 992 993 #ifdef VIMAGE 994 static void 995 vnet_vlan_init(const void *unused __unused) 996 { 997 vlan_cloner = ifc_attach_cloner(vlanname, (struct if_clone_addreq *)&vlan_addreq); 998 V_vlan_cloner = vlan_cloner; 999 } 1000 VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 1001 vnet_vlan_init, NULL); 1002 1003 static void 1004 vnet_vlan_uninit(const void *unused __unused) 1005 { 1006 1007 ifc_detach_cloner(V_vlan_cloner); 1008 } 1009 VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, 1010 vnet_vlan_uninit, NULL); 1011 #endif 1012 1013 /* 1014 * Check for <etherif>.<vlan>[.<vlan> ...] style interface names. 1015 */ 1016 static struct ifnet * 1017 vlan_clone_match_ethervid(const char *name, int *vidp) 1018 { 1019 char ifname[IFNAMSIZ]; 1020 char *cp; 1021 struct ifnet *ifp; 1022 int vid; 1023 1024 strlcpy(ifname, name, IFNAMSIZ); 1025 if ((cp = strrchr(ifname, '.')) == NULL) 1026 return (NULL); 1027 *cp = '\0'; 1028 if ((ifp = ifunit_ref(ifname)) == NULL) 1029 return (NULL); 1030 /* Parse VID. */ 1031 if (*++cp == '\0') { 1032 if_rele(ifp); 1033 return (NULL); 1034 } 1035 vid = 0; 1036 for(; *cp >= '0' && *cp <= '9'; cp++) 1037 vid = (vid * 10) + (*cp - '0'); 1038 if (*cp != '\0') { 1039 if_rele(ifp); 1040 return (NULL); 1041 } 1042 if (vidp != NULL) 1043 *vidp = vid; 1044 1045 return (ifp); 1046 } 1047 1048 static int 1049 vlan_clone_match(struct if_clone *ifc, const char *name) 1050 { 1051 struct ifnet *ifp; 1052 const char *cp; 1053 1054 ifp = vlan_clone_match_ethervid(name, NULL); 1055 if (ifp != NULL) { 1056 if_rele(ifp); 1057 return (1); 1058 } 1059 1060 if (strncmp(vlanname, name, strlen(vlanname)) != 0) 1061 return (0); 1062 for (cp = name + 4; *cp != '\0'; cp++) { 1063 if (*cp < '0' || *cp > '9') 1064 return (0); 1065 } 1066 1067 return (1); 1068 } 1069 1070 static int 1071 vlan_clone_create(struct if_clone *ifc, char *name, size_t len, 1072 struct ifc_data *ifd, struct ifnet **ifpp) 1073 { 1074 char *dp; 1075 bool wildcard = false; 1076 bool subinterface = false; 1077 int unit; 1078 int error; 1079 int vid = 0; 1080 uint16_t proto = ETHERTYPE_VLAN; 1081 struct ifvlan *ifv; 1082 struct ifnet *ifp; 1083 struct ifnet *p = NULL; 1084 struct ifaddr *ifa; 1085 struct sockaddr_dl *sdl; 1086 struct vlanreq vlr; 1087 static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ 1088 1089 1090 /* 1091 * There are three ways to specify the cloned device: 1092 * o pass a parameter block with the clone request. 1093 * o specify parameters in the text of the clone device name 1094 * o specify no parameters and get an unattached device that 1095 * must be configured separately. 1096 * The first technique is preferred; the latter two are supported 1097 * for backwards compatibility. 1098 * 1099 * XXXRW: Note historic use of the word "tag" here. New ioctls may be 1100 * called for. 1101 */ 1102 1103 if (ifd->params != NULL) { 1104 error = ifc_copyin(ifd, &vlr, sizeof(vlr)); 1105 if (error) 1106 return error; 1107 vid = vlr.vlr_tag; 1108 proto = vlr.vlr_proto; 1109 if (proto == 0) 1110 proto = ETHERTYPE_VLAN; 1111 p = ifunit_ref(vlr.vlr_parent); 1112 if (p == NULL) 1113 return (ENXIO); 1114 } 1115 1116 if ((error = ifc_name2unit(name, &unit)) == 0) { 1117 1118 /* 1119 * vlanX interface. Set wildcard to true if the unit number 1120 * is not fixed (-1) 1121 */ 1122 wildcard = (unit < 0); 1123 } else { 1124 struct ifnet *p_tmp = vlan_clone_match_ethervid(name, &vid); 1125 if (p_tmp != NULL) { 1126 error = 0; 1127 subinterface = true; 1128 unit = IF_DUNIT_NONE; 1129 wildcard = false; 1130 if (p != NULL) { 1131 if_rele(p_tmp); 1132 if (p != p_tmp) 1133 error = EINVAL; 1134 } else 1135 p = p_tmp; 1136 } else 1137 error = ENXIO; 1138 } 1139 1140 if (error != 0) { 1141 if (p != NULL) 1142 if_rele(p); 1143 return (error); 1144 } 1145 1146 if (!subinterface) { 1147 /* vlanX interface, mark X as busy or allocate new unit # */ 1148 error = ifc_alloc_unit(ifc, &unit); 1149 if (error != 0) { 1150 if (p != NULL) 1151 if_rele(p); 1152 return (error); 1153 } 1154 } 1155 1156 /* In the wildcard case, we need to update the name. */ 1157 if (wildcard) { 1158 for (dp = name; *dp != '\0'; dp++); 1159 if (snprintf(dp, len - (dp-name), "%d", unit) > 1160 len - (dp-name) - 1) { 1161 panic("%s: interface name too long", __func__); 1162 } 1163 } 1164 1165 ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); 1166 ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER); 1167 if (ifp == NULL) { 1168 if (!subinterface) 1169 ifc_free_unit(ifc, unit); 1170 free(ifv, M_VLAN); 1171 if (p != NULL) 1172 if_rele(p); 1173 return (ENOSPC); 1174 } 1175 CK_SLIST_INIT(&ifv->vlan_mc_listhead); 1176 ifp->if_softc = ifv; 1177 /* 1178 * Set the name manually rather than using if_initname because 1179 * we don't conform to the default naming convention for interfaces. 1180 */ 1181 strlcpy(ifp->if_xname, name, IFNAMSIZ); 1182 ifp->if_dname = vlanname; 1183 ifp->if_dunit = unit; 1184 1185 ifp->if_init = vlan_init; 1186 #ifdef ALTQ 1187 ifp->if_start = vlan_altq_start; 1188 ifp->if_transmit = vlan_altq_transmit; 1189 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 1190 ifp->if_snd.ifq_drv_maxlen = 0; 1191 IFQ_SET_READY(&ifp->if_snd); 1192 #else 1193 ifp->if_transmit = vlan_transmit; 1194 #endif 1195 ifp->if_qflush = vlan_qflush; 1196 ifp->if_ioctl = vlan_ioctl; 1197 #if defined(KERN_TLS) || defined(RATELIMIT) 1198 ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; 1199 ifp->if_ratelimit_query = vlan_ratelimit_query; 1200 #endif 1201 ifp->if_flags = VLAN_IFFLAGS; 1202 ether_ifattach(ifp, eaddr); 1203 /* Now undo some of the damage... */ 1204 ifp->if_baudrate = 0; 1205 ifp->if_type = IFT_L2VLAN; 1206 ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; 1207 ifa = ifp->if_addr; 1208 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 1209 sdl->sdl_type = IFT_L2VLAN; 1210 1211 if (p != NULL) { 1212 error = vlan_config(ifv, p, vid, proto); 1213 if_rele(p); 1214 if (error != 0) { 1215 /* 1216 * Since we've partially failed, we need to back 1217 * out all the way, otherwise userland could get 1218 * confused. Thus, we destroy the interface. 1219 */ 1220 ether_ifdetach(ifp); 1221 vlan_unconfig(ifp); 1222 if_free(ifp); 1223 if (!subinterface) 1224 ifc_free_unit(ifc, unit); 1225 free(ifv, M_VLAN); 1226 1227 return (error); 1228 } 1229 } 1230 *ifpp = ifp; 1231 1232 return (0); 1233 } 1234 1235 /* 1236 * 1237 * Parsers of IFLA_INFO_DATA inside IFLA_LINKINFO of RTM_NEWLINK 1238 * {{nla_len=8, nla_type=IFLA_LINK}, 2}, 1239 * {{nla_len=12, nla_type=IFLA_IFNAME}, "xvlan22"}, 1240 * {{nla_len=24, nla_type=IFLA_LINKINFO}, 1241 * [ 1242 * {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...}, 1243 * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x16\x00\x00\x00"}]} 1244 */ 1245 1246 struct nl_parsed_vlan { 1247 uint16_t vlan_id; 1248 uint16_t vlan_proto; 1249 struct ifla_vlan_flags vlan_flags; 1250 }; 1251 1252 #define _OUT(_field) offsetof(struct nl_parsed_vlan, _field) 1253 static const struct nlattr_parser nla_p_vlan[] = { 1254 { .type = IFLA_VLAN_ID, .off = _OUT(vlan_id), .cb = nlattr_get_uint16 }, 1255 { .type = IFLA_VLAN_FLAGS, .off = _OUT(vlan_flags), .cb = nlattr_get_nla }, 1256 { .type = IFLA_VLAN_PROTOCOL, .off = _OUT(vlan_proto), .cb = nlattr_get_uint16 }, 1257 }; 1258 #undef _OUT 1259 NL_DECLARE_ATTR_PARSER(vlan_parser, nla_p_vlan); 1260 1261 static int 1262 vlan_clone_create_nl(struct if_clone *ifc, char *name, size_t len, 1263 struct ifc_data_nl *ifd) 1264 { 1265 struct epoch_tracker et; 1266 struct ifnet *ifp_parent; 1267 struct nl_pstate *npt = ifd->npt; 1268 struct nl_parsed_link *lattrs = ifd->lattrs; 1269 int error; 1270 1271 /* 1272 * lattrs.ifla_ifname is the new interface name 1273 * lattrs.ifi_index contains parent interface index 1274 * lattrs.ifla_idata contains un-parsed vlan data 1275 */ 1276 struct nl_parsed_vlan attrs = { 1277 .vlan_id = 0xFEFE, 1278 .vlan_proto = ETHERTYPE_VLAN 1279 }; 1280 1281 if (lattrs->ifla_idata == NULL) { 1282 nlmsg_report_err_msg(npt, "vlan id is required, guessing not supported"); 1283 return (ENOTSUP); 1284 } 1285 1286 error = nl_parse_nested(lattrs->ifla_idata, &vlan_parser, npt, &attrs); 1287 if (error != 0) 1288 return (error); 1289 if (attrs.vlan_id > 4095) { 1290 nlmsg_report_err_msg(npt, "Invalid VID: %d", attrs.vlan_id); 1291 return (EINVAL); 1292 } 1293 if (attrs.vlan_proto != ETHERTYPE_VLAN && attrs.vlan_proto != ETHERTYPE_QINQ) { 1294 nlmsg_report_err_msg(npt, "Unsupported ethertype: 0x%04X", attrs.vlan_proto); 1295 return (ENOTSUP); 1296 } 1297 1298 struct vlanreq params = { 1299 .vlr_tag = attrs.vlan_id, 1300 .vlr_proto = attrs.vlan_proto, 1301 }; 1302 struct ifc_data ifd_new = { .flags = IFC_F_SYSSPACE, .unit = ifd->unit, .params = ¶ms }; 1303 1304 NET_EPOCH_ENTER(et); 1305 ifp_parent = ifnet_byindex(lattrs->ifi_index); 1306 if (ifp_parent != NULL) 1307 strlcpy(params.vlr_parent, if_name(ifp_parent), sizeof(params.vlr_parent)); 1308 NET_EPOCH_EXIT(et); 1309 1310 if (ifp_parent == NULL) { 1311 nlmsg_report_err_msg(npt, "unable to find parent interface %u", lattrs->ifi_index); 1312 return (ENOENT); 1313 } 1314 1315 error = vlan_clone_create(ifc, name, len, &ifd_new, &ifd->ifp); 1316 1317 return (error); 1318 } 1319 1320 static int 1321 vlan_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd) 1322 { 1323 struct nl_parsed_link *lattrs = ifd->lattrs; 1324 1325 if ((lattrs->ifla_idata != NULL) && ((ifd->flags & IFC_F_CREATE) == 0)) { 1326 struct epoch_tracker et; 1327 struct nl_parsed_vlan attrs = { 1328 .vlan_proto = ETHERTYPE_VLAN, 1329 }; 1330 int error; 1331 1332 error = nl_parse_nested(lattrs->ifla_idata, &vlan_parser, ifd->npt, &attrs); 1333 if (error != 0) 1334 return (error); 1335 1336 NET_EPOCH_ENTER(et); 1337 struct ifnet *ifp_parent = ifnet_byindex_ref(lattrs->ifla_link); 1338 NET_EPOCH_EXIT(et); 1339 1340 if (ifp_parent == NULL) { 1341 nlmsg_report_err_msg(ifd->npt, "unable to find parent interface %u", 1342 lattrs->ifla_link); 1343 return (ENOENT); 1344 } 1345 1346 struct ifvlan *ifv = ifp->if_softc; 1347 error = vlan_config(ifv, ifp_parent, attrs.vlan_id, attrs.vlan_proto); 1348 1349 if_rele(ifp_parent); 1350 if (error != 0) 1351 return (error); 1352 } 1353 1354 return (nl_modify_ifp_generic(ifp, ifd->lattrs, ifd->bm, ifd->npt)); 1355 } 1356 1357 /* 1358 * {{nla_len=24, nla_type=IFLA_LINKINFO}, 1359 * [ 1360 * {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...}, 1361 * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x16\x00\x00\x00"}]} 1362 */ 1363 static void 1364 vlan_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw) 1365 { 1366 uint32_t parent_index = 0; 1367 uint16_t vlan_id = 0; 1368 uint16_t vlan_proto = 0; 1369 1370 VLAN_SLOCK(); 1371 struct ifvlan *ifv = ifp->if_softc; 1372 if (TRUNK(ifv) != NULL) 1373 parent_index = PARENT(ifv)->if_index; 1374 vlan_id = ifv->ifv_vid; 1375 vlan_proto = ifv->ifv_proto; 1376 VLAN_SUNLOCK(); 1377 1378 if (parent_index != 0) 1379 nlattr_add_u32(nw, IFLA_LINK, parent_index); 1380 1381 int off = nlattr_add_nested(nw, IFLA_LINKINFO); 1382 if (off != 0) { 1383 nlattr_add_string(nw, IFLA_INFO_KIND, "vlan"); 1384 int off2 = nlattr_add_nested(nw, IFLA_INFO_DATA); 1385 if (off2 != 0) { 1386 nlattr_add_u16(nw, IFLA_VLAN_ID, vlan_id); 1387 nlattr_add_u16(nw, IFLA_VLAN_PROTOCOL, vlan_proto); 1388 nlattr_set_len(nw, off2); 1389 } 1390 nlattr_set_len(nw, off); 1391 } 1392 } 1393 1394 static int 1395 vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 1396 { 1397 struct ifvlan *ifv = ifp->if_softc; 1398 int unit = ifp->if_dunit; 1399 1400 if (ifp->if_vlantrunk) 1401 return (EBUSY); 1402 1403 #ifdef ALTQ 1404 IFQ_PURGE(&ifp->if_snd); 1405 #endif 1406 ether_ifdetach(ifp); /* first, remove it from system-wide lists */ 1407 vlan_unconfig(ifp); /* now it can be unconfigured and freed */ 1408 /* 1409 * We should have the only reference to the ifv now, so we can now 1410 * drain any remaining lladdr task before freeing the ifnet and the 1411 * ifvlan. 1412 */ 1413 taskqueue_drain(taskqueue_thread, &ifv->lladdr_task); 1414 NET_EPOCH_WAIT(); 1415 if_free(ifp); 1416 free(ifv, M_VLAN); 1417 if (unit != IF_DUNIT_NONE) 1418 ifc_free_unit(ifc, unit); 1419 1420 return (0); 1421 } 1422 1423 /* 1424 * The ifp->if_init entry point for vlan(4) is a no-op. 1425 */ 1426 static void 1427 vlan_init(void *foo __unused) 1428 { 1429 } 1430 1431 /* 1432 * The if_transmit method for vlan(4) interface. 1433 */ 1434 static int 1435 vlan_transmit(struct ifnet *ifp, struct mbuf *m) 1436 { 1437 struct ifvlan *ifv; 1438 struct ifnet *p; 1439 int error, len, mcast; 1440 1441 NET_EPOCH_ASSERT(); 1442 1443 ifv = ifp->if_softc; 1444 if (TRUNK(ifv) == NULL) { 1445 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1446 m_freem(m); 1447 return (ENETDOWN); 1448 } 1449 p = PARENT(ifv); 1450 len = m->m_pkthdr.len; 1451 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 1452 1453 BPF_MTAP(ifp, m); 1454 1455 #if defined(KERN_TLS) || defined(RATELIMIT) 1456 if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { 1457 struct vlan_snd_tag *vst; 1458 struct m_snd_tag *mst; 1459 1460 MPASS(m->m_pkthdr.snd_tag->ifp == ifp); 1461 mst = m->m_pkthdr.snd_tag; 1462 vst = mst_to_vst(mst); 1463 if (vst->tag->ifp != p) { 1464 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1465 m_freem(m); 1466 return (EAGAIN); 1467 } 1468 1469 m->m_pkthdr.snd_tag = m_snd_tag_ref(vst->tag); 1470 m_snd_tag_rele(mst); 1471 } 1472 #endif 1473 1474 /* 1475 * Do not run parent's if_transmit() if the parent is not up, 1476 * or parent's driver will cause a system crash. 1477 */ 1478 if (!UP_AND_RUNNING(p)) { 1479 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1480 m_freem(m); 1481 return (ENETDOWN); 1482 } 1483 1484 if (!ether_8021q_frame(&m, ifp, p, &ifv->ifv_qtag)) { 1485 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1486 return (0); 1487 } 1488 1489 /* 1490 * Send it, precisely as ether_output() would have. 1491 */ 1492 error = (p->if_transmit)(p, m); 1493 if (error == 0) { 1494 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1495 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 1496 if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast); 1497 } else 1498 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1499 return (error); 1500 } 1501 1502 static int 1503 vlan_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1504 struct route *ro) 1505 { 1506 struct ifvlan *ifv; 1507 struct ifnet *p; 1508 1509 NET_EPOCH_ASSERT(); 1510 1511 /* 1512 * Find the first non-VLAN parent interface. 1513 */ 1514 ifv = ifp->if_softc; 1515 do { 1516 if (TRUNK(ifv) == NULL) { 1517 m_freem(m); 1518 return (ENETDOWN); 1519 } 1520 p = PARENT(ifv); 1521 ifv = p->if_softc; 1522 } while (p->if_type == IFT_L2VLAN); 1523 1524 return p->if_output(ifp, m, dst, ro); 1525 } 1526 1527 #ifdef ALTQ 1528 static void 1529 vlan_altq_start(if_t ifp) 1530 { 1531 struct ifaltq *ifq = &ifp->if_snd; 1532 struct mbuf *m; 1533 1534 IFQ_LOCK(ifq); 1535 IFQ_DEQUEUE_NOLOCK(ifq, m); 1536 while (m != NULL) { 1537 vlan_transmit(ifp, m); 1538 IFQ_DEQUEUE_NOLOCK(ifq, m); 1539 } 1540 IFQ_UNLOCK(ifq); 1541 } 1542 1543 static int 1544 vlan_altq_transmit(if_t ifp, struct mbuf *m) 1545 { 1546 int err; 1547 1548 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 1549 IFQ_ENQUEUE(&ifp->if_snd, m, err); 1550 if (err == 0) 1551 vlan_altq_start(ifp); 1552 } else 1553 err = vlan_transmit(ifp, m); 1554 1555 return (err); 1556 } 1557 #endif /* ALTQ */ 1558 1559 /* 1560 * The ifp->if_qflush entry point for vlan(4) is a no-op. 1561 */ 1562 static void 1563 vlan_qflush(struct ifnet *ifp __unused) 1564 { 1565 } 1566 1567 static void 1568 vlan_input(struct ifnet *ifp, struct mbuf *m) 1569 { 1570 struct ifvlantrunk *trunk; 1571 struct ifvlan *ifv; 1572 struct m_tag *mtag; 1573 uint16_t vid, tag; 1574 1575 NET_EPOCH_ASSERT(); 1576 1577 trunk = ifp->if_vlantrunk; 1578 if (trunk == NULL) { 1579 m_freem(m); 1580 return; 1581 } 1582 1583 if (m->m_flags & M_VLANTAG) { 1584 /* 1585 * Packet is tagged, but m contains a normal 1586 * Ethernet frame; the tag is stored out-of-band. 1587 */ 1588 tag = m->m_pkthdr.ether_vtag; 1589 m->m_flags &= ~M_VLANTAG; 1590 } else { 1591 struct ether_vlan_header *evl; 1592 1593 /* 1594 * Packet is tagged in-band as specified by 802.1q. 1595 */ 1596 switch (ifp->if_type) { 1597 case IFT_ETHER: 1598 if (m->m_len < sizeof(*evl) && 1599 (m = m_pullup(m, sizeof(*evl))) == NULL) { 1600 if_printf(ifp, "cannot pullup VLAN header\n"); 1601 return; 1602 } 1603 evl = mtod(m, struct ether_vlan_header *); 1604 tag = ntohs(evl->evl_tag); 1605 1606 /* 1607 * Remove the 802.1q header by copying the Ethernet 1608 * addresses over it and adjusting the beginning of 1609 * the data in the mbuf. The encapsulated Ethernet 1610 * type field is already in place. 1611 */ 1612 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 1613 ETHER_HDR_LEN - ETHER_TYPE_LEN); 1614 m_adj(m, ETHER_VLAN_ENCAP_LEN); 1615 break; 1616 1617 default: 1618 #ifdef INVARIANTS 1619 panic("%s: %s has unsupported if_type %u", 1620 __func__, ifp->if_xname, ifp->if_type); 1621 #endif 1622 if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); 1623 m_freem(m); 1624 return; 1625 } 1626 } 1627 1628 vid = EVL_VLANOFTAG(tag); 1629 1630 ifv = vlan_gethash(trunk, vid); 1631 if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { 1632 if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); 1633 m_freem(m); 1634 return; 1635 } 1636 1637 if (V_vlan_mtag_pcp) { 1638 /* 1639 * While uncommon, it is possible that we will find a 802.1q 1640 * packet encapsulated inside another packet that also had an 1641 * 802.1q header. For example, ethernet tunneled over IPSEC 1642 * arriving over ethernet. In that case, we replace the 1643 * existing 802.1q PCP m_tag value. 1644 */ 1645 mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); 1646 if (mtag == NULL) { 1647 mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN, 1648 sizeof(uint8_t), M_NOWAIT); 1649 if (mtag == NULL) { 1650 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 1651 m_freem(m); 1652 return; 1653 } 1654 m_tag_prepend(m, mtag); 1655 } 1656 *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag); 1657 } 1658 1659 m->m_pkthdr.rcvif = ifv->ifv_ifp; 1660 if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1); 1661 1662 /* Pass it back through the parent's input routine. */ 1663 (*ifv->ifv_ifp->if_input)(ifv->ifv_ifp, m); 1664 } 1665 1666 static void 1667 vlan_lladdr_fn(void *arg, int pending __unused) 1668 { 1669 struct ifvlan *ifv; 1670 struct ifnet *ifp; 1671 1672 ifv = (struct ifvlan *)arg; 1673 ifp = ifv->ifv_ifp; 1674 1675 CURVNET_SET(ifp->if_vnet); 1676 1677 /* The ifv_ifp already has the lladdr copied in. */ 1678 if_setlladdr(ifp, IF_LLADDR(ifp), ifp->if_addrlen); 1679 1680 CURVNET_RESTORE(); 1681 } 1682 1683 static int 1684 vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, 1685 uint16_t proto) 1686 { 1687 struct epoch_tracker et; 1688 struct ifvlantrunk *trunk; 1689 struct ifnet *ifp; 1690 int error = 0; 1691 1692 /* 1693 * We can handle non-ethernet hardware types as long as 1694 * they handle the tagging and headers themselves. 1695 */ 1696 if (p->if_type != IFT_ETHER && 1697 p->if_type != IFT_L2VLAN && 1698 (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) 1699 return (EPROTONOSUPPORT); 1700 if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) 1701 return (EPROTONOSUPPORT); 1702 /* 1703 * Don't let the caller set up a VLAN VID with 1704 * anything except VLID bits. 1705 * VID numbers 0x0 and 0xFFF are reserved. 1706 */ 1707 if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK)) 1708 return (EINVAL); 1709 if (ifv->ifv_trunk) { 1710 trunk = ifv->ifv_trunk; 1711 if (trunk->parent != p) 1712 return (EBUSY); 1713 1714 VLAN_XLOCK(); 1715 1716 ifv->ifv_proto = proto; 1717 1718 if (ifv->ifv_vid != vid) { 1719 /* Re-hash */ 1720 vlan_remhash(trunk, ifv); 1721 ifv->ifv_vid = vid; 1722 error = vlan_inshash(trunk, ifv); 1723 } 1724 /* Will unlock */ 1725 goto done; 1726 } 1727 1728 VLAN_XLOCK(); 1729 if (p->if_vlantrunk == NULL) { 1730 trunk = malloc(sizeof(struct ifvlantrunk), 1731 M_VLAN, M_WAITOK | M_ZERO); 1732 vlan_inithash(trunk); 1733 TRUNK_LOCK_INIT(trunk); 1734 TRUNK_WLOCK(trunk); 1735 p->if_vlantrunk = trunk; 1736 trunk->parent = p; 1737 if_ref(trunk->parent); 1738 TRUNK_WUNLOCK(trunk); 1739 } else { 1740 trunk = p->if_vlantrunk; 1741 } 1742 1743 ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ 1744 ifv->ifv_pcp = 0; /* Default: best effort delivery. */ 1745 error = vlan_inshash(trunk, ifv); 1746 if (error) 1747 goto done; 1748 ifv->ifv_proto = proto; 1749 ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; 1750 ifv->ifv_mintu = ETHERMIN; 1751 ifv->ifv_pflags = 0; 1752 ifv->ifv_capenable = -1; 1753 1754 /* 1755 * If the parent supports the VLAN_MTU capability, 1756 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, 1757 * use it. 1758 */ 1759 if (p->if_capenable & IFCAP_VLAN_MTU) { 1760 /* 1761 * No need to fudge the MTU since the parent can 1762 * handle extended frames. 1763 */ 1764 ifv->ifv_mtufudge = 0; 1765 } else { 1766 /* 1767 * Fudge the MTU by the encapsulation size. This 1768 * makes us incompatible with strictly compliant 1769 * 802.1Q implementations, but allows us to use 1770 * the feature with other NetBSD implementations, 1771 * which might still be useful. 1772 */ 1773 ifv->ifv_mtufudge = ifv->ifv_encaplen; 1774 } 1775 1776 ifv->ifv_trunk = trunk; 1777 ifp = ifv->ifv_ifp; 1778 /* 1779 * Initialize fields from our parent. This duplicates some 1780 * work with ether_ifattach() but allows for non-ethernet 1781 * interfaces to also work. 1782 */ 1783 ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; 1784 ifp->if_baudrate = p->if_baudrate; 1785 ifp->if_input = p->if_input; 1786 ifp->if_resolvemulti = p->if_resolvemulti; 1787 ifp->if_addrlen = p->if_addrlen; 1788 ifp->if_broadcastaddr = p->if_broadcastaddr; 1789 ifp->if_pcp = ifv->ifv_pcp; 1790 1791 /* 1792 * We wrap the parent's if_output using vlan_output to ensure that it 1793 * can't become stale. 1794 */ 1795 ifp->if_output = vlan_output; 1796 1797 /* 1798 * Copy only a selected subset of flags from the parent. 1799 * Other flags are none of our business. 1800 */ 1801 #define VLAN_COPY_FLAGS (IFF_SIMPLEX) 1802 ifp->if_flags &= ~VLAN_COPY_FLAGS; 1803 ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS; 1804 #undef VLAN_COPY_FLAGS 1805 1806 ifp->if_link_state = p->if_link_state; 1807 1808 NET_EPOCH_ENTER(et); 1809 vlan_capabilities(ifv); 1810 NET_EPOCH_EXIT(et); 1811 1812 /* 1813 * Set up our interface address to reflect the underlying 1814 * physical interface's. 1815 */ 1816 TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv); 1817 ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen = 1818 p->if_addrlen; 1819 1820 /* 1821 * Do not schedule link address update if it was the same 1822 * as previous parent's. This helps avoid updating for each 1823 * associated llentry. 1824 */ 1825 if (memcmp(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen) != 0) { 1826 bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen); 1827 taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); 1828 } 1829 1830 /* We are ready for operation now. */ 1831 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1832 1833 /* Update flags on the parent, if necessary. */ 1834 vlan_setflags(ifp, 1); 1835 1836 /* 1837 * Configure multicast addresses that may already be 1838 * joined on the vlan device. 1839 */ 1840 (void)vlan_setmulti(ifp); 1841 1842 done: 1843 if (error == 0) 1844 EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); 1845 VLAN_XUNLOCK(); 1846 1847 return (error); 1848 } 1849 1850 static void 1851 vlan_unconfig(struct ifnet *ifp) 1852 { 1853 1854 VLAN_XLOCK(); 1855 vlan_unconfig_locked(ifp, 0); 1856 VLAN_XUNLOCK(); 1857 } 1858 1859 static void 1860 vlan_unconfig_locked(struct ifnet *ifp, int departing) 1861 { 1862 struct ifvlantrunk *trunk; 1863 struct vlan_mc_entry *mc; 1864 struct ifvlan *ifv; 1865 struct ifnet *parent; 1866 int error; 1867 1868 VLAN_XLOCK_ASSERT(); 1869 1870 ifv = ifp->if_softc; 1871 trunk = ifv->ifv_trunk; 1872 parent = NULL; 1873 1874 if (trunk != NULL) { 1875 parent = trunk->parent; 1876 1877 /* 1878 * Since the interface is being unconfigured, we need to 1879 * empty the list of multicast groups that we may have joined 1880 * while we were alive from the parent's list. 1881 */ 1882 while ((mc = CK_SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { 1883 /* 1884 * If the parent interface is being detached, 1885 * all its multicast addresses have already 1886 * been removed. Warn about errors if 1887 * if_delmulti() does fail, but don't abort as 1888 * all callers expect vlan destruction to 1889 * succeed. 1890 */ 1891 if (!departing) { 1892 error = if_delmulti(parent, 1893 (struct sockaddr *)&mc->mc_addr); 1894 if (error) 1895 if_printf(ifp, 1896 "Failed to delete multicast address from parent: %d\n", 1897 error); 1898 } 1899 CK_SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); 1900 NET_EPOCH_CALL(vlan_mc_free, &mc->mc_epoch_ctx); 1901 } 1902 1903 vlan_setflags(ifp, 0); /* clear special flags on parent */ 1904 1905 vlan_remhash(trunk, ifv); 1906 ifv->ifv_trunk = NULL; 1907 1908 /* 1909 * Check if we were the last. 1910 */ 1911 if (trunk->refcnt == 0) { 1912 parent->if_vlantrunk = NULL; 1913 NET_EPOCH_WAIT(); 1914 trunk_destroy(trunk); 1915 } 1916 } 1917 1918 /* Disconnect from parent. */ 1919 if (ifv->ifv_pflags) 1920 if_printf(ifp, "%s: ifv_pflags unclean\n", __func__); 1921 ifp->if_mtu = ETHERMTU; 1922 ifp->if_link_state = LINK_STATE_UNKNOWN; 1923 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1924 1925 /* 1926 * Only dispatch an event if vlan was 1927 * attached, otherwise there is nothing 1928 * to cleanup anyway. 1929 */ 1930 if (parent != NULL) 1931 EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid); 1932 } 1933 1934 /* Handle a reference counted flag that should be set on the parent as well */ 1935 static int 1936 vlan_setflag(struct ifnet *ifp, int flag, int status, 1937 int (*func)(struct ifnet *, int)) 1938 { 1939 struct ifvlan *ifv; 1940 int error; 1941 1942 VLAN_SXLOCK_ASSERT(); 1943 1944 ifv = ifp->if_softc; 1945 status = status ? (ifp->if_flags & flag) : 0; 1946 /* Now "status" contains the flag value or 0 */ 1947 1948 /* 1949 * See if recorded parent's status is different from what 1950 * we want it to be. If it is, flip it. We record parent's 1951 * status in ifv_pflags so that we won't clear parent's flag 1952 * we haven't set. In fact, we don't clear or set parent's 1953 * flags directly, but get or release references to them. 1954 * That's why we can be sure that recorded flags still are 1955 * in accord with actual parent's flags. 1956 */ 1957 if (status != (ifv->ifv_pflags & flag)) { 1958 error = (*func)(PARENT(ifv), status); 1959 if (error) 1960 return (error); 1961 ifv->ifv_pflags &= ~flag; 1962 ifv->ifv_pflags |= status; 1963 } 1964 return (0); 1965 } 1966 1967 /* 1968 * Handle IFF_* flags that require certain changes on the parent: 1969 * if "status" is true, update parent's flags respective to our if_flags; 1970 * if "status" is false, forcedly clear the flags set on parent. 1971 */ 1972 static int 1973 vlan_setflags(struct ifnet *ifp, int status) 1974 { 1975 int error, i; 1976 1977 for (i = 0; vlan_pflags[i].flag; i++) { 1978 error = vlan_setflag(ifp, vlan_pflags[i].flag, 1979 status, vlan_pflags[i].func); 1980 if (error) 1981 return (error); 1982 } 1983 return (0); 1984 } 1985 1986 /* Inform all vlans that their parent has changed link state */ 1987 static void 1988 vlan_link_state(struct ifnet *ifp) 1989 { 1990 struct epoch_tracker et; 1991 struct ifvlantrunk *trunk; 1992 struct ifvlan *ifv; 1993 1994 NET_EPOCH_ENTER(et); 1995 trunk = ifp->if_vlantrunk; 1996 if (trunk == NULL) { 1997 NET_EPOCH_EXIT(et); 1998 return; 1999 } 2000 2001 TRUNK_WLOCK(trunk); 2002 VLAN_FOREACH(ifv, trunk) { 2003 ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate; 2004 if_link_state_change(ifv->ifv_ifp, 2005 trunk->parent->if_link_state); 2006 } 2007 TRUNK_WUNLOCK(trunk); 2008 NET_EPOCH_EXIT(et); 2009 } 2010 2011 static void 2012 vlan_capabilities(struct ifvlan *ifv) 2013 { 2014 struct ifnet *p; 2015 struct ifnet *ifp; 2016 struct ifnet_hw_tsomax hw_tsomax; 2017 int cap = 0, ena = 0, mena; 2018 u_long hwa = 0; 2019 2020 NET_EPOCH_ASSERT(); 2021 VLAN_SXLOCK_ASSERT(); 2022 2023 p = PARENT(ifv); 2024 ifp = ifv->ifv_ifp; 2025 2026 /* Mask parent interface enabled capabilities disabled by user. */ 2027 mena = p->if_capenable & ifv->ifv_capenable; 2028 2029 /* 2030 * If the parent interface can do checksum offloading 2031 * on VLANs, then propagate its hardware-assisted 2032 * checksumming flags. Also assert that checksum 2033 * offloading requires hardware VLAN tagging. 2034 */ 2035 if (p->if_capabilities & IFCAP_VLAN_HWCSUM) 2036 cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 2037 if (p->if_capenable & IFCAP_VLAN_HWCSUM && 2038 p->if_capenable & IFCAP_VLAN_HWTAGGING) { 2039 ena |= mena & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 2040 if (ena & IFCAP_TXCSUM) 2041 hwa |= p->if_hwassist & (CSUM_IP | CSUM_TCP | 2042 CSUM_UDP | CSUM_SCTP); 2043 if (ena & IFCAP_TXCSUM_IPV6) 2044 hwa |= p->if_hwassist & (CSUM_TCP_IPV6 | 2045 CSUM_UDP_IPV6 | CSUM_SCTP_IPV6); 2046 } 2047 2048 /* 2049 * If the parent interface can do TSO on VLANs then 2050 * propagate the hardware-assisted flag. TSO on VLANs 2051 * does not necessarily require hardware VLAN tagging. 2052 */ 2053 memset(&hw_tsomax, 0, sizeof(hw_tsomax)); 2054 if_hw_tsomax_common(p, &hw_tsomax); 2055 if_hw_tsomax_update(ifp, &hw_tsomax); 2056 if (p->if_capabilities & IFCAP_VLAN_HWTSO) 2057 cap |= p->if_capabilities & IFCAP_TSO; 2058 if (p->if_capenable & IFCAP_VLAN_HWTSO) { 2059 ena |= mena & IFCAP_TSO; 2060 if (ena & IFCAP_TSO) 2061 hwa |= p->if_hwassist & CSUM_TSO; 2062 } 2063 2064 /* 2065 * If the parent interface can do LRO and checksum offloading on 2066 * VLANs, then guess it may do LRO on VLANs. False positive here 2067 * cost nothing, while false negative may lead to some confusions. 2068 */ 2069 if (p->if_capabilities & IFCAP_VLAN_HWCSUM) 2070 cap |= p->if_capabilities & IFCAP_LRO; 2071 if (p->if_capenable & IFCAP_VLAN_HWCSUM) 2072 ena |= mena & IFCAP_LRO; 2073 2074 /* 2075 * If the parent interface can offload TCP connections over VLANs then 2076 * propagate its TOE capability to the VLAN interface. 2077 * 2078 * All TOE drivers in the tree today can deal with VLANs. If this 2079 * changes then IFCAP_VLAN_TOE should be promoted to a full capability 2080 * with its own bit. 2081 */ 2082 #define IFCAP_VLAN_TOE IFCAP_TOE 2083 if (p->if_capabilities & IFCAP_VLAN_TOE) 2084 cap |= p->if_capabilities & IFCAP_TOE; 2085 if (p->if_capenable & IFCAP_VLAN_TOE) { 2086 SETTOEDEV(ifp, TOEDEV(p)); 2087 ena |= mena & IFCAP_TOE; 2088 } 2089 2090 /* 2091 * If the parent interface supports dynamic link state, so does the 2092 * VLAN interface. 2093 */ 2094 cap |= (p->if_capabilities & IFCAP_LINKSTATE); 2095 ena |= (mena & IFCAP_LINKSTATE); 2096 2097 #ifdef RATELIMIT 2098 /* 2099 * If the parent interface supports ratelimiting, so does the 2100 * VLAN interface. 2101 */ 2102 cap |= (p->if_capabilities & IFCAP_TXRTLMT); 2103 ena |= (mena & IFCAP_TXRTLMT); 2104 #endif 2105 2106 /* 2107 * If the parent interface supports unmapped mbufs, so does 2108 * the VLAN interface. Note that this should be fine even for 2109 * interfaces that don't support hardware tagging as headers 2110 * are prepended in normal mbufs to unmapped mbufs holding 2111 * payload data. 2112 */ 2113 cap |= (p->if_capabilities & IFCAP_MEXTPG); 2114 ena |= (mena & IFCAP_MEXTPG); 2115 2116 /* 2117 * If the parent interface can offload encryption and segmentation 2118 * of TLS records over TCP, propagate it's capability to the VLAN 2119 * interface. 2120 * 2121 * All TLS drivers in the tree today can deal with VLANs. If 2122 * this ever changes, then a new IFCAP_VLAN_TXTLS can be 2123 * defined. 2124 */ 2125 if (p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) 2126 cap |= p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); 2127 if (p->if_capenable & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) 2128 ena |= mena & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); 2129 2130 ifp->if_capabilities = cap; 2131 ifp->if_capenable = ena; 2132 ifp->if_hwassist = hwa; 2133 } 2134 2135 static void 2136 vlan_trunk_capabilities(struct ifnet *ifp) 2137 { 2138 struct epoch_tracker et; 2139 struct ifvlantrunk *trunk; 2140 struct ifvlan *ifv; 2141 2142 VLAN_SLOCK(); 2143 trunk = ifp->if_vlantrunk; 2144 if (trunk == NULL) { 2145 VLAN_SUNLOCK(); 2146 return; 2147 } 2148 NET_EPOCH_ENTER(et); 2149 VLAN_FOREACH(ifv, trunk) 2150 vlan_capabilities(ifv); 2151 NET_EPOCH_EXIT(et); 2152 VLAN_SUNLOCK(); 2153 } 2154 2155 static int 2156 vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 2157 { 2158 struct ifnet *p; 2159 struct ifreq *ifr; 2160 #ifdef INET 2161 struct ifaddr *ifa; 2162 #endif 2163 struct ifvlan *ifv; 2164 struct ifvlantrunk *trunk; 2165 struct vlanreq vlr; 2166 int error = 0, oldmtu; 2167 2168 ifr = (struct ifreq *)data; 2169 #ifdef INET 2170 ifa = (struct ifaddr *) data; 2171 #endif 2172 ifv = ifp->if_softc; 2173 2174 switch (cmd) { 2175 case SIOCSIFADDR: 2176 ifp->if_flags |= IFF_UP; 2177 #ifdef INET 2178 if (ifa->ifa_addr->sa_family == AF_INET) 2179 arp_ifinit(ifp, ifa); 2180 #endif 2181 break; 2182 case SIOCGIFADDR: 2183 bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], 2184 ifp->if_addrlen); 2185 break; 2186 case SIOCGIFMEDIA: 2187 VLAN_SLOCK(); 2188 if (TRUNK(ifv) != NULL) { 2189 p = PARENT(ifv); 2190 if_ref(p); 2191 error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data); 2192 if_rele(p); 2193 /* Limit the result to the parent's current config. */ 2194 if (error == 0) { 2195 struct ifmediareq *ifmr; 2196 2197 ifmr = (struct ifmediareq *)data; 2198 if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) { 2199 ifmr->ifm_count = 1; 2200 error = copyout(&ifmr->ifm_current, 2201 ifmr->ifm_ulist, 2202 sizeof(int)); 2203 } 2204 } 2205 } else { 2206 error = EINVAL; 2207 } 2208 VLAN_SUNLOCK(); 2209 break; 2210 2211 case SIOCSIFMEDIA: 2212 error = EINVAL; 2213 break; 2214 2215 case SIOCSIFMTU: 2216 /* 2217 * Set the interface MTU. 2218 */ 2219 VLAN_SLOCK(); 2220 trunk = TRUNK(ifv); 2221 if (trunk != NULL) { 2222 TRUNK_WLOCK(trunk); 2223 if (ifr->ifr_mtu > 2224 (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || 2225 ifr->ifr_mtu < 2226 (ifv->ifv_mintu - ifv->ifv_mtufudge)) 2227 error = EINVAL; 2228 else 2229 ifp->if_mtu = ifr->ifr_mtu; 2230 TRUNK_WUNLOCK(trunk); 2231 } else 2232 error = EINVAL; 2233 VLAN_SUNLOCK(); 2234 break; 2235 2236 case SIOCSETVLAN: 2237 #ifdef VIMAGE 2238 /* 2239 * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN 2240 * interface to be delegated to a jail without allowing the 2241 * jail to change what underlying interface/VID it is 2242 * associated with. We are not entirely convinced that this 2243 * is the right way to accomplish that policy goal. 2244 */ 2245 if (ifp->if_vnet != ifp->if_home_vnet) { 2246 error = EPERM; 2247 break; 2248 } 2249 #endif 2250 error = copyin(ifr_data_get_ptr(ifr), &vlr, sizeof(vlr)); 2251 if (error) 2252 break; 2253 if (vlr.vlr_parent[0] == '\0') { 2254 vlan_unconfig(ifp); 2255 break; 2256 } 2257 p = ifunit_ref(vlr.vlr_parent); 2258 if (p == NULL) { 2259 error = ENOENT; 2260 break; 2261 } 2262 if (vlr.vlr_proto == 0) 2263 vlr.vlr_proto = ETHERTYPE_VLAN; 2264 oldmtu = ifp->if_mtu; 2265 error = vlan_config(ifv, p, vlr.vlr_tag, vlr.vlr_proto); 2266 if_rele(p); 2267 2268 /* 2269 * VLAN MTU may change during addition of the vlandev. 2270 * If it did, do network layer specific procedure. 2271 */ 2272 if (ifp->if_mtu != oldmtu) 2273 if_notifymtu(ifp); 2274 break; 2275 2276 case SIOCGETVLAN: 2277 #ifdef VIMAGE 2278 if (ifp->if_vnet != ifp->if_home_vnet) { 2279 error = EPERM; 2280 break; 2281 } 2282 #endif 2283 bzero(&vlr, sizeof(vlr)); 2284 VLAN_SLOCK(); 2285 if (TRUNK(ifv) != NULL) { 2286 strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, 2287 sizeof(vlr.vlr_parent)); 2288 vlr.vlr_tag = ifv->ifv_vid; 2289 vlr.vlr_proto = ifv->ifv_proto; 2290 } 2291 VLAN_SUNLOCK(); 2292 error = copyout(&vlr, ifr_data_get_ptr(ifr), sizeof(vlr)); 2293 break; 2294 2295 case SIOCSIFFLAGS: 2296 /* 2297 * We should propagate selected flags to the parent, 2298 * e.g., promiscuous mode. 2299 */ 2300 VLAN_SLOCK(); 2301 if (TRUNK(ifv) != NULL) 2302 error = vlan_setflags(ifp, 1); 2303 VLAN_SUNLOCK(); 2304 break; 2305 2306 case SIOCADDMULTI: 2307 case SIOCDELMULTI: 2308 /* 2309 * If we don't have a parent, just remember the membership for 2310 * when we do. 2311 * 2312 * XXX We need the rmlock here to avoid sleeping while 2313 * holding in6_multi_mtx. 2314 */ 2315 VLAN_XLOCK(); 2316 trunk = TRUNK(ifv); 2317 if (trunk != NULL) 2318 error = vlan_setmulti(ifp); 2319 VLAN_XUNLOCK(); 2320 2321 break; 2322 case SIOCGVLANPCP: 2323 #ifdef VIMAGE 2324 if (ifp->if_vnet != ifp->if_home_vnet) { 2325 error = EPERM; 2326 break; 2327 } 2328 #endif 2329 ifr->ifr_vlan_pcp = ifv->ifv_pcp; 2330 break; 2331 2332 case SIOCSVLANPCP: 2333 #ifdef VIMAGE 2334 if (ifp->if_vnet != ifp->if_home_vnet) { 2335 error = EPERM; 2336 break; 2337 } 2338 #endif 2339 error = priv_check(curthread, PRIV_NET_SETVLANPCP); 2340 if (error) 2341 break; 2342 if (ifr->ifr_vlan_pcp > VLAN_PCP_MAX) { 2343 error = EINVAL; 2344 break; 2345 } 2346 ifv->ifv_pcp = ifr->ifr_vlan_pcp; 2347 ifp->if_pcp = ifv->ifv_pcp; 2348 /* broadcast event about PCP change */ 2349 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP); 2350 break; 2351 2352 case SIOCSIFCAP: 2353 VLAN_SLOCK(); 2354 ifv->ifv_capenable = ifr->ifr_reqcap; 2355 trunk = TRUNK(ifv); 2356 if (trunk != NULL) { 2357 struct epoch_tracker et; 2358 2359 NET_EPOCH_ENTER(et); 2360 vlan_capabilities(ifv); 2361 NET_EPOCH_EXIT(et); 2362 } 2363 VLAN_SUNLOCK(); 2364 break; 2365 2366 default: 2367 error = EINVAL; 2368 break; 2369 } 2370 2371 return (error); 2372 } 2373 2374 #if defined(KERN_TLS) || defined(RATELIMIT) 2375 static int 2376 vlan_snd_tag_alloc(struct ifnet *ifp, 2377 union if_snd_tag_alloc_params *params, 2378 struct m_snd_tag **ppmt) 2379 { 2380 struct epoch_tracker et; 2381 const struct if_snd_tag_sw *sw; 2382 struct vlan_snd_tag *vst; 2383 struct ifvlan *ifv; 2384 struct ifnet *parent; 2385 struct m_snd_tag *mst; 2386 int error; 2387 2388 NET_EPOCH_ENTER(et); 2389 ifv = ifp->if_softc; 2390 2391 switch (params->hdr.type) { 2392 #ifdef RATELIMIT 2393 case IF_SND_TAG_TYPE_UNLIMITED: 2394 sw = &vlan_snd_tag_ul_sw; 2395 break; 2396 case IF_SND_TAG_TYPE_RATE_LIMIT: 2397 sw = &vlan_snd_tag_rl_sw; 2398 break; 2399 #endif 2400 #ifdef KERN_TLS 2401 case IF_SND_TAG_TYPE_TLS: 2402 sw = &vlan_snd_tag_tls_sw; 2403 break; 2404 case IF_SND_TAG_TYPE_TLS_RX: 2405 sw = NULL; 2406 if (params->tls_rx.vlan_id != 0) 2407 goto failure; 2408 params->tls_rx.vlan_id = ifv->ifv_vid; 2409 break; 2410 #ifdef RATELIMIT 2411 case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: 2412 sw = &vlan_snd_tag_tls_rl_sw; 2413 break; 2414 #endif 2415 #endif 2416 default: 2417 goto failure; 2418 } 2419 2420 if (ifv->ifv_trunk != NULL) 2421 parent = PARENT(ifv); 2422 else 2423 parent = NULL; 2424 if (parent == NULL) 2425 goto failure; 2426 if_ref(parent); 2427 NET_EPOCH_EXIT(et); 2428 2429 if (sw != NULL) { 2430 vst = malloc(sizeof(*vst), M_VLAN, M_NOWAIT); 2431 if (vst == NULL) { 2432 if_rele(parent); 2433 return (ENOMEM); 2434 } 2435 } else 2436 vst = NULL; 2437 2438 error = m_snd_tag_alloc(parent, params, &mst); 2439 if_rele(parent); 2440 if (error) { 2441 free(vst, M_VLAN); 2442 return (error); 2443 } 2444 2445 if (sw != NULL) { 2446 m_snd_tag_init(&vst->com, ifp, sw); 2447 vst->tag = mst; 2448 2449 *ppmt = &vst->com; 2450 } else 2451 *ppmt = mst; 2452 2453 return (0); 2454 failure: 2455 NET_EPOCH_EXIT(et); 2456 return (EOPNOTSUPP); 2457 } 2458 2459 static struct m_snd_tag * 2460 vlan_next_snd_tag(struct m_snd_tag *mst) 2461 { 2462 struct vlan_snd_tag *vst; 2463 2464 vst = mst_to_vst(mst); 2465 return (vst->tag); 2466 } 2467 2468 static int 2469 vlan_snd_tag_modify(struct m_snd_tag *mst, 2470 union if_snd_tag_modify_params *params) 2471 { 2472 struct vlan_snd_tag *vst; 2473 2474 vst = mst_to_vst(mst); 2475 return (vst->tag->sw->snd_tag_modify(vst->tag, params)); 2476 } 2477 2478 static int 2479 vlan_snd_tag_query(struct m_snd_tag *mst, 2480 union if_snd_tag_query_params *params) 2481 { 2482 struct vlan_snd_tag *vst; 2483 2484 vst = mst_to_vst(mst); 2485 return (vst->tag->sw->snd_tag_query(vst->tag, params)); 2486 } 2487 2488 static void 2489 vlan_snd_tag_free(struct m_snd_tag *mst) 2490 { 2491 struct vlan_snd_tag *vst; 2492 2493 vst = mst_to_vst(mst); 2494 m_snd_tag_rele(vst->tag); 2495 free(vst, M_VLAN); 2496 } 2497 2498 static void 2499 vlan_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q) 2500 { 2501 /* 2502 * For vlan, we have an indirect 2503 * interface. The caller needs to 2504 * get a ratelimit tag on the actual 2505 * interface the flow will go on. 2506 */ 2507 q->rate_table = NULL; 2508 q->flags = RT_IS_INDIRECT; 2509 q->max_flows = 0; 2510 q->number_of_rates = 0; 2511 } 2512 2513 #endif 2514