xref: /freebsd/sys/net/if_vlan.c (revision 57718be8fa0bd5edc11ab9a72e68cc71982939a6)
1 /*-
2  * Copyright 1998 Massachusetts Institute of Technology
3  *
4  * Permission to use, copy, modify, and distribute this software and
5  * its documentation for any purpose and without fee is hereby
6  * granted, provided that both the above copyright notice and this
7  * permission notice appear in all copies, that both the above
8  * copyright notice and this permission notice appear in all
9  * supporting documentation, and that the name of M.I.T. not be used
10  * in advertising or publicity pertaining to distribution of the
11  * software without specific, written prior permission.  M.I.T. makes
12  * no representations about the suitability of this software for any
13  * purpose.  It is provided "as is" without express or implied
14  * warranty.
15  *
16  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /*
31  * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
32  * Might be extended some day to also handle IEEE 802.1p priority
33  * tagging.  This is sort of sneaky in the implementation, since
34  * we need to pretend to be enough of an Ethernet implementation
35  * to make arp work.  The way we do this is by telling everyone
36  * that we are an Ethernet, and then catch the packets that
37  * ether_output() sends to us via if_transmit(), rewrite them for
38  * use by the real outgoing interface, and ask it to send them.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include "opt_inet.h"
45 #include "opt_vlan.h"
46 
47 #include <sys/param.h>
48 #include <sys/eventhandler.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rmlock.h>
55 #include <sys/queue.h>
56 #include <sys/socket.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/systm.h>
60 #include <sys/sx.h>
61 
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_var.h>
66 #include <net/if_clone.h>
67 #include <net/if_dl.h>
68 #include <net/if_types.h>
69 #include <net/if_vlan_var.h>
70 #include <net/vnet.h>
71 
72 #ifdef INET
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #endif
76 
77 #define	VLAN_DEF_HWIDTH	4
78 #define	VLAN_IFFLAGS	(IFF_BROADCAST | IFF_MULTICAST)
79 
80 #define	UP_AND_RUNNING(ifp) \
81     ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING)
82 
83 LIST_HEAD(ifvlanhead, ifvlan);
84 
/*
 * Per-parent trunk state: the table of vlan interfaces configured on
 * one parent interface, looked up by VLAN ID.
 */
struct ifvlantrunk {
	struct	ifnet   *parent;	/* parent interface of this trunk */
	struct	rmlock	lock;		/* per-trunk lock, see TRUNK_*LOCK */
#ifdef VLAN_ARRAY
#define	VLAN_ARRAY_SIZE	(EVL_VLID_MASK + 1)
	struct	ifvlan	*vlans[VLAN_ARRAY_SIZE]; /* static table */
#else
	struct	ifvlanhead *hash;	/* dynamic hash-list table */
	uint16_t	hmask;		/* bucket mask, (1 << hwidth) - 1 */
	uint16_t	hwidth;		/* log2 of the number of buckets */
#endif
	int		refcnt;		/* number of vlans in the table */
};
98 
/*
 * One link-layer multicast address recorded by vlan_setmulti() so that it
 * can later be removed from the parent again.
 */
struct vlan_mc_entry {
	struct sockaddr_dl		mc_addr;	/* the address itself */
	SLIST_ENTRY(vlan_mc_entry)	mc_entries;	/* vlan_mc_listhead link */
};
103 
/*
 * Software context (if_softc) of a single vlan interface.
 */
struct	ifvlan {
	struct	ifvlantrunk *ifv_trunk;	/* trunk we belong to; NULL if none */
	struct	ifnet *ifv_ifp;		/* our own ifnet */
	/* Statistics counters, allocated in vlan_clone_create(). */
	counter_u64_t	ifv_ipackets;
	counter_u64_t	ifv_ibytes;
	counter_u64_t	ifv_opackets;
	counter_u64_t	ifv_obytes;
	counter_u64_t	ifv_omcasts;
	counter_u64_t	ifv_oerrors;
#define	TRUNK(ifv)	((ifv)->ifv_trunk)
#define	PARENT(ifv)	((ifv)->ifv_trunk->parent)
	void	*ifv_cookie;	/* opaque, see vlan_cookie()/vlan_setcookie() */
	int	ifv_pflags;	/* special flags we have set on parent */
	struct	ifv_linkmib {
		int	ifvm_encaplen;	/* encapsulation length */
		int	ifvm_mtufudge;	/* MTU fudged by this much */
		int	ifvm_mintu;	/* min transmission unit */
		uint16_t ifvm_proto;	/* encapsulation ethertype */
		uint16_t ifvm_tag;	/* tag to apply on packets leaving if */
	}	ifv_mib;
	/* Multicast addresses recorded by vlan_setmulti(). */
	SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
#ifndef VLAN_ARRAY
	LIST_ENTRY(ifvlan) ifv_list;	/* linkage within a trunk hash bucket */
#endif
};
/* Shorthands for the members of ifv_mib. */
#define	ifv_proto	ifv_mib.ifvm_proto
#define	ifv_vid		ifv_mib.ifvm_tag
#define	ifv_encaplen	ifv_mib.ifvm_encaplen
#define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
#define	ifv_mintu	ifv_mib.ifvm_mintu
134 
/*
 * Special flags we should propagate to parent.
 *
 * Each entry pairs an interface flag with the function that takes an
 * ifnet and an int and applies that flag; the table is terminated by an
 * entry whose flag is 0 and whose function is NULL.
 * NOTE(review): presumably walked by vlan_setflags() -- confirm.
 */
static struct {
	int flag;
	int (*func)(struct ifnet *, int);
} vlan_pflags[] = {
	{IFF_PROMISC, ifpromisc},
	{IFF_ALLMULTI, if_allmulti},
	{0, NULL}
};
144 
145 SYSCTL_DECL(_net_link);
146 static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
147     "IEEE 802.1Q VLAN");
148 static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
149     "for consistency");
150 
151 static VNET_DEFINE(int, soft_pad);
152 #define	V_soft_pad	VNET(soft_pad)
153 SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
154     &VNET_NAME(soft_pad), 0, "pad short frames before tagging");
155 
156 static const char vlanname[] = "vlan";
157 static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");
158 
159 static eventhandler_tag ifdetach_tag;
160 static eventhandler_tag iflladdr_tag;
161 
/*
 * We have a global sx lock, that is used to serialize configuration
 * changes and isn't used in normal packet delivery.
 *
 * We also have a per-trunk rmlock, that is locked shared on packet
 * processing and exclusive when configuration is changed.
 *
 * The VLAN_ARRAY substitutes the dynamic hash with a static array
 * with 4096 entries. In theory this can give a boost in processing,
 * however in practice it does not. Probably this is because the array
 * is too big to fit into CPU cache.
 */
/* Global lock: an sx lock, always taken exclusively by VLAN_LOCK(). */
static struct sx ifv_lock;
#define	VLAN_LOCK_INIT()	sx_init(&ifv_lock, "vlan_global")
#define	VLAN_LOCK_DESTROY()	sx_destroy(&ifv_lock)
#define	VLAN_LOCK_ASSERT()	sx_assert(&ifv_lock, SA_LOCKED)
#define	VLAN_LOCK()		sx_xlock(&ifv_lock)
#define	VLAN_UNLOCK()		sx_xunlock(&ifv_lock)
/* Per-trunk rmlock: setup, teardown and the write (exclusive) side. */
#define	TRUNK_LOCK_INIT(trunk)	rm_init(&(trunk)->lock, vlanname)
#define	TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock)
#define	TRUNK_LOCK(trunk)	rm_wlock(&(trunk)->lock)
#define	TRUNK_UNLOCK(trunk)	rm_wunlock(&(trunk)->lock)
#define	TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED)
/*
 * Per-trunk rmlock: read (shared) side.  TRUNK_RLOCK and TRUNK_RUNLOCK
 * refer to a local variable named "tracker"; a function using them must
 * declare it with TRUNK_LOCK_READER.
 */
#define	TRUNK_RLOCK(trunk)	rm_rlock(&(trunk)->lock, &tracker)
#define	TRUNK_RUNLOCK(trunk)	rm_runlock(&(trunk)->lock, &tracker)
#define	TRUNK_LOCK_RASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED)
#define	TRUNK_LOCK_READER	struct rm_priotracker tracker
189 
190 #ifndef VLAN_ARRAY
191 static	void vlan_inithash(struct ifvlantrunk *trunk);
192 static	void vlan_freehash(struct ifvlantrunk *trunk);
193 static	int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
194 static	int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
195 static	void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
196 static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
197 	uint16_t vid);
198 #endif
199 static	void trunk_destroy(struct ifvlantrunk *trunk);
200 
201 static	void vlan_init(void *foo);
202 static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
203 static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
204 static	void vlan_qflush(struct ifnet *ifp);
205 static uint64_t vlan_get_counter(struct ifnet *ifp, ift_counter cnt);
206 static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
207     int (*func)(struct ifnet *, int));
208 static	int vlan_setflags(struct ifnet *ifp, int status);
209 static	int vlan_setmulti(struct ifnet *ifp);
210 static	int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
211 static	void vlan_unconfig(struct ifnet *ifp);
212 static	void vlan_unconfig_locked(struct ifnet *ifp, int departing);
213 static	int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
214 static	void vlan_link_state(struct ifnet *ifp);
215 static	void vlan_capabilities(struct ifvlan *ifv);
216 static	void vlan_trunk_capabilities(struct ifnet *ifp);
217 
218 static	struct ifnet *vlan_clone_match_ethervid(struct if_clone *,
219     const char *, int *);
220 static	int vlan_clone_match(struct if_clone *, const char *);
221 static	int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
222 static	int vlan_clone_destroy(struct if_clone *, struct ifnet *);
223 
224 static	void vlan_ifdetach(void *arg, struct ifnet *ifp);
225 static  void vlan_iflladdr(void *arg, struct ifnet *ifp);
226 
227 static struct if_clone *vlan_cloner;
228 
229 #ifdef VIMAGE
230 static VNET_DEFINE(struct if_clone *, vlan_cloner);
231 #define	V_vlan_cloner	VNET(vlan_cloner)
232 #endif
233 
234 #ifndef VLAN_ARRAY
235 #define HASH(n, m)	((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m))
236 
237 static void
238 vlan_inithash(struct ifvlantrunk *trunk)
239 {
240 	int i, n;
241 
242 	/*
243 	 * The trunk must not be locked here since we call malloc(M_WAITOK).
244 	 * It is OK in case this function is called before the trunk struct
245 	 * gets hooked up and becomes visible from other threads.
246 	 */
247 
248 	KASSERT(trunk->hwidth == 0 && trunk->hash == NULL,
249 	    ("%s: hash already initialized", __func__));
250 
251 	trunk->hwidth = VLAN_DEF_HWIDTH;
252 	n = 1 << trunk->hwidth;
253 	trunk->hmask = n - 1;
254 	trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK);
255 	for (i = 0; i < n; i++)
256 		LIST_INIT(&trunk->hash[i]);
257 }
258 
259 static void
260 vlan_freehash(struct ifvlantrunk *trunk)
261 {
262 #ifdef INVARIANTS
263 	int i;
264 
265 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
266 	for (i = 0; i < (1 << trunk->hwidth); i++)
267 		KASSERT(LIST_EMPTY(&trunk->hash[i]),
268 		    ("%s: hash table not empty", __func__));
269 #endif
270 	free(trunk->hash, M_VLAN);
271 	trunk->hash = NULL;
272 	trunk->hwidth = trunk->hmask = 0;
273 }
274 
275 static int
276 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
277 {
278 	int i, b;
279 	struct ifvlan *ifv2;
280 
281 	TRUNK_LOCK_ASSERT(trunk);
282 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
283 
284 	b = 1 << trunk->hwidth;
285 	i = HASH(ifv->ifv_vid, trunk->hmask);
286 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
287 		if (ifv->ifv_vid == ifv2->ifv_vid)
288 			return (EEXIST);
289 
290 	/*
291 	 * Grow the hash when the number of vlans exceeds half of the number of
292 	 * hash buckets squared. This will make the average linked-list length
293 	 * buckets/2.
294 	 */
295 	if (trunk->refcnt > (b * b) / 2) {
296 		vlan_growhash(trunk, 1);
297 		i = HASH(ifv->ifv_vid, trunk->hmask);
298 	}
299 	LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
300 	trunk->refcnt++;
301 
302 	return (0);
303 }
304 
305 static int
306 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
307 {
308 	int i, b;
309 	struct ifvlan *ifv2;
310 
311 	TRUNK_LOCK_ASSERT(trunk);
312 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
313 
314 	b = 1 << trunk->hwidth;
315 	i = HASH(ifv->ifv_vid, trunk->hmask);
316 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
317 		if (ifv2 == ifv) {
318 			trunk->refcnt--;
319 			LIST_REMOVE(ifv2, ifv_list);
320 			if (trunk->refcnt < (b * b) / 2)
321 				vlan_growhash(trunk, -1);
322 			return (0);
323 		}
324 
325 	panic("%s: vlan not found\n", __func__);
326 	return (ENOENT); /*NOTREACHED*/
327 }
328 
329 /*
330  * Grow the hash larger or smaller if memory permits.
331  */
332 static void
333 vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
334 {
335 	struct ifvlan *ifv;
336 	struct ifvlanhead *hash2;
337 	int hwidth2, i, j, n, n2;
338 
339 	TRUNK_LOCK_ASSERT(trunk);
340 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
341 
342 	if (howmuch == 0) {
343 		/* Harmless yet obvious coding error */
344 		printf("%s: howmuch is 0\n", __func__);
345 		return;
346 	}
347 
348 	hwidth2 = trunk->hwidth + howmuch;
349 	n = 1 << trunk->hwidth;
350 	n2 = 1 << hwidth2;
351 	/* Do not shrink the table below the default */
352 	if (hwidth2 < VLAN_DEF_HWIDTH)
353 		return;
354 
355 	/* M_NOWAIT because we're called with trunk mutex held */
356 	hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT);
357 	if (hash2 == NULL) {
358 		printf("%s: out of memory -- hash size not changed\n",
359 		    __func__);
360 		return;		/* We can live with the old hash table */
361 	}
362 	for (j = 0; j < n2; j++)
363 		LIST_INIT(&hash2[j]);
364 	for (i = 0; i < n; i++)
365 		while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
366 			LIST_REMOVE(ifv, ifv_list);
367 			j = HASH(ifv->ifv_vid, n2 - 1);
368 			LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
369 		}
370 	free(trunk->hash, M_VLAN);
371 	trunk->hash = hash2;
372 	trunk->hwidth = hwidth2;
373 	trunk->hmask = n2 - 1;
374 
375 	if (bootverbose)
376 		if_printf(trunk->parent,
377 		    "VLAN hash table resized from %d to %d buckets\n", n, n2);
378 }
379 
380 static __inline struct ifvlan *
381 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
382 {
383 	struct ifvlan *ifv;
384 
385 	TRUNK_LOCK_RASSERT(trunk);
386 
387 	LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
388 		if (ifv->ifv_vid == vid)
389 			return (ifv);
390 	return (NULL);
391 }
392 
393 #if 0
/*
 * Debugging code to view the hashtables: prints one line per bucket,
 * consisting of the bucket index followed by the names of the vlan
 * interfaces chained on that bucket.  Compiled out by the enclosing #if 0.
 */
static void
vlan_dumphash(struct ifvlantrunk *trunk)
{
	int i;
	struct ifvlan *ifv;

	for (i = 0; i < (1 << trunk->hwidth); i++) {
		printf("%d: ", i);
		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
			printf("%s ", ifv->ifv_ifp->if_xname);
		printf("\n");
	}
}
408 #endif /* 0 */
409 #else
410 
411 static __inline struct ifvlan *
412 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
413 {
414 
415 	return trunk->vlans[vid];
416 }
417 
418 static __inline int
419 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
420 {
421 
422 	if (trunk->vlans[ifv->ifv_vid] != NULL)
423 		return EEXIST;
424 	trunk->vlans[ifv->ifv_vid] = ifv;
425 	trunk->refcnt++;
426 
427 	return (0);
428 }
429 
430 static __inline int
431 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
432 {
433 
434 	trunk->vlans[ifv->ifv_vid] = NULL;
435 	trunk->refcnt--;
436 
437 	return (0);
438 }
439 
/*
 * VLAN_ARRAY variant: nothing to free, the table is embedded in the
 * trunk structure.
 */
static __inline void
vlan_freehash(struct ifvlantrunk *trunk)
{
}

/*
 * VLAN_ARRAY variant: nothing to set up here; this function leaves the
 * embedded table untouched.
 */
static __inline void
vlan_inithash(struct ifvlantrunk *trunk)
{
}
449 
450 #endif /* !VLAN_ARRAY */
451 
/*
 * Tear down a trunk: release its hash table, unhook it from the parent
 * interface, destroy its lock and free the structure.  The caller must
 * hold the global vlan lock.
 */
static void
trunk_destroy(struct ifvlantrunk *trunk)
{
	VLAN_LOCK_ASSERT();

	/* Free the table and unhook from the parent under the write lock. */
	TRUNK_LOCK(trunk);
	vlan_freehash(trunk);
	trunk->parent->if_vlantrunk = NULL;
	TRUNK_UNLOCK(trunk);
	/* The lock must be released before it can be destroyed. */
	TRUNK_LOCK_DESTROY(trunk);
	free(trunk, M_VLAN);
}
464 
465 /*
466  * Program our multicast filter. What we're actually doing is
467  * programming the multicast filter of the parent. This has the
468  * side effect of causing the parent interface to receive multicast
469  * traffic that it doesn't really want, which ends up being discarded
470  * later by the upper protocol layers. Unfortunately, there's no way
471  * to avoid this: there really is only one physical interface.
472  */
473 static int
474 vlan_setmulti(struct ifnet *ifp)
475 {
476 	struct ifnet		*ifp_p;
477 	struct ifmultiaddr	*ifma;
478 	struct ifvlan		*sc;
479 	struct vlan_mc_entry	*mc;
480 	int			error;
481 
482 	/* Find the parent. */
483 	sc = ifp->if_softc;
484 	TRUNK_LOCK_ASSERT(TRUNK(sc));
485 	ifp_p = PARENT(sc);
486 
487 	CURVNET_SET_QUIET(ifp_p->if_vnet);
488 
489 	/* First, remove any existing filter entries. */
490 	while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
491 		SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
492 		(void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
493 		free(mc, M_VLAN);
494 	}
495 
496 	/* Now program new ones. */
497 	IF_ADDR_WLOCK(ifp);
498 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
499 		if (ifma->ifma_addr->sa_family != AF_LINK)
500 			continue;
501 		mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
502 		if (mc == NULL) {
503 			IF_ADDR_WUNLOCK(ifp);
504 			return (ENOMEM);
505 		}
506 		bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
507 		mc->mc_addr.sdl_index = ifp_p->if_index;
508 		SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
509 	}
510 	IF_ADDR_WUNLOCK(ifp);
511 	SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
512 		error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
513 		    NULL);
514 		if (error)
515 			return (error);
516 	}
517 
518 	CURVNET_RESTORE();
519 	return (0);
520 }
521 
/*
 * A handler for parent interface link layer address changes.
 * If the parent interface link layer address is changed we
 * should also change it on all children vlans.
 *
 * ifp is the interface whose address changed; interfaces without a
 * vlan trunk are ignored.
 */
static void
vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
{
	struct ifvlan *ifv;
#ifndef VLAN_ARRAY
	struct ifvlan *next;
#endif
	int i;

	/*
	 * Check if it's a trunk interface first of all
	 * to avoid needless locking.
	 */
	if (ifp->if_vlantrunk == NULL)
		return;

	VLAN_LOCK();
	/*
	 * OK, it's a trunk.  Loop over and change all vlan's lladdrs on it.
	 */
#ifdef VLAN_ARRAY
	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
#else /* VLAN_ARRAY */
	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
		LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) {
#endif /* VLAN_ARRAY */
			/*
			 * The global lock is dropped around if_setlladdr().
			 * NOTE(review): the trunk table is walked across
			 * these unlocked windows; confirm nothing can change
			 * or free it (or "next") in the meantime.
			 */
			VLAN_UNLOCK();
			if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp),
			    ifp->if_addrlen);
			VLAN_LOCK();
		}
	VLAN_UNLOCK();

}
562 
/*
 * A handler for network interface departure events.
 * Track departure of trunks here so that we don't access invalid
 * pointers or whatever if a trunk is ripped from under us, e.g.,
 * by ejecting its hot-plug card.  However, if an ifnet is simply
 * being renamed, then there's no need to tear down the state.
 *
 * ifp is the departing interface; every vlan configured on it is
 * unconfigured via vlan_unconfig_locked(..., 1) under the global lock.
 */
static void
vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct ifvlan *ifv;
	int i;

	/*
	 * Check if it's a trunk interface first of all
	 * to avoid needless locking.
	 */
	if (ifp->if_vlantrunk == NULL)
		return;

	/* If the ifnet is just being renamed, don't do anything. */
	if (ifp->if_flags & IFF_RENAMING)
		return;

	VLAN_LOCK();
	/*
	 * OK, it's a trunk.  Loop over and detach all vlan's on it.
	 * Check trunk pointer after each vlan_unconfig() as it will
	 * free it and set to NULL after the last vlan was detached.
	 */
#ifdef VLAN_ARRAY
	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
			vlan_unconfig_locked(ifv->ifv_ifp, 1);
			if (ifp->if_vlantrunk == NULL)
				break;
		}
#else /* VLAN_ARRAY */
	/*
	 * Removing a vlan may resize the hash table, so the scan starts
	 * over from the first bucket after every removal.
	 */
restart:
	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
		if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
			vlan_unconfig_locked(ifv->ifv_ifp, 1);
			if (ifp->if_vlantrunk)
				goto restart;	/* trunk->hwidth can change */
			else
				break;
		}
#endif /* VLAN_ARRAY */
	/* Trunk should have been destroyed in vlan_unconfig(). */
	KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
	VLAN_UNLOCK();
}
615 
616 /*
617  * Return the trunk device for a virtual interface.
618  */
619 static struct ifnet  *
620 vlan_trunkdev(struct ifnet *ifp)
621 {
622 	struct ifvlan *ifv;
623 
624 	if (ifp->if_type != IFT_L2VLAN)
625 		return (NULL);
626 	ifv = ifp->if_softc;
627 	ifp = NULL;
628 	VLAN_LOCK();
629 	if (ifv->ifv_trunk)
630 		ifp = PARENT(ifv);
631 	VLAN_UNLOCK();
632 	return (ifp);
633 }
634 
635 /*
636  * Return the 12-bit VLAN VID for this interface, for use by external
637  * components such as Infiniband.
638  *
639  * XXXRW: Note that the function name here is historical; it should be named
640  * vlan_vid().
641  */
642 static int
643 vlan_tag(struct ifnet *ifp, uint16_t *vidp)
644 {
645 	struct ifvlan *ifv;
646 
647 	if (ifp->if_type != IFT_L2VLAN)
648 		return (EINVAL);
649 	ifv = ifp->if_softc;
650 	*vidp = ifv->ifv_vid;
651 	return (0);
652 }
653 
654 /*
655  * Return a driver specific cookie for this interface.  Synchronization
656  * with setcookie must be provided by the driver.
657  */
658 static void *
659 vlan_cookie(struct ifnet *ifp)
660 {
661 	struct ifvlan *ifv;
662 
663 	if (ifp->if_type != IFT_L2VLAN)
664 		return (NULL);
665 	ifv = ifp->if_softc;
666 	return (ifv->ifv_cookie);
667 }
668 
669 /*
670  * Store a cookie in our softc that drivers can use to store driver
671  * private per-instance data in.
672  */
673 static int
674 vlan_setcookie(struct ifnet *ifp, void *cookie)
675 {
676 	struct ifvlan *ifv;
677 
678 	if (ifp->if_type != IFT_L2VLAN)
679 		return (EINVAL);
680 	ifv = ifp->if_softc;
681 	ifv->ifv_cookie = cookie;
682 	return (0);
683 }
684 
685 /*
686  * Return the vlan device present at the specific VID.
687  */
688 static struct ifnet *
689 vlan_devat(struct ifnet *ifp, uint16_t vid)
690 {
691 	struct ifvlantrunk *trunk;
692 	struct ifvlan *ifv;
693 	TRUNK_LOCK_READER;
694 
695 	trunk = ifp->if_vlantrunk;
696 	if (trunk == NULL)
697 		return (NULL);
698 	ifp = NULL;
699 	TRUNK_RLOCK(trunk);
700 	ifv = vlan_gethash(trunk, vid);
701 	if (ifv)
702 		ifp = ifv->ifv_ifp;
703 	TRUNK_RUNLOCK(trunk);
704 	return (ifp);
705 }
706 
/*
 * VLAN support can be loaded as a module.  The only place in the
 * system that's intimately aware of this is ether_input.  We hook
 * into this code through vlan_input_p which is defined there and
 * set here.  No one else in the system should be aware of this so
 * we use an explicit reference here.
 */
714 extern	void (*vlan_input_p)(struct ifnet *, struct mbuf *);
715 
716 /* For if_link_state_change() eyes only... */
717 extern	void (*vlan_link_state_p)(struct ifnet *);
718 
719 static int
720 vlan_modevent(module_t mod, int type, void *data)
721 {
722 
723 	switch (type) {
724 	case MOD_LOAD:
725 		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
726 		    vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
727 		if (ifdetach_tag == NULL)
728 			return (ENOMEM);
729 		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
730 		    vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
731 		if (iflladdr_tag == NULL)
732 			return (ENOMEM);
733 		VLAN_LOCK_INIT();
734 		vlan_input_p = vlan_input;
735 		vlan_link_state_p = vlan_link_state;
736 		vlan_trunk_cap_p = vlan_trunk_capabilities;
737 		vlan_trunkdev_p = vlan_trunkdev;
738 		vlan_cookie_p = vlan_cookie;
739 		vlan_setcookie_p = vlan_setcookie;
740 		vlan_tag_p = vlan_tag;
741 		vlan_devat_p = vlan_devat;
742 #ifndef VIMAGE
743 		vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
744 		    vlan_clone_create, vlan_clone_destroy);
745 #endif
746 		if (bootverbose)
747 			printf("vlan: initialized, using "
748 #ifdef VLAN_ARRAY
749 			       "full-size arrays"
750 #else
751 			       "hash tables with chaining"
752 #endif
753 
754 			       "\n");
755 		break;
756 	case MOD_UNLOAD:
757 #ifndef VIMAGE
758 		if_clone_detach(vlan_cloner);
759 #endif
760 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
761 		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
762 		vlan_input_p = NULL;
763 		vlan_link_state_p = NULL;
764 		vlan_trunk_cap_p = NULL;
765 		vlan_trunkdev_p = NULL;
766 		vlan_tag_p = NULL;
767 		vlan_cookie_p = NULL;
768 		vlan_setcookie_p = NULL;
769 		vlan_devat_p = NULL;
770 		VLAN_LOCK_DESTROY();
771 		if (bootverbose)
772 			printf("vlan: unloaded\n");
773 		break;
774 	default:
775 		return (EOPNOTSUPP);
776 	}
777 	return (0);
778 }
779 
/* Module description: name, event handler, and no private data. */
static moduledata_t vlan_mod = {
	"if_vlan",
	vlan_modevent,
	0
};
785 
786 DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
787 MODULE_VERSION(if_vlan, 3);
788 
789 #ifdef VIMAGE
790 static void
791 vnet_vlan_init(const void *unused __unused)
792 {
793 
794 	vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
795 		    vlan_clone_create, vlan_clone_destroy);
796 	V_vlan_cloner = vlan_cloner;
797 }
798 VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
799     vnet_vlan_init, NULL);
800 
/*
 * VIMAGE teardown hook (see the VNET_SYSUNINIT that follows): detach
 * the cloner stored in V_vlan_cloner.
 */
static void
vnet_vlan_uninit(const void *unused __unused)
{

	if_clone_detach(V_vlan_cloner);
}
807 VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
808     vnet_vlan_uninit, NULL);
809 #endif
810 
811 static struct ifnet *
812 vlan_clone_match_ethervid(struct if_clone *ifc, const char *name, int *vidp)
813 {
814 	const char *cp;
815 	struct ifnet *ifp;
816 	int vid;
817 
818 	/* Check for <etherif>.<vlan> style interface names. */
819 	IFNET_RLOCK_NOSLEEP();
820 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
821 		/*
822 		 * We can handle non-ethernet hardware types as long as
823 		 * they handle the tagging and headers themselves.
824 		 */
825 		if (ifp->if_type != IFT_ETHER &&
826 		    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
827 			continue;
828 		if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
829 			continue;
830 		cp = name + strlen(ifp->if_xname);
831 		if (*cp++ != '.')
832 			continue;
833 		if (*cp == '\0')
834 			continue;
835 		vid = 0;
836 		for(; *cp >= '0' && *cp <= '9'; cp++)
837 			vid = (vid * 10) + (*cp - '0');
838 		if (*cp != '\0')
839 			continue;
840 		if (vidp != NULL)
841 			*vidp = vid;
842 		break;
843 	}
844 	IFNET_RUNLOCK_NOSLEEP();
845 
846 	return (ifp);
847 }
848 
849 static int
850 vlan_clone_match(struct if_clone *ifc, const char *name)
851 {
852 	const char *cp;
853 
854 	if (vlan_clone_match_ethervid(ifc, name, NULL) != NULL)
855 		return (1);
856 
857 	if (strncmp(vlanname, name, strlen(vlanname)) != 0)
858 		return (0);
859 	for (cp = name + 4; *cp != '\0'; cp++) {
860 		if (*cp < '0' || *cp > '9')
861 			return (0);
862 	}
863 
864 	return (1);
865 }
866 
867 static int
868 vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
869 {
870 	char *dp;
871 	int wildcard;
872 	int unit;
873 	int error;
874 	int vid;
875 	int ethertag;
876 	struct ifvlan *ifv;
877 	struct ifnet *ifp;
878 	struct ifnet *p;
879 	struct ifaddr *ifa;
880 	struct sockaddr_dl *sdl;
881 	struct vlanreq vlr;
882 	static const u_char eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
883 
884 	/*
885 	 * There are 3 (ugh) ways to specify the cloned device:
886 	 * o pass a parameter block with the clone request.
887 	 * o specify parameters in the text of the clone device name
888 	 * o specify no parameters and get an unattached device that
889 	 *   must be configured separately.
890 	 * The first technique is preferred; the latter two are
891 	 * supported for backwards compatibilty.
892 	 *
893 	 * XXXRW: Note historic use of the word "tag" here.  New ioctls may be
894 	 * called for.
895 	 */
896 	if (params) {
897 		error = copyin(params, &vlr, sizeof(vlr));
898 		if (error)
899 			return error;
900 		p = ifunit(vlr.vlr_parent);
901 		if (p == NULL)
902 			return ENXIO;
903 		/*
904 		 * Don't let the caller set up a VLAN VID with
905 		 * anything except VLID bits.
906 		 */
907 		if (vlr.vlr_tag & ~EVL_VLID_MASK)
908 			return (EINVAL);
909 		error = ifc_name2unit(name, &unit);
910 		if (error != 0)
911 			return (error);
912 
913 		ethertag = 1;
914 		vid = vlr.vlr_tag;
915 		wildcard = (unit < 0);
916 	} else if ((p = vlan_clone_match_ethervid(ifc, name, &vid)) != NULL) {
917 		ethertag = 1;
918 		unit = -1;
919 		wildcard = 0;
920 
921 		/*
922 		 * Don't let the caller set up a VLAN VID with
923 		 * anything except VLID bits.
924 		 */
925 		if (vid & ~EVL_VLID_MASK)
926 			return (EINVAL);
927 	} else {
928 		ethertag = 0;
929 
930 		error = ifc_name2unit(name, &unit);
931 		if (error != 0)
932 			return (error);
933 
934 		wildcard = (unit < 0);
935 	}
936 
937 	error = ifc_alloc_unit(ifc, &unit);
938 	if (error != 0)
939 		return (error);
940 
941 	/* In the wildcard case, we need to update the name. */
942 	if (wildcard) {
943 		for (dp = name; *dp != '\0'; dp++);
944 		if (snprintf(dp, len - (dp-name), "%d", unit) >
945 		    len - (dp-name) - 1) {
946 			panic("%s: interface name too long", __func__);
947 		}
948 	}
949 
950 	ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO);
951 	ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER);
952 	if (ifp == NULL) {
953 		ifc_free_unit(ifc, unit);
954 		free(ifv, M_VLAN);
955 		return (ENOSPC);
956 	}
957 	SLIST_INIT(&ifv->vlan_mc_listhead);
958 	/* Prepare pcpu counters */
959 	ifv->ifv_ipackets = counter_u64_alloc(M_WAITOK);
960 	ifv->ifv_opackets = counter_u64_alloc(M_WAITOK);
961 	ifv->ifv_ibytes = counter_u64_alloc(M_WAITOK);
962 	ifv->ifv_obytes = counter_u64_alloc(M_WAITOK);
963 	ifv->ifv_omcasts = counter_u64_alloc(M_WAITOK);
964 	ifv->ifv_oerrors = counter_u64_alloc(M_WAITOK);
965 
966 	ifp->if_softc = ifv;
967 	/*
968 	 * Set the name manually rather than using if_initname because
969 	 * we don't conform to the default naming convention for interfaces.
970 	 */
971 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
972 	ifp->if_dname = vlanname;
973 	ifp->if_dunit = unit;
974 	/* NB: flags are not set here */
975 	ifp->if_linkmib = &ifv->ifv_mib;
976 	ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
977 	/* NB: mtu is not set here */
978 
979 	ifp->if_init = vlan_init;
980 	ifp->if_transmit = vlan_transmit;
981 	ifp->if_qflush = vlan_qflush;
982 	ifp->if_ioctl = vlan_ioctl;
983 	ifp->if_flags = VLAN_IFFLAGS;
984 	ifp->if_get_counter = vlan_get_counter;
985 	ether_ifattach(ifp, eaddr);
986 	/* Now undo some of the damage... */
987 	ifp->if_baudrate = 0;
988 	ifp->if_type = IFT_L2VLAN;
989 	ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
990 	ifa = ifp->if_addr;
991 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
992 	sdl->sdl_type = IFT_L2VLAN;
993 
994 	if (ethertag) {
995 		error = vlan_config(ifv, p, vid);
996 		if (error != 0) {
997 			/*
998 			 * Since we've partially failed, we need to back
999 			 * out all the way, otherwise userland could get
1000 			 * confused.  Thus, we destroy the interface.
1001 			 */
1002 			ether_ifdetach(ifp);
1003 			vlan_unconfig(ifp);
1004 			if_free(ifp);
1005 			ifc_free_unit(ifc, unit);
1006 			free(ifv, M_VLAN);
1007 
1008 			return (error);
1009 		}
1010 
1011 		/* Update flags on the parent, if necessary. */
1012 		vlan_setflags(ifp, 1);
1013 	}
1014 
1015 	return (0);
1016 }
1017 
1018 static int
1019 vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
1020 {
1021 	struct ifvlan *ifv = ifp->if_softc;
1022 	int unit = ifp->if_dunit;
1023 
1024 	ether_ifdetach(ifp);	/* first, remove it from system-wide lists */
1025 	vlan_unconfig(ifp);	/* now it can be unconfigured and freed */
1026 	if_free(ifp);
1027 	counter_u64_free(ifv->ifv_ipackets);
1028 	counter_u64_free(ifv->ifv_opackets);
1029 	counter_u64_free(ifv->ifv_ibytes);
1030 	counter_u64_free(ifv->ifv_obytes);
1031 	counter_u64_free(ifv->ifv_omcasts);
1032 	counter_u64_free(ifv->ifv_oerrors);
1033 	free(ifv, M_VLAN);
1034 	ifc_free_unit(ifc, unit);
1035 
1036 	return (0);
1037 }
1038 
/*
 * The ifp->if_init entry point for vlan(4) is a no-op.
 * It is installed as if_init by vlan_clone_create(); its argument is
 * ignored.
 */
static void
vlan_init(void *foo __unused)
{
}
1046 
1047 /*
1048  * The if_transmit method for vlan(4) interface.
1049  */
1050 static int
1051 vlan_transmit(struct ifnet *ifp, struct mbuf *m)
1052 {
1053 	struct ifvlan *ifv;
1054 	struct ifnet *p;
1055 	int error, len, mcast;
1056 
1057 	ifv = ifp->if_softc;
1058 	p = PARENT(ifv);
1059 	len = m->m_pkthdr.len;
1060 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
1061 
1062 	BPF_MTAP(ifp, m);
1063 
1064 	/*
1065 	 * Do not run parent's if_transmit() if the parent is not up,
1066 	 * or parent's driver will cause a system crash.
1067 	 */
1068 	if (!UP_AND_RUNNING(p)) {
1069 		m_freem(m);
1070 		counter_u64_add(ifv->ifv_oerrors, 1);
1071 		return (ENETDOWN);
1072 	}
1073 
1074 	/*
1075 	 * Pad the frame to the minimum size allowed if told to.
1076 	 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
1077 	 * paragraph C.4.4.3.b.  It can help to work around buggy
1078 	 * bridges that violate paragraph C.4.4.3.a from the same
1079 	 * document, i.e., fail to pad short frames after untagging.
1080 	 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
1081 	 * untagging it will produce a 62-byte frame, which is a runt
1082 	 * and requires padding.  There are VLAN-enabled network
1083 	 * devices that just discard such runts instead or mishandle
1084 	 * them somehow.
1085 	 */
1086 	if (V_soft_pad && p->if_type == IFT_ETHER) {
1087 		static char pad[8];	/* just zeros */
1088 		int n;
1089 
1090 		for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
1091 		     n > 0; n -= sizeof(pad))
1092 			if (!m_append(m, min(n, sizeof(pad)), pad))
1093 				break;
1094 
1095 		if (n > 0) {
1096 			if_printf(ifp, "cannot pad short frame\n");
1097 			counter_u64_add(ifv->ifv_oerrors, 1);
1098 			m_freem(m);
1099 			return (0);
1100 		}
1101 	}
1102 
1103 	/*
1104 	 * If underlying interface can do VLAN tag insertion itself,
1105 	 * just pass the packet along. However, we need some way to
1106 	 * tell the interface where the packet came from so that it
1107 	 * knows how to find the VLAN tag to use, so we attach a
1108 	 * packet tag that holds it.
1109 	 */
1110 	if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1111 		m->m_pkthdr.ether_vtag = ifv->ifv_vid;
1112 		m->m_flags |= M_VLANTAG;
1113 	} else {
1114 		m = ether_vlanencap(m, ifv->ifv_vid);
1115 		if (m == NULL) {
1116 			if_printf(ifp, "unable to prepend VLAN header\n");
1117 			counter_u64_add(ifv->ifv_oerrors, 1);
1118 			return (0);
1119 		}
1120 	}
1121 
1122 	/*
1123 	 * Send it, precisely as ether_output() would have.
1124 	 */
1125 	error = (p->if_transmit)(p, m);
1126 	if (error == 0) {
1127 		counter_u64_add(ifv->ifv_opackets, 1);
1128 		counter_u64_add(ifv->ifv_obytes, len);
1129 		counter_u64_add(ifv->ifv_omcasts, mcast);
1130 	} else
1131 		counter_u64_add(ifv->ifv_oerrors, 1);
1132 	return (error);
1133 }
1134 
1135 static uint64_t
1136 vlan_get_counter(struct ifnet *ifp, ift_counter cnt)
1137 {
1138 	struct ifvlan *ifv;
1139 
1140 	ifv = ifp->if_softc;
1141 
1142 	switch (cnt) {
1143 		case IFCOUNTER_IPACKETS:
1144 			return (counter_u64_fetch(ifv->ifv_ipackets));
1145 		case IFCOUNTER_OPACKETS:
1146 			return (counter_u64_fetch(ifv->ifv_opackets));
1147 		case IFCOUNTER_IBYTES:
1148 			return (counter_u64_fetch(ifv->ifv_ibytes));
1149 		case IFCOUNTER_OBYTES:
1150 			return (counter_u64_fetch(ifv->ifv_obytes));
1151 		case IFCOUNTER_OMCASTS:
1152 			return (counter_u64_fetch(ifv->ifv_omcasts));
1153 		case IFCOUNTER_OERRORS:
1154 			return (counter_u64_fetch(ifv->ifv_oerrors));
1155 		default:
1156 			return (if_get_counter_default(ifp, cnt));
1157 	}
1158 	/* NOTREACHED */
1159 }
1160 
1161 /*
1162  * The ifp->if_qflush entry point for vlan(4) is a no-op.
1163  */
1164 static void
1165 vlan_qflush(struct ifnet *ifp __unused)
1166 {
1167 }
1168 
1169 static void
1170 vlan_input(struct ifnet *ifp, struct mbuf *m)
1171 {
1172 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1173 	struct ifvlan *ifv;
1174 	TRUNK_LOCK_READER;
1175 	uint16_t vid;
1176 
1177 	KASSERT(trunk != NULL, ("%s: no trunk", __func__));
1178 
1179 	if (m->m_flags & M_VLANTAG) {
1180 		/*
1181 		 * Packet is tagged, but m contains a normal
1182 		 * Ethernet frame; the tag is stored out-of-band.
1183 		 */
1184 		vid = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
1185 		m->m_flags &= ~M_VLANTAG;
1186 	} else {
1187 		struct ether_vlan_header *evl;
1188 
1189 		/*
1190 		 * Packet is tagged in-band as specified by 802.1q.
1191 		 */
1192 		switch (ifp->if_type) {
1193 		case IFT_ETHER:
1194 			if (m->m_len < sizeof(*evl) &&
1195 			    (m = m_pullup(m, sizeof(*evl))) == NULL) {
1196 				if_printf(ifp, "cannot pullup VLAN header\n");
1197 				return;
1198 			}
1199 			evl = mtod(m, struct ether_vlan_header *);
1200 			vid = EVL_VLANOFTAG(ntohs(evl->evl_tag));
1201 
1202 			/*
1203 			 * Remove the 802.1q header by copying the Ethernet
1204 			 * addresses over it and adjusting the beginning of
1205 			 * the data in the mbuf.  The encapsulated Ethernet
1206 			 * type field is already in place.
1207 			 */
1208 			bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
1209 			      ETHER_HDR_LEN - ETHER_TYPE_LEN);
1210 			m_adj(m, ETHER_VLAN_ENCAP_LEN);
1211 			break;
1212 
1213 		default:
1214 #ifdef INVARIANTS
1215 			panic("%s: %s has unsupported if_type %u",
1216 			      __func__, ifp->if_xname, ifp->if_type);
1217 #endif
1218 			m_freem(m);
1219 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
1220 			return;
1221 		}
1222 	}
1223 
1224 	TRUNK_RLOCK(trunk);
1225 	ifv = vlan_gethash(trunk, vid);
1226 	if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
1227 		TRUNK_RUNLOCK(trunk);
1228 		m_freem(m);
1229 		if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
1230 		return;
1231 	}
1232 	TRUNK_RUNLOCK(trunk);
1233 
1234 	m->m_pkthdr.rcvif = ifv->ifv_ifp;
1235 	counter_u64_add(ifv->ifv_ipackets, 1);
1236 	counter_u64_add(ifv->ifv_ibytes, m->m_pkthdr.len);
1237 
1238 	/* Pass it back through the parent's input routine. */
1239 	(*ifp->if_input)(ifv->ifv_ifp, m);
1240 }
1241 
1242 static int
1243 vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
1244 {
1245 	struct ifvlantrunk *trunk;
1246 	struct ifnet *ifp;
1247 	int error = 0;
1248 
1249 	/* VID numbers 0x0 and 0xFFF are reserved */
1250 	if (vid == 0 || vid == 0xFFF)
1251 		return (EINVAL);
1252 	if (p->if_type != IFT_ETHER &&
1253 	    (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
1254 		return (EPROTONOSUPPORT);
1255 	if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
1256 		return (EPROTONOSUPPORT);
1257 	if (ifv->ifv_trunk)
1258 		return (EBUSY);
1259 
1260 	if (p->if_vlantrunk == NULL) {
1261 		trunk = malloc(sizeof(struct ifvlantrunk),
1262 		    M_VLAN, M_WAITOK | M_ZERO);
1263 		vlan_inithash(trunk);
1264 		VLAN_LOCK();
1265 		if (p->if_vlantrunk != NULL) {
1266 			/* A race that that is very unlikely to be hit. */
1267 			vlan_freehash(trunk);
1268 			free(trunk, M_VLAN);
1269 			goto exists;
1270 		}
1271 		TRUNK_LOCK_INIT(trunk);
1272 		TRUNK_LOCK(trunk);
1273 		p->if_vlantrunk = trunk;
1274 		trunk->parent = p;
1275 	} else {
1276 		VLAN_LOCK();
1277 exists:
1278 		trunk = p->if_vlantrunk;
1279 		TRUNK_LOCK(trunk);
1280 	}
1281 
1282 	ifv->ifv_vid = vid;	/* must set this before vlan_inshash() */
1283 	error = vlan_inshash(trunk, ifv);
1284 	if (error)
1285 		goto done;
1286 	ifv->ifv_proto = ETHERTYPE_VLAN;
1287 	ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
1288 	ifv->ifv_mintu = ETHERMIN;
1289 	ifv->ifv_pflags = 0;
1290 
1291 	/*
1292 	 * If the parent supports the VLAN_MTU capability,
1293 	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames,
1294 	 * use it.
1295 	 */
1296 	if (p->if_capenable & IFCAP_VLAN_MTU) {
1297 		/*
1298 		 * No need to fudge the MTU since the parent can
1299 		 * handle extended frames.
1300 		 */
1301 		ifv->ifv_mtufudge = 0;
1302 	} else {
1303 		/*
1304 		 * Fudge the MTU by the encapsulation size.  This
1305 		 * makes us incompatible with strictly compliant
1306 		 * 802.1Q implementations, but allows us to use
1307 		 * the feature with other NetBSD implementations,
1308 		 * which might still be useful.
1309 		 */
1310 		ifv->ifv_mtufudge = ifv->ifv_encaplen;
1311 	}
1312 
1313 	ifv->ifv_trunk = trunk;
1314 	ifp = ifv->ifv_ifp;
1315 	/*
1316 	 * Initialize fields from our parent.  This duplicates some
1317 	 * work with ether_ifattach() but allows for non-ethernet
1318 	 * interfaces to also work.
1319 	 */
1320 	ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
1321 	ifp->if_baudrate = p->if_baudrate;
1322 	ifp->if_output = p->if_output;
1323 	ifp->if_input = p->if_input;
1324 	ifp->if_resolvemulti = p->if_resolvemulti;
1325 	ifp->if_addrlen = p->if_addrlen;
1326 	ifp->if_broadcastaddr = p->if_broadcastaddr;
1327 
1328 	/*
1329 	 * Copy only a selected subset of flags from the parent.
1330 	 * Other flags are none of our business.
1331 	 */
1332 #define VLAN_COPY_FLAGS (IFF_SIMPLEX)
1333 	ifp->if_flags &= ~VLAN_COPY_FLAGS;
1334 	ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS;
1335 #undef VLAN_COPY_FLAGS
1336 
1337 	ifp->if_link_state = p->if_link_state;
1338 
1339 	vlan_capabilities(ifv);
1340 
1341 	/*
1342 	 * Set up our interface address to reflect the underlying
1343 	 * physical interface's.
1344 	 */
1345 	bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen);
1346 	((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen =
1347 	    p->if_addrlen;
1348 
1349 	/*
1350 	 * Configure multicast addresses that may already be
1351 	 * joined on the vlan device.
1352 	 */
1353 	(void)vlan_setmulti(ifp); /* XXX: VLAN lock held */
1354 
1355 	/* We are ready for operation now. */
1356 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1357 done:
1358 	TRUNK_UNLOCK(trunk);
1359 	if (error == 0)
1360 		EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
1361 	VLAN_UNLOCK();
1362 
1363 	return (error);
1364 }
1365 
/*
 * Unconfigure a vlan interface.  Takes the VLAN lock around
 * vlan_unconfig_locked(), which is called with "departing" set to 0.
 */
static void
vlan_unconfig(struct ifnet *ifp)
{
	VLAN_LOCK();
	vlan_unconfig_locked(ifp, 0);
	VLAN_UNLOCK();
}
1374 
1375 static void
1376 vlan_unconfig_locked(struct ifnet *ifp, int departing)
1377 {
1378 	struct ifvlantrunk *trunk;
1379 	struct vlan_mc_entry *mc;
1380 	struct ifvlan *ifv;
1381 	struct ifnet  *parent;
1382 	int error;
1383 
1384 	VLAN_LOCK_ASSERT();
1385 
1386 	ifv = ifp->if_softc;
1387 	trunk = ifv->ifv_trunk;
1388 	parent = NULL;
1389 
1390 	if (trunk != NULL) {
1391 
1392 		TRUNK_LOCK(trunk);
1393 		parent = trunk->parent;
1394 
1395 		/*
1396 		 * Since the interface is being unconfigured, we need to
1397 		 * empty the list of multicast groups that we may have joined
1398 		 * while we were alive from the parent's list.
1399 		 */
1400 		while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
1401 			/*
1402 			 * If the parent interface is being detached,
1403 			 * all its multicast addresses have already
1404 			 * been removed.  Warn about errors if
1405 			 * if_delmulti() does fail, but don't abort as
1406 			 * all callers expect vlan destruction to
1407 			 * succeed.
1408 			 */
1409 			if (!departing) {
1410 				error = if_delmulti(parent,
1411 				    (struct sockaddr *)&mc->mc_addr);
1412 				if (error)
1413 					if_printf(ifp,
1414 		    "Failed to delete multicast address from parent: %d\n",
1415 					    error);
1416 			}
1417 			SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
1418 			free(mc, M_VLAN);
1419 		}
1420 
1421 		vlan_setflags(ifp, 0); /* clear special flags on parent */
1422 		vlan_remhash(trunk, ifv);
1423 		ifv->ifv_trunk = NULL;
1424 
1425 		/*
1426 		 * Check if we were the last.
1427 		 */
1428 		if (trunk->refcnt == 0) {
1429 			parent->if_vlantrunk = NULL;
1430 			/*
1431 			 * XXXGL: If some ithread has already entered
1432 			 * vlan_input() and is now blocked on the trunk
1433 			 * lock, then it should preempt us right after
1434 			 * unlock and finish its work. Then we will acquire
1435 			 * lock again in trunk_destroy().
1436 			 */
1437 			TRUNK_UNLOCK(trunk);
1438 			trunk_destroy(trunk);
1439 		} else
1440 			TRUNK_UNLOCK(trunk);
1441 	}
1442 
1443 	/* Disconnect from parent. */
1444 	if (ifv->ifv_pflags)
1445 		if_printf(ifp, "%s: ifv_pflags unclean\n", __func__);
1446 	ifp->if_mtu = ETHERMTU;
1447 	ifp->if_link_state = LINK_STATE_UNKNOWN;
1448 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1449 
1450 	/*
1451 	 * Only dispatch an event if vlan was
1452 	 * attached, otherwise there is nothing
1453 	 * to cleanup anyway.
1454 	 */
1455 	if (parent != NULL)
1456 		EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid);
1457 }
1458 
1459 /* Handle a reference counted flag that should be set on the parent as well */
1460 static int
1461 vlan_setflag(struct ifnet *ifp, int flag, int status,
1462 	     int (*func)(struct ifnet *, int))
1463 {
1464 	struct ifvlan *ifv;
1465 	int error;
1466 
1467 	/* XXX VLAN_LOCK_ASSERT(); */
1468 
1469 	ifv = ifp->if_softc;
1470 	status = status ? (ifp->if_flags & flag) : 0;
1471 	/* Now "status" contains the flag value or 0 */
1472 
1473 	/*
1474 	 * See if recorded parent's status is different from what
1475 	 * we want it to be.  If it is, flip it.  We record parent's
1476 	 * status in ifv_pflags so that we won't clear parent's flag
1477 	 * we haven't set.  In fact, we don't clear or set parent's
1478 	 * flags directly, but get or release references to them.
1479 	 * That's why we can be sure that recorded flags still are
1480 	 * in accord with actual parent's flags.
1481 	 */
1482 	if (status != (ifv->ifv_pflags & flag)) {
1483 		error = (*func)(PARENT(ifv), status);
1484 		if (error)
1485 			return (error);
1486 		ifv->ifv_pflags &= ~flag;
1487 		ifv->ifv_pflags |= status;
1488 	}
1489 	return (0);
1490 }
1491 
1492 /*
1493  * Handle IFF_* flags that require certain changes on the parent:
1494  * if "status" is true, update parent's flags respective to our if_flags;
1495  * if "status" is false, forcedly clear the flags set on parent.
1496  */
1497 static int
1498 vlan_setflags(struct ifnet *ifp, int status)
1499 {
1500 	int error, i;
1501 
1502 	for (i = 0; vlan_pflags[i].flag; i++) {
1503 		error = vlan_setflag(ifp, vlan_pflags[i].flag,
1504 				     status, vlan_pflags[i].func);
1505 		if (error)
1506 			return (error);
1507 	}
1508 	return (0);
1509 }
1510 
1511 /* Inform all vlans that their parent has changed link state */
1512 static void
1513 vlan_link_state(struct ifnet *ifp)
1514 {
1515 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1516 	struct ifvlan *ifv;
1517 	int i;
1518 
1519 	TRUNK_LOCK(trunk);
1520 #ifdef VLAN_ARRAY
1521 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1522 		if (trunk->vlans[i] != NULL) {
1523 			ifv = trunk->vlans[i];
1524 #else
1525 	for (i = 0; i < (1 << trunk->hwidth); i++)
1526 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) {
1527 #endif
1528 			ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate;
1529 			if_link_state_change(ifv->ifv_ifp,
1530 			    trunk->parent->if_link_state);
1531 		}
1532 	TRUNK_UNLOCK(trunk);
1533 }
1534 
1535 static void
1536 vlan_capabilities(struct ifvlan *ifv)
1537 {
1538 	struct ifnet *p = PARENT(ifv);
1539 	struct ifnet *ifp = ifv->ifv_ifp;
1540 	struct ifnet_hw_tsomax hw_tsomax;
1541 
1542 	TRUNK_LOCK_ASSERT(TRUNK(ifv));
1543 
1544 	/*
1545 	 * If the parent interface can do checksum offloading
1546 	 * on VLANs, then propagate its hardware-assisted
1547 	 * checksumming flags. Also assert that checksum
1548 	 * offloading requires hardware VLAN tagging.
1549 	 */
1550 	if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
1551 		ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
1552 
1553 	if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
1554 	    p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1555 		ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
1556 		ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
1557 		    CSUM_UDP | CSUM_SCTP);
1558 	} else {
1559 		ifp->if_capenable = 0;
1560 		ifp->if_hwassist = 0;
1561 	}
1562 	/*
1563 	 * If the parent interface can do TSO on VLANs then
1564 	 * propagate the hardware-assisted flag. TSO on VLANs
1565 	 * does not necessarily require hardware VLAN tagging.
1566 	 */
1567 	memset(&hw_tsomax, 0, sizeof(hw_tsomax));
1568 	if_hw_tsomax_common(p, &hw_tsomax);
1569 	if_hw_tsomax_update(ifp, &hw_tsomax);
1570 	if (p->if_capabilities & IFCAP_VLAN_HWTSO)
1571 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
1572 	if (p->if_capenable & IFCAP_VLAN_HWTSO) {
1573 		ifp->if_capenable |= p->if_capenable & IFCAP_TSO;
1574 		ifp->if_hwassist |= p->if_hwassist & CSUM_TSO;
1575 	} else {
1576 		ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
1577 		ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
1578 	}
1579 
1580 	/*
1581 	 * If the parent interface can offload TCP connections over VLANs then
1582 	 * propagate its TOE capability to the VLAN interface.
1583 	 *
1584 	 * All TOE drivers in the tree today can deal with VLANs.  If this
1585 	 * changes then IFCAP_VLAN_TOE should be promoted to a full capability
1586 	 * with its own bit.
1587 	 */
1588 #define	IFCAP_VLAN_TOE IFCAP_TOE
1589 	if (p->if_capabilities & IFCAP_VLAN_TOE)
1590 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
1591 	if (p->if_capenable & IFCAP_VLAN_TOE) {
1592 		TOEDEV(ifp) = TOEDEV(p);
1593 		ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
1594 	}
1595 }
1596 
1597 static void
1598 vlan_trunk_capabilities(struct ifnet *ifp)
1599 {
1600 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1601 	struct ifvlan *ifv;
1602 	int i;
1603 
1604 	TRUNK_LOCK(trunk);
1605 #ifdef VLAN_ARRAY
1606 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1607 		if (trunk->vlans[i] != NULL) {
1608 			ifv = trunk->vlans[i];
1609 #else
1610 	for (i = 0; i < (1 << trunk->hwidth); i++) {
1611 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
1612 #endif
1613 			vlan_capabilities(ifv);
1614 	}
1615 	TRUNK_UNLOCK(trunk);
1616 }
1617 
1618 static int
1619 vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1620 {
1621 	struct ifnet *p;
1622 	struct ifreq *ifr;
1623 	struct ifaddr *ifa;
1624 	struct ifvlan *ifv;
1625 	struct ifvlantrunk *trunk;
1626 	struct vlanreq vlr;
1627 	int error = 0;
1628 
1629 	ifr = (struct ifreq *)data;
1630 	ifa = (struct ifaddr *) data;
1631 	ifv = ifp->if_softc;
1632 
1633 	switch (cmd) {
1634 	case SIOCSIFADDR:
1635 		ifp->if_flags |= IFF_UP;
1636 #ifdef INET
1637 		if (ifa->ifa_addr->sa_family == AF_INET)
1638 			arp_ifinit(ifp, ifa);
1639 #endif
1640 		break;
1641 	case SIOCGIFADDR:
1642                 {
1643 			struct sockaddr *sa;
1644 
1645 			sa = (struct sockaddr *)&ifr->ifr_data;
1646 			bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen);
1647                 }
1648 		break;
1649 	case SIOCGIFMEDIA:
1650 		VLAN_LOCK();
1651 		if (TRUNK(ifv) != NULL) {
1652 			p = PARENT(ifv);
1653 			VLAN_UNLOCK();
1654 			error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data);
1655 			/* Limit the result to the parent's current config. */
1656 			if (error == 0) {
1657 				struct ifmediareq *ifmr;
1658 
1659 				ifmr = (struct ifmediareq *)data;
1660 				if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) {
1661 					ifmr->ifm_count = 1;
1662 					error = copyout(&ifmr->ifm_current,
1663 						ifmr->ifm_ulist,
1664 						sizeof(int));
1665 				}
1666 			}
1667 		} else {
1668 			VLAN_UNLOCK();
1669 			error = EINVAL;
1670 		}
1671 		break;
1672 
1673 	case SIOCSIFMEDIA:
1674 		error = EINVAL;
1675 		break;
1676 
1677 	case SIOCSIFMTU:
1678 		/*
1679 		 * Set the interface MTU.
1680 		 */
1681 		VLAN_LOCK();
1682 		if (TRUNK(ifv) != NULL) {
1683 			if (ifr->ifr_mtu >
1684 			     (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) ||
1685 			    ifr->ifr_mtu <
1686 			     (ifv->ifv_mintu - ifv->ifv_mtufudge))
1687 				error = EINVAL;
1688 			else
1689 				ifp->if_mtu = ifr->ifr_mtu;
1690 		} else
1691 			error = EINVAL;
1692 		VLAN_UNLOCK();
1693 		break;
1694 
1695 	case SIOCSETVLAN:
1696 #ifdef VIMAGE
1697 		/*
1698 		 * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN
1699 		 * interface to be delegated to a jail without allowing the
1700 		 * jail to change what underlying interface/VID it is
1701 		 * associated with.  We are not entirely convinced that this
1702 		 * is the right way to accomplish that policy goal.
1703 		 */
1704 		if (ifp->if_vnet != ifp->if_home_vnet) {
1705 			error = EPERM;
1706 			break;
1707 		}
1708 #endif
1709 		error = copyin(ifr->ifr_data, &vlr, sizeof(vlr));
1710 		if (error)
1711 			break;
1712 		if (vlr.vlr_parent[0] == '\0') {
1713 			vlan_unconfig(ifp);
1714 			break;
1715 		}
1716 		p = ifunit(vlr.vlr_parent);
1717 		if (p == NULL) {
1718 			error = ENOENT;
1719 			break;
1720 		}
1721 		/*
1722 		 * Don't let the caller set up a VLAN VID with
1723 		 * anything except VLID bits.
1724 		 */
1725 		if (vlr.vlr_tag & ~EVL_VLID_MASK) {
1726 			error = EINVAL;
1727 			break;
1728 		}
1729 		error = vlan_config(ifv, p, vlr.vlr_tag);
1730 		if (error)
1731 			break;
1732 
1733 		/* Update flags on the parent, if necessary. */
1734 		vlan_setflags(ifp, 1);
1735 		break;
1736 
1737 	case SIOCGETVLAN:
1738 #ifdef VIMAGE
1739 		if (ifp->if_vnet != ifp->if_home_vnet) {
1740 			error = EPERM;
1741 			break;
1742 		}
1743 #endif
1744 		bzero(&vlr, sizeof(vlr));
1745 		VLAN_LOCK();
1746 		if (TRUNK(ifv) != NULL) {
1747 			strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
1748 			    sizeof(vlr.vlr_parent));
1749 			vlr.vlr_tag = ifv->ifv_vid;
1750 		}
1751 		VLAN_UNLOCK();
1752 		error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
1753 		break;
1754 
1755 	case SIOCSIFFLAGS:
1756 		/*
1757 		 * We should propagate selected flags to the parent,
1758 		 * e.g., promiscuous mode.
1759 		 */
1760 		if (TRUNK(ifv) != NULL)
1761 			error = vlan_setflags(ifp, 1);
1762 		break;
1763 
1764 	case SIOCADDMULTI:
1765 	case SIOCDELMULTI:
1766 		/*
1767 		 * If we don't have a parent, just remember the membership for
1768 		 * when we do.
1769 		 */
1770 		trunk = TRUNK(ifv);
1771 		if (trunk != NULL) {
1772 			TRUNK_LOCK(trunk);
1773 			error = vlan_setmulti(ifp);
1774 			TRUNK_UNLOCK(trunk);
1775 		}
1776 		break;
1777 
1778 	default:
1779 		error = EINVAL;
1780 		break;
1781 	}
1782 
1783 	return (error);
1784 }
1785