xref: /freebsd/sys/net/if_vlan.c (revision b1f92fa22938fe29ab7e53692ffe0ed7a0ecc4d0)
1 /*-
2  * Copyright 1998 Massachusetts Institute of Technology
3  *
4  * Permission to use, copy, modify, and distribute this software and
5  * its documentation for any purpose and without fee is hereby
6  * granted, provided that both the above copyright notice and this
7  * permission notice appear in all copies, that both the above
8  * copyright notice and this permission notice appear in all
9  * supporting documentation, and that the name of M.I.T. not be used
10  * in advertising or publicity pertaining to distribution of the
11  * software without specific, written prior permission.  M.I.T. makes
12  * no representations about the suitability of this software for any
13  * purpose.  It is provided "as is" without express or implied
14  * warranty.
15  *
16  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /*
31  * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
32  * Might be extended some day to also handle IEEE 802.1p priority
33  * tagging.  This is sort of sneaky in the implementation, since
34  * we need to pretend to be enough of an Ethernet implementation
35  * to make arp work.  The way we do this is by telling everyone
36  * that we are an Ethernet, and then catch the packets that
37  * ether_output() sends to us via if_transmit(), rewrite them for
38  * use by the real outgoing interface, and ask it to send them.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include "opt_inet.h"
45 #include "opt_vlan.h"
46 
47 #include <sys/param.h>
48 #include <sys/eventhandler.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rmlock.h>
55 #include <sys/queue.h>
56 #include <sys/socket.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/systm.h>
60 #include <sys/sx.h>
61 
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_var.h>
66 #include <net/if_clone.h>
67 #include <net/if_dl.h>
68 #include <net/if_types.h>
69 #include <net/if_vlan_var.h>
70 #include <net/vnet.h>
71 
72 #ifdef INET
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #endif
76 
/* Initial hash width: the table starts with 1 << VLAN_DEF_HWIDTH buckets. */
#define	VLAN_DEF_HWIDTH	4
/* Interface flags assigned to every vlan(4) ifnet at clone time. */
#define	VLAN_IFFLAGS	(IFF_BROADCAST | IFF_MULTICAST)

/* Non-zero when IFF_UP is set and the driver reports IFF_DRV_RUNNING. */
#define	UP_AND_RUNNING(ifp) \
    ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING)

/* List head type used for each bucket of the per-trunk hash table. */
LIST_HEAD(ifvlanhead, ifvlan);
84 
/*
 * Per-parent state: one trunk describes the set of vlan(4) interfaces
 * stacked on a single parent ifnet.  The lookup structure is either a
 * full-size array (VLAN_ARRAY) or a resizable chained hash table.
 */
struct ifvlantrunk {
	struct	ifnet   *parent;	/* parent interface of this trunk */
	struct	rmlock	lock;		/* see the TRUNK_*LOCK() macros */
#ifdef VLAN_ARRAY
#define	VLAN_ARRAY_SIZE	(EVL_VLID_MASK + 1)
	struct	ifvlan	*vlans[VLAN_ARRAY_SIZE]; /* static table */
#else
	struct	ifvlanhead *hash;	/* dynamic hash-list table */
	uint16_t	hmask;		/* bucket count - 1, used by HASH() */
	uint16_t	hwidth;		/* log2 of the bucket count */
#endif
	int		refcnt;		/* ++ in vlan_inshash, -- in vlan_remhash */
};
98 
/*
 * One link-layer multicast address recorded by vlan_setmulti(); the
 * entries form the singly-linked list hanging off struct ifvlan.
 */
struct vlan_mc_entry {
	struct sockaddr_dl		mc_addr;	/* address handed to the parent */
	SLIST_ENTRY(vlan_mc_entry)	mc_entries;	/* list linkage */
};
103 
/*
 * Softc of a vlan(4) interface.  ifv_trunk is NULL while the vlan is not
 * attached to a parent (see the check in vlan_trunkdev()).
 */
struct	ifvlan {
	struct	ifvlantrunk *ifv_trunk;	/* trunk we are attached to, or NULL */
	struct	ifnet *ifv_ifp;		/* our own ifnet */
#define	TRUNK(ifv)	((ifv)->ifv_trunk)
#define	PARENT(ifv)	((ifv)->ifv_trunk->parent)
	void	*ifv_cookie;		/* opaque value for vlan_cookie()/vlan_setcookie() */
	int	ifv_pflags;	/* special flags we have set on parent */
	/* Exposed through if_linkmib by vlan_clone_create(). */
	struct	ifv_linkmib {
		int	ifvm_encaplen;	/* encapsulation length */
		int	ifvm_mtufudge;	/* MTU fudged by this much */
		int	ifvm_mintu;	/* min transmission unit */
		uint16_t ifvm_proto;	/* encapsulation ethertype */
		uint16_t ifvm_tag;	/* tag to apply on packets leaving if */
	}	ifv_mib;
	SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;	/* filled by vlan_setmulti() */
#ifndef VLAN_ARRAY
	LIST_ENTRY(ifvlan) ifv_list;	/* hash bucket linkage */
#endif
};
/* Shorthand accessors for the ifv_mib members. */
#define	ifv_proto	ifv_mib.ifvm_proto
#define	ifv_vid		ifv_mib.ifvm_tag
#define	ifv_encaplen	ifv_mib.ifvm_encaplen
#define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
#define	ifv_mintu	ifv_mib.ifvm_mintu
128 
/*
 * Special flags we should propagate to parent.
 * Each entry pairs an interface flag with the function used to apply it;
 * the table is terminated by an entry whose func is NULL.
 */
static struct {
	int flag;
	int (*func)(struct ifnet *, int);
} vlan_pflags[] = {
	{IFF_PROMISC, ifpromisc},
	{IFF_ALLMULTI, if_allmulti},
	{0, NULL}
};
138 
/* sysctl tree: net.link.vlan and net.link.vlan.link. */
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
    "IEEE 802.1Q VLAN");
static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
    "for consistency");

/*
 * Per-vnet knob net.link.vlan.soft_pad: when non-zero, vlan_transmit()
 * pads short frames before tagging them.
 */
static VNET_DEFINE(int, soft_pad);
#define	V_soft_pad	VNET(soft_pad)
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(soft_pad), 0, "pad short frames before tagging");
149 
/* Driver name; also used as the malloc type name and the trunk lock name. */
static const char vlanname[] = "vlan";
static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");

/* Event handler registrations made in vlan_modevent(MOD_LOAD). */
static eventhandler_tag ifdetach_tag;
static eventhandler_tag iflladdr_tag;
155 
/*
 * We have a global sx(9) lock that is used to serialize configuration
 * changes and isn't used in normal packet delivery.
 *
 * We also have a per-trunk rmlock(9), that is locked shared on packet
 * processing and exclusive when configuration is changed.
 *
 * The VLAN_ARRAY option substitutes the dynamic hash with a static array
 * with 4096 entries. In theory this can give a boost in processing;
 * in practice, however, it does not. Probably this is because the array
 * is too big to fit into the CPU cache.
 *
 * TRUNK_RLOCK()/TRUNK_RUNLOCK() reference a local variable named
 * "tracker"; declare it in the calling function with TRUNK_LOCK_READER.
 */
static struct sx ifv_lock;
#define	VLAN_LOCK_INIT()	sx_init(&ifv_lock, "vlan_global")
#define	VLAN_LOCK_DESTROY()	sx_destroy(&ifv_lock)
#define	VLAN_LOCK_ASSERT()	sx_assert(&ifv_lock, SA_LOCKED)
#define	VLAN_LOCK()		sx_xlock(&ifv_lock)
#define	VLAN_UNLOCK()		sx_xunlock(&ifv_lock)
#define	TRUNK_LOCK_INIT(trunk)	rm_init(&(trunk)->lock, vlanname)
#define	TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock)
#define	TRUNK_LOCK(trunk)	rm_wlock(&(trunk)->lock)
#define	TRUNK_UNLOCK(trunk)	rm_wunlock(&(trunk)->lock)
#define	TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED)
#define	TRUNK_RLOCK(trunk)	rm_rlock(&(trunk)->lock, &tracker)
#define	TRUNK_RUNLOCK(trunk)	rm_runlock(&(trunk)->lock, &tracker)
#define	TRUNK_LOCK_RASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED)
#define	TRUNK_LOCK_READER	struct rm_priotracker tracker
183 
/* Hash table helpers (the VLAN_ARRAY build defines inline replacements). */
#ifndef VLAN_ARRAY
static	void vlan_inithash(struct ifvlantrunk *trunk);
static	void vlan_freehash(struct ifvlantrunk *trunk);
static	int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static	int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static	void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
	uint16_t vid);
#endif
static	void trunk_destroy(struct ifvlantrunk *trunk);

/* ifnet methods and configuration helpers. */
static	void vlan_init(void *foo);
static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
static	void vlan_qflush(struct ifnet *ifp);
static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
    int (*func)(struct ifnet *, int));
static	int vlan_setflags(struct ifnet *ifp, int status);
static	int vlan_setmulti(struct ifnet *ifp);
static	int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
static	void vlan_unconfig(struct ifnet *ifp);
static	void vlan_unconfig_locked(struct ifnet *ifp, int departing);
static	int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
static	void vlan_link_state(struct ifnet *ifp);
static	void vlan_capabilities(struct ifvlan *ifv);
static	void vlan_trunk_capabilities(struct ifnet *ifp);

/* Interface cloner callbacks. */
static	struct ifnet *vlan_clone_match_ethervid(const char *, int *);
static	int vlan_clone_match(struct if_clone *, const char *);
static	int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
static	int vlan_clone_destroy(struct if_clone *, struct ifnet *);

/* Event handlers registered in vlan_modevent(). */
static	void vlan_ifdetach(void *arg, struct ifnet *ifp);
static  void vlan_iflladdr(void *arg, struct ifnet *ifp);

/* Cloner handle returned by if_clone_advanced(). */
static struct if_clone *vlan_cloner;

#ifdef VIMAGE
/* Per-vnet copy of the cloner handle, detached in vnet_vlan_uninit(). */
static VNET_DEFINE(struct if_clone *, vlan_cloner);
#define	V_vlan_cloner	VNET(vlan_cloner)
#endif
225 
226 #ifndef VLAN_ARRAY
/* Fold VLAN ID (n) into a bucket index by XOR-ing shifted copies under mask (m). */
#define HASH(n, m)	(((n) ^ ((n) >> 4) ^ ((n) >> 8)) & (m))
228 
229 static void
230 vlan_inithash(struct ifvlantrunk *trunk)
231 {
232 	int i, n;
233 
234 	/*
235 	 * The trunk must not be locked here since we call malloc(M_WAITOK).
236 	 * It is OK in case this function is called before the trunk struct
237 	 * gets hooked up and becomes visible from other threads.
238 	 */
239 
240 	KASSERT(trunk->hwidth == 0 && trunk->hash == NULL,
241 	    ("%s: hash already initialized", __func__));
242 
243 	trunk->hwidth = VLAN_DEF_HWIDTH;
244 	n = 1 << trunk->hwidth;
245 	trunk->hmask = n - 1;
246 	trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK);
247 	for (i = 0; i < n; i++)
248 		LIST_INIT(&trunk->hash[i]);
249 }
250 
251 static void
252 vlan_freehash(struct ifvlantrunk *trunk)
253 {
254 #ifdef INVARIANTS
255 	int i;
256 
257 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
258 	for (i = 0; i < (1 << trunk->hwidth); i++)
259 		KASSERT(LIST_EMPTY(&trunk->hash[i]),
260 		    ("%s: hash table not empty", __func__));
261 #endif
262 	free(trunk->hash, M_VLAN);
263 	trunk->hash = NULL;
264 	trunk->hwidth = trunk->hmask = 0;
265 }
266 
267 static int
268 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
269 {
270 	int i, b;
271 	struct ifvlan *ifv2;
272 
273 	TRUNK_LOCK_ASSERT(trunk);
274 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
275 
276 	b = 1 << trunk->hwidth;
277 	i = HASH(ifv->ifv_vid, trunk->hmask);
278 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
279 		if (ifv->ifv_vid == ifv2->ifv_vid)
280 			return (EEXIST);
281 
282 	/*
283 	 * Grow the hash when the number of vlans exceeds half of the number of
284 	 * hash buckets squared. This will make the average linked-list length
285 	 * buckets/2.
286 	 */
287 	if (trunk->refcnt > (b * b) / 2) {
288 		vlan_growhash(trunk, 1);
289 		i = HASH(ifv->ifv_vid, trunk->hmask);
290 	}
291 	LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
292 	trunk->refcnt++;
293 
294 	return (0);
295 }
296 
297 static int
298 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
299 {
300 	int i, b;
301 	struct ifvlan *ifv2;
302 
303 	TRUNK_LOCK_ASSERT(trunk);
304 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
305 
306 	b = 1 << trunk->hwidth;
307 	i = HASH(ifv->ifv_vid, trunk->hmask);
308 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
309 		if (ifv2 == ifv) {
310 			trunk->refcnt--;
311 			LIST_REMOVE(ifv2, ifv_list);
312 			if (trunk->refcnt < (b * b) / 2)
313 				vlan_growhash(trunk, -1);
314 			return (0);
315 		}
316 
317 	panic("%s: vlan not found\n", __func__);
318 	return (ENOENT); /*NOTREACHED*/
319 }
320 
321 /*
322  * Grow the hash larger or smaller if memory permits.
323  */
324 static void
325 vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
326 {
327 	struct ifvlan *ifv;
328 	struct ifvlanhead *hash2;
329 	int hwidth2, i, j, n, n2;
330 
331 	TRUNK_LOCK_ASSERT(trunk);
332 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
333 
334 	if (howmuch == 0) {
335 		/* Harmless yet obvious coding error */
336 		printf("%s: howmuch is 0\n", __func__);
337 		return;
338 	}
339 
340 	hwidth2 = trunk->hwidth + howmuch;
341 	n = 1 << trunk->hwidth;
342 	n2 = 1 << hwidth2;
343 	/* Do not shrink the table below the default */
344 	if (hwidth2 < VLAN_DEF_HWIDTH)
345 		return;
346 
347 	/* M_NOWAIT because we're called with trunk mutex held */
348 	hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT);
349 	if (hash2 == NULL) {
350 		printf("%s: out of memory -- hash size not changed\n",
351 		    __func__);
352 		return;		/* We can live with the old hash table */
353 	}
354 	for (j = 0; j < n2; j++)
355 		LIST_INIT(&hash2[j]);
356 	for (i = 0; i < n; i++)
357 		while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
358 			LIST_REMOVE(ifv, ifv_list);
359 			j = HASH(ifv->ifv_vid, n2 - 1);
360 			LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
361 		}
362 	free(trunk->hash, M_VLAN);
363 	trunk->hash = hash2;
364 	trunk->hwidth = hwidth2;
365 	trunk->hmask = n2 - 1;
366 
367 	if (bootverbose)
368 		if_printf(trunk->parent,
369 		    "VLAN hash table resized from %d to %d buckets\n", n, n2);
370 }
371 
372 static __inline struct ifvlan *
373 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
374 {
375 	struct ifvlan *ifv;
376 
377 	TRUNK_LOCK_RASSERT(trunk);
378 
379 	LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
380 		if (ifv->ifv_vid == vid)
381 			return (ifv);
382 	return (NULL);
383 }
384 
#if 0
/*
 * Debugging code to view the hashtables.
 * Prints one line per bucket: the bucket index followed by the names of
 * the vlan interfaces chained in it.  Compiled out by default.
 */
static void
vlan_dumphash(struct ifvlantrunk *trunk)
{
	int i;
	struct ifvlan *ifv;

	for (i = 0; i < (1 << trunk->hwidth); i++) {
		printf("%d: ", i);
		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
			printf("%s ", ifv->ifv_ifp->if_xname);
		printf("\n");
	}
}
#endif /* 0 */
401 #else
402 
403 static __inline struct ifvlan *
404 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
405 {
406 
407 	return trunk->vlans[vid];
408 }
409 
410 static __inline int
411 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
412 {
413 
414 	if (trunk->vlans[ifv->ifv_vid] != NULL)
415 		return EEXIST;
416 	trunk->vlans[ifv->ifv_vid] = ifv;
417 	trunk->refcnt++;
418 
419 	return (0);
420 }
421 
422 static __inline int
423 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
424 {
425 
426 	trunk->vlans[ifv->ifv_vid] = NULL;
427 	trunk->refcnt--;
428 
429 	return (0);
430 }
431 
/* VLAN_ARRAY variant: the table is embedded in the trunk, nothing to free. */
static __inline void
vlan_freehash(struct ifvlantrunk *trunk)
{
}
436 
/* VLAN_ARRAY variant: intentionally empty, no table is allocated here. */
static __inline void
vlan_inithash(struct ifvlantrunk *trunk)
{
}
441 
442 #endif /* !VLAN_ARRAY */
443 
444 static void
445 trunk_destroy(struct ifvlantrunk *trunk)
446 {
447 	VLAN_LOCK_ASSERT();
448 
449 	TRUNK_LOCK(trunk);
450 	vlan_freehash(trunk);
451 	trunk->parent->if_vlantrunk = NULL;
452 	TRUNK_UNLOCK(trunk);
453 	TRUNK_LOCK_DESTROY(trunk);
454 	free(trunk, M_VLAN);
455 }
456 
457 /*
458  * Program our multicast filter. What we're actually doing is
459  * programming the multicast filter of the parent. This has the
460  * side effect of causing the parent interface to receive multicast
461  * traffic that it doesn't really want, which ends up being discarded
462  * later by the upper protocol layers. Unfortunately, there's no way
463  * to avoid this: there really is only one physical interface.
464  */
465 static int
466 vlan_setmulti(struct ifnet *ifp)
467 {
468 	struct ifnet		*ifp_p;
469 	struct ifmultiaddr	*ifma;
470 	struct ifvlan		*sc;
471 	struct vlan_mc_entry	*mc;
472 	int			error;
473 
474 	/* Find the parent. */
475 	sc = ifp->if_softc;
476 	TRUNK_LOCK_ASSERT(TRUNK(sc));
477 	ifp_p = PARENT(sc);
478 
479 	CURVNET_SET_QUIET(ifp_p->if_vnet);
480 
481 	/* First, remove any existing filter entries. */
482 	while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
483 		SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
484 		(void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
485 		free(mc, M_VLAN);
486 	}
487 
488 	/* Now program new ones. */
489 	IF_ADDR_WLOCK(ifp);
490 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
491 		if (ifma->ifma_addr->sa_family != AF_LINK)
492 			continue;
493 		mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
494 		if (mc == NULL) {
495 			IF_ADDR_WUNLOCK(ifp);
496 			return (ENOMEM);
497 		}
498 		bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
499 		mc->mc_addr.sdl_index = ifp_p->if_index;
500 		SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
501 	}
502 	IF_ADDR_WUNLOCK(ifp);
503 	SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
504 		error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
505 		    NULL);
506 		if (error)
507 			return (error);
508 	}
509 
510 	CURVNET_RESTORE();
511 	return (0);
512 }
513 
/*
 * A handler for parent interface link layer address changes.
 * If the parent interface link layer address is changed we
 * should also change it on all children vlans.
 *
 * Registered for iflladdr_event in vlan_modevent(); 'ifp' is the
 * interface whose address changed.  The two loop headers below are
 * selected by VLAN_ARRAY and share the same loop body and closing brace.
 */
static void
vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
{
	struct ifvlan *ifv;
#ifndef VLAN_ARRAY
	struct ifvlan *next;
#endif
	int i;

	/*
	 * Check if it's a trunk interface first of all
	 * to avoid needless locking.
	 */
	if (ifp->if_vlantrunk == NULL)
		return;

	VLAN_LOCK();
	/*
	 * OK, it's a trunk.  Loop over and change all vlan's lladdrs on it.
	 */
#ifdef VLAN_ARRAY
	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
#else /* VLAN_ARRAY */
	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
		LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) {
#endif /* VLAN_ARRAY */
			/*
			 * The global lock is released around if_setlladdr().
			 * NOTE(review): the trunk and its table are re-read
			 * after relocking without re-validation; confirm that
			 * nothing can unconfigure vlans or destroy the trunk
			 * while the lock is dropped.
			 */
			VLAN_UNLOCK();
			if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp),
			    ifp->if_addrlen);
			VLAN_LOCK();
		}
	VLAN_UNLOCK();

}
554 
/*
 * A handler for network interface departure events.
 * Track departure of trunks here so that we don't access invalid
 * pointers or whatever if a trunk is ripped from under us, e.g.,
 * by ejecting its hot-plug card.  However, if an ifnet is simply
 * being renamed, then there's no need to tear down the state.
 *
 * Registered for ifnet_departure_event in vlan_modevent(); 'ifp' is
 * the departing interface.  Every vlan found on it is unconfigured with
 * the 'departing' argument of vlan_unconfig_locked() set to 1.
 */
static void
vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct ifvlan *ifv;
	int i;

	/*
	 * Check if it's a trunk interface first of all
	 * to avoid needless locking.
	 */
	if (ifp->if_vlantrunk == NULL)
		return;

	/* If the ifnet is just being renamed, don't do anything. */
	if (ifp->if_flags & IFF_RENAMING)
		return;

	VLAN_LOCK();
	/*
	 * OK, it's a trunk.  Loop over and detach all vlan's on it.
	 * Check trunk pointer after each vlan_unconfig() as it will
	 * free it and set to NULL after the last vlan was detached.
	 */
#ifdef VLAN_ARRAY
	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
			vlan_unconfig_locked(ifv->ifv_ifp, 1);
			if (ifp->if_vlantrunk == NULL)
				break;
		}
#else /* VLAN_ARRAY */
	/*
	 * Removing a vlan may resize the hash table, so the scan starts
	 * over from bucket 0 after every removal.
	 */
restart:
	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
		if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
			vlan_unconfig_locked(ifv->ifv_ifp, 1);
			if (ifp->if_vlantrunk)
				goto restart;	/* trunk->hwidth can change */
			else
				break;
		}
#endif /* VLAN_ARRAY */
	/* Trunk should have been destroyed in vlan_unconfig(). */
	KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
	VLAN_UNLOCK();
}
607 
608 /*
609  * Return the trunk device for a virtual interface.
610  */
611 static struct ifnet  *
612 vlan_trunkdev(struct ifnet *ifp)
613 {
614 	struct ifvlan *ifv;
615 
616 	if (ifp->if_type != IFT_L2VLAN)
617 		return (NULL);
618 	ifv = ifp->if_softc;
619 	ifp = NULL;
620 	VLAN_LOCK();
621 	if (ifv->ifv_trunk)
622 		ifp = PARENT(ifv);
623 	VLAN_UNLOCK();
624 	return (ifp);
625 }
626 
627 /*
628  * Return the 12-bit VLAN VID for this interface, for use by external
629  * components such as Infiniband.
630  *
631  * XXXRW: Note that the function name here is historical; it should be named
632  * vlan_vid().
633  */
634 static int
635 vlan_tag(struct ifnet *ifp, uint16_t *vidp)
636 {
637 	struct ifvlan *ifv;
638 
639 	if (ifp->if_type != IFT_L2VLAN)
640 		return (EINVAL);
641 	ifv = ifp->if_softc;
642 	*vidp = ifv->ifv_vid;
643 	return (0);
644 }
645 
646 /*
647  * Return a driver specific cookie for this interface.  Synchronization
648  * with setcookie must be provided by the driver.
649  */
650 static void *
651 vlan_cookie(struct ifnet *ifp)
652 {
653 	struct ifvlan *ifv;
654 
655 	if (ifp->if_type != IFT_L2VLAN)
656 		return (NULL);
657 	ifv = ifp->if_softc;
658 	return (ifv->ifv_cookie);
659 }
660 
661 /*
662  * Store a cookie in our softc that drivers can use to store driver
663  * private per-instance data in.
664  */
665 static int
666 vlan_setcookie(struct ifnet *ifp, void *cookie)
667 {
668 	struct ifvlan *ifv;
669 
670 	if (ifp->if_type != IFT_L2VLAN)
671 		return (EINVAL);
672 	ifv = ifp->if_softc;
673 	ifv->ifv_cookie = cookie;
674 	return (0);
675 }
676 
677 /*
678  * Return the vlan device present at the specific VID.
679  */
680 static struct ifnet *
681 vlan_devat(struct ifnet *ifp, uint16_t vid)
682 {
683 	struct ifvlantrunk *trunk;
684 	struct ifvlan *ifv;
685 	TRUNK_LOCK_READER;
686 
687 	trunk = ifp->if_vlantrunk;
688 	if (trunk == NULL)
689 		return (NULL);
690 	ifp = NULL;
691 	TRUNK_RLOCK(trunk);
692 	ifv = vlan_gethash(trunk, vid);
693 	if (ifv)
694 		ifp = ifv->ifv_ifp;
695 	TRUNK_RUNLOCK(trunk);
696 	return (ifp);
697 }
698 
/*
 * VLAN support can be loaded as a module.  The only place in the
 * system that's intimately aware of this is ether_input.  We hook
 * into this code through vlan_input_p which is defined there and
 * set here.  No one else in the system should be aware of this so
 * we use an explicit reference here.
 */
extern	void (*vlan_input_p)(struct ifnet *, struct mbuf *);

/* For if_link_state_change() eyes only... */
extern	void (*vlan_link_state_p)(struct ifnet *);
710 
711 static int
712 vlan_modevent(module_t mod, int type, void *data)
713 {
714 
715 	switch (type) {
716 	case MOD_LOAD:
717 		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
718 		    vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
719 		if (ifdetach_tag == NULL)
720 			return (ENOMEM);
721 		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
722 		    vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
723 		if (iflladdr_tag == NULL)
724 			return (ENOMEM);
725 		VLAN_LOCK_INIT();
726 		vlan_input_p = vlan_input;
727 		vlan_link_state_p = vlan_link_state;
728 		vlan_trunk_cap_p = vlan_trunk_capabilities;
729 		vlan_trunkdev_p = vlan_trunkdev;
730 		vlan_cookie_p = vlan_cookie;
731 		vlan_setcookie_p = vlan_setcookie;
732 		vlan_tag_p = vlan_tag;
733 		vlan_devat_p = vlan_devat;
734 #ifndef VIMAGE
735 		vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
736 		    vlan_clone_create, vlan_clone_destroy);
737 #endif
738 		if (bootverbose)
739 			printf("vlan: initialized, using "
740 #ifdef VLAN_ARRAY
741 			       "full-size arrays"
742 #else
743 			       "hash tables with chaining"
744 #endif
745 
746 			       "\n");
747 		break;
748 	case MOD_UNLOAD:
749 #ifndef VIMAGE
750 		if_clone_detach(vlan_cloner);
751 #endif
752 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
753 		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
754 		vlan_input_p = NULL;
755 		vlan_link_state_p = NULL;
756 		vlan_trunk_cap_p = NULL;
757 		vlan_trunkdev_p = NULL;
758 		vlan_tag_p = NULL;
759 		vlan_cookie_p = NULL;
760 		vlan_setcookie_p = NULL;
761 		vlan_devat_p = NULL;
762 		VLAN_LOCK_DESTROY();
763 		if (bootverbose)
764 			printf("vlan: unloaded\n");
765 		break;
766 	default:
767 		return (EOPNOTSUPP);
768 	}
769 	return (0);
770 }
771 
/* Module glue: name, event handler and (unused) private data. */
static moduledata_t vlan_mod = {
	"if_vlan",
	vlan_modevent,
	0
};

DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_vlan, 3);
780 
781 #ifdef VIMAGE
782 static void
783 vnet_vlan_init(const void *unused __unused)
784 {
785 
786 	vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
787 		    vlan_clone_create, vlan_clone_destroy);
788 	V_vlan_cloner = vlan_cloner;
789 }
790 VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
791     vnet_vlan_init, NULL);
792 
/* Per-vnet teardown: detach the cloner recorded by vnet_vlan_init(). */
static void
vnet_vlan_uninit(const void *unused __unused)
{

	if_clone_detach(V_vlan_cloner);
}
VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
    vnet_vlan_uninit, NULL);
801 #endif
802 
803 /*
804  * Check for <etherif>.<vlan> style interface names.
805  */
806 static struct ifnet *
807 vlan_clone_match_ethervid(const char *name, int *vidp)
808 {
809 	char ifname[IFNAMSIZ];
810 	char *cp;
811 	struct ifnet *ifp;
812 	int vid;
813 
814 	strlcpy(ifname, name, IFNAMSIZ);
815 	if ((cp = strchr(ifname, '.')) == NULL)
816 		return (NULL);
817 	*cp = '\0';
818 	if ((ifp = ifunit(ifname)) == NULL)
819 		return (NULL);
820 	/* Parse VID. */
821 	if (*++cp == '\0')
822 		return (NULL);
823 	vid = 0;
824 	for(; *cp >= '0' && *cp <= '9'; cp++)
825 		vid = (vid * 10) + (*cp - '0');
826 	if (*cp != '\0')
827 		return (NULL);
828 	if (vidp != NULL)
829 		*vidp = vid;
830 
831 	return (ifp);
832 }
833 
834 static int
835 vlan_clone_match(struct if_clone *ifc, const char *name)
836 {
837 	const char *cp;
838 
839 	if (vlan_clone_match_ethervid(name, NULL) != NULL)
840 		return (1);
841 
842 	if (strncmp(vlanname, name, strlen(vlanname)) != 0)
843 		return (0);
844 	for (cp = name + 4; *cp != '\0'; cp++) {
845 		if (*cp < '0' || *cp > '9')
846 			return (0);
847 	}
848 
849 	return (1);
850 }
851 
852 static int
853 vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
854 {
855 	char *dp;
856 	int wildcard;
857 	int unit;
858 	int error;
859 	int vid;
860 	int ethertag;
861 	struct ifvlan *ifv;
862 	struct ifnet *ifp;
863 	struct ifnet *p;
864 	struct ifaddr *ifa;
865 	struct sockaddr_dl *sdl;
866 	struct vlanreq vlr;
867 	static const u_char eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
868 
869 	/*
870 	 * There are 3 (ugh) ways to specify the cloned device:
871 	 * o pass a parameter block with the clone request.
872 	 * o specify parameters in the text of the clone device name
873 	 * o specify no parameters and get an unattached device that
874 	 *   must be configured separately.
875 	 * The first technique is preferred; the latter two are
876 	 * supported for backwards compatibilty.
877 	 *
878 	 * XXXRW: Note historic use of the word "tag" here.  New ioctls may be
879 	 * called for.
880 	 */
881 	if (params) {
882 		error = copyin(params, &vlr, sizeof(vlr));
883 		if (error)
884 			return error;
885 		p = ifunit(vlr.vlr_parent);
886 		if (p == NULL)
887 			return (ENXIO);
888 		error = ifc_name2unit(name, &unit);
889 		if (error != 0)
890 			return (error);
891 
892 		ethertag = 1;
893 		vid = vlr.vlr_tag;
894 		wildcard = (unit < 0);
895 	} else if ((p = vlan_clone_match_ethervid(name, &vid)) != NULL) {
896 		ethertag = 1;
897 		unit = -1;
898 		wildcard = 0;
899 	} else {
900 		ethertag = 0;
901 
902 		error = ifc_name2unit(name, &unit);
903 		if (error != 0)
904 			return (error);
905 
906 		wildcard = (unit < 0);
907 	}
908 
909 	error = ifc_alloc_unit(ifc, &unit);
910 	if (error != 0)
911 		return (error);
912 
913 	/* In the wildcard case, we need to update the name. */
914 	if (wildcard) {
915 		for (dp = name; *dp != '\0'; dp++);
916 		if (snprintf(dp, len - (dp-name), "%d", unit) >
917 		    len - (dp-name) - 1) {
918 			panic("%s: interface name too long", __func__);
919 		}
920 	}
921 
922 	ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO);
923 	ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER);
924 	if (ifp == NULL) {
925 		ifc_free_unit(ifc, unit);
926 		free(ifv, M_VLAN);
927 		return (ENOSPC);
928 	}
929 	SLIST_INIT(&ifv->vlan_mc_listhead);
930 	ifp->if_softc = ifv;
931 	/*
932 	 * Set the name manually rather than using if_initname because
933 	 * we don't conform to the default naming convention for interfaces.
934 	 */
935 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
936 	ifp->if_dname = vlanname;
937 	ifp->if_dunit = unit;
938 	/* NB: flags are not set here */
939 	ifp->if_linkmib = &ifv->ifv_mib;
940 	ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
941 	/* NB: mtu is not set here */
942 
943 	ifp->if_init = vlan_init;
944 	ifp->if_transmit = vlan_transmit;
945 	ifp->if_qflush = vlan_qflush;
946 	ifp->if_ioctl = vlan_ioctl;
947 	ifp->if_flags = VLAN_IFFLAGS;
948 	ether_ifattach(ifp, eaddr);
949 	/* Now undo some of the damage... */
950 	ifp->if_baudrate = 0;
951 	ifp->if_type = IFT_L2VLAN;
952 	ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
953 	ifa = ifp->if_addr;
954 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
955 	sdl->sdl_type = IFT_L2VLAN;
956 
957 	if (ethertag) {
958 		error = vlan_config(ifv, p, vid);
959 		if (error != 0) {
960 			/*
961 			 * Since we've partially failed, we need to back
962 			 * out all the way, otherwise userland could get
963 			 * confused.  Thus, we destroy the interface.
964 			 */
965 			ether_ifdetach(ifp);
966 			vlan_unconfig(ifp);
967 			if_free(ifp);
968 			ifc_free_unit(ifc, unit);
969 			free(ifv, M_VLAN);
970 
971 			return (error);
972 		}
973 
974 		/* Update flags on the parent, if necessary. */
975 		vlan_setflags(ifp, 1);
976 	}
977 
978 	return (0);
979 }
980 
981 static int
982 vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
983 {
984 	struct ifvlan *ifv = ifp->if_softc;
985 	int unit = ifp->if_dunit;
986 
987 	ether_ifdetach(ifp);	/* first, remove it from system-wide lists */
988 	vlan_unconfig(ifp);	/* now it can be unconfigured and freed */
989 	if_free(ifp);
990 	free(ifv, M_VLAN);
991 	ifc_free_unit(ifc, unit);
992 
993 	return (0);
994 }
995 
/*
 * The ifp->if_init entry point for vlan(4) is a no-op.
 * It is installed by vlan_clone_create(); the argument is ignored.
 */
static void
vlan_init(void *foo __unused)
{
}
1003 
1004 /*
1005  * The if_transmit method for vlan(4) interface.
1006  */
1007 static int
1008 vlan_transmit(struct ifnet *ifp, struct mbuf *m)
1009 {
1010 	struct ifvlan *ifv;
1011 	struct ifnet *p;
1012 	int error, len, mcast;
1013 
1014 	ifv = ifp->if_softc;
1015 	p = PARENT(ifv);
1016 	len = m->m_pkthdr.len;
1017 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
1018 
1019 	BPF_MTAP(ifp, m);
1020 
1021 	/*
1022 	 * Do not run parent's if_transmit() if the parent is not up,
1023 	 * or parent's driver will cause a system crash.
1024 	 */
1025 	if (!UP_AND_RUNNING(p)) {
1026 		m_freem(m);
1027 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1028 		return (ENETDOWN);
1029 	}
1030 
1031 	/*
1032 	 * Pad the frame to the minimum size allowed if told to.
1033 	 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
1034 	 * paragraph C.4.4.3.b.  It can help to work around buggy
1035 	 * bridges that violate paragraph C.4.4.3.a from the same
1036 	 * document, i.e., fail to pad short frames after untagging.
1037 	 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
1038 	 * untagging it will produce a 62-byte frame, which is a runt
1039 	 * and requires padding.  There are VLAN-enabled network
1040 	 * devices that just discard such runts instead or mishandle
1041 	 * them somehow.
1042 	 */
1043 	if (V_soft_pad && p->if_type == IFT_ETHER) {
1044 		static char pad[8];	/* just zeros */
1045 		int n;
1046 
1047 		for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
1048 		     n > 0; n -= sizeof(pad))
1049 			if (!m_append(m, min(n, sizeof(pad)), pad))
1050 				break;
1051 
1052 		if (n > 0) {
1053 			if_printf(ifp, "cannot pad short frame\n");
1054 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1055 			m_freem(m);
1056 			return (0);
1057 		}
1058 	}
1059 
1060 	/*
1061 	 * If underlying interface can do VLAN tag insertion itself,
1062 	 * just pass the packet along. However, we need some way to
1063 	 * tell the interface where the packet came from so that it
1064 	 * knows how to find the VLAN tag to use, so we attach a
1065 	 * packet tag that holds it.
1066 	 */
1067 	if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1068 		m->m_pkthdr.ether_vtag = ifv->ifv_vid;
1069 		m->m_flags |= M_VLANTAG;
1070 	} else {
1071 		m = ether_vlanencap(m, ifv->ifv_vid);
1072 		if (m == NULL) {
1073 			if_printf(ifp, "unable to prepend VLAN header\n");
1074 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1075 			return (0);
1076 		}
1077 	}
1078 
1079 	/*
1080 	 * Send it, precisely as ether_output() would have.
1081 	 */
1082 	error = (p->if_transmit)(p, m);
1083 	if (error == 0) {
1084 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
1085 		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
1086 		if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast);
1087 	} else
1088 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1089 	return (error);
1090 }
1091 
1092 /*
1093  * The ifp->if_qflush entry point for vlan(4) is a no-op.
1094  */
1095 static void
1096 vlan_qflush(struct ifnet *ifp __unused)
1097 {
1098 }
1099 
1100 static void
1101 vlan_input(struct ifnet *ifp, struct mbuf *m)
1102 {
1103 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1104 	struct ifvlan *ifv;
1105 	TRUNK_LOCK_READER;
1106 	uint16_t vid;
1107 
1108 	KASSERT(trunk != NULL, ("%s: no trunk", __func__));
1109 
1110 	if (m->m_flags & M_VLANTAG) {
1111 		/*
1112 		 * Packet is tagged, but m contains a normal
1113 		 * Ethernet frame; the tag is stored out-of-band.
1114 		 */
1115 		vid = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
1116 		m->m_flags &= ~M_VLANTAG;
1117 	} else {
1118 		struct ether_vlan_header *evl;
1119 
1120 		/*
1121 		 * Packet is tagged in-band as specified by 802.1q.
1122 		 */
1123 		switch (ifp->if_type) {
1124 		case IFT_ETHER:
1125 			if (m->m_len < sizeof(*evl) &&
1126 			    (m = m_pullup(m, sizeof(*evl))) == NULL) {
1127 				if_printf(ifp, "cannot pullup VLAN header\n");
1128 				return;
1129 			}
1130 			evl = mtod(m, struct ether_vlan_header *);
1131 			vid = EVL_VLANOFTAG(ntohs(evl->evl_tag));
1132 
1133 			/*
1134 			 * Remove the 802.1q header by copying the Ethernet
1135 			 * addresses over it and adjusting the beginning of
1136 			 * the data in the mbuf.  The encapsulated Ethernet
1137 			 * type field is already in place.
1138 			 */
1139 			bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
1140 			      ETHER_HDR_LEN - ETHER_TYPE_LEN);
1141 			m_adj(m, ETHER_VLAN_ENCAP_LEN);
1142 			break;
1143 
1144 		default:
1145 #ifdef INVARIANTS
1146 			panic("%s: %s has unsupported if_type %u",
1147 			      __func__, ifp->if_xname, ifp->if_type);
1148 #endif
1149 			m_freem(m);
1150 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
1151 			return;
1152 		}
1153 	}
1154 
1155 	TRUNK_RLOCK(trunk);
1156 	ifv = vlan_gethash(trunk, vid);
1157 	if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
1158 		TRUNK_RUNLOCK(trunk);
1159 		m_freem(m);
1160 		if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
1161 		return;
1162 	}
1163 	TRUNK_RUNLOCK(trunk);
1164 
1165 	m->m_pkthdr.rcvif = ifv->ifv_ifp;
1166 	if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1);
1167 
1168 	/* Pass it back through the parent's input routine. */
1169 	(*ifp->if_input)(ifv->ifv_ifp, m);
1170 }
1171 
1172 static int
1173 vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
1174 {
1175 	struct ifvlantrunk *trunk;
1176 	struct ifnet *ifp;
1177 	int error = 0;
1178 
1179 	/*
1180 	 * We can handle non-ethernet hardware types as long as
1181 	 * they handle the tagging and headers themselves.
1182 	 */
1183 	if (p->if_type != IFT_ETHER &&
1184 	    (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
1185 		return (EPROTONOSUPPORT);
1186 	if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
1187 		return (EPROTONOSUPPORT);
1188 	/*
1189 	 * Don't let the caller set up a VLAN VID with
1190 	 * anything except VLID bits.
1191 	 * VID numbers 0x0 and 0xFFF are reserved.
1192 	 */
1193 	if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK))
1194 		return (EINVAL);
1195 	if (ifv->ifv_trunk)
1196 		return (EBUSY);
1197 
1198 	if (p->if_vlantrunk == NULL) {
1199 		trunk = malloc(sizeof(struct ifvlantrunk),
1200 		    M_VLAN, M_WAITOK | M_ZERO);
1201 		vlan_inithash(trunk);
1202 		VLAN_LOCK();
1203 		if (p->if_vlantrunk != NULL) {
1204 			/* A race that that is very unlikely to be hit. */
1205 			vlan_freehash(trunk);
1206 			free(trunk, M_VLAN);
1207 			goto exists;
1208 		}
1209 		TRUNK_LOCK_INIT(trunk);
1210 		TRUNK_LOCK(trunk);
1211 		p->if_vlantrunk = trunk;
1212 		trunk->parent = p;
1213 	} else {
1214 		VLAN_LOCK();
1215 exists:
1216 		trunk = p->if_vlantrunk;
1217 		TRUNK_LOCK(trunk);
1218 	}
1219 
1220 	ifv->ifv_vid = vid;	/* must set this before vlan_inshash() */
1221 	error = vlan_inshash(trunk, ifv);
1222 	if (error)
1223 		goto done;
1224 	ifv->ifv_proto = ETHERTYPE_VLAN;
1225 	ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
1226 	ifv->ifv_mintu = ETHERMIN;
1227 	ifv->ifv_pflags = 0;
1228 
1229 	/*
1230 	 * If the parent supports the VLAN_MTU capability,
1231 	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames,
1232 	 * use it.
1233 	 */
1234 	if (p->if_capenable & IFCAP_VLAN_MTU) {
1235 		/*
1236 		 * No need to fudge the MTU since the parent can
1237 		 * handle extended frames.
1238 		 */
1239 		ifv->ifv_mtufudge = 0;
1240 	} else {
1241 		/*
1242 		 * Fudge the MTU by the encapsulation size.  This
1243 		 * makes us incompatible with strictly compliant
1244 		 * 802.1Q implementations, but allows us to use
1245 		 * the feature with other NetBSD implementations,
1246 		 * which might still be useful.
1247 		 */
1248 		ifv->ifv_mtufudge = ifv->ifv_encaplen;
1249 	}
1250 
1251 	ifv->ifv_trunk = trunk;
1252 	ifp = ifv->ifv_ifp;
1253 	/*
1254 	 * Initialize fields from our parent.  This duplicates some
1255 	 * work with ether_ifattach() but allows for non-ethernet
1256 	 * interfaces to also work.
1257 	 */
1258 	ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
1259 	ifp->if_baudrate = p->if_baudrate;
1260 	ifp->if_output = p->if_output;
1261 	ifp->if_input = p->if_input;
1262 	ifp->if_resolvemulti = p->if_resolvemulti;
1263 	ifp->if_addrlen = p->if_addrlen;
1264 	ifp->if_broadcastaddr = p->if_broadcastaddr;
1265 
1266 	/*
1267 	 * Copy only a selected subset of flags from the parent.
1268 	 * Other flags are none of our business.
1269 	 */
1270 #define VLAN_COPY_FLAGS (IFF_SIMPLEX)
1271 	ifp->if_flags &= ~VLAN_COPY_FLAGS;
1272 	ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS;
1273 #undef VLAN_COPY_FLAGS
1274 
1275 	ifp->if_link_state = p->if_link_state;
1276 
1277 	vlan_capabilities(ifv);
1278 
1279 	/*
1280 	 * Set up our interface address to reflect the underlying
1281 	 * physical interface's.
1282 	 */
1283 	bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen);
1284 	((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen =
1285 	    p->if_addrlen;
1286 
1287 	/*
1288 	 * Configure multicast addresses that may already be
1289 	 * joined on the vlan device.
1290 	 */
1291 	(void)vlan_setmulti(ifp); /* XXX: VLAN lock held */
1292 
1293 	/* We are ready for operation now. */
1294 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1295 done:
1296 	TRUNK_UNLOCK(trunk);
1297 	if (error == 0)
1298 		EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
1299 	VLAN_UNLOCK();
1300 
1301 	return (error);
1302 }
1303 
/*
 * Detach a vlan from its parent.  Wrapper that takes the vlan lock
 * around vlan_unconfig_locked(), with "departing" set to 0.
 */
static void
vlan_unconfig(struct ifnet *ifp)
{
	VLAN_LOCK();
	vlan_unconfig_locked(ifp, 0);
	VLAN_UNLOCK();
}
1312 
1313 static void
1314 vlan_unconfig_locked(struct ifnet *ifp, int departing)
1315 {
1316 	struct ifvlantrunk *trunk;
1317 	struct vlan_mc_entry *mc;
1318 	struct ifvlan *ifv;
1319 	struct ifnet  *parent;
1320 	int error;
1321 
1322 	VLAN_LOCK_ASSERT();
1323 
1324 	ifv = ifp->if_softc;
1325 	trunk = ifv->ifv_trunk;
1326 	parent = NULL;
1327 
1328 	if (trunk != NULL) {
1329 
1330 		TRUNK_LOCK(trunk);
1331 		parent = trunk->parent;
1332 
1333 		/*
1334 		 * Since the interface is being unconfigured, we need to
1335 		 * empty the list of multicast groups that we may have joined
1336 		 * while we were alive from the parent's list.
1337 		 */
1338 		while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
1339 			/*
1340 			 * If the parent interface is being detached,
1341 			 * all its multicast addresses have already
1342 			 * been removed.  Warn about errors if
1343 			 * if_delmulti() does fail, but don't abort as
1344 			 * all callers expect vlan destruction to
1345 			 * succeed.
1346 			 */
1347 			if (!departing) {
1348 				error = if_delmulti(parent,
1349 				    (struct sockaddr *)&mc->mc_addr);
1350 				if (error)
1351 					if_printf(ifp,
1352 		    "Failed to delete multicast address from parent: %d\n",
1353 					    error);
1354 			}
1355 			SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
1356 			free(mc, M_VLAN);
1357 		}
1358 
1359 		vlan_setflags(ifp, 0); /* clear special flags on parent */
1360 		vlan_remhash(trunk, ifv);
1361 		ifv->ifv_trunk = NULL;
1362 
1363 		/*
1364 		 * Check if we were the last.
1365 		 */
1366 		if (trunk->refcnt == 0) {
1367 			parent->if_vlantrunk = NULL;
1368 			/*
1369 			 * XXXGL: If some ithread has already entered
1370 			 * vlan_input() and is now blocked on the trunk
1371 			 * lock, then it should preempt us right after
1372 			 * unlock and finish its work. Then we will acquire
1373 			 * lock again in trunk_destroy().
1374 			 */
1375 			TRUNK_UNLOCK(trunk);
1376 			trunk_destroy(trunk);
1377 		} else
1378 			TRUNK_UNLOCK(trunk);
1379 	}
1380 
1381 	/* Disconnect from parent. */
1382 	if (ifv->ifv_pflags)
1383 		if_printf(ifp, "%s: ifv_pflags unclean\n", __func__);
1384 	ifp->if_mtu = ETHERMTU;
1385 	ifp->if_link_state = LINK_STATE_UNKNOWN;
1386 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1387 
1388 	/*
1389 	 * Only dispatch an event if vlan was
1390 	 * attached, otherwise there is nothing
1391 	 * to cleanup anyway.
1392 	 */
1393 	if (parent != NULL)
1394 		EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid);
1395 }
1396 
1397 /* Handle a reference counted flag that should be set on the parent as well */
1398 static int
1399 vlan_setflag(struct ifnet *ifp, int flag, int status,
1400 	     int (*func)(struct ifnet *, int))
1401 {
1402 	struct ifvlan *ifv;
1403 	int error;
1404 
1405 	/* XXX VLAN_LOCK_ASSERT(); */
1406 
1407 	ifv = ifp->if_softc;
1408 	status = status ? (ifp->if_flags & flag) : 0;
1409 	/* Now "status" contains the flag value or 0 */
1410 
1411 	/*
1412 	 * See if recorded parent's status is different from what
1413 	 * we want it to be.  If it is, flip it.  We record parent's
1414 	 * status in ifv_pflags so that we won't clear parent's flag
1415 	 * we haven't set.  In fact, we don't clear or set parent's
1416 	 * flags directly, but get or release references to them.
1417 	 * That's why we can be sure that recorded flags still are
1418 	 * in accord with actual parent's flags.
1419 	 */
1420 	if (status != (ifv->ifv_pflags & flag)) {
1421 		error = (*func)(PARENT(ifv), status);
1422 		if (error)
1423 			return (error);
1424 		ifv->ifv_pflags &= ~flag;
1425 		ifv->ifv_pflags |= status;
1426 	}
1427 	return (0);
1428 }
1429 
1430 /*
1431  * Handle IFF_* flags that require certain changes on the parent:
1432  * if "status" is true, update parent's flags respective to our if_flags;
1433  * if "status" is false, forcedly clear the flags set on parent.
1434  */
1435 static int
1436 vlan_setflags(struct ifnet *ifp, int status)
1437 {
1438 	int error, i;
1439 
1440 	for (i = 0; vlan_pflags[i].flag; i++) {
1441 		error = vlan_setflag(ifp, vlan_pflags[i].flag,
1442 				     status, vlan_pflags[i].func);
1443 		if (error)
1444 			return (error);
1445 	}
1446 	return (0);
1447 }
1448 
1449 /* Inform all vlans that their parent has changed link state */
1450 static void
1451 vlan_link_state(struct ifnet *ifp)
1452 {
1453 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1454 	struct ifvlan *ifv;
1455 	int i;
1456 
1457 	TRUNK_LOCK(trunk);
1458 #ifdef VLAN_ARRAY
1459 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1460 		if (trunk->vlans[i] != NULL) {
1461 			ifv = trunk->vlans[i];
1462 #else
1463 	for (i = 0; i < (1 << trunk->hwidth); i++)
1464 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) {
1465 #endif
1466 			ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate;
1467 			if_link_state_change(ifv->ifv_ifp,
1468 			    trunk->parent->if_link_state);
1469 		}
1470 	TRUNK_UNLOCK(trunk);
1471 }
1472 
1473 static void
1474 vlan_capabilities(struct ifvlan *ifv)
1475 {
1476 	struct ifnet *p = PARENT(ifv);
1477 	struct ifnet *ifp = ifv->ifv_ifp;
1478 	struct ifnet_hw_tsomax hw_tsomax;
1479 
1480 	TRUNK_LOCK_ASSERT(TRUNK(ifv));
1481 
1482 	/*
1483 	 * If the parent interface can do checksum offloading
1484 	 * on VLANs, then propagate its hardware-assisted
1485 	 * checksumming flags. Also assert that checksum
1486 	 * offloading requires hardware VLAN tagging.
1487 	 */
1488 	if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
1489 		ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
1490 
1491 	if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
1492 	    p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1493 		ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
1494 		ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
1495 		    CSUM_UDP | CSUM_SCTP);
1496 	} else {
1497 		ifp->if_capenable = 0;
1498 		ifp->if_hwassist = 0;
1499 	}
1500 	/*
1501 	 * If the parent interface can do TSO on VLANs then
1502 	 * propagate the hardware-assisted flag. TSO on VLANs
1503 	 * does not necessarily require hardware VLAN tagging.
1504 	 */
1505 	memset(&hw_tsomax, 0, sizeof(hw_tsomax));
1506 	if_hw_tsomax_common(p, &hw_tsomax);
1507 	if_hw_tsomax_update(ifp, &hw_tsomax);
1508 	if (p->if_capabilities & IFCAP_VLAN_HWTSO)
1509 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
1510 	if (p->if_capenable & IFCAP_VLAN_HWTSO) {
1511 		ifp->if_capenable |= p->if_capenable & IFCAP_TSO;
1512 		ifp->if_hwassist |= p->if_hwassist & CSUM_TSO;
1513 	} else {
1514 		ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
1515 		ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
1516 	}
1517 
1518 	/*
1519 	 * If the parent interface can offload TCP connections over VLANs then
1520 	 * propagate its TOE capability to the VLAN interface.
1521 	 *
1522 	 * All TOE drivers in the tree today can deal with VLANs.  If this
1523 	 * changes then IFCAP_VLAN_TOE should be promoted to a full capability
1524 	 * with its own bit.
1525 	 */
1526 #define	IFCAP_VLAN_TOE IFCAP_TOE
1527 	if (p->if_capabilities & IFCAP_VLAN_TOE)
1528 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
1529 	if (p->if_capenable & IFCAP_VLAN_TOE) {
1530 		TOEDEV(ifp) = TOEDEV(p);
1531 		ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
1532 	}
1533 }
1534 
1535 static void
1536 vlan_trunk_capabilities(struct ifnet *ifp)
1537 {
1538 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1539 	struct ifvlan *ifv;
1540 	int i;
1541 
1542 	TRUNK_LOCK(trunk);
1543 #ifdef VLAN_ARRAY
1544 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1545 		if (trunk->vlans[i] != NULL) {
1546 			ifv = trunk->vlans[i];
1547 #else
1548 	for (i = 0; i < (1 << trunk->hwidth); i++) {
1549 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
1550 #endif
1551 			vlan_capabilities(ifv);
1552 	}
1553 	TRUNK_UNLOCK(trunk);
1554 }
1555 
1556 static int
1557 vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1558 {
1559 	struct ifnet *p;
1560 	struct ifreq *ifr;
1561 	struct ifaddr *ifa;
1562 	struct ifvlan *ifv;
1563 	struct ifvlantrunk *trunk;
1564 	struct vlanreq vlr;
1565 	int error = 0;
1566 
1567 	ifr = (struct ifreq *)data;
1568 	ifa = (struct ifaddr *) data;
1569 	ifv = ifp->if_softc;
1570 
1571 	switch (cmd) {
1572 	case SIOCSIFADDR:
1573 		ifp->if_flags |= IFF_UP;
1574 #ifdef INET
1575 		if (ifa->ifa_addr->sa_family == AF_INET)
1576 			arp_ifinit(ifp, ifa);
1577 #endif
1578 		break;
1579 	case SIOCGIFADDR:
1580                 {
1581 			struct sockaddr *sa;
1582 
1583 			sa = (struct sockaddr *)&ifr->ifr_data;
1584 			bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen);
1585                 }
1586 		break;
1587 	case SIOCGIFMEDIA:
1588 		VLAN_LOCK();
1589 		if (TRUNK(ifv) != NULL) {
1590 			p = PARENT(ifv);
1591 			VLAN_UNLOCK();
1592 			error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data);
1593 			/* Limit the result to the parent's current config. */
1594 			if (error == 0) {
1595 				struct ifmediareq *ifmr;
1596 
1597 				ifmr = (struct ifmediareq *)data;
1598 				if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) {
1599 					ifmr->ifm_count = 1;
1600 					error = copyout(&ifmr->ifm_current,
1601 						ifmr->ifm_ulist,
1602 						sizeof(int));
1603 				}
1604 			}
1605 		} else {
1606 			VLAN_UNLOCK();
1607 			error = EINVAL;
1608 		}
1609 		break;
1610 
1611 	case SIOCSIFMEDIA:
1612 		error = EINVAL;
1613 		break;
1614 
1615 	case SIOCSIFMTU:
1616 		/*
1617 		 * Set the interface MTU.
1618 		 */
1619 		VLAN_LOCK();
1620 		if (TRUNK(ifv) != NULL) {
1621 			if (ifr->ifr_mtu >
1622 			     (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) ||
1623 			    ifr->ifr_mtu <
1624 			     (ifv->ifv_mintu - ifv->ifv_mtufudge))
1625 				error = EINVAL;
1626 			else
1627 				ifp->if_mtu = ifr->ifr_mtu;
1628 		} else
1629 			error = EINVAL;
1630 		VLAN_UNLOCK();
1631 		break;
1632 
1633 	case SIOCSETVLAN:
1634 #ifdef VIMAGE
1635 		/*
1636 		 * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN
1637 		 * interface to be delegated to a jail without allowing the
1638 		 * jail to change what underlying interface/VID it is
1639 		 * associated with.  We are not entirely convinced that this
1640 		 * is the right way to accomplish that policy goal.
1641 		 */
1642 		if (ifp->if_vnet != ifp->if_home_vnet) {
1643 			error = EPERM;
1644 			break;
1645 		}
1646 #endif
1647 		error = copyin(ifr->ifr_data, &vlr, sizeof(vlr));
1648 		if (error)
1649 			break;
1650 		if (vlr.vlr_parent[0] == '\0') {
1651 			vlan_unconfig(ifp);
1652 			break;
1653 		}
1654 		p = ifunit(vlr.vlr_parent);
1655 		if (p == NULL) {
1656 			error = ENOENT;
1657 			break;
1658 		}
1659 		error = vlan_config(ifv, p, vlr.vlr_tag);
1660 		if (error)
1661 			break;
1662 
1663 		/* Update flags on the parent, if necessary. */
1664 		vlan_setflags(ifp, 1);
1665 		break;
1666 
1667 	case SIOCGETVLAN:
1668 #ifdef VIMAGE
1669 		if (ifp->if_vnet != ifp->if_home_vnet) {
1670 			error = EPERM;
1671 			break;
1672 		}
1673 #endif
1674 		bzero(&vlr, sizeof(vlr));
1675 		VLAN_LOCK();
1676 		if (TRUNK(ifv) != NULL) {
1677 			strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
1678 			    sizeof(vlr.vlr_parent));
1679 			vlr.vlr_tag = ifv->ifv_vid;
1680 		}
1681 		VLAN_UNLOCK();
1682 		error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
1683 		break;
1684 
1685 	case SIOCSIFFLAGS:
1686 		/*
1687 		 * We should propagate selected flags to the parent,
1688 		 * e.g., promiscuous mode.
1689 		 */
1690 		if (TRUNK(ifv) != NULL)
1691 			error = vlan_setflags(ifp, 1);
1692 		break;
1693 
1694 	case SIOCADDMULTI:
1695 	case SIOCDELMULTI:
1696 		/*
1697 		 * If we don't have a parent, just remember the membership for
1698 		 * when we do.
1699 		 */
1700 		trunk = TRUNK(ifv);
1701 		if (trunk != NULL) {
1702 			TRUNK_LOCK(trunk);
1703 			error = vlan_setmulti(ifp);
1704 			TRUNK_UNLOCK(trunk);
1705 		}
1706 		break;
1707 
1708 	default:
1709 		error = EINVAL;
1710 		break;
1711 	}
1712 
1713 	return (error);
1714 }
1715