xref: /freebsd/sys/net/if_vlan.c (revision 84dfba8d183d31e3412639ecb4b8ad4433cf7e80)
1 /*-
2  * Copyright 1998 Massachusetts Institute of Technology
3  *
4  * Permission to use, copy, modify, and distribute this software and
5  * its documentation for any purpose and without fee is hereby
6  * granted, provided that both the above copyright notice and this
7  * permission notice appear in all copies, that both the above
8  * copyright notice and this permission notice appear in all
9  * supporting documentation, and that the name of M.I.T. not be used
10  * in advertising or publicity pertaining to distribution of the
11  * software without specific, written prior permission.  M.I.T. makes
12  * no representations about the suitability of this software for any
13  * purpose.  It is provided "as is" without express or implied
14  * warranty.
15  *
16  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /*
31  * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
32  * Might be extended some day to also handle IEEE 802.1p priority
33  * tagging.  This is sort of sneaky in the implementation, since
34  * we need to pretend to be enough of an Ethernet implementation
35  * to make arp work.  The way we do this is by telling everyone
36  * that we are an Ethernet, and then catch the packets that
37  * ether_output() sends to us via if_transmit(), rewrite them for
38  * use by the real outgoing interface, and ask it to send them.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include "opt_inet.h"
45 #include "opt_vlan.h"
46 
47 #include <sys/param.h>
48 #include <sys/eventhandler.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rwlock.h>
55 #include <sys/queue.h>
56 #include <sys/socket.h>
57 #include <sys/sockio.h>
58 #include <sys/sysctl.h>
59 #include <sys/systm.h>
60 #include <sys/sx.h>
61 
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_var.h>
66 #include <net/if_clone.h>
67 #include <net/if_dl.h>
68 #include <net/if_types.h>
69 #include <net/if_vlan_var.h>
70 #include <net/vnet.h>
71 
72 #ifdef INET
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #endif
76 
77 #define	VLAN_DEF_HWIDTH	4	/* initial hash width in bits: 1 << 4 buckets */
78 #define	VLAN_IFFLAGS	(IFF_BROADCAST | IFF_MULTICAST)	/* if_flags given to a new vlan ifnet */
79 
/* Non-zero when IFF_UP is set in if_flags and IFF_DRV_RUNNING in if_drv_flags. */
80 #define	UP_AND_RUNNING(ifp) \
81     ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING)
82 
83 LIST_HEAD(ifvlanhead, ifvlan);	/* head type for a chain of struct ifvlan */
84 
/*
 * Per-parent ("trunk") state: the parent ifnet plus the table that maps
 * a VLAN ID to the struct ifvlan configured for it.  The table is either
 * a fixed array (VLAN_ARRAY) or a resizable hash of lists.
 */
85 struct ifvlantrunk {
86 	struct	ifnet   *parent;	/* parent interface of this trunk */
87 	struct	rwlock	rw;		/* per-trunk lock, see the TRUNK_*() macros */
88 #ifdef VLAN_ARRAY
89 #define	VLAN_ARRAY_SIZE	(EVL_VLID_MASK + 1)
90 	struct	ifvlan	*vlans[VLAN_ARRAY_SIZE]; /* static table */
91 #else
92 	struct	ifvlanhead *hash;	/* dynamic hash-list table */
93 	uint16_t	hmask;		/* bucket index mask, (1 << hwidth) - 1 */
94 	uint16_t	hwidth;		/* log2 of the number of buckets */
95 #endif
96 	int		refcnt;		/* vlans currently inserted in the table */
97 };
98 
/* One link-level multicast address that vlan_setmulti() tracks for a vlan. */
99 struct vlan_mc_entry {
100 	struct sockaddr_dl		mc_addr;	/* address handed to if_addmulti()/if_delmulti() */
101 	SLIST_ENTRY(vlan_mc_entry)	mc_entries;	/* linkage on ifvlan's vlan_mc_listhead */
102 };
103 
/*
 * Software context of one vlan(4) interface; stored in the interface's
 * if_softc by vlan_clone_create().
 */
104 struct	ifvlan {
105 	struct	ifvlantrunk *ifv_trunk;	/* trunk we are attached to; tested against NULL by vlan_trunkdev() */
106 	struct	ifnet *ifv_ifp;		/* our own ifnet */
107 	void	*ifv_cookie;		/* opaque value for vlan_cookie()/vlan_setcookie() */
108 #define	TRUNK(ifv)	((ifv)->ifv_trunk)
109 #define	PARENT(ifv)	((ifv)->ifv_trunk->parent)
110 	int	ifv_pflags;	/* special flags we have set on parent */
111 	struct	ifv_linkmib {
112 		int	ifvm_encaplen;	/* encapsulation length */
113 		int	ifvm_mtufudge;	/* MTU fudged by this much */
114 		int	ifvm_mintu;	/* min transmission unit */
115 		uint16_t ifvm_proto;	/* encapsulation ethertype */
116 		uint16_t ifvm_tag;	/* tag to apply on packets leaving if */
117 	}	ifv_mib;
118 	SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;	/* multicast addresses tracked by vlan_setmulti() */
119 #ifndef VLAN_ARRAY
120 	LIST_ENTRY(ifvlan) ifv_list;	/* linkage on a trunk hash bucket */
121 #endif
122 };
/* Shorthands for the fields kept inside ifv_mib. */
123 #define	ifv_proto	ifv_mib.ifvm_proto
124 #define	ifv_vid		ifv_mib.ifvm_tag
125 #define	ifv_encaplen	ifv_mib.ifvm_encaplen
126 #define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
127 #define	ifv_mintu	ifv_mib.ifvm_mintu
128 
129 /* Special flags we should propagate to parent; the table ends with a {0, NULL} entry. */
130 static struct {
131 	int flag;				/* interface flag bit */
132 	int (*func)(struct ifnet *, int);	/* routine paired with that flag */
133 } vlan_pflags[] = {
134 	{IFF_PROMISC, ifpromisc},
135 	{IFF_ALLMULTI, if_allmulti},
136 	{0, NULL}
137 };
138 
/* sysctl nodes for this driver, declared under _net_link. */
139 SYSCTL_DECL(_net_link);
140 static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
141     "IEEE 802.1Q VLAN");
142 static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
143     "for consistency");
144 
145 static int soft_pad = 0;	/* when non-zero, vlan_transmit() pads short frames */
146 SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
147 	   "pad short frames before tagging");
148 
149 static const char vlanname[] = "vlan";	/* cloner name, if_dname, malloc type and lock name */
150 static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");
151 
/* Event handler registrations made at MOD_LOAD and removed at MOD_UNLOAD. */
152 static eventhandler_tag ifdetach_tag;
153 static eventhandler_tag iflladdr_tag;
154 
155 /*
156  * We have a global lock (an sx lock), that is used to serialize
157  * configuration changes and isn't used in normal packet delivery.
158  *
159  * We also have a per-trunk rwlock, that is locked shared on packet
160  * processing and exclusive when configuration is changed.
161  *
162  * The VLAN_ARRAY substitutes the dynamic hash with a static array
163  * with 4096 entries. In theory this can give a boost in processing,
164  * however in practice it does not. Probably this is because the array
165  * is too big to fit into CPU cache.
166  */
167 static struct sx ifv_lock;
168 #define	VLAN_LOCK_INIT()	sx_init(&ifv_lock, "vlan_global")
169 #define	VLAN_LOCK_DESTROY()	sx_destroy(&ifv_lock)
170 #define	VLAN_LOCK_ASSERT()	sx_assert(&ifv_lock, SA_LOCKED)
171 #define	VLAN_LOCK()		sx_xlock(&ifv_lock)	/* always taken exclusively */
172 #define	VLAN_UNLOCK()		sx_xunlock(&ifv_lock)
173 #define	TRUNK_LOCK_INIT(trunk)	rw_init(&(trunk)->rw, vlanname)
174 #define	TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
175 #define	TRUNK_LOCK(trunk)	rw_wlock(&(trunk)->rw)	/* exclusive (write) */
176 #define	TRUNK_UNLOCK(trunk)	rw_wunlock(&(trunk)->rw)
177 #define	TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED)
178 #define	TRUNK_RLOCK(trunk)	rw_rlock(&(trunk)->rw)	/* shared (read) */
179 #define	TRUNK_RUNLOCK(trunk)	rw_runlock(&(trunk)->rw)
180 #define	TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED)
181 
/*
 * Trunk table helpers.  The VLAN_ARRAY build does not use these
 * prototypes; it defines static inline versions further down.
 */
182 #ifndef VLAN_ARRAY
183 static	void vlan_inithash(struct ifvlantrunk *trunk);
184 static	void vlan_freehash(struct ifvlantrunk *trunk);
185 static	int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
186 static	int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
187 static	void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
188 static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
189 	uint16_t vid);
190 #endif
191 static	void trunk_destroy(struct ifvlantrunk *trunk);
192 
/* ifnet entry points and configuration helpers. */
193 static	void vlan_init(void *foo);
194 static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
195 static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
196 static	void vlan_qflush(struct ifnet *ifp);
197 static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
198     int (*func)(struct ifnet *, int));
199 static	int vlan_setflags(struct ifnet *ifp, int status);
200 static	int vlan_setmulti(struct ifnet *ifp);
201 static	int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
202 static	void vlan_unconfig(struct ifnet *ifp);
203 static	void vlan_unconfig_locked(struct ifnet *ifp, int departing);
204 static	int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
205 static	void vlan_link_state(struct ifnet *ifp);
206 static	void vlan_capabilities(struct ifvlan *ifv);
207 static	void vlan_trunk_capabilities(struct ifnet *ifp);
208 
/* Interface cloner callbacks. */
209 static	struct ifnet *vlan_clone_match_ethervid(struct if_clone *,
210     const char *, int *);
211 static	int vlan_clone_match(struct if_clone *, const char *);
212 static	int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
213 static	int vlan_clone_destroy(struct if_clone *, struct ifnet *);
214 
/* Event handlers registered by vlan_modevent(). */
215 static	void vlan_ifdetach(void *arg, struct ifnet *ifp);
216 static  void vlan_iflladdr(void *arg, struct ifnet *ifp);
217 
218 static struct if_clone *vlan_cloner;	/* cloner handle returned by if_clone_advanced() */
219 
220 #ifdef VIMAGE
/* Per-vnet copy of the cloner handle, detached by vnet_vlan_uninit(). */
221 static VNET_DEFINE(struct if_clone *, vlan_cloner);
222 #define	V_vlan_cloner	VNET(vlan_cloner)
223 #endif
224 
225 #ifndef VLAN_ARRAY
/* Bucket index for VID n: XOR n with itself shifted right by 8 and 4, then mask with m. */
226 #define HASH(n, m)	((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m))
227 
228 static void
229 vlan_inithash(struct ifvlantrunk *trunk)
230 {
231 	int i, n;
232 
233 	/*
234 	 * The trunk must not be locked here since we call malloc(M_WAITOK).
235 	 * It is OK in case this function is called before the trunk struct
236 	 * gets hooked up and becomes visible from other threads.
237 	 */
238 
239 	KASSERT(trunk->hwidth == 0 && trunk->hash == NULL,
240 	    ("%s: hash already initialized", __func__));
241 
242 	trunk->hwidth = VLAN_DEF_HWIDTH;
243 	n = 1 << trunk->hwidth;
244 	trunk->hmask = n - 1;
245 	trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK);
246 	for (i = 0; i < n; i++)
247 		LIST_INIT(&trunk->hash[i]);
248 }
249 
250 static void
251 vlan_freehash(struct ifvlantrunk *trunk)
252 {
253 #ifdef INVARIANTS
254 	int i;
255 
256 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
257 	for (i = 0; i < (1 << trunk->hwidth); i++)
258 		KASSERT(LIST_EMPTY(&trunk->hash[i]),
259 		    ("%s: hash table not empty", __func__));
260 #endif
261 	free(trunk->hash, M_VLAN);
262 	trunk->hash = NULL;
263 	trunk->hwidth = trunk->hmask = 0;
264 }
265 
266 static int
267 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
268 {
269 	int i, b;
270 	struct ifvlan *ifv2;
271 
272 	TRUNK_LOCK_ASSERT(trunk);
273 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
274 
275 	b = 1 << trunk->hwidth;
276 	i = HASH(ifv->ifv_vid, trunk->hmask);
277 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
278 		if (ifv->ifv_vid == ifv2->ifv_vid)
279 			return (EEXIST);
280 
281 	/*
282 	 * Grow the hash when the number of vlans exceeds half of the number of
283 	 * hash buckets squared. This will make the average linked-list length
284 	 * buckets/2.
285 	 */
286 	if (trunk->refcnt > (b * b) / 2) {
287 		vlan_growhash(trunk, 1);
288 		i = HASH(ifv->ifv_vid, trunk->hmask);
289 	}
290 	LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
291 	trunk->refcnt++;
292 
293 	return (0);
294 }
295 
296 static int
297 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
298 {
299 	int i, b;
300 	struct ifvlan *ifv2;
301 
302 	TRUNK_LOCK_ASSERT(trunk);
303 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
304 
305 	b = 1 << trunk->hwidth;
306 	i = HASH(ifv->ifv_vid, trunk->hmask);
307 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
308 		if (ifv2 == ifv) {
309 			trunk->refcnt--;
310 			LIST_REMOVE(ifv2, ifv_list);
311 			if (trunk->refcnt < (b * b) / 2)
312 				vlan_growhash(trunk, -1);
313 			return (0);
314 		}
315 
316 	panic("%s: vlan not found\n", __func__);
317 	return (ENOENT); /*NOTREACHED*/
318 }
319 
320 /*
321  * Grow the hash larger or smaller if memory permits.
322  */
323 static void
324 vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
325 {
326 	struct ifvlan *ifv;
327 	struct ifvlanhead *hash2;
328 	int hwidth2, i, j, n, n2;
329 
330 	TRUNK_LOCK_ASSERT(trunk);
331 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
332 
333 	if (howmuch == 0) {
334 		/* Harmless yet obvious coding error */
335 		printf("%s: howmuch is 0\n", __func__);
336 		return;
337 	}
338 
339 	hwidth2 = trunk->hwidth + howmuch;
340 	n = 1 << trunk->hwidth;
341 	n2 = 1 << hwidth2;
342 	/* Do not shrink the table below the default */
343 	if (hwidth2 < VLAN_DEF_HWIDTH)
344 		return;
345 
346 	/* M_NOWAIT because we're called with trunk mutex held */
347 	hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT);
348 	if (hash2 == NULL) {
349 		printf("%s: out of memory -- hash size not changed\n",
350 		    __func__);
351 		return;		/* We can live with the old hash table */
352 	}
353 	for (j = 0; j < n2; j++)
354 		LIST_INIT(&hash2[j]);
355 	for (i = 0; i < n; i++)
356 		while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
357 			LIST_REMOVE(ifv, ifv_list);
358 			j = HASH(ifv->ifv_vid, n2 - 1);
359 			LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
360 		}
361 	free(trunk->hash, M_VLAN);
362 	trunk->hash = hash2;
363 	trunk->hwidth = hwidth2;
364 	trunk->hmask = n2 - 1;
365 
366 	if (bootverbose)
367 		if_printf(trunk->parent,
368 		    "VLAN hash table resized from %d to %d buckets\n", n, n2);
369 }
370 
371 static __inline struct ifvlan *
372 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
373 {
374 	struct ifvlan *ifv;
375 
376 	TRUNK_LOCK_RASSERT(trunk);
377 
378 	LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
379 		if (ifv->ifv_vid == vid)
380 			return (ifv);
381 	return (NULL);
382 }
383 
384 #if 0
385 /* Debugging code to view the hashtables. */
/*
 * Compiled out.  Prints one line per bucket: the bucket index followed
 * by the if_xname of every vlan chained in that bucket.
 */
386 static void
387 vlan_dumphash(struct ifvlantrunk *trunk)
388 {
389 	int i;
390 	struct ifvlan *ifv;
391 
392 	for (i = 0; i < (1 << trunk->hwidth); i++) {
393 		printf("%d: ", i);
394 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
395 			printf("%s ", ifv->ifv_ifp->if_xname);
396 		printf("\n");
397 	}
398 }
399 #endif /* 0 */
400 #else
401 
402 static __inline struct ifvlan *
403 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
404 {
405 
406 	return trunk->vlans[vid];
407 }
408 
409 static __inline int
410 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
411 {
412 
413 	if (trunk->vlans[ifv->ifv_vid] != NULL)
414 		return EEXIST;
415 	trunk->vlans[ifv->ifv_vid] = ifv;
416 	trunk->refcnt++;
417 
418 	return (0);
419 }
420 
421 static __inline int
422 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
423 {
424 
425 	trunk->vlans[ifv->ifv_vid] = NULL;
426 	trunk->refcnt--;
427 
428 	return (0);
429 }
430 
/* VLAN_ARRAY variant: the table is embedded in the trunk, so there is nothing to free. */
431 static __inline void
432 vlan_freehash(struct ifvlantrunk *trunk)
433 {
434 }
435 
/* VLAN_ARRAY variant: the table is embedded in the trunk, so no setup is done here. */
436 static __inline void
437 vlan_inithash(struct ifvlantrunk *trunk)
438 {
439 }
440 
441 #endif /* !VLAN_ARRAY */
442 
443 static void
444 trunk_destroy(struct ifvlantrunk *trunk)
445 {
446 	VLAN_LOCK_ASSERT();
447 
448 	TRUNK_LOCK(trunk);
449 	vlan_freehash(trunk);
450 	trunk->parent->if_vlantrunk = NULL;
451 	TRUNK_UNLOCK(trunk);
452 	TRUNK_LOCK_DESTROY(trunk);
453 	free(trunk, M_VLAN);
454 }
455 
456 /*
457  * Program our multicast filter. What we're actually doing is
458  * programming the multicast filter of the parent. This has the
459  * side effect of causing the parent interface to receive multicast
460  * traffic that it doesn't really want, which ends up being discarded
461  * later by the upper protocol layers. Unfortunately, there's no way
462  * to avoid this: there really is only one physical interface.
463  *
464  * XXX: There is a possible race here if more than one thread is
465  *      modifying the multicast state of the vlan interface at the same time.
466  */
467 static int
468 vlan_setmulti(struct ifnet *ifp)
469 {
470 	struct ifnet		*ifp_p;
471 	struct ifmultiaddr	*ifma, *rifma = NULL;
472 	struct ifvlan		*sc;
473 	struct vlan_mc_entry	*mc;
474 	int			error;
475 
476 	/*VLAN_LOCK_ASSERT();*/
477 
478 	/* Find the parent. */
479 	sc = ifp->if_softc;
480 	ifp_p = PARENT(sc);
481 
482 	CURVNET_SET_QUIET(ifp_p->if_vnet);
483 
484 	/* First, remove any existing filter entries. */
485 	while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
486 		error = if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
487 		if (error)
488 			return (error);
489 		SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
490 		free(mc, M_VLAN);
491 	}
492 
493 	/* Now program new ones. */
494 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
495 		if (ifma->ifma_addr->sa_family != AF_LINK)
496 			continue;
497 		mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
498 		if (mc == NULL)
499 			return (ENOMEM);
500 		bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
501 		mc->mc_addr.sdl_index = ifp_p->if_index;
502 		SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
503 		error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
504 		    &rifma);
505 		if (error)
506 			return (error);
507 	}
508 
509 	CURVNET_RESTORE();
510 	return (0);
511 }
512 
513 /*
514  * A handler for parent interface link layer address changes.
515  * If the parent interface link layer address is changed we
516  * should also change it on all children vlans.
517  *
 * 'ifp' is the interface whose address changed; interfaces without a
 * trunk are ignored.  Each vlan found in the trunk's table gets the
 * parent's current address via if_setlladdr().
 *
 * NOTE(review): the global vlan lock is dropped around if_setlladdr()
 * while the table is still being walked, and the trunk pointer is
 * re-read from ifp on each pass; whether a concurrent unconfigure can
 * invalidate the walk should be confirmed.
518  */
519 static void
520 vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
521 {
522 	struct ifvlan *ifv;
523 #ifndef VLAN_ARRAY
524 	struct ifvlan *next;
525 #endif
526 	int i;
527 
528 	/*
529 	 * Check if it's a trunk interface first of all
530 	 * to avoid needless locking.
531 	 */
532 	if (ifp->if_vlantrunk == NULL)
533 		return;
534 
535 	VLAN_LOCK();
536 	/*
537 	 * OK, it's a trunk.  Loop over and change all vlan's lladdrs on it.
538 	 */
	/* Both variants open a brace that is closed after the #endif. */
539 #ifdef VLAN_ARRAY
540 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
541 		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
542 #else /* VLAN_ARRAY */
543 	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
544 		LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) {
545 #endif /* VLAN_ARRAY */
546 			VLAN_UNLOCK();
547 			if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp),
548 			    ifp->if_addrlen);
549 			VLAN_LOCK();
550 		}
551 	VLAN_UNLOCK();
552 
553 }
553 
554 /*
555  * A handler for network interface departure events.
556  * Track departure of trunks here so that we don't access invalid
557  * pointers or whatever if a trunk is ripped from under us, e.g.,
558  * by ejecting its hot-plug card.  However, if an ifnet is simply
559  * being renamed, then there's no need to tear down the state.
560  *
 * 'ifp' is the departing interface.  Every vlan attached to its trunk
 * is unconfigured with the 'departing' argument set to 1.
561  */
562 static void
563 vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
564 {
565 	struct ifvlan *ifv;
566 	int i;
567 
568 	/*
569 	 * Check if it's a trunk interface first of all
570 	 * to avoid needless locking.
571 	 */
572 	if (ifp->if_vlantrunk == NULL)
573 		return;
574 
575 	/* If the ifnet is just being renamed, don't do anything. */
576 	if (ifp->if_flags & IFF_RENAMING)
577 		return;
578 
579 	VLAN_LOCK();
580 	/*
581 	 * OK, it's a trunk.  Loop over and detach all vlan's on it.
582 	 * Check trunk pointer after each vlan_unconfig() as it will
583 	 * free it and set to NULL after the last vlan was detached.
584 	 */
585 #ifdef VLAN_ARRAY
586 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
587 		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
588 			vlan_unconfig_locked(ifv->ifv_ifp, 1);
589 			if (ifp->if_vlantrunk == NULL)
590 				break;
591 		}
592 #else /* VLAN_ARRAY */
	/*
	 * Removing a vlan may resize the hash, so the scan starts over
	 * from bucket 0 after every removal.
	 */
593 restart:
594 	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
595 		if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
596 			vlan_unconfig_locked(ifv->ifv_ifp, 1);
597 			if (ifp->if_vlantrunk)
598 				goto restart;	/* trunk->hwidth can change */
599 			else
600 				break;
601 		}
602 #endif /* VLAN_ARRAY */
603 	/* Trunk should have been destroyed in vlan_unconfig(). */
604 	KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
605 	VLAN_UNLOCK();
606 }
606 
607 /*
608  * Return the trunk device for a virtual interface.
609  */
610 static struct ifnet  *
611 vlan_trunkdev(struct ifnet *ifp)
612 {
613 	struct ifvlan *ifv;
614 
615 	if (ifp->if_type != IFT_L2VLAN)
616 		return (NULL);
617 	ifv = ifp->if_softc;
618 	ifp = NULL;
619 	VLAN_LOCK();
620 	if (ifv->ifv_trunk)
621 		ifp = PARENT(ifv);
622 	VLAN_UNLOCK();
623 	return (ifp);
624 }
625 
626 /*
627  * Return the 12-bit VLAN VID for this interface, for use by external
628  * components such as Infiniband.
629  *
630  * XXXRW: Note that the function name here is historical; it should be named
631  * vlan_vid().
632  */
633 static int
634 vlan_tag(struct ifnet *ifp, uint16_t *vidp)
635 {
636 	struct ifvlan *ifv;
637 
638 	if (ifp->if_type != IFT_L2VLAN)
639 		return (EINVAL);
640 	ifv = ifp->if_softc;
641 	*vidp = ifv->ifv_vid;
642 	return (0);
643 }
644 
645 /*
646  * Return a driver specific cookie for this interface.  Synchronization
647  * with setcookie must be provided by the driver.
648  */
649 static void *
650 vlan_cookie(struct ifnet *ifp)
651 {
652 	struct ifvlan *ifv;
653 
654 	if (ifp->if_type != IFT_L2VLAN)
655 		return (NULL);
656 	ifv = ifp->if_softc;
657 	return (ifv->ifv_cookie);
658 }
659 
660 /*
661  * Store a cookie in our softc that drivers can use to store driver
662  * private per-instance data in.
663  */
664 static int
665 vlan_setcookie(struct ifnet *ifp, void *cookie)
666 {
667 	struct ifvlan *ifv;
668 
669 	if (ifp->if_type != IFT_L2VLAN)
670 		return (EINVAL);
671 	ifv = ifp->if_softc;
672 	ifv->ifv_cookie = cookie;
673 	return (0);
674 }
675 
676 /*
677  * Return the vlan device present at the specific VID.
678  */
679 static struct ifnet *
680 vlan_devat(struct ifnet *ifp, uint16_t vid)
681 {
682 	struct ifvlantrunk *trunk;
683 	struct ifvlan *ifv;
684 
685 	trunk = ifp->if_vlantrunk;
686 	if (trunk == NULL)
687 		return (NULL);
688 	ifp = NULL;
689 	TRUNK_RLOCK(trunk);
690 	ifv = vlan_gethash(trunk, vid);
691 	if (ifv)
692 		ifp = ifv->ifv_ifp;
693 	TRUNK_RUNLOCK(trunk);
694 	return (ifp);
695 }
696 
697 /*
698  * VLAN support can be loaded as a module.  The only place in the
699  * system that's intimately aware of this is ether_input.  We hook
700  * into this code through vlan_input_p which is defined there and
701  * set here.  No one else in the system should be aware of this so
702  * we use an explicit reference here.
703  */
704 extern	void (*vlan_input_p)(struct ifnet *, struct mbuf *);
705 
706 /* For if_link_state_change() eyes only... */
707 extern	void (*vlan_link_state_p)(struct ifnet *);
708 
709 static int
710 vlan_modevent(module_t mod, int type, void *data)
711 {
712 
713 	switch (type) {
714 	case MOD_LOAD:
715 		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
716 		    vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
717 		if (ifdetach_tag == NULL)
718 			return (ENOMEM);
719 		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
720 		    vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
721 		if (iflladdr_tag == NULL)
722 			return (ENOMEM);
723 		VLAN_LOCK_INIT();
724 		vlan_input_p = vlan_input;
725 		vlan_link_state_p = vlan_link_state;
726 		vlan_trunk_cap_p = vlan_trunk_capabilities;
727 		vlan_trunkdev_p = vlan_trunkdev;
728 		vlan_cookie_p = vlan_cookie;
729 		vlan_setcookie_p = vlan_setcookie;
730 		vlan_tag_p = vlan_tag;
731 		vlan_devat_p = vlan_devat;
732 #ifndef VIMAGE
733 		vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
734 		    vlan_clone_create, vlan_clone_destroy);
735 #endif
736 		if (bootverbose)
737 			printf("vlan: initialized, using "
738 #ifdef VLAN_ARRAY
739 			       "full-size arrays"
740 #else
741 			       "hash tables with chaining"
742 #endif
743 
744 			       "\n");
745 		break;
746 	case MOD_UNLOAD:
747 #ifndef VIMAGE
748 		if_clone_detach(vlan_cloner);
749 #endif
750 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
751 		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
752 		vlan_input_p = NULL;
753 		vlan_link_state_p = NULL;
754 		vlan_trunk_cap_p = NULL;
755 		vlan_trunkdev_p = NULL;
756 		vlan_tag_p = NULL;
757 		vlan_cookie_p = NULL;
758 		vlan_setcookie_p = NULL;
759 		vlan_devat_p = NULL;
760 		VLAN_LOCK_DESTROY();
761 		if (bootverbose)
762 			printf("vlan: unloaded\n");
763 		break;
764 	default:
765 		return (EOPNOTSUPP);
766 	}
767 	return (0);
768 }
769 
/* Module description handed to DECLARE_MODULE() below. */
770 static moduledata_t vlan_mod = {
771 	"if_vlan",		/* module name */
772 	vlan_modevent,		/* event handler */
773 	0
774 };
775 
776 DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
777 MODULE_VERSION(if_vlan, 3);
778 
#ifdef VIMAGE
/*
 * Per-vnet setup: attach the vlan cloner and record the handle both in
 * the file-scope pointer and in this vnet's V_vlan_cloner.
 */
static void
vnet_vlan_init(const void *unused __unused)
{
	struct if_clone *cloner;

	cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
	    vlan_clone_create, vlan_clone_destroy);
	vlan_cloner = cloner;
	V_vlan_cloner = cloner;
}
VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
    vnet_vlan_init, NULL);

/*
 * Per-vnet teardown: detach the cloner recorded for this vnet.
 */
static void
vnet_vlan_uninit(const void *unused __unused)
{
	struct if_clone *cloner;

	cloner = V_vlan_cloner;
	if_clone_detach(cloner);
}
VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
    vnet_vlan_uninit, NULL);
#endif
800 
801 static struct ifnet *
802 vlan_clone_match_ethervid(struct if_clone *ifc, const char *name, int *vidp)
803 {
804 	const char *cp;
805 	struct ifnet *ifp;
806 	int vid;
807 
808 	/* Check for <etherif>.<vlan> style interface names. */
809 	IFNET_RLOCK_NOSLEEP();
810 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
811 		/*
812 		 * We can handle non-ethernet hardware types as long as
813 		 * they handle the tagging and headers themselves.
814 		 */
815 		if (ifp->if_type != IFT_ETHER &&
816 		    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
817 			continue;
818 		if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
819 			continue;
820 		cp = name + strlen(ifp->if_xname);
821 		if (*cp++ != '.')
822 			continue;
823 		if (*cp == '\0')
824 			continue;
825 		vid = 0;
826 		for(; *cp >= '0' && *cp <= '9'; cp++)
827 			vid = (vid * 10) + (*cp - '0');
828 		if (*cp != '\0')
829 			continue;
830 		if (vidp != NULL)
831 			*vidp = vid;
832 		break;
833 	}
834 	IFNET_RUNLOCK_NOSLEEP();
835 
836 	return (ifp);
837 }
838 
839 static int
840 vlan_clone_match(struct if_clone *ifc, const char *name)
841 {
842 	const char *cp;
843 
844 	if (vlan_clone_match_ethervid(ifc, name, NULL) != NULL)
845 		return (1);
846 
847 	if (strncmp(vlanname, name, strlen(vlanname)) != 0)
848 		return (0);
849 	for (cp = name + 4; *cp != '\0'; cp++) {
850 		if (*cp < '0' || *cp > '9')
851 			return (0);
852 	}
853 
854 	return (1);
855 }
856 
/*
 * Cloner create callback: allocate a vlan interface called 'name' and,
 * when a parent and VID were supplied, attach it to that parent.
 *
 * 'name' is a buffer of 'len' bytes; for a wildcard request the chosen
 * unit number is appended to it in place.  'params', when not NULL, is
 * a user-space pointer to a struct vlanreq naming the parent and VID.
 *
 * Returns 0 on success or an errno value; on failure the interface and
 * the unit number are released again.
 */
857 static int
858 vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
859 {
860 	char *dp;
861 	int wildcard;
862 	int unit;
863 	int error;
864 	int vid;
865 	int ethertag;
866 	struct ifvlan *ifv;
867 	struct ifnet *ifp;
868 	struct ifnet *p;
869 	struct ifaddr *ifa;
870 	struct sockaddr_dl *sdl;
871 	struct vlanreq vlr;
872 	static const u_char eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
873 
874 	/*
875 	 * There are 3 (ugh) ways to specify the cloned device:
876 	 * o pass a parameter block with the clone request.
877 	 * o specify parameters in the text of the clone device name
878 	 * o specify no parameters and get an unattached device that
879 	 *   must be configured separately.
880 	 * The first technique is preferred; the latter two are
881 	 * supported for backwards compatibility.
882 	 *
883 	 * XXXRW: Note historic use of the word "tag" here.  New ioctls may be
884 	 * called for.
885 	 */
886 	if (params) {
887 		error = copyin(params, &vlr, sizeof(vlr));
888 		if (error)
889 			return error;
890 		p = ifunit(vlr.vlr_parent);
891 		if (p == NULL)
892 			return ENXIO;
893 		/*
894 		 * Don't let the caller set up a VLAN VID with
895 		 * anything except VLID bits.
896 		 */
897 		if (vlr.vlr_tag & ~EVL_VLID_MASK)
898 			return (EINVAL);
899 		error = ifc_name2unit(name, &unit);
900 		if (error != 0)
901 			return (error);
902 
903 		ethertag = 1;
904 		vid = vlr.vlr_tag;
905 		wildcard = (unit < 0);
906 	} else if ((p = vlan_clone_match_ethervid(ifc, name, &vid)) != NULL) {
907 		ethertag = 1;
908 		unit = -1;
909 		wildcard = 0;
910 
911 		/*
912 		 * Don't let the caller set up a VLAN VID with
913 		 * anything except VLID bits.
914 		 */
915 		if (vid & ~EVL_VLID_MASK)
916 			return (EINVAL);
917 	} else {
		/* No parent given: vid is left unset, and p (NULL here) and vid are unused because ethertag is 0. */
918 		ethertag = 0;
919 
920 		error = ifc_name2unit(name, &unit);
921 		if (error != 0)
922 			return (error);
923 
924 		wildcard = (unit < 0);
925 	}
926 
927 	error = ifc_alloc_unit(ifc, &unit);
928 	if (error != 0)
929 		return (error);
930 
931 	/* In the wildcard case, we need to update the name. */
932 	if (wildcard) {
		/* Advance dp to the terminating NUL of name. */
933 		for (dp = name; *dp != '\0'; dp++);
934 		if (snprintf(dp, len - (dp-name), "%d", unit) >
935 		    len - (dp-name) - 1) {
936 			panic("%s: interface name too long", __func__);
937 		}
938 	}
939 
940 	ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO);
941 	ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER);
942 	if (ifp == NULL) {
943 		ifc_free_unit(ifc, unit);
944 		free(ifv, M_VLAN);
945 		return (ENOSPC);
946 	}
947 	SLIST_INIT(&ifv->vlan_mc_listhead);
948 
949 	ifp->if_softc = ifv;
950 	/*
951 	 * Set the name manually rather than using if_initname because
952 	 * we don't conform to the default naming convention for interfaces.
953 	 */
954 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
955 	ifp->if_dname = vlanname;
956 	ifp->if_dunit = unit;
957 	/* NB: flags are not set here */
958 	ifp->if_linkmib = &ifv->ifv_mib;
959 	ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
960 	/* NB: mtu is not set here */
961 
962 	ifp->if_init = vlan_init;
963 	ifp->if_transmit = vlan_transmit;
964 	ifp->if_qflush = vlan_qflush;
965 	ifp->if_ioctl = vlan_ioctl;
966 	ifp->if_flags = VLAN_IFFLAGS;
967 	ether_ifattach(ifp, eaddr);
968 	/* Now undo some of the damage... */
969 	ifp->if_baudrate = 0;
970 	ifp->if_type = IFT_L2VLAN;
971 	ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
972 	ifa = ifp->if_addr;
973 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
974 	sdl->sdl_type = IFT_L2VLAN;
975 
976 	if (ethertag) {
977 		error = vlan_config(ifv, p, vid);
978 		if (error != 0) {
979 			/*
980 			 * Since we've partially failed, we need to back
981 			 * out all the way, otherwise userland could get
982 			 * confused.  Thus, we destroy the interface.
983 			 */
984 			ether_ifdetach(ifp);
985 			vlan_unconfig(ifp);
986 			if_free(ifp);
987 			ifc_free_unit(ifc, unit);
988 			free(ifv, M_VLAN);
989 
990 			return (error);
991 		}
992 
993 		/* Update flags on the parent, if necessary. */
994 		vlan_setflags(ifp, 1);
995 	}
996 
997 	return (0);
998 }
999 
1000 static int
1001 vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
1002 {
1003 	struct ifvlan *ifv = ifp->if_softc;
1004 	int unit = ifp->if_dunit;
1005 
1006 	ether_ifdetach(ifp);	/* first, remove it from system-wide lists */
1007 	vlan_unconfig(ifp);	/* now it can be unconfigured and freed */
1008 	if_free(ifp);
1009 	free(ifv, M_VLAN);
1010 	ifc_free_unit(ifc, unit);
1011 
1012 	return (0);
1013 }
1014 
1015 /*
1016  * The ifp->if_init entry point for vlan(4) is a no-op.
1017  * It is installed as if_init by vlan_clone_create(); the argument is unused.
1018  */
1019 static void
1020 vlan_init(void *foo __unused)
1021 {
1022 }
1022 
1023 /*
1024  * The if_transmit method for vlan(4) interface.
1025  */
1026 static int
1027 vlan_transmit(struct ifnet *ifp, struct mbuf *m)
1028 {
1029 	struct ifvlan *ifv;
1030 	struct ifnet *p;
1031 	int error, len, mcast;
1032 
1033 	ifv = ifp->if_softc;
1034 	p = PARENT(ifv);
1035 	len = m->m_pkthdr.len;
1036 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
1037 
1038 	BPF_MTAP(ifp, m);
1039 
1040 	/*
1041 	 * Do not run parent's if_transmit() if the parent is not up,
1042 	 * or parent's driver will cause a system crash.
1043 	 */
1044 	if (!UP_AND_RUNNING(p)) {
1045 		m_freem(m);
1046 		ifp->if_oerrors++;
1047 		return (ENETDOWN);
1048 	}
1049 
1050 	/*
1051 	 * Pad the frame to the minimum size allowed if told to.
1052 	 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
1053 	 * paragraph C.4.4.3.b.  It can help to work around buggy
1054 	 * bridges that violate paragraph C.4.4.3.a from the same
1055 	 * document, i.e., fail to pad short frames after untagging.
1056 	 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
1057 	 * untagging it will produce a 62-byte frame, which is a runt
1058 	 * and requires padding.  There are VLAN-enabled network
1059 	 * devices that just discard such runts instead or mishandle
1060 	 * them somehow.
1061 	 */
1062 	if (soft_pad && p->if_type == IFT_ETHER) {
1063 		static char pad[8];	/* just zeros */
1064 		int n;
1065 
1066 		for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
1067 		     n > 0; n -= sizeof(pad))
1068 			if (!m_append(m, min(n, sizeof(pad)), pad))
1069 				break;
1070 
1071 		if (n > 0) {
1072 			if_printf(ifp, "cannot pad short frame\n");
1073 			ifp->if_oerrors++;
1074 			m_freem(m);
1075 			return (0);
1076 		}
1077 	}
1078 
1079 	/*
1080 	 * If underlying interface can do VLAN tag insertion itself,
1081 	 * just pass the packet along. However, we need some way to
1082 	 * tell the interface where the packet came from so that it
1083 	 * knows how to find the VLAN tag to use, so we attach a
1084 	 * packet tag that holds it.
1085 	 */
1086 	if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1087 		m->m_pkthdr.ether_vtag = ifv->ifv_vid;
1088 		m->m_flags |= M_VLANTAG;
1089 	} else {
1090 		m = ether_vlanencap(m, ifv->ifv_vid);
1091 		if (m == NULL) {
1092 			if_printf(ifp, "unable to prepend VLAN header\n");
1093 			ifp->if_oerrors++;
1094 			return (0);
1095 		}
1096 	}
1097 
1098 	/*
1099 	 * Send it, precisely as ether_output() would have.
1100 	 */
1101 	error = (p->if_transmit)(p, m);
1102 	if (!error) {
1103 		ifp->if_opackets++;
1104 		ifp->if_omcasts += mcast;
1105 		ifp->if_obytes += len;
1106 	} else
1107 		ifp->if_oerrors++;
1108 	return (error);
1109 }
1110 
1111 /*
1112  * The ifp->if_qflush entry point for vlan(4) is a no-op.
1113  */
1114 static void
1115 vlan_qflush(struct ifnet *ifp __unused)
1116 {
1117 }
1118 
1119 static void
1120 vlan_input(struct ifnet *ifp, struct mbuf *m)
1121 {
1122 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1123 	struct ifvlan *ifv;
1124 	uint16_t vid;
1125 
1126 	KASSERT(trunk != NULL, ("%s: no trunk", __func__));
1127 
1128 	if (m->m_flags & M_VLANTAG) {
1129 		/*
1130 		 * Packet is tagged, but m contains a normal
1131 		 * Ethernet frame; the tag is stored out-of-band.
1132 		 */
1133 		vid = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
1134 		m->m_flags &= ~M_VLANTAG;
1135 	} else {
1136 		struct ether_vlan_header *evl;
1137 
1138 		/*
1139 		 * Packet is tagged in-band as specified by 802.1q.
1140 		 */
1141 		switch (ifp->if_type) {
1142 		case IFT_ETHER:
1143 			if (m->m_len < sizeof(*evl) &&
1144 			    (m = m_pullup(m, sizeof(*evl))) == NULL) {
1145 				if_printf(ifp, "cannot pullup VLAN header\n");
1146 				return;
1147 			}
1148 			evl = mtod(m, struct ether_vlan_header *);
1149 			vid = EVL_VLANOFTAG(ntohs(evl->evl_tag));
1150 
1151 			/*
1152 			 * Remove the 802.1q header by copying the Ethernet
1153 			 * addresses over it and adjusting the beginning of
1154 			 * the data in the mbuf.  The encapsulated Ethernet
1155 			 * type field is already in place.
1156 			 */
1157 			bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
1158 			      ETHER_HDR_LEN - ETHER_TYPE_LEN);
1159 			m_adj(m, ETHER_VLAN_ENCAP_LEN);
1160 			break;
1161 
1162 		default:
1163 #ifdef INVARIANTS
1164 			panic("%s: %s has unsupported if_type %u",
1165 			      __func__, ifp->if_xname, ifp->if_type);
1166 #endif
1167 			m_freem(m);
1168 			ifp->if_noproto++;
1169 			return;
1170 		}
1171 	}
1172 
1173 	TRUNK_RLOCK(trunk);
1174 	ifv = vlan_gethash(trunk, vid);
1175 	if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
1176 		TRUNK_RUNLOCK(trunk);
1177 		m_freem(m);
1178 		ifp->if_noproto++;
1179 		return;
1180 	}
1181 	TRUNK_RUNLOCK(trunk);
1182 
1183 	m->m_pkthdr.rcvif = ifv->ifv_ifp;
1184 	ifv->ifv_ifp->if_ipackets++;
1185 
1186 	/* Pass it back through the parent's input routine. */
1187 	(*ifp->if_input)(ifv->ifv_ifp, m);
1188 }
1189 
1190 static int
1191 vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
1192 {
1193 	struct ifvlantrunk *trunk;
1194 	struct ifnet *ifp;
1195 	int error = 0;
1196 
1197 	/* VID numbers 0x0 and 0xFFF are reserved */
1198 	if (vid == 0 || vid == 0xFFF)
1199 		return (EINVAL);
1200 	if (p->if_type != IFT_ETHER &&
1201 	    (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
1202 		return (EPROTONOSUPPORT);
1203 	if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
1204 		return (EPROTONOSUPPORT);
1205 	if (ifv->ifv_trunk)
1206 		return (EBUSY);
1207 
1208 	if (p->if_vlantrunk == NULL) {
1209 		trunk = malloc(sizeof(struct ifvlantrunk),
1210 		    M_VLAN, M_WAITOK | M_ZERO);
1211 		vlan_inithash(trunk);
1212 		VLAN_LOCK();
1213 		if (p->if_vlantrunk != NULL) {
1214 			/* A race that that is very unlikely to be hit. */
1215 			vlan_freehash(trunk);
1216 			free(trunk, M_VLAN);
1217 			goto exists;
1218 		}
1219 		TRUNK_LOCK_INIT(trunk);
1220 		TRUNK_LOCK(trunk);
1221 		p->if_vlantrunk = trunk;
1222 		trunk->parent = p;
1223 	} else {
1224 		VLAN_LOCK();
1225 exists:
1226 		trunk = p->if_vlantrunk;
1227 		TRUNK_LOCK(trunk);
1228 	}
1229 
1230 	ifv->ifv_vid = vid;	/* must set this before vlan_inshash() */
1231 	error = vlan_inshash(trunk, ifv);
1232 	if (error)
1233 		goto done;
1234 	ifv->ifv_proto = ETHERTYPE_VLAN;
1235 	ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
1236 	ifv->ifv_mintu = ETHERMIN;
1237 	ifv->ifv_pflags = 0;
1238 
1239 	/*
1240 	 * If the parent supports the VLAN_MTU capability,
1241 	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames,
1242 	 * use it.
1243 	 */
1244 	if (p->if_capenable & IFCAP_VLAN_MTU) {
1245 		/*
1246 		 * No need to fudge the MTU since the parent can
1247 		 * handle extended frames.
1248 		 */
1249 		ifv->ifv_mtufudge = 0;
1250 	} else {
1251 		/*
1252 		 * Fudge the MTU by the encapsulation size.  This
1253 		 * makes us incompatible with strictly compliant
1254 		 * 802.1Q implementations, but allows us to use
1255 		 * the feature with other NetBSD implementations,
1256 		 * which might still be useful.
1257 		 */
1258 		ifv->ifv_mtufudge = ifv->ifv_encaplen;
1259 	}
1260 
1261 	ifv->ifv_trunk = trunk;
1262 	ifp = ifv->ifv_ifp;
1263 	/*
1264 	 * Initialize fields from our parent.  This duplicates some
1265 	 * work with ether_ifattach() but allows for non-ethernet
1266 	 * interfaces to also work.
1267 	 */
1268 	ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
1269 	ifp->if_baudrate = p->if_baudrate;
1270 	ifp->if_output = p->if_output;
1271 	ifp->if_input = p->if_input;
1272 	ifp->if_resolvemulti = p->if_resolvemulti;
1273 	ifp->if_addrlen = p->if_addrlen;
1274 	ifp->if_broadcastaddr = p->if_broadcastaddr;
1275 
1276 	/*
1277 	 * Copy only a selected subset of flags from the parent.
1278 	 * Other flags are none of our business.
1279 	 */
1280 #define VLAN_COPY_FLAGS (IFF_SIMPLEX)
1281 	ifp->if_flags &= ~VLAN_COPY_FLAGS;
1282 	ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS;
1283 #undef VLAN_COPY_FLAGS
1284 
1285 	ifp->if_link_state = p->if_link_state;
1286 
1287 	vlan_capabilities(ifv);
1288 
1289 	/*
1290 	 * Set up our interface address to reflect the underlying
1291 	 * physical interface's.
1292 	 */
1293 	bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen);
1294 	((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen =
1295 	    p->if_addrlen;
1296 
1297 	/*
1298 	 * Configure multicast addresses that may already be
1299 	 * joined on the vlan device.
1300 	 */
1301 	(void)vlan_setmulti(ifp); /* XXX: VLAN lock held */
1302 
1303 	/* We are ready for operation now. */
1304 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1305 done:
1306 	TRUNK_UNLOCK(trunk);
1307 	if (error == 0)
1308 		EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
1309 	VLAN_UNLOCK();
1310 
1311 	return (error);
1312 }
1313 
/*
 * Detach a vlan interface from its parent.
 *
 * Wrapper that takes the VLAN lock around vlan_unconfig_locked(),
 * called with "departing" set to 0.
 */
static void
vlan_unconfig(struct ifnet *ifp)
{
	const int departing = 0;

	VLAN_LOCK();
	vlan_unconfig_locked(ifp, departing);
	VLAN_UNLOCK();
}
1322 
1323 static void
1324 vlan_unconfig_locked(struct ifnet *ifp, int departing)
1325 {
1326 	struct ifvlantrunk *trunk;
1327 	struct vlan_mc_entry *mc;
1328 	struct ifvlan *ifv;
1329 	struct ifnet  *parent;
1330 	int error;
1331 
1332 	VLAN_LOCK_ASSERT();
1333 
1334 	ifv = ifp->if_softc;
1335 	trunk = ifv->ifv_trunk;
1336 	parent = NULL;
1337 
1338 	if (trunk != NULL) {
1339 
1340 		TRUNK_LOCK(trunk);
1341 		parent = trunk->parent;
1342 
1343 		/*
1344 		 * Since the interface is being unconfigured, we need to
1345 		 * empty the list of multicast groups that we may have joined
1346 		 * while we were alive from the parent's list.
1347 		 */
1348 		while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
1349 			/*
1350 			 * If the parent interface is being detached,
1351 			 * all its multicast addresses have already
1352 			 * been removed.  Warn about errors if
1353 			 * if_delmulti() does fail, but don't abort as
1354 			 * all callers expect vlan destruction to
1355 			 * succeed.
1356 			 */
1357 			if (!departing) {
1358 				error = if_delmulti(parent,
1359 				    (struct sockaddr *)&mc->mc_addr);
1360 				if (error)
1361 					if_printf(ifp,
1362 		    "Failed to delete multicast address from parent: %d\n",
1363 					    error);
1364 			}
1365 			SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
1366 			free(mc, M_VLAN);
1367 		}
1368 
1369 		vlan_setflags(ifp, 0); /* clear special flags on parent */
1370 		vlan_remhash(trunk, ifv);
1371 		ifv->ifv_trunk = NULL;
1372 
1373 		/*
1374 		 * Check if we were the last.
1375 		 */
1376 		if (trunk->refcnt == 0) {
1377 			trunk->parent->if_vlantrunk = NULL;
1378 			/*
1379 			 * XXXGL: If some ithread has already entered
1380 			 * vlan_input() and is now blocked on the trunk
1381 			 * lock, then it should preempt us right after
1382 			 * unlock and finish its work. Then we will acquire
1383 			 * lock again in trunk_destroy().
1384 			 */
1385 			TRUNK_UNLOCK(trunk);
1386 			trunk_destroy(trunk);
1387 		} else
1388 			TRUNK_UNLOCK(trunk);
1389 	}
1390 
1391 	/* Disconnect from parent. */
1392 	if (ifv->ifv_pflags)
1393 		if_printf(ifp, "%s: ifv_pflags unclean\n", __func__);
1394 	ifp->if_mtu = ETHERMTU;
1395 	ifp->if_link_state = LINK_STATE_UNKNOWN;
1396 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1397 
1398 	/*
1399 	 * Only dispatch an event if vlan was
1400 	 * attached, otherwise there is nothing
1401 	 * to cleanup anyway.
1402 	 */
1403 	if (parent != NULL)
1404 		EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid);
1405 }
1406 
1407 /* Handle a reference counted flag that should be set on the parent as well */
1408 static int
1409 vlan_setflag(struct ifnet *ifp, int flag, int status,
1410 	     int (*func)(struct ifnet *, int))
1411 {
1412 	struct ifvlan *ifv;
1413 	int error;
1414 
1415 	/* XXX VLAN_LOCK_ASSERT(); */
1416 
1417 	ifv = ifp->if_softc;
1418 	status = status ? (ifp->if_flags & flag) : 0;
1419 	/* Now "status" contains the flag value or 0 */
1420 
1421 	/*
1422 	 * See if recorded parent's status is different from what
1423 	 * we want it to be.  If it is, flip it.  We record parent's
1424 	 * status in ifv_pflags so that we won't clear parent's flag
1425 	 * we haven't set.  In fact, we don't clear or set parent's
1426 	 * flags directly, but get or release references to them.
1427 	 * That's why we can be sure that recorded flags still are
1428 	 * in accord with actual parent's flags.
1429 	 */
1430 	if (status != (ifv->ifv_pflags & flag)) {
1431 		error = (*func)(PARENT(ifv), status);
1432 		if (error)
1433 			return (error);
1434 		ifv->ifv_pflags &= ~flag;
1435 		ifv->ifv_pflags |= status;
1436 	}
1437 	return (0);
1438 }
1439 
1440 /*
1441  * Handle IFF_* flags that require certain changes on the parent:
1442  * if "status" is true, update parent's flags respective to our if_flags;
1443  * if "status" is false, forcedly clear the flags set on parent.
1444  */
1445 static int
1446 vlan_setflags(struct ifnet *ifp, int status)
1447 {
1448 	int error, i;
1449 
1450 	for (i = 0; vlan_pflags[i].flag; i++) {
1451 		error = vlan_setflag(ifp, vlan_pflags[i].flag,
1452 				     status, vlan_pflags[i].func);
1453 		if (error)
1454 			return (error);
1455 	}
1456 	return (0);
1457 }
1458 
1459 /* Inform all vlans that their parent has changed link state */
1460 static void
1461 vlan_link_state(struct ifnet *ifp)
1462 {
1463 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1464 	struct ifvlan *ifv;
1465 	int i;
1466 
1467 	TRUNK_LOCK(trunk);
1468 #ifdef VLAN_ARRAY
1469 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1470 		if (trunk->vlans[i] != NULL) {
1471 			ifv = trunk->vlans[i];
1472 #else
1473 	for (i = 0; i < (1 << trunk->hwidth); i++)
1474 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) {
1475 #endif
1476 			ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate;
1477 			if_link_state_change(ifv->ifv_ifp,
1478 			    trunk->parent->if_link_state);
1479 		}
1480 	TRUNK_UNLOCK(trunk);
1481 }
1482 
1483 static void
1484 vlan_capabilities(struct ifvlan *ifv)
1485 {
1486 	struct ifnet *p = PARENT(ifv);
1487 	struct ifnet *ifp = ifv->ifv_ifp;
1488 
1489 	TRUNK_LOCK_ASSERT(TRUNK(ifv));
1490 
1491 	/*
1492 	 * If the parent interface can do checksum offloading
1493 	 * on VLANs, then propagate its hardware-assisted
1494 	 * checksumming flags. Also assert that checksum
1495 	 * offloading requires hardware VLAN tagging.
1496 	 */
1497 	if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
1498 		ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
1499 
1500 	if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
1501 	    p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1502 		ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
1503 		ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
1504 		    CSUM_UDP | CSUM_SCTP | CSUM_FRAGMENT);
1505 	} else {
1506 		ifp->if_capenable = 0;
1507 		ifp->if_hwassist = 0;
1508 	}
1509 	/*
1510 	 * If the parent interface can do TSO on VLANs then
1511 	 * propagate the hardware-assisted flag. TSO on VLANs
1512 	 * does not necessarily require hardware VLAN tagging.
1513 	 */
1514 	if (p->if_capabilities & IFCAP_VLAN_HWTSO)
1515 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
1516 	if (p->if_capenable & IFCAP_VLAN_HWTSO) {
1517 		ifp->if_capenable |= p->if_capenable & IFCAP_TSO;
1518 		ifp->if_hwassist |= p->if_hwassist & CSUM_TSO;
1519 	} else {
1520 		ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
1521 		ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
1522 	}
1523 
1524 	/*
1525 	 * If the parent interface can offload TCP connections over VLANs then
1526 	 * propagate its TOE capability to the VLAN interface.
1527 	 *
1528 	 * All TOE drivers in the tree today can deal with VLANs.  If this
1529 	 * changes then IFCAP_VLAN_TOE should be promoted to a full capability
1530 	 * with its own bit.
1531 	 */
1532 #define	IFCAP_VLAN_TOE IFCAP_TOE
1533 	if (p->if_capabilities & IFCAP_VLAN_TOE)
1534 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
1535 	if (p->if_capenable & IFCAP_VLAN_TOE) {
1536 		TOEDEV(ifp) = TOEDEV(p);
1537 		ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
1538 	}
1539 }
1540 
1541 static void
1542 vlan_trunk_capabilities(struct ifnet *ifp)
1543 {
1544 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1545 	struct ifvlan *ifv;
1546 	int i;
1547 
1548 	TRUNK_LOCK(trunk);
1549 #ifdef VLAN_ARRAY
1550 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1551 		if (trunk->vlans[i] != NULL) {
1552 			ifv = trunk->vlans[i];
1553 #else
1554 	for (i = 0; i < (1 << trunk->hwidth); i++) {
1555 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
1556 #endif
1557 			vlan_capabilities(ifv);
1558 	}
1559 	TRUNK_UNLOCK(trunk);
1560 }
1561 
1562 static int
1563 vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1564 {
1565 	struct ifnet *p;
1566 	struct ifreq *ifr;
1567 	struct ifaddr *ifa;
1568 	struct ifvlan *ifv;
1569 	struct vlanreq vlr;
1570 	int error = 0;
1571 
1572 	ifr = (struct ifreq *)data;
1573 	ifa = (struct ifaddr *) data;
1574 	ifv = ifp->if_softc;
1575 
1576 	switch (cmd) {
1577 	case SIOCSIFADDR:
1578 		ifp->if_flags |= IFF_UP;
1579 #ifdef INET
1580 		if (ifa->ifa_addr->sa_family == AF_INET)
1581 			arp_ifinit(ifp, ifa);
1582 #endif
1583 		break;
1584 	case SIOCGIFADDR:
1585                 {
1586 			struct sockaddr *sa;
1587 
1588 			sa = (struct sockaddr *)&ifr->ifr_data;
1589 			bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen);
1590                 }
1591 		break;
1592 	case SIOCGIFMEDIA:
1593 		VLAN_LOCK();
1594 		if (TRUNK(ifv) != NULL) {
1595 			p = PARENT(ifv);
1596 			VLAN_UNLOCK();
1597 			error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data);
1598 			/* Limit the result to the parent's current config. */
1599 			if (error == 0) {
1600 				struct ifmediareq *ifmr;
1601 
1602 				ifmr = (struct ifmediareq *)data;
1603 				if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) {
1604 					ifmr->ifm_count = 1;
1605 					error = copyout(&ifmr->ifm_current,
1606 						ifmr->ifm_ulist,
1607 						sizeof(int));
1608 				}
1609 			}
1610 		} else {
1611 			VLAN_UNLOCK();
1612 			error = EINVAL;
1613 		}
1614 		break;
1615 
1616 	case SIOCSIFMEDIA:
1617 		error = EINVAL;
1618 		break;
1619 
1620 	case SIOCSIFMTU:
1621 		/*
1622 		 * Set the interface MTU.
1623 		 */
1624 		VLAN_LOCK();
1625 		if (TRUNK(ifv) != NULL) {
1626 			if (ifr->ifr_mtu >
1627 			     (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) ||
1628 			    ifr->ifr_mtu <
1629 			     (ifv->ifv_mintu - ifv->ifv_mtufudge))
1630 				error = EINVAL;
1631 			else
1632 				ifp->if_mtu = ifr->ifr_mtu;
1633 		} else
1634 			error = EINVAL;
1635 		VLAN_UNLOCK();
1636 		break;
1637 
1638 	case SIOCSETVLAN:
1639 #ifdef VIMAGE
1640 		/*
1641 		 * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN
1642 		 * interface to be delegated to a jail without allowing the
1643 		 * jail to change what underlying interface/VID it is
1644 		 * associated with.  We are not entirely convinced that this
1645 		 * is the right way to accomplish that policy goal.
1646 		 */
1647 		if (ifp->if_vnet != ifp->if_home_vnet) {
1648 			error = EPERM;
1649 			break;
1650 		}
1651 #endif
1652 		error = copyin(ifr->ifr_data, &vlr, sizeof(vlr));
1653 		if (error)
1654 			break;
1655 		if (vlr.vlr_parent[0] == '\0') {
1656 			vlan_unconfig(ifp);
1657 			break;
1658 		}
1659 		p = ifunit(vlr.vlr_parent);
1660 		if (p == NULL) {
1661 			error = ENOENT;
1662 			break;
1663 		}
1664 		/*
1665 		 * Don't let the caller set up a VLAN VID with
1666 		 * anything except VLID bits.
1667 		 */
1668 		if (vlr.vlr_tag & ~EVL_VLID_MASK) {
1669 			error = EINVAL;
1670 			break;
1671 		}
1672 		error = vlan_config(ifv, p, vlr.vlr_tag);
1673 		if (error)
1674 			break;
1675 
1676 		/* Update flags on the parent, if necessary. */
1677 		vlan_setflags(ifp, 1);
1678 		break;
1679 
1680 	case SIOCGETVLAN:
1681 #ifdef VIMAGE
1682 		if (ifp->if_vnet != ifp->if_home_vnet) {
1683 			error = EPERM;
1684 			break;
1685 		}
1686 #endif
1687 		bzero(&vlr, sizeof(vlr));
1688 		VLAN_LOCK();
1689 		if (TRUNK(ifv) != NULL) {
1690 			strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
1691 			    sizeof(vlr.vlr_parent));
1692 			vlr.vlr_tag = ifv->ifv_vid;
1693 		}
1694 		VLAN_UNLOCK();
1695 		error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
1696 		break;
1697 
1698 	case SIOCSIFFLAGS:
1699 		/*
1700 		 * We should propagate selected flags to the parent,
1701 		 * e.g., promiscuous mode.
1702 		 */
1703 		if (TRUNK(ifv) != NULL)
1704 			error = vlan_setflags(ifp, 1);
1705 		break;
1706 
1707 	case SIOCADDMULTI:
1708 	case SIOCDELMULTI:
1709 		/*
1710 		 * If we don't have a parent, just remember the membership for
1711 		 * when we do.
1712 		 */
1713 		if (TRUNK(ifv) != NULL)
1714 			error = vlan_setmulti(ifp);
1715 		break;
1716 
1717 	default:
1718 		error = EINVAL;
1719 		break;
1720 	}
1721 
1722 	return (error);
1723 }
1724