xref: /freebsd/sys/net/if_vlan.c (revision d1d015864103b253b3fcb2f72a0da5b0cfeb31b6)
1 /*-
2  * Copyright 1998 Massachusetts Institute of Technology
3  *
4  * Permission to use, copy, modify, and distribute this software and
5  * its documentation for any purpose and without fee is hereby
6  * granted, provided that both the above copyright notice and this
7  * permission notice appear in all copies, that both the above
8  * copyright notice and this permission notice appear in all
9  * supporting documentation, and that the name of M.I.T. not be used
10  * in advertising or publicity pertaining to distribution of the
11  * software without specific, written prior permission.  M.I.T. makes
12  * no representations about the suitability of this software for any
13  * purpose.  It is provided "as is" without express or implied
14  * warranty.
15  *
16  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /*
31  * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs.
32  * Might be extended some day to also handle IEEE 802.1p priority
33  * tagging.  This is sort of sneaky in the implementation, since
34  * we need to pretend to be enough of an Ethernet implementation
35  * to make arp work.  The way we do this is by telling everyone
36  * that we are an Ethernet, and then catch the packets that
37  * ether_output() sends to us via if_transmit(), rewrite them for
38  * use by the real outgoing interface, and ask it to send them.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include "opt_inet.h"
45 #include "opt_vlan.h"
46 
47 #include <sys/param.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/rwlock.h>
54 #include <sys/queue.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/systm.h>
59 #include <sys/sx.h>
60 
61 #include <net/bpf.h>
62 #include <net/ethernet.h>
63 #include <net/if.h>
64 #include <net/if_clone.h>
65 #include <net/if_dl.h>
66 #include <net/if_types.h>
67 #include <net/if_vlan_var.h>
68 #include <net/vnet.h>
69 
70 #ifdef INET
71 #include <netinet/in.h>
72 #include <netinet/if_ether.h>
73 #endif
74 
/* Default hash width: a trunk's hash table starts with (1 << 4) buckets. */
#define	VLAN_DEF_HWIDTH	4
/* Interface flags given to every vlan(4) interface when it is created. */
#define	VLAN_IFFLAGS	(IFF_BROADCAST | IFF_MULTICAST)
77 
/*
 * Non-zero when the interface is both administratively up (IFF_UP in
 * if_flags) and marked running by its driver (IFF_DRV_RUNNING in
 * if_drv_flags).
 */
#define	UP_AND_RUNNING(ifp)						\
	((((ifp)->if_flags & IFF_UP) != 0) &&				\
	    (((ifp)->if_drv_flags & IFF_DRV_RUNNING) != 0))
80 
/* List head type for struct ifvlan; used for the per-trunk hash buckets. */
LIST_HEAD(ifvlanhead, ifvlan);
82 
/*
 * Per-parent ("trunk") state: the lookup table that maps a VLAN ID to the
 * vlan interface configured on that parent.
 */
struct ifvlantrunk {
	struct	ifnet   *parent;	/* parent interface of this trunk */
	struct	rwlock	rw;		/* per-trunk lock, see TRUNK_*() */
#ifdef VLAN_ARRAY
#define	VLAN_ARRAY_SIZE	(EVL_VLID_MASK + 1)
	struct	ifvlan	*vlans[VLAN_ARRAY_SIZE]; /* static table */
#else
	struct	ifvlanhead *hash;	/* dynamic hash-list table */
	uint16_t	hmask;		/* bucket count - 1, mask for HASH() */
	uint16_t	hwidth;		/* log2 of the bucket count */
#endif
	int		refcnt;		/* number of vlans in the table */
};
96 
/*
 * One link-layer multicast address that a vlan has asked its parent to
 * receive.  Entries live on the owning vlan's vlan_mc_listhead and are
 * created and freed by vlan_setmulti().
 */
struct vlan_mc_entry {
	struct sockaddr_dl		mc_addr;	/* address given to the parent */
	SLIST_ENTRY(vlan_mc_entry)	mc_entries;	/* list linkage */
};
101 
/* Software context (if_softc) of one vlan(4) interface. */
struct	ifvlan {
	struct	ifvlantrunk *ifv_trunk;	/* trunk we are attached to, or NULL */
	struct	ifnet *ifv_ifp;		/* our own ifnet */
	void	*ifv_cookie;		/* opaque value, see vlan_setcookie() */
#define	TRUNK(ifv)	((ifv)->ifv_trunk)
#define	PARENT(ifv)	((ifv)->ifv_trunk->parent)
	int	ifv_pflags;	/* special flags we have set on parent */
	struct	ifv_linkmib {
		int	ifvm_encaplen;	/* encapsulation length */
		int	ifvm_mtufudge;	/* MTU fudged by this much */
		int	ifvm_mintu;	/* min transmission unit */
		uint16_t ifvm_proto;	/* encapsulation ethertype */
		uint16_t ifvm_tag;	/* tag to apply on packets leaving if */
	}	ifv_mib;
	/* Multicast addresses programmed into the parent on our behalf. */
	SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;
#ifndef VLAN_ARRAY
	LIST_ENTRY(ifvlan) ifv_list;	/* linkage in the trunk's hash bucket */
#endif
};
/* Shorthand accessors for the fields of the embedded ifv_mib. */
#define	ifv_proto	ifv_mib.ifvm_proto
#define	ifv_vid		ifv_mib.ifvm_tag
#define	ifv_encaplen	ifv_mib.ifvm_encaplen
#define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
#define	ifv_mintu	ifv_mib.ifvm_mintu
126 
/*
 * Special flags we should propagate to parent.
 *
 * Each entry pairs an interface flag with the function that takes an
 * ifnet and an int and is used to apply that flag to the parent.  The
 * table is terminated by an entry whose func is NULL.
 */
static struct {
	int flag;
	int (*func)(struct ifnet *, int);
} vlan_pflags[] = {
	{IFF_PROMISC, ifpromisc},
	{IFF_ALLMULTI, if_allmulti},
	{0, NULL}
};
136 
/* sysctl nodes "vlan" (under _net_link) and "link" (under _net_link_vlan). */
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0,
    "IEEE 802.1Q VLAN");
static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0,
    "for consistency");

/*
 * Read/write knob "soft_pad": when non-zero, vlan_transmit() pads short
 * frames to the minimum Ethernet size before they are tagged.
 */
static int soft_pad = 0;
SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0,
	   "pad short frames before tagging");
146 
/* Driver name; also used as the malloc type name and the trunk lock name. */
static const char vlanname[] = "vlan";
static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");

/* Event handler registrations made at MOD_LOAD and removed at MOD_UNLOAD. */
static eventhandler_tag ifdetach_tag;
static eventhandler_tag iflladdr_tag;
152 
/*
 * We have a global lock (an sx lock), that is used to serialize
 * configuration changes and isn't used in normal packet delivery.
 *
 * We also have a per-trunk rwlock, that is locked shared on packet
 * processing and exclusive when configuration is changed.
 *
 * The VLAN_ARRAY substitutes the dynamic hash with a static array
 * with 4096 entries. In theory this can give a boost in processing,
 * however in practice it does not. Probably this is because the array
 * is too big to fit into the CPU cache.
 */
static struct sx ifv_lock;
/* Global configuration lock; VLAN_LOCK() always takes it exclusively. */
#define	VLAN_LOCK_INIT()	sx_init(&ifv_lock, "vlan_global")
#define	VLAN_LOCK_DESTROY()	sx_destroy(&ifv_lock)
#define	VLAN_LOCK_ASSERT()	sx_assert(&ifv_lock, SA_LOCKED)
#define	VLAN_LOCK()		sx_xlock(&ifv_lock)
#define	VLAN_UNLOCK()		sx_xunlock(&ifv_lock)
/* Per-trunk rwlock: TRUNK_LOCK() is the write side, TRUNK_RLOCK() the read side. */
#define	TRUNK_LOCK_INIT(trunk)	rw_init(&(trunk)->rw, vlanname)
#define	TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw)
#define	TRUNK_LOCK(trunk)	rw_wlock(&(trunk)->rw)
#define	TRUNK_UNLOCK(trunk)	rw_wunlock(&(trunk)->rw)
#define	TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED)
#define	TRUNK_RLOCK(trunk)	rw_rlock(&(trunk)->rw)
#define	TRUNK_RUNLOCK(trunk)	rw_runlock(&(trunk)->rw)
#define	TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED)
179 
/* Hash table maintenance; only needed when the static array is not used. */
#ifndef VLAN_ARRAY
static	void vlan_inithash(struct ifvlantrunk *trunk);
static	void vlan_freehash(struct ifvlantrunk *trunk);
static	int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static	int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv);
static	void vlan_growhash(struct ifvlantrunk *trunk, int howmuch);
static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk,
	uint16_t vid);
#endif
static	void trunk_destroy(struct ifvlantrunk *trunk);

/* ifnet methods and configuration helpers. */
static	void vlan_init(void *foo);
static	void vlan_input(struct ifnet *ifp, struct mbuf *m);
static	int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
static	void vlan_qflush(struct ifnet *ifp);
static	int vlan_setflag(struct ifnet *ifp, int flag, int status,
    int (*func)(struct ifnet *, int));
static	int vlan_setflags(struct ifnet *ifp, int status);
static	int vlan_setmulti(struct ifnet *ifp);
static	int vlan_transmit(struct ifnet *ifp, struct mbuf *m);
static	void vlan_unconfig(struct ifnet *ifp);
static	void vlan_unconfig_locked(struct ifnet *ifp, int departing);
static	int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag);
static	void vlan_link_state(struct ifnet *ifp);
static	void vlan_capabilities(struct ifvlan *ifv);
static	void vlan_trunk_capabilities(struct ifnet *ifp);

/* Interface cloning callbacks. */
static	struct ifnet *vlan_clone_match_ethervid(struct if_clone *,
    const char *, int *);
static	int vlan_clone_match(struct if_clone *, const char *);
static	int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t);
static	int vlan_clone_destroy(struct if_clone *, struct ifnet *);

/* Event handlers for parent departure and parent lladdr change. */
static	void vlan_ifdetach(void *arg, struct ifnet *ifp);
static  void vlan_iflladdr(void *arg, struct ifnet *ifp);

/* Cloner handle returned by if_clone_advanced(). */
static struct if_clone *vlan_cloner;

#ifdef VIMAGE
/* Per-vnet copy of the cloner handle, detached in vnet_vlan_uninit(). */
static VNET_DEFINE(struct if_clone *, vlan_cloner);
#define	V_vlan_cloner	VNET(vlan_cloner)
#endif
222 
223 #ifndef VLAN_ARRAY
/* Fold a VLAN ID into a bucket index; (mask) is the bucket count minus one. */
#define	HASH(vid, mask)	(((vid) ^ ((vid) >> 4) ^ ((vid) >> 8)) & (mask))
225 
226 static void
227 vlan_inithash(struct ifvlantrunk *trunk)
228 {
229 	int i, n;
230 
231 	/*
232 	 * The trunk must not be locked here since we call malloc(M_WAITOK).
233 	 * It is OK in case this function is called before the trunk struct
234 	 * gets hooked up and becomes visible from other threads.
235 	 */
236 
237 	KASSERT(trunk->hwidth == 0 && trunk->hash == NULL,
238 	    ("%s: hash already initialized", __func__));
239 
240 	trunk->hwidth = VLAN_DEF_HWIDTH;
241 	n = 1 << trunk->hwidth;
242 	trunk->hmask = n - 1;
243 	trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK);
244 	for (i = 0; i < n; i++)
245 		LIST_INIT(&trunk->hash[i]);
246 }
247 
248 static void
249 vlan_freehash(struct ifvlantrunk *trunk)
250 {
251 #ifdef INVARIANTS
252 	int i;
253 
254 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
255 	for (i = 0; i < (1 << trunk->hwidth); i++)
256 		KASSERT(LIST_EMPTY(&trunk->hash[i]),
257 		    ("%s: hash table not empty", __func__));
258 #endif
259 	free(trunk->hash, M_VLAN);
260 	trunk->hash = NULL;
261 	trunk->hwidth = trunk->hmask = 0;
262 }
263 
264 static int
265 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
266 {
267 	int i, b;
268 	struct ifvlan *ifv2;
269 
270 	TRUNK_LOCK_ASSERT(trunk);
271 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
272 
273 	b = 1 << trunk->hwidth;
274 	i = HASH(ifv->ifv_vid, trunk->hmask);
275 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
276 		if (ifv->ifv_vid == ifv2->ifv_vid)
277 			return (EEXIST);
278 
279 	/*
280 	 * Grow the hash when the number of vlans exceeds half of the number of
281 	 * hash buckets squared. This will make the average linked-list length
282 	 * buckets/2.
283 	 */
284 	if (trunk->refcnt > (b * b) / 2) {
285 		vlan_growhash(trunk, 1);
286 		i = HASH(ifv->ifv_vid, trunk->hmask);
287 	}
288 	LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list);
289 	trunk->refcnt++;
290 
291 	return (0);
292 }
293 
294 static int
295 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
296 {
297 	int i, b;
298 	struct ifvlan *ifv2;
299 
300 	TRUNK_LOCK_ASSERT(trunk);
301 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
302 
303 	b = 1 << trunk->hwidth;
304 	i = HASH(ifv->ifv_vid, trunk->hmask);
305 	LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list)
306 		if (ifv2 == ifv) {
307 			trunk->refcnt--;
308 			LIST_REMOVE(ifv2, ifv_list);
309 			if (trunk->refcnt < (b * b) / 2)
310 				vlan_growhash(trunk, -1);
311 			return (0);
312 		}
313 
314 	panic("%s: vlan not found\n", __func__);
315 	return (ENOENT); /*NOTREACHED*/
316 }
317 
318 /*
319  * Grow the hash larger or smaller if memory permits.
320  */
321 static void
322 vlan_growhash(struct ifvlantrunk *trunk, int howmuch)
323 {
324 	struct ifvlan *ifv;
325 	struct ifvlanhead *hash2;
326 	int hwidth2, i, j, n, n2;
327 
328 	TRUNK_LOCK_ASSERT(trunk);
329 	KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__));
330 
331 	if (howmuch == 0) {
332 		/* Harmless yet obvious coding error */
333 		printf("%s: howmuch is 0\n", __func__);
334 		return;
335 	}
336 
337 	hwidth2 = trunk->hwidth + howmuch;
338 	n = 1 << trunk->hwidth;
339 	n2 = 1 << hwidth2;
340 	/* Do not shrink the table below the default */
341 	if (hwidth2 < VLAN_DEF_HWIDTH)
342 		return;
343 
344 	/* M_NOWAIT because we're called with trunk mutex held */
345 	hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT);
346 	if (hash2 == NULL) {
347 		printf("%s: out of memory -- hash size not changed\n",
348 		    __func__);
349 		return;		/* We can live with the old hash table */
350 	}
351 	for (j = 0; j < n2; j++)
352 		LIST_INIT(&hash2[j]);
353 	for (i = 0; i < n; i++)
354 		while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) {
355 			LIST_REMOVE(ifv, ifv_list);
356 			j = HASH(ifv->ifv_vid, n2 - 1);
357 			LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list);
358 		}
359 	free(trunk->hash, M_VLAN);
360 	trunk->hash = hash2;
361 	trunk->hwidth = hwidth2;
362 	trunk->hmask = n2 - 1;
363 
364 	if (bootverbose)
365 		if_printf(trunk->parent,
366 		    "VLAN hash table resized from %d to %d buckets\n", n, n2);
367 }
368 
369 static __inline struct ifvlan *
370 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
371 {
372 	struct ifvlan *ifv;
373 
374 	TRUNK_LOCK_RASSERT(trunk);
375 
376 	LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
377 		if (ifv->ifv_vid == vid)
378 			return (ifv);
379 	return (NULL);
380 }
381 
382 #if 0
/*
 * Debugging code to view the hashtables: prints one line per bucket with
 * the names of the vlan interfaces chained in it.  Compiled out.
 */
static void
vlan_dumphash(struct ifvlantrunk *trunk)
{
	int i;
	struct ifvlan *ifv;

	for (i = 0; i < (1 << trunk->hwidth); i++) {
		printf("%d: ", i);
		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
			printf("%s ", ifv->ifv_ifp->if_xname);
		printf("\n");
	}
}
397 #endif /* 0 */
398 #else
399 
400 static __inline struct ifvlan *
401 vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
402 {
403 
404 	return trunk->vlans[vid];
405 }
406 
407 static __inline int
408 vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
409 {
410 
411 	if (trunk->vlans[ifv->ifv_vid] != NULL)
412 		return EEXIST;
413 	trunk->vlans[ifv->ifv_vid] = ifv;
414 	trunk->refcnt++;
415 
416 	return (0);
417 }
418 
419 static __inline int
420 vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
421 {
422 
423 	trunk->vlans[ifv->ifv_vid] = NULL;
424 	trunk->refcnt--;
425 
426 	return (0);
427 }
428 
/* VLAN_ARRAY variant: the table is embedded in the trunk, nothing to free. */
static __inline void
vlan_freehash(struct ifvlantrunk *trunk)
{
}
433 
/* VLAN_ARRAY variant: the table is embedded in the trunk, nothing to set up. */
static __inline void
vlan_inithash(struct ifvlantrunk *trunk)
{
}
438 
439 #endif /* !VLAN_ARRAY */
440 
/*
 * Tear down a trunk: free its hash table, detach it from the parent
 * interface, destroy its lock and free the structure.  The global vlan
 * lock must be held by the caller.
 */
static void
trunk_destroy(struct ifvlantrunk *trunk)
{
	VLAN_LOCK_ASSERT();

	TRUNK_LOCK(trunk);
	vlan_freehash(trunk);
	/* After this store the parent no longer points at the trunk. */
	trunk->parent->if_vlantrunk = NULL;
	TRUNK_UNLOCK(trunk);
	TRUNK_LOCK_DESTROY(trunk);
	free(trunk, M_VLAN);
}
453 
454 /*
455  * Program our multicast filter. What we're actually doing is
456  * programming the multicast filter of the parent. This has the
457  * side effect of causing the parent interface to receive multicast
458  * traffic that it doesn't really want, which ends up being discarded
459  * later by the upper protocol layers. Unfortunately, there's no way
460  * to avoid this: there really is only one physical interface.
461  *
462  * XXX: There is a possible race here if more than one thread is
463  *      modifying the multicast state of the vlan interface at the same time.
464  */
465 static int
466 vlan_setmulti(struct ifnet *ifp)
467 {
468 	struct ifnet		*ifp_p;
469 	struct ifmultiaddr	*ifma, *rifma = NULL;
470 	struct ifvlan		*sc;
471 	struct vlan_mc_entry	*mc;
472 	int			error;
473 
474 	/*VLAN_LOCK_ASSERT();*/
475 
476 	/* Find the parent. */
477 	sc = ifp->if_softc;
478 	ifp_p = PARENT(sc);
479 
480 	CURVNET_SET_QUIET(ifp_p->if_vnet);
481 
482 	/* First, remove any existing filter entries. */
483 	while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
484 		error = if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
485 		if (error)
486 			return (error);
487 		SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
488 		free(mc, M_VLAN);
489 	}
490 
491 	/* Now program new ones. */
492 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
493 		if (ifma->ifma_addr->sa_family != AF_LINK)
494 			continue;
495 		mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
496 		if (mc == NULL)
497 			return (ENOMEM);
498 		bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
499 		mc->mc_addr.sdl_index = ifp_p->if_index;
500 		SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
501 		error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
502 		    &rifma);
503 		if (error)
504 			return (error);
505 	}
506 
507 	CURVNET_RESTORE();
508 	return (0);
509 }
510 
/*
 * A handler for parent interface link layer address changes.
 * If the parent interface link layer address is changed we
 * should also change it on all children vlans.
 *
 * Registered for iflladdr_event; `ifp' is the interface whose address
 * changed.  Interfaces that are not vlan trunks are ignored.
 */
static void
vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
{
	struct ifvlan *ifv;
#ifndef VLAN_ARRAY
	struct ifvlan *next;
#endif
	int i;

	/*
	 * Check if it's a trunk interface first of all
	 * to avoid needless locking.
	 */
	if (ifp->if_vlantrunk == NULL)
		return;

	VLAN_LOCK();
	/*
	 * OK, it's a trunk.  Loop over and change all vlan's lladdrs on it.
	 * The loop header differs between the array and hash layouts, the
	 * loop body below is shared by both.
	 */
#ifdef VLAN_ARRAY
	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
#else /* VLAN_ARRAY */
	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
		LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) {
#endif /* VLAN_ARRAY */
			/*
			 * The global lock is dropped around if_setlladdr()
			 * and re-taken before the iteration continues.
			 */
			VLAN_UNLOCK();
			if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp),
			    ifp->if_addrlen);
			VLAN_LOCK();
		}
	VLAN_UNLOCK();

}
551 
/*
 * A handler for network interface departure events.
 * Track departure of trunks here so that we don't access invalid
 * pointers or whatever if a trunk is ripped from under us, e.g.,
 * by ejecting its hot-plug card.  However, if an ifnet is simply
 * being renamed, then there's no need to tear down the state.
 *
 * Registered for ifnet_departure_event; `ifp' is the departing interface.
 */
static void
vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct ifvlan *ifv;
	int i;

	/*
	 * Check if it's a trunk interface first of all
	 * to avoid needless locking.
	 */
	if (ifp->if_vlantrunk == NULL)
		return;

	/* If the ifnet is just being renamed, don't do anything. */
	if (ifp->if_flags & IFF_RENAMING)
		return;

	VLAN_LOCK();
	/*
	 * OK, it's a trunk.  Loop over and detach all vlan's on it.
	 * Check trunk pointer after each vlan_unconfig() as it will
	 * free it and set to NULL after the last vlan was detached.
	 */
#ifdef VLAN_ARRAY
	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
		if ((ifv = ifp->if_vlantrunk->vlans[i])) {
			vlan_unconfig_locked(ifv->ifv_ifp, 1);
			if (ifp->if_vlantrunk == NULL)
				break;
		}
#else /* VLAN_ARRAY */
	/*
	 * Removing a vlan may resize the hash, so the scan starts over from
	 * the first bucket after every removal.
	 */
restart:
	for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++)
		if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) {
			vlan_unconfig_locked(ifv->ifv_ifp, 1);
			if (ifp->if_vlantrunk)
				goto restart;	/* trunk->hwidth can change */
			else
				break;
		}
#endif /* VLAN_ARRAY */
	/* Trunk should have been destroyed in vlan_unconfig(). */
	KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
	VLAN_UNLOCK();
}
604 
605 /*
606  * Return the trunk device for a virtual interface.
607  */
608 static struct ifnet  *
609 vlan_trunkdev(struct ifnet *ifp)
610 {
611 	struct ifvlan *ifv;
612 
613 	if (ifp->if_type != IFT_L2VLAN)
614 		return (NULL);
615 	ifv = ifp->if_softc;
616 	ifp = NULL;
617 	VLAN_LOCK();
618 	if (ifv->ifv_trunk)
619 		ifp = PARENT(ifv);
620 	VLAN_UNLOCK();
621 	return (ifp);
622 }
623 
624 /*
625  * Return the 12-bit VLAN VID for this interface, for use by external
626  * components such as Infiniband.
627  *
628  * XXXRW: Note that the function name here is historical; it should be named
629  * vlan_vid().
630  */
631 static int
632 vlan_tag(struct ifnet *ifp, uint16_t *vidp)
633 {
634 	struct ifvlan *ifv;
635 
636 	if (ifp->if_type != IFT_L2VLAN)
637 		return (EINVAL);
638 	ifv = ifp->if_softc;
639 	*vidp = ifv->ifv_vid;
640 	return (0);
641 }
642 
643 /*
644  * Return a driver specific cookie for this interface.  Synchronization
645  * with setcookie must be provided by the driver.
646  */
647 static void *
648 vlan_cookie(struct ifnet *ifp)
649 {
650 	struct ifvlan *ifv;
651 
652 	if (ifp->if_type != IFT_L2VLAN)
653 		return (NULL);
654 	ifv = ifp->if_softc;
655 	return (ifv->ifv_cookie);
656 }
657 
658 /*
659  * Store a cookie in our softc that drivers can use to store driver
660  * private per-instance data in.
661  */
662 static int
663 vlan_setcookie(struct ifnet *ifp, void *cookie)
664 {
665 	struct ifvlan *ifv;
666 
667 	if (ifp->if_type != IFT_L2VLAN)
668 		return (EINVAL);
669 	ifv = ifp->if_softc;
670 	ifv->ifv_cookie = cookie;
671 	return (0);
672 }
673 
674 /*
675  * Return the vlan device present at the specific VID.
676  */
677 static struct ifnet *
678 vlan_devat(struct ifnet *ifp, uint16_t vid)
679 {
680 	struct ifvlantrunk *trunk;
681 	struct ifvlan *ifv;
682 
683 	trunk = ifp->if_vlantrunk;
684 	if (trunk == NULL)
685 		return (NULL);
686 	ifp = NULL;
687 	TRUNK_RLOCK(trunk);
688 	ifv = vlan_gethash(trunk, vid);
689 	if (ifv)
690 		ifp = ifv->ifv_ifp;
691 	TRUNK_RUNLOCK(trunk);
692 	return (ifp);
693 }
694 
/*
 * VLAN support can be loaded as a module.  The only place in the
 * system that's intimately aware of this is ether_input.  We hook
 * into this code through vlan_input_p which is defined there and
 * set here.  No one else in the system should be aware of this so
 * we use an explicit reference here.
 */
extern	void (*vlan_input_p)(struct ifnet *, struct mbuf *);

/* For if_link_state_change() eyes only... */
extern	void (*vlan_link_state_p)(struct ifnet *);
706 
707 static int
708 vlan_modevent(module_t mod, int type, void *data)
709 {
710 
711 	switch (type) {
712 	case MOD_LOAD:
713 		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
714 		    vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
715 		if (ifdetach_tag == NULL)
716 			return (ENOMEM);
717 		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
718 		    vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
719 		if (iflladdr_tag == NULL)
720 			return (ENOMEM);
721 		VLAN_LOCK_INIT();
722 		vlan_input_p = vlan_input;
723 		vlan_link_state_p = vlan_link_state;
724 		vlan_trunk_cap_p = vlan_trunk_capabilities;
725 		vlan_trunkdev_p = vlan_trunkdev;
726 		vlan_cookie_p = vlan_cookie;
727 		vlan_setcookie_p = vlan_setcookie;
728 		vlan_tag_p = vlan_tag;
729 		vlan_devat_p = vlan_devat;
730 #ifndef VIMAGE
731 		vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
732 		    vlan_clone_create, vlan_clone_destroy);
733 #endif
734 		if (bootverbose)
735 			printf("vlan: initialized, using "
736 #ifdef VLAN_ARRAY
737 			       "full-size arrays"
738 #else
739 			       "hash tables with chaining"
740 #endif
741 
742 			       "\n");
743 		break;
744 	case MOD_UNLOAD:
745 #ifndef VIMAGE
746 		if_clone_detach(vlan_cloner);
747 #endif
748 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
749 		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
750 		vlan_input_p = NULL;
751 		vlan_link_state_p = NULL;
752 		vlan_trunk_cap_p = NULL;
753 		vlan_trunkdev_p = NULL;
754 		vlan_tag_p = NULL;
755 		vlan_cookie_p = NULL;
756 		vlan_setcookie_p = NULL;
757 		vlan_devat_p = NULL;
758 		VLAN_LOCK_DESTROY();
759 		if (bootverbose)
760 			printf("vlan: unloaded\n");
761 		break;
762 	default:
763 		return (EOPNOTSUPP);
764 	}
765 	return (0);
766 }
767 
/* Module glue: "if_vlan" is driven by vlan_modevent(); module version 3. */
static moduledata_t vlan_mod = {
	"if_vlan",
	vlan_modevent,
	0
};

DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_vlan, 3);
776 
777 #ifdef VIMAGE
/*
 * VIMAGE only: create the vlan interface cloner and remember it in the
 * per-vnet V_vlan_cloner.  Hooked up through VNET_SYSINIT below.
 */
static void
vnet_vlan_init(const void *unused __unused)
{

	vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
		    vlan_clone_create, vlan_clone_destroy);
	V_vlan_cloner = vlan_cloner;
}
VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
    vnet_vlan_init, NULL);
788 
/*
 * VIMAGE only: detach the cloner stored in V_vlan_cloner.  Hooked up
 * through VNET_SYSUNINIT below.
 */
static void
vnet_vlan_uninit(const void *unused __unused)
{

	if_clone_detach(V_vlan_cloner);
}
VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
    vnet_vlan_uninit, NULL);
797 #endif
798 
799 static struct ifnet *
800 vlan_clone_match_ethervid(struct if_clone *ifc, const char *name, int *vidp)
801 {
802 	const char *cp;
803 	struct ifnet *ifp;
804 	int vid;
805 
806 	/* Check for <etherif>.<vlan> style interface names. */
807 	IFNET_RLOCK_NOSLEEP();
808 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
809 		/*
810 		 * We can handle non-ethernet hardware types as long as
811 		 * they handle the tagging and headers themselves.
812 		 */
813 		if (ifp->if_type != IFT_ETHER &&
814 		    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
815 			continue;
816 		if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0)
817 			continue;
818 		cp = name + strlen(ifp->if_xname);
819 		if (*cp++ != '.')
820 			continue;
821 		if (*cp == '\0')
822 			continue;
823 		vid = 0;
824 		for(; *cp >= '0' && *cp <= '9'; cp++)
825 			vid = (vid * 10) + (*cp - '0');
826 		if (*cp != '\0')
827 			continue;
828 		if (vidp != NULL)
829 			*vidp = vid;
830 		break;
831 	}
832 	IFNET_RUNLOCK_NOSLEEP();
833 
834 	return (ifp);
835 }
836 
837 static int
838 vlan_clone_match(struct if_clone *ifc, const char *name)
839 {
840 	const char *cp;
841 
842 	if (vlan_clone_match_ethervid(ifc, name, NULL) != NULL)
843 		return (1);
844 
845 	if (strncmp(vlanname, name, strlen(vlanname)) != 0)
846 		return (0);
847 	for (cp = name + 4; *cp != '\0'; cp++) {
848 		if (*cp < '0' || *cp > '9')
849 			return (0);
850 	}
851 
852 	return (1);
853 }
854 
/*
 * Cloner create callback: allocate and attach a new vlan interface and,
 * when a parent and VID were supplied, configure it on that parent.
 *
 * `name' is the requested interface name in a buffer of `len' bytes; for
 * a wildcard request the allocated unit number is appended to it.
 * `params', if not NULL, is a user-space pointer to a struct vlanreq.
 *
 * Returns 0 on success.  Errors come from copyin(), ifc_name2unit(),
 * ifc_alloc_unit() and vlan_config(), or are ENXIO (unknown parent),
 * EINVAL (VID has bits outside EVL_VLID_MASK) or ENOSPC (if_alloc()
 * failed).
 */
static int
vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
{
	char *dp;
	int wildcard;
	int unit;
	int error;
	int vid;
	int ethertag;
	struct ifvlan *ifv;
	struct ifnet *ifp;
	struct ifnet *p;
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;
	struct vlanreq vlr;
	static const u_char eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */

	/*
	 * There are 3 (ugh) ways to specify the cloned device:
	 * o pass a parameter block with the clone request.
	 * o specify parameters in the text of the clone device name
	 * o specify no parameters and get an unattached device that
	 *   must be configured separately.
	 * The first technique is preferred; the latter two are
	 * supported for backwards compatibility.
	 *
	 * XXXRW: Note historic use of the word "tag" here.  New ioctls may be
	 * called for.
	 */
	if (params) {
		error = copyin(params, &vlr, sizeof(vlr));
		if (error)
			return error;
		p = ifunit(vlr.vlr_parent);
		if (p == NULL)
			return ENXIO;
		/*
		 * Don't let the caller set up a VLAN VID with
		 * anything except VLID bits.
		 */
		if (vlr.vlr_tag & ~EVL_VLID_MASK)
			return (EINVAL);
		error = ifc_name2unit(name, &unit);
		if (error != 0)
			return (error);

		ethertag = 1;
		vid = vlr.vlr_tag;
		wildcard = (unit < 0);
	} else if ((p = vlan_clone_match_ethervid(ifc, name, &vid)) != NULL) {
		ethertag = 1;
		unit = -1;
		wildcard = 0;

		/*
		 * Don't let the caller set up a VLAN VID with
		 * anything except VLID bits.
		 */
		if (vid & ~EVL_VLID_MASK)
			return (EINVAL);
	} else {
		/* No parent or VID given: create an unattached device. */
		ethertag = 0;

		error = ifc_name2unit(name, &unit);
		if (error != 0)
			return (error);

		wildcard = (unit < 0);
	}

	error = ifc_alloc_unit(ifc, &unit);
	if (error != 0)
		return (error);

	/* In the wildcard case, we need to update the name. */
	if (wildcard) {
		/* Find the end of the name and append the unit number there. */
		for (dp = name; *dp != '\0'; dp++);
		if (snprintf(dp, len - (dp-name), "%d", unit) >
		    len - (dp-name) - 1) {
			panic("%s: interface name too long", __func__);
		}
	}

	ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO);
	ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		ifc_free_unit(ifc, unit);
		free(ifv, M_VLAN);
		return (ENOSPC);
	}
	SLIST_INIT(&ifv->vlan_mc_listhead);

	ifp->if_softc = ifv;
	/*
	 * Set the name manually rather than using if_initname because
	 * we don't conform to the default naming convention for interfaces.
	 */
	strlcpy(ifp->if_xname, name, IFNAMSIZ);
	ifp->if_dname = vlanname;
	ifp->if_dunit = unit;
	/* NB: flags are not set here */
	ifp->if_linkmib = &ifv->ifv_mib;
	ifp->if_linkmiblen = sizeof(ifv->ifv_mib);
	/* NB: mtu is not set here */

	ifp->if_init = vlan_init;
	ifp->if_transmit = vlan_transmit;
	ifp->if_qflush = vlan_qflush;
	ifp->if_ioctl = vlan_ioctl;
	ifp->if_flags = VLAN_IFFLAGS;
	/* Attach as Ethernet with an all-zero address. */
	ether_ifattach(ifp, eaddr);
	/* Now undo some of the damage... */
	ifp->if_baudrate = 0;
	ifp->if_type = IFT_L2VLAN;
	ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN;
	ifa = ifp->if_addr;
	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
	sdl->sdl_type = IFT_L2VLAN;

	if (ethertag) {
		error = vlan_config(ifv, p, vid);
		if (error != 0) {
			/*
			 * Since we've partially failed, we need to back
			 * out all the way, otherwise userland could get
			 * confused.  Thus, we destroy the interface.
			 */
			ether_ifdetach(ifp);
			vlan_unconfig(ifp);
			if_free(ifp);
			ifc_free_unit(ifc, unit);
			free(ifv, M_VLAN);

			return (error);
		}

		/* Update flags on the parent, if necessary. */
		vlan_setflags(ifp, 1);
	}

	return (0);
}
997 
998 static int
999 vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
1000 {
1001 	struct ifvlan *ifv = ifp->if_softc;
1002 	int unit = ifp->if_dunit;
1003 
1004 	ether_ifdetach(ifp);	/* first, remove it from system-wide lists */
1005 	vlan_unconfig(ifp);	/* now it can be unconfigured and freed */
1006 	if_free(ifp);
1007 	free(ifv, M_VLAN);
1008 	ifc_free_unit(ifc, unit);
1009 
1010 	return (0);
1011 }
1012 
/*
 * The ifp->if_init entry point for vlan(4) is a no-op.
 * It is installed as if_init by vlan_clone_create(); the argument is
 * unused.
 */
static void
vlan_init(void *foo __unused)
{
}
1020 
/*
 * The if_transmit method for vlan(4) interface.
 *
 * Taps the packet to BPF, optionally pads it, attaches the VLAN ID either
 * as out-of-band mbuf metadata (parent does hardware tagging) or by
 * encapsulating the frame, and hands it to the parent's if_transmit.
 *
 * Returns ENETDOWN if the parent is not up and running, 0 if the packet
 * was dropped because padding or encapsulation failed (counted in
 * if_oerrors), and otherwise whatever the parent's if_transmit returned.
 */
static int
vlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct ifvlan *ifv;
	struct ifnet *p;
	int error, len, mcast;

	ifv = ifp->if_softc;
	p = PARENT(ifv);
	/*
	 * Captured up front: the statistics at the end use these values
	 * rather than reading the mbuf after it was handed to the parent.
	 */
	len = m->m_pkthdr.len;
	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;

	BPF_MTAP(ifp, m);

	/*
	 * Do not run parent's if_transmit() if the parent is not up,
	 * or parent's driver will cause a system crash.
	 */
	if (!UP_AND_RUNNING(p)) {
		m_freem(m);
		ifp->if_oerrors++;
		return (ENETDOWN);
	}

	/*
	 * Pad the frame to the minimum size allowed if told to.
	 * This option is in accord with IEEE Std 802.1Q, 2003 Ed.,
	 * paragraph C.4.4.3.b.  It can help to work around buggy
	 * bridges that violate paragraph C.4.4.3.a from the same
	 * document, i.e., fail to pad short frames after untagging.
	 * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but
	 * untagging it will produce a 62-byte frame, which is a runt
	 * and requires padding.  There are VLAN-enabled network
	 * devices that just discard such runts instead or mishandle
	 * them somehow.
	 */
	if (soft_pad && p->if_type == IFT_ETHER) {
		static char pad[8];	/* just zeros */
		int n;

		/* Append zeros in chunks of at most sizeof(pad) bytes. */
		for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len;
		     n > 0; n -= sizeof(pad))
			if (!m_append(m, min(n, sizeof(pad)), pad))
				break;

		/* n is still positive only if an m_append() call failed. */
		if (n > 0) {
			if_printf(ifp, "cannot pad short frame\n");
			ifp->if_oerrors++;
			m_freem(m);
			return (0);
		}
	}

	/*
	 * If underlying interface can do VLAN tag insertion itself,
	 * just pass the packet along. However, we need some way to
	 * tell the interface where the packet came from so that it
	 * knows how to find the VLAN tag to use, so we attach a
	 * packet tag that holds it.
	 */
	if (p->if_capenable & IFCAP_VLAN_HWTAGGING) {
		m->m_pkthdr.ether_vtag = ifv->ifv_vid;
		m->m_flags |= M_VLANTAG;
	} else {
		m = ether_vlanencap(m, ifv->ifv_vid);
		if (m == NULL) {
			if_printf(ifp, "unable to prepend VLAN header\n");
			ifp->if_oerrors++;
			return (0);
		}
	}

	/*
	 * Send it, precisely as ether_output() would have.
	 */
	error = (p->if_transmit)(p, m);
	if (!error) {
		ifp->if_opackets++;
		ifp->if_omcasts += mcast;
		ifp->if_obytes += len;
	} else
		ifp->if_oerrors++;
	return (error);
}
1108 
1109 /*
1110  * The ifp->if_qflush entry point for vlan(4) is a no-op.
1111  */
1112 static void
1113 vlan_qflush(struct ifnet *ifp __unused)
1114 {
1115 }
1116 
1117 static void
1118 vlan_input(struct ifnet *ifp, struct mbuf *m)
1119 {
1120 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1121 	struct ifvlan *ifv;
1122 	uint16_t vid;
1123 
1124 	KASSERT(trunk != NULL, ("%s: no trunk", __func__));
1125 
1126 	if (m->m_flags & M_VLANTAG) {
1127 		/*
1128 		 * Packet is tagged, but m contains a normal
1129 		 * Ethernet frame; the tag is stored out-of-band.
1130 		 */
1131 		vid = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
1132 		m->m_flags &= ~M_VLANTAG;
1133 	} else {
1134 		struct ether_vlan_header *evl;
1135 
1136 		/*
1137 		 * Packet is tagged in-band as specified by 802.1q.
1138 		 */
1139 		switch (ifp->if_type) {
1140 		case IFT_ETHER:
1141 			if (m->m_len < sizeof(*evl) &&
1142 			    (m = m_pullup(m, sizeof(*evl))) == NULL) {
1143 				if_printf(ifp, "cannot pullup VLAN header\n");
1144 				return;
1145 			}
1146 			evl = mtod(m, struct ether_vlan_header *);
1147 			vid = EVL_VLANOFTAG(ntohs(evl->evl_tag));
1148 
1149 			/*
1150 			 * Remove the 802.1q header by copying the Ethernet
1151 			 * addresses over it and adjusting the beginning of
1152 			 * the data in the mbuf.  The encapsulated Ethernet
1153 			 * type field is already in place.
1154 			 */
1155 			bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
1156 			      ETHER_HDR_LEN - ETHER_TYPE_LEN);
1157 			m_adj(m, ETHER_VLAN_ENCAP_LEN);
1158 			break;
1159 
1160 		default:
1161 #ifdef INVARIANTS
1162 			panic("%s: %s has unsupported if_type %u",
1163 			      __func__, ifp->if_xname, ifp->if_type);
1164 #endif
1165 			m_freem(m);
1166 			ifp->if_noproto++;
1167 			return;
1168 		}
1169 	}
1170 
1171 	TRUNK_RLOCK(trunk);
1172 	ifv = vlan_gethash(trunk, vid);
1173 	if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) {
1174 		TRUNK_RUNLOCK(trunk);
1175 		m_freem(m);
1176 		ifp->if_noproto++;
1177 		return;
1178 	}
1179 	TRUNK_RUNLOCK(trunk);
1180 
1181 	m->m_pkthdr.rcvif = ifv->ifv_ifp;
1182 	ifv->ifv_ifp->if_ipackets++;
1183 
1184 	/* Pass it back through the parent's input routine. */
1185 	(*ifp->if_input)(ifv->ifv_ifp, m);
1186 }
1187 
1188 static int
1189 vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
1190 {
1191 	struct ifvlantrunk *trunk;
1192 	struct ifnet *ifp;
1193 	int error = 0;
1194 
1195 	/* VID numbers 0x0 and 0xFFF are reserved */
1196 	if (vid == 0 || vid == 0xFFF)
1197 		return (EINVAL);
1198 	if (p->if_type != IFT_ETHER &&
1199 	    (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
1200 		return (EPROTONOSUPPORT);
1201 	if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS)
1202 		return (EPROTONOSUPPORT);
1203 	if (ifv->ifv_trunk)
1204 		return (EBUSY);
1205 
1206 	if (p->if_vlantrunk == NULL) {
1207 		trunk = malloc(sizeof(struct ifvlantrunk),
1208 		    M_VLAN, M_WAITOK | M_ZERO);
1209 		vlan_inithash(trunk);
1210 		VLAN_LOCK();
1211 		if (p->if_vlantrunk != NULL) {
1212 			/* A race that that is very unlikely to be hit. */
1213 			vlan_freehash(trunk);
1214 			free(trunk, M_VLAN);
1215 			goto exists;
1216 		}
1217 		TRUNK_LOCK_INIT(trunk);
1218 		TRUNK_LOCK(trunk);
1219 		p->if_vlantrunk = trunk;
1220 		trunk->parent = p;
1221 	} else {
1222 		VLAN_LOCK();
1223 exists:
1224 		trunk = p->if_vlantrunk;
1225 		TRUNK_LOCK(trunk);
1226 	}
1227 
1228 	ifv->ifv_vid = vid;	/* must set this before vlan_inshash() */
1229 	error = vlan_inshash(trunk, ifv);
1230 	if (error)
1231 		goto done;
1232 	ifv->ifv_proto = ETHERTYPE_VLAN;
1233 	ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN;
1234 	ifv->ifv_mintu = ETHERMIN;
1235 	ifv->ifv_pflags = 0;
1236 
1237 	/*
1238 	 * If the parent supports the VLAN_MTU capability,
1239 	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames,
1240 	 * use it.
1241 	 */
1242 	if (p->if_capenable & IFCAP_VLAN_MTU) {
1243 		/*
1244 		 * No need to fudge the MTU since the parent can
1245 		 * handle extended frames.
1246 		 */
1247 		ifv->ifv_mtufudge = 0;
1248 	} else {
1249 		/*
1250 		 * Fudge the MTU by the encapsulation size.  This
1251 		 * makes us incompatible with strictly compliant
1252 		 * 802.1Q implementations, but allows us to use
1253 		 * the feature with other NetBSD implementations,
1254 		 * which might still be useful.
1255 		 */
1256 		ifv->ifv_mtufudge = ifv->ifv_encaplen;
1257 	}
1258 
1259 	ifv->ifv_trunk = trunk;
1260 	ifp = ifv->ifv_ifp;
1261 	/*
1262 	 * Initialize fields from our parent.  This duplicates some
1263 	 * work with ether_ifattach() but allows for non-ethernet
1264 	 * interfaces to also work.
1265 	 */
1266 	ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge;
1267 	ifp->if_baudrate = p->if_baudrate;
1268 	ifp->if_output = p->if_output;
1269 	ifp->if_input = p->if_input;
1270 	ifp->if_resolvemulti = p->if_resolvemulti;
1271 	ifp->if_addrlen = p->if_addrlen;
1272 	ifp->if_broadcastaddr = p->if_broadcastaddr;
1273 
1274 	/*
1275 	 * Copy only a selected subset of flags from the parent.
1276 	 * Other flags are none of our business.
1277 	 */
1278 #define VLAN_COPY_FLAGS (IFF_SIMPLEX)
1279 	ifp->if_flags &= ~VLAN_COPY_FLAGS;
1280 	ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS;
1281 #undef VLAN_COPY_FLAGS
1282 
1283 	ifp->if_link_state = p->if_link_state;
1284 
1285 	vlan_capabilities(ifv);
1286 
1287 	/*
1288 	 * Set up our interface address to reflect the underlying
1289 	 * physical interface's.
1290 	 */
1291 	bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen);
1292 	((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen =
1293 	    p->if_addrlen;
1294 
1295 	/*
1296 	 * Configure multicast addresses that may already be
1297 	 * joined on the vlan device.
1298 	 */
1299 	(void)vlan_setmulti(ifp); /* XXX: VLAN lock held */
1300 
1301 	/* We are ready for operation now. */
1302 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1303 done:
1304 	TRUNK_UNLOCK(trunk);
1305 	if (error == 0)
1306 		EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid);
1307 	VLAN_UNLOCK();
1308 
1309 	return (error);
1310 }
1311 
/*
 * Detach a vlan interface from its parent.  Convenience wrapper that
 * takes the VLAN lock around vlan_unconfig_locked(), passing 0 for its
 * "departing" argument.
 */
static void
vlan_unconfig(struct ifnet *ifp)
{
	const int departing = 0;

	VLAN_LOCK();
	vlan_unconfig_locked(ifp, departing);
	VLAN_UNLOCK();
}
1320 
1321 static void
1322 vlan_unconfig_locked(struct ifnet *ifp, int departing)
1323 {
1324 	struct ifvlantrunk *trunk;
1325 	struct vlan_mc_entry *mc;
1326 	struct ifvlan *ifv;
1327 	struct ifnet  *parent;
1328 	int error;
1329 
1330 	VLAN_LOCK_ASSERT();
1331 
1332 	ifv = ifp->if_softc;
1333 	trunk = ifv->ifv_trunk;
1334 	parent = NULL;
1335 
1336 	if (trunk != NULL) {
1337 
1338 		TRUNK_LOCK(trunk);
1339 		parent = trunk->parent;
1340 
1341 		/*
1342 		 * Since the interface is being unconfigured, we need to
1343 		 * empty the list of multicast groups that we may have joined
1344 		 * while we were alive from the parent's list.
1345 		 */
1346 		while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) {
1347 			/*
1348 			 * If the parent interface is being detached,
1349 			 * all its multicast addresses have already
1350 			 * been removed.  Warn about errors if
1351 			 * if_delmulti() does fail, but don't abort as
1352 			 * all callers expect vlan destruction to
1353 			 * succeed.
1354 			 */
1355 			if (!departing) {
1356 				error = if_delmulti(parent,
1357 				    (struct sockaddr *)&mc->mc_addr);
1358 				if (error)
1359 					if_printf(ifp,
1360 		    "Failed to delete multicast address from parent: %d\n",
1361 					    error);
1362 			}
1363 			SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries);
1364 			free(mc, M_VLAN);
1365 		}
1366 
1367 		vlan_setflags(ifp, 0); /* clear special flags on parent */
1368 		vlan_remhash(trunk, ifv);
1369 		ifv->ifv_trunk = NULL;
1370 
1371 		/*
1372 		 * Check if we were the last.
1373 		 */
1374 		if (trunk->refcnt == 0) {
1375 			trunk->parent->if_vlantrunk = NULL;
1376 			/*
1377 			 * XXXGL: If some ithread has already entered
1378 			 * vlan_input() and is now blocked on the trunk
1379 			 * lock, then it should preempt us right after
1380 			 * unlock and finish its work. Then we will acquire
1381 			 * lock again in trunk_destroy().
1382 			 */
1383 			TRUNK_UNLOCK(trunk);
1384 			trunk_destroy(trunk);
1385 		} else
1386 			TRUNK_UNLOCK(trunk);
1387 	}
1388 
1389 	/* Disconnect from parent. */
1390 	if (ifv->ifv_pflags)
1391 		if_printf(ifp, "%s: ifv_pflags unclean\n", __func__);
1392 	ifp->if_mtu = ETHERMTU;
1393 	ifp->if_link_state = LINK_STATE_UNKNOWN;
1394 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1395 
1396 	/*
1397 	 * Only dispatch an event if vlan was
1398 	 * attached, otherwise there is nothing
1399 	 * to cleanup anyway.
1400 	 */
1401 	if (parent != NULL)
1402 		EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid);
1403 }
1404 
1405 /* Handle a reference counted flag that should be set on the parent as well */
1406 static int
1407 vlan_setflag(struct ifnet *ifp, int flag, int status,
1408 	     int (*func)(struct ifnet *, int))
1409 {
1410 	struct ifvlan *ifv;
1411 	int error;
1412 
1413 	/* XXX VLAN_LOCK_ASSERT(); */
1414 
1415 	ifv = ifp->if_softc;
1416 	status = status ? (ifp->if_flags & flag) : 0;
1417 	/* Now "status" contains the flag value or 0 */
1418 
1419 	/*
1420 	 * See if recorded parent's status is different from what
1421 	 * we want it to be.  If it is, flip it.  We record parent's
1422 	 * status in ifv_pflags so that we won't clear parent's flag
1423 	 * we haven't set.  In fact, we don't clear or set parent's
1424 	 * flags directly, but get or release references to them.
1425 	 * That's why we can be sure that recorded flags still are
1426 	 * in accord with actual parent's flags.
1427 	 */
1428 	if (status != (ifv->ifv_pflags & flag)) {
1429 		error = (*func)(PARENT(ifv), status);
1430 		if (error)
1431 			return (error);
1432 		ifv->ifv_pflags &= ~flag;
1433 		ifv->ifv_pflags |= status;
1434 	}
1435 	return (0);
1436 }
1437 
1438 /*
1439  * Handle IFF_* flags that require certain changes on the parent:
1440  * if "status" is true, update parent's flags respective to our if_flags;
1441  * if "status" is false, forcedly clear the flags set on parent.
1442  */
1443 static int
1444 vlan_setflags(struct ifnet *ifp, int status)
1445 {
1446 	int error, i;
1447 
1448 	for (i = 0; vlan_pflags[i].flag; i++) {
1449 		error = vlan_setflag(ifp, vlan_pflags[i].flag,
1450 				     status, vlan_pflags[i].func);
1451 		if (error)
1452 			return (error);
1453 	}
1454 	return (0);
1455 }
1456 
1457 /* Inform all vlans that their parent has changed link state */
1458 static void
1459 vlan_link_state(struct ifnet *ifp)
1460 {
1461 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1462 	struct ifvlan *ifv;
1463 	int i;
1464 
1465 	TRUNK_LOCK(trunk);
1466 #ifdef VLAN_ARRAY
1467 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1468 		if (trunk->vlans[i] != NULL) {
1469 			ifv = trunk->vlans[i];
1470 #else
1471 	for (i = 0; i < (1 << trunk->hwidth); i++)
1472 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) {
1473 #endif
1474 			ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate;
1475 			if_link_state_change(ifv->ifv_ifp,
1476 			    trunk->parent->if_link_state);
1477 		}
1478 	TRUNK_UNLOCK(trunk);
1479 }
1480 
1481 static void
1482 vlan_capabilities(struct ifvlan *ifv)
1483 {
1484 	struct ifnet *p = PARENT(ifv);
1485 	struct ifnet *ifp = ifv->ifv_ifp;
1486 
1487 	TRUNK_LOCK_ASSERT(TRUNK(ifv));
1488 
1489 	/*
1490 	 * If the parent interface can do checksum offloading
1491 	 * on VLANs, then propagate its hardware-assisted
1492 	 * checksumming flags. Also assert that checksum
1493 	 * offloading requires hardware VLAN tagging.
1494 	 */
1495 	if (p->if_capabilities & IFCAP_VLAN_HWCSUM)
1496 		ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM;
1497 
1498 	if (p->if_capenable & IFCAP_VLAN_HWCSUM &&
1499 	    p->if_capenable & IFCAP_VLAN_HWTAGGING) {
1500 		ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM;
1501 		ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP |
1502 		    CSUM_UDP | CSUM_SCTP | CSUM_FRAGMENT);
1503 	} else {
1504 		ifp->if_capenable = 0;
1505 		ifp->if_hwassist = 0;
1506 	}
1507 	/*
1508 	 * If the parent interface can do TSO on VLANs then
1509 	 * propagate the hardware-assisted flag. TSO on VLANs
1510 	 * does not necessarily require hardware VLAN tagging.
1511 	 */
1512 	if (p->if_capabilities & IFCAP_VLAN_HWTSO)
1513 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO;
1514 	if (p->if_capenable & IFCAP_VLAN_HWTSO) {
1515 		ifp->if_capenable |= p->if_capenable & IFCAP_TSO;
1516 		ifp->if_hwassist |= p->if_hwassist & CSUM_TSO;
1517 	} else {
1518 		ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO);
1519 		ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO);
1520 	}
1521 
1522 	/*
1523 	 * If the parent interface can offload TCP connections over VLANs then
1524 	 * propagate its TOE capability to the VLAN interface.
1525 	 *
1526 	 * All TOE drivers in the tree today can deal with VLANs.  If this
1527 	 * changes then IFCAP_VLAN_TOE should be promoted to a full capability
1528 	 * with its own bit.
1529 	 */
1530 #define	IFCAP_VLAN_TOE IFCAP_TOE
1531 	if (p->if_capabilities & IFCAP_VLAN_TOE)
1532 		ifp->if_capabilities |= p->if_capabilities & IFCAP_TOE;
1533 	if (p->if_capenable & IFCAP_VLAN_TOE) {
1534 		TOEDEV(ifp) = TOEDEV(p);
1535 		ifp->if_capenable |= p->if_capenable & IFCAP_TOE;
1536 	}
1537 }
1538 
1539 static void
1540 vlan_trunk_capabilities(struct ifnet *ifp)
1541 {
1542 	struct ifvlantrunk *trunk = ifp->if_vlantrunk;
1543 	struct ifvlan *ifv;
1544 	int i;
1545 
1546 	TRUNK_LOCK(trunk);
1547 #ifdef VLAN_ARRAY
1548 	for (i = 0; i < VLAN_ARRAY_SIZE; i++)
1549 		if (trunk->vlans[i] != NULL) {
1550 			ifv = trunk->vlans[i];
1551 #else
1552 	for (i = 0; i < (1 << trunk->hwidth); i++) {
1553 		LIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
1554 #endif
1555 			vlan_capabilities(ifv);
1556 	}
1557 	TRUNK_UNLOCK(trunk);
1558 }
1559 
1560 static int
1561 vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1562 {
1563 	struct ifnet *p;
1564 	struct ifreq *ifr;
1565 	struct ifaddr *ifa;
1566 	struct ifvlan *ifv;
1567 	struct vlanreq vlr;
1568 	int error = 0;
1569 
1570 	ifr = (struct ifreq *)data;
1571 	ifa = (struct ifaddr *) data;
1572 	ifv = ifp->if_softc;
1573 
1574 	switch (cmd) {
1575 	case SIOCSIFADDR:
1576 		ifp->if_flags |= IFF_UP;
1577 #ifdef INET
1578 		if (ifa->ifa_addr->sa_family == AF_INET)
1579 			arp_ifinit(ifp, ifa);
1580 #endif
1581 		break;
1582 	case SIOCGIFADDR:
1583                 {
1584 			struct sockaddr *sa;
1585 
1586 			sa = (struct sockaddr *)&ifr->ifr_data;
1587 			bcopy(IF_LLADDR(ifp), sa->sa_data, ifp->if_addrlen);
1588                 }
1589 		break;
1590 	case SIOCGIFMEDIA:
1591 		VLAN_LOCK();
1592 		if (TRUNK(ifv) != NULL) {
1593 			p = PARENT(ifv);
1594 			VLAN_UNLOCK();
1595 			error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data);
1596 			/* Limit the result to the parent's current config. */
1597 			if (error == 0) {
1598 				struct ifmediareq *ifmr;
1599 
1600 				ifmr = (struct ifmediareq *)data;
1601 				if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) {
1602 					ifmr->ifm_count = 1;
1603 					error = copyout(&ifmr->ifm_current,
1604 						ifmr->ifm_ulist,
1605 						sizeof(int));
1606 				}
1607 			}
1608 		} else {
1609 			VLAN_UNLOCK();
1610 			error = EINVAL;
1611 		}
1612 		break;
1613 
1614 	case SIOCSIFMEDIA:
1615 		error = EINVAL;
1616 		break;
1617 
1618 	case SIOCSIFMTU:
1619 		/*
1620 		 * Set the interface MTU.
1621 		 */
1622 		VLAN_LOCK();
1623 		if (TRUNK(ifv) != NULL) {
1624 			if (ifr->ifr_mtu >
1625 			     (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) ||
1626 			    ifr->ifr_mtu <
1627 			     (ifv->ifv_mintu - ifv->ifv_mtufudge))
1628 				error = EINVAL;
1629 			else
1630 				ifp->if_mtu = ifr->ifr_mtu;
1631 		} else
1632 			error = EINVAL;
1633 		VLAN_UNLOCK();
1634 		break;
1635 
1636 	case SIOCSETVLAN:
1637 #ifdef VIMAGE
1638 		/*
1639 		 * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN
1640 		 * interface to be delegated to a jail without allowing the
1641 		 * jail to change what underlying interface/VID it is
1642 		 * associated with.  We are not entirely convinced that this
1643 		 * is the right way to accomplish that policy goal.
1644 		 */
1645 		if (ifp->if_vnet != ifp->if_home_vnet) {
1646 			error = EPERM;
1647 			break;
1648 		}
1649 #endif
1650 		error = copyin(ifr->ifr_data, &vlr, sizeof(vlr));
1651 		if (error)
1652 			break;
1653 		if (vlr.vlr_parent[0] == '\0') {
1654 			vlan_unconfig(ifp);
1655 			break;
1656 		}
1657 		p = ifunit(vlr.vlr_parent);
1658 		if (p == NULL) {
1659 			error = ENOENT;
1660 			break;
1661 		}
1662 		/*
1663 		 * Don't let the caller set up a VLAN VID with
1664 		 * anything except VLID bits.
1665 		 */
1666 		if (vlr.vlr_tag & ~EVL_VLID_MASK) {
1667 			error = EINVAL;
1668 			break;
1669 		}
1670 		error = vlan_config(ifv, p, vlr.vlr_tag);
1671 		if (error)
1672 			break;
1673 
1674 		/* Update flags on the parent, if necessary. */
1675 		vlan_setflags(ifp, 1);
1676 		break;
1677 
1678 	case SIOCGETVLAN:
1679 #ifdef VIMAGE
1680 		if (ifp->if_vnet != ifp->if_home_vnet) {
1681 			error = EPERM;
1682 			break;
1683 		}
1684 #endif
1685 		bzero(&vlr, sizeof(vlr));
1686 		VLAN_LOCK();
1687 		if (TRUNK(ifv) != NULL) {
1688 			strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname,
1689 			    sizeof(vlr.vlr_parent));
1690 			vlr.vlr_tag = ifv->ifv_vid;
1691 		}
1692 		VLAN_UNLOCK();
1693 		error = copyout(&vlr, ifr->ifr_data, sizeof(vlr));
1694 		break;
1695 
1696 	case SIOCSIFFLAGS:
1697 		/*
1698 		 * We should propagate selected flags to the parent,
1699 		 * e.g., promiscuous mode.
1700 		 */
1701 		if (TRUNK(ifv) != NULL)
1702 			error = vlan_setflags(ifp, 1);
1703 		break;
1704 
1705 	case SIOCADDMULTI:
1706 	case SIOCDELMULTI:
1707 		/*
1708 		 * If we don't have a parent, just remember the membership for
1709 		 * when we do.
1710 		 */
1711 		if (TRUNK(ifv) != NULL)
1712 			error = vlan_setmulti(ifp);
1713 		break;
1714 
1715 	default:
1716 		error = EINVAL;
1717 		break;
1718 	}
1719 
1720 	return (error);
1721 }
1722