xref: /freebsd/sys/net/if_bridge.c (revision 9b37d84c87e69dabc69d818aa4d2fea718bd8b74)
1 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright 2001 Wasabi Systems, Inc.
7  * All rights reserved.
8  *
9  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed for the NetBSD Project by
22  *	Wasabi Systems, Inc.
23  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
24  *    or promote products derived from this software without specific prior
25  *    written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
42  * All rights reserved.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
54  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
56  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
57  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
58  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
59  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
61  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
62  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63  * POSSIBILITY OF SUCH DAMAGE.
64  *
65  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
66  */
67 
68 /*
69  * Network interface bridge support.
70  *
71  * TODO:
72  *
73  *	- Currently only supports Ethernet-like interfaces (Ethernet,
74  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
75  *	  to bridge other types of interfaces (maybe consider
76  *	  heterogeneous bridges).
77  */
78 
79 #include <sys/cdefs.h>
80 #include "opt_inet.h"
81 #include "opt_inet6.h"
82 
83 #include <sys/param.h>
84 #include <sys/eventhandler.h>
85 #include <sys/mbuf.h>
86 #include <sys/malloc.h>
87 #include <sys/protosw.h>
88 #include <sys/systm.h>
89 #include <sys/jail.h>
90 #include <sys/time.h>
91 #include <sys/socket.h> /* for net/if.h */
92 #include <sys/sockio.h>
93 #include <sys/ctype.h>  /* string functions */
94 #include <sys/kernel.h>
95 #include <sys/random.h>
96 #include <sys/syslog.h>
97 #include <sys/sysctl.h>
98 #include <vm/uma.h>
99 #include <sys/module.h>
100 #include <sys/priv.h>
101 #include <sys/proc.h>
102 #include <sys/lock.h>
103 #include <sys/mutex.h>
104 
105 #include <net/bpf.h>
106 #include <net/if.h>
107 #include <net/if_clone.h>
108 #include <net/if_dl.h>
109 #include <net/if_types.h>
110 #include <net/if_var.h>
111 #include <net/if_private.h>
112 #include <net/pfil.h>
113 #include <net/vnet.h>
114 
115 #include <netinet/in.h>
116 #include <netinet/in_systm.h>
117 #include <netinet/in_var.h>
118 #include <netinet/ip.h>
119 #include <netinet/ip_var.h>
120 #ifdef INET6
121 #include <netinet/ip6.h>
122 #include <netinet6/ip6_var.h>
123 #include <netinet6/in6_ifattach.h>
124 #endif
125 #if defined(INET) || defined(INET6)
126 #include <netinet/ip_carp.h>
127 #endif
128 #include <machine/in_cksum.h>
129 #include <netinet/if_ether.h>
130 #include <net/bridgestp.h>
131 #include <net/if_bridgevar.h>
132 #include <net/if_llc.h>
133 #include <net/if_vlan_var.h>
134 
135 #include <net/route.h>
136 
137 /*
138  * At various points in the code we need to know if we're hooked into the INET
139  * and/or INET6 pfil.  Define some macros to do that based on which IP versions
140  * are enabled in the kernel.  This avoids littering the rest of the code with
141  * #ifdef INET6 to avoid referencing V_inet6_pfil_head.
142  */
143 #ifdef INET6
144 #define		PFIL_HOOKED_IN_INET6	PFIL_HOOKED_IN(V_inet6_pfil_head)
145 #define		PFIL_HOOKED_OUT_INET6	PFIL_HOOKED_OUT(V_inet6_pfil_head)
146 #else
147 #define		PFIL_HOOKED_IN_INET6	false
148 #define		PFIL_HOOKED_OUT_INET6	false
149 #endif
150 
151 #ifdef INET
152 #define		PFIL_HOOKED_IN_INET	PFIL_HOOKED_IN(V_inet_pfil_head)
153 #define		PFIL_HOOKED_OUT_INET	PFIL_HOOKED_OUT(V_inet_pfil_head)
154 #else
155 #define		PFIL_HOOKED_IN_INET	false
156 #define		PFIL_HOOKED_OUT_INET	false
157 #endif
158 
159 #define		PFIL_HOOKED_IN_46	(PFIL_HOOKED_IN_INET6 || PFIL_HOOKED_IN_INET)
160 #define		PFIL_HOOKED_OUT_46	(PFIL_HOOKED_OUT_INET6 || PFIL_HOOKED_OUT_INET)
161 
162 /*
163  * Size of the route hash table.  Must be a power of two.
164  */
165 #ifndef BRIDGE_RTHASH_SIZE
166 #define	BRIDGE_RTHASH_SIZE		1024
167 #endif
168 
169 #define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
170 
171 /*
172  * Default maximum number of addresses to cache.
173  */
174 #ifndef BRIDGE_RTABLE_MAX
175 #define	BRIDGE_RTABLE_MAX		2000
176 #endif
177 
178 /*
179  * Timeout (in seconds) for entries learned dynamically.
180  */
181 #ifndef BRIDGE_RTABLE_TIMEOUT
182 #define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
183 #endif
184 
185 /*
186  * Number of seconds between walks of the route list.
187  */
188 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
189 #define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
190 #endif
191 
192 /*
193  * List of capabilities to possibly mask on the member interface.
194  */
195 #define	BRIDGE_IFCAPS_MASK		(IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\
196 					 IFCAP_TXCSUM_IPV6|IFCAP_MEXTPG)
197 
198 /*
199  * List of capabilities to strip
200  */
201 #define	BRIDGE_IFCAPS_STRIP		IFCAP_LRO
202 
203 /*
204  * Bridge locking
205  *
206  * The bridge relies heavily on the epoch(9) system to protect its data
207  * structures. This means we can safely use CK_LISTs while in NET_EPOCH, but we
208  * must ensure there is only one writer at a time.
209  *
210  * That is: for read accesses we only need to be in NET_EPOCH, but for write
211  * accesses we must hold:
212  *
213  *  - BRIDGE_RT_LOCK, for any change to bridge_rtnodes
214  *  - BRIDGE_LOCK, for any other change
215  *
216  * The BRIDGE_LOCK is a sleepable lock, because it is held across ioctl()
217  * calls to bridge member interfaces and these ioctl()s can sleep.
218  * The BRIDGE_RT_LOCK is a non-sleepable mutex, because it is sometimes
219  * required while we're in NET_EPOCH and then we're not allowed to sleep.
220  */
221 #define BRIDGE_LOCK_INIT(_sc)		do {			\
222 	sx_init(&(_sc)->sc_sx, "if_bridge");			\
223 	mtx_init(&(_sc)->sc_rt_mtx, "if_bridge rt", NULL, MTX_DEF);	\
224 } while (0)
225 #define BRIDGE_LOCK_DESTROY(_sc)	do {	\
226 	sx_destroy(&(_sc)->sc_sx);		\
227 	mtx_destroy(&(_sc)->sc_rt_mtx);		\
228 } while (0)
229 #define BRIDGE_LOCK(_sc)		sx_xlock(&(_sc)->sc_sx)
230 #define BRIDGE_UNLOCK(_sc)		sx_xunlock(&(_sc)->sc_sx)
231 #define BRIDGE_LOCK_ASSERT(_sc)		sx_assert(&(_sc)->sc_sx, SX_XLOCKED)
232 #define BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(_sc)	\
233 	    MPASS(in_epoch(net_epoch_preempt) || sx_xlocked(&(_sc)->sc_sx))
234 #define BRIDGE_UNLOCK_ASSERT(_sc)	sx_assert(&(_sc)->sc_sx, SX_UNLOCKED)
235 #define BRIDGE_RT_LOCK(_sc)		mtx_lock(&(_sc)->sc_rt_mtx)
236 #define BRIDGE_RT_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_rt_mtx)
237 #define BRIDGE_RT_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_rt_mtx, MA_OWNED)
238 #define BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(_sc)	\
239 	    MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(_sc)->sc_rt_mtx))
240 
241 struct bridge_softc;
242 
/*
 * Bridge interface list entry.
 *
 * Describes one interface attached to a bridge.  Entries are linked through
 * bif_next onto a CK_LIST; struct bridge_softc keeps two such lists
 * (sc_iflist for members, sc_spanlist for span ports).
 */
struct bridge_iflist {
	CK_LIST_ENTRY(bridge_iflist) bif_next;	/* CK_LIST linkage */
	struct ifnet		*bif_ifp;	/* member if */
	struct bridge_softc	*bif_sc;	/* parent bridge */
	struct bstp_port	bif_stp;	/* STP state */
	uint32_t		bif_flags;	/* member if flags */
	int			bif_savedcaps;	/* saved capabilities */
	uint32_t		bif_addrmax;	/* max # of addresses */
	uint32_t		bif_addrcnt;	/* cur. # of addresses */
	uint32_t		bif_addrexceeded;/* # of address violations */
	/* Epoch callback context; presumably for deferred free - confirm. */
	struct epoch_context	bif_epoch_ctx;
};
258 
/*
 * Bridge route node.
 *
 * One forwarding-table entry, keyed by Ethernet address and VLAN id.  Each
 * node carries two CK_LIST linkages: one for a hash bucket and one for a
 * flat list (see sc_rthash and sc_rtlist in struct bridge_softc).
 */
struct bridge_rtnode {
	CK_LIST_ENTRY(bridge_rtnode) brt_hash;	/* hash table linkage */
	CK_LIST_ENTRY(bridge_rtnode) brt_list;	/* list linkage */
	struct bridge_iflist	*brt_dst;	/* destination if */
	unsigned long		brt_expire;	/* expiration time */
	uint8_t			brt_flags;	/* address flags */
	uint8_t			brt_addr[ETHER_ADDR_LEN];
	ether_vlanid_t		brt_vlan;	/* vlan id */
	/* NOTE(review): presumably the vnet this node belongs to - confirm. */
	struct	vnet		*brt_vnet;
	/* Epoch callback context; presumably for deferred free - confirm. */
	struct	epoch_context	brt_epoch_ctx;
};
/* Shorthand: the ifnet of the node's destination member. */
#define	brt_ifp			brt_dst->bif_ifp
274 
/*
 * Software state for each bridge.
 *
 * Allocated in bridge_clone_create() and freed from an epoch callback
 * (bridge_clone_destroy_cb()) scheduled by bridge_clone_destroy().
 */
struct bridge_softc {
	struct ifnet		*sc_ifp;	/* make this an interface */
	LIST_ENTRY(bridge_softc) sc_list;	/* linkage on V_bridge_list */
	struct sx		sc_sx;		/* BRIDGE_LOCK */
	struct mtx		sc_rt_mtx;	/* BRIDGE_RT_LOCK */
	uint32_t		sc_brtmax;	/* max # of addresses */
	uint32_t		sc_brtcnt;	/* cur. # of addresses */
	uint32_t		sc_brttimeout;	/* rt timeout in seconds */
	/* Initialised with sc_rt_mtx as its associated mutex. */
	struct callout		sc_brcallout;	/* bridge callout */
	CK_LIST_HEAD(, bridge_iflist) sc_iflist;	/* member interface list */
	CK_LIST_HEAD(, bridge_rtnode) *sc_rthash;	/* our forwarding table */
	CK_LIST_HEAD(, bridge_rtnode) sc_rtlist;	/* list version of above */
	uint32_t		sc_rthash_key;	/* key for hash */
	CK_LIST_HEAD(, bridge_iflist) sc_spanlist;	/* span ports list */
	struct bstp_state	sc_stp;		/* STP state */
	uint32_t		sc_brtexceeded;	/* # of cache drops */
	struct ifnet		*sc_ifaddr;	/* member mac copied from */
	struct ether_addr	sc_defaddr;	/* Default MAC address */
	/* Holds the if_input installed by ether_ifattach() (ether_input). */
	if_input_fn_t		sc_if_input;	/* Saved copy of if_input */
	/* Context passed to NET_EPOCH_CALL() when the softc is torn down. */
	struct epoch_context	sc_epoch_ctx;
};
299 
300 VNET_DEFINE_STATIC(struct sx, bridge_list_sx);
301 #define	V_bridge_list_sx	VNET(bridge_list_sx)
302 static eventhandler_tag bridge_detach_cookie;
303 
304 int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
305 
306 VNET_DEFINE_STATIC(uma_zone_t, bridge_rtnode_zone);
307 #define	V_bridge_rtnode_zone	VNET(bridge_rtnode_zone)
308 
309 static int	bridge_clone_create(struct if_clone *, char *, size_t,
310 		    struct ifc_data *, struct ifnet **);
311 static int	bridge_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);
312 
313 static int	bridge_ioctl(struct ifnet *, u_long, caddr_t);
314 static void	bridge_mutecaps(struct bridge_softc *);
315 static void	bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
316 		    int);
317 static void	bridge_ifdetach(void *arg __unused, struct ifnet *);
318 static void	bridge_init(void *);
319 static void	bridge_dummynet(struct mbuf *, struct ifnet *);
320 static bool	bridge_same(const void *, const void *);
321 static void	*bridge_get_softc(struct ifnet *);
322 static void	bridge_stop(struct ifnet *, int);
323 static int	bridge_transmit(struct ifnet *, struct mbuf *);
324 #ifdef ALTQ
325 static void	bridge_altq_start(if_t);
326 static int	bridge_altq_transmit(if_t, struct mbuf *);
327 #endif
328 static void	bridge_qflush(struct ifnet *);
329 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
330 static void	bridge_inject(struct ifnet *, struct mbuf *);
331 static int	bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
332 		    struct rtentry *);
333 static int	bridge_enqueue(struct bridge_softc *, struct ifnet *,
334 		    struct mbuf *);
335 static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
336 
337 static void	bridge_forward(struct bridge_softc *, struct bridge_iflist *,
338 		    struct mbuf *m);
339 static bool	bridge_member_ifaddrs(void);
340 
341 static void	bridge_timer(void *);
342 
343 static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
344 		    struct mbuf *, int);
345 static void	bridge_span(struct bridge_softc *, struct mbuf *);
346 
347 static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
348 		    ether_vlanid_t, struct bridge_iflist *, int, uint8_t);
349 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
350 		    ether_vlanid_t);
351 static void	bridge_rttrim(struct bridge_softc *);
352 static void	bridge_rtage(struct bridge_softc *);
353 static void	bridge_rtflush(struct bridge_softc *, int);
354 static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
355 		    ether_vlanid_t);
356 
357 static void	bridge_rtable_init(struct bridge_softc *);
358 static void	bridge_rtable_fini(struct bridge_softc *);
359 
360 static int	bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
361 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
362 		    const uint8_t *, ether_vlanid_t);
363 static int	bridge_rtnode_insert(struct bridge_softc *,
364 		    struct bridge_rtnode *);
365 static void	bridge_rtnode_destroy(struct bridge_softc *,
366 		    struct bridge_rtnode *);
367 static void	bridge_rtable_expire(struct ifnet *, int);
368 static void	bridge_state_change(struct ifnet *, int);
369 
370 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
371 		    const char *name);
372 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
373 		    struct ifnet *ifp);
374 static void	bridge_delete_member(struct bridge_softc *,
375 		    struct bridge_iflist *, int);
376 static void	bridge_delete_span(struct bridge_softc *,
377 		    struct bridge_iflist *);
378 
379 static int	bridge_ioctl_add(struct bridge_softc *, void *);
380 static int	bridge_ioctl_del(struct bridge_softc *, void *);
381 static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
382 static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
383 static int	bridge_ioctl_scache(struct bridge_softc *, void *);
384 static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
385 static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
386 static int	bridge_ioctl_rts(struct bridge_softc *, void *);
387 static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
388 static int	bridge_ioctl_sto(struct bridge_softc *, void *);
389 static int	bridge_ioctl_gto(struct bridge_softc *, void *);
390 static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
391 static int	bridge_ioctl_flush(struct bridge_softc *, void *);
392 static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
393 static int	bridge_ioctl_spri(struct bridge_softc *, void *);
394 static int	bridge_ioctl_ght(struct bridge_softc *, void *);
395 static int	bridge_ioctl_sht(struct bridge_softc *, void *);
396 static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
397 static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
398 static int	bridge_ioctl_gma(struct bridge_softc *, void *);
399 static int	bridge_ioctl_sma(struct bridge_softc *, void *);
400 static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
401 static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
402 static int	bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
403 static int	bridge_ioctl_addspan(struct bridge_softc *, void *);
404 static int	bridge_ioctl_delspan(struct bridge_softc *, void *);
405 static int	bridge_ioctl_gbparam(struct bridge_softc *, void *);
406 static int	bridge_ioctl_grte(struct bridge_softc *, void *);
407 static int	bridge_ioctl_gifsstp(struct bridge_softc *, void *);
408 static int	bridge_ioctl_sproto(struct bridge_softc *, void *);
409 static int	bridge_ioctl_stxhc(struct bridge_softc *, void *);
410 static int	bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
411 		    int);
412 #ifdef INET
413 static int	bridge_ip_checkbasic(struct mbuf **mp);
414 static int	bridge_fragment(struct ifnet *, struct mbuf **mp,
415 		    struct ether_header *, int, struct llc *);
416 #endif /* INET */
417 #ifdef INET6
418 static int	bridge_ip6_checkbasic(struct mbuf **mp);
419 #endif /* INET6 */
420 static void	bridge_linkstate(struct ifnet *ifp);
421 static void	bridge_linkcheck(struct bridge_softc *sc);
422 
/*
 * Use the "null" value from IEEE 802.1Q-2014 Table 9-2
 * to indicate untagged frames.
 *
 * VLANTAGOF(_m) yields the VLAN id carried in the mbuf's ether_vtag when
 * M_VLANTAG is set in m_flags, and DOT1Q_VID_NULL otherwise.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "&pkts[0]" or "p + 1") expands correctly.  Note that _m is evaluated
 * more than once; do not pass an expression with side effects.
 */
#define	VLANTAGOF(_m)							\
    (((_m)->m_flags & M_VLANTAG) ?					\
    EVL_VLANOFTAG((_m)->m_pkthdr.ether_vtag) : DOT1Q_VID_NULL)
429 
430 static struct bstp_cb_ops bridge_ops = {
431 	.bcb_state = bridge_state_change,
432 	.bcb_rtage = bridge_rtable_expire
433 };
434 
435 SYSCTL_DECL(_net_link);
436 static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
437     "Bridge");
438 
439 /* only pass IP[46] packets when pfil is enabled */
440 VNET_DEFINE_STATIC(int, pfil_onlyip) = 1;
441 #define	V_pfil_onlyip	VNET(pfil_onlyip)
442 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip,
443     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0,
444     "Only pass IP packets when pfil is enabled");
445 
446 /* run pfil hooks on the bridge interface */
447 VNET_DEFINE_STATIC(int, pfil_bridge) = 0;
448 #define	V_pfil_bridge	VNET(pfil_bridge)
449 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge,
450     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0,
451     "Packet filter on the bridge interface");
452 
453 /* layer2 filter with ipfw */
454 VNET_DEFINE_STATIC(int, pfil_ipfw);
455 #define	V_pfil_ipfw	VNET(pfil_ipfw)
456 
457 /* layer2 ARP filter with ipfw */
458 VNET_DEFINE_STATIC(int, pfil_ipfw_arp);
459 #define	V_pfil_ipfw_arp	VNET(pfil_ipfw_arp)
460 SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp,
461     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0,
462     "Filter ARP packets through IPFW layer2");
463 
464 /* run pfil hooks on the member interface */
465 VNET_DEFINE_STATIC(int, pfil_member) = 0;
466 #define	V_pfil_member	VNET(pfil_member)
467 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member,
468     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0,
469     "Packet filter on the member interface");
470 
471 /* run pfil hooks on the physical interface for locally destined packets */
472 VNET_DEFINE_STATIC(int, pfil_local_phys);
473 #define	V_pfil_local_phys	VNET(pfil_local_phys)
474 SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys,
475     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0,
476     "Packet filter on the physical interface for locally destined packets");
477 
478 /* log STP state changes */
479 VNET_DEFINE_STATIC(int, log_stp);
480 #define	V_log_stp	VNET(log_stp)
481 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
482     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0,
483     "Log STP state changes");
484 
485 /* share MAC with first bridge member */
486 VNET_DEFINE_STATIC(int, bridge_inherit_mac);
487 #define	V_bridge_inherit_mac	VNET(bridge_inherit_mac)
488 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
489     CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0,
490     "Inherit MAC address from the first bridge member");
491 
492 VNET_DEFINE_STATIC(int, allow_llz_overlap) = 0;
493 #define	V_allow_llz_overlap	VNET(allow_llz_overlap)
494 SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap,
495     CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0,
496     "Allow overlap of link-local scope "
497     "zones of a bridge interface and the member interfaces");
498 
499 /* log MAC address port flapping */
500 VNET_DEFINE_STATIC(bool, log_mac_flap) = true;
501 #define	V_log_mac_flap	VNET(log_mac_flap)
502 SYSCTL_BOOL(_net_link_bridge, OID_AUTO, log_mac_flap,
503     CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(log_mac_flap), true,
504     "Log MAC address port flapping");
505 
506 /* allow IP addresses on bridge members */
507 VNET_DEFINE_STATIC(bool, member_ifaddrs) = false;
508 #define	V_member_ifaddrs	VNET(member_ifaddrs)
509 SYSCTL_BOOL(_net_link_bridge, OID_AUTO, member_ifaddrs,
510     CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(member_ifaddrs), false,
511     "Allow layer 3 addresses on bridge members");
512 
513 static bool
514 bridge_member_ifaddrs(void)
515 {
516 	return (V_member_ifaddrs);
517 }
518 
519 VNET_DEFINE_STATIC(int, log_interval) = 5;
520 VNET_DEFINE_STATIC(int, log_count) = 0;
521 VNET_DEFINE_STATIC(struct timeval, log_last) = { 0 };
522 
523 #define	V_log_interval	VNET(log_interval)
524 #define	V_log_count	VNET(log_count)
525 #define	V_log_last	VNET(log_last)
526 
/*
 * One entry of the bridge control dispatch table (bridge_control_table).
 */
struct bridge_control {
	/* Handler; receives the bridge softc and an argument buffer. */
	int	(*bc_func)(struct bridge_softc *, void *);
	/* Size of the handler's argument structure, in bytes. */
	int	bc_argsize;
	/* Bitwise OR of the BC_F_* flags below. */
	int	bc_flags;
};

#define	BC_F_COPYIN		0x01	/* copy arguments in */
#define	BC_F_COPYOUT		0x02	/* copy arguments out */
#define	BC_F_SUSER		0x04	/* do super-user check */
536 
537 static const struct bridge_control bridge_control_table[] = {
538 	{ bridge_ioctl_add,		sizeof(struct ifbreq),
539 	  BC_F_COPYIN|BC_F_SUSER },
540 	{ bridge_ioctl_del,		sizeof(struct ifbreq),
541 	  BC_F_COPYIN|BC_F_SUSER },
542 
543 	{ bridge_ioctl_gifflags,	sizeof(struct ifbreq),
544 	  BC_F_COPYIN|BC_F_COPYOUT },
545 	{ bridge_ioctl_sifflags,	sizeof(struct ifbreq),
546 	  BC_F_COPYIN|BC_F_SUSER },
547 
548 	{ bridge_ioctl_scache,		sizeof(struct ifbrparam),
549 	  BC_F_COPYIN|BC_F_SUSER },
550 	{ bridge_ioctl_gcache,		sizeof(struct ifbrparam),
551 	  BC_F_COPYOUT },
552 
553 	{ bridge_ioctl_gifs,		sizeof(struct ifbifconf),
554 	  BC_F_COPYIN|BC_F_COPYOUT },
555 	{ bridge_ioctl_rts,		sizeof(struct ifbaconf),
556 	  BC_F_COPYIN|BC_F_COPYOUT },
557 
558 	{ bridge_ioctl_saddr,		sizeof(struct ifbareq),
559 	  BC_F_COPYIN|BC_F_SUSER },
560 
561 	{ bridge_ioctl_sto,		sizeof(struct ifbrparam),
562 	  BC_F_COPYIN|BC_F_SUSER },
563 	{ bridge_ioctl_gto,		sizeof(struct ifbrparam),
564 	  BC_F_COPYOUT },
565 
566 	{ bridge_ioctl_daddr,		sizeof(struct ifbareq),
567 	  BC_F_COPYIN|BC_F_SUSER },
568 
569 	{ bridge_ioctl_flush,		sizeof(struct ifbreq),
570 	  BC_F_COPYIN|BC_F_SUSER },
571 
572 	{ bridge_ioctl_gpri,		sizeof(struct ifbrparam),
573 	  BC_F_COPYOUT },
574 	{ bridge_ioctl_spri,		sizeof(struct ifbrparam),
575 	  BC_F_COPYIN|BC_F_SUSER },
576 
577 	{ bridge_ioctl_ght,		sizeof(struct ifbrparam),
578 	  BC_F_COPYOUT },
579 	{ bridge_ioctl_sht,		sizeof(struct ifbrparam),
580 	  BC_F_COPYIN|BC_F_SUSER },
581 
582 	{ bridge_ioctl_gfd,		sizeof(struct ifbrparam),
583 	  BC_F_COPYOUT },
584 	{ bridge_ioctl_sfd,		sizeof(struct ifbrparam),
585 	  BC_F_COPYIN|BC_F_SUSER },
586 
587 	{ bridge_ioctl_gma,		sizeof(struct ifbrparam),
588 	  BC_F_COPYOUT },
589 	{ bridge_ioctl_sma,		sizeof(struct ifbrparam),
590 	  BC_F_COPYIN|BC_F_SUSER },
591 
592 	{ bridge_ioctl_sifprio,		sizeof(struct ifbreq),
593 	  BC_F_COPYIN|BC_F_SUSER },
594 
595 	{ bridge_ioctl_sifcost,		sizeof(struct ifbreq),
596 	  BC_F_COPYIN|BC_F_SUSER },
597 
598 	{ bridge_ioctl_addspan,		sizeof(struct ifbreq),
599 	  BC_F_COPYIN|BC_F_SUSER },
600 	{ bridge_ioctl_delspan,		sizeof(struct ifbreq),
601 	  BC_F_COPYIN|BC_F_SUSER },
602 
603 	{ bridge_ioctl_gbparam,		sizeof(struct ifbropreq),
604 	  BC_F_COPYOUT },
605 
606 	{ bridge_ioctl_grte,		sizeof(struct ifbrparam),
607 	  BC_F_COPYOUT },
608 
609 	{ bridge_ioctl_gifsstp,		sizeof(struct ifbpstpconf),
610 	  BC_F_COPYIN|BC_F_COPYOUT },
611 
612 	{ bridge_ioctl_sproto,		sizeof(struct ifbrparam),
613 	  BC_F_COPYIN|BC_F_SUSER },
614 
615 	{ bridge_ioctl_stxhc,		sizeof(struct ifbrparam),
616 	  BC_F_COPYIN|BC_F_SUSER },
617 
618 	{ bridge_ioctl_sifmaxaddr,	sizeof(struct ifbreq),
619 	  BC_F_COPYIN|BC_F_SUSER },
620 
621 };
622 static const int bridge_control_table_size = nitems(bridge_control_table);
623 
624 VNET_DEFINE_STATIC(LIST_HEAD(, bridge_softc), bridge_list) =
625     LIST_HEAD_INITIALIZER();
626 #define	V_bridge_list	VNET(bridge_list)
627 #define	BRIDGE_LIST_LOCK_INIT(x)	sx_init(&V_bridge_list_sx,	\
628 					    "if_bridge list")
629 #define	BRIDGE_LIST_LOCK_DESTROY(x)	sx_destroy(&V_bridge_list_sx)
630 #define	BRIDGE_LIST_LOCK(x)		sx_xlock(&V_bridge_list_sx)
631 #define	BRIDGE_LIST_UNLOCK(x)		sx_xunlock(&V_bridge_list_sx)
632 
633 VNET_DEFINE_STATIC(struct if_clone *, bridge_cloner);
634 #define	V_bridge_cloner	VNET(bridge_cloner)
635 
636 static const char bridge_name[] = "bridge";
637 
638 static void
639 vnet_bridge_init(const void *unused __unused)
640 {
641 
642 	V_bridge_rtnode_zone = uma_zcreate("bridge_rtnode",
643 	    sizeof(struct bridge_rtnode), NULL, NULL, NULL, NULL,
644 	    UMA_ALIGN_PTR, 0);
645 	BRIDGE_LIST_LOCK_INIT();
646 
647 	struct if_clone_addreq req = {
648 		.create_f = bridge_clone_create,
649 		.destroy_f = bridge_clone_destroy,
650 		.flags = IFC_F_AUTOUNIT,
651 	};
652 	V_bridge_cloner = ifc_attach_cloner(bridge_name, &req);
653 }
654 VNET_SYSINIT(vnet_bridge_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
655     vnet_bridge_init, NULL);
656 
/*
 * vnet_bridge_uninit:
 *
 *	Per-vnet teardown, the counterpart of vnet_bridge_init(): detach the
 *	interface cloner, destroy the bridge list lock and destroy the
 *	rtnode UMA zone.
 */
static void
vnet_bridge_uninit(const void *unused __unused)
{

	ifc_detach_cloner(V_bridge_cloner);
	V_bridge_cloner = NULL;
	BRIDGE_LIST_LOCK_DESTROY();

	/*
	 * Callbacks may use the UMA zone, so drain pending epoch callbacks
	 * before the zone is destroyed below.
	 */
	NET_EPOCH_DRAIN_CALLBACKS();

	uma_zdestroy(V_bridge_rtnode_zone);
}
VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
    vnet_bridge_uninit, NULL);
672 
673 static int
674 bridge_modevent(module_t mod, int type, void *data)
675 {
676 
677 	switch (type) {
678 	case MOD_LOAD:
679 		bridge_dn_p = bridge_dummynet;
680 		bridge_same_p = bridge_same;
681 		bridge_get_softc_p = bridge_get_softc;
682 		bridge_member_ifaddrs_p = bridge_member_ifaddrs;
683 		bridge_detach_cookie = EVENTHANDLER_REGISTER(
684 		    ifnet_departure_event, bridge_ifdetach, NULL,
685 		    EVENTHANDLER_PRI_ANY);
686 		break;
687 	case MOD_UNLOAD:
688 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
689 		    bridge_detach_cookie);
690 		bridge_dn_p = NULL;
691 		bridge_same_p = NULL;
692 		bridge_get_softc_p = NULL;
693 		bridge_member_ifaddrs_p = NULL;
694 		break;
695 	default:
696 		return (EOPNOTSUPP);
697 	}
698 	return (0);
699 }
700 
701 static moduledata_t bridge_mod = {
702 	"if_bridge",
703 	bridge_modevent,
704 	0
705 };
706 
707 DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
708 MODULE_VERSION(if_bridge, 1);
709 MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1);
710 
711 /*
712  * handler for net.link.bridge.ipfw
713  */
714 static int
715 sysctl_pfil_ipfw(SYSCTL_HANDLER_ARGS)
716 {
717 	int enable = V_pfil_ipfw;
718 	int error;
719 
720 	error = sysctl_handle_int(oidp, &enable, 0, req);
721 	enable &= 1;
722 
723 	if (enable != V_pfil_ipfw) {
724 		V_pfil_ipfw = enable;
725 
726 		/*
727 		 * Disable pfil so that ipfw doesnt run twice, if the user
728 		 * really wants both then they can re-enable pfil_bridge and/or
729 		 * pfil_member. Also allow non-ip packets as ipfw can filter by
730 		 * layer2 type.
731 		 */
732 		if (V_pfil_ipfw) {
733 			V_pfil_onlyip = 0;
734 			V_pfil_bridge = 0;
735 			V_pfil_member = 0;
736 		}
737 	}
738 
739 	return (error);
740 }
741 SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw,
742     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET | CTLFLAG_NEEDGIANT,
743     &VNET_NAME(pfil_ipfw), 0, &sysctl_pfil_ipfw, "I",
744     "Layer2 filter with IPFW");
745 
746 #ifdef VIMAGE
747 static void
748 bridge_reassign(struct ifnet *ifp, struct vnet *newvnet, char *arg)
749 {
750 	struct bridge_softc *sc = ifp->if_softc;
751 	struct bridge_iflist *bif;
752 
753 	BRIDGE_LOCK(sc);
754 
755 	while ((bif = CK_LIST_FIRST(&sc->sc_iflist)) != NULL)
756 		bridge_delete_member(sc, bif, 0);
757 
758 	while ((bif = CK_LIST_FIRST(&sc->sc_spanlist)) != NULL) {
759 		bridge_delete_span(sc, bif);
760 	}
761 
762 	BRIDGE_UNLOCK(sc);
763 
764 	ether_reassign(ifp, newvnet, arg);
765 }
766 #endif
767 
768 /*
769  * bridge_get_softc:
770  *
771  * Return the bridge softc for an ifnet.
772  */
773 static void *
774 bridge_get_softc(struct ifnet *ifp)
775 {
776 	struct bridge_iflist *bif;
777 
778 	NET_EPOCH_ASSERT();
779 
780 	bif = ifp->if_bridge;
781 	if (bif == NULL)
782 		return (NULL);
783 	return (bif->bif_sc);
784 }
785 
786 /*
787  * bridge_same:
788  *
789  * Return true if two interfaces are in the same bridge.  This is only used by
790  * bridgestp via bridge_same_p.
791  */
792 static bool
793 bridge_same(const void *bifap, const void *bifbp)
794 {
795 	const struct bridge_iflist *bifa = bifap, *bifb = bifbp;
796 
797 	NET_EPOCH_ASSERT();
798 
799 	if (bifa == NULL || bifb == NULL)
800 		return (false);
801 
802 	return (bifa->bif_sc == bifb->bif_sc);
803 }
804 
805 /*
806  * bridge_clone_create:
807  *
808  *	Create a new bridge instance.
809  */
810 static int
811 bridge_clone_create(struct if_clone *ifc, char *name, size_t len,
812     struct ifc_data *ifd, struct ifnet **ifpp)
813 {
814 	struct bridge_softc *sc;
815 	struct ifnet *ifp;
816 
817 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
818 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
819 
820 	BRIDGE_LOCK_INIT(sc);
821 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
822 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
823 
824 	/* Initialize our routing table. */
825 	bridge_rtable_init(sc);
826 
827 	callout_init_mtx(&sc->sc_brcallout, &sc->sc_rt_mtx, 0);
828 
829 	CK_LIST_INIT(&sc->sc_iflist);
830 	CK_LIST_INIT(&sc->sc_spanlist);
831 
832 	ifp->if_softc = sc;
833 	if_initname(ifp, bridge_name, ifd->unit);
834 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
835 	ifp->if_ioctl = bridge_ioctl;
836 #ifdef ALTQ
837 	ifp->if_start = bridge_altq_start;
838 	ifp->if_transmit = bridge_altq_transmit;
839 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
840 	ifp->if_snd.ifq_drv_maxlen = 0;
841 	IFQ_SET_READY(&ifp->if_snd);
842 #else
843 	ifp->if_transmit = bridge_transmit;
844 #endif
845 	ifp->if_qflush = bridge_qflush;
846 	ifp->if_init = bridge_init;
847 	ifp->if_type = IFT_BRIDGE;
848 
849 	ether_gen_addr(ifp, &sc->sc_defaddr);
850 
851 	bstp_attach(&sc->sc_stp, &bridge_ops);
852 	ether_ifattach(ifp, sc->sc_defaddr.octet);
853 	/* Now undo some of the damage... */
854 	ifp->if_baudrate = 0;
855 	ifp->if_type = IFT_BRIDGE;
856 #ifdef VIMAGE
857 	ifp->if_reassign = bridge_reassign;
858 #endif
859 	sc->sc_if_input = ifp->if_input;	/* ether_input */
860 	ifp->if_input = bridge_inject;
861 
862 	/*
863 	 * Allow BRIDGE_INPUT() to pass in packets originating from the bridge
864 	 * itself via bridge_inject().  This is required for netmap but
865 	 * otherwise has no effect.
866 	 */
867 	ifp->if_bridge_input = bridge_input;
868 
869 	BRIDGE_LIST_LOCK();
870 	LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list);
871 	BRIDGE_LIST_UNLOCK();
872 	*ifpp = ifp;
873 
874 	return (0);
875 }
876 
877 static void
878 bridge_clone_destroy_cb(struct epoch_context *ctx)
879 {
880 	struct bridge_softc *sc;
881 
882 	sc = __containerof(ctx, struct bridge_softc, sc_epoch_ctx);
883 
884 	BRIDGE_LOCK_DESTROY(sc);
885 	free(sc, M_DEVBUF);
886 }
887 
888 /*
889  * bridge_clone_destroy:
890  *
891  *	Destroy a bridge instance.
892  */
893 static int
894 bridge_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
895 {
896 	struct bridge_softc *sc = ifp->if_softc;
897 	struct bridge_iflist *bif;
898 	struct epoch_tracker et;
899 
900 	BRIDGE_LOCK(sc);
901 
902 	bridge_stop(ifp, 1);
903 	ifp->if_flags &= ~IFF_UP;
904 
905 	while ((bif = CK_LIST_FIRST(&sc->sc_iflist)) != NULL)
906 		bridge_delete_member(sc, bif, 0);
907 
908 	while ((bif = CK_LIST_FIRST(&sc->sc_spanlist)) != NULL) {
909 		bridge_delete_span(sc, bif);
910 	}
911 
912 	/* Tear down the routing table. */
913 	bridge_rtable_fini(sc);
914 
915 	BRIDGE_UNLOCK(sc);
916 
917 	NET_EPOCH_ENTER(et);
918 
919 	callout_drain(&sc->sc_brcallout);
920 
921 	BRIDGE_LIST_LOCK();
922 	LIST_REMOVE(sc, sc_list);
923 	BRIDGE_LIST_UNLOCK();
924 
925 	bstp_detach(&sc->sc_stp);
926 #ifdef ALTQ
927 	IFQ_PURGE(&ifp->if_snd);
928 #endif
929 	NET_EPOCH_EXIT(et);
930 
931 	ether_ifdetach(ifp);
932 	if_free(ifp);
933 
934 	NET_EPOCH_CALL(bridge_clone_destroy_cb, &sc->sc_epoch_ctx);
935 
936 	return (0);
937 }
938 
939 /*
940  * bridge_ioctl:
941  *
942  *	Handle a control request from the operator.
943  */
944 static int
945 bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
946 {
947 	struct bridge_softc *sc = ifp->if_softc;
948 	struct ifreq *ifr = (struct ifreq *)data;
949 	struct bridge_iflist *bif;
950 	struct thread *td = curthread;
951 	union {
952 		struct ifbreq ifbreq;
953 		struct ifbifconf ifbifconf;
954 		struct ifbareq ifbareq;
955 		struct ifbaconf ifbaconf;
956 		struct ifbrparam ifbrparam;
957 		struct ifbropreq ifbropreq;
958 	} args;
959 	struct ifdrv *ifd = (struct ifdrv *) data;
960 	const struct bridge_control *bc;
961 	int error = 0, oldmtu;
962 
963 	BRIDGE_LOCK(sc);
964 
965 	switch (cmd) {
966 	case SIOCADDMULTI:
967 	case SIOCDELMULTI:
968 		break;
969 
970 	case SIOCGDRVSPEC:
971 	case SIOCSDRVSPEC:
972 		if (ifd->ifd_cmd >= bridge_control_table_size) {
973 			error = EINVAL;
974 			break;
975 		}
976 		bc = &bridge_control_table[ifd->ifd_cmd];
977 
978 		if (cmd == SIOCGDRVSPEC &&
979 		    (bc->bc_flags & BC_F_COPYOUT) == 0) {
980 			error = EINVAL;
981 			break;
982 		}
983 		else if (cmd == SIOCSDRVSPEC &&
984 		    (bc->bc_flags & BC_F_COPYOUT) != 0) {
985 			error = EINVAL;
986 			break;
987 		}
988 
989 		if (bc->bc_flags & BC_F_SUSER) {
990 			error = priv_check(td, PRIV_NET_BRIDGE);
991 			if (error)
992 				break;
993 		}
994 
995 		if (ifd->ifd_len != bc->bc_argsize ||
996 		    ifd->ifd_len > sizeof(args)) {
997 			error = EINVAL;
998 			break;
999 		}
1000 
1001 		bzero(&args, sizeof(args));
1002 		if (bc->bc_flags & BC_F_COPYIN) {
1003 			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
1004 			if (error)
1005 				break;
1006 		}
1007 
1008 		oldmtu = ifp->if_mtu;
1009 		error = (*bc->bc_func)(sc, &args);
1010 		if (error)
1011 			break;
1012 
1013 		/*
1014 		 * Bridge MTU may change during addition of the first port.
1015 		 * If it did, do network layer specific procedure.
1016 		 */
1017 		if (ifp->if_mtu != oldmtu)
1018 			if_notifymtu(ifp);
1019 
1020 		if (bc->bc_flags & BC_F_COPYOUT)
1021 			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
1022 
1023 		break;
1024 
1025 	case SIOCSIFFLAGS:
1026 		if (!(ifp->if_flags & IFF_UP) &&
1027 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1028 			/*
1029 			 * If interface is marked down and it is running,
1030 			 * then stop and disable it.
1031 			 */
1032 			bridge_stop(ifp, 1);
1033 		} else if ((ifp->if_flags & IFF_UP) &&
1034 		    !(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1035 			/*
1036 			 * If interface is marked up and it is stopped, then
1037 			 * start it.
1038 			 */
1039 			BRIDGE_UNLOCK(sc);
1040 			(*ifp->if_init)(sc);
1041 			BRIDGE_LOCK(sc);
1042 		}
1043 		break;
1044 
1045 	case SIOCSIFMTU:
1046 		oldmtu = sc->sc_ifp->if_mtu;
1047 
1048 		if (ifr->ifr_mtu < IF_MINMTU) {
1049 			error = EINVAL;
1050 			break;
1051 		}
1052 		if (CK_LIST_EMPTY(&sc->sc_iflist)) {
1053 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1054 			break;
1055 		}
1056 		CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1057 			error = (*bif->bif_ifp->if_ioctl)(bif->bif_ifp,
1058 			    SIOCSIFMTU, (caddr_t)ifr);
1059 			if (error != 0) {
1060 				log(LOG_NOTICE, "%s: invalid MTU: %u for"
1061 				    " member %s\n", sc->sc_ifp->if_xname,
1062 				    ifr->ifr_mtu,
1063 				    bif->bif_ifp->if_xname);
1064 				error = EINVAL;
1065 				break;
1066 			}
1067 		}
1068 		if (error) {
1069 			/* Restore the previous MTU on all member interfaces. */
1070 			ifr->ifr_mtu = oldmtu;
1071 			CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1072 				(*bif->bif_ifp->if_ioctl)(bif->bif_ifp,
1073 				    SIOCSIFMTU, (caddr_t)ifr);
1074 			}
1075 		} else {
1076 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1077 		}
1078 		break;
1079 	default:
1080 		/*
1081 		 * drop the lock as ether_ioctl() will call bridge_start() and
1082 		 * cause the lock to be recursed.
1083 		 */
1084 		BRIDGE_UNLOCK(sc);
1085 		error = ether_ioctl(ifp, cmd, data);
1086 		BRIDGE_LOCK(sc);
1087 		break;
1088 	}
1089 
1090 	BRIDGE_UNLOCK(sc);
1091 
1092 	return (error);
1093 }
1094 
1095 /*
1096  * bridge_mutecaps:
1097  *
1098  *	Clear or restore unwanted capabilities on the member interface
1099  */
1100 static void
1101 bridge_mutecaps(struct bridge_softc *sc)
1102 {
1103 	struct bridge_iflist *bif;
1104 	int enabled, mask;
1105 
1106 	BRIDGE_LOCK_ASSERT(sc);
1107 
1108 	/* Initial bitmask of capabilities to test */
1109 	mask = BRIDGE_IFCAPS_MASK;
1110 
1111 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1112 		/* Every member must support it or its disabled */
1113 		mask &= bif->bif_savedcaps;
1114 	}
1115 
1116 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1117 		enabled = bif->bif_ifp->if_capenable;
1118 		enabled &= ~BRIDGE_IFCAPS_STRIP;
1119 		/* strip off mask bits and enable them again if allowed */
1120 		enabled &= ~BRIDGE_IFCAPS_MASK;
1121 		enabled |= mask;
1122 		bridge_set_ifcap(sc, bif, enabled);
1123 	}
1124 }
1125 
1126 static void
1127 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1128 {
1129 	struct ifnet *ifp = bif->bif_ifp;
1130 	struct ifreq ifr;
1131 	int error, mask, stuck;
1132 
1133 	bzero(&ifr, sizeof(ifr));
1134 	ifr.ifr_reqcap = set;
1135 
1136 	if (ifp->if_capenable != set) {
1137 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
1138 		if (error)
1139 			if_printf(sc->sc_ifp,
1140 			    "error setting capabilities on %s: %d\n",
1141 			    ifp->if_xname, error);
1142 		mask = BRIDGE_IFCAPS_MASK | BRIDGE_IFCAPS_STRIP;
1143 		stuck = ifp->if_capenable & mask & ~set;
1144 		if (stuck != 0)
1145 			if_printf(sc->sc_ifp,
1146 			    "can't disable some capabilities on %s: 0x%x\n",
1147 			    ifp->if_xname, stuck);
1148 	}
1149 }
1150 
1151 /*
1152  * bridge_lookup_member:
1153  *
1154  *	Lookup a bridge member interface.
1155  */
1156 static struct bridge_iflist *
1157 bridge_lookup_member(struct bridge_softc *sc, const char *name)
1158 {
1159 	struct bridge_iflist *bif;
1160 	struct ifnet *ifp;
1161 
1162 	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
1163 
1164 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1165 		ifp = bif->bif_ifp;
1166 		if (strcmp(ifp->if_xname, name) == 0)
1167 			return (bif);
1168 	}
1169 
1170 	return (NULL);
1171 }
1172 
1173 /*
1174  * bridge_lookup_member_if:
1175  *
1176  *	Lookup a bridge member interface by ifnet*.
1177  */
1178 static struct bridge_iflist *
1179 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
1180 {
1181 	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
1182 	return (member_ifp->if_bridge);
1183 }
1184 
1185 static void
1186 bridge_delete_member_cb(struct epoch_context *ctx)
1187 {
1188 	struct bridge_iflist *bif;
1189 
1190 	bif = __containerof(ctx, struct bridge_iflist, bif_epoch_ctx);
1191 
1192 	free(bif, M_DEVBUF);
1193 }
1194 
1195 /*
1196  * bridge_delete_member:
1197  *
1198  *	Delete the specified member interface.
1199  */
1200 static void
1201 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
1202     int gone)
1203 {
1204 	struct ifnet *ifs = bif->bif_ifp;
1205 	struct ifnet *fif = NULL;
1206 	struct bridge_iflist *bifl;
1207 
1208 	BRIDGE_LOCK_ASSERT(sc);
1209 
1210 	if (bif->bif_flags & IFBIF_STP)
1211 		bstp_disable(&bif->bif_stp);
1212 
1213 	ifs->if_bridge = NULL;
1214 	CK_LIST_REMOVE(bif, bif_next);
1215 
1216 	/*
1217 	 * If removing the interface that gave the bridge its mac address, set
1218 	 * the mac address of the bridge to the address of the next member, or
1219 	 * to its default address if no members are left.
1220 	 */
1221 	if (V_bridge_inherit_mac && sc->sc_ifaddr == ifs) {
1222 		if (CK_LIST_EMPTY(&sc->sc_iflist)) {
1223 			bcopy(&sc->sc_defaddr,
1224 			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
1225 			sc->sc_ifaddr = NULL;
1226 		} else {
1227 			bifl = CK_LIST_FIRST(&sc->sc_iflist);
1228 			fif = bifl->bif_ifp;
1229 			bcopy(IF_LLADDR(fif),
1230 			    IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
1231 			sc->sc_ifaddr = fif;
1232 		}
1233 		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
1234 	}
1235 
1236 	bridge_linkcheck(sc);
1237 	bridge_mutecaps(sc);	/* recalcuate now this interface is removed */
1238 	BRIDGE_RT_LOCK(sc);
1239 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
1240 	BRIDGE_RT_UNLOCK(sc);
1241 	KASSERT(bif->bif_addrcnt == 0,
1242 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
1243 
1244 	ifs->if_bridge_output = NULL;
1245 	ifs->if_bridge_input = NULL;
1246 	ifs->if_bridge_linkstate = NULL;
1247 	if (!gone) {
1248 		switch (ifs->if_type) {
1249 		case IFT_ETHER:
1250 		case IFT_L2VLAN:
1251 			/*
1252 			 * Take the interface out of promiscuous mode, but only
1253 			 * if it was promiscuous in the first place. It might
1254 			 * not be if we're in the bridge_ioctl_add() error path.
1255 			 */
1256 			if (ifs->if_flags & IFF_PROMISC)
1257 				(void) ifpromisc(ifs, 0);
1258 			break;
1259 
1260 		case IFT_GIF:
1261 			break;
1262 
1263 		default:
1264 #ifdef DIAGNOSTIC
1265 			panic("bridge_delete_member: impossible");
1266 #endif
1267 			break;
1268 		}
1269 		/* reneable any interface capabilities */
1270 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
1271 	}
1272 	bstp_destroy(&bif->bif_stp);	/* prepare to free */
1273 
1274 	NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx);
1275 }
1276 
1277 /*
1278  * bridge_delete_span:
1279  *
1280  *	Delete the specified span interface.
1281  */
1282 static void
1283 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
1284 {
1285 	BRIDGE_LOCK_ASSERT(sc);
1286 
1287 	KASSERT(bif->bif_ifp->if_bridge == NULL,
1288 	    ("%s: not a span interface", __func__));
1289 
1290 	CK_LIST_REMOVE(bif, bif_next);
1291 
1292 	NET_EPOCH_CALL(bridge_delete_member_cb, &bif->bif_epoch_ctx);
1293 }
1294 
1295 static int
1296 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
1297 {
1298 	struct ifbreq *req = arg;
1299 	struct bridge_iflist *bif = NULL;
1300 	struct ifnet *ifs;
1301 	int error = 0;
1302 
1303 	ifs = ifunit(req->ifbr_ifsname);
1304 	if (ifs == NULL)
1305 		return (ENOENT);
1306 	if (ifs->if_ioctl == NULL)	/* must be supported */
1307 		return (EINVAL);
1308 
1309 	/* If it's in the span list, it can't be a member. */
1310 	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1311 		if (ifs == bif->bif_ifp)
1312 			return (EBUSY);
1313 
1314 	if (ifs->if_bridge) {
1315 		struct bridge_iflist *sbif = ifs->if_bridge;
1316 		if (sbif->bif_sc == sc)
1317 			return (EEXIST);
1318 
1319 		return (EBUSY);
1320 	}
1321 
1322 	switch (ifs->if_type) {
1323 	case IFT_ETHER:
1324 	case IFT_L2VLAN:
1325 	case IFT_GIF:
1326 		/* permitted interface types */
1327 		break;
1328 	default:
1329 		return (EINVAL);
1330 	}
1331 
1332 #ifdef INET6
1333 	/*
1334 	 * Two valid inet6 addresses with link-local scope must not be
1335 	 * on the parent interface and the member interfaces at the
1336 	 * same time.  This restriction is needed to prevent violation
1337 	 * of link-local scope zone.  Attempts to add a member
1338 	 * interface which has inet6 addresses when the parent has
1339 	 * inet6 triggers removal of all inet6 addresses on the member
1340 	 * interface.
1341 	 */
1342 
1343 	/* Check if the parent interface has a link-local scope addr. */
1344 	if (V_allow_llz_overlap == 0 &&
1345 	    in6ifa_llaonifp(sc->sc_ifp) != NULL) {
1346 		/*
1347 		 * If any, remove all inet6 addresses from the member
1348 		 * interfaces.
1349 		 */
1350 		CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1351  			if (in6ifa_llaonifp(bif->bif_ifp)) {
1352 				in6_ifdetach(bif->bif_ifp);
1353 				if_printf(sc->sc_ifp,
1354 				    "IPv6 addresses on %s have been removed "
1355 				    "before adding it as a member to prevent "
1356 				    "IPv6 address scope violation.\n",
1357 				    bif->bif_ifp->if_xname);
1358 			}
1359 		}
1360 		if (in6ifa_llaonifp(ifs)) {
1361 			in6_ifdetach(ifs);
1362 			if_printf(sc->sc_ifp,
1363 			    "IPv6 addresses on %s have been removed "
1364 			    "before adding it as a member to prevent "
1365 			    "IPv6 address scope violation.\n",
1366 			    ifs->if_xname);
1367 		}
1368 	}
1369 #endif
1370 
1371 	/*
1372 	 * If member_ifaddrs is disabled, do not allow an interface with
1373 	 * assigned IP addresses to be added to a bridge.
1374 	 */
1375 	if (!V_member_ifaddrs) {
1376 		struct ifaddr *ifa;
1377 
1378 		CK_STAILQ_FOREACH(ifa, &ifs->if_addrhead, ifa_link) {
1379 #ifdef INET
1380 			if (ifa->ifa_addr->sa_family == AF_INET)
1381 				return (EINVAL);
1382 #endif
1383 #ifdef INET6
1384 			if (ifa->ifa_addr->sa_family == AF_INET6)
1385 				return (EINVAL);
1386 #endif
1387 		}
1388 	}
1389 
1390 	/* Allow the first Ethernet member to define the MTU */
1391 	if (CK_LIST_EMPTY(&sc->sc_iflist))
1392 		sc->sc_ifp->if_mtu = ifs->if_mtu;
1393 	else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
1394 		struct ifreq ifr;
1395 
1396 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s",
1397 		    ifs->if_xname);
1398 		ifr.ifr_mtu = sc->sc_ifp->if_mtu;
1399 
1400 		error = (*ifs->if_ioctl)(ifs,
1401 		    SIOCSIFMTU, (caddr_t)&ifr);
1402 		if (error != 0) {
1403 			log(LOG_NOTICE, "%s: invalid MTU: %u for"
1404 			    " new member %s\n", sc->sc_ifp->if_xname,
1405 			    ifr.ifr_mtu,
1406 			    ifs->if_xname);
1407 			return (EINVAL);
1408 		}
1409 	}
1410 
1411 	bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
1412 	if (bif == NULL)
1413 		return (ENOMEM);
1414 
1415 	bif->bif_sc = sc;
1416 	bif->bif_ifp = ifs;
1417 	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
1418 	bif->bif_savedcaps = ifs->if_capenable;
1419 
1420 	/*
1421 	 * Assign the interface's MAC address to the bridge if it's the first
1422 	 * member and the MAC address of the bridge has not been changed from
1423 	 * the default randomly generated one.
1424 	 */
1425 	if (V_bridge_inherit_mac && CK_LIST_EMPTY(&sc->sc_iflist) &&
1426 	    !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr.octet, ETHER_ADDR_LEN)) {
1427 		bcopy(IF_LLADDR(ifs), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
1428 		sc->sc_ifaddr = ifs;
1429 		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
1430 	}
1431 
1432 	ifs->if_bridge = bif;
1433 	ifs->if_bridge_output = bridge_output;
1434 	ifs->if_bridge_input = bridge_input;
1435 	ifs->if_bridge_linkstate = bridge_linkstate;
1436 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
1437 	/*
1438 	 * XXX: XLOCK HERE!?!
1439 	 *
1440 	 * NOTE: insert_***HEAD*** should be safe for the traversals.
1441 	 */
1442 	CK_LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
1443 
1444 	/* Set interface capabilities to the intersection set of all members */
1445 	bridge_mutecaps(sc);
1446 	bridge_linkcheck(sc);
1447 
1448 	/* Place the interface into promiscuous mode */
1449 	switch (ifs->if_type) {
1450 		case IFT_ETHER:
1451 		case IFT_L2VLAN:
1452 			error = ifpromisc(ifs, 1);
1453 			break;
1454 	}
1455 
1456 	if (error)
1457 		bridge_delete_member(sc, bif, 0);
1458 	return (error);
1459 }
1460 
1461 static int
1462 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
1463 {
1464 	struct ifbreq *req = arg;
1465 	struct bridge_iflist *bif;
1466 
1467 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1468 	if (bif == NULL)
1469 		return (ENOENT);
1470 
1471 	bridge_delete_member(sc, bif, 0);
1472 
1473 	return (0);
1474 }
1475 
1476 static int
1477 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
1478 {
1479 	struct ifbreq *req = arg;
1480 	struct bridge_iflist *bif;
1481 	struct bstp_port *bp;
1482 
1483 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1484 	if (bif == NULL)
1485 		return (ENOENT);
1486 
1487 	bp = &bif->bif_stp;
1488 	req->ifbr_ifsflags = bif->bif_flags;
1489 	req->ifbr_state = bp->bp_state;
1490 	req->ifbr_priority = bp->bp_priority;
1491 	req->ifbr_path_cost = bp->bp_path_cost;
1492 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
1493 	req->ifbr_proto = bp->bp_protover;
1494 	req->ifbr_role = bp->bp_role;
1495 	req->ifbr_stpflags = bp->bp_flags;
1496 	req->ifbr_addrcnt = bif->bif_addrcnt;
1497 	req->ifbr_addrmax = bif->bif_addrmax;
1498 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
1499 
1500 	/* Copy STP state options as flags */
1501 	if (bp->bp_operedge)
1502 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
1503 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE)
1504 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
1505 	if (bp->bp_ptp_link)
1506 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
1507 	if (bp->bp_flags & BSTP_PORT_AUTOPTP)
1508 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
1509 	if (bp->bp_flags & BSTP_PORT_ADMEDGE)
1510 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
1511 	if (bp->bp_flags & BSTP_PORT_ADMCOST)
1512 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
1513 	return (0);
1514 }
1515 
1516 static int
1517 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1518 {
1519 	struct epoch_tracker et;
1520 	struct ifbreq *req = arg;
1521 	struct bridge_iflist *bif;
1522 	struct bstp_port *bp;
1523 	int error;
1524 
1525 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1526 	if (bif == NULL)
1527 		return (ENOENT);
1528 	bp = &bif->bif_stp;
1529 
1530 	if (req->ifbr_ifsflags & IFBIF_SPAN)
1531 		/* SPAN is readonly */
1532 		return (EINVAL);
1533 
1534 	NET_EPOCH_ENTER(et);
1535 
1536 	if (req->ifbr_ifsflags & IFBIF_STP) {
1537 		if ((bif->bif_flags & IFBIF_STP) == 0) {
1538 			error = bstp_enable(&bif->bif_stp);
1539 			if (error) {
1540 				NET_EPOCH_EXIT(et);
1541 				return (error);
1542 			}
1543 		}
1544 	} else {
1545 		if ((bif->bif_flags & IFBIF_STP) != 0)
1546 			bstp_disable(&bif->bif_stp);
1547 	}
1548 
1549 	/* Pass on STP flags */
1550 	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
1551 	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
1552 	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
1553 	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
1554 
1555 	/* Save the bits relating to the bridge */
1556 	bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK;
1557 
1558 	NET_EPOCH_EXIT(et);
1559 
1560 	return (0);
1561 }
1562 
1563 static int
1564 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1565 {
1566 	struct ifbrparam *param = arg;
1567 
1568 	sc->sc_brtmax = param->ifbrp_csize;
1569 	bridge_rttrim(sc);
1570 
1571 	return (0);
1572 }
1573 
1574 static int
1575 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1576 {
1577 	struct ifbrparam *param = arg;
1578 
1579 	param->ifbrp_csize = sc->sc_brtmax;
1580 
1581 	return (0);
1582 }
1583 
1584 static int
1585 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1586 {
1587 	struct ifbifconf *bifc = arg;
1588 	struct bridge_iflist *bif;
1589 	struct ifbreq breq;
1590 	char *buf, *outbuf;
1591 	int count, buflen, len, error = 0;
1592 
1593 	count = 0;
1594 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
1595 		count++;
1596 	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1597 		count++;
1598 
1599 	buflen = sizeof(breq) * count;
1600 	if (bifc->ifbic_len == 0) {
1601 		bifc->ifbic_len = buflen;
1602 		return (0);
1603 	}
1604 	outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO);
1605 	if (outbuf == NULL)
1606 		return (ENOMEM);
1607 
1608 	count = 0;
1609 	buf = outbuf;
1610 	len = min(bifc->ifbic_len, buflen);
1611 	bzero(&breq, sizeof(breq));
1612 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1613 		if (len < sizeof(breq))
1614 			break;
1615 
1616 		strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
1617 		    sizeof(breq.ifbr_ifsname));
1618 		/* Fill in the ifbreq structure */
1619 		error = bridge_ioctl_gifflags(sc, &breq);
1620 		if (error)
1621 			break;
1622 		memcpy(buf, &breq, sizeof(breq));
1623 		count++;
1624 		buf += sizeof(breq);
1625 		len -= sizeof(breq);
1626 	}
1627 	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
1628 		if (len < sizeof(breq))
1629 			break;
1630 
1631 		strlcpy(breq.ifbr_ifsname, bif->bif_ifp->if_xname,
1632 		    sizeof(breq.ifbr_ifsname));
1633 		breq.ifbr_ifsflags = bif->bif_flags;
1634 		breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff;
1635 		memcpy(buf, &breq, sizeof(breq));
1636 		count++;
1637 		buf += sizeof(breq);
1638 		len -= sizeof(breq);
1639 	}
1640 
1641 	bifc->ifbic_len = sizeof(breq) * count;
1642 	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);
1643 	free(outbuf, M_TEMP);
1644 	return (error);
1645 }
1646 
1647 static int
1648 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1649 {
1650 	struct ifbaconf *bac = arg;
1651 	struct bridge_rtnode *brt;
1652 	struct ifbareq bareq;
1653 	char *buf, *outbuf;
1654 	int count, buflen, len, error = 0;
1655 
1656 	if (bac->ifbac_len == 0)
1657 		return (0);
1658 
1659 	count = 0;
1660 	CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)
1661 		count++;
1662 	buflen = sizeof(bareq) * count;
1663 
1664 	outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO);
1665 	if (outbuf == NULL)
1666 		return (ENOMEM);
1667 
1668 	count = 0;
1669 	buf = outbuf;
1670 	len = min(bac->ifbac_len, buflen);
1671 	bzero(&bareq, sizeof(bareq));
1672 	CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
1673 		if (len < sizeof(bareq))
1674 			goto out;
1675 		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1676 		    sizeof(bareq.ifba_ifsname));
1677 		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1678 		bareq.ifba_vlan = brt->brt_vlan;
1679 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
1680 				time_uptime < brt->brt_expire)
1681 			bareq.ifba_expire = brt->brt_expire - time_uptime;
1682 		else
1683 			bareq.ifba_expire = 0;
1684 		bareq.ifba_flags = brt->brt_flags;
1685 
1686 		memcpy(buf, &bareq, sizeof(bareq));
1687 		count++;
1688 		buf += sizeof(bareq);
1689 		len -= sizeof(bareq);
1690 	}
1691 out:
1692 	bac->ifbac_len = sizeof(bareq) * count;
1693 	error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);
1694 	free(outbuf, M_TEMP);
1695 	return (error);
1696 }
1697 
1698 static int
1699 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1700 {
1701 	struct ifbareq *req = arg;
1702 	struct bridge_iflist *bif;
1703 	struct epoch_tracker et;
1704 	int error;
1705 
1706 	NET_EPOCH_ENTER(et);
1707 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
1708 	if (bif == NULL) {
1709 		NET_EPOCH_EXIT(et);
1710 		return (ENOENT);
1711 	}
1712 
1713 	/* bridge_rtupdate() may acquire the lock. */
1714 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
1715 	    req->ifba_flags);
1716 	NET_EPOCH_EXIT(et);
1717 
1718 	return (error);
1719 }
1720 
1721 static int
1722 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1723 {
1724 	struct ifbrparam *param = arg;
1725 
1726 	sc->sc_brttimeout = param->ifbrp_ctime;
1727 	return (0);
1728 }
1729 
1730 static int
1731 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1732 {
1733 	struct ifbrparam *param = arg;
1734 
1735 	param->ifbrp_ctime = sc->sc_brttimeout;
1736 	return (0);
1737 }
1738 
1739 static int
1740 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1741 {
1742 	struct ifbareq *req = arg;
1743 	int vlan = req->ifba_vlan;
1744 
1745 	/* Userspace uses '0' to mean 'any vlan' */
1746 	if (vlan == 0)
1747 		vlan = DOT1Q_VID_RSVD_IMPL;
1748 
1749 	return (bridge_rtdaddr(sc, req->ifba_dst, vlan));
1750 }
1751 
1752 static int
1753 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1754 {
1755 	struct ifbreq *req = arg;
1756 
1757 	BRIDGE_RT_LOCK(sc);
1758 	bridge_rtflush(sc, req->ifbr_ifsflags);
1759 	BRIDGE_RT_UNLOCK(sc);
1760 
1761 	return (0);
1762 }
1763 
1764 static int
1765 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1766 {
1767 	struct ifbrparam *param = arg;
1768 	struct bstp_state *bs = &sc->sc_stp;
1769 
1770 	param->ifbrp_prio = bs->bs_bridge_priority;
1771 	return (0);
1772 }
1773 
1774 static int
1775 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1776 {
1777 	struct ifbrparam *param = arg;
1778 
1779 	return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio));
1780 }
1781 
1782 static int
1783 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1784 {
1785 	struct ifbrparam *param = arg;
1786 	struct bstp_state *bs = &sc->sc_stp;
1787 
1788 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
1789 	return (0);
1790 }
1791 
1792 static int
1793 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1794 {
1795 	struct ifbrparam *param = arg;
1796 
1797 	return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime));
1798 }
1799 
1800 static int
1801 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1802 {
1803 	struct ifbrparam *param = arg;
1804 	struct bstp_state *bs = &sc->sc_stp;
1805 
1806 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
1807 	return (0);
1808 }
1809 
1810 static int
1811 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1812 {
1813 	struct ifbrparam *param = arg;
1814 
1815 	return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay));
1816 }
1817 
1818 static int
1819 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1820 {
1821 	struct ifbrparam *param = arg;
1822 	struct bstp_state *bs = &sc->sc_stp;
1823 
1824 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
1825 	return (0);
1826 }
1827 
1828 static int
1829 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1830 {
1831 	struct ifbrparam *param = arg;
1832 
1833 	return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage));
1834 }
1835 
1836 static int
1837 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1838 {
1839 	struct ifbreq *req = arg;
1840 	struct bridge_iflist *bif;
1841 
1842 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1843 	if (bif == NULL)
1844 		return (ENOENT);
1845 
1846 	return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority));
1847 }
1848 
1849 static int
1850 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1851 {
1852 	struct ifbreq *req = arg;
1853 	struct bridge_iflist *bif;
1854 
1855 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1856 	if (bif == NULL)
1857 		return (ENOENT);
1858 
1859 	return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost));
1860 }
1861 
1862 static int
1863 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
1864 {
1865 	struct ifbreq *req = arg;
1866 	struct bridge_iflist *bif;
1867 
1868 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1869 	if (bif == NULL)
1870 		return (ENOENT);
1871 
1872 	bif->bif_addrmax = req->ifbr_addrmax;
1873 	return (0);
1874 }
1875 
1876 static int
1877 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
1878 {
1879 	struct ifbreq *req = arg;
1880 	struct bridge_iflist *bif = NULL;
1881 	struct ifnet *ifs;
1882 
1883 	ifs = ifunit(req->ifbr_ifsname);
1884 	if (ifs == NULL)
1885 		return (ENOENT);
1886 
1887 	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1888 		if (ifs == bif->bif_ifp)
1889 			return (EBUSY);
1890 
1891 	if (ifs->if_bridge != NULL)
1892 		return (EBUSY);
1893 
1894 	switch (ifs->if_type) {
1895 		case IFT_ETHER:
1896 		case IFT_GIF:
1897 		case IFT_L2VLAN:
1898 			break;
1899 		default:
1900 			return (EINVAL);
1901 	}
1902 
1903 	bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
1904 	if (bif == NULL)
1905 		return (ENOMEM);
1906 
1907 	bif->bif_ifp = ifs;
1908 	bif->bif_flags = IFBIF_SPAN;
1909 
1910 	CK_LIST_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
1911 
1912 	return (0);
1913 }
1914 
1915 static int
1916 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
1917 {
1918 	struct ifbreq *req = arg;
1919 	struct bridge_iflist *bif;
1920 	struct ifnet *ifs;
1921 
1922 	ifs = ifunit(req->ifbr_ifsname);
1923 	if (ifs == NULL)
1924 		return (ENOENT);
1925 
1926 	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
1927 		if (ifs == bif->bif_ifp)
1928 			break;
1929 
1930 	if (bif == NULL)
1931 		return (ENOENT);
1932 
1933 	bridge_delete_span(sc, bif);
1934 
1935 	return (0);
1936 }
1937 
1938 static int
1939 bridge_ioctl_gbparam(struct bridge_softc *sc, void *arg)
1940 {
1941 	struct ifbropreq *req = arg;
1942 	struct bstp_state *bs = &sc->sc_stp;
1943 	struct bstp_port *root_port;
1944 
1945 	req->ifbop_maxage = bs->bs_bridge_max_age >> 8;
1946 	req->ifbop_hellotime = bs->bs_bridge_htime >> 8;
1947 	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8;
1948 
1949 	root_port = bs->bs_root_port;
1950 	if (root_port == NULL)
1951 		req->ifbop_root_port = 0;
1952 	else
1953 		req->ifbop_root_port = root_port->bp_ifp->if_index;
1954 
1955 	req->ifbop_holdcount = bs->bs_txholdcount;
1956 	req->ifbop_priority = bs->bs_bridge_priority;
1957 	req->ifbop_protocol = bs->bs_protover;
1958 	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost;
1959 	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id;
1960 	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id;
1961 	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id;
1962 	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec;
1963 	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec;
1964 
1965 	return (0);
1966 }
1967 
1968 static int
1969 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
1970 {
1971 	struct ifbrparam *param = arg;
1972 
1973 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
1974 	return (0);
1975 }
1976 
1977 static int
1978 bridge_ioctl_gifsstp(struct bridge_softc *sc, void *arg)
1979 {
1980 	struct ifbpstpconf *bifstp = arg;
1981 	struct bridge_iflist *bif;
1982 	struct bstp_port *bp;
1983 	struct ifbpstpreq bpreq;
1984 	char *buf, *outbuf;
1985 	int count, buflen, len, error = 0;
1986 
1987 	count = 0;
1988 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1989 		if ((bif->bif_flags & IFBIF_STP) != 0)
1990 			count++;
1991 	}
1992 
1993 	buflen = sizeof(bpreq) * count;
1994 	if (bifstp->ifbpstp_len == 0) {
1995 		bifstp->ifbpstp_len = buflen;
1996 		return (0);
1997 	}
1998 
1999 	outbuf = malloc(buflen, M_TEMP, M_NOWAIT | M_ZERO);
2000 	if (outbuf == NULL)
2001 		return (ENOMEM);
2002 
2003 	count = 0;
2004 	buf = outbuf;
2005 	len = min(bifstp->ifbpstp_len, buflen);
2006 	bzero(&bpreq, sizeof(bpreq));
2007 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
2008 		if (len < sizeof(bpreq))
2009 			break;
2010 
2011 		if ((bif->bif_flags & IFBIF_STP) == 0)
2012 			continue;
2013 
2014 		bp = &bif->bif_stp;
2015 		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;
2016 		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;
2017 		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;
2018 		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;
2019 		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id;
2020 		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;
2021 
2022 		memcpy(buf, &bpreq, sizeof(bpreq));
2023 		count++;
2024 		buf += sizeof(bpreq);
2025 		len -= sizeof(bpreq);
2026 	}
2027 
2028 	bifstp->ifbpstp_len = sizeof(bpreq) * count;
2029 	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);
2030 	free(outbuf, M_TEMP);
2031 	return (error);
2032 }
2033 
2034 static int
2035 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
2036 {
2037 	struct ifbrparam *param = arg;
2038 
2039 	return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto));
2040 }
2041 
2042 static int
2043 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
2044 {
2045 	struct ifbrparam *param = arg;
2046 
2047 	return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc));
2048 }
2049 
2050 /*
2051  * bridge_ifdetach:
2052  *
2053  *	Detach an interface from a bridge.  Called when a member
2054  *	interface is detaching.
2055  */
2056 static void
2057 bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
2058 {
2059 	struct bridge_iflist *bif = ifp->if_bridge;
2060 	struct bridge_softc *sc = NULL;
2061 
2062 	if (bif)
2063 		sc = bif->bif_sc;
2064 
2065 	if (ifp->if_flags & IFF_RENAMING)
2066 		return;
2067 	if (V_bridge_cloner == NULL) {
2068 		/*
2069 		 * This detach handler can be called after
2070 		 * vnet_bridge_uninit().  Just return in that case.
2071 		 */
2072 		return;
2073 	}
2074 	/* Check if the interface is a bridge member */
2075 	if (sc != NULL) {
2076 		BRIDGE_LOCK(sc);
2077 		bridge_delete_member(sc, bif, 1);
2078 		BRIDGE_UNLOCK(sc);
2079 		return;
2080 	}
2081 
2082 	/* Check if the interface is a span port */
2083 	BRIDGE_LIST_LOCK();
2084 	LIST_FOREACH(sc, &V_bridge_list, sc_list) {
2085 		BRIDGE_LOCK(sc);
2086 		CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
2087 			if (ifp == bif->bif_ifp) {
2088 				bridge_delete_span(sc, bif);
2089 				break;
2090 			}
2091 
2092 		BRIDGE_UNLOCK(sc);
2093 	}
2094 	BRIDGE_LIST_UNLOCK();
2095 }
2096 
2097 /*
2098  * bridge_init:
2099  *
2100  *	Initialize a bridge interface.
2101  */
2102 static void
2103 bridge_init(void *xsc)
2104 {
2105 	struct bridge_softc *sc = (struct bridge_softc *)xsc;
2106 	struct ifnet *ifp = sc->sc_ifp;
2107 
2108 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2109 		return;
2110 
2111 	BRIDGE_LOCK(sc);
2112 	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
2113 	    bridge_timer, sc);
2114 
2115 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2116 	bstp_init(&sc->sc_stp);		/* Initialize Spanning Tree */
2117 
2118 	BRIDGE_UNLOCK(sc);
2119 }
2120 
2121 /*
2122  * bridge_stop:
2123  *
2124  *	Stop the bridge interface.
2125  */
2126 static void
2127 bridge_stop(struct ifnet *ifp, int disable)
2128 {
2129 	struct bridge_softc *sc = ifp->if_softc;
2130 
2131 	BRIDGE_LOCK_ASSERT(sc);
2132 
2133 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2134 		return;
2135 
2136 	BRIDGE_RT_LOCK(sc);
2137 	callout_stop(&sc->sc_brcallout);
2138 
2139 	bstp_stop(&sc->sc_stp);
2140 
2141 	bridge_rtflush(sc, IFBF_FLUSHDYN);
2142 	BRIDGE_RT_UNLOCK(sc);
2143 
2144 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2145 }
2146 
2147 /*
2148  * bridge_enqueue:
2149  *
2150  *	Enqueue a packet on a bridge member interface.
2151  *
2152  */
2153 static int
2154 bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
2155 {
2156 	int len, err = 0;
2157 	short mflags;
2158 	struct mbuf *m0;
2159 
2160 	/* We may be sending a fragment so traverse the mbuf */
2161 	for (; m; m = m0) {
2162 		m0 = m->m_nextpkt;
2163 		m->m_nextpkt = NULL;
2164 		len = m->m_pkthdr.len;
2165 		mflags = m->m_flags;
2166 
2167 		/*
2168 		 * If underlying interface can not do VLAN tag insertion itself
2169 		 * then attach a packet tag that holds it.
2170 		 */
2171 		if ((m->m_flags & M_VLANTAG) &&
2172 		    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
2173 			m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2174 			if (m == NULL) {
2175 				if_printf(dst_ifp,
2176 				    "unable to prepend VLAN header\n");
2177 				if_inc_counter(dst_ifp, IFCOUNTER_OERRORS, 1);
2178 				continue;
2179 			}
2180 			m->m_flags &= ~M_VLANTAG;
2181 		}
2182 
2183 		M_ASSERTPKTHDR(m); /* We shouldn't transmit mbuf without pkthdr */
2184 		if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
2185 			int n;
2186 
2187 			for (m = m0, n = 1; m != NULL; m = m0, n++) {
2188 				m0 = m->m_nextpkt;
2189 				m_freem(m);
2190 			}
2191 			if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, n);
2192 			break;
2193 		}
2194 
2195 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
2196 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len);
2197 		if (mflags & M_MCAST)
2198 			if_inc_counter(sc->sc_ifp, IFCOUNTER_OMCASTS, 1);
2199 	}
2200 
2201 	return (err);
2202 }
2203 
2204 /*
2205  * bridge_dummynet:
2206  *
2207  * 	Receive a queued packet from dummynet and pass it on to the output
2208  * 	interface.
2209  *
2210  *	The mbuf has the Ethernet header already attached.
2211  */
2212 static void
2213 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
2214 {
2215 	struct bridge_iflist *bif = ifp->if_bridge;
2216 	struct bridge_softc *sc = NULL;
2217 
2218 	if (bif)
2219 		sc = bif->bif_sc;
2220 
2221 	/*
2222 	 * The packet didnt originate from a member interface. This should only
2223 	 * ever happen if a member interface is removed while packets are
2224 	 * queued for it.
2225 	 */
2226 	if (sc == NULL) {
2227 		m_freem(m);
2228 		return;
2229 	}
2230 
2231 	if (PFIL_HOOKED_OUT_46) {
2232 		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0)
2233 			return;
2234 		if (m == NULL)
2235 			return;
2236 	}
2237 
2238 	bridge_enqueue(sc, ifp, m);
2239 }
2240 
2241 /*
2242  * bridge_output:
2243  *
2244  *	Send output from a bridge member interface.  This
2245  *	performs the bridging function for locally originated
2246  *	packets.
2247  *
2248  *	The mbuf has the Ethernet header already attached.  We must
2249  *	enqueue or free the mbuf before returning.
2250  */
2251 static int
2252 bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2253     struct rtentry *rt)
2254 {
2255 	struct ether_header *eh;
2256 	struct bridge_iflist *sbif;
2257 	struct ifnet *bifp, *dst_if;
2258 	struct bridge_softc *sc;
2259 	ether_vlanid_t vlan;
2260 
2261 	NET_EPOCH_ASSERT();
2262 
2263 	if (m->m_len < ETHER_HDR_LEN) {
2264 		m = m_pullup(m, ETHER_HDR_LEN);
2265 		if (m == NULL)
2266 			return (0);
2267 	}
2268 
2269 	sbif = ifp->if_bridge;
2270 	sc = sbif->bif_sc;
2271 	bifp = sc->sc_ifp;
2272 
2273 	eh = mtod(m, struct ether_header *);
2274 	vlan = VLANTAGOF(m);
2275 
2276 	/*
2277 	 * If bridge is down, but the original output interface is up,
2278 	 * go ahead and send out that interface.  Otherwise, the packet
2279 	 * is dropped below.
2280 	 */
2281 	if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2282 		dst_if = ifp;
2283 		goto sendunicast;
2284 	}
2285 
2286 	/*
2287 	 * If the packet is a multicast, or we don't know a better way to
2288 	 * get there, send to all interfaces.
2289 	 */
2290 	if (ETHER_IS_MULTICAST(eh->ether_dhost))
2291 		dst_if = NULL;
2292 	else
2293 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
2294 	/* Tap any traffic not passing back out the originating interface */
2295 	if (dst_if != ifp)
2296 		ETHER_BPF_MTAP(bifp, m);
2297 	if (dst_if == NULL) {
2298 		struct bridge_iflist *bif;
2299 		struct mbuf *mc;
2300 		int used = 0;
2301 
2302 		bridge_span(sc, m);
2303 
2304 		CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
2305 			dst_if = bif->bif_ifp;
2306 
2307 			if (dst_if->if_type == IFT_GIF)
2308 				continue;
2309 			if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
2310 				continue;
2311 
2312 			/*
2313 			 * If this is not the original output interface,
2314 			 * and the interface is participating in spanning
2315 			 * tree, make sure the port is in a state that
2316 			 * allows forwarding.
2317 			 */
2318 			if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) &&
2319 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
2320 				continue;
2321 
2322 			if (CK_LIST_NEXT(bif, bif_next) == NULL) {
2323 				used = 1;
2324 				mc = m;
2325 			} else {
2326 				mc = m_dup(m, M_NOWAIT);
2327 				if (mc == NULL) {
2328 					if_inc_counter(bifp, IFCOUNTER_OERRORS, 1);
2329 					continue;
2330 				}
2331 			}
2332 
2333 			bridge_enqueue(sc, dst_if, mc);
2334 		}
2335 		if (used == 0)
2336 			m_freem(m);
2337 		return (0);
2338 	}
2339 
2340 sendunicast:
2341 	/*
2342 	 * XXX Spanning tree consideration here?
2343 	 */
2344 
2345 	bridge_span(sc, m);
2346 	if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2347 		m_freem(m);
2348 		return (0);
2349 	}
2350 
2351 	bridge_enqueue(sc, dst_if, m);
2352 	return (0);
2353 }
2354 
2355 /*
2356  * bridge_transmit:
2357  *
2358  *	Do output on a bridge.
2359  *
2360  */
2361 static int
2362 bridge_transmit(struct ifnet *ifp, struct mbuf *m)
2363 {
2364 	struct bridge_softc *sc;
2365 	struct ether_header *eh;
2366 	struct ifnet *dst_if;
2367 	int error = 0;
2368 
2369 	sc = ifp->if_softc;
2370 
2371 	ETHER_BPF_MTAP(ifp, m);
2372 
2373 	eh = mtod(m, struct ether_header *);
2374 
2375 	if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) &&
2376 	    (dst_if = bridge_rtlookup(sc, eh->ether_dhost, DOT1Q_VID_NULL)) !=
2377 	    NULL) {
2378 		error = bridge_enqueue(sc, dst_if, m);
2379 	} else
2380 		bridge_broadcast(sc, ifp, m, 0);
2381 
2382 	return (error);
2383 }
2384 
2385 #ifdef ALTQ
2386 static void
2387 bridge_altq_start(if_t ifp)
2388 {
2389 	struct ifaltq *ifq = &ifp->if_snd;
2390 	struct mbuf *m;
2391 
2392 	IFQ_LOCK(ifq);
2393 	IFQ_DEQUEUE_NOLOCK(ifq, m);
2394 	while (m != NULL) {
2395 		bridge_transmit(ifp, m);
2396 		IFQ_DEQUEUE_NOLOCK(ifq, m);
2397 	}
2398 	IFQ_UNLOCK(ifq);
2399 }
2400 
2401 static int
2402 bridge_altq_transmit(if_t ifp, struct mbuf *m)
2403 {
2404 	int err;
2405 
2406 	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
2407 		IFQ_ENQUEUE(&ifp->if_snd, m, err);
2408 		if (err == 0)
2409 			bridge_altq_start(ifp);
2410 	} else
2411 		err = bridge_transmit(ifp, m);
2412 
2413 	return (err);
2414 }
2415 #endif	/* ALTQ */
2416 
2417 /*
2418  * The ifp->if_qflush entry point for if_bridge(4) is no-op.
2419  */
2420 static void
2421 bridge_qflush(struct ifnet *ifp __unused)
2422 {
2423 }
2424 
2425 /*
2426  * bridge_forward:
2427  *
2428  *	The forwarding function of the bridge.
2429  *
2430  *	NOTE: Releases the lock on return.
2431  */
2432 static void
2433 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
2434     struct mbuf *m)
2435 {
2436 	struct bridge_iflist *dbif;
2437 	struct ifnet *src_if, *dst_if, *ifp;
2438 	struct ether_header *eh;
2439 	uint16_t vlan;
2440 	uint8_t *dst;
2441 	int error;
2442 
2443 	NET_EPOCH_ASSERT();
2444 
2445 	src_if = m->m_pkthdr.rcvif;
2446 	ifp = sc->sc_ifp;
2447 
2448 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
2449 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
2450 	vlan = VLANTAGOF(m);
2451 
2452 	if ((sbif->bif_flags & IFBIF_STP) &&
2453 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
2454 		goto drop;
2455 
2456 	eh = mtod(m, struct ether_header *);
2457 	dst = eh->ether_dhost;
2458 
2459 	/* If the interface is learning, record the address. */
2460 	if (sbif->bif_flags & IFBIF_LEARNING) {
2461 		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
2462 		    sbif, 0, IFBAF_DYNAMIC);
2463 		/*
2464 		 * If the interface has addresses limits then deny any source
2465 		 * that is not in the cache.
2466 		 */
2467 		if (error && sbif->bif_addrmax)
2468 			goto drop;
2469 	}
2470 
2471 	if ((sbif->bif_flags & IFBIF_STP) != 0 &&
2472 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING)
2473 		goto drop;
2474 
2475 #ifdef DEV_NETMAP
2476 	/*
2477 	 * Hand the packet to netmap only if it wasn't injected by netmap
2478 	 * itself.
2479 	 */
2480 	if ((m->m_flags & M_BRIDGE_INJECT) == 0 &&
2481 	    (if_getcapenable(ifp) & IFCAP_NETMAP) != 0) {
2482 		ifp->if_input(ifp, m);
2483 		return;
2484 	}
2485 	m->m_flags &= ~M_BRIDGE_INJECT;
2486 #endif
2487 
2488 	/*
2489 	 * At this point, the port either doesn't participate
2490 	 * in spanning tree or it is in the forwarding state.
2491 	 */
2492 
2493 	/*
2494 	 * If the packet is unicast, destined for someone on
2495 	 * "this" side of the bridge, drop it.
2496 	 */
2497 	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
2498 		dst_if = bridge_rtlookup(sc, dst, vlan);
2499 		if (src_if == dst_if)
2500 			goto drop;
2501 	} else {
2502 		/*
2503 		 * Check if its a reserved multicast address, any address
2504 		 * listed in 802.1D section 7.12.6 may not be forwarded by the
2505 		 * bridge.
2506 		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
2507 		 */
2508 		if (dst[0] == 0x01 && dst[1] == 0x80 &&
2509 		    dst[2] == 0xc2 && dst[3] == 0x00 &&
2510 		    dst[4] == 0x00 && dst[5] <= 0x0f)
2511 			goto drop;
2512 
2513 		/* ...forward it to all interfaces. */
2514 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
2515 		dst_if = NULL;
2516 	}
2517 
2518 	/*
2519 	 * If we have a destination interface which is a member of our bridge,
2520 	 * OR this is a unicast packet, push it through the bpf(4) machinery.
2521 	 * For broadcast or multicast packets, don't bother because it will
2522 	 * be reinjected into ether_input. We do this before we pass the packets
2523 	 * through the pfil(9) framework, as it is possible that pfil(9) will
2524 	 * drop the packet, or possibly modify it, making it difficult to debug
2525 	 * firewall issues on the bridge.
2526 	 */
2527 	if (dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0)
2528 		ETHER_BPF_MTAP(ifp, m);
2529 
2530 	/* run the packet filter */
2531 	if (PFIL_HOOKED_IN_46) {
2532 		if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
2533 			return;
2534 		if (m == NULL)
2535 			return;
2536 	}
2537 
2538 	if (dst_if == NULL) {
2539 		bridge_broadcast(sc, src_if, m, 1);
2540 		return;
2541 	}
2542 
2543 	/*
2544 	 * At this point, we're dealing with a unicast frame
2545 	 * going to a different interface.
2546 	 */
2547 	if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
2548 		goto drop;
2549 
2550 	dbif = bridge_lookup_member_if(sc, dst_if);
2551 	if (dbif == NULL)
2552 		/* Not a member of the bridge (anymore?) */
2553 		goto drop;
2554 
2555 	/* Private segments can not talk to each other */
2556 	if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)
2557 		goto drop;
2558 
2559 	if ((dbif->bif_flags & IFBIF_STP) &&
2560 	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
2561 		goto drop;
2562 
2563 	if (PFIL_HOOKED_OUT_46) {
2564 		if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
2565 			return;
2566 		if (m == NULL)
2567 			return;
2568 	}
2569 
2570 	bridge_enqueue(sc, dst_if, m);
2571 	return;
2572 
2573 drop:
2574 	m_freem(m);
2575 }
2576 
2577 /*
2578  * bridge_input:
2579  *
2580  *	Receive input from a member interface.  Queue the packet for
2581  *	bridging if it is not for us.
2582  */
2583 static struct mbuf *
2584 bridge_input(struct ifnet *ifp, struct mbuf *m)
2585 {
2586 	struct bridge_softc *sc = NULL;
2587 	struct bridge_iflist *bif, *bif2;
2588 	struct ifnet *bifp;
2589 	struct ether_header *eh;
2590 	struct mbuf *mc, *mc2;
2591 	ether_vlanid_t vlan;
2592 	int error;
2593 
2594 	NET_EPOCH_ASSERT();
2595 
2596 	eh = mtod(m, struct ether_header *);
2597 	vlan = VLANTAGOF(m);
2598 
2599 	bif = ifp->if_bridge;
2600 	if (bif)
2601 		sc = bif->bif_sc;
2602 
2603 	if (sc == NULL) {
2604 		/*
2605 		 * This packet originated from the bridge itself, so it must
2606 		 * have been transmitted by netmap.  Derive the "source"
2607 		 * interface from the source address and drop the packet if the
2608 		 * source address isn't known.
2609 		 */
2610 		KASSERT((m->m_flags & M_BRIDGE_INJECT) != 0,
2611 		    ("%s: ifnet %p missing a bridge softc", __func__, ifp));
2612 		sc = if_getsoftc(ifp);
2613 		ifp = bridge_rtlookup(sc, eh->ether_shost, vlan);
2614 		if (ifp == NULL) {
2615 			if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
2616 			m_freem(m);
2617 			return (NULL);
2618 		}
2619 		m->m_pkthdr.rcvif = ifp;
2620 	}
2621 	bifp = sc->sc_ifp;
2622 	if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2623 		return (m);
2624 
2625 	/*
2626 	 * Implement support for bridge monitoring. If this flag has been
2627 	 * set on this interface, discard the packet once we push it through
2628 	 * the bpf(4) machinery, but before we do, increment the byte and
2629 	 * packet counters associated with this interface.
2630 	 */
2631 	if ((bifp->if_flags & IFF_MONITOR) != 0) {
2632 		m->m_pkthdr.rcvif  = bifp;
2633 		ETHER_BPF_MTAP(bifp, m);
2634 		if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1);
2635 		if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
2636 		m_freem(m);
2637 		return (NULL);
2638 	}
2639 
2640 	bridge_span(sc, m);
2641 
2642 	if (m->m_flags & (M_BCAST|M_MCAST)) {
2643 		/* Tap off 802.1D packets; they do not get forwarded. */
2644 		if (memcmp(eh->ether_dhost, bstp_etheraddr,
2645 		    ETHER_ADDR_LEN) == 0) {
2646 			bstp_input(&bif->bif_stp, ifp, m); /* consumes mbuf */
2647 			return (NULL);
2648 		}
2649 
2650 		if ((bif->bif_flags & IFBIF_STP) &&
2651 		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
2652 			return (m);
2653 		}
2654 
2655 		/*
2656 		 * Make a deep copy of the packet and enqueue the copy
2657 		 * for bridge processing; return the original packet for
2658 		 * local processing.
2659 		 */
2660 		mc = m_dup(m, M_NOWAIT);
2661 		if (mc == NULL) {
2662 			return (m);
2663 		}
2664 
2665 		/* Perform the bridge forwarding function with the copy. */
2666 		bridge_forward(sc, bif, mc);
2667 
2668 #ifdef DEV_NETMAP
2669 		/*
2670 		 * If netmap is enabled and has not already seen this packet,
2671 		 * then it will be consumed by bridge_forward().
2672 		 */
2673 		if ((if_getcapenable(bifp) & IFCAP_NETMAP) != 0 &&
2674 		    (m->m_flags & M_BRIDGE_INJECT) == 0) {
2675 			m_freem(m);
2676 			return (NULL);
2677 		}
2678 #endif
2679 
2680 		/*
2681 		 * Reinject the mbuf as arriving on the bridge so we have a
2682 		 * chance at claiming multicast packets. We can not loop back
2683 		 * here from ether_input as a bridge is never a member of a
2684 		 * bridge.
2685 		 */
2686 		KASSERT(bifp->if_bridge == NULL,
2687 		    ("loop created in bridge_input"));
2688 		mc2 = m_dup(m, M_NOWAIT);
2689 		if (mc2 != NULL) {
2690 			/* Keep the layer3 header aligned */
2691 			int i = min(mc2->m_pkthdr.len, max_protohdr);
2692 			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
2693 		}
2694 		if (mc2 != NULL) {
2695 			mc2->m_pkthdr.rcvif = bifp;
2696 			mc2->m_flags &= ~M_BRIDGE_INJECT;
2697 			sc->sc_if_input(bifp, mc2);
2698 		}
2699 
2700 		/* Return the original packet for local processing. */
2701 		return (m);
2702 	}
2703 
2704 	if ((bif->bif_flags & IFBIF_STP) &&
2705 	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
2706 		return (m);
2707 	}
2708 
2709 #if defined(INET) || defined(INET6)
2710 #define	CARP_CHECK_WE_ARE_DST(iface) \
2711 	((iface)->if_carp && (*carp_forus_p)((iface), eh->ether_dhost))
2712 #define	CARP_CHECK_WE_ARE_SRC(iface) \
2713 	((iface)->if_carp && (*carp_forus_p)((iface), eh->ether_shost))
2714 #else
2715 #define	CARP_CHECK_WE_ARE_DST(iface)	false
2716 #define	CARP_CHECK_WE_ARE_SRC(iface)	false
2717 #endif
2718 
2719 #ifdef DEV_NETMAP
2720 #define	GRAB_FOR_NETMAP(ifp, m) do {					\
2721 	if ((if_getcapenable(ifp) & IFCAP_NETMAP) != 0 &&		\
2722 	    ((m)->m_flags & M_BRIDGE_INJECT) == 0) {			\
2723 		(ifp)->if_input(ifp, m);				\
2724 		return (NULL);						\
2725 	}								\
2726 } while (0)
2727 #else
2728 #define	GRAB_FOR_NETMAP(ifp, m)
2729 #endif
2730 
2731 #define GRAB_OUR_PACKETS(iface)						\
2732 	if ((iface)->if_type == IFT_GIF)				\
2733 		continue;						\
2734 	/* It is destined for us. */					\
2735 	if (memcmp(IF_LLADDR(iface), eh->ether_dhost, ETHER_ADDR_LEN) == 0 || \
2736 	    CARP_CHECK_WE_ARE_DST(iface)) {				\
2737 		if (bif->bif_flags & IFBIF_LEARNING) {			\
2738 			error = bridge_rtupdate(sc, eh->ether_shost,	\
2739 			    vlan, bif, 0, IFBAF_DYNAMIC);		\
2740 			if (error && bif->bif_addrmax) {		\
2741 				m_freem(m);				\
2742 				return (NULL);				\
2743 			}						\
2744 		}							\
2745 		m->m_pkthdr.rcvif = iface;				\
2746 		if ((iface) == ifp) {					\
2747 			/* Skip bridge processing... src == dest */	\
2748 			return (m);					\
2749 		}							\
2750 		/* It's passing over or to the bridge, locally. */	\
2751 		ETHER_BPF_MTAP(bifp, m);				\
2752 		if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1);		\
2753 		if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);\
2754 		/* Hand the packet over to netmap if necessary. */	\
2755 		GRAB_FOR_NETMAP(bifp, m);				\
2756 		/* Filter on the physical interface. */			\
2757 		if (V_pfil_local_phys && PFIL_HOOKED_IN_46) {		\
2758 			if (bridge_pfil(&m, NULL, ifp,			\
2759 			    PFIL_IN) != 0 || m == NULL) {		\
2760 				return (NULL);				\
2761 			}						\
2762 		}							\
2763 		if ((iface) != bifp)					\
2764 			ETHER_BPF_MTAP(iface, m);			\
2765 		return (m);						\
2766 	}								\
2767 									\
2768 	/* We just received a packet that we sent out. */		\
2769 	if (memcmp(IF_LLADDR(iface), eh->ether_shost, ETHER_ADDR_LEN) == 0 || \
2770 	    CARP_CHECK_WE_ARE_SRC(iface)) {				\
2771 		m_freem(m);						\
2772 		return (NULL);						\
2773 	}
2774 
2775 	/*
2776 	 * Unicast.  Make sure it's not for the bridge.
2777 	 */
2778 	do { GRAB_OUR_PACKETS(bifp) } while (0);
2779 
2780 	/*
2781 	 * We only need to check members interfaces if member_ifaddrs is
2782 	 * enabled; otherwise we should have never traffic destined for a
2783 	 * member's lladdr.
2784 	 */
2785 
2786 	if (V_member_ifaddrs) {
2787 		/*
2788 		 * Give a chance for ifp at first priority. This will help when
2789 		 * the packet comes through the interface like VLAN's with the
2790 		 * same MACs on several interfaces from the same bridge. This
2791 		 * also will save some CPU cycles in case the destination
2792 		 * interface and the input interface (eq ifp) are the same.
2793 		 */
2794 		do { GRAB_OUR_PACKETS(ifp) } while (0);
2795 
2796 		/* Now check the all bridge members. */
2797 		CK_LIST_FOREACH(bif2, &sc->sc_iflist, bif_next) {
2798 			GRAB_OUR_PACKETS(bif2->bif_ifp)
2799 		}
2800 	}
2801 
2802 #undef CARP_CHECK_WE_ARE_DST
2803 #undef CARP_CHECK_WE_ARE_SRC
2804 #undef GRAB_FOR_NETMAP
2805 #undef GRAB_OUR_PACKETS
2806 
2807 	/* Perform the bridge forwarding function. */
2808 	bridge_forward(sc, bif, m);
2809 
2810 	return (NULL);
2811 }
2812 
2813 /*
2814  * Inject a packet back into the host ethernet stack.  This will generally only
2815  * be used by netmap when an application writes to the host TX ring.  The
2816  * M_BRIDGE_INJECT flag ensures that the packet is re-routed to the bridge
2817  * interface after ethernet processing.
2818  */
2819 static void
2820 bridge_inject(struct ifnet *ifp, struct mbuf *m)
2821 {
2822 	struct bridge_softc *sc;
2823 
2824 	KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0,
2825 	    ("%s: iface %s is not running in netmap mode",
2826 	    __func__, if_name(ifp)));
2827 	KASSERT((m->m_flags & M_BRIDGE_INJECT) == 0,
2828 	    ("%s: mbuf %p has M_BRIDGE_INJECT set", __func__, m));
2829 
2830 	m->m_flags |= M_BRIDGE_INJECT;
2831 	sc = if_getsoftc(ifp);
2832 	sc->sc_if_input(ifp, m);
2833 }
2834 
2835 /*
2836  * bridge_broadcast:
2837  *
2838  *	Send a frame to all interfaces that are members of
2839  *	the bridge, except for the one on which the packet
2840  *	arrived.
2841  *
2842  *	NOTE: Releases the lock on return.
2843  */
2844 static void
2845 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
2846     struct mbuf *m, int runfilt)
2847 {
2848 	struct bridge_iflist *dbif, *sbif;
2849 	struct mbuf *mc;
2850 	struct ifnet *dst_if;
2851 	int used = 0, i;
2852 
2853 	NET_EPOCH_ASSERT();
2854 
2855 	sbif = bridge_lookup_member_if(sc, src_if);
2856 
2857 	/* Filter on the bridge interface before broadcasting */
2858 	if (runfilt && PFIL_HOOKED_OUT_46) {
2859 		if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0)
2860 			return;
2861 		if (m == NULL)
2862 			return;
2863 	}
2864 
2865 	CK_LIST_FOREACH(dbif, &sc->sc_iflist, bif_next) {
2866 		dst_if = dbif->bif_ifp;
2867 		if (dst_if == src_if)
2868 			continue;
2869 
2870 		/* Private segments can not talk to each other */
2871 		if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE))
2872 			continue;
2873 
2874 		if ((dbif->bif_flags & IFBIF_STP) &&
2875 		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
2876 			continue;
2877 
2878 		if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 &&
2879 		    (m->m_flags & (M_BCAST|M_MCAST)) == 0)
2880 			continue;
2881 
2882 		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
2883 			continue;
2884 
2885 		if (CK_LIST_NEXT(dbif, bif_next) == NULL) {
2886 			mc = m;
2887 			used = 1;
2888 		} else {
2889 			mc = m_dup(m, M_NOWAIT);
2890 			if (mc == NULL) {
2891 				if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
2892 				continue;
2893 			}
2894 		}
2895 
2896 		/*
2897 		 * Filter on the output interface. Pass a NULL bridge interface
2898 		 * pointer so we do not redundantly filter on the bridge for
2899 		 * each interface we broadcast on.
2900 		 */
2901 		if (runfilt && PFIL_HOOKED_OUT_46) {
2902 			if (used == 0) {
2903 				/* Keep the layer3 header aligned */
2904 				i = min(mc->m_pkthdr.len, max_protohdr);
2905 				mc = m_copyup(mc, i, ETHER_ALIGN);
2906 				if (mc == NULL) {
2907 					if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
2908 					continue;
2909 				}
2910 			}
2911 			if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
2912 				continue;
2913 			if (mc == NULL)
2914 				continue;
2915 		}
2916 
2917 		bridge_enqueue(sc, dst_if, mc);
2918 	}
2919 	if (used == 0)
2920 		m_freem(m);
2921 }
2922 
2923 /*
2924  * bridge_span:
2925  *
2926  *	Duplicate a packet out one or more interfaces that are in span mode,
2927  *	the original mbuf is unmodified.
2928  */
2929 static void
2930 bridge_span(struct bridge_softc *sc, struct mbuf *m)
2931 {
2932 	struct bridge_iflist *bif;
2933 	struct ifnet *dst_if;
2934 	struct mbuf *mc;
2935 
2936 	NET_EPOCH_ASSERT();
2937 
2938 	if (CK_LIST_EMPTY(&sc->sc_spanlist))
2939 		return;
2940 
2941 	CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2942 		dst_if = bif->bif_ifp;
2943 
2944 		if ((dst_if->if_drv_flags & IFF_DRV_RUNNING) == 0)
2945 			continue;
2946 
2947 		mc = m_dup(m, M_NOWAIT);
2948 		if (mc == NULL) {
2949 			if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
2950 			continue;
2951 		}
2952 
2953 		bridge_enqueue(sc, dst_if, mc);
2954 	}
2955 }
2956 
2957 /*
2958  * bridge_rtupdate:
2959  *
2960  *	Add a bridge routing entry.
2961  */
2962 static int
2963 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
2964 		ether_vlanid_t vlan, struct bridge_iflist *bif,
2965 		int setflags, uint8_t flags)
2966 {
2967 	struct bridge_rtnode *brt;
2968 	struct bridge_iflist *obif;
2969 	int error;
2970 
2971 	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
2972 
2973 	/* Check the source address is valid and not multicast. */
2974 	if (ETHER_IS_MULTICAST(dst) ||
2975 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
2976 	     dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0)
2977 		return (EINVAL);
2978 
2979 	/*
2980 	 * A route for this destination might already exist.  If so,
2981 	 * update it, otherwise create a new one.
2982 	 */
2983 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
2984 		BRIDGE_RT_LOCK(sc);
2985 
2986 		/* Check again, now that we have the lock. There could have
2987 		 * been a race and we only want to insert this once. */
2988 		if (bridge_rtnode_lookup(sc, dst, vlan) != NULL) {
2989 			BRIDGE_RT_UNLOCK(sc);
2990 			return (0);
2991 		}
2992 
2993 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
2994 			sc->sc_brtexceeded++;
2995 			BRIDGE_RT_UNLOCK(sc);
2996 			return (ENOSPC);
2997 		}
2998 		/* Check per interface address limits (if enabled) */
2999 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
3000 			bif->bif_addrexceeded++;
3001 			BRIDGE_RT_UNLOCK(sc);
3002 			return (ENOSPC);
3003 		}
3004 
3005 		/*
3006 		 * Allocate a new bridge forwarding node, and
3007 		 * initialize the expiration time and Ethernet
3008 		 * address.
3009 		 */
3010 		brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO);
3011 		if (brt == NULL) {
3012 			BRIDGE_RT_UNLOCK(sc);
3013 			return (ENOMEM);
3014 		}
3015 		brt->brt_vnet = curvnet;
3016 
3017 		if (bif->bif_flags & IFBIF_STICKY)
3018 			brt->brt_flags = IFBAF_STICKY;
3019 		else
3020 			brt->brt_flags = IFBAF_DYNAMIC;
3021 
3022 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
3023 		brt->brt_vlan = vlan;
3024 
3025 		brt->brt_dst = bif;
3026 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
3027 			uma_zfree(V_bridge_rtnode_zone, brt);
3028 			BRIDGE_RT_UNLOCK(sc);
3029 			return (error);
3030 		}
3031 		bif->bif_addrcnt++;
3032 
3033 		BRIDGE_RT_UNLOCK(sc);
3034 	}
3035 
3036 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
3037 	    (obif = brt->brt_dst) != bif) {
3038 		MPASS(obif != NULL);
3039 
3040 		BRIDGE_RT_LOCK(sc);
3041 		brt->brt_dst->bif_addrcnt--;
3042 		brt->brt_dst = bif;
3043 		brt->brt_dst->bif_addrcnt++;
3044 		BRIDGE_RT_UNLOCK(sc);
3045 
3046 		if (V_log_mac_flap &&
3047 		    ppsratecheck(&V_log_last, &V_log_count, V_log_interval)) {
3048 			log(LOG_NOTICE,
3049 			    "%s: mac address %6D vlan %d moved from %s to %s\n",
3050 			    sc->sc_ifp->if_xname,
3051 			    &brt->brt_addr[0], ":",
3052 			    brt->brt_vlan,
3053 			    obif->bif_ifp->if_xname,
3054 			    bif->bif_ifp->if_xname);
3055 		}
3056 	}
3057 
3058 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
3059 		brt->brt_expire = time_uptime + sc->sc_brttimeout;
3060 	if (setflags)
3061 		brt->brt_flags = flags;
3062 
3063 	return (0);
3064 }
3065 
3066 /*
3067  * bridge_rtlookup:
3068  *
3069  *	Lookup the destination interface for an address.
3070  */
3071 static struct ifnet *
3072 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr,
3073 		ether_vlanid_t vlan)
3074 {
3075 	struct bridge_rtnode *brt;
3076 
3077 	NET_EPOCH_ASSERT();
3078 
3079 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL)
3080 		return (NULL);
3081 
3082 	return (brt->brt_ifp);
3083 }
3084 
3085 /*
3086  * bridge_rttrim:
3087  *
3088  *	Trim the routine table so that we have a number
3089  *	of routing entries less than or equal to the
3090  *	maximum number.
3091  */
3092 static void
3093 bridge_rttrim(struct bridge_softc *sc)
3094 {
3095 	struct bridge_rtnode *brt, *nbrt;
3096 
3097 	NET_EPOCH_ASSERT();
3098 	BRIDGE_RT_LOCK_ASSERT(sc);
3099 
3100 	/* Make sure we actually need to do this. */
3101 	if (sc->sc_brtcnt <= sc->sc_brtmax)
3102 		return;
3103 
3104 	/* Force an aging cycle; this might trim enough addresses. */
3105 	bridge_rtage(sc);
3106 	if (sc->sc_brtcnt <= sc->sc_brtmax)
3107 		return;
3108 
3109 	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
3110 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3111 			bridge_rtnode_destroy(sc, brt);
3112 			if (sc->sc_brtcnt <= sc->sc_brtmax)
3113 				return;
3114 		}
3115 	}
3116 }
3117 
3118 /*
3119  * bridge_timer:
3120  *
3121  *	Aging timer for the bridge.
3122  */
3123 static void
3124 bridge_timer(void *arg)
3125 {
3126 	struct bridge_softc *sc = arg;
3127 
3128 	BRIDGE_RT_LOCK_ASSERT(sc);
3129 
3130 	/* Destruction of rtnodes requires a proper vnet context */
3131 	CURVNET_SET(sc->sc_ifp->if_vnet);
3132 	bridge_rtage(sc);
3133 
3134 	if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
3135 		callout_reset(&sc->sc_brcallout,
3136 		    bridge_rtable_prune_period * hz, bridge_timer, sc);
3137 	CURVNET_RESTORE();
3138 }
3139 
3140 /*
3141  * bridge_rtage:
3142  *
3143  *	Perform an aging cycle.
3144  */
3145 static void
3146 bridge_rtage(struct bridge_softc *sc)
3147 {
3148 	struct bridge_rtnode *brt, *nbrt;
3149 
3150 	BRIDGE_RT_LOCK_ASSERT(sc);
3151 
3152 	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
3153 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
3154 			if (time_uptime >= brt->brt_expire)
3155 				bridge_rtnode_destroy(sc, brt);
3156 		}
3157 	}
3158 }
3159 
3160 /*
3161  * bridge_rtflush:
3162  *
3163  *	Remove all dynamic addresses from the bridge.
3164  */
3165 static void
3166 bridge_rtflush(struct bridge_softc *sc, int full)
3167 {
3168 	struct bridge_rtnode *brt, *nbrt;
3169 
3170 	BRIDGE_RT_LOCK_ASSERT(sc);
3171 
3172 	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
3173 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
3174 			bridge_rtnode_destroy(sc, brt);
3175 	}
3176 }
3177 
3178 /*
3179  * bridge_rtdaddr:
3180  *
3181  *	Remove an address from the table.
3182  */
3183 static int
3184 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr,
3185 	       ether_vlanid_t vlan)
3186 {
3187 	struct bridge_rtnode *brt;
3188 	int found = 0;
3189 
3190 	BRIDGE_RT_LOCK(sc);
3191 
3192 	/*
3193 	 * If vlan is DOT1Q_VID_RSVD_IMPL then we want to delete for all vlans
3194 	 * so the lookup may return more than one.
3195 	 */
3196 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
3197 		bridge_rtnode_destroy(sc, brt);
3198 		found = 1;
3199 	}
3200 
3201 	BRIDGE_RT_UNLOCK(sc);
3202 
3203 	return (found ? 0 : ENOENT);
3204 }
3205 
3206 /*
3207  * bridge_rtdelete:
3208  *
3209  *	Delete routes to a speicifc member interface.
3210  */
3211 static void
3212 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
3213 {
3214 	struct bridge_rtnode *brt, *nbrt;
3215 
3216 	BRIDGE_RT_LOCK_ASSERT(sc);
3217 
3218 	CK_LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
3219 		if (brt->brt_ifp == ifp && (full ||
3220 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC))
3221 			bridge_rtnode_destroy(sc, brt);
3222 	}
3223 }
3224 
3225 /*
3226  * bridge_rtable_init:
3227  *
3228  *	Initialize the route table for this bridge.
3229  */
3230 static void
3231 bridge_rtable_init(struct bridge_softc *sc)
3232 {
3233 	int i;
3234 
3235 	sc->sc_rthash = malloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
3236 	    M_DEVBUF, M_WAITOK);
3237 
3238 	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
3239 		CK_LIST_INIT(&sc->sc_rthash[i]);
3240 
3241 	sc->sc_rthash_key = arc4random();
3242 	CK_LIST_INIT(&sc->sc_rtlist);
3243 }
3244 
3245 /*
3246  * bridge_rtable_fini:
3247  *
3248  *	Deconstruct the route table for this bridge.
3249  */
3250 static void
3251 bridge_rtable_fini(struct bridge_softc *sc)
3252 {
3253 
3254 	KASSERT(sc->sc_brtcnt == 0,
3255 	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
3256 	free(sc->sc_rthash, M_DEVBUF);
3257 }
3258 
3259 /*
3260  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3261  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3262  */
3263 #define	mix(a, b, c)							\
3264 do {									\
3265 	a -= b; a -= c; a ^= (c >> 13);					\
3266 	b -= c; b -= a; b ^= (a << 8);					\
3267 	c -= a; c -= b; c ^= (b >> 13);					\
3268 	a -= b; a -= c; a ^= (c >> 12);					\
3269 	b -= c; b -= a; b ^= (a << 16);					\
3270 	c -= a; c -= b; c ^= (b >> 5);					\
3271 	a -= b; a -= c; a ^= (c >> 3);					\
3272 	b -= c; b -= a; b ^= (a << 10);					\
3273 	c -= a; c -= b; c ^= (b >> 15);					\
3274 } while (/*CONSTCOND*/0)
3275 
3276 static __inline uint32_t
3277 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
3278 {
3279 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
3280 
3281 	b += addr[5] << 8;
3282 	b += addr[4];
3283 	a += addr[3] << 24;
3284 	a += addr[2] << 16;
3285 	a += addr[1] << 8;
3286 	a += addr[0];
3287 
3288 	mix(a, b, c);
3289 
3290 	return (c & BRIDGE_RTHASH_MASK);
3291 }
3292 
3293 #undef mix
3294 
/*
 * Compare two Ethernet addresses byte by byte.  Returns the difference
 * of the first pair of bytes that differ (a[i] - b[i]), or 0 when all
 * ETHER_ADDR_LEN bytes are equal.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx, diff;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		diff = (int)a[idx] - (int)b[idx];
		if (diff != 0)
			return (diff);
	}

	return (0);
}
3306 
3307 /*
3308  * bridge_rtnode_lookup:
3309  *
3310  *	Look up a bridge route node for the specified destination. Compare the
3311  *	vlan id or if zero then just return the first match.
3312  */
3313 static struct bridge_rtnode *
3314 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
3315 		     ether_vlanid_t vlan)
3316 {
3317 	struct bridge_rtnode *brt;
3318 	uint32_t hash;
3319 	int dir;
3320 
3321 	BRIDGE_RT_LOCK_OR_NET_EPOCH_ASSERT(sc);
3322 
3323 	hash = bridge_rthash(sc, addr);
3324 	CK_LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
3325 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
3326 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == DOT1Q_VID_RSVD_IMPL))
3327 			return (brt);
3328 		if (dir > 0)
3329 			return (NULL);
3330 	}
3331 
3332 	return (NULL);
3333 }
3334 
3335 /*
3336  * bridge_rtnode_insert:
3337  *
3338  *	Insert the specified bridge node into the route table.  We
3339  *	assume the entry is not already in the table.
3340  */
3341 static int
3342 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
3343 {
3344 	struct bridge_rtnode *lbrt;
3345 	uint32_t hash;
3346 	int dir;
3347 
3348 	BRIDGE_RT_LOCK_ASSERT(sc);
3349 
3350 	hash = bridge_rthash(sc, brt->brt_addr);
3351 
3352 	lbrt = CK_LIST_FIRST(&sc->sc_rthash[hash]);
3353 	if (lbrt == NULL) {
3354 		CK_LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
3355 		goto out;
3356 	}
3357 
3358 	do {
3359 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
3360 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan)
3361 			return (EEXIST);
3362 		if (dir > 0) {
3363 			CK_LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
3364 			goto out;
3365 		}
3366 		if (CK_LIST_NEXT(lbrt, brt_hash) == NULL) {
3367 			CK_LIST_INSERT_AFTER(lbrt, brt, brt_hash);
3368 			goto out;
3369 		}
3370 		lbrt = CK_LIST_NEXT(lbrt, brt_hash);
3371 	} while (lbrt != NULL);
3372 
3373 #ifdef DIAGNOSTIC
3374 	panic("bridge_rtnode_insert: impossible");
3375 #endif
3376 
3377 out:
3378 	CK_LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
3379 	sc->sc_brtcnt++;
3380 
3381 	return (0);
3382 }
3383 
3384 static void
3385 bridge_rtnode_destroy_cb(struct epoch_context *ctx)
3386 {
3387 	struct bridge_rtnode *brt;
3388 
3389 	brt = __containerof(ctx, struct bridge_rtnode, brt_epoch_ctx);
3390 
3391 	CURVNET_SET(brt->brt_vnet);
3392 	uma_zfree(V_bridge_rtnode_zone, brt);
3393 	CURVNET_RESTORE();
3394 }
3395 
3396 /*
3397  * bridge_rtnode_destroy:
3398  *
3399  *	Destroy a bridge rtnode.
3400  */
3401 static void
3402 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
3403 {
3404 	BRIDGE_RT_LOCK_ASSERT(sc);
3405 
3406 	CK_LIST_REMOVE(brt, brt_hash);
3407 
3408 	CK_LIST_REMOVE(brt, brt_list);
3409 	sc->sc_brtcnt--;
3410 	brt->brt_dst->bif_addrcnt--;
3411 
3412 	NET_EPOCH_CALL(bridge_rtnode_destroy_cb, &brt->brt_epoch_ctx);
3413 }
3414 
3415 /*
3416  * bridge_rtable_expire:
3417  *
3418  *	Set the expiry time for all routes on an interface.
3419  */
3420 static void
3421 bridge_rtable_expire(struct ifnet *ifp, int age)
3422 {
3423 	struct bridge_iflist *bif = NULL;
3424 	struct bridge_softc *sc = NULL;
3425 	struct bridge_rtnode *brt;
3426 
3427 	CURVNET_SET(ifp->if_vnet);
3428 
3429 	bif = ifp->if_bridge;
3430 	if (bif)
3431 		sc = bif->bif_sc;
3432 	MPASS(sc != NULL);
3433 	BRIDGE_RT_LOCK(sc);
3434 
3435 	/*
3436 	 * If the age is zero then flush, otherwise set all the expiry times to
3437 	 * age for the interface
3438 	 */
3439 	if (age == 0)
3440 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
3441 	else {
3442 		CK_LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
3443 			/* Cap the expiry time to 'age' */
3444 			if (brt->brt_ifp == ifp &&
3445 			    brt->brt_expire > time_uptime + age &&
3446 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
3447 				brt->brt_expire = time_uptime + age;
3448 		}
3449 	}
3450 	BRIDGE_RT_UNLOCK(sc);
3451 	CURVNET_RESTORE();
3452 }
3453 
3454 /*
3455  * bridge_state_change:
3456  *
3457  *	Callback from the bridgestp code when a port changes states.
3458  */
3459 static void
3460 bridge_state_change(struct ifnet *ifp, int state)
3461 {
3462 	struct bridge_iflist *bif = ifp->if_bridge;
3463 	struct bridge_softc *sc = bif->bif_sc;
3464 	static const char *stpstates[] = {
3465 		"disabled",
3466 		"listening",
3467 		"learning",
3468 		"forwarding",
3469 		"blocking",
3470 		"discarding"
3471 	};
3472 
3473 	CURVNET_SET(ifp->if_vnet);
3474 	if (V_log_stp)
3475 		log(LOG_NOTICE, "%s: state changed to %s on %s\n",
3476 		    sc->sc_ifp->if_xname, stpstates[state], ifp->if_xname);
3477 	CURVNET_RESTORE();
3478 }
3479 
3480 /*
3481  * Send bridge packets through pfil if they are one of the types pfil can deal
3482  * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
3483  * question.) If *bifp or *ifp are NULL then packet filtering is skipped for
3484  * that interface.
3485  */
3486 static int
3487 bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
3488 {
3489 	int snap, error, i;
3490 	struct ether_header *eh1, eh2;
3491 	struct llc llc1;
3492 	u_int16_t ether_type;
3493 	pfil_return_t rv;
3494 #ifdef INET
3495 	struct ip *ip = NULL;
3496 	int hlen = 0;
3497 #endif
3498 
3499 	snap = 0;
3500 	error = -1;	/* Default error if not error == 0 */
3501 
3502 #if 0
3503 	/* we may return with the IP fields swapped, ensure its not shared */
3504 	KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__));
3505 #endif
3506 
3507 	if (V_pfil_bridge == 0 && V_pfil_member == 0 && V_pfil_ipfw == 0)
3508 		return (0); /* filtering is disabled */
3509 
3510 	i = min((*mp)->m_pkthdr.len, max_protohdr);
3511 	if ((*mp)->m_len < i) {
3512 	    *mp = m_pullup(*mp, i);
3513 	    if (*mp == NULL) {
3514 		printf("%s: m_pullup failed\n", __func__);
3515 		return (-1);
3516 	    }
3517 	}
3518 
3519 	eh1 = mtod(*mp, struct ether_header *);
3520 	ether_type = ntohs(eh1->ether_type);
3521 
3522 	/*
3523 	 * Check for SNAP/LLC.
3524 	 */
3525 	if (ether_type < ETHERMTU) {
3526 		struct llc *llc2 = (struct llc *)(eh1 + 1);
3527 
3528 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
3529 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
3530 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
3531 		    llc2->llc_control == LLC_UI) {
3532 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
3533 			snap = 1;
3534 		}
3535 	}
3536 
3537 	/*
3538 	 * If we're trying to filter bridge traffic, only look at traffic for
3539 	 * protocols available in the kernel (IPv4 and/or IPv6) to avoid
3540 	 * passing traffic for an unsupported protocol to the filter.  This is
3541 	 * lame since if we really wanted, say, an AppleTalk filter, we are
3542 	 * hosed, but of course we don't have an AppleTalk filter to begin
3543 	 * with.  (Note that since pfil doesn't understand ARP it will pass
3544 	 * *ALL* ARP traffic.)
3545 	 */
3546 	switch (ether_type) {
3547 #ifdef INET
3548 		case ETHERTYPE_ARP:
3549 		case ETHERTYPE_REVARP:
3550 			if (V_pfil_ipfw_arp == 0)
3551 				return (0); /* Automatically pass */
3552 
3553 			/* FALLTHROUGH */
3554 		case ETHERTYPE_IP:
3555 #endif
3556 #ifdef INET6
3557 		case ETHERTYPE_IPV6:
3558 #endif /* INET6 */
3559 			break;
3560 
3561 		default:
3562 			/*
3563 			 * We get here if the packet isn't from a supported
3564 			 * protocol.  Check to see if the user wants to pass
3565 			 * non-IP packets, these will not be checked by pfil(9)
3566 			 * and passed unconditionally so the default is to
3567 			 * drop.
3568 			 */
3569 			if (V_pfil_onlyip)
3570 				goto bad;
3571 	}
3572 
3573 	/* Run the packet through pfil before stripping link headers */
3574 	if (PFIL_HOOKED_OUT(V_link_pfil_head) && V_pfil_ipfw != 0 &&
3575 	    dir == PFIL_OUT && ifp != NULL) {
3576 		switch (pfil_mbuf_out(V_link_pfil_head, mp, ifp, NULL)) {
3577 		case PFIL_DROPPED:
3578 			return (EACCES);
3579 		case PFIL_CONSUMED:
3580 			return (0);
3581 		}
3582 	}
3583 
3584 	/* Strip off the Ethernet header and keep a copy. */
3585 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
3586 	m_adj(*mp, ETHER_HDR_LEN);
3587 
3588 	/* Strip off snap header, if present */
3589 	if (snap) {
3590 		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
3591 		m_adj(*mp, sizeof(struct llc));
3592 	}
3593 
3594 	/*
3595 	 * Check the IP header for alignment and errors
3596 	 */
3597 	if (dir == PFIL_IN) {
3598 		switch (ether_type) {
3599 #ifdef INET
3600 			case ETHERTYPE_IP:
3601 				error = bridge_ip_checkbasic(mp);
3602 				break;
3603 #endif
3604 #ifdef INET6
3605 			case ETHERTYPE_IPV6:
3606 				error = bridge_ip6_checkbasic(mp);
3607 				break;
3608 #endif /* INET6 */
3609 			default:
3610 				error = 0;
3611 		}
3612 		if (error)
3613 			goto bad;
3614 	}
3615 
3616 	error = 0;
3617 
3618 	/*
3619 	 * Run the packet through pfil
3620 	 */
3621 	rv = PFIL_PASS;
3622 	switch (ether_type) {
3623 #ifdef INET
3624 	case ETHERTYPE_IP:
3625 		/*
3626 		 * Run pfil on the member interface and the bridge, both can
3627 		 * be skipped by clearing pfil_member or pfil_bridge.
3628 		 *
3629 		 * Keep the order:
3630 		 *   in_if -> bridge_if -> out_if
3631 		 */
3632 		if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
3633 		    pfil_mbuf_out(V_inet_pfil_head, mp, bifp, NULL)) !=
3634 		    PFIL_PASS)
3635 			break;
3636 
3637 		if (V_pfil_member && ifp != NULL) {
3638 			rv = (dir == PFIL_OUT) ?
3639 			    pfil_mbuf_out(V_inet_pfil_head, mp, ifp, NULL) :
3640 			    pfil_mbuf_in(V_inet_pfil_head, mp, ifp, NULL);
3641 			if (rv != PFIL_PASS)
3642 				break;
3643 		}
3644 
3645 		if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
3646 		    pfil_mbuf_in(V_inet_pfil_head, mp, bifp, NULL)) !=
3647 		    PFIL_PASS)
3648 			break;
3649 
3650 		/* check if we need to fragment the packet */
3651 		/* bridge_fragment generates a mbuf chain of packets */
3652 		/* that already include eth headers */
3653 		if (V_pfil_member && ifp != NULL && dir == PFIL_OUT) {
3654 			i = (*mp)->m_pkthdr.len;
3655 			if (i > ifp->if_mtu) {
3656 				error = bridge_fragment(ifp, mp, &eh2, snap,
3657 					    &llc1);
3658 				return (error);
3659 			}
3660 		}
3661 
3662 		/* Recalculate the ip checksum. */
3663 		ip = mtod(*mp, struct ip *);
3664 		hlen = ip->ip_hl << 2;
3665 		if (hlen < sizeof(struct ip))
3666 			goto bad;
3667 		if (hlen > (*mp)->m_len) {
3668 			if ((*mp = m_pullup(*mp, hlen)) == NULL)
3669 				goto bad;
3670 			ip = mtod(*mp, struct ip *);
3671 			if (ip == NULL)
3672 				goto bad;
3673 		}
3674 		ip->ip_sum = 0;
3675 		if (hlen == sizeof(struct ip))
3676 			ip->ip_sum = in_cksum_hdr(ip);
3677 		else
3678 			ip->ip_sum = in_cksum(*mp, hlen);
3679 
3680 		break;
3681 #endif /* INET */
3682 #ifdef INET6
3683 	case ETHERTYPE_IPV6:
3684 		if (V_pfil_bridge && dir == PFIL_OUT && bifp != NULL && (rv =
3685 		    pfil_mbuf_out(V_inet6_pfil_head, mp, bifp, NULL)) !=
3686 		    PFIL_PASS)
3687 			break;
3688 
3689 		if (V_pfil_member && ifp != NULL) {
3690 			rv = (dir == PFIL_OUT) ?
3691 			    pfil_mbuf_out(V_inet6_pfil_head, mp, ifp, NULL) :
3692 			    pfil_mbuf_in(V_inet6_pfil_head, mp, ifp, NULL);
3693 			if (rv != PFIL_PASS)
3694 				break;
3695 		}
3696 
3697 		if (V_pfil_bridge && dir == PFIL_IN && bifp != NULL && (rv =
3698 		    pfil_mbuf_in(V_inet6_pfil_head, mp, bifp, NULL)) !=
3699 		    PFIL_PASS)
3700 			break;
3701 		break;
3702 #endif
3703 	}
3704 
3705 	switch (rv) {
3706 	case PFIL_CONSUMED:
3707 		return (0);
3708 	case PFIL_DROPPED:
3709 		return (EACCES);
3710 	default:
3711 		break;
3712 	}
3713 
3714 	error = -1;
3715 
3716 	/*
3717 	 * Finally, put everything back the way it was and return
3718 	 */
3719 	if (snap) {
3720 		M_PREPEND(*mp, sizeof(struct llc), M_NOWAIT);
3721 		if (*mp == NULL)
3722 			return (error);
3723 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
3724 	}
3725 
3726 	M_PREPEND(*mp, ETHER_HDR_LEN, M_NOWAIT);
3727 	if (*mp == NULL)
3728 		return (error);
3729 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
3730 
3731 	return (0);
3732 
3733 bad:
3734 	m_freem(*mp);
3735 	*mp = NULL;
3736 	return (error);
3737 }
3738 
3739 #ifdef INET
3740 /*
3741  * Perform basic checks on header size since
3742  * pfil assumes ip_input has already processed
3743  * it for it.  Cut-and-pasted from ip_input.c.
3744  * Given how simple the IPv6 version is,
3745  * does the IPv4 version really need to be
3746  * this complicated?
3747  *
3748  * XXX Should we update ipstat here, or not?
3749  * XXX Right now we update ipstat but not
3750  * XXX csum_counter.
3751  */
3752 static int
3753 bridge_ip_checkbasic(struct mbuf **mp)
3754 {
3755 	struct mbuf *m = *mp;
3756 	struct ip *ip;
3757 	int len, hlen;
3758 	u_short sum;
3759 
3760 	if (*mp == NULL)
3761 		return (-1);
3762 
3763 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
3764 		if ((m = m_copyup(m, sizeof(struct ip),
3765 			(max_linkhdr + 3) & ~3)) == NULL) {
3766 			/* XXXJRT new stat, please */
3767 			KMOD_IPSTAT_INC(ips_toosmall);
3768 			goto bad;
3769 		}
3770 	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
3771 		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
3772 			KMOD_IPSTAT_INC(ips_toosmall);
3773 			goto bad;
3774 		}
3775 	}
3776 	ip = mtod(m, struct ip *);
3777 	if (ip == NULL) goto bad;
3778 
3779 	if (ip->ip_v != IPVERSION) {
3780 		KMOD_IPSTAT_INC(ips_badvers);
3781 		goto bad;
3782 	}
3783 	hlen = ip->ip_hl << 2;
3784 	if (hlen < sizeof(struct ip)) { /* minimum header length */
3785 		KMOD_IPSTAT_INC(ips_badhlen);
3786 		goto bad;
3787 	}
3788 	if (hlen > m->m_len) {
3789 		if ((m = m_pullup(m, hlen)) == NULL) {
3790 			KMOD_IPSTAT_INC(ips_badhlen);
3791 			goto bad;
3792 		}
3793 		ip = mtod(m, struct ip *);
3794 		if (ip == NULL) goto bad;
3795 	}
3796 
3797 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
3798 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
3799 	} else {
3800 		if (hlen == sizeof(struct ip)) {
3801 			sum = in_cksum_hdr(ip);
3802 		} else {
3803 			sum = in_cksum(m, hlen);
3804 		}
3805 	}
3806 	if (sum) {
3807 		KMOD_IPSTAT_INC(ips_badsum);
3808 		goto bad;
3809 	}
3810 
3811 	/* Retrieve the packet length. */
3812 	len = ntohs(ip->ip_len);
3813 
3814 	/*
3815 	 * Check for additional length bogosity
3816 	 */
3817 	if (len < hlen) {
3818 		KMOD_IPSTAT_INC(ips_badlen);
3819 		goto bad;
3820 	}
3821 
3822 	/*
3823 	 * Check that the amount of data in the buffers
3824 	 * is as at least much as the IP header would have us expect.
3825 	 * Drop packet if shorter than we expect.
3826 	 */
3827 	if (m->m_pkthdr.len < len) {
3828 		KMOD_IPSTAT_INC(ips_tooshort);
3829 		goto bad;
3830 	}
3831 
3832 	/* Checks out, proceed */
3833 	*mp = m;
3834 	return (0);
3835 
3836 bad:
3837 	*mp = m;
3838 	return (-1);
3839 }
3840 #endif /* INET */
3841 
3842 #ifdef INET6
3843 /*
3844  * Same as above, but for IPv6.
3845  * Cut-and-pasted from ip6_input.c.
3846  * XXX Should we update ip6stat, or not?
3847  */
3848 static int
3849 bridge_ip6_checkbasic(struct mbuf **mp)
3850 {
3851 	struct mbuf *m = *mp;
3852 	struct ip6_hdr *ip6;
3853 
3854 	/*
3855 	 * If the IPv6 header is not aligned, slurp it up into a new
3856 	 * mbuf with space for link headers, in the event we forward
3857 	 * it.  Otherwise, if it is aligned, make sure the entire base
3858 	 * IPv6 header is in the first mbuf of the chain.
3859 	 */
3860 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
3861 		struct ifnet *inifp = m->m_pkthdr.rcvif;
3862 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
3863 			    (max_linkhdr + 3) & ~3)) == NULL) {
3864 			/* XXXJRT new stat, please */
3865 			IP6STAT_INC(ip6s_toosmall);
3866 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
3867 			goto bad;
3868 		}
3869 	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
3870 		struct ifnet *inifp = m->m_pkthdr.rcvif;
3871 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
3872 			IP6STAT_INC(ip6s_toosmall);
3873 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
3874 			goto bad;
3875 		}
3876 	}
3877 
3878 	ip6 = mtod(m, struct ip6_hdr *);
3879 
3880 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
3881 		IP6STAT_INC(ip6s_badvers);
3882 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
3883 		goto bad;
3884 	}
3885 
3886 	/* Checks out, proceed */
3887 	*mp = m;
3888 	return (0);
3889 
3890 bad:
3891 	*mp = m;
3892 	return (-1);
3893 }
3894 #endif /* INET6 */
3895 
3896 #ifdef INET
3897 /*
3898  * bridge_fragment:
3899  *
3900  *	Fragment mbuf chain in multiple packets and prepend ethernet header.
3901  */
3902 static int
3903 bridge_fragment(struct ifnet *ifp, struct mbuf **mp, struct ether_header *eh,
3904     int snap, struct llc *llc)
3905 {
3906 	struct mbuf *m = *mp, *nextpkt = NULL, *mprev = NULL, *mcur = NULL;
3907 	struct ip *ip;
3908 	int error = -1;
3909 
3910 	if (m->m_len < sizeof(struct ip) &&
3911 	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
3912 		goto dropit;
3913 	ip = mtod(m, struct ip *);
3914 
3915 	m->m_pkthdr.csum_flags |= CSUM_IP;
3916 	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist);
3917 	if (error)
3918 		goto dropit;
3919 
3920 	/*
3921 	 * Walk the chain and re-add the Ethernet header for
3922 	 * each mbuf packet.
3923 	 */
3924 	for (mcur = m; mcur; mcur = mcur->m_nextpkt) {
3925 		nextpkt = mcur->m_nextpkt;
3926 		mcur->m_nextpkt = NULL;
3927 		if (snap) {
3928 			M_PREPEND(mcur, sizeof(struct llc), M_NOWAIT);
3929 			if (mcur == NULL) {
3930 				error = ENOBUFS;
3931 				if (mprev != NULL)
3932 					mprev->m_nextpkt = nextpkt;
3933 				goto dropit;
3934 			}
3935 			bcopy(llc, mtod(mcur, caddr_t),sizeof(struct llc));
3936 		}
3937 
3938 		M_PREPEND(mcur, ETHER_HDR_LEN, M_NOWAIT);
3939 		if (mcur == NULL) {
3940 			error = ENOBUFS;
3941 			if (mprev != NULL)
3942 				mprev->m_nextpkt = nextpkt;
3943 			goto dropit;
3944 		}
3945 		bcopy(eh, mtod(mcur, caddr_t), ETHER_HDR_LEN);
3946 
3947 		/*
3948 		 * The previous two M_PREPEND could have inserted one or two
3949 		 * mbufs in front so we have to update the previous packet's
3950 		 * m_nextpkt.
3951 		 */
3952 		mcur->m_nextpkt = nextpkt;
3953 		if (mprev != NULL)
3954 			mprev->m_nextpkt = mcur;
3955 		else {
3956 			/* The first mbuf in the original chain needs to be
3957 			 * updated. */
3958 			*mp = mcur;
3959 		}
3960 		mprev = mcur;
3961 	}
3962 
3963 	KMOD_IPSTAT_INC(ips_fragmented);
3964 	return (error);
3965 
3966 dropit:
3967 	for (mcur = *mp; mcur; mcur = m) { /* droping the full packet chain */
3968 		m = mcur->m_nextpkt;
3969 		m_freem(mcur);
3970 	}
3971 	return (error);
3972 }
3973 #endif /* INET */
3974 
3975 static void
3976 bridge_linkstate(struct ifnet *ifp)
3977 {
3978 	struct bridge_softc *sc = NULL;
3979 	struct bridge_iflist *bif;
3980 	struct epoch_tracker et;
3981 
3982 	NET_EPOCH_ENTER(et);
3983 
3984 	bif = ifp->if_bridge;
3985 	if (bif)
3986 		sc = bif->bif_sc;
3987 
3988 	if (sc != NULL) {
3989 		bridge_linkcheck(sc);
3990 		bstp_linkstate(&bif->bif_stp);
3991 	}
3992 
3993 	NET_EPOCH_EXIT(et);
3994 }
3995 
3996 static void
3997 bridge_linkcheck(struct bridge_softc *sc)
3998 {
3999 	struct bridge_iflist *bif;
4000 	int new_link, hasls;
4001 
4002 	BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
4003 
4004 	new_link = LINK_STATE_DOWN;
4005 	hasls = 0;
4006 	/* Our link is considered up if at least one of our ports is active */
4007 	CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
4008 		if (bif->bif_ifp->if_capabilities & IFCAP_LINKSTATE)
4009 			hasls++;
4010 		if (bif->bif_ifp->if_link_state == LINK_STATE_UP) {
4011 			new_link = LINK_STATE_UP;
4012 			break;
4013 		}
4014 	}
4015 	if (!CK_LIST_EMPTY(&sc->sc_iflist) && !hasls) {
4016 		/* If no interfaces support link-state then we default to up */
4017 		new_link = LINK_STATE_UP;
4018 	}
4019 	if_link_state_change(sc->sc_ifp, new_link);
4020 }
4021