xref: /freebsd/sys/net/if_vxlan.c (revision 25c2f4cb95686fa3dc2144872abe6df2a983c608)
1 /*-
2  * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "opt_inet.h"
28 #include "opt_inet6.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/eventhandler.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/hash.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/refcount.h>
42 #include <sys/rmlock.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/queue.h>
46 #include <sys/sbuf.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/sockio.h>
50 #include <sys/sysctl.h>
51 #include <sys/systm.h>
52 
53 #include <net/bpf.h>
54 #include <net/ethernet.h>
55 #include <net/if.h>
56 #include <net/if_var.h>
57 #include <net/if_clone.h>
58 #include <net/if_dl.h>
59 #include <net/if_media.h>
60 #include <net/if_types.h>
61 #include <net/if_vxlan.h>
62 #include <net/netisr.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in_var.h>
67 #include <netinet/in_pcb.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip6.h>
70 #include <netinet/ip_var.h>
71 #include <netinet6/ip6_var.h>
72 #include <netinet/udp.h>
73 #include <netinet/udp_var.h>
74 
75 struct vxlan_softc;
76 LIST_HEAD(vxlan_softc_head, vxlan_softc);
77 
/*
 * One multicast membership slot of a vxlan_socket (element type of the
 * vxlso_mc[] array). vxlan_socket_destroy() asserts that the group
 * address family of every slot is AF_UNSPEC before the socket is freed.
 *
 * NOTE(review): from the names, vxlsomc_saddr/vxlsomc_gaddr look like the
 * source and group addresses, vxlsomc_ifidx the interface index and
 * vxlsomc_users a use count -- confirm against the vxlan_socket_mc_*()
 * functions.
 */
struct vxlan_socket_mc_info {
	union vxlan_sockaddr		 vxlsomc_saddr;
	union vxlan_sockaddr		 vxlsomc_gaddr;
	int				 vxlsomc_ifidx;
	int				 vxlsomc_users;
};
84 
/* Number of multicast slots in each socket's vxlso_mc[] array. */
#define VXLAN_SO_MC_MAX_GROUPS		32

/*
 * Each socket keeps its softcs in VXLAN_SO_VNI_HASH_SIZE (1 << 6) lists;
 * the list index for a VNI is the VNI modulo that size.
 */
#define VXLAN_SO_VNI_HASH_SHIFT		6
#define VXLAN_SO_VNI_HASH_SIZE		(1 << VXLAN_SO_VNI_HASH_SHIFT)
#define VXLAN_SO_VNI_HASH(_vni)		((_vni) % VXLAN_SO_VNI_HASH_SIZE)
90 
/*
 * A UDP socket shared by vxlan interfaces, kept on the global
 * vxlan_socket_list and looked up by local address.
 */
struct vxlan_socket {
	/* Kernel socket; created in vxlan_socket_init(), closed on destroy. */
	struct socket			*vxlso_sock;
	/* rmlock taken via the VXLAN_SO_*LOCK() macros. */
	struct rmlock			 vxlso_lock;
	/* Reference count; changed under vxlan_list_mtx. */
	u_int				 vxlso_refcnt;
	/* Local address: bind address and vxlan_socket_lookup() key. */
	union vxlan_sockaddr		 vxlso_laddr;
	/* Linkage on vxlan_socket_list. */
	LIST_ENTRY(vxlan_socket)	 vxlso_entry;
	/* Softcs using this socket, in VXLAN_SO_VNI_HASH_SIZE lists. */
	struct vxlan_softc_head		 vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
	/* Multicast membership slots. */
	struct vxlan_socket_mc_info	 vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
};
100 
/*
 * Wrappers around the per-socket rmlock. The read-side macros take a
 * caller-supplied struct rm_priotracker pointer (_p). The assert macros
 * check that the lock is held (RA_LOCKED) or write-held (RA_WLOCKED).
 */
#define VXLAN_SO_RLOCK(_vso, _p)	rm_rlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_RUNLOCK(_vso, _p)	rm_runlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_WLOCK(_vso)		rm_wlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_WUNLOCK(_vso)		rm_wunlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_LOCK_ASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
#define VXLAN_SO_LOCK_WASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)

/*
 * Reference counting on vxlso_refcnt. VXLAN_SO_RELEASE() evaluates to the
 * result of refcount_release(); vxlan_socket_release() destroys the
 * socket when that result is non-zero.
 */
#define VXLAN_SO_ACQUIRE(_vso)		refcount_acquire(&(_vso)->vxlso_refcnt)
#define VXLAN_SO_RELEASE(_vso)		refcount_release(&(_vso)->vxlso_refcnt)
112 
/*
 * Forwarding table entry: maps an Ethernet MAC address to the remote
 * address stored in vxlfe_raddr.
 */
struct vxlan_ftable_entry {
	/* Linkage in one bucket of the softc's vxl_ftable[]. */
	LIST_ENTRY(vxlan_ftable_entry)	 vxlfe_hash;
	/* VXLAN_FE_FLAG_* bits. */
	uint16_t			 vxlfe_flags;
	/* Lookup key; also the sort key within a bucket. */
	uint8_t				 vxlfe_mac[ETHER_ADDR_LEN];
	/* Remote address associated with the MAC. */
	union vxlan_sockaddr		 vxlfe_raddr;
	/* Compared against time_uptime; only dynamic entries are expired. */
	time_t				 vxlfe_expire;
};
120 
/*
 * Values for vxlfe_flags. Entries learned by vxlan_ftable_update_locked()
 * are created with VXLAN_FE_FLAG_DYNAMIC.
 */
#define VXLAN_FE_FLAG_DYNAMIC		0x01
#define VXLAN_FE_FLAG_STATIC		0x02

/* Non-zero when the entry has VXLAN_FE_FLAG_DYNAMIC set. */
#define VXLAN_FE_IS_DYNAMIC(_fe) \
    ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)

/*
 * The per-softc forwarding table has VXLAN_SC_FTABLE_SIZE (1 << 9)
 * buckets. The bucket index is vxlan_mac_hash() modulo the table size.
 */
#define VXLAN_SC_FTABLE_SHIFT		9
#define VXLAN_SC_FTABLE_SIZE		(1 << VXLAN_SC_FTABLE_SHIFT)
#define VXLAN_SC_FTABLE_MASK		(VXLAN_SC_FTABLE_SIZE - 1)
#define VXLAN_SC_FTABLE_HASH(_sc, _mac)	\
    (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)
132 
133 LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
134 
/* Per-softc counters maintained by vxlan_ftable_update_locked(). */
struct vxlan_statistics {
	/* Learning attempts rejected because the table was at vxl_ftable_max. */
	uint32_t	ftable_nospace;
	/*
	 * Number of times the read lock was dropped and the write lock
	 * taken in order to modify the table.
	 */
	uint32_t	ftable_lock_upgrade_failed;
};
139 
/*
 * Per-interface state of a vxlan interface.
 */
struct vxlan_softc {
	struct ifnet			*vxl_ifp;
	struct vxlan_socket		*vxl_sock;
	uint32_t			 vxl_vni;
	union vxlan_sockaddr		 vxl_src_addr;
	/* The port of this address is reused for learned entries. */
	union vxlan_sockaddr		 vxl_dst_addr;
	uint32_t			 vxl_flags;
#define VXLAN_FLAG_INIT		0x0001
#define VXLAN_FLAG_TEARDOWN	0x0002
#define VXLAN_FLAG_LEARN	0x0004

	uint32_t			 vxl_port_hash_key;
	uint16_t			 vxl_min_port;
	uint16_t			 vxl_max_port;
	uint8_t				 vxl_ttl;

	/* Lookup table from MAC address to forwarding entry. */
	/* Number of entries currently in the table. */
	uint32_t			 vxl_ftable_cnt;
	/* Learning fails with ENOSPC once vxl_ftable_cnt reaches this. */
	uint32_t			 vxl_ftable_max;
	/* Added to time_uptime to compute an entry's expiry time. */
	uint32_t			 vxl_ftable_timeout;
	/* Random key chosen in vxlan_ftable_init(). */
	uint32_t			 vxl_ftable_hash_key;
	/* Array of VXLAN_SC_FTABLE_SIZE bucket heads. */
	struct vxlan_ftable_head	*vxl_ftable;

	/* Derived from vxl_dst_addr. */
	struct vxlan_ftable_entry	 vxl_default_fe;

	struct ip_moptions		*vxl_im4o;
	struct ip6_moptions		*vxl_im6o;

	/* rmlock taken via the VXLAN_*LOCK() macros. */
	struct rmlock			 vxl_lock;
	volatile u_int			 vxl_refcnt;

	int				 vxl_unit;
	int				 vxl_vso_mc_index;
	struct vxlan_statistics		 vxl_stats;
	struct sysctl_oid		*vxl_sysctl_node;
	struct sysctl_ctx_list		 vxl_sysctl_ctx;
	struct callout			 vxl_callout;
	uint8_t				 vxl_hwaddr[ETHER_ADDR_LEN];
	int				 vxl_mc_ifindex;
	struct ifnet			*vxl_mc_ifp;
	struct ifmedia 			 vxl_media;
	char				 vxl_mc_ifname[IFNAMSIZ];
	/* Linkage on a vxlso_vni_hash[] list of the owning socket. */
	LIST_ENTRY(vxlan_softc)		 vxl_entry;
	LIST_ENTRY(vxlan_softc)		 vxl_ifdetach_list;
};
186 
/*
 * Wrappers around the per-softc rmlock. The read-side macros take a
 * caller-supplied struct rm_priotracker pointer (_p).
 */
#define VXLAN_RLOCK(_sc, _p)	rm_rlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_WLOCK(_sc)	rm_wlock(&(_sc)->vxl_lock)
#define VXLAN_WUNLOCK(_sc)	rm_wunlock(&(_sc)->vxl_lock)
#define VXLAN_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->vxl_lock)
#define VXLAN_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
#define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
/*
 * Release the lock in whichever mode it is held: write-unlock when
 * rm_wowned() reports write ownership, otherwise read-unlock with the
 * tracker. Used after vxlan_ftable_update_locked(), which may have
 * replaced the read lock with the write lock.
 */
#define VXLAN_UNLOCK(_sc, _p) do {		\
    if (VXLAN_LOCK_WOWNED(_sc))			\
	VXLAN_WUNLOCK(_sc);			\
    else					\
	VXLAN_RUNLOCK(_sc, _p);			\
} while (0)

/* Reference counting on vxl_refcnt. */
#define VXLAN_ACQUIRE(_sc)	refcount_acquire(&(_sc)->vxl_refcnt)
#define VXLAN_RELEASE(_sc)	refcount_release(&(_sc)->vxl_refcnt)
203 
204 #define	satoconstsin(sa)	((const struct sockaddr_in *)(sa))
205 #define	satoconstsin6(sa)	((const struct sockaddr_in6 *)(sa))
206 
/* A UDP header immediately followed by a VXLAN header, without padding. */
struct vxlanudphdr {
	struct udphdr		vxlh_udp;
	struct vxlan_header	vxlh_hdr;
} __packed;
211 
212 static int	vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
213 static void	vxlan_ftable_init(struct vxlan_softc *);
214 static void	vxlan_ftable_fini(struct vxlan_softc *);
215 static void	vxlan_ftable_flush(struct vxlan_softc *, int);
216 static void	vxlan_ftable_expire(struct vxlan_softc *);
217 static int	vxlan_ftable_update_locked(struct vxlan_softc *,
218 		    const struct sockaddr *, const uint8_t *,
219 		    struct rm_priotracker *);
220 static int	vxlan_ftable_update(struct vxlan_softc *,
221 		    const struct sockaddr *, const uint8_t *);
222 static int	vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
223 
224 static struct vxlan_ftable_entry *
225 		vxlan_ftable_entry_alloc(void);
226 static void	vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
227 static void	vxlan_ftable_entry_init(struct vxlan_softc *,
228 		    struct vxlan_ftable_entry *, const uint8_t *,
229 		    const struct sockaddr *, uint32_t);
230 static void	vxlan_ftable_entry_destroy(struct vxlan_softc *,
231 		    struct vxlan_ftable_entry *);
232 static int	vxlan_ftable_entry_insert(struct vxlan_softc *,
233 		    struct vxlan_ftable_entry *);
234 static struct vxlan_ftable_entry *
235 		vxlan_ftable_entry_lookup(struct vxlan_softc *,
236 		    const uint8_t *);
237 static void	vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
238 		    struct sbuf *);
239 
240 static struct vxlan_socket *
241 		vxlan_socket_alloc(const union vxlan_sockaddr *);
242 static void	vxlan_socket_destroy(struct vxlan_socket *);
243 static void	vxlan_socket_release(struct vxlan_socket *);
244 static struct vxlan_socket *
245 		vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
246 static void	vxlan_socket_insert(struct vxlan_socket *);
247 static int	vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
248 static int	vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
249 static int	vxlan_socket_create(struct ifnet *, int,
250 		    const union vxlan_sockaddr *, struct vxlan_socket **);
251 static void	vxlan_socket_ifdetach(struct vxlan_socket *,
252 		    struct ifnet *, struct vxlan_softc_head *);
253 
254 static struct vxlan_socket *
255 		vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
256 static int	vxlan_sockaddr_mc_info_match(
257 		    const struct vxlan_socket_mc_info *,
258 		    const union vxlan_sockaddr *,
259 		    const union vxlan_sockaddr *, int);
260 static int	vxlan_socket_mc_join_group(struct vxlan_socket *,
261 		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
262 		    int *, union vxlan_sockaddr *);
263 static int	vxlan_socket_mc_leave_group(struct vxlan_socket *,
264 		    const union vxlan_sockaddr *,
265 		    const union vxlan_sockaddr *, int);
266 static int	vxlan_socket_mc_add_group(struct vxlan_socket *,
267 		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
268 		    int, int *);
269 static void	vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
270 		    int);
271 
272 static struct vxlan_softc *
273 		vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
274 		    uint32_t);
275 static struct vxlan_softc *
276 		vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
277 static int	vxlan_socket_insert_softc(struct vxlan_socket *,
278 		    struct vxlan_softc *);
279 static void	vxlan_socket_remove_softc(struct vxlan_socket *,
280 		    struct vxlan_softc *);
281 
282 static struct ifnet *
283 		vxlan_multicast_if_ref(struct vxlan_softc *, int);
284 static void	vxlan_free_multicast(struct vxlan_softc *);
285 static int	vxlan_setup_multicast_interface(struct vxlan_softc *);
286 
287 static int	vxlan_setup_multicast(struct vxlan_softc *);
288 static int	vxlan_setup_socket(struct vxlan_softc *);
289 static void	vxlan_setup_interface(struct vxlan_softc *);
290 static int	vxlan_valid_init_config(struct vxlan_softc *);
291 static void	vxlan_init_wait(struct vxlan_softc *);
292 static void	vxlan_init_complete(struct vxlan_softc *);
293 static void	vxlan_init(void *);
294 static void	vxlan_release(struct vxlan_softc *);
295 static void	vxlan_teardown_wait(struct vxlan_softc *);
296 static void	vxlan_teardown_complete(struct vxlan_softc *);
297 static void	vxlan_teardown_locked(struct vxlan_softc *);
298 static void	vxlan_teardown(struct vxlan_softc *);
299 static void	vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
300 		    struct vxlan_softc_head *);
301 static void	vxlan_timer(void *);
302 
303 static int	vxlan_ctrl_get_config(struct vxlan_softc *, void *);
304 static int	vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
305 static int	vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
306 static int	vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
307 static int	vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
308 static int	vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
309 static int	vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
310 static int	vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
311 static int	vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
312 static int	vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
313 static int	vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
314 static int	vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
315 static int	vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
316 static int	vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
317 static int	vxlan_ctrl_flush(struct vxlan_softc *, void *);
318 static int	vxlan_ioctl_drvspec(struct vxlan_softc *,
319 		    struct ifdrv *, int);
320 static int	vxlan_ioctl_ifflags(struct vxlan_softc *);
321 static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);
322 
323 #if defined(INET) || defined(INET6)
324 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
325 static void	vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
326 		    int, uint16_t, uint16_t);
327 #endif
328 static int	vxlan_encap4(struct vxlan_softc *,
329 		    const union vxlan_sockaddr *, struct mbuf *);
330 static int	vxlan_encap6(struct vxlan_softc *,
331 		    const union vxlan_sockaddr *, struct mbuf *);
332 static int	vxlan_transmit(struct ifnet *, struct mbuf *);
333 static void	vxlan_qflush(struct ifnet *);
334 static void	vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
335 		    const struct sockaddr *, void *);
336 static int	vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
337 		    const struct sockaddr *);
338 
339 static void	vxlan_set_default_config(struct vxlan_softc *);
340 static int	vxlan_set_user_config(struct vxlan_softc *,
341 		     struct ifvxlanparam *);
342 static int	vxlan_clone_create(struct if_clone *, int, caddr_t);
343 static void	vxlan_clone_destroy(struct ifnet *);
344 
345 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
346 static void	vxlan_fakeaddr(struct vxlan_softc *);
347 static int	vxlan_media_change(struct ifnet *);
348 static void	vxlan_media_status(struct ifnet *, struct ifmediareq *);
349 
350 static int	vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
351 		    const struct sockaddr *);
352 static void	vxlan_sockaddr_copy(union vxlan_sockaddr *,
353 		    const struct sockaddr *);
354 static int	vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
355 		    const struct sockaddr *);
356 static void	vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
357 		    const struct sockaddr *);
358 static int	vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
359 static int	vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
360 static int	vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
361 
362 static int	vxlan_can_change_config(struct vxlan_softc *);
363 static int	vxlan_check_vni(uint32_t);
364 static int	vxlan_check_ttl(int);
365 static int	vxlan_check_ftable_timeout(uint32_t);
366 static int	vxlan_check_ftable_max(uint32_t);
367 
368 static void	vxlan_sysctl_setup(struct vxlan_softc *);
369 static void	vxlan_sysctl_destroy(struct vxlan_softc *);
370 static int	vxlan_tunable_int(struct vxlan_softc *, const char *, int);
371 
372 static void	vxlan_ifdetach_event(void *, struct ifnet *);
373 static void	vxlan_load(void);
374 static void	vxlan_unload(void);
375 static int	vxlan_modevent(module_t, int, void *);
376 
377 static const char vxlan_name[] = "vxlan";
378 static MALLOC_DEFINE(M_VXLAN, vxlan_name,
379     "Virtual eXtensible LAN Interface");
380 static struct if_clone *vxlan_cloner;
381 static struct mtx vxlan_list_mtx;
382 static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
383 
384 static eventhandler_tag vxlan_ifdetach_event_tag;
385 
386 SYSCTL_DECL(_net_link);
387 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW, 0,
388     "Virtual eXtensible Local Area Network");
389 
390 static int vxlan_legacy_port = 0;
391 TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
392 static int vxlan_reuse_port = 0;
393 TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
394 
395 /* Default maximum number of addresses in the forwarding table. */
396 #ifndef VXLAN_FTABLE_MAX
397 #define VXLAN_FTABLE_MAX	2000
398 #endif
399 
400 /* Timeout (in seconds) of addresses learned in the forwarding table. */
401 #ifndef VXLAN_FTABLE_TIMEOUT
402 #define VXLAN_FTABLE_TIMEOUT	(20 * 60)
403 #endif
404 
405 /*
406  * Maximum timeout (in seconds) of addresses learned in the forwarding
407  * table.
408  */
409 #ifndef VXLAN_FTABLE_MAX_TIMEOUT
410 #define VXLAN_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)
411 #endif
412 
413 /* Number of seconds between pruning attempts of the forwarding table. */
414 #ifndef VXLAN_FTABLE_PRUNE
415 #define VXLAN_FTABLE_PRUNE	(5 * 60)
416 #endif
417 
418 static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
419 
/*
 * Description of one driver-specific control command: the handler, the
 * size of its argument structure, and VXLAN_CTRL_FLAG_* bits.
 *
 * NOTE(review): the flag names suggest copy-in/copy-out of the argument
 * and a privilege requirement; the code consuming them is not in view.
 */
struct vxlan_control {
	int	(*vxlc_func)(struct vxlan_softc *, void *);
	int	vxlc_argsize;
	int	vxlc_flags;
#define VXLAN_CTRL_FLAG_COPYIN	0x01
#define VXLAN_CTRL_FLAG_COPYOUT	0x02
#define VXLAN_CTRL_FLAG_SUSER	0x04
};
428 
/*
 * Control command table, indexed by VXLAN_CMD_* value. Only
 * VXLAN_CMD_GET_CONFIG is marked COPYOUT (with a struct ifvxlancfg
 * argument); every other command takes a struct ifvxlancmd and is marked
 * COPYIN | SUSER.
 */
static const struct vxlan_control vxlan_control_table[] = {
	[VXLAN_CMD_GET_CONFIG] =
	    {	vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
		VXLAN_CTRL_FLAG_COPYOUT
	    },

	[VXLAN_CMD_SET_VNI] =
	    {   vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LOCAL_ADDR] =
	    {   vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_REMOTE_ADDR] =
	    {   vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LOCAL_PORT] =
	    {   vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_REMOTE_PORT] =
	    {   vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_PORT_RANGE] =
	    {   vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_FTABLE_TIMEOUT] =
	    {	vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_FTABLE_MAX] =
	    {	vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_MULTICAST_IF] =
	    {	vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_TTL] =
	    {	vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LEARN] =
	    {	vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_ADD] =
	    {	vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_REM] =
	    {	vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FLUSH] =
	    {   vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },
};

/* Number of slots in vxlan_control_table. */
static const int vxlan_control_table_size = nitems(vxlan_control_table);
507 
/*
 * Compare two Ethernet addresses byte by byte.
 *
 * Returns 0 when all ETHER_ADDR_LEN bytes match, otherwise the signed
 * difference (a[i] - b[i]) of the first pair of bytes that differ.
 */
static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx, diff;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		diff = (int)a[idx] - (int)b[idx];
		if (diff != 0)
			return (diff);
	}

	return (0);
}
518 
519 static void
520 vxlan_ftable_init(struct vxlan_softc *sc)
521 {
522 	int i;
523 
524 	sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
525 	    VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
526 
527 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
528 		LIST_INIT(&sc->vxl_ftable[i]);
529 	sc->vxl_ftable_hash_key = arc4random();
530 }
531 
532 static void
533 vxlan_ftable_fini(struct vxlan_softc *sc)
534 {
535 	int i;
536 
537 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
538 		KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
539 		    ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
540 	}
541 	MPASS(sc->vxl_ftable_cnt == 0);
542 
543 	free(sc->vxl_ftable, M_VXLAN);
544 	sc->vxl_ftable = NULL;
545 }
546 
547 static void
548 vxlan_ftable_flush(struct vxlan_softc *sc, int all)
549 {
550 	struct vxlan_ftable_entry *fe, *tfe;
551 	int i;
552 
553 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
554 		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
555 			if (all || VXLAN_FE_IS_DYNAMIC(fe))
556 				vxlan_ftable_entry_destroy(sc, fe);
557 		}
558 	}
559 }
560 
561 static void
562 vxlan_ftable_expire(struct vxlan_softc *sc)
563 {
564 	struct vxlan_ftable_entry *fe, *tfe;
565 	int i;
566 
567 	VXLAN_LOCK_WASSERT(sc);
568 
569 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
570 		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
571 			if (VXLAN_FE_IS_DYNAMIC(fe) &&
572 			    time_uptime >= fe->vxlfe_expire)
573 				vxlan_ftable_entry_destroy(sc, fe);
574 		}
575 	}
576 }
577 
578 static int
579 vxlan_ftable_update_locked(struct vxlan_softc *sc, const struct sockaddr *sa,
580     const uint8_t *mac, struct rm_priotracker *tracker)
581 {
582 	union vxlan_sockaddr vxlsa;
583 	struct vxlan_ftable_entry *fe;
584 	int error;
585 
586 	VXLAN_LOCK_ASSERT(sc);
587 
588 again:
589 	/*
590 	 * A forwarding entry for this MAC address might already exist. If
591 	 * so, update it, otherwise create a new one. We may have to upgrade
592 	 * the lock if we have to change or create an entry.
593 	 */
594 	fe = vxlan_ftable_entry_lookup(sc, mac);
595 	if (fe != NULL) {
596 		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
597 
598 		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
599 		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, sa))
600 			return (0);
601 		if (!VXLAN_LOCK_WOWNED(sc)) {
602 			VXLAN_RUNLOCK(sc, tracker);
603 			VXLAN_WLOCK(sc);
604 			sc->vxl_stats.ftable_lock_upgrade_failed++;
605 			goto again;
606 		}
607 		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, sa);
608 		return (0);
609 	}
610 
611 	if (!VXLAN_LOCK_WOWNED(sc)) {
612 		VXLAN_RUNLOCK(sc, tracker);
613 		VXLAN_WLOCK(sc);
614 		sc->vxl_stats.ftable_lock_upgrade_failed++;
615 		goto again;
616 	}
617 
618 	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
619 		sc->vxl_stats.ftable_nospace++;
620 		return (ENOSPC);
621 	}
622 
623 	fe = vxlan_ftable_entry_alloc();
624 	if (fe == NULL)
625 		return (ENOMEM);
626 
627 	/*
628 	 * The source port may be randomly select by the remove host, so
629 	 * use the port of the default destination address.
630 	 */
631 	vxlan_sockaddr_copy(&vxlsa, sa);
632 	vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
633 
634 	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa.sa,
635 	    VXLAN_FE_FLAG_DYNAMIC);
636 
637 	/* The prior lookup failed, so the insert should not. */
638 	error = vxlan_ftable_entry_insert(sc, fe);
639 	MPASS(error == 0);
640 
641 	return (0);
642 }
643 
644 static int
645 vxlan_ftable_update(struct vxlan_softc *sc, const struct sockaddr *sa,
646     const uint8_t *mac)
647 {
648 	struct rm_priotracker tracker;
649 	int error;
650 
651 	VXLAN_RLOCK(sc, &tracker);
652 	error = vxlan_ftable_update_locked(sc, sa, mac, &tracker);
653 	VXLAN_UNLOCK(sc, &tracker);
654 
655 	return (error);
656 }
657 
658 static int
659 vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
660 {
661 	struct rm_priotracker tracker;
662 	struct sbuf sb;
663 	struct vxlan_softc *sc;
664 	struct vxlan_ftable_entry *fe;
665 	size_t size;
666 	int i, error;
667 
668 	/*
669 	 * This is mostly intended for debugging during development. It is
670 	 * not practical to dump an entire large table this way.
671 	 */
672 
673 	sc = arg1;
674 	size = PAGE_SIZE;	/* Calculate later. */
675 
676 	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
677 	sbuf_putc(&sb, '\n');
678 
679 	VXLAN_RLOCK(sc, &tracker);
680 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
681 		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
682 			if (sbuf_error(&sb) != 0)
683 				break;
684 			vxlan_ftable_entry_dump(fe, &sb);
685 		}
686 	}
687 	VXLAN_RUNLOCK(sc, &tracker);
688 
689 	if (sbuf_len(&sb) == 1)
690 		sbuf_setpos(&sb, 0);
691 
692 	sbuf_finish(&sb);
693 	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
694 	sbuf_delete(&sb);
695 
696 	return (error);
697 }
698 
699 static struct vxlan_ftable_entry *
700 vxlan_ftable_entry_alloc(void)
701 {
702 	struct vxlan_ftable_entry *fe;
703 
704 	fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
705 
706 	return (fe);
707 }
708 
709 static void
710 vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
711 {
712 
713 	free(fe, M_VXLAN);
714 }
715 
716 static void
717 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
718     const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
719 {
720 
721 	fe->vxlfe_flags = flags;
722 	fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
723 	memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
724 	vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
725 }
726 
727 static void
728 vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
729     struct vxlan_ftable_entry *fe)
730 {
731 
732 	sc->vxl_ftable_cnt--;
733 	LIST_REMOVE(fe, vxlfe_hash);
734 	vxlan_ftable_entry_free(fe);
735 }
736 
737 static int
738 vxlan_ftable_entry_insert(struct vxlan_softc *sc,
739     struct vxlan_ftable_entry *fe)
740 {
741 	struct vxlan_ftable_entry *lfe;
742 	uint32_t hash;
743 	int dir;
744 
745 	VXLAN_LOCK_WASSERT(sc);
746 	hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
747 
748 	lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
749 	if (lfe == NULL) {
750 		LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
751 		goto out;
752 	}
753 
754 	do {
755 		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
756 		if (dir == 0)
757 			return (EEXIST);
758 		if (dir > 0) {
759 			LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
760 			goto out;
761 		} else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
762 			LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
763 			goto out;
764 		} else
765 			lfe = LIST_NEXT(lfe, vxlfe_hash);
766 	} while (lfe != NULL);
767 
768 out:
769 	sc->vxl_ftable_cnt++;
770 
771 	return (0);
772 }
773 
774 static struct vxlan_ftable_entry *
775 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
776 {
777 	struct vxlan_ftable_entry *fe;
778 	uint32_t hash;
779 	int dir;
780 
781 	VXLAN_LOCK_ASSERT(sc);
782 	hash = VXLAN_SC_FTABLE_HASH(sc, mac);
783 
784 	LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
785 		dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
786 		if (dir == 0)
787 			return (fe);
788 		if (dir > 0)
789 			break;
790 	}
791 
792 	return (NULL);
793 }
794 
795 static void
796 vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
797 {
798 	char buf[64];
799 	const union vxlan_sockaddr *sa;
800 	const void *addr;
801 	int i, len, af, width;
802 
803 	sa = &fe->vxlfe_raddr;
804 	af = sa->sa.sa_family;
805 	len = sbuf_len(sb);
806 
807 	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
808 	    fe->vxlfe_flags);
809 
810 	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
811 		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
812 	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
813 
814 	if (af == AF_INET) {
815 		addr = &sa->in4.sin_addr;
816 		width = INET_ADDRSTRLEN - 1;
817 	} else {
818 		addr = &sa->in6.sin6_addr;
819 		width = INET6_ADDRSTRLEN - 1;
820 	}
821 	inet_ntop(af, addr, buf, sizeof(buf));
822 	sbuf_printf(sb, "%*s ", width, buf);
823 
824 	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
825 
826 	sbuf_putc(sb, '\n');
827 
828 	/* Truncate a partial line. */
829 	if (sbuf_error(sb) != 0)
830 		sbuf_setpos(sb, len);
831 }
832 
833 static struct vxlan_socket *
834 vxlan_socket_alloc(const union vxlan_sockaddr *sa)
835 {
836 	struct vxlan_socket *vso;
837 	int i;
838 
839 	vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
840 	rm_init(&vso->vxlso_lock, "vxlansorm");
841 	refcount_init(&vso->vxlso_refcnt, 0);
842 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
843 		LIST_INIT(&vso->vxlso_vni_hash[i]);
844 	vso->vxlso_laddr = *sa;
845 
846 	return (vso);
847 }
848 
849 static void
850 vxlan_socket_destroy(struct vxlan_socket *vso)
851 {
852 	struct socket *so;
853 	struct vxlan_socket_mc_info *mc;
854 	int i;
855 
856 	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
857 		mc = &vso->vxlso_mc[i];
858 		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
859 		    ("%s: socket %p mc[%d] still has address",
860 		     __func__, vso, i));
861 	}
862 
863 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
864 		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
865 		    ("%s: socket %p vni_hash[%d] not empty",
866 		     __func__, vso, i));
867 	}
868 
869 	so = vso->vxlso_sock;
870 	if (so != NULL) {
871 		vso->vxlso_sock = NULL;
872 		soclose(so);
873 	}
874 
875 	rm_destroy(&vso->vxlso_lock);
876 	free(vso, M_VXLAN);
877 }
878 
879 static void
880 vxlan_socket_release(struct vxlan_socket *vso)
881 {
882 	int destroy;
883 
884 	mtx_lock(&vxlan_list_mtx);
885 	destroy = VXLAN_SO_RELEASE(vso);
886 	if (destroy != 0)
887 		LIST_REMOVE(vso, vxlso_entry);
888 	mtx_unlock(&vxlan_list_mtx);
889 
890 	if (destroy != 0)
891 		vxlan_socket_destroy(vso);
892 }
893 
894 static struct vxlan_socket *
895 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
896 {
897 	struct vxlan_socket *vso;
898 
899 	mtx_lock(&vxlan_list_mtx);
900 	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
901 		if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
902 			VXLAN_SO_ACQUIRE(vso);
903 			break;
904 		}
905 	}
906 	mtx_unlock(&vxlan_list_mtx);
907 
908 	return (vso);
909 }
910 
911 static void
912 vxlan_socket_insert(struct vxlan_socket *vso)
913 {
914 
915 	mtx_lock(&vxlan_list_mtx);
916 	VXLAN_SO_ACQUIRE(vso);
917 	LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
918 	mtx_unlock(&vxlan_list_mtx);
919 }
920 
921 static int
922 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
923 {
924 	struct thread *td;
925 	int error;
926 
927 	td = curthread;
928 
929 	error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
930 	    SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
931 	if (error) {
932 		if_printf(ifp, "cannot create socket: %d\n", error);
933 		return (error);
934 	}
935 
936 	error = udp_set_kernel_tunneling(vso->vxlso_sock,
937 	    vxlan_rcv_udp_packet, NULL, vso);
938 	if (error) {
939 		if_printf(ifp, "cannot set tunneling function: %d\n", error);
940 		return (error);
941 	}
942 
943 	if (vxlan_reuse_port != 0) {
944 		struct sockopt sopt;
945 		int val = 1;
946 
947 		bzero(&sopt, sizeof(sopt));
948 		sopt.sopt_dir = SOPT_SET;
949 		sopt.sopt_level = IPPROTO_IP;
950 		sopt.sopt_name = SO_REUSEPORT;
951 		sopt.sopt_val = &val;
952 		sopt.sopt_valsize = sizeof(val);
953 		error = sosetopt(vso->vxlso_sock, &sopt);
954 		if (error) {
955 			if_printf(ifp,
956 			    "cannot set REUSEADDR socket opt: %d\n", error);
957 			return (error);
958 		}
959 	}
960 
961 	return (0);
962 }
963 
964 static int
965 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
966 {
967 	union vxlan_sockaddr laddr;
968 	struct thread *td;
969 	int error;
970 
971 	td = curthread;
972 	laddr = vso->vxlso_laddr;
973 
974 	error = sobind(vso->vxlso_sock, &laddr.sa, td);
975 	if (error) {
976 		if (error != EADDRINUSE)
977 			if_printf(ifp, "cannot bind socket: %d\n", error);
978 		return (error);
979 	}
980 
981 	return (0);
982 }
983 
984 static int
985 vxlan_socket_create(struct ifnet *ifp, int multicast,
986     const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
987 {
988 	union vxlan_sockaddr laddr;
989 	struct vxlan_socket *vso;
990 	int error;
991 
992 	laddr = *saddr;
993 
994 	/*
995 	 * If this socket will be multicast, then only the local port
996 	 * must be specified when binding.
997 	 */
998 	if (multicast != 0) {
999 		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1000 			laddr.in4.sin_addr.s_addr = INADDR_ANY;
1001 #ifdef INET6
1002 		else
1003 			laddr.in6.sin6_addr = in6addr_any;
1004 #endif
1005 	}
1006 
1007 	vso = vxlan_socket_alloc(&laddr);
1008 	if (vso == NULL)
1009 		return (ENOMEM);
1010 
1011 	error = vxlan_socket_init(vso, ifp);
1012 	if (error)
1013 		goto fail;
1014 
1015 	error = vxlan_socket_bind(vso, ifp);
1016 	if (error)
1017 		goto fail;
1018 
1019 	/*
1020 	 * There is a small window between the bind completing and
1021 	 * inserting the socket, so that a concurrent create may fail.
1022 	 * Let's not worry about that for now.
1023 	 */
1024 	vxlan_socket_insert(vso);
1025 	*vsop = vso;
1026 
1027 	return (0);
1028 
1029 fail:
1030 	vxlan_socket_destroy(vso);
1031 
1032 	return (error);
1033 }
1034 
1035 static void
1036 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
1037     struct vxlan_softc_head *list)
1038 {
1039 	struct rm_priotracker tracker;
1040 	struct vxlan_softc *sc;
1041 	int i;
1042 
1043 	VXLAN_SO_RLOCK(vso, &tracker);
1044 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
1045 		LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
1046 			vxlan_ifdetach(sc, ifp, list);
1047 	}
1048 	VXLAN_SO_RUNLOCK(vso, &tracker);
1049 }
1050 
1051 static struct vxlan_socket *
1052 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
1053 {
1054 	struct vxlan_socket *vso;
1055 	union vxlan_sockaddr laddr;
1056 
1057 	laddr = *vxlsa;
1058 
1059 	if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1060 		laddr.in4.sin_addr.s_addr = INADDR_ANY;
1061 #ifdef INET6
1062 	else
1063 		laddr.in6.sin6_addr = in6addr_any;
1064 #endif
1065 
1066 	vso = vxlan_socket_lookup(&laddr);
1067 
1068 	return (vso);
1069 }
1070 
1071 static int
1072 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
1073     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1074     int ifidx)
1075 {
1076 
1077 	if (!vxlan_sockaddr_in_any(local) &&
1078 	    !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
1079 		return (0);
1080 	if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
1081 		return (0);
1082 	if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
1083 		return (0);
1084 
1085 	return (1);
1086 }
1087 
1088 static int
1089 vxlan_socket_mc_join_group(struct vxlan_socket *vso,
1090     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1091     int *ifidx, union vxlan_sockaddr *source)
1092 {
1093 	struct sockopt sopt;
1094 	int error;
1095 
1096 	*source = *local;
1097 
1098 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1099 		struct ip_mreq mreq;
1100 
1101 		mreq.imr_multiaddr = group->in4.sin_addr;
1102 		mreq.imr_interface = local->in4.sin_addr;
1103 
1104 		bzero(&sopt, sizeof(sopt));
1105 		sopt.sopt_dir = SOPT_SET;
1106 		sopt.sopt_level = IPPROTO_IP;
1107 		sopt.sopt_name = IP_ADD_MEMBERSHIP;
1108 		sopt.sopt_val = &mreq;
1109 		sopt.sopt_valsize = sizeof(mreq);
1110 		error = sosetopt(vso->vxlso_sock, &sopt);
1111 		if (error)
1112 			return (error);
1113 
1114 		/*
1115 		 * BMV: Ideally, there would be a formal way for us to get
1116 		 * the local interface that was selected based on the
1117 		 * imr_interface address. We could then update *ifidx so
1118 		 * vxlan_sockaddr_mc_info_match() would return a match for
1119 		 * later creates that explicitly set the multicast interface.
1120 		 *
1121 		 * If we really need to, we can of course look in the INP's
1122 		 * membership list:
1123 		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
1124 		 *         imo_membership[]->inm_ifp
1125 		 * similarly to imo_match_group().
1126 		 */
1127 		source->in4.sin_addr = local->in4.sin_addr;
1128 
1129 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1130 		struct ipv6_mreq mreq;
1131 
1132 		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1133 		mreq.ipv6mr_interface = *ifidx;
1134 
1135 		bzero(&sopt, sizeof(sopt));
1136 		sopt.sopt_dir = SOPT_SET;
1137 		sopt.sopt_level = IPPROTO_IPV6;
1138 		sopt.sopt_name = IPV6_JOIN_GROUP;
1139 		sopt.sopt_val = &mreq;
1140 		sopt.sopt_valsize = sizeof(mreq);
1141 		error = sosetopt(vso->vxlso_sock, &sopt);
1142 		if (error)
1143 			return (error);
1144 
1145 		/*
1146 		 * BMV: As with IPv4, we would really like to know what
1147 		 * interface in6p_lookup_mcast_ifp() selected.
1148 		 */
1149 	} else
1150 		error = EAFNOSUPPORT;
1151 
1152 	return (error);
1153 }
1154 
1155 static int
1156 vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
1157     const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
1158     int ifidx)
1159 {
1160 	struct sockopt sopt;
1161 	int error;
1162 
1163 	bzero(&sopt, sizeof(sopt));
1164 	sopt.sopt_dir = SOPT_SET;
1165 
1166 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1167 		struct ip_mreq mreq;
1168 
1169 		mreq.imr_multiaddr = group->in4.sin_addr;
1170 		mreq.imr_interface = source->in4.sin_addr;
1171 
1172 		sopt.sopt_level = IPPROTO_IP;
1173 		sopt.sopt_name = IP_DROP_MEMBERSHIP;
1174 		sopt.sopt_val = &mreq;
1175 		sopt.sopt_valsize = sizeof(mreq);
1176 		error = sosetopt(vso->vxlso_sock, &sopt);
1177 
1178 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1179 		struct ipv6_mreq mreq;
1180 
1181 		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1182 		mreq.ipv6mr_interface = ifidx;
1183 
1184 		sopt.sopt_level = IPPROTO_IPV6;
1185 		sopt.sopt_name = IPV6_LEAVE_GROUP;
1186 		sopt.sopt_val = &mreq;
1187 		sopt.sopt_valsize = sizeof(mreq);
1188 		error = sosetopt(vso->vxlso_sock, &sopt);
1189 
1190 	} else
1191 		error = EAFNOSUPPORT;
1192 
1193 	return (error);
1194 }
1195 
1196 static int
1197 vxlan_socket_mc_add_group(struct vxlan_socket *vso,
1198     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1199     int ifidx, int *idx)
1200 {
1201 	union vxlan_sockaddr source;
1202 	struct vxlan_socket_mc_info *mc;
1203 	int i, empty, error;
1204 
1205 	/*
1206 	 * Within a socket, the same multicast group may be used by multiple
1207 	 * interfaces, each with a different network identifier. But a socket
1208 	 * may only join a multicast group once, so keep track of the users
1209 	 * here.
1210 	 */
1211 
1212 	VXLAN_SO_WLOCK(vso);
1213 	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1214 		mc = &vso->vxlso_mc[i];
1215 
1216 		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1217 			empty++;
1218 			continue;
1219 		}
1220 
1221 		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
1222 			goto out;
1223 	}
1224 	VXLAN_SO_WUNLOCK(vso);
1225 
1226 	if (empty == 0)
1227 		return (ENOSPC);
1228 
1229 	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
1230 	if (error)
1231 		return (error);
1232 
1233 	VXLAN_SO_WLOCK(vso);
1234 	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1235 		mc = &vso->vxlso_mc[i];
1236 
1237 		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1238 			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
1239 			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
1240 			mc->vxlsomc_ifidx = ifidx;
1241 			goto out;
1242 		}
1243 	}
1244 	VXLAN_SO_WUNLOCK(vso);
1245 
1246 	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
1247 	MPASS(error == 0);
1248 
1249 	return (ENOSPC);
1250 
1251 out:
1252 	mc->vxlsomc_users++;
1253 	VXLAN_SO_WUNLOCK(vso);
1254 
1255 	*idx = i;
1256 
1257 	return (0);
1258 }
1259 
1260 static void
1261 vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
1262 {
1263 	union vxlan_sockaddr group, source;
1264 	struct vxlan_socket_mc_info *mc;
1265 	int ifidx, leave;
1266 
1267 	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
1268 	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
1269 
1270 	leave = 0;
1271 	mc = &vso->vxlso_mc[idx];
1272 
1273 	VXLAN_SO_WLOCK(vso);
1274 	mc->vxlsomc_users--;
1275 	if (mc->vxlsomc_users == 0) {
1276 		group = mc->vxlsomc_gaddr;
1277 		source = mc->vxlsomc_saddr;
1278 		ifidx = mc->vxlsomc_ifidx;
1279 		bzero(mc, sizeof(*mc));
1280 		leave = 1;
1281 	}
1282 	VXLAN_SO_WUNLOCK(vso);
1283 
1284 	if (leave != 0) {
1285 		/*
1286 		 * Our socket's membership in this group may have already
1287 		 * been removed if we joined through an interface that's
1288 		 * been detached.
1289 		 */
1290 		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
1291 	}
1292 }
1293 
1294 static struct vxlan_softc *
1295 vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
1296 {
1297 	struct vxlan_softc *sc;
1298 	uint32_t hash;
1299 
1300 	VXLAN_SO_LOCK_ASSERT(vso);
1301 	hash = VXLAN_SO_VNI_HASH(vni);
1302 
1303 	LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
1304 		if (sc->vxl_vni == vni) {
1305 			VXLAN_ACQUIRE(sc);
1306 			break;
1307 		}
1308 	}
1309 
1310 	return (sc);
1311 }
1312 
1313 static struct vxlan_softc *
1314 vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
1315 {
1316 	struct rm_priotracker tracker;
1317 	struct vxlan_softc *sc;
1318 
1319 	VXLAN_SO_RLOCK(vso, &tracker);
1320 	sc = vxlan_socket_lookup_softc_locked(vso, vni);
1321 	VXLAN_SO_RUNLOCK(vso, &tracker);
1322 
1323 	return (sc);
1324 }
1325 
1326 static int
1327 vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1328 {
1329 	struct vxlan_softc *tsc;
1330 	uint32_t vni, hash;
1331 
1332 	vni = sc->vxl_vni;
1333 	hash = VXLAN_SO_VNI_HASH(vni);
1334 
1335 	VXLAN_SO_WLOCK(vso);
1336 	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
1337 	if (tsc != NULL) {
1338 		VXLAN_SO_WUNLOCK(vso);
1339 		vxlan_release(tsc);
1340 		return (EEXIST);
1341 	}
1342 
1343 	VXLAN_ACQUIRE(sc);
1344 	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
1345 	VXLAN_SO_WUNLOCK(vso);
1346 
1347 	return (0);
1348 }
1349 
1350 static void
1351 vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1352 {
1353 
1354 	VXLAN_SO_WLOCK(vso);
1355 	LIST_REMOVE(sc, vxl_entry);
1356 	VXLAN_SO_WUNLOCK(vso);
1357 
1358 	vxlan_release(sc);
1359 }
1360 
1361 static struct ifnet *
1362 vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
1363 {
1364 	struct ifnet *ifp;
1365 
1366 	VXLAN_LOCK_ASSERT(sc);
1367 
1368 	if (ipv4 && sc->vxl_im4o != NULL)
1369 		ifp = sc->vxl_im4o->imo_multicast_ifp;
1370 	else if (!ipv4 && sc->vxl_im6o != NULL)
1371 		ifp = sc->vxl_im6o->im6o_multicast_ifp;
1372 	else
1373 		ifp = NULL;
1374 
1375 	if (ifp != NULL)
1376 		if_ref(ifp);
1377 
1378 	return (ifp);
1379 }
1380 
1381 static void
1382 vxlan_free_multicast(struct vxlan_softc *sc)
1383 {
1384 
1385 	if (sc->vxl_mc_ifp != NULL) {
1386 		if_rele(sc->vxl_mc_ifp);
1387 		sc->vxl_mc_ifp = NULL;
1388 		sc->vxl_mc_ifindex = 0;
1389 	}
1390 
1391 	if (sc->vxl_im4o != NULL) {
1392 		free(sc->vxl_im4o, M_VXLAN);
1393 		sc->vxl_im4o = NULL;
1394 	}
1395 
1396 	if (sc->vxl_im6o != NULL) {
1397 		free(sc->vxl_im6o, M_VXLAN);
1398 		sc->vxl_im6o = NULL;
1399 	}
1400 }
1401 
1402 static int
1403 vxlan_setup_multicast_interface(struct vxlan_softc *sc)
1404 {
1405 	struct ifnet *ifp;
1406 
1407 	ifp = ifunit_ref(sc->vxl_mc_ifname);
1408 	if (ifp == NULL) {
1409 		if_printf(sc->vxl_ifp, "multicast interfaces %s does "
1410 		    "not exist\n", sc->vxl_mc_ifname);
1411 		return (ENOENT);
1412 	}
1413 
1414 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1415 		if_printf(sc->vxl_ifp, "interface %s does not support "
1416 		     "multicast\n", sc->vxl_mc_ifname);
1417 		if_rele(ifp);
1418 		return (ENOTSUP);
1419 	}
1420 
1421 	sc->vxl_mc_ifp = ifp;
1422 	sc->vxl_mc_ifindex = ifp->if_index;
1423 
1424 	return (0);
1425 }
1426 
1427 static int
1428 vxlan_setup_multicast(struct vxlan_softc *sc)
1429 {
1430 	const union vxlan_sockaddr *group;
1431 	int error;
1432 
1433 	group = &sc->vxl_dst_addr;
1434 	error = 0;
1435 
1436 	if (sc->vxl_mc_ifname[0] != '\0') {
1437 		error = vxlan_setup_multicast_interface(sc);
1438 		if (error)
1439 			return (error);
1440 	}
1441 
1442 	/*
1443 	 * Initialize an multicast options structure that is sufficiently
1444 	 * populated for use in the respective IP output routine. This
1445 	 * structure is typically stored in the socket, but our sockets
1446 	 * may be shared among multiple interfaces.
1447 	 */
1448 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1449 		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
1450 		    M_ZERO | M_WAITOK);
1451 		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
1452 		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
1453 		sc->vxl_im4o->imo_multicast_vif = -1;
1454 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1455 		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
1456 		    M_ZERO | M_WAITOK);
1457 		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
1458 		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
1459 	}
1460 
1461 	return (error);
1462 }
1463 
1464 static int
1465 vxlan_setup_socket(struct vxlan_softc *sc)
1466 {
1467 	struct vxlan_socket *vso;
1468 	struct ifnet *ifp;
1469 	union vxlan_sockaddr *saddr, *daddr;
1470 	int multicast, error;
1471 
1472 	vso = NULL;
1473 	ifp = sc->vxl_ifp;
1474 	saddr = &sc->vxl_src_addr;
1475 	daddr = &sc->vxl_dst_addr;
1476 
1477 	multicast = vxlan_sockaddr_in_multicast(daddr);
1478 	MPASS(multicast != -1);
1479 	sc->vxl_vso_mc_index = -1;
1480 
1481 	/*
1482 	 * Try to create the socket. If that fails, attempt to use an
1483 	 * existing socket.
1484 	 */
1485 	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
1486 	if (error) {
1487 		if (multicast != 0)
1488 			vso = vxlan_socket_mc_lookup(saddr);
1489 		else
1490 			vso = vxlan_socket_lookup(saddr);
1491 
1492 		if (vso == NULL) {
1493 			if_printf(ifp, "cannot create socket (error: %d), "
1494 			    "and no existing socket found\n", error);
1495 			goto out;
1496 		}
1497 	}
1498 
1499 	if (multicast != 0) {
1500 		error = vxlan_setup_multicast(sc);
1501 		if (error)
1502 			goto out;
1503 
1504 		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
1505 		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
1506 		if (error)
1507 			goto out;
1508 	}
1509 
1510 	sc->vxl_sock = vso;
1511 	error = vxlan_socket_insert_softc(vso, sc);
1512 	if (error) {
1513 		sc->vxl_sock = NULL;
1514 		if_printf(ifp, "network identifier %d already exists in "
1515 		    "this socket\n", sc->vxl_vni);
1516 		goto out;
1517 	}
1518 
1519 	return (0);
1520 
1521 out:
1522 	if (vso != NULL) {
1523 		if (sc->vxl_vso_mc_index != -1) {
1524 			vxlan_socket_mc_release_group_by_idx(vso,
1525 			    sc->vxl_vso_mc_index);
1526 			sc->vxl_vso_mc_index = -1;
1527 		}
1528 		if (multicast != 0)
1529 			vxlan_free_multicast(sc);
1530 		vxlan_socket_release(vso);
1531 	}
1532 
1533 	return (error);
1534 }
1535 
1536 static void
1537 vxlan_setup_interface(struct vxlan_softc *sc)
1538 {
1539 	struct ifnet *ifp;
1540 
1541 	ifp = sc->vxl_ifp;
1542 	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
1543 
1544 	if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
1545 		ifp->if_hdrlen += sizeof(struct ip);
1546 	else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
1547 		ifp->if_hdrlen += sizeof(struct ip6_hdr);
1548 }
1549 
1550 static int
1551 vxlan_valid_init_config(struct vxlan_softc *sc)
1552 {
1553 	const char *reason;
1554 
1555 	if (vxlan_check_vni(sc->vxl_vni) != 0) {
1556 		reason = "invalid virtual network identifier specified";
1557 		goto fail;
1558 	}
1559 
1560 	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
1561 		reason = "source address type is not supported";
1562 		goto fail;
1563 	}
1564 
1565 	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
1566 		reason = "destination address type is not supported";
1567 		goto fail;
1568 	}
1569 
1570 	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
1571 		reason = "no valid destination address specified";
1572 		goto fail;
1573 	}
1574 
1575 	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
1576 	    sc->vxl_mc_ifname[0] != '\0') {
1577 		reason = "can only specify interface with a group address";
1578 		goto fail;
1579 	}
1580 
1581 	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
1582 		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
1583 		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
1584 			reason = "source and destination address must both "
1585 			    "be either IPv4 or IPv6";
1586 			goto fail;
1587 		}
1588 	}
1589 
1590 	if (sc->vxl_src_addr.in4.sin_port == 0) {
1591 		reason = "local port not specified";
1592 		goto fail;
1593 	}
1594 
1595 	if (sc->vxl_dst_addr.in4.sin_port == 0) {
1596 		reason = "remote port not specified";
1597 		goto fail;
1598 	}
1599 
1600 	return (0);
1601 
1602 fail:
1603 	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
1604 	return (EINVAL);
1605 }
1606 
1607 static void
1608 vxlan_init_wait(struct vxlan_softc *sc)
1609 {
1610 
1611 	VXLAN_LOCK_WASSERT(sc);
1612 	while (sc->vxl_flags & VXLAN_FLAG_INIT)
1613 		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
1614 }
1615 
1616 static void
1617 vxlan_init_complete(struct vxlan_softc *sc)
1618 {
1619 
1620 	VXLAN_WLOCK(sc);
1621 	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
1622 	wakeup(sc);
1623 	VXLAN_WUNLOCK(sc);
1624 }
1625 
1626 static void
1627 vxlan_init(void *xsc)
1628 {
1629 	static const uint8_t empty_mac[ETHER_ADDR_LEN];
1630 	struct vxlan_softc *sc;
1631 	struct ifnet *ifp;
1632 
1633 	sc = xsc;
1634 	ifp = sc->vxl_ifp;
1635 
1636 	VXLAN_WLOCK(sc);
1637 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1638 		VXLAN_WUNLOCK(sc);
1639 		return;
1640 	}
1641 	sc->vxl_flags |= VXLAN_FLAG_INIT;
1642 	VXLAN_WUNLOCK(sc);
1643 
1644 	if (vxlan_valid_init_config(sc) != 0)
1645 		goto out;
1646 
1647 	vxlan_setup_interface(sc);
1648 
1649 	if (vxlan_setup_socket(sc) != 0)
1650 		goto out;
1651 
1652 	/* Initialize the default forwarding entry. */
1653 	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
1654 	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
1655 
1656 	VXLAN_WLOCK(sc);
1657 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1658 	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
1659 	    vxlan_timer, sc);
1660 	VXLAN_WUNLOCK(sc);
1661 
1662 	if_link_state_change(ifp, LINK_STATE_UP);
1663 out:
1664 	vxlan_init_complete(sc);
1665 }
1666 
/*
 * Release a reference on the softc and, when VXLAN_RELEASE() returns
 * non-zero, wake any thread sleeping on sc.
 */
static void
vxlan_release(struct vxlan_softc *sc)
{

	/*
	 * Once our reference is gone the softc may be destroyed at any
	 * moment, so the wakeup cannot be serialized with the softc lock.
	 * Sleepers use a timeout, which makes a missed wakeup unfortunate
	 * rather than fatal.
	 */
	if (VXLAN_RELEASE(sc) != 0)
		wakeup(sc);
}
1680 
1681 static void
1682 vxlan_teardown_wait(struct vxlan_softc *sc)
1683 {
1684 
1685 	VXLAN_LOCK_WASSERT(sc);
1686 	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1687 		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
1688 }
1689 
1690 static void
1691 vxlan_teardown_complete(struct vxlan_softc *sc)
1692 {
1693 
1694 	VXLAN_WLOCK(sc);
1695 	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
1696 	wakeup(sc);
1697 	VXLAN_WUNLOCK(sc);
1698 }
1699 
1700 static void
1701 vxlan_teardown_locked(struct vxlan_softc *sc)
1702 {
1703 	struct ifnet *ifp;
1704 	struct vxlan_socket *vso;
1705 
1706 	ifp = sc->vxl_ifp;
1707 
1708 	VXLAN_LOCK_WASSERT(sc);
1709 	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
1710 
1711 	ifp->if_flags &= ~IFF_UP;
1712 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1713 	callout_stop(&sc->vxl_callout);
1714 	vso = sc->vxl_sock;
1715 	sc->vxl_sock = NULL;
1716 
1717 	VXLAN_WUNLOCK(sc);
1718 	if_link_state_change(ifp, LINK_STATE_DOWN);
1719 
1720 	if (vso != NULL) {
1721 		vxlan_socket_remove_softc(vso, sc);
1722 
1723 		if (sc->vxl_vso_mc_index != -1) {
1724 			vxlan_socket_mc_release_group_by_idx(vso,
1725 			    sc->vxl_vso_mc_index);
1726 			sc->vxl_vso_mc_index = -1;
1727 		}
1728 	}
1729 
1730 	VXLAN_WLOCK(sc);
1731 	while (sc->vxl_refcnt != 0)
1732 		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
1733 	VXLAN_WUNLOCK(sc);
1734 
1735 	callout_drain(&sc->vxl_callout);
1736 
1737 	vxlan_free_multicast(sc);
1738 	if (vso != NULL)
1739 		vxlan_socket_release(vso);
1740 
1741 	vxlan_teardown_complete(sc);
1742 }
1743 
1744 static void
1745 vxlan_teardown(struct vxlan_softc *sc)
1746 {
1747 
1748 	VXLAN_WLOCK(sc);
1749 	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
1750 		vxlan_teardown_wait(sc);
1751 		VXLAN_WUNLOCK(sc);
1752 		return;
1753 	}
1754 
1755 	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1756 	vxlan_teardown_locked(sc);
1757 }
1758 
1759 static void
1760 vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
1761     struct vxlan_softc_head *list)
1762 {
1763 
1764 	VXLAN_WLOCK(sc);
1765 
1766 	if (sc->vxl_mc_ifp != ifp)
1767 		goto out;
1768 	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1769 		goto out;
1770 
1771 	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1772 	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
1773 
1774 out:
1775 	VXLAN_WUNLOCK(sc);
1776 }
1777 
1778 static void
1779 vxlan_timer(void *xsc)
1780 {
1781 	struct vxlan_softc *sc;
1782 
1783 	sc = xsc;
1784 	VXLAN_LOCK_WASSERT(sc);
1785 
1786 	vxlan_ftable_expire(sc);
1787 	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
1788 }
1789 
1790 static int
1791 vxlan_ioctl_ifflags(struct vxlan_softc *sc)
1792 {
1793 	struct ifnet *ifp;
1794 
1795 	ifp = sc->vxl_ifp;
1796 
1797 	if (ifp->if_flags & IFF_UP) {
1798 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1799 			vxlan_init(sc);
1800 	} else {
1801 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1802 			vxlan_teardown(sc);
1803 	}
1804 
1805 	return (0);
1806 }
1807 
1808 static int
1809 vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
1810 {
1811 	struct rm_priotracker tracker;
1812 	struct ifvxlancfg *cfg;
1813 
1814 	cfg = arg;
1815 	bzero(cfg, sizeof(*cfg));
1816 
1817 	VXLAN_RLOCK(sc, &tracker);
1818 	cfg->vxlc_vni = sc->vxl_vni;
1819 	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
1820 	    sizeof(union vxlan_sockaddr));
1821 	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
1822 	    sizeof(union vxlan_sockaddr));
1823 	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
1824 	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
1825 	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
1826 	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
1827 	cfg->vxlc_port_min = sc->vxl_min_port;
1828 	cfg->vxlc_port_max = sc->vxl_max_port;
1829 	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
1830 	cfg->vxlc_ttl = sc->vxl_ttl;
1831 	VXLAN_RUNLOCK(sc, &tracker);
1832 
1833 	return (0);
1834 }
1835 
1836 static int
1837 vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
1838 {
1839 	struct ifvxlancmd *cmd;
1840 	int error;
1841 
1842 	cmd = arg;
1843 
1844 	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
1845 		return (EINVAL);
1846 
1847 	VXLAN_WLOCK(sc);
1848 	if (vxlan_can_change_config(sc)) {
1849 		sc->vxl_vni = cmd->vxlcmd_vni;
1850 		error = 0;
1851 	} else
1852 		error = EBUSY;
1853 	VXLAN_WUNLOCK(sc);
1854 
1855 	return (error);
1856 }
1857 
1858 static int
1859 vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
1860 {
1861 	struct ifvxlancmd *cmd;
1862 	union vxlan_sockaddr *vxlsa;
1863 	int error;
1864 
1865 	cmd = arg;
1866 	vxlsa = &cmd->vxlcmd_sa;
1867 
1868 	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1869 		return (EINVAL);
1870 	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
1871 		return (EINVAL);
1872 
1873 	VXLAN_WLOCK(sc);
1874 	if (vxlan_can_change_config(sc)) {
1875 		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
1876 		error = 0;
1877 	} else
1878 		error = EBUSY;
1879 	VXLAN_WUNLOCK(sc);
1880 
1881 	return (error);
1882 }
1883 
1884 static int
1885 vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
1886 {
1887 	struct ifvxlancmd *cmd;
1888 	union vxlan_sockaddr *vxlsa;
1889 	int error;
1890 
1891 	cmd = arg;
1892 	vxlsa = &cmd->vxlcmd_sa;
1893 
1894 	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1895 		return (EINVAL);
1896 
1897 	VXLAN_WLOCK(sc);
1898 	if (vxlan_can_change_config(sc)) {
1899 		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
1900 		error = 0;
1901 	} else
1902 		error = EBUSY;
1903 	VXLAN_WUNLOCK(sc);
1904 
1905 	return (error);
1906 }
1907 
1908 static int
1909 vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
1910 {
1911 	struct ifvxlancmd *cmd;
1912 	int error;
1913 
1914 	cmd = arg;
1915 
1916 	if (cmd->vxlcmd_port == 0)
1917 		return (EINVAL);
1918 
1919 	VXLAN_WLOCK(sc);
1920 	if (vxlan_can_change_config(sc)) {
1921 		sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
1922 		error = 0;
1923 	} else
1924 		error = EBUSY;
1925 	VXLAN_WUNLOCK(sc);
1926 
1927 	return (error);
1928 }
1929 
1930 static int
1931 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
1932 {
1933 	struct ifvxlancmd *cmd;
1934 	int error;
1935 
1936 	cmd = arg;
1937 
1938 	if (cmd->vxlcmd_port == 0)
1939 		return (EINVAL);
1940 
1941 	VXLAN_WLOCK(sc);
1942 	if (vxlan_can_change_config(sc)) {
1943 		sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
1944 		error = 0;
1945 	} else
1946 		error = EBUSY;
1947 	VXLAN_WUNLOCK(sc);
1948 
1949 	return (error);
1950 }
1951 
1952 static int
1953 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
1954 {
1955 	struct ifvxlancmd *cmd;
1956 	uint16_t min, max;
1957 	int error;
1958 
1959 	cmd = arg;
1960 	min = cmd->vxlcmd_port_min;
1961 	max = cmd->vxlcmd_port_max;
1962 
1963 	if (max < min)
1964 		return (EINVAL);
1965 
1966 	VXLAN_WLOCK(sc);
1967 	if (vxlan_can_change_config(sc)) {
1968 		sc->vxl_min_port = min;
1969 		sc->vxl_max_port = max;
1970 		error = 0;
1971 	} else
1972 		error = EBUSY;
1973 	VXLAN_WUNLOCK(sc);
1974 
1975 	return (error);
1976 }
1977 
1978 static int
1979 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
1980 {
1981 	struct ifvxlancmd *cmd;
1982 	int error;
1983 
1984 	cmd = arg;
1985 
1986 	VXLAN_WLOCK(sc);
1987 	if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
1988 		sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
1989 		error = 0;
1990 	} else
1991 		error = EINVAL;
1992 	VXLAN_WUNLOCK(sc);
1993 
1994 	return (error);
1995 }
1996 
1997 static int
1998 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
1999 {
2000 	struct ifvxlancmd *cmd;
2001 	int error;
2002 
2003 	cmd = arg;
2004 
2005 	VXLAN_WLOCK(sc);
2006 	if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
2007 		sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
2008 		error = 0;
2009 	} else
2010 		error = EINVAL;
2011 	VXLAN_WUNLOCK(sc);
2012 
2013 	return (error);
2014 }
2015 
2016 static int
2017 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
2018 {
2019 	struct ifvxlancmd *cmd;
2020 	int error;
2021 
2022 	cmd = arg;
2023 
2024 	VXLAN_WLOCK(sc);
2025 	if (vxlan_can_change_config(sc)) {
2026 		strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
2027 		error = 0;
2028 	} else
2029 		error = EBUSY;
2030 	VXLAN_WUNLOCK(sc);
2031 
2032 	return (error);
2033 }
2034 
2035 static int
2036 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
2037 {
2038 	struct ifvxlancmd *cmd;
2039 	int error;
2040 
2041 	cmd = arg;
2042 
2043 	VXLAN_WLOCK(sc);
2044 	if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
2045 		sc->vxl_ttl = cmd->vxlcmd_ttl;
2046 		if (sc->vxl_im4o != NULL)
2047 			sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
2048 		if (sc->vxl_im6o != NULL)
2049 			sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
2050 		error = 0;
2051 	} else
2052 		error = EINVAL;
2053 	VXLAN_WUNLOCK(sc);
2054 
2055 	return (error);
2056 }
2057 
2058 static int
2059 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
2060 {
2061 	struct ifvxlancmd *cmd;
2062 
2063 	cmd = arg;
2064 
2065 	VXLAN_WLOCK(sc);
2066 	if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
2067 		sc->vxl_flags |= VXLAN_FLAG_LEARN;
2068 	else
2069 		sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2070 	VXLAN_WUNLOCK(sc);
2071 
2072 	return (0);
2073 }
2074 
2075 static int
2076 vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
2077 {
2078 	union vxlan_sockaddr vxlsa;
2079 	struct ifvxlancmd *cmd;
2080 	struct vxlan_ftable_entry *fe;
2081 	int error;
2082 
2083 	cmd = arg;
2084 	vxlsa = cmd->vxlcmd_sa;
2085 
2086 	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
2087 		return (EINVAL);
2088 	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
2089 		return (EINVAL);
2090 	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2091 		return (EINVAL);
2092 	/* BMV: We could support both IPv4 and IPv6 later. */
2093 	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
2094 		return (EAFNOSUPPORT);
2095 
2096 	fe = vxlan_ftable_entry_alloc();
2097 	if (fe == NULL)
2098 		return (ENOMEM);
2099 
2100 	if (vxlsa.in4.sin_port == 0)
2101 		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
2102 
2103 	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
2104 	    VXLAN_FE_FLAG_STATIC);
2105 
2106 	VXLAN_WLOCK(sc);
2107 	error = vxlan_ftable_entry_insert(sc, fe);
2108 	VXLAN_WUNLOCK(sc);
2109 
2110 	if (error)
2111 		vxlan_ftable_entry_free(fe);
2112 
2113 	return (error);
2114 }
2115 
2116 static int
2117 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
2118 {
2119 	struct ifvxlancmd *cmd;
2120 	struct vxlan_ftable_entry *fe;
2121 	int error;
2122 
2123 	cmd = arg;
2124 
2125 	VXLAN_WLOCK(sc);
2126 	fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
2127 	if (fe != NULL) {
2128 		vxlan_ftable_entry_destroy(sc, fe);
2129 		error = 0;
2130 	} else
2131 		error = ENOENT;
2132 	VXLAN_WUNLOCK(sc);
2133 
2134 	return (error);
2135 }
2136 
2137 static int
2138 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
2139 {
2140 	struct ifvxlancmd *cmd;
2141 	int all;
2142 
2143 	cmd = arg;
2144 	all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
2145 
2146 	VXLAN_WLOCK(sc);
2147 	vxlan_ftable_flush(sc, all);
2148 	VXLAN_WUNLOCK(sc);
2149 
2150 	return (0);
2151 }
2152 
2153 static int
2154 vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
2155 {
2156 	const struct vxlan_control *vc;
2157 	union {
2158 		struct ifvxlancfg	cfg;
2159 		struct ifvxlancmd	cmd;
2160 	} args;
2161 	int out, error;
2162 
2163 	if (ifd->ifd_cmd >= vxlan_control_table_size)
2164 		return (EINVAL);
2165 
2166 	bzero(&args, sizeof(args));
2167 	vc = &vxlan_control_table[ifd->ifd_cmd];
2168 	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
2169 
2170 	if ((get != 0 && out == 0) || (get == 0 && out != 0))
2171 		return (EINVAL);
2172 
2173 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
2174 		error = priv_check(curthread, PRIV_NET_VXLAN);
2175 		if (error)
2176 			return (error);
2177 	}
2178 
2179 	if (ifd->ifd_len != vc->vxlc_argsize ||
2180 	    ifd->ifd_len > sizeof(args))
2181 		return (EINVAL);
2182 
2183 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
2184 		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
2185 		if (error)
2186 			return (error);
2187 	}
2188 
2189 	error = vc->vxlc_func(sc, &args);
2190 	if (error)
2191 		return (error);
2192 
2193 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
2194 		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
2195 		if (error)
2196 			return (error);
2197 	}
2198 
2199 	return (0);
2200 }
2201 
2202 static int
2203 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2204 {
2205 	struct vxlan_softc *sc;
2206 	struct ifreq *ifr;
2207 	struct ifdrv *ifd;
2208 	int error;
2209 
2210 	sc = ifp->if_softc;
2211 	ifr = (struct ifreq *) data;
2212 	ifd = (struct ifdrv *) data;
2213 
2214 	switch (cmd) {
2215 	case SIOCADDMULTI:
2216 	case SIOCDELMULTI:
2217 		error = 0;
2218 		break;
2219 
2220 	case SIOCGDRVSPEC:
2221 	case SIOCSDRVSPEC:
2222 		error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
2223 		break;
2224 
2225 	case SIOCSIFFLAGS:
2226 		error = vxlan_ioctl_ifflags(sc);
2227 		break;
2228 
2229 	case SIOCSIFMEDIA:
2230 	case SIOCGIFMEDIA:
2231 		error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
2232 		break;
2233 
2234 	default:
2235 		error = ether_ioctl(ifp, cmd, data);
2236 		break;
2237 	}
2238 
2239 	return (error);
2240 }
2241 
2242 #if defined(INET) || defined(INET6)
2243 static uint16_t
2244 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
2245 {
2246 	int range;
2247 	uint32_t hash;
2248 
2249 	range = sc->vxl_max_port - sc->vxl_min_port + 1;
2250 
2251 	/* check if flowid is set and not opaque */
2252 	if (M_HASHTYPE_ISHASH(m))
2253 		hash = m->m_pkthdr.flowid;
2254 	else
2255 		hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
2256 		    sc->vxl_port_hash_key);
2257 
2258 	return (sc->vxl_min_port + (hash % range));
2259 }
2260 
2261 static void
2262 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
2263     uint16_t srcport, uint16_t dstport)
2264 {
2265 	struct vxlanudphdr *hdr;
2266 	struct udphdr *udph;
2267 	struct vxlan_header *vxh;
2268 	int len;
2269 
2270 	len = m->m_pkthdr.len - ipoff;
2271 	MPASS(len >= sizeof(struct vxlanudphdr));
2272 	hdr = mtodo(m, ipoff);
2273 
2274 	udph = &hdr->vxlh_udp;
2275 	udph->uh_sport = srcport;
2276 	udph->uh_dport = dstport;
2277 	udph->uh_ulen = htons(len);
2278 	udph->uh_sum = 0;
2279 
2280 	vxh = &hdr->vxlh_hdr;
2281 	vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
2282 	vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
2283 }
2284 #endif
2285 
2286 static int
2287 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
2288     struct mbuf *m)
2289 {
2290 #ifdef INET
2291 	struct ifnet *ifp;
2292 	struct ip *ip;
2293 	struct in_addr srcaddr, dstaddr;
2294 	uint16_t srcport, dstport;
2295 	int len, mcast, error;
2296 
2297 	ifp = sc->vxl_ifp;
2298 	srcaddr = sc->vxl_src_addr.in4.sin_addr;
2299 	srcport = vxlan_pick_source_port(sc, m);
2300 	dstaddr = fvxlsa->in4.sin_addr;
2301 	dstport = fvxlsa->in4.sin_port;
2302 
2303 	M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
2304 	    M_NOWAIT);
2305 	if (m == NULL) {
2306 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2307 		return (ENOBUFS);
2308 	}
2309 
2310 	len = m->m_pkthdr.len;
2311 
2312 	ip = mtod(m, struct ip *);
2313 	ip->ip_tos = 0;
2314 	ip->ip_len = htons(len);
2315 	ip->ip_off = 0;
2316 	ip->ip_ttl = sc->vxl_ttl;
2317 	ip->ip_p = IPPROTO_UDP;
2318 	ip->ip_sum = 0;
2319 	ip->ip_src = srcaddr;
2320 	ip->ip_dst = dstaddr;
2321 
2322 	vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);
2323 
2324 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
2325 	m->m_flags &= ~(M_MCAST | M_BCAST);
2326 
2327 	error = ip_output(m, NULL, NULL, 0, sc->vxl_im4o, NULL);
2328 	if (error == 0) {
2329 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2330 		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
2331 		if (mcast != 0)
2332 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2333 	} else
2334 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2335 
2336 	return (error);
2337 #else
2338 	m_freem(m);
2339 	return (ENOTSUP);
2340 #endif
2341 }
2342 
2343 static int
2344 vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
2345     struct mbuf *m)
2346 {
2347 #ifdef INET6
2348 	struct ifnet *ifp;
2349 	struct ip6_hdr *ip6;
2350 	const struct in6_addr *srcaddr, *dstaddr;
2351 	uint16_t srcport, dstport;
2352 	int len, mcast, error;
2353 
2354 	ifp = sc->vxl_ifp;
2355 	srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
2356 	srcport = vxlan_pick_source_port(sc, m);
2357 	dstaddr = &fvxlsa->in6.sin6_addr;
2358 	dstport = fvxlsa->in6.sin6_port;
2359 
2360 	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
2361 	    M_NOWAIT);
2362 	if (m == NULL) {
2363 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2364 		return (ENOBUFS);
2365 	}
2366 
2367 	len = m->m_pkthdr.len;
2368 
2369 	ip6 = mtod(m, struct ip6_hdr *);
2370 	ip6->ip6_flow = 0;		/* BMV: Keep in forwarding entry? */
2371 	ip6->ip6_vfc = IPV6_VERSION;
2372 	ip6->ip6_plen = 0;
2373 	ip6->ip6_nxt = IPPROTO_UDP;
2374 	ip6->ip6_hlim = sc->vxl_ttl;
2375 	ip6->ip6_src = *srcaddr;
2376 	ip6->ip6_dst = *dstaddr;
2377 
2378 	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);
2379 
2380 	/*
2381 	 * XXX BMV We need support for RFC6935 before we can send and
2382 	 * receive IPv6 UDP packets with a zero checksum.
2383 	 */
2384 	{
2385 		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
2386 		hdr->uh_sum = in6_cksum_pseudo(ip6,
2387 		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
2388 		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
2389 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
2390 	}
2391 
2392 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
2393 	m->m_flags &= ~(M_MCAST | M_BCAST);
2394 
2395 	error = ip6_output(m, NULL, NULL, 0, sc->vxl_im6o, NULL, NULL);
2396 	if (error == 0) {
2397 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2398 		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
2399 		if (mcast != 0)
2400 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2401 	} else
2402 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2403 
2404 	return (error);
2405 #else
2406 	m_freem(m);
2407 	return (ENOTSUP);
2408 #endif
2409 }
2410 
2411 static int
2412 vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
2413 {
2414 	struct rm_priotracker tracker;
2415 	union vxlan_sockaddr vxlsa;
2416 	struct vxlan_softc *sc;
2417 	struct vxlan_ftable_entry *fe;
2418 	struct ifnet *mcifp;
2419 	struct ether_header *eh;
2420 	int ipv4, error;
2421 
2422 	sc = ifp->if_softc;
2423 	eh = mtod(m, struct ether_header *);
2424 	fe = NULL;
2425 	mcifp = NULL;
2426 
2427 	ETHER_BPF_MTAP(ifp, m);
2428 
2429 	VXLAN_RLOCK(sc, &tracker);
2430 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2431 		VXLAN_RUNLOCK(sc, &tracker);
2432 		m_freem(m);
2433 		return (ENETDOWN);
2434 	}
2435 
2436 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
2437 		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
2438 	if (fe == NULL)
2439 		fe = &sc->vxl_default_fe;
2440 	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
2441 
2442 	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
2443 	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2444 		mcifp = vxlan_multicast_if_ref(sc, ipv4);
2445 
2446 	VXLAN_ACQUIRE(sc);
2447 	VXLAN_RUNLOCK(sc, &tracker);
2448 
2449 	if (ipv4 != 0)
2450 		error = vxlan_encap4(sc, &vxlsa, m);
2451 	else
2452 		error = vxlan_encap6(sc, &vxlsa, m);
2453 
2454 	vxlan_release(sc);
2455 	if (mcifp != NULL)
2456 		if_rele(mcifp);
2457 
2458 	return (error);
2459 }
2460 
2461 static void
2462 vxlan_qflush(struct ifnet *ifp __unused)
2463 {
2464 }
2465 
2466 static void
2467 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
2468     const struct sockaddr *srcsa, void *xvso)
2469 {
2470 	struct vxlan_socket *vso;
2471 	struct vxlan_header *vxh, vxlanhdr;
2472 	uint32_t vni;
2473 	int error;
2474 
2475 	M_ASSERTPKTHDR(m);
2476 	vso = xvso;
2477 	offset += sizeof(struct udphdr);
2478 
2479 	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
2480 		goto out;
2481 
2482 	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
2483 		m_copydata(m, offset, sizeof(struct vxlan_header),
2484 		    (caddr_t) &vxlanhdr);
2485 		vxh = &vxlanhdr;
2486 	} else
2487 		vxh = mtodo(m, offset);
2488 
2489 	/*
2490 	 * Drop if there is a reserved bit set in either the flags or VNI
2491 	 * fields of the header. This goes against the specification, but
2492 	 * a bit set may indicate an unsupported new feature. This matches
2493 	 * the behavior of the Linux implementation.
2494 	 */
2495 	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
2496 	    vxh->vxlh_vni & ~htonl(VXLAN_VNI_MASK))
2497 		goto out;
2498 
2499 	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
2500 	/* Adjust to the start of the inner Ethernet frame. */
2501 	m_adj(m, offset + sizeof(struct vxlan_header));
2502 
2503 	error = vxlan_input(vso, vni, &m, srcsa);
2504 	MPASS(error != 0 || m == NULL);
2505 
2506 out:
2507 	if (m != NULL)
2508 		m_freem(m);
2509 }
2510 
2511 static int
2512 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
2513     const struct sockaddr *sa)
2514 {
2515 	struct vxlan_softc *sc;
2516 	struct ifnet *ifp;
2517 	struct mbuf *m;
2518 	struct ether_header *eh;
2519 	int error;
2520 
2521 	sc = vxlan_socket_lookup_softc(vso, vni);
2522 	if (sc == NULL)
2523 		return (ENOENT);
2524 
2525 	ifp = sc->vxl_ifp;
2526 	m = *m0;
2527 	eh = mtod(m, struct ether_header *);
2528 
2529 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2530 		error = ENETDOWN;
2531 		goto out;
2532 	} else if (ifp == m->m_pkthdr.rcvif) {
2533 		/* XXX Does not catch more complex loops. */
2534 		error = EDEADLK;
2535 		goto out;
2536 	}
2537 
2538 	if (sc->vxl_flags & VXLAN_FLAG_LEARN)
2539 		vxlan_ftable_update(sc, sa, eh->ether_shost);
2540 
2541 	m_clrprotoflags(m);
2542 	m->m_pkthdr.rcvif = ifp;
2543 	M_SETFIB(m, ifp->if_fib);
2544 
2545 	error = netisr_queue_src(NETISR_ETHER, 0, m);
2546 	*m0 = NULL;
2547 
2548 out:
2549 	vxlan_release(sc);
2550 	return (error);
2551 }
2552 
2553 static void
2554 vxlan_set_default_config(struct vxlan_softc *sc)
2555 {
2556 
2557 	sc->vxl_flags |= VXLAN_FLAG_LEARN;
2558 
2559 	sc->vxl_vni = VXLAN_VNI_MAX;
2560 	sc->vxl_ttl = IPDEFTTL;
2561 
2562 	if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
2563 		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
2564 		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
2565 	} else {
2566 		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2567 		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2568 	}
2569 
2570 	sc->vxl_min_port = V_ipport_firstauto;
2571 	sc->vxl_max_port = V_ipport_lastauto;
2572 
2573 	sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
2574 	sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
2575 }
2576 
2577 static int
2578 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
2579 {
2580 
2581 #ifndef INET
2582 	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
2583 	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
2584 		return (EAFNOSUPPORT);
2585 #endif
2586 
2587 #ifndef INET6
2588 	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
2589 	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
2590 		return (EAFNOSUPPORT);
2591 #endif
2592 
2593 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
2594 		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
2595 			sc->vxl_vni = vxlp->vxlp_vni;
2596 	}
2597 
2598 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
2599 		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
2600 		sc->vxl_src_addr.in4.sin_family = AF_INET;
2601 		sc->vxl_src_addr.in4.sin_addr = vxlp->vxlp_local_in4;
2602 	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
2603 		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
2604 		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
2605 		sc->vxl_src_addr.in6.sin6_addr = vxlp->vxlp_local_in6;
2606 	}
2607 
2608 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
2609 		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
2610 		sc->vxl_dst_addr.in4.sin_family = AF_INET;
2611 		sc->vxl_dst_addr.in4.sin_addr = vxlp->vxlp_remote_in4;
2612 	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
2613 		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
2614 		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
2615 		sc->vxl_dst_addr.in6.sin6_addr = vxlp->vxlp_remote_in6;
2616 	}
2617 
2618 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
2619 		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
2620 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
2621 		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
2622 
2623 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
2624 		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
2625 			sc->vxl_min_port = vxlp->vxlp_min_port;
2626 			sc->vxl_max_port = vxlp->vxlp_max_port;
2627 		}
2628 	}
2629 
2630 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
2631 		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
2632 
2633 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
2634 		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
2635 			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
2636 	}
2637 
2638 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
2639 		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
2640 			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
2641 	}
2642 
2643 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
2644 		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
2645 			sc->vxl_ttl = vxlp->vxlp_ttl;
2646 	}
2647 
2648 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
2649 		if (vxlp->vxlp_learn == 0)
2650 			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2651 	}
2652 
2653 	return (0);
2654 }
2655 
2656 static int
2657 vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params)
2658 {
2659 	struct vxlan_softc *sc;
2660 	struct ifnet *ifp;
2661 	struct ifvxlanparam vxlp;
2662 	int error;
2663 
2664 	sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
2665 	sc->vxl_unit = unit;
2666 	vxlan_set_default_config(sc);
2667 
2668 	if (params != 0) {
2669 		error = copyin(params, &vxlp, sizeof(vxlp));
2670 		if (error)
2671 			goto fail;
2672 
2673 		error = vxlan_set_user_config(sc, &vxlp);
2674 		if (error)
2675 			goto fail;
2676 	}
2677 
2678 	ifp = if_alloc(IFT_ETHER);
2679 	if (ifp == NULL) {
2680 		error = ENOSPC;
2681 		goto fail;
2682 	}
2683 
2684 	sc->vxl_ifp = ifp;
2685 	rm_init(&sc->vxl_lock, "vxlanrm");
2686 	callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
2687 	sc->vxl_port_hash_key = arc4random();
2688 	vxlan_ftable_init(sc);
2689 
2690 	vxlan_sysctl_setup(sc);
2691 
2692 	ifp->if_softc = sc;
2693 	if_initname(ifp, vxlan_name, unit);
2694 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2695 	ifp->if_init = vxlan_init;
2696 	ifp->if_ioctl = vxlan_ioctl;
2697 	ifp->if_transmit = vxlan_transmit;
2698 	ifp->if_qflush = vxlan_qflush;
2699 
2700 	ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status);
2701 	ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL);
2702 	ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO);
2703 
2704 	vxlan_fakeaddr(sc);
2705 	ether_ifattach(ifp, sc->vxl_hwaddr);
2706 
2707 	ifp->if_baudrate = 0;
2708 	ifp->if_hdrlen = 0;
2709 
2710 	return (0);
2711 
2712 fail:
2713 	free(sc, M_VXLAN);
2714 	return (error);
2715 }
2716 
2717 static void
2718 vxlan_clone_destroy(struct ifnet *ifp)
2719 {
2720 	struct vxlan_softc *sc;
2721 
2722 	sc = ifp->if_softc;
2723 
2724 	vxlan_teardown(sc);
2725 
2726 	vxlan_ftable_flush(sc, 1);
2727 
2728 	ether_ifdetach(ifp);
2729 	if_free(ifp);
2730 	ifmedia_removeall(&sc->vxl_media);
2731 
2732 	vxlan_ftable_fini(sc);
2733 
2734 	vxlan_sysctl_destroy(sc);
2735 	rm_destroy(&sc->vxl_lock);
2736 	free(sc, M_VXLAN);
2737 }
2738 
2739 /* BMV: Taken from if_bridge. */
2740 static uint32_t
2741 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
2742 {
2743 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
2744 
2745 	b += addr[5] << 8;
2746 	b += addr[4];
2747 	a += addr[3] << 24;
2748 	a += addr[2] << 16;
2749 	a += addr[1] << 8;
2750 	a += addr[0];
2751 
2752 /*
2753  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2754  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2755  */
2756 #define	mix(a, b, c)							\
2757 do {									\
2758 	a -= b; a -= c; a ^= (c >> 13);					\
2759 	b -= c; b -= a; b ^= (a << 8);					\
2760 	c -= a; c -= b; c ^= (b >> 13);					\
2761 	a -= b; a -= c; a ^= (c >> 12);					\
2762 	b -= c; b -= a; b ^= (a << 16);					\
2763 	c -= a; c -= b; c ^= (b >> 5);					\
2764 	a -= b; a -= c; a ^= (c >> 3);					\
2765 	b -= c; b -= a; b ^= (a << 10);					\
2766 	c -= a; c -= b; c ^= (b >> 15);					\
2767 } while (0)
2768 
2769 	mix(a, b, c);
2770 
2771 #undef mix
2772 
2773 	return (c);
2774 }
2775 
2776 static void
2777 vxlan_fakeaddr(struct vxlan_softc *sc)
2778 {
2779 
2780 	/*
2781 	 * Generate a non-multicast, locally administered address.
2782 	 *
2783 	 * BMV: Should we use the FreeBSD OUI range instead?
2784 	 */
2785 	arc4rand(sc->vxl_hwaddr, ETHER_ADDR_LEN, 1);
2786 	sc->vxl_hwaddr[0] &= ~1;
2787 	sc->vxl_hwaddr[0] |= 2;
2788 }
2789 
/*
 * ifmedia change callback.  Media changes are accepted and ignored; always
 * returns 0.
 */
static int
vxlan_media_change(struct ifnet *ifp)
{

	(void)ifp;	/* Ignore. */

	return (0);
}
2797 
2798 static void
2799 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2800 {
2801 
2802 	ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
2803 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2804 }
2805 
2806 static int
2807 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
2808     const struct sockaddr *sa)
2809 {
2810 
2811 	return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
2812 }
2813 
2814 static void
2815 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
2816     const struct sockaddr *sa)
2817 {
2818 
2819 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
2820 	bzero(vxladdr, sizeof(*vxladdr));
2821 
2822 	if (sa->sa_family == AF_INET) {
2823 		vxladdr->in4 = *satoconstsin(sa);
2824 		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
2825 	} else if (sa->sa_family == AF_INET6) {
2826 		vxladdr->in6 = *satoconstsin6(sa);
2827 		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
2828 	}
2829 }
2830 
2831 static int
2832 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
2833     const struct sockaddr *sa)
2834 {
2835 	int equal;
2836 
2837 	if (sa->sa_family == AF_INET) {
2838 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2839 		equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
2840 	} else if (sa->sa_family == AF_INET6) {
2841 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2842 		equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
2843 	} else
2844 		equal = 0;
2845 
2846 	return (equal);
2847 }
2848 
2849 static void
2850 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
2851     const struct sockaddr *sa)
2852 {
2853 
2854 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
2855 
2856 	if (sa->sa_family == AF_INET) {
2857 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2858 		vxladdr->in4.sin_family = AF_INET;
2859 		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
2860 		vxladdr->in4.sin_addr = *in4;
2861 	} else if (sa->sa_family == AF_INET6) {
2862 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2863 		vxladdr->in6.sin6_family = AF_INET6;
2864 		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
2865 		vxladdr->in6.sin6_addr = *in6;
2866 	}
2867 }
2868 
2869 static int
2870 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
2871 {
2872 	const struct sockaddr *sa;
2873 	int supported;
2874 
2875 	sa = &vxladdr->sa;
2876 	supported = 0;
2877 
2878 	if (sa->sa_family == AF_UNSPEC && unspec != 0) {
2879 		supported = 1;
2880 	} else if (sa->sa_family == AF_INET) {
2881 #ifdef INET
2882 		supported = 1;
2883 #endif
2884 	} else if (sa->sa_family == AF_INET6) {
2885 #ifdef INET6
2886 		supported = 1;
2887 #endif
2888 	}
2889 
2890 	return (supported);
2891 }
2892 
2893 static int
2894 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
2895 {
2896 	const struct sockaddr *sa;
2897 	int any;
2898 
2899 	sa = &vxladdr->sa;
2900 
2901 	if (sa->sa_family == AF_INET) {
2902 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2903 		any = in4->s_addr == INADDR_ANY;
2904 	} else if (sa->sa_family == AF_INET6) {
2905 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2906 		any = IN6_IS_ADDR_UNSPECIFIED(in6);
2907 	} else
2908 		any = -1;
2909 
2910 	return (any);
2911 }
2912 
2913 static int
2914 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
2915 {
2916 	const struct sockaddr *sa;
2917 	int mc;
2918 
2919 	sa = &vxladdr->sa;
2920 
2921 	if (sa->sa_family == AF_INET) {
2922 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2923 		mc = IN_MULTICAST(ntohl(in4->s_addr));
2924 	} else if (sa->sa_family == AF_INET6) {
2925 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2926 		mc = IN6_IS_ADDR_MULTICAST(in6);
2927 	} else
2928 		mc = -1;
2929 
2930 	return (mc);
2931 }
2932 
2933 static int
2934 vxlan_can_change_config(struct vxlan_softc *sc)
2935 {
2936 	struct ifnet *ifp;
2937 
2938 	ifp = sc->vxl_ifp;
2939 	VXLAN_LOCK_ASSERT(sc);
2940 
2941 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2942 		return (0);
2943 	if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
2944 		return (0);
2945 
2946 	return (1);
2947 }
2948 
2949 static int
2950 vxlan_check_vni(uint32_t vni)
2951 {
2952 
2953 	return (vni >= VXLAN_VNI_MAX);
2954 }
2955 
2956 static int
2957 vxlan_check_ttl(int ttl)
2958 {
2959 
2960 	return (ttl > MAXTTL);
2961 }
2962 
2963 static int
2964 vxlan_check_ftable_timeout(uint32_t timeout)
2965 {
2966 
2967 	return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
2968 }
2969 
2970 static int
2971 vxlan_check_ftable_max(uint32_t max)
2972 {
2973 
2974 	return (max > VXLAN_FTABLE_MAX);
2975 }
2976 
2977 static void
2978 vxlan_sysctl_setup(struct vxlan_softc *sc)
2979 {
2980 	struct sysctl_ctx_list *ctx;
2981 	struct sysctl_oid *node;
2982 	struct vxlan_statistics *stats;
2983 	char namebuf[8];
2984 
2985 	ctx = &sc->vxl_sysctl_ctx;
2986 	stats = &sc->vxl_stats;
2987 	snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
2988 
2989 	sysctl_ctx_init(ctx);
2990 	sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
2991 	    SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
2992 	    CTLFLAG_RD, NULL, "");
2993 
2994 	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
2995 	    OID_AUTO, "ftable", CTLFLAG_RD, NULL, "");
2996 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
2997 	    CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
2998 	    "Number of entries in fowarding table");
2999 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
3000 	     CTLFLAG_RD, &sc->vxl_ftable_max, 0,
3001 	    "Maximum number of entries allowed in fowarding table");
3002 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
3003 	    CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
3004 	    "Number of seconds between prunes of the forwarding table");
3005 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
3006 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
3007 	    sc, 0, vxlan_ftable_sysctl_dump, "A",
3008 	    "Dump the forwarding table entries");
3009 
3010 	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
3011 	    OID_AUTO, "stats", CTLFLAG_RD, NULL, "");
3012 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3013 	    "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
3014 	    "Fowarding table reached maximum entries");
3015 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3016 	    "ftable_lock_upgrade_failed", CTLFLAG_RD,
3017 	    &stats->ftable_lock_upgrade_failed, 0,
3018 	    "Forwarding table update required lock upgrade");
3019 }
3020 
3021 static void
3022 vxlan_sysctl_destroy(struct vxlan_softc *sc)
3023 {
3024 
3025 	sysctl_ctx_free(&sc->vxl_sysctl_ctx);
3026 	sc->vxl_sysctl_node = NULL;
3027 }
3028 
3029 static int
3030 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
3031 {
3032 	char path[64];
3033 
3034 	snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
3035 	    sc->vxl_unit, knob);
3036 	TUNABLE_INT_FETCH(path, &def);
3037 
3038 	return (def);
3039 }
3040 
3041 static void
3042 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
3043 {
3044 	struct vxlan_softc_head list;
3045 	struct vxlan_socket *vso;
3046 	struct vxlan_softc *sc, *tsc;
3047 
3048 	LIST_INIT(&list);
3049 
3050 	if (ifp->if_flags & IFF_RENAMING)
3051 		return;
3052 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
3053 		return;
3054 
3055 	mtx_lock(&vxlan_list_mtx);
3056 	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
3057 		vxlan_socket_ifdetach(vso, ifp, &list);
3058 	mtx_unlock(&vxlan_list_mtx);
3059 
3060 	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
3061 		LIST_REMOVE(sc, vxl_ifdetach_list);
3062 
3063 		VXLAN_WLOCK(sc);
3064 		if (sc->vxl_flags & VXLAN_FLAG_INIT)
3065 			vxlan_init_wait(sc);
3066 		vxlan_teardown_locked(sc);
3067 	}
3068 }
3069 
3070 static void
3071 vxlan_load(void)
3072 {
3073 
3074 	mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
3075 	LIST_INIT(&vxlan_socket_list);
3076 	vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
3077 	    vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
3078 	vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create,
3079 	    vxlan_clone_destroy, 0);
3080 }
3081 
3082 static void
3083 vxlan_unload(void)
3084 {
3085 
3086 	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
3087 	    vxlan_ifdetach_event_tag);
3088 	if_clone_detach(vxlan_cloner);
3089 	mtx_destroy(&vxlan_list_mtx);
3090 	MPASS(LIST_EMPTY(&vxlan_socket_list));
3091 }
3092 
3093 static int
3094 vxlan_modevent(module_t mod, int type, void *unused)
3095 {
3096 	int error;
3097 
3098 	error = 0;
3099 
3100 	switch (type) {
3101 	case MOD_LOAD:
3102 		vxlan_load();
3103 		break;
3104 	case MOD_UNLOAD:
3105 		vxlan_unload();
3106 		break;
3107 	default:
3108 		error = ENOTSUP;
3109 		break;
3110 	}
3111 
3112 	return (error);
3113 }
3114 
3115 static moduledata_t vxlan_mod = {
3116 	"if_vxlan",
3117 	vxlan_modevent,
3118 	0
3119 };
3120 
3121 DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3122 MODULE_VERSION(if_vxlan, 1);
3123