xref: /freebsd/sys/net/if_vxlan.c (revision e25152834cdf3b353892835a4f3b157e066a8ed4)
1 /*-
2  * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "opt_inet.h"
28 #include "opt_inet6.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/eventhandler.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/hash.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/refcount.h>
42 #include <sys/rmlock.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/queue.h>
46 #include <sys/sbuf.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/sockio.h>
50 #include <sys/sysctl.h>
51 #include <sys/systm.h>
52 
53 #include <net/bpf.h>
54 #include <net/ethernet.h>
55 #include <net/if.h>
56 #include <net/if_var.h>
57 #include <net/if_clone.h>
58 #include <net/if_dl.h>
59 #include <net/if_media.h>
60 #include <net/if_types.h>
61 #include <net/if_vxlan.h>
62 #include <net/netisr.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in_var.h>
67 #include <netinet/in_pcb.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip6.h>
70 #include <netinet/ip_var.h>
71 #include <netinet/udp.h>
72 #include <netinet/udp_var.h>
73 
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/scope6_var.h>
76 
/*
 * Forward declaration so that a list head of softcs can be named before the
 * full struct vxlan_softc definition; struct vxlan_socket uses this head
 * type for its per-VNI hash buckets.
 */
77 struct vxlan_softc;
78 LIST_HEAD(vxlan_softc_head, vxlan_softc);
79 
/*
 * One slot of a vxlan_socket's multicast table (vxlso_mc[]).
 *
 * vxlan_socket_destroy() asserts that every slot's group address family is
 * AF_UNSPEC, i.e. that is what an unused slot looks like.
 *
 * NOTE(review): judging by the names, the fields hold the source address,
 * the group address, the interface index and a user count for one joined
 * group; the code that fills them in is not visible here - confirm.
 */
80 struct vxlan_socket_mc_info {
81 	union vxlan_sockaddr		 vxlsomc_saddr;
82 	union vxlan_sockaddr		 vxlsomc_gaddr;
83 	int				 vxlsomc_ifidx;
84 	int				 vxlsomc_users;
85 };
86 
87 /*
88  * The maximum MTU of an encapsulated Ethernet frame within an IPv4/UDP packet.
 *
 * Computed as the largest IP packet minus the worst-case IPv4 header, the
 * UDP and VXLAN headers, and the inner Ethernet header, CRC and VLAN tag.
89  */
90 #define VXLAN_MAX_MTU	(IP_MAXPACKET - \
91 		60 /* Maximum IPv4 header len */ - \
92 		sizeof(struct udphdr) - \
93 		sizeof(struct vxlan_header) - \
94 		ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN)
95 
/* Number of multicast slots (vxlso_mc[]) each vxlan_socket carries. */
96 #define VXLAN_SO_MC_MAX_GROUPS		32
97 
/*
 * Softcs sharing one socket are bucketed by VNI: 1 << 6 = 64 buckets,
 * selected by the VNI modulo the bucket count.
 */
98 #define VXLAN_SO_VNI_HASH_SHIFT		6
99 #define VXLAN_SO_VNI_HASH_SIZE		(1 << VXLAN_SO_VNI_HASH_SHIFT)
100 #define VXLAN_SO_VNI_HASH(_vni)		((_vni) % VXLAN_SO_VNI_HASH_SIZE)
101 
/*
 * A kernel UDP socket together with the softcs and multicast slots that use
 * it.
 *
 * Instances are linked on the global vxlan_socket_list through vxlso_entry
 * (under vxlan_list_mtx) and are reference counted with vxlso_refcnt;
 * vxlan_socket_release() unlinks and destroys the socket when the last
 * reference is dropped.  vxlso_laddr is the local address the socket is
 * created for and bound to.
 */
102 struct vxlan_socket {
103 	struct socket			*vxlso_sock;
104 	struct rmlock			 vxlso_lock;
105 	u_int				 vxlso_refcnt;
106 	union vxlan_sockaddr		 vxlso_laddr;
107 	LIST_ENTRY(vxlan_socket)	 vxlso_entry;
108 	struct vxlan_softc_head		 vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
109 	struct vxlan_socket_mc_info	 vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
110 };
111 
/*
 * Wrappers around the per-socket rmlock (vxlso_lock).  The read variants
 * take the caller's struct rm_priotracker as their second argument.
 */
112 #define VXLAN_SO_RLOCK(_vso, _p)	rm_rlock(&(_vso)->vxlso_lock, (_p))
113 #define VXLAN_SO_RUNLOCK(_vso, _p)	rm_runlock(&(_vso)->vxlso_lock, (_p))
114 #define VXLAN_SO_WLOCK(_vso)		rm_wlock(&(_vso)->vxlso_lock)
115 #define VXLAN_SO_WUNLOCK(_vso)		rm_wunlock(&(_vso)->vxlso_lock)
116 #define VXLAN_SO_LOCK_ASSERT(_vso) \
117     rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
118 #define VXLAN_SO_LOCK_WASSERT(_vso) \
119     rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)
120 
/*
 * Socket reference counting.  VXLAN_SO_RELEASE() evaluates to the result of
 * refcount_release(); vxlan_socket_release() destroys the socket when that
 * result is non-zero.
 */
121 #define VXLAN_SO_ACQUIRE(_vso)		refcount_acquire(&(_vso)->vxlso_refcnt)
122 #define VXLAN_SO_RELEASE(_vso)		refcount_release(&(_vso)->vxlso_refcnt)
123 
/*
 * One forwarding table entry: maps an Ethernet address (vxlfe_mac) to the
 * remote address stored in vxlfe_raddr.
 *
 * vxlfe_hash links the entry into its hash bucket, vxlfe_flags holds the
 * VXLAN_FE_FLAG_* bits, and vxlfe_expire is a time_uptime based deadline
 * (time_uptime + the softc's vxl_ftable_timeout) that is only acted upon
 * for dynamic entries.
 */
124 struct vxlan_ftable_entry {
125 	LIST_ENTRY(vxlan_ftable_entry)	 vxlfe_hash;
126 	uint16_t			 vxlfe_flags;
127 	uint8_t				 vxlfe_mac[ETHER_ADDR_LEN];
128 	union vxlan_sockaddr		 vxlfe_raddr;
129 	time_t				 vxlfe_expire;
130 };
131 
/*
 * Forwarding entry flags.  Learned entries are created with
 * VXLAN_FE_FLAG_DYNAMIC; only dynamic entries are expired or removed by a
 * non-"all" flush.
 */
132 #define VXLAN_FE_FLAG_DYNAMIC		0x01
133 #define VXLAN_FE_FLAG_STATIC		0x02
134 
135 #define VXLAN_FE_IS_DYNAMIC(_fe) \
136     ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)
137 
/*
 * The per-softc forwarding table has 1 << 9 = 512 buckets; a MAC address is
 * mapped to its bucket by vxlan_mac_hash() modulo the table size.
 */
138 #define VXLAN_SC_FTABLE_SHIFT		9
139 #define VXLAN_SC_FTABLE_SIZE		(1 << VXLAN_SC_FTABLE_SHIFT)
140 #define VXLAN_SC_FTABLE_MASK		(VXLAN_SC_FTABLE_SIZE - 1)
141 #define VXLAN_SC_FTABLE_HASH(_sc, _mac)	\
142     (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)
143 
/* Head type of one forwarding table hash bucket. */
144 LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
145 
/*
 * Per-softc event counters, both bumped by vxlan_ftable_update_locked():
 * ftable_nospace when a new entry is refused because the table is at
 * vxl_ftable_max, and ftable_lock_upgrade_failed each time the read lock is
 * dropped and the write lock taken in order to modify the table.
 */
146 struct vxlan_statistics {
147 	uint32_t	ftable_nospace;
148 	uint32_t	ftable_lock_upgrade_failed;
149 };
150 
/*
 * Per-interface software context.
 *
 * The forwarding table (vxl_ftable, vxl_ftable_cnt) is accessed under
 * vxl_lock: lookups assert the lock is held, inserts and expiry assert it is
 * write-held.  vxl_ftable is an array of VXLAN_SC_FTABLE_SIZE bucket heads
 * allocated by vxlan_ftable_init(), which also picks a random
 * vxl_ftable_hash_key.
 */
151 struct vxlan_softc {
152 	struct ifnet			*vxl_ifp;
153 	struct vxlan_socket		*vxl_sock;
154 	uint32_t			 vxl_vni;
155 	union vxlan_sockaddr		 vxl_src_addr;
156 	union vxlan_sockaddr		 vxl_dst_addr;
157 	uint32_t			 vxl_flags;
158 #define VXLAN_FLAG_INIT		0x0001
159 #define VXLAN_FLAG_TEARDOWN	0x0002
160 #define VXLAN_FLAG_LEARN	0x0004
161 
162 	uint32_t			 vxl_port_hash_key;
163 	uint16_t			 vxl_min_port;
164 	uint16_t			 vxl_max_port;
165 	uint8_t				 vxl_ttl;
166 
167 	/* Lookup table from MAC address to forwarding entry. */
	/*
	 * vxl_ftable_cnt is the current number of entries, vxl_ftable_max the
	 * limit enforced when learning, and vxl_ftable_timeout the number of
	 * seconds added to time_uptime to form an entry's expiry.
	 */
168 	uint32_t			 vxl_ftable_cnt;
169 	uint32_t			 vxl_ftable_max;
170 	uint32_t			 vxl_ftable_timeout;
171 	uint32_t			 vxl_ftable_hash_key;
172 	struct vxlan_ftable_head	*vxl_ftable;
173 
174 	/* Derived from vxl_dst_addr. */
175 	struct vxlan_ftable_entry	 vxl_default_fe;
176 
177 	struct ip_moptions		*vxl_im4o;
178 	struct ip6_moptions		*vxl_im6o;
179 
180 	struct rmlock			 vxl_lock;
181 	volatile u_int			 vxl_refcnt;
182 
183 	int				 vxl_unit;
184 	int				 vxl_vso_mc_index;
185 	struct vxlan_statistics		 vxl_stats;
186 	struct sysctl_oid		*vxl_sysctl_node;
187 	struct sysctl_ctx_list		 vxl_sysctl_ctx;
188 	struct callout			 vxl_callout;
189 	struct ether_addr		 vxl_hwaddr;
190 	int				 vxl_mc_ifindex;
191 	struct ifnet			*vxl_mc_ifp;
192 	struct ifmedia 			 vxl_media;
193 	char				 vxl_mc_ifname[IFNAMSIZ];
194 	LIST_ENTRY(vxlan_softc)		 vxl_entry;
195 	LIST_ENTRY(vxlan_softc)		 vxl_ifdetach_list;
196 };
197 
/*
 * Wrappers around the per-softc rmlock (vxl_lock).  The read variants take
 * the caller's struct rm_priotracker as their second argument.
 */
198 #define VXLAN_RLOCK(_sc, _p)	rm_rlock(&(_sc)->vxl_lock, (_p))
199 #define VXLAN_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->vxl_lock, (_p))
200 #define VXLAN_WLOCK(_sc)	rm_wlock(&(_sc)->vxl_lock)
201 #define VXLAN_WUNLOCK(_sc)	rm_wunlock(&(_sc)->vxl_lock)
202 #define VXLAN_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->vxl_lock)
203 #define VXLAN_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
204 #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
/*
 * Release vxl_lock in whichever mode the caller currently holds it.  Needed
 * after vxlan_ftable_update_locked(), which may have swapped the caller's
 * read lock for the write lock.
 */
205 #define VXLAN_UNLOCK(_sc, _p) do {		\
206     if (VXLAN_LOCK_WOWNED(_sc))			\
207 	VXLAN_WUNLOCK(_sc);			\
208     else					\
209 	VXLAN_RUNLOCK(_sc, _p);			\
210 } while (0)
211 
/* Softc reference counting on vxl_refcnt. */
212 #define VXLAN_ACQUIRE(_sc)	refcount_acquire(&(_sc)->vxl_refcnt)
213 #define VXLAN_RELEASE(_sc)	refcount_release(&(_sc)->vxl_refcnt)
214 
/* Const-preserving casts from a generic sockaddr pointer. */
215 #define	satoconstsin(sa)	((const struct sockaddr_in *)(sa))
216 #define	satoconstsin6(sa)	((const struct sockaddr_in6 *)(sa))
217 
/*
 * A UDP header immediately followed by the VXLAN header; declared __packed
 * so the two are laid out back to back.
 */
218 struct vxlanudphdr {
219 	struct udphdr		vxlh_udp;
220 	struct vxlan_header	vxlh_hdr;
221 } __packed;
222 
223 static int	vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
224 static void	vxlan_ftable_init(struct vxlan_softc *);
225 static void	vxlan_ftable_fini(struct vxlan_softc *);
226 static void	vxlan_ftable_flush(struct vxlan_softc *, int);
227 static void	vxlan_ftable_expire(struct vxlan_softc *);
228 static int	vxlan_ftable_update_locked(struct vxlan_softc *,
229 		    const union vxlan_sockaddr *, const uint8_t *,
230 		    struct rm_priotracker *);
231 static int	vxlan_ftable_learn(struct vxlan_softc *,
232 		    const struct sockaddr *, const uint8_t *);
233 static int	vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
234 
235 static struct vxlan_ftable_entry *
236 		vxlan_ftable_entry_alloc(void);
237 static void	vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
238 static void	vxlan_ftable_entry_init(struct vxlan_softc *,
239 		    struct vxlan_ftable_entry *, const uint8_t *,
240 		    const struct sockaddr *, uint32_t);
241 static void	vxlan_ftable_entry_destroy(struct vxlan_softc *,
242 		    struct vxlan_ftable_entry *);
243 static int	vxlan_ftable_entry_insert(struct vxlan_softc *,
244 		    struct vxlan_ftable_entry *);
245 static struct vxlan_ftable_entry *
246 		vxlan_ftable_entry_lookup(struct vxlan_softc *,
247 		    const uint8_t *);
248 static void	vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
249 		    struct sbuf *);
250 
251 static struct vxlan_socket *
252 		vxlan_socket_alloc(const union vxlan_sockaddr *);
253 static void	vxlan_socket_destroy(struct vxlan_socket *);
254 static void	vxlan_socket_release(struct vxlan_socket *);
255 static struct vxlan_socket *
256 		vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
257 static void	vxlan_socket_insert(struct vxlan_socket *);
258 static int	vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
259 static int	vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
260 static int	vxlan_socket_create(struct ifnet *, int,
261 		    const union vxlan_sockaddr *, struct vxlan_socket **);
262 static void	vxlan_socket_ifdetach(struct vxlan_socket *,
263 		    struct ifnet *, struct vxlan_softc_head *);
264 
265 static struct vxlan_socket *
266 		vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
267 static int	vxlan_sockaddr_mc_info_match(
268 		    const struct vxlan_socket_mc_info *,
269 		    const union vxlan_sockaddr *,
270 		    const union vxlan_sockaddr *, int);
271 static int	vxlan_socket_mc_join_group(struct vxlan_socket *,
272 		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
273 		    int *, union vxlan_sockaddr *);
274 static int	vxlan_socket_mc_leave_group(struct vxlan_socket *,
275 		    const union vxlan_sockaddr *,
276 		    const union vxlan_sockaddr *, int);
277 static int	vxlan_socket_mc_add_group(struct vxlan_socket *,
278 		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
279 		    int, int *);
280 static void	vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
281 		    int);
282 
283 static struct vxlan_softc *
284 		vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
285 		    uint32_t);
286 static struct vxlan_softc *
287 		vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
288 static int	vxlan_socket_insert_softc(struct vxlan_socket *,
289 		    struct vxlan_softc *);
290 static void	vxlan_socket_remove_softc(struct vxlan_socket *,
291 		    struct vxlan_softc *);
292 
293 static struct ifnet *
294 		vxlan_multicast_if_ref(struct vxlan_softc *, int);
295 static void	vxlan_free_multicast(struct vxlan_softc *);
296 static int	vxlan_setup_multicast_interface(struct vxlan_softc *);
297 
298 static int	vxlan_setup_multicast(struct vxlan_softc *);
299 static int	vxlan_setup_socket(struct vxlan_softc *);
300 static void	vxlan_setup_interface(struct vxlan_softc *);
301 static int	vxlan_valid_init_config(struct vxlan_softc *);
302 static void	vxlan_init_wait(struct vxlan_softc *);
303 static void	vxlan_init_complete(struct vxlan_softc *);
304 static void	vxlan_init(void *);
305 static void	vxlan_release(struct vxlan_softc *);
306 static void	vxlan_teardown_wait(struct vxlan_softc *);
307 static void	vxlan_teardown_complete(struct vxlan_softc *);
308 static void	vxlan_teardown_locked(struct vxlan_softc *);
309 static void	vxlan_teardown(struct vxlan_softc *);
310 static void	vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
311 		    struct vxlan_softc_head *);
312 static void	vxlan_timer(void *);
313 
314 static int	vxlan_ctrl_get_config(struct vxlan_softc *, void *);
315 static int	vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
316 static int	vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
317 static int	vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
318 static int	vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
319 static int	vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
320 static int	vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
321 static int	vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
322 static int	vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
323 static int	vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
324 static int	vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
325 static int	vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
326 static int	vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
327 static int	vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
328 static int	vxlan_ctrl_flush(struct vxlan_softc *, void *);
329 static int	vxlan_ioctl_drvspec(struct vxlan_softc *,
330 		    struct ifdrv *, int);
331 static int	vxlan_ioctl_ifflags(struct vxlan_softc *);
332 static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);
333 
334 #if defined(INET) || defined(INET6)
335 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
336 static void	vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
337 		    int, uint16_t, uint16_t);
338 #endif
339 static int	vxlan_encap4(struct vxlan_softc *,
340 		    const union vxlan_sockaddr *, struct mbuf *);
341 static int	vxlan_encap6(struct vxlan_softc *,
342 		    const union vxlan_sockaddr *, struct mbuf *);
343 static int	vxlan_transmit(struct ifnet *, struct mbuf *);
344 static void	vxlan_qflush(struct ifnet *);
345 static void	vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
346 		    const struct sockaddr *, void *);
347 static int	vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
348 		    const struct sockaddr *);
349 
350 static void	vxlan_set_default_config(struct vxlan_softc *);
351 static int	vxlan_set_user_config(struct vxlan_softc *,
352 		     struct ifvxlanparam *);
353 static int	vxlan_clone_create(struct if_clone *, int, caddr_t);
354 static void	vxlan_clone_destroy(struct ifnet *);
355 
356 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
357 static int	vxlan_media_change(struct ifnet *);
358 static void	vxlan_media_status(struct ifnet *, struct ifmediareq *);
359 
360 static int	vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
361 		    const struct sockaddr *);
362 static void	vxlan_sockaddr_copy(union vxlan_sockaddr *,
363 		    const struct sockaddr *);
364 static int	vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
365 		    const struct sockaddr *);
366 static void	vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
367 		    const struct sockaddr *);
368 static int	vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
369 static int	vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
370 static int	vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
371 static int	vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *);
372 
373 static int	vxlan_can_change_config(struct vxlan_softc *);
374 static int	vxlan_check_vni(uint32_t);
375 static int	vxlan_check_ttl(int);
376 static int	vxlan_check_ftable_timeout(uint32_t);
377 static int	vxlan_check_ftable_max(uint32_t);
378 
379 static void	vxlan_sysctl_setup(struct vxlan_softc *);
380 static void	vxlan_sysctl_destroy(struct vxlan_softc *);
381 static int	vxlan_tunable_int(struct vxlan_softc *, const char *, int);
382 
383 static void	vxlan_ifdetach_event(void *, struct ifnet *);
384 static void	vxlan_load(void);
385 static void	vxlan_unload(void);
386 static int	vxlan_modevent(module_t, int, void *);
387 
/* Driver name; also used as the short name of the M_VXLAN malloc type. */
388 static const char vxlan_name[] = "vxlan";
389 static MALLOC_DEFINE(M_VXLAN, vxlan_name,
390     "Virtual eXtensible LAN Interface");
391 static struct if_clone *vxlan_cloner;
392 
/*
 * Mutex taken around every walk or modification of vxlan_socket_list
 * (vxlan_socket_lookup(), vxlan_socket_insert(), vxlan_socket_release()).
 */
393 static struct mtx vxlan_list_mtx;
394 #define VXLAN_LIST_LOCK()	mtx_lock(&vxlan_list_mtx)
395 #define VXLAN_LIST_UNLOCK()	mtx_unlock(&vxlan_list_mtx)
396 
/* All vxlan_socket instances, linked through vxlso_entry. */
397 static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
398 
399 static eventhandler_tag vxlan_ifdetach_event_tag;
400 
/* Root of the net.link.vxlan sysctl tree. */
401 SYSCTL_DECL(_net_link);
402 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
403     "Virtual eXtensible Local Area Network");
404 
/*
 * Tunables, both defaulting to 0.  A non-zero reuse_port makes
 * vxlan_socket_init() set SO_REUSEPORT on each socket it creates.
 * NOTE(review): the consumer of legacy_port is not visible in this part of
 * the file.
 */
405 static int vxlan_legacy_port = 0;
406 TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
407 static int vxlan_reuse_port = 0;
408 TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
409 
/*
 * Each default below is wrapped in #ifndef, so it can be overridden by a
 * definition supplied before this point (e.g. at build time).
 */
410 /* Default maximum number of addresses in the forwarding table. */
411 #ifndef VXLAN_FTABLE_MAX
412 #define VXLAN_FTABLE_MAX	2000
413 #endif
414 
415 /* Timeout (in seconds) of addresses learned in the forwarding table. */
416 #ifndef VXLAN_FTABLE_TIMEOUT
417 #define VXLAN_FTABLE_TIMEOUT	(20 * 60)
418 #endif
419 
420 /*
421  * Maximum timeout (in seconds) of addresses learned in the forwarding
422  * table.
423  */
424 #ifndef VXLAN_FTABLE_MAX_TIMEOUT
425 #define VXLAN_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)
426 #endif
427 
428 /* Number of seconds between pruning attempts of the forwarding table. */
429 #ifndef VXLAN_FTABLE_PRUNE
430 #define VXLAN_FTABLE_PRUNE	(5 * 60)
431 #endif
432 
/* Prune interval in effect, initialised from VXLAN_FTABLE_PRUNE. */
433 static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
434 
/*
 * Descriptor for one driver-specific control command: the handler to call,
 * the size of its argument structure, and VXLAN_CTRL_FLAG_* bits.
 *
 * NOTE(review): by their names the flags request a copy-in of the argument,
 * a copy-out of the result, and a privilege check; the dispatcher that
 * interprets them is not visible in this part of the file - confirm.
 */
435 struct vxlan_control {
436 	int	(*vxlc_func)(struct vxlan_softc *, void *);
437 	int	vxlc_argsize;
438 	int	vxlc_flags;
439 #define VXLAN_CTRL_FLAG_COPYIN	0x01
440 #define VXLAN_CTRL_FLAG_COPYOUT	0x02
441 #define VXLAN_CTRL_FLAG_SUSER	0x04
442 };
443 
/*
 * Control command table, indexed by VXLAN_CMD_* value.
 *
 * VXLAN_CMD_GET_CONFIG takes a struct ifvxlancfg and is flagged COPYOUT
 * only; every other command takes a struct ifvxlancmd and is flagged
 * COPYIN | SUSER.
 */
444 static const struct vxlan_control vxlan_control_table[] = {
445 	[VXLAN_CMD_GET_CONFIG] =
446 	    {	vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
447 		VXLAN_CTRL_FLAG_COPYOUT
448 	    },
449 
450 	[VXLAN_CMD_SET_VNI] =
451 	    {   vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
452 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
453 	    },
454 
455 	[VXLAN_CMD_SET_LOCAL_ADDR] =
456 	    {   vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
457 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
458 	    },
459 
460 	[VXLAN_CMD_SET_REMOTE_ADDR] =
461 	    {   vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
462 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
463 	    },
464 
465 	[VXLAN_CMD_SET_LOCAL_PORT] =
466 	    {   vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
467 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
468 	    },
469 
470 	[VXLAN_CMD_SET_REMOTE_PORT] =
471 	    {   vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
472 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
473 	    },
474 
475 	[VXLAN_CMD_SET_PORT_RANGE] =
476 	    {   vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
477 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
478 	    },
479 
480 	[VXLAN_CMD_SET_FTABLE_TIMEOUT] =
481 	    {	vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
482 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
483 	    },
484 
485 	[VXLAN_CMD_SET_FTABLE_MAX] =
486 	    {	vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
487 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
488 	    },
489 
490 	[VXLAN_CMD_SET_MULTICAST_IF] =
491 	    {	vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
492 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
493 	    },
494 
495 	[VXLAN_CMD_SET_TTL] =
496 	    {	vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
497 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
498 	    },
499 
500 	[VXLAN_CMD_SET_LEARN] =
501 	    {	vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
502 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
503 	    },
504 
505 	[VXLAN_CMD_FTABLE_ENTRY_ADD] =
506 	    {	vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
507 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
508 	    },
509 
510 	[VXLAN_CMD_FTABLE_ENTRY_REM] =
511 	    {	vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
512 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
513 	    },
514 
515 	[VXLAN_CMD_FLUSH] =
516 	    {   vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
517 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
518 	    },
519 };
520 
/* Number of slots in vxlan_control_table. */
521 static const int vxlan_control_table_size = nitems(vxlan_control_table);
522 
/*
 * Compare two Ethernet addresses byte by byte.
 *
 * Returns 0 when all ETHER_ADDR_LEN bytes match; otherwise the signed
 * difference (a[i] - b[i]) of the first pair of bytes that differ.  The
 * sign of the result defines the ordering used inside a forwarding table
 * hash bucket.
 */
static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx, delta;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		delta = (int)a[idx] - (int)b[idx];
		if (delta != 0)
			return (delta);
	}

	return (0);
}
533 
534 static void
535 vxlan_ftable_init(struct vxlan_softc *sc)
536 {
537 	int i;
538 
539 	sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
540 	    VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
541 
542 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
543 		LIST_INIT(&sc->vxl_ftable[i]);
544 	sc->vxl_ftable_hash_key = arc4random();
545 }
546 
547 static void
548 vxlan_ftable_fini(struct vxlan_softc *sc)
549 {
550 	int i;
551 
552 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
553 		KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
554 		    ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
555 	}
556 	MPASS(sc->vxl_ftable_cnt == 0);
557 
558 	free(sc->vxl_ftable, M_VXLAN);
559 	sc->vxl_ftable = NULL;
560 }
561 
562 static void
563 vxlan_ftable_flush(struct vxlan_softc *sc, int all)
564 {
565 	struct vxlan_ftable_entry *fe, *tfe;
566 	int i;
567 
568 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
569 		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
570 			if (all || VXLAN_FE_IS_DYNAMIC(fe))
571 				vxlan_ftable_entry_destroy(sc, fe);
572 		}
573 	}
574 }
575 
576 static void
577 vxlan_ftable_expire(struct vxlan_softc *sc)
578 {
579 	struct vxlan_ftable_entry *fe, *tfe;
580 	int i;
581 
582 	VXLAN_LOCK_WASSERT(sc);
583 
584 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
585 		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
586 			if (VXLAN_FE_IS_DYNAMIC(fe) &&
587 			    time_uptime >= fe->vxlfe_expire)
588 				vxlan_ftable_entry_destroy(sc, fe);
589 		}
590 	}
591 }
592 
593 static int
594 vxlan_ftable_update_locked(struct vxlan_softc *sc,
595     const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
596     struct rm_priotracker *tracker)
597 {
598 	struct vxlan_ftable_entry *fe;
599 	int error __unused;
600 
601 	VXLAN_LOCK_ASSERT(sc);
602 
603 again:
604 	/*
605 	 * A forwarding entry for this MAC address might already exist. If
606 	 * so, update it, otherwise create a new one. We may have to upgrade
607 	 * the lock if we have to change or create an entry.
608 	 */
609 	fe = vxlan_ftable_entry_lookup(sc, mac);
610 	if (fe != NULL) {
611 		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
612 
613 		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
614 		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa))
615 			return (0);
616 		if (!VXLAN_LOCK_WOWNED(sc)) {
617 			VXLAN_RUNLOCK(sc, tracker);
618 			VXLAN_WLOCK(sc);
619 			sc->vxl_stats.ftable_lock_upgrade_failed++;
620 			goto again;
621 		}
622 		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa);
623 		return (0);
624 	}
625 
626 	if (!VXLAN_LOCK_WOWNED(sc)) {
627 		VXLAN_RUNLOCK(sc, tracker);
628 		VXLAN_WLOCK(sc);
629 		sc->vxl_stats.ftable_lock_upgrade_failed++;
630 		goto again;
631 	}
632 
633 	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
634 		sc->vxl_stats.ftable_nospace++;
635 		return (ENOSPC);
636 	}
637 
638 	fe = vxlan_ftable_entry_alloc();
639 	if (fe == NULL)
640 		return (ENOMEM);
641 
642 	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC);
643 
644 	/* The prior lookup failed, so the insert should not. */
645 	error = vxlan_ftable_entry_insert(sc, fe);
646 	MPASS(error == 0);
647 
648 	return (0);
649 }
650 
651 static int
652 vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
653     const uint8_t *mac)
654 {
655 	struct rm_priotracker tracker;
656 	union vxlan_sockaddr vxlsa;
657 	int error;
658 
659 	/*
660 	 * The source port may be randomly selected by the remote host, so
661 	 * use the port of the default destination address.
662 	 */
663 	vxlan_sockaddr_copy(&vxlsa, sa);
664 	vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
665 
666 	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
667 		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
668 		if (error)
669 			return (error);
670 	}
671 
672 	VXLAN_RLOCK(sc, &tracker);
673 	error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
674 	VXLAN_UNLOCK(sc, &tracker);
675 
676 	return (error);
677 }
678 
679 static int
680 vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
681 {
682 	struct rm_priotracker tracker;
683 	struct sbuf sb;
684 	struct vxlan_softc *sc;
685 	struct vxlan_ftable_entry *fe;
686 	size_t size;
687 	int i, error;
688 
689 	/*
690 	 * This is mostly intended for debugging during development. It is
691 	 * not practical to dump an entire large table this way.
692 	 */
693 
694 	sc = arg1;
695 	size = PAGE_SIZE;	/* Calculate later. */
696 
697 	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
698 	sbuf_putc(&sb, '\n');
699 
700 	VXLAN_RLOCK(sc, &tracker);
701 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
702 		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
703 			if (sbuf_error(&sb) != 0)
704 				break;
705 			vxlan_ftable_entry_dump(fe, &sb);
706 		}
707 	}
708 	VXLAN_RUNLOCK(sc, &tracker);
709 
710 	if (sbuf_len(&sb) == 1)
711 		sbuf_setpos(&sb, 0);
712 
713 	sbuf_finish(&sb);
714 	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
715 	sbuf_delete(&sb);
716 
717 	return (error);
718 }
719 
720 static struct vxlan_ftable_entry *
721 vxlan_ftable_entry_alloc(void)
722 {
723 	struct vxlan_ftable_entry *fe;
724 
725 	fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
726 
727 	return (fe);
728 }
729 
730 static void
731 vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
732 {
733 
734 	free(fe, M_VXLAN);
735 }
736 
737 static void
738 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
739     const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
740 {
741 
742 	fe->vxlfe_flags = flags;
743 	fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
744 	memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
745 	vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
746 }
747 
748 static void
749 vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
750     struct vxlan_ftable_entry *fe)
751 {
752 
753 	sc->vxl_ftable_cnt--;
754 	LIST_REMOVE(fe, vxlfe_hash);
755 	vxlan_ftable_entry_free(fe);
756 }
757 
758 static int
759 vxlan_ftable_entry_insert(struct vxlan_softc *sc,
760     struct vxlan_ftable_entry *fe)
761 {
762 	struct vxlan_ftable_entry *lfe;
763 	uint32_t hash;
764 	int dir;
765 
766 	VXLAN_LOCK_WASSERT(sc);
767 	hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
768 
769 	lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
770 	if (lfe == NULL) {
771 		LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
772 		goto out;
773 	}
774 
775 	do {
776 		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
777 		if (dir == 0)
778 			return (EEXIST);
779 		if (dir > 0) {
780 			LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
781 			goto out;
782 		} else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
783 			LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
784 			goto out;
785 		} else
786 			lfe = LIST_NEXT(lfe, vxlfe_hash);
787 	} while (lfe != NULL);
788 
789 out:
790 	sc->vxl_ftable_cnt++;
791 
792 	return (0);
793 }
794 
795 static struct vxlan_ftable_entry *
796 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
797 {
798 	struct vxlan_ftable_entry *fe;
799 	uint32_t hash;
800 	int dir;
801 
802 	VXLAN_LOCK_ASSERT(sc);
803 	hash = VXLAN_SC_FTABLE_HASH(sc, mac);
804 
805 	LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
806 		dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
807 		if (dir == 0)
808 			return (fe);
809 		if (dir > 0)
810 			break;
811 	}
812 
813 	return (NULL);
814 }
815 
816 static void
817 vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
818 {
819 	char buf[64];
820 	const union vxlan_sockaddr *sa;
821 	const void *addr;
822 	int i, len, af, width;
823 
824 	sa = &fe->vxlfe_raddr;
825 	af = sa->sa.sa_family;
826 	len = sbuf_len(sb);
827 
828 	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
829 	    fe->vxlfe_flags);
830 
831 	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
832 		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
833 	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
834 
835 	if (af == AF_INET) {
836 		addr = &sa->in4.sin_addr;
837 		width = INET_ADDRSTRLEN - 1;
838 	} else {
839 		addr = &sa->in6.sin6_addr;
840 		width = INET6_ADDRSTRLEN - 1;
841 	}
842 	inet_ntop(af, addr, buf, sizeof(buf));
843 	sbuf_printf(sb, "%*s ", width, buf);
844 
845 	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
846 
847 	sbuf_putc(sb, '\n');
848 
849 	/* Truncate a partial line. */
850 	if (sbuf_error(sb) != 0)
851 		sbuf_setpos(sb, len);
852 }
853 
854 static struct vxlan_socket *
855 vxlan_socket_alloc(const union vxlan_sockaddr *sa)
856 {
857 	struct vxlan_socket *vso;
858 	int i;
859 
860 	vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
861 	rm_init(&vso->vxlso_lock, "vxlansorm");
862 	refcount_init(&vso->vxlso_refcnt, 0);
863 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
864 		LIST_INIT(&vso->vxlso_vni_hash[i]);
865 	vso->vxlso_laddr = *sa;
866 
867 	return (vso);
868 }
869 
870 static void
871 vxlan_socket_destroy(struct vxlan_socket *vso)
872 {
873 	struct socket *so;
874 #ifdef INVARIANTS
875 	int i;
876 	struct vxlan_socket_mc_info *mc;
877 
878 	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
879 		mc = &vso->vxlso_mc[i];
880 		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
881 		    ("%s: socket %p mc[%d] still has address",
882 		     __func__, vso, i));
883 	}
884 
885 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
886 		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
887 		    ("%s: socket %p vni_hash[%d] not empty",
888 		     __func__, vso, i));
889 	}
890 #endif
891 	so = vso->vxlso_sock;
892 	if (so != NULL) {
893 		vso->vxlso_sock = NULL;
894 		soclose(so);
895 	}
896 
897 	rm_destroy(&vso->vxlso_lock);
898 	free(vso, M_VXLAN);
899 }
900 
901 static void
902 vxlan_socket_release(struct vxlan_socket *vso)
903 {
904 	int destroy;
905 
906 	VXLAN_LIST_LOCK();
907 	destroy = VXLAN_SO_RELEASE(vso);
908 	if (destroy != 0)
909 		LIST_REMOVE(vso, vxlso_entry);
910 	VXLAN_LIST_UNLOCK();
911 
912 	if (destroy != 0)
913 		vxlan_socket_destroy(vso);
914 }
915 
916 static struct vxlan_socket *
917 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
918 {
919 	struct vxlan_socket *vso;
920 
921 	VXLAN_LIST_LOCK();
922 	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
923 		if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
924 			VXLAN_SO_ACQUIRE(vso);
925 			break;
926 		}
927 	}
928 	VXLAN_LIST_UNLOCK();
929 
930 	return (vso);
931 }
932 
933 static void
934 vxlan_socket_insert(struct vxlan_socket *vso)
935 {
936 
937 	VXLAN_LIST_LOCK();
938 	VXLAN_SO_ACQUIRE(vso);
939 	LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
940 	VXLAN_LIST_UNLOCK();
941 }
942 
943 static int
944 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
945 {
946 	struct thread *td;
947 	int error;
948 
949 	td = curthread;
950 
951 	error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
952 	    SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
953 	if (error) {
954 		if_printf(ifp, "cannot create socket: %d\n", error);
955 		return (error);
956 	}
957 
958 	error = udp_set_kernel_tunneling(vso->vxlso_sock,
959 	    vxlan_rcv_udp_packet, NULL, vso);
960 	if (error) {
961 		if_printf(ifp, "cannot set tunneling function: %d\n", error);
962 		return (error);
963 	}
964 
965 	if (vxlan_reuse_port != 0) {
966 		struct sockopt sopt;
967 		int val = 1;
968 
969 		bzero(&sopt, sizeof(sopt));
970 		sopt.sopt_dir = SOPT_SET;
971 		sopt.sopt_level = IPPROTO_IP;
972 		sopt.sopt_name = SO_REUSEPORT;
973 		sopt.sopt_val = &val;
974 		sopt.sopt_valsize = sizeof(val);
975 		error = sosetopt(vso->vxlso_sock, &sopt);
976 		if (error) {
977 			if_printf(ifp,
978 			    "cannot set REUSEADDR socket opt: %d\n", error);
979 			return (error);
980 		}
981 	}
982 
983 	return (0);
984 }
985 
986 static int
987 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
988 {
989 	union vxlan_sockaddr laddr;
990 	struct thread *td;
991 	int error;
992 
993 	td = curthread;
994 	laddr = vso->vxlso_laddr;
995 
996 	error = sobind(vso->vxlso_sock, &laddr.sa, td);
997 	if (error) {
998 		if (error != EADDRINUSE)
999 			if_printf(ifp, "cannot bind socket: %d\n", error);
1000 		return (error);
1001 	}
1002 
1003 	return (0);
1004 }
1005 
1006 static int
1007 vxlan_socket_create(struct ifnet *ifp, int multicast,
1008     const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
1009 {
1010 	union vxlan_sockaddr laddr;
1011 	struct vxlan_socket *vso;
1012 	int error;
1013 
1014 	laddr = *saddr;
1015 
1016 	/*
1017 	 * If this socket will be multicast, then only the local port
1018 	 * must be specified when binding.
1019 	 */
1020 	if (multicast != 0) {
1021 		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1022 			laddr.in4.sin_addr.s_addr = INADDR_ANY;
1023 #ifdef INET6
1024 		else
1025 			laddr.in6.sin6_addr = in6addr_any;
1026 #endif
1027 	}
1028 
1029 	vso = vxlan_socket_alloc(&laddr);
1030 	if (vso == NULL)
1031 		return (ENOMEM);
1032 
1033 	error = vxlan_socket_init(vso, ifp);
1034 	if (error)
1035 		goto fail;
1036 
1037 	error = vxlan_socket_bind(vso, ifp);
1038 	if (error)
1039 		goto fail;
1040 
1041 	/*
1042 	 * There is a small window between the bind completing and
1043 	 * inserting the socket, so that a concurrent create may fail.
1044 	 * Let's not worry about that for now.
1045 	 */
1046 	vxlan_socket_insert(vso);
1047 	*vsop = vso;
1048 
1049 	return (0);
1050 
1051 fail:
1052 	vxlan_socket_destroy(vso);
1053 
1054 	return (error);
1055 }
1056 
1057 static void
1058 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
1059     struct vxlan_softc_head *list)
1060 {
1061 	struct rm_priotracker tracker;
1062 	struct vxlan_softc *sc;
1063 	int i;
1064 
1065 	VXLAN_SO_RLOCK(vso, &tracker);
1066 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
1067 		LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
1068 			vxlan_ifdetach(sc, ifp, list);
1069 	}
1070 	VXLAN_SO_RUNLOCK(vso, &tracker);
1071 }
1072 
1073 static struct vxlan_socket *
1074 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
1075 {
1076 	union vxlan_sockaddr laddr;
1077 	struct vxlan_socket *vso;
1078 
1079 	laddr = *vxlsa;
1080 
1081 	if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1082 		laddr.in4.sin_addr.s_addr = INADDR_ANY;
1083 #ifdef INET6
1084 	else
1085 		laddr.in6.sin6_addr = in6addr_any;
1086 #endif
1087 
1088 	vso = vxlan_socket_lookup(&laddr);
1089 
1090 	return (vso);
1091 }
1092 
1093 static int
1094 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
1095     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1096     int ifidx)
1097 {
1098 
1099 	if (!vxlan_sockaddr_in_any(local) &&
1100 	    !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
1101 		return (0);
1102 	if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
1103 		return (0);
1104 	if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
1105 		return (0);
1106 
1107 	return (1);
1108 }
1109 
1110 static int
1111 vxlan_socket_mc_join_group(struct vxlan_socket *vso,
1112     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1113     int *ifidx, union vxlan_sockaddr *source)
1114 {
1115 	struct sockopt sopt;
1116 	int error;
1117 
1118 	*source = *local;
1119 
1120 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1121 		struct ip_mreq mreq;
1122 
1123 		mreq.imr_multiaddr = group->in4.sin_addr;
1124 		mreq.imr_interface = local->in4.sin_addr;
1125 
1126 		bzero(&sopt, sizeof(sopt));
1127 		sopt.sopt_dir = SOPT_SET;
1128 		sopt.sopt_level = IPPROTO_IP;
1129 		sopt.sopt_name = IP_ADD_MEMBERSHIP;
1130 		sopt.sopt_val = &mreq;
1131 		sopt.sopt_valsize = sizeof(mreq);
1132 		error = sosetopt(vso->vxlso_sock, &sopt);
1133 		if (error)
1134 			return (error);
1135 
1136 		/*
1137 		 * BMV: Ideally, there would be a formal way for us to get
1138 		 * the local interface that was selected based on the
1139 		 * imr_interface address. We could then update *ifidx so
1140 		 * vxlan_sockaddr_mc_info_match() would return a match for
1141 		 * later creates that explicitly set the multicast interface.
1142 		 *
1143 		 * If we really need to, we can of course look in the INP's
1144 		 * membership list:
1145 		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
1146 		 *         imo_head[]->imf_inm->inm_ifp
1147 		 * similarly to imo_match_group().
1148 		 */
1149 		source->in4.sin_addr = local->in4.sin_addr;
1150 
1151 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1152 		struct ipv6_mreq mreq;
1153 
1154 		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1155 		mreq.ipv6mr_interface = *ifidx;
1156 
1157 		bzero(&sopt, sizeof(sopt));
1158 		sopt.sopt_dir = SOPT_SET;
1159 		sopt.sopt_level = IPPROTO_IPV6;
1160 		sopt.sopt_name = IPV6_JOIN_GROUP;
1161 		sopt.sopt_val = &mreq;
1162 		sopt.sopt_valsize = sizeof(mreq);
1163 		error = sosetopt(vso->vxlso_sock, &sopt);
1164 		if (error)
1165 			return (error);
1166 
1167 		/*
1168 		 * BMV: As with IPv4, we would really like to know what
1169 		 * interface in6p_lookup_mcast_ifp() selected.
1170 		 */
1171 	} else
1172 		error = EAFNOSUPPORT;
1173 
1174 	return (error);
1175 }
1176 
1177 static int
1178 vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
1179     const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
1180     int ifidx)
1181 {
1182 	struct sockopt sopt;
1183 	int error;
1184 
1185 	bzero(&sopt, sizeof(sopt));
1186 	sopt.sopt_dir = SOPT_SET;
1187 
1188 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1189 		struct ip_mreq mreq;
1190 
1191 		mreq.imr_multiaddr = group->in4.sin_addr;
1192 		mreq.imr_interface = source->in4.sin_addr;
1193 
1194 		sopt.sopt_level = IPPROTO_IP;
1195 		sopt.sopt_name = IP_DROP_MEMBERSHIP;
1196 		sopt.sopt_val = &mreq;
1197 		sopt.sopt_valsize = sizeof(mreq);
1198 		error = sosetopt(vso->vxlso_sock, &sopt);
1199 
1200 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1201 		struct ipv6_mreq mreq;
1202 
1203 		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1204 		mreq.ipv6mr_interface = ifidx;
1205 
1206 		sopt.sopt_level = IPPROTO_IPV6;
1207 		sopt.sopt_name = IPV6_LEAVE_GROUP;
1208 		sopt.sopt_val = &mreq;
1209 		sopt.sopt_valsize = sizeof(mreq);
1210 		error = sosetopt(vso->vxlso_sock, &sopt);
1211 
1212 	} else
1213 		error = EAFNOSUPPORT;
1214 
1215 	return (error);
1216 }
1217 
1218 static int
1219 vxlan_socket_mc_add_group(struct vxlan_socket *vso,
1220     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1221     int ifidx, int *idx)
1222 {
1223 	union vxlan_sockaddr source;
1224 	struct vxlan_socket_mc_info *mc;
1225 	int i, empty, error;
1226 
1227 	/*
1228 	 * Within a socket, the same multicast group may be used by multiple
1229 	 * interfaces, each with a different network identifier. But a socket
1230 	 * may only join a multicast group once, so keep track of the users
1231 	 * here.
1232 	 */
1233 
1234 	VXLAN_SO_WLOCK(vso);
1235 	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1236 		mc = &vso->vxlso_mc[i];
1237 
1238 		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1239 			empty++;
1240 			continue;
1241 		}
1242 
1243 		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
1244 			goto out;
1245 	}
1246 	VXLAN_SO_WUNLOCK(vso);
1247 
1248 	if (empty == 0)
1249 		return (ENOSPC);
1250 
1251 	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
1252 	if (error)
1253 		return (error);
1254 
1255 	VXLAN_SO_WLOCK(vso);
1256 	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1257 		mc = &vso->vxlso_mc[i];
1258 
1259 		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1260 			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
1261 			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
1262 			mc->vxlsomc_ifidx = ifidx;
1263 			goto out;
1264 		}
1265 	}
1266 	VXLAN_SO_WUNLOCK(vso);
1267 
1268 	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
1269 	MPASS(error == 0);
1270 
1271 	return (ENOSPC);
1272 
1273 out:
1274 	mc->vxlsomc_users++;
1275 	VXLAN_SO_WUNLOCK(vso);
1276 
1277 	*idx = i;
1278 
1279 	return (0);
1280 }
1281 
1282 static void
1283 vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
1284 {
1285 	union vxlan_sockaddr group, source;
1286 	struct vxlan_socket_mc_info *mc;
1287 	int ifidx, leave;
1288 
1289 	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
1290 	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
1291 
1292 	leave = 0;
1293 	mc = &vso->vxlso_mc[idx];
1294 
1295 	VXLAN_SO_WLOCK(vso);
1296 	mc->vxlsomc_users--;
1297 	if (mc->vxlsomc_users == 0) {
1298 		group = mc->vxlsomc_gaddr;
1299 		source = mc->vxlsomc_saddr;
1300 		ifidx = mc->vxlsomc_ifidx;
1301 		bzero(mc, sizeof(*mc));
1302 		leave = 1;
1303 	}
1304 	VXLAN_SO_WUNLOCK(vso);
1305 
1306 	if (leave != 0) {
1307 		/*
1308 		 * Our socket's membership in this group may have already
1309 		 * been removed if we joined through an interface that's
1310 		 * been detached.
1311 		 */
1312 		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
1313 	}
1314 }
1315 
1316 static struct vxlan_softc *
1317 vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
1318 {
1319 	struct vxlan_softc *sc;
1320 	uint32_t hash;
1321 
1322 	VXLAN_SO_LOCK_ASSERT(vso);
1323 	hash = VXLAN_SO_VNI_HASH(vni);
1324 
1325 	LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
1326 		if (sc->vxl_vni == vni) {
1327 			VXLAN_ACQUIRE(sc);
1328 			break;
1329 		}
1330 	}
1331 
1332 	return (sc);
1333 }
1334 
1335 static struct vxlan_softc *
1336 vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
1337 {
1338 	struct rm_priotracker tracker;
1339 	struct vxlan_softc *sc;
1340 
1341 	VXLAN_SO_RLOCK(vso, &tracker);
1342 	sc = vxlan_socket_lookup_softc_locked(vso, vni);
1343 	VXLAN_SO_RUNLOCK(vso, &tracker);
1344 
1345 	return (sc);
1346 }
1347 
1348 static int
1349 vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1350 {
1351 	struct vxlan_softc *tsc;
1352 	uint32_t vni, hash;
1353 
1354 	vni = sc->vxl_vni;
1355 	hash = VXLAN_SO_VNI_HASH(vni);
1356 
1357 	VXLAN_SO_WLOCK(vso);
1358 	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
1359 	if (tsc != NULL) {
1360 		VXLAN_SO_WUNLOCK(vso);
1361 		vxlan_release(tsc);
1362 		return (EEXIST);
1363 	}
1364 
1365 	VXLAN_ACQUIRE(sc);
1366 	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
1367 	VXLAN_SO_WUNLOCK(vso);
1368 
1369 	return (0);
1370 }
1371 
1372 static void
1373 vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1374 {
1375 
1376 	VXLAN_SO_WLOCK(vso);
1377 	LIST_REMOVE(sc, vxl_entry);
1378 	VXLAN_SO_WUNLOCK(vso);
1379 
1380 	vxlan_release(sc);
1381 }
1382 
1383 static struct ifnet *
1384 vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
1385 {
1386 	struct ifnet *ifp;
1387 
1388 	VXLAN_LOCK_ASSERT(sc);
1389 
1390 	if (ipv4 && sc->vxl_im4o != NULL)
1391 		ifp = sc->vxl_im4o->imo_multicast_ifp;
1392 	else if (!ipv4 && sc->vxl_im6o != NULL)
1393 		ifp = sc->vxl_im6o->im6o_multicast_ifp;
1394 	else
1395 		ifp = NULL;
1396 
1397 	if (ifp != NULL)
1398 		if_ref(ifp);
1399 
1400 	return (ifp);
1401 }
1402 
1403 static void
1404 vxlan_free_multicast(struct vxlan_softc *sc)
1405 {
1406 
1407 	if (sc->vxl_mc_ifp != NULL) {
1408 		if_rele(sc->vxl_mc_ifp);
1409 		sc->vxl_mc_ifp = NULL;
1410 		sc->vxl_mc_ifindex = 0;
1411 	}
1412 
1413 	if (sc->vxl_im4o != NULL) {
1414 		free(sc->vxl_im4o, M_VXLAN);
1415 		sc->vxl_im4o = NULL;
1416 	}
1417 
1418 	if (sc->vxl_im6o != NULL) {
1419 		free(sc->vxl_im6o, M_VXLAN);
1420 		sc->vxl_im6o = NULL;
1421 	}
1422 }
1423 
1424 static int
1425 vxlan_setup_multicast_interface(struct vxlan_softc *sc)
1426 {
1427 	struct ifnet *ifp;
1428 
1429 	ifp = ifunit_ref(sc->vxl_mc_ifname);
1430 	if (ifp == NULL) {
1431 		if_printf(sc->vxl_ifp, "multicast interface %s does "
1432 		    "not exist\n", sc->vxl_mc_ifname);
1433 		return (ENOENT);
1434 	}
1435 
1436 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1437 		if_printf(sc->vxl_ifp, "interface %s does not support "
1438 		     "multicast\n", sc->vxl_mc_ifname);
1439 		if_rele(ifp);
1440 		return (ENOTSUP);
1441 	}
1442 
1443 	sc->vxl_mc_ifp = ifp;
1444 	sc->vxl_mc_ifindex = ifp->if_index;
1445 
1446 	return (0);
1447 }
1448 
1449 static int
1450 vxlan_setup_multicast(struct vxlan_softc *sc)
1451 {
1452 	const union vxlan_sockaddr *group;
1453 	int error;
1454 
1455 	group = &sc->vxl_dst_addr;
1456 	error = 0;
1457 
1458 	if (sc->vxl_mc_ifname[0] != '\0') {
1459 		error = vxlan_setup_multicast_interface(sc);
1460 		if (error)
1461 			return (error);
1462 	}
1463 
1464 	/*
1465 	 * Initialize an multicast options structure that is sufficiently
1466 	 * populated for use in the respective IP output routine. This
1467 	 * structure is typically stored in the socket, but our sockets
1468 	 * may be shared among multiple interfaces.
1469 	 */
1470 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1471 		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
1472 		    M_ZERO | M_WAITOK);
1473 		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
1474 		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
1475 		sc->vxl_im4o->imo_multicast_vif = -1;
1476 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1477 		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
1478 		    M_ZERO | M_WAITOK);
1479 		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
1480 		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
1481 	}
1482 
1483 	return (error);
1484 }
1485 
1486 static int
1487 vxlan_setup_socket(struct vxlan_softc *sc)
1488 {
1489 	struct vxlan_socket *vso;
1490 	struct ifnet *ifp;
1491 	union vxlan_sockaddr *saddr, *daddr;
1492 	int multicast, error;
1493 
1494 	vso = NULL;
1495 	ifp = sc->vxl_ifp;
1496 	saddr = &sc->vxl_src_addr;
1497 	daddr = &sc->vxl_dst_addr;
1498 
1499 	multicast = vxlan_sockaddr_in_multicast(daddr);
1500 	MPASS(multicast != -1);
1501 	sc->vxl_vso_mc_index = -1;
1502 
1503 	/*
1504 	 * Try to create the socket. If that fails, attempt to use an
1505 	 * existing socket.
1506 	 */
1507 	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
1508 	if (error) {
1509 		if (multicast != 0)
1510 			vso = vxlan_socket_mc_lookup(saddr);
1511 		else
1512 			vso = vxlan_socket_lookup(saddr);
1513 
1514 		if (vso == NULL) {
1515 			if_printf(ifp, "cannot create socket (error: %d), "
1516 			    "and no existing socket found\n", error);
1517 			goto out;
1518 		}
1519 	}
1520 
1521 	if (multicast != 0) {
1522 		error = vxlan_setup_multicast(sc);
1523 		if (error)
1524 			goto out;
1525 
1526 		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
1527 		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
1528 		if (error)
1529 			goto out;
1530 	}
1531 
1532 	sc->vxl_sock = vso;
1533 	error = vxlan_socket_insert_softc(vso, sc);
1534 	if (error) {
1535 		sc->vxl_sock = NULL;
1536 		if_printf(ifp, "network identifier %d already exists in "
1537 		    "this socket\n", sc->vxl_vni);
1538 		goto out;
1539 	}
1540 
1541 	return (0);
1542 
1543 out:
1544 	if (vso != NULL) {
1545 		if (sc->vxl_vso_mc_index != -1) {
1546 			vxlan_socket_mc_release_group_by_idx(vso,
1547 			    sc->vxl_vso_mc_index);
1548 			sc->vxl_vso_mc_index = -1;
1549 		}
1550 		if (multicast != 0)
1551 			vxlan_free_multicast(sc);
1552 		vxlan_socket_release(vso);
1553 	}
1554 
1555 	return (error);
1556 }
1557 
1558 static void
1559 vxlan_setup_interface(struct vxlan_softc *sc)
1560 {
1561 	struct ifnet *ifp;
1562 
1563 	ifp = sc->vxl_ifp;
1564 	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
1565 
1566 	if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
1567 		ifp->if_hdrlen += sizeof(struct ip);
1568 	else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
1569 		ifp->if_hdrlen += sizeof(struct ip6_hdr);
1570 }
1571 
1572 static int
1573 vxlan_valid_init_config(struct vxlan_softc *sc)
1574 {
1575 	const char *reason;
1576 
1577 	if (vxlan_check_vni(sc->vxl_vni) != 0) {
1578 		reason = "invalid virtual network identifier specified";
1579 		goto fail;
1580 	}
1581 
1582 	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
1583 		reason = "source address type is not supported";
1584 		goto fail;
1585 	}
1586 
1587 	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
1588 		reason = "destination address type is not supported";
1589 		goto fail;
1590 	}
1591 
1592 	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
1593 		reason = "no valid destination address specified";
1594 		goto fail;
1595 	}
1596 
1597 	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
1598 	    sc->vxl_mc_ifname[0] != '\0') {
1599 		reason = "can only specify interface with a group address";
1600 		goto fail;
1601 	}
1602 
1603 	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
1604 		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
1605 		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
1606 			reason = "source and destination address must both "
1607 			    "be either IPv4 or IPv6";
1608 			goto fail;
1609 		}
1610 	}
1611 
1612 	if (sc->vxl_src_addr.in4.sin_port == 0) {
1613 		reason = "local port not specified";
1614 		goto fail;
1615 	}
1616 
1617 	if (sc->vxl_dst_addr.in4.sin_port == 0) {
1618 		reason = "remote port not specified";
1619 		goto fail;
1620 	}
1621 
1622 	return (0);
1623 
1624 fail:
1625 	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
1626 	return (EINVAL);
1627 }
1628 
1629 static void
1630 vxlan_init_wait(struct vxlan_softc *sc)
1631 {
1632 
1633 	VXLAN_LOCK_WASSERT(sc);
1634 	while (sc->vxl_flags & VXLAN_FLAG_INIT)
1635 		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
1636 }
1637 
1638 static void
1639 vxlan_init_complete(struct vxlan_softc *sc)
1640 {
1641 
1642 	VXLAN_WLOCK(sc);
1643 	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
1644 	wakeup(sc);
1645 	VXLAN_WUNLOCK(sc);
1646 }
1647 
1648 static void
1649 vxlan_init(void *xsc)
1650 {
1651 	static const uint8_t empty_mac[ETHER_ADDR_LEN];
1652 	struct vxlan_softc *sc;
1653 	struct ifnet *ifp;
1654 
1655 	sc = xsc;
1656 	ifp = sc->vxl_ifp;
1657 
1658 	VXLAN_WLOCK(sc);
1659 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1660 		VXLAN_WUNLOCK(sc);
1661 		return;
1662 	}
1663 	sc->vxl_flags |= VXLAN_FLAG_INIT;
1664 	VXLAN_WUNLOCK(sc);
1665 
1666 	if (vxlan_valid_init_config(sc) != 0)
1667 		goto out;
1668 
1669 	vxlan_setup_interface(sc);
1670 
1671 	if (vxlan_setup_socket(sc) != 0)
1672 		goto out;
1673 
1674 	/* Initialize the default forwarding entry. */
1675 	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
1676 	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
1677 
1678 	VXLAN_WLOCK(sc);
1679 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1680 	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
1681 	    vxlan_timer, sc);
1682 	VXLAN_WUNLOCK(sc);
1683 
1684 	if_link_state_change(ifp, LINK_STATE_UP);
1685 out:
1686 	vxlan_init_complete(sc);
1687 }
1688 
/*
 * Drop a reference on sc and, when VXLAN_RELEASE() returns non-zero, wake
 * up any thread sleeping on sc.
 */
static void
vxlan_release(struct vxlan_softc *sc)
{

	/*
	 * Once our reference is gone the softc may be destroyed at any
	 * moment, so the wakeup cannot be serialized with the softc lock.
	 * Sleepers use a timeout, which makes a lost wakeup unfortunate
	 * but harmless.
	 */
	if (VXLAN_RELEASE(sc) == 0)
		return;

	wakeup(sc);
}
1702 
1703 static void
1704 vxlan_teardown_wait(struct vxlan_softc *sc)
1705 {
1706 
1707 	VXLAN_LOCK_WASSERT(sc);
1708 	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1709 		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
1710 }
1711 
1712 static void
1713 vxlan_teardown_complete(struct vxlan_softc *sc)
1714 {
1715 
1716 	VXLAN_WLOCK(sc);
1717 	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
1718 	wakeup(sc);
1719 	VXLAN_WUNLOCK(sc);
1720 }
1721 
1722 static void
1723 vxlan_teardown_locked(struct vxlan_softc *sc)
1724 {
1725 	struct ifnet *ifp;
1726 	struct vxlan_socket *vso;
1727 
1728 	ifp = sc->vxl_ifp;
1729 
1730 	VXLAN_LOCK_WASSERT(sc);
1731 	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
1732 
1733 	ifp->if_flags &= ~IFF_UP;
1734 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1735 	callout_stop(&sc->vxl_callout);
1736 	vso = sc->vxl_sock;
1737 	sc->vxl_sock = NULL;
1738 
1739 	VXLAN_WUNLOCK(sc);
1740 	if_link_state_change(ifp, LINK_STATE_DOWN);
1741 
1742 	if (vso != NULL) {
1743 		vxlan_socket_remove_softc(vso, sc);
1744 
1745 		if (sc->vxl_vso_mc_index != -1) {
1746 			vxlan_socket_mc_release_group_by_idx(vso,
1747 			    sc->vxl_vso_mc_index);
1748 			sc->vxl_vso_mc_index = -1;
1749 		}
1750 	}
1751 
1752 	VXLAN_WLOCK(sc);
1753 	while (sc->vxl_refcnt != 0)
1754 		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
1755 	VXLAN_WUNLOCK(sc);
1756 
1757 	callout_drain(&sc->vxl_callout);
1758 
1759 	vxlan_free_multicast(sc);
1760 	if (vso != NULL)
1761 		vxlan_socket_release(vso);
1762 
1763 	vxlan_teardown_complete(sc);
1764 }
1765 
1766 static void
1767 vxlan_teardown(struct vxlan_softc *sc)
1768 {
1769 
1770 	VXLAN_WLOCK(sc);
1771 	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
1772 		vxlan_teardown_wait(sc);
1773 		VXLAN_WUNLOCK(sc);
1774 		return;
1775 	}
1776 
1777 	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1778 	vxlan_teardown_locked(sc);
1779 }
1780 
1781 static void
1782 vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
1783     struct vxlan_softc_head *list)
1784 {
1785 
1786 	VXLAN_WLOCK(sc);
1787 
1788 	if (sc->vxl_mc_ifp != ifp)
1789 		goto out;
1790 	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1791 		goto out;
1792 
1793 	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1794 	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
1795 
1796 out:
1797 	VXLAN_WUNLOCK(sc);
1798 }
1799 
1800 static void
1801 vxlan_timer(void *xsc)
1802 {
1803 	struct vxlan_softc *sc;
1804 
1805 	sc = xsc;
1806 	VXLAN_LOCK_WASSERT(sc);
1807 
1808 	vxlan_ftable_expire(sc);
1809 	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
1810 }
1811 
1812 static int
1813 vxlan_ioctl_ifflags(struct vxlan_softc *sc)
1814 {
1815 	struct ifnet *ifp;
1816 
1817 	ifp = sc->vxl_ifp;
1818 
1819 	if (ifp->if_flags & IFF_UP) {
1820 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1821 			vxlan_init(sc);
1822 	} else {
1823 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1824 			vxlan_teardown(sc);
1825 	}
1826 
1827 	return (0);
1828 }
1829 
1830 static int
1831 vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
1832 {
1833 	struct rm_priotracker tracker;
1834 	struct ifvxlancfg *cfg;
1835 
1836 	cfg = arg;
1837 	bzero(cfg, sizeof(*cfg));
1838 
1839 	VXLAN_RLOCK(sc, &tracker);
1840 	cfg->vxlc_vni = sc->vxl_vni;
1841 	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
1842 	    sizeof(union vxlan_sockaddr));
1843 	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
1844 	    sizeof(union vxlan_sockaddr));
1845 	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
1846 	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
1847 	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
1848 	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
1849 	cfg->vxlc_port_min = sc->vxl_min_port;
1850 	cfg->vxlc_port_max = sc->vxl_max_port;
1851 	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
1852 	cfg->vxlc_ttl = sc->vxl_ttl;
1853 	VXLAN_RUNLOCK(sc, &tracker);
1854 
1855 #ifdef INET6
1856 	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
1857 		sa6_recoverscope(&cfg->vxlc_local_sa.in6);
1858 	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
1859 		sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
1860 #endif
1861 
1862 	return (0);
1863 }
1864 
1865 static int
1866 vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
1867 {
1868 	struct ifvxlancmd *cmd;
1869 	int error;
1870 
1871 	cmd = arg;
1872 
1873 	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
1874 		return (EINVAL);
1875 
1876 	VXLAN_WLOCK(sc);
1877 	if (vxlan_can_change_config(sc)) {
1878 		sc->vxl_vni = cmd->vxlcmd_vni;
1879 		error = 0;
1880 	} else
1881 		error = EBUSY;
1882 	VXLAN_WUNLOCK(sc);
1883 
1884 	return (error);
1885 }
1886 
1887 static int
1888 vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
1889 {
1890 	struct ifvxlancmd *cmd;
1891 	union vxlan_sockaddr *vxlsa;
1892 	int error;
1893 
1894 	cmd = arg;
1895 	vxlsa = &cmd->vxlcmd_sa;
1896 
1897 	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1898 		return (EINVAL);
1899 	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
1900 		return (EINVAL);
1901 	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
1902 		error = vxlan_sockaddr_in6_embedscope(vxlsa);
1903 		if (error)
1904 			return (error);
1905 	}
1906 
1907 	VXLAN_WLOCK(sc);
1908 	if (vxlan_can_change_config(sc)) {
1909 		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
1910 		error = 0;
1911 	} else
1912 		error = EBUSY;
1913 	VXLAN_WUNLOCK(sc);
1914 
1915 	return (error);
1916 }
1917 
1918 static int
1919 vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
1920 {
1921 	struct ifvxlancmd *cmd;
1922 	union vxlan_sockaddr *vxlsa;
1923 	int error;
1924 
1925 	cmd = arg;
1926 	vxlsa = &cmd->vxlcmd_sa;
1927 
1928 	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1929 		return (EINVAL);
1930 	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
1931 		error = vxlan_sockaddr_in6_embedscope(vxlsa);
1932 		if (error)
1933 			return (error);
1934 	}
1935 
1936 	VXLAN_WLOCK(sc);
1937 	if (vxlan_can_change_config(sc)) {
1938 		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
1939 		error = 0;
1940 	} else
1941 		error = EBUSY;
1942 	VXLAN_WUNLOCK(sc);
1943 
1944 	return (error);
1945 }
1946 
1947 static int
1948 vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
1949 {
1950 	struct ifvxlancmd *cmd;
1951 	int error;
1952 
1953 	cmd = arg;
1954 
1955 	if (cmd->vxlcmd_port == 0)
1956 		return (EINVAL);
1957 
1958 	VXLAN_WLOCK(sc);
1959 	if (vxlan_can_change_config(sc)) {
1960 		sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
1961 		error = 0;
1962 	} else
1963 		error = EBUSY;
1964 	VXLAN_WUNLOCK(sc);
1965 
1966 	return (error);
1967 }
1968 
1969 static int
1970 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
1971 {
1972 	struct ifvxlancmd *cmd;
1973 	int error;
1974 
1975 	cmd = arg;
1976 
1977 	if (cmd->vxlcmd_port == 0)
1978 		return (EINVAL);
1979 
1980 	VXLAN_WLOCK(sc);
1981 	if (vxlan_can_change_config(sc)) {
1982 		sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
1983 		error = 0;
1984 	} else
1985 		error = EBUSY;
1986 	VXLAN_WUNLOCK(sc);
1987 
1988 	return (error);
1989 }
1990 
1991 static int
1992 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
1993 {
1994 	struct ifvxlancmd *cmd;
1995 	uint16_t min, max;
1996 	int error;
1997 
1998 	cmd = arg;
1999 	min = cmd->vxlcmd_port_min;
2000 	max = cmd->vxlcmd_port_max;
2001 
2002 	if (max < min)
2003 		return (EINVAL);
2004 
2005 	VXLAN_WLOCK(sc);
2006 	if (vxlan_can_change_config(sc)) {
2007 		sc->vxl_min_port = min;
2008 		sc->vxl_max_port = max;
2009 		error = 0;
2010 	} else
2011 		error = EBUSY;
2012 	VXLAN_WUNLOCK(sc);
2013 
2014 	return (error);
2015 }
2016 
2017 static int
2018 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
2019 {
2020 	struct ifvxlancmd *cmd;
2021 	int error;
2022 
2023 	cmd = arg;
2024 
2025 	VXLAN_WLOCK(sc);
2026 	if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
2027 		sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
2028 		error = 0;
2029 	} else
2030 		error = EINVAL;
2031 	VXLAN_WUNLOCK(sc);
2032 
2033 	return (error);
2034 }
2035 
2036 static int
2037 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
2038 {
2039 	struct ifvxlancmd *cmd;
2040 	int error;
2041 
2042 	cmd = arg;
2043 
2044 	VXLAN_WLOCK(sc);
2045 	if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
2046 		sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
2047 		error = 0;
2048 	} else
2049 		error = EINVAL;
2050 	VXLAN_WUNLOCK(sc);
2051 
2052 	return (error);
2053 }
2054 
2055 static int
2056 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
2057 {
2058 	struct ifvxlancmd *cmd;
2059 	int error;
2060 
2061 	cmd = arg;
2062 
2063 	VXLAN_WLOCK(sc);
2064 	if (vxlan_can_change_config(sc)) {
2065 		strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
2066 		error = 0;
2067 	} else
2068 		error = EBUSY;
2069 	VXLAN_WUNLOCK(sc);
2070 
2071 	return (error);
2072 }
2073 
2074 static int
2075 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
2076 {
2077 	struct ifvxlancmd *cmd;
2078 	int error;
2079 
2080 	cmd = arg;
2081 
2082 	VXLAN_WLOCK(sc);
2083 	if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
2084 		sc->vxl_ttl = cmd->vxlcmd_ttl;
2085 		if (sc->vxl_im4o != NULL)
2086 			sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
2087 		if (sc->vxl_im6o != NULL)
2088 			sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
2089 		error = 0;
2090 	} else
2091 		error = EINVAL;
2092 	VXLAN_WUNLOCK(sc);
2093 
2094 	return (error);
2095 }
2096 
2097 static int
2098 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
2099 {
2100 	struct ifvxlancmd *cmd;
2101 
2102 	cmd = arg;
2103 
2104 	VXLAN_WLOCK(sc);
2105 	if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
2106 		sc->vxl_flags |= VXLAN_FLAG_LEARN;
2107 	else
2108 		sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2109 	VXLAN_WUNLOCK(sc);
2110 
2111 	return (0);
2112 }
2113 
2114 static int
2115 vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
2116 {
2117 	union vxlan_sockaddr vxlsa;
2118 	struct ifvxlancmd *cmd;
2119 	struct vxlan_ftable_entry *fe;
2120 	int error;
2121 
2122 	cmd = arg;
2123 	vxlsa = cmd->vxlcmd_sa;
2124 
2125 	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
2126 		return (EINVAL);
2127 	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
2128 		return (EINVAL);
2129 	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2130 		return (EINVAL);
2131 	/* BMV: We could support both IPv4 and IPv6 later. */
2132 	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
2133 		return (EAFNOSUPPORT);
2134 
2135 	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
2136 		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
2137 		if (error)
2138 			return (error);
2139 	}
2140 
2141 	fe = vxlan_ftable_entry_alloc();
2142 	if (fe == NULL)
2143 		return (ENOMEM);
2144 
2145 	if (vxlsa.in4.sin_port == 0)
2146 		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
2147 
2148 	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
2149 	    VXLAN_FE_FLAG_STATIC);
2150 
2151 	VXLAN_WLOCK(sc);
2152 	error = vxlan_ftable_entry_insert(sc, fe);
2153 	VXLAN_WUNLOCK(sc);
2154 
2155 	if (error)
2156 		vxlan_ftable_entry_free(fe);
2157 
2158 	return (error);
2159 }
2160 
2161 static int
2162 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
2163 {
2164 	struct ifvxlancmd *cmd;
2165 	struct vxlan_ftable_entry *fe;
2166 	int error;
2167 
2168 	cmd = arg;
2169 
2170 	VXLAN_WLOCK(sc);
2171 	fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
2172 	if (fe != NULL) {
2173 		vxlan_ftable_entry_destroy(sc, fe);
2174 		error = 0;
2175 	} else
2176 		error = ENOENT;
2177 	VXLAN_WUNLOCK(sc);
2178 
2179 	return (error);
2180 }
2181 
2182 static int
2183 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
2184 {
2185 	struct ifvxlancmd *cmd;
2186 	int all;
2187 
2188 	cmd = arg;
2189 	all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
2190 
2191 	VXLAN_WLOCK(sc);
2192 	vxlan_ftable_flush(sc, all);
2193 	VXLAN_WUNLOCK(sc);
2194 
2195 	return (0);
2196 }
2197 
2198 static int
2199 vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
2200 {
2201 	const struct vxlan_control *vc;
2202 	union {
2203 		struct ifvxlancfg	cfg;
2204 		struct ifvxlancmd	cmd;
2205 	} args;
2206 	int out, error;
2207 
2208 	if (ifd->ifd_cmd >= vxlan_control_table_size)
2209 		return (EINVAL);
2210 
2211 	bzero(&args, sizeof(args));
2212 	vc = &vxlan_control_table[ifd->ifd_cmd];
2213 	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
2214 
2215 	if ((get != 0 && out == 0) || (get == 0 && out != 0))
2216 		return (EINVAL);
2217 
2218 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
2219 		error = priv_check(curthread, PRIV_NET_VXLAN);
2220 		if (error)
2221 			return (error);
2222 	}
2223 
2224 	if (ifd->ifd_len != vc->vxlc_argsize ||
2225 	    ifd->ifd_len > sizeof(args))
2226 		return (EINVAL);
2227 
2228 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
2229 		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
2230 		if (error)
2231 			return (error);
2232 	}
2233 
2234 	error = vc->vxlc_func(sc, &args);
2235 	if (error)
2236 		return (error);
2237 
2238 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
2239 		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
2240 		if (error)
2241 			return (error);
2242 	}
2243 
2244 	return (0);
2245 }
2246 
2247 static int
2248 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2249 {
2250 	struct vxlan_softc *sc;
2251 	struct ifreq *ifr;
2252 	struct ifdrv *ifd;
2253 	int error;
2254 
2255 	sc = ifp->if_softc;
2256 	ifr = (struct ifreq *) data;
2257 	ifd = (struct ifdrv *) data;
2258 
2259 	error = 0;
2260 
2261 	switch (cmd) {
2262 	case SIOCADDMULTI:
2263 	case SIOCDELMULTI:
2264 		break;
2265 
2266 	case SIOCGDRVSPEC:
2267 	case SIOCSDRVSPEC:
2268 		error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
2269 		break;
2270 
2271 	case SIOCSIFFLAGS:
2272 		error = vxlan_ioctl_ifflags(sc);
2273 		break;
2274 
2275 	case SIOCSIFMEDIA:
2276 	case SIOCGIFMEDIA:
2277 		error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
2278 		break;
2279 
2280 	case SIOCSIFMTU:
2281 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU)
2282 			error = EINVAL;
2283 		else
2284 			ifp->if_mtu = ifr->ifr_mtu;
2285 		break;
2286 
2287 	default:
2288 		error = ether_ioctl(ifp, cmd, data);
2289 		break;
2290 	}
2291 
2292 	return (error);
2293 }
2294 
2295 #if defined(INET) || defined(INET6)
2296 static uint16_t
2297 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
2298 {
2299 	int range;
2300 	uint32_t hash;
2301 
2302 	range = sc->vxl_max_port - sc->vxl_min_port + 1;
2303 
2304 	if (M_HASHTYPE_ISHASH(m))
2305 		hash = m->m_pkthdr.flowid;
2306 	else
2307 		hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
2308 		    sc->vxl_port_hash_key);
2309 
2310 	return (sc->vxl_min_port + (hash % range));
2311 }
2312 
2313 static void
2314 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
2315     uint16_t srcport, uint16_t dstport)
2316 {
2317 	struct vxlanudphdr *hdr;
2318 	struct udphdr *udph;
2319 	struct vxlan_header *vxh;
2320 	int len;
2321 
2322 	len = m->m_pkthdr.len - ipoff;
2323 	MPASS(len >= sizeof(struct vxlanudphdr));
2324 	hdr = mtodo(m, ipoff);
2325 
2326 	udph = &hdr->vxlh_udp;
2327 	udph->uh_sport = srcport;
2328 	udph->uh_dport = dstport;
2329 	udph->uh_ulen = htons(len);
2330 	udph->uh_sum = 0;
2331 
2332 	vxh = &hdr->vxlh_hdr;
2333 	vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
2334 	vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
2335 }
2336 #endif
2337 
/*
 * Encapsulate an Ethernet frame in IPv4/UDP/VXLAN and send it to the
 * address and port in fvxlsa via ip_output().
 *
 * The mbuf is consumed on every path.  Interface output counters are
 * updated from the result of ip_output(); a failed header prepend counts
 * as an output error and yields ENOBUFS.  Without INET support the frame
 * is dropped and ENOTSUP is returned.
 */
static int
vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
    struct mbuf *m)
{
#ifdef INET
	struct ifnet *ifp = sc->vxl_ifp;
	struct ip *iph;
	struct in_addr src, dst;
	uint16_t sport, dport;
	int pktlen, was_mcast, error;

	src = sc->vxl_src_addr.in4.sin_addr;
	dst = fvxlsa->in4.sin_addr;
	/* The source port hash looks at the inner frame: pick it first. */
	sport = vxlan_pick_source_port(sc, m);
	dport = fvxlsa->in4.sin_port;

	M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
	    M_NOWAIT);
	if (m == NULL) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENOBUFS);
	}

	pktlen = m->m_pkthdr.len;

	iph = mtod(m, struct ip *);
	iph->ip_tos = 0;
	iph->ip_len = htons(pktlen);
	iph->ip_off = 0;
	iph->ip_ttl = sc->vxl_ttl;
	iph->ip_p = IPPROTO_UDP;
	iph->ip_sum = 0;
	iph->ip_src = src;
	iph->ip_dst = dst;

	vxlan_encap_header(sc, m, sizeof(struct ip), sport, dport);

	/*
	 * Remember whether the inner frame was broadcast/multicast for the
	 * statistics, then clear the flags for the outer packet.
	 */
	was_mcast = (m->m_flags & (M_MCAST | M_BCAST)) != 0;
	m->m_flags &= ~(M_MCAST | M_BCAST);

	error = ip_output(m, NULL, NULL, 0, sc->vxl_im4o, NULL);
	if (error != 0) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (error);
	}

	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_OBYTES, pktlen);
	if (was_mcast)
		if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);

	return (0);
#else
	m_freem(m);
	return (ENOTSUP);
#endif
}
2394 
/*
 * Encapsulate an Ethernet frame in IPv6/UDP/VXLAN and send it to the
 * address and port in fvxlsa via ip6_output().
 *
 * The mbuf is consumed on every path.  Interface output counters are
 * updated from the result of ip6_output(); a failed header prepend counts
 * as an output error and yields ENOBUFS.  Without INET6 support the frame
 * is dropped and ENOTSUP is returned.
 */
static int
vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
    struct mbuf *m)
{
#ifdef INET6
	struct ifnet *ifp = sc->vxl_ifp;
	struct ip6_hdr *ip6h;
	struct udphdr *udph;
	const struct in6_addr *src, *dst;
	uint16_t sport, dport;
	int pktlen, was_mcast, error;

	src = &sc->vxl_src_addr.in6.sin6_addr;
	dst = &fvxlsa->in6.sin6_addr;
	/* The source port hash looks at the inner frame: pick it first. */
	sport = vxlan_pick_source_port(sc, m);
	dport = fvxlsa->in6.sin6_port;

	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
	    M_NOWAIT);
	if (m == NULL) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENOBUFS);
	}

	pktlen = m->m_pkthdr.len;

	ip6h = mtod(m, struct ip6_hdr *);
	ip6h->ip6_flow = 0;		/* BMV: Keep in forwarding entry? */
	ip6h->ip6_vfc = IPV6_VERSION;
	ip6h->ip6_plen = 0;
	ip6h->ip6_nxt = IPPROTO_UDP;
	ip6h->ip6_hlim = sc->vxl_ttl;
	ip6h->ip6_src = *src;
	ip6h->ip6_dst = *dst;

	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), sport, dport);

	/*
	 * XXX BMV We need support for RFC6935 before we can send and
	 * receive IPv6 UDP packets with a zero checksum.
	 *
	 * Seed the UDP checksum with the pseudo-header sum and flag the
	 * packet with CSUM_UDP_IPV6 so the checksum gets completed later.
	 */
	udph = mtodo(m, sizeof(struct ip6_hdr));
	udph->uh_sum = in6_cksum_pseudo(ip6h,
	    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
	m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
	m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);

	/*
	 * Remember whether the inner frame was broadcast/multicast for the
	 * statistics, then clear the flags for the outer packet.
	 */
	was_mcast = (m->m_flags & (M_MCAST | M_BCAST)) != 0;
	m->m_flags &= ~(M_MCAST | M_BCAST);

	error = ip6_output(m, NULL, NULL, 0, sc->vxl_im6o, NULL, NULL);
	if (error != 0) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (error);
	}

	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_OBYTES, pktlen);
	if (was_mcast)
		if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);

	return (0);
#else
	m_freem(m);
	return (ENOTSUP);
#endif
}
2462 
2463 static int
2464 vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
2465 {
2466 	struct rm_priotracker tracker;
2467 	union vxlan_sockaddr vxlsa;
2468 	struct vxlan_softc *sc;
2469 	struct vxlan_ftable_entry *fe;
2470 	struct ifnet *mcifp;
2471 	struct ether_header *eh;
2472 	int ipv4, error;
2473 
2474 	sc = ifp->if_softc;
2475 	eh = mtod(m, struct ether_header *);
2476 	fe = NULL;
2477 	mcifp = NULL;
2478 
2479 	ETHER_BPF_MTAP(ifp, m);
2480 
2481 	VXLAN_RLOCK(sc, &tracker);
2482 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2483 		VXLAN_RUNLOCK(sc, &tracker);
2484 		m_freem(m);
2485 		return (ENETDOWN);
2486 	}
2487 
2488 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
2489 		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
2490 	if (fe == NULL)
2491 		fe = &sc->vxl_default_fe;
2492 	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
2493 
2494 	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
2495 	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2496 		mcifp = vxlan_multicast_if_ref(sc, ipv4);
2497 
2498 	VXLAN_ACQUIRE(sc);
2499 	VXLAN_RUNLOCK(sc, &tracker);
2500 
2501 	if (ipv4 != 0)
2502 		error = vxlan_encap4(sc, &vxlsa, m);
2503 	else
2504 		error = vxlan_encap6(sc, &vxlsa, m);
2505 
2506 	vxlan_release(sc);
2507 	if (mcifp != NULL)
2508 		if_rele(mcifp);
2509 
2510 	return (error);
2511 }
2512 
2513 static void
2514 vxlan_qflush(struct ifnet *ifp __unused)
2515 {
2516 }
2517 
2518 static void
2519 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
2520     const struct sockaddr *srcsa, void *xvso)
2521 {
2522 	struct vxlan_socket *vso;
2523 	struct vxlan_header *vxh, vxlanhdr;
2524 	uint32_t vni;
2525 	int error __unused;
2526 
2527 	M_ASSERTPKTHDR(m);
2528 	vso = xvso;
2529 	offset += sizeof(struct udphdr);
2530 
2531 	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
2532 		goto out;
2533 
2534 	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
2535 		m_copydata(m, offset, sizeof(struct vxlan_header),
2536 		    (caddr_t) &vxlanhdr);
2537 		vxh = &vxlanhdr;
2538 	} else
2539 		vxh = mtodo(m, offset);
2540 
2541 	/*
2542 	 * Drop if there is a reserved bit set in either the flags or VNI
2543 	 * fields of the header. This goes against the specification, but
2544 	 * a bit set may indicate an unsupported new feature. This matches
2545 	 * the behavior of the Linux implementation.
2546 	 */
2547 	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
2548 	    vxh->vxlh_vni & ~VXLAN_VNI_MASK)
2549 		goto out;
2550 
2551 	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
2552 	/* Adjust to the start of the inner Ethernet frame. */
2553 	m_adj(m, offset + sizeof(struct vxlan_header));
2554 
2555 	error = vxlan_input(vso, vni, &m, srcsa);
2556 	MPASS(error != 0 || m == NULL);
2557 
2558 out:
2559 	if (m != NULL)
2560 		m_freem(m);
2561 }
2562 
2563 static int
2564 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
2565     const struct sockaddr *sa)
2566 {
2567 	struct vxlan_softc *sc;
2568 	struct ifnet *ifp;
2569 	struct mbuf *m;
2570 	struct ether_header *eh;
2571 	int error;
2572 
2573 	sc = vxlan_socket_lookup_softc(vso, vni);
2574 	if (sc == NULL)
2575 		return (ENOENT);
2576 
2577 	ifp = sc->vxl_ifp;
2578 	m = *m0;
2579 	eh = mtod(m, struct ether_header *);
2580 
2581 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2582 		error = ENETDOWN;
2583 		goto out;
2584 	} else if (ifp == m->m_pkthdr.rcvif) {
2585 		/* XXX Does not catch more complex loops. */
2586 		error = EDEADLK;
2587 		goto out;
2588 	}
2589 
2590 	if (sc->vxl_flags & VXLAN_FLAG_LEARN)
2591 		vxlan_ftable_learn(sc, sa, eh->ether_shost);
2592 
2593 	m_clrprotoflags(m);
2594 	m->m_pkthdr.rcvif = ifp;
2595 	M_SETFIB(m, ifp->if_fib);
2596 
2597 	error = netisr_queue_src(NETISR_ETHER, 0, m);
2598 	*m0 = NULL;
2599 
2600 out:
2601 	vxlan_release(sc);
2602 	return (error);
2603 }
2604 
2605 static void
2606 vxlan_set_default_config(struct vxlan_softc *sc)
2607 {
2608 
2609 	sc->vxl_flags |= VXLAN_FLAG_LEARN;
2610 
2611 	sc->vxl_vni = VXLAN_VNI_MAX;
2612 	sc->vxl_ttl = IPDEFTTL;
2613 
2614 	if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
2615 		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
2616 		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
2617 	} else {
2618 		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2619 		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2620 	}
2621 
2622 	sc->vxl_min_port = V_ipport_firstauto;
2623 	sc->vxl_max_port = V_ipport_lastauto;
2624 
2625 	sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
2626 	sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
2627 }
2628 
2629 static int
2630 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
2631 {
2632 
2633 #ifndef INET
2634 	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
2635 	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
2636 		return (EAFNOSUPPORT);
2637 #endif
2638 
2639 #ifndef INET6
2640 	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
2641 	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
2642 		return (EAFNOSUPPORT);
2643 #else
2644 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
2645 		int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa);
2646 		if (error)
2647 			return (error);
2648 	}
2649 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
2650 		int error = vxlan_sockaddr_in6_embedscope(
2651 		   &vxlp->vxlp_remote_sa);
2652 		if (error)
2653 			return (error);
2654 	}
2655 #endif
2656 
2657 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
2658 		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
2659 			sc->vxl_vni = vxlp->vxlp_vni;
2660 	}
2661 
2662 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
2663 		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
2664 		sc->vxl_src_addr.in4.sin_family = AF_INET;
2665 		sc->vxl_src_addr.in4.sin_addr =
2666 		    vxlp->vxlp_local_sa.in4.sin_addr;
2667 	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
2668 		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
2669 		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
2670 		sc->vxl_src_addr.in6.sin6_addr =
2671 		    vxlp->vxlp_local_sa.in6.sin6_addr;
2672 	}
2673 
2674 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
2675 		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
2676 		sc->vxl_dst_addr.in4.sin_family = AF_INET;
2677 		sc->vxl_dst_addr.in4.sin_addr =
2678 		    vxlp->vxlp_remote_sa.in4.sin_addr;
2679 	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
2680 		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
2681 		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
2682 		sc->vxl_dst_addr.in6.sin6_addr =
2683 		    vxlp->vxlp_remote_sa.in6.sin6_addr;
2684 	}
2685 
2686 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
2687 		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
2688 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
2689 		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
2690 
2691 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
2692 		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
2693 			sc->vxl_min_port = vxlp->vxlp_min_port;
2694 			sc->vxl_max_port = vxlp->vxlp_max_port;
2695 		}
2696 	}
2697 
2698 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
2699 		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
2700 
2701 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
2702 		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
2703 			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
2704 	}
2705 
2706 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
2707 		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
2708 			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
2709 	}
2710 
2711 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
2712 		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
2713 			sc->vxl_ttl = vxlp->vxlp_ttl;
2714 	}
2715 
2716 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
2717 		if (vxlp->vxlp_learn == 0)
2718 			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2719 	}
2720 
2721 	return (0);
2722 }
2723 
2724 static int
2725 vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params)
2726 {
2727 	struct vxlan_softc *sc;
2728 	struct ifnet *ifp;
2729 	struct ifvxlanparam vxlp;
2730 	int error;
2731 
2732 	sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
2733 	sc->vxl_unit = unit;
2734 	vxlan_set_default_config(sc);
2735 
2736 	if (params != 0) {
2737 		error = copyin(params, &vxlp, sizeof(vxlp));
2738 		if (error)
2739 			goto fail;
2740 
2741 		error = vxlan_set_user_config(sc, &vxlp);
2742 		if (error)
2743 			goto fail;
2744 	}
2745 
2746 	ifp = if_alloc(IFT_ETHER);
2747 	if (ifp == NULL) {
2748 		error = ENOSPC;
2749 		goto fail;
2750 	}
2751 
2752 	sc->vxl_ifp = ifp;
2753 	rm_init(&sc->vxl_lock, "vxlanrm");
2754 	callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
2755 	sc->vxl_port_hash_key = arc4random();
2756 	vxlan_ftable_init(sc);
2757 
2758 	vxlan_sysctl_setup(sc);
2759 
2760 	ifp->if_softc = sc;
2761 	if_initname(ifp, vxlan_name, unit);
2762 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2763 	ifp->if_init = vxlan_init;
2764 	ifp->if_ioctl = vxlan_ioctl;
2765 	ifp->if_transmit = vxlan_transmit;
2766 	ifp->if_qflush = vxlan_qflush;
2767 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
2768 	ifp->if_capenable |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
2769 
2770 	ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status);
2771 	ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL);
2772 	ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO);
2773 
2774 	ether_gen_addr(ifp, &sc->vxl_hwaddr);
2775 	ether_ifattach(ifp, sc->vxl_hwaddr.octet);
2776 
2777 	ifp->if_baudrate = 0;
2778 	ifp->if_hdrlen = 0;
2779 
2780 	return (0);
2781 
2782 fail:
2783 	free(sc, M_VXLAN);
2784 	return (error);
2785 }
2786 
2787 static void
2788 vxlan_clone_destroy(struct ifnet *ifp)
2789 {
2790 	struct vxlan_softc *sc;
2791 
2792 	sc = ifp->if_softc;
2793 
2794 	vxlan_teardown(sc);
2795 
2796 	vxlan_ftable_flush(sc, 1);
2797 
2798 	ether_ifdetach(ifp);
2799 	if_free(ifp);
2800 	ifmedia_removeall(&sc->vxl_media);
2801 
2802 	vxlan_ftable_fini(sc);
2803 
2804 	vxlan_sysctl_destroy(sc);
2805 	rm_destroy(&sc->vxl_lock);
2806 	free(sc, M_VXLAN);
2807 }
2808 
2809 /* BMV: Taken from if_bridge. */
2810 static uint32_t
2811 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
2812 {
2813 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
2814 
2815 	b += addr[5] << 8;
2816 	b += addr[4];
2817 	a += addr[3] << 24;
2818 	a += addr[2] << 16;
2819 	a += addr[1] << 8;
2820 	a += addr[0];
2821 
2822 /*
2823  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2824  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2825  */
2826 #define	mix(a, b, c)							\
2827 do {									\
2828 	a -= b; a -= c; a ^= (c >> 13);					\
2829 	b -= c; b -= a; b ^= (a << 8);					\
2830 	c -= a; c -= b; c ^= (b >> 13);					\
2831 	a -= b; a -= c; a ^= (c >> 12);					\
2832 	b -= c; b -= a; b ^= (a << 16);					\
2833 	c -= a; c -= b; c ^= (b >> 5);					\
2834 	a -= b; a -= c; a ^= (c >> 3);					\
2835 	b -= c; b -= a; b ^= (a << 10);					\
2836 	c -= a; c -= b; c ^= (b >> 15);					\
2837 } while (0)
2838 
2839 	mix(a, b, c);
2840 
2841 #undef mix
2842 
2843 	return (c);
2844 }
2845 
/*
 * ifmedia change callback.  Media changes are accepted but have no effect;
 * the function always reports success.
 */
static int
vxlan_media_change(struct ifnet *ifp)
{
	const int success = 0;

	/* Ignore. */
	return (success);
}
2853 
2854 static void
2855 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2856 {
2857 
2858 	ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
2859 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2860 }
2861 
2862 static int
2863 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
2864     const struct sockaddr *sa)
2865 {
2866 
2867 	return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
2868 }
2869 
2870 static void
2871 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
2872     const struct sockaddr *sa)
2873 {
2874 
2875 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
2876 	bzero(vxladdr, sizeof(*vxladdr));
2877 
2878 	if (sa->sa_family == AF_INET) {
2879 		vxladdr->in4 = *satoconstsin(sa);
2880 		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
2881 	} else if (sa->sa_family == AF_INET6) {
2882 		vxladdr->in6 = *satoconstsin6(sa);
2883 		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
2884 	}
2885 }
2886 
2887 static int
2888 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
2889     const struct sockaddr *sa)
2890 {
2891 	int equal;
2892 
2893 	if (sa->sa_family == AF_INET) {
2894 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2895 		equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
2896 	} else if (sa->sa_family == AF_INET6) {
2897 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2898 		equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
2899 	} else
2900 		equal = 0;
2901 
2902 	return (equal);
2903 }
2904 
2905 static void
2906 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
2907     const struct sockaddr *sa)
2908 {
2909 
2910 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
2911 
2912 	if (sa->sa_family == AF_INET) {
2913 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2914 		vxladdr->in4.sin_family = AF_INET;
2915 		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
2916 		vxladdr->in4.sin_addr = *in4;
2917 	} else if (sa->sa_family == AF_INET6) {
2918 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2919 		vxladdr->in6.sin6_family = AF_INET6;
2920 		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
2921 		vxladdr->in6.sin6_addr = *in6;
2922 	}
2923 }
2924 
2925 static int
2926 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
2927 {
2928 	const struct sockaddr *sa;
2929 	int supported;
2930 
2931 	sa = &vxladdr->sa;
2932 	supported = 0;
2933 
2934 	if (sa->sa_family == AF_UNSPEC && unspec != 0) {
2935 		supported = 1;
2936 	} else if (sa->sa_family == AF_INET) {
2937 #ifdef INET
2938 		supported = 1;
2939 #endif
2940 	} else if (sa->sa_family == AF_INET6) {
2941 #ifdef INET6
2942 		supported = 1;
2943 #endif
2944 	}
2945 
2946 	return (supported);
2947 }
2948 
2949 static int
2950 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
2951 {
2952 	const struct sockaddr *sa;
2953 	int any;
2954 
2955 	sa = &vxladdr->sa;
2956 
2957 	if (sa->sa_family == AF_INET) {
2958 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2959 		any = in4->s_addr == INADDR_ANY;
2960 	} else if (sa->sa_family == AF_INET6) {
2961 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2962 		any = IN6_IS_ADDR_UNSPECIFIED(in6);
2963 	} else
2964 		any = -1;
2965 
2966 	return (any);
2967 }
2968 
2969 static int
2970 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
2971 {
2972 	const struct sockaddr *sa;
2973 	int mc;
2974 
2975 	sa = &vxladdr->sa;
2976 
2977 	if (sa->sa_family == AF_INET) {
2978 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2979 		mc = IN_MULTICAST(ntohl(in4->s_addr));
2980 	} else if (sa->sa_family == AF_INET6) {
2981 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2982 		mc = IN6_IS_ADDR_MULTICAST(in6);
2983 	} else
2984 		mc = -1;
2985 
2986 	return (mc);
2987 }
2988 
/*
 * Embed the scope zone into the IPv6 address held in vxladdr by calling
 * sa6_embedscope() with the system's default-zone setting.
 *
 * vxladdr must be an IPv6 address (asserted).  Returns the result of
 * sa6_embedscope(), or EAFNOSUPPORT on kernels built without INET6.
 */
static int
vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr)
{

	MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr));
#ifdef INET6
	return (sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone));
#else
	return (EAFNOSUPPORT);
#endif
}
3003 
3004 static int
3005 vxlan_can_change_config(struct vxlan_softc *sc)
3006 {
3007 	struct ifnet *ifp;
3008 
3009 	ifp = sc->vxl_ifp;
3010 	VXLAN_LOCK_ASSERT(sc);
3011 
3012 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3013 		return (0);
3014 	if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
3015 		return (0);
3016 
3017 	return (1);
3018 }
3019 
3020 static int
3021 vxlan_check_vni(uint32_t vni)
3022 {
3023 
3024 	return (vni >= VXLAN_VNI_MAX);
3025 }
3026 
3027 static int
3028 vxlan_check_ttl(int ttl)
3029 {
3030 
3031 	return (ttl > MAXTTL);
3032 }
3033 
3034 static int
3035 vxlan_check_ftable_timeout(uint32_t timeout)
3036 {
3037 
3038 	return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
3039 }
3040 
3041 static int
3042 vxlan_check_ftable_max(uint32_t max)
3043 {
3044 
3045 	return (max > VXLAN_FTABLE_MAX);
3046 }
3047 
3048 static void
3049 vxlan_sysctl_setup(struct vxlan_softc *sc)
3050 {
3051 	struct sysctl_ctx_list *ctx;
3052 	struct sysctl_oid *node;
3053 	struct vxlan_statistics *stats;
3054 	char namebuf[8];
3055 
3056 	ctx = &sc->vxl_sysctl_ctx;
3057 	stats = &sc->vxl_stats;
3058 	snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
3059 
3060 	sysctl_ctx_init(ctx);
3061 	sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
3062 	    SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
3063 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3064 
3065 	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
3066 	    OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3067 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
3068 	    CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
3069 	    "Number of entries in fowarding table");
3070 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
3071 	     CTLFLAG_RD, &sc->vxl_ftable_max, 0,
3072 	    "Maximum number of entries allowed in fowarding table");
3073 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
3074 	    CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
3075 	    "Number of seconds between prunes of the forwarding table");
3076 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
3077 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
3078 	    sc, 0, vxlan_ftable_sysctl_dump, "A",
3079 	    "Dump the forwarding table entries");
3080 
3081 	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
3082 	    OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3083 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3084 	    "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
3085 	    "Fowarding table reached maximum entries");
3086 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3087 	    "ftable_lock_upgrade_failed", CTLFLAG_RD,
3088 	    &stats->ftable_lock_upgrade_failed, 0,
3089 	    "Forwarding table update required lock upgrade");
3090 }
3091 
3092 static void
3093 vxlan_sysctl_destroy(struct vxlan_softc *sc)
3094 {
3095 
3096 	sysctl_ctx_free(&sc->vxl_sysctl_ctx);
3097 	sc->vxl_sysctl_node = NULL;
3098 }
3099 
3100 static int
3101 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
3102 {
3103 	char path[64];
3104 
3105 	snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
3106 	    sc->vxl_unit, knob);
3107 	TUNABLE_INT_FETCH(path, &def);
3108 
3109 	return (def);
3110 }
3111 
3112 static void
3113 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
3114 {
3115 	struct vxlan_softc_head list;
3116 	struct vxlan_socket *vso;
3117 	struct vxlan_softc *sc, *tsc;
3118 
3119 	LIST_INIT(&list);
3120 
3121 	if (ifp->if_flags & IFF_RENAMING)
3122 		return;
3123 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
3124 		return;
3125 
3126 	VXLAN_LIST_LOCK();
3127 	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
3128 		vxlan_socket_ifdetach(vso, ifp, &list);
3129 	VXLAN_LIST_UNLOCK();
3130 
3131 	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
3132 		LIST_REMOVE(sc, vxl_ifdetach_list);
3133 
3134 		VXLAN_WLOCK(sc);
3135 		if (sc->vxl_flags & VXLAN_FLAG_INIT)
3136 			vxlan_init_wait(sc);
3137 		vxlan_teardown_locked(sc);
3138 	}
3139 }
3140 
3141 static void
3142 vxlan_load(void)
3143 {
3144 
3145 	mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
3146 	LIST_INIT(&vxlan_socket_list);
3147 	vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
3148 	    vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
3149 	vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create,
3150 	    vxlan_clone_destroy, 0);
3151 }
3152 
3153 static void
3154 vxlan_unload(void)
3155 {
3156 
3157 	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
3158 	    vxlan_ifdetach_event_tag);
3159 	if_clone_detach(vxlan_cloner);
3160 	mtx_destroy(&vxlan_list_mtx);
3161 	MPASS(LIST_EMPTY(&vxlan_socket_list));
3162 }
3163 
3164 static int
3165 vxlan_modevent(module_t mod, int type, void *unused)
3166 {
3167 	int error;
3168 
3169 	error = 0;
3170 
3171 	switch (type) {
3172 	case MOD_LOAD:
3173 		vxlan_load();
3174 		break;
3175 	case MOD_UNLOAD:
3176 		vxlan_unload();
3177 		break;
3178 	default:
3179 		error = ENOTSUP;
3180 		break;
3181 	}
3182 
3183 	return (error);
3184 }
3185 
3186 static moduledata_t vxlan_mod = {
3187 	"if_vxlan",
3188 	vxlan_modevent,
3189 	0
3190 };
3191 
3192 DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3193 MODULE_VERSION(if_vxlan, 1);
3194