xref: /freebsd/sys/net/if_vxlan.c (revision 19d2e3de755b7c9ca2f5c90b5902fc8f214b2490)
1 /*-
2  * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "opt_inet.h"
28 #include "opt_inet6.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/eventhandler.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/hash.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/refcount.h>
42 #include <sys/rmlock.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/queue.h>
46 #include <sys/sbuf.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/sockio.h>
50 #include <sys/sysctl.h>
51 #include <sys/systm.h>
52 
53 #include <net/bpf.h>
54 #include <net/ethernet.h>
55 #include <net/if.h>
56 #include <net/if_var.h>
57 #include <net/if_clone.h>
58 #include <net/if_dl.h>
59 #include <net/if_types.h>
60 #include <net/if_vxlan.h>
61 #include <net/netisr.h>
62 
63 #include <netinet/in.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/in_var.h>
66 #include <netinet/in_pcb.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip6.h>
69 #include <netinet/ip_var.h>
70 #include <netinet6/ip6_var.h>
71 #include <netinet/udp.h>
72 #include <netinet/udp_var.h>
73 
74 struct vxlan_softc;
75 LIST_HEAD(vxlan_softc_head, vxlan_softc);
76 
77 struct vxlan_socket_mc_info {
78 	union vxlan_sockaddr		 vxlsomc_saddr;
79 	union vxlan_sockaddr		 vxlsomc_gaddr;
80 	int				 vxlsomc_ifidx;
81 	int				 vxlsomc_users;
82 };
83 
/* Number of multicast membership slots embedded in each vxlan_socket. */
#define VXLAN_SO_MC_MAX_GROUPS		32

/*
 * Every vxlan_socket keeps its softcs in a small hash table keyed by VNI;
 * the table has 2^VXLAN_SO_VNI_HASH_SHIFT buckets.
 */
#define VXLAN_SO_VNI_HASH_SHIFT		6
#define VXLAN_SO_VNI_HASH_SIZE		(1 << VXLAN_SO_VNI_HASH_SHIFT)
#define VXLAN_SO_VNI_HASH(_vni)		((_vni) % VXLAN_SO_VNI_HASH_SIZE)
89 
90 struct vxlan_socket {
91 	struct socket			*vxlso_sock;
92 	struct rmlock			 vxlso_lock;
93 	u_int				 vxlso_refcnt;
94 	union vxlan_sockaddr		 vxlso_laddr;
95 	LIST_ENTRY(vxlan_socket)	 vxlso_entry;
96 	struct vxlan_softc_head		 vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
97 	struct vxlan_socket_mc_info	 vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
98 };
99 
/*
 * Locking of a vxlan_socket.  Readers pass an rm_priotracker (_p) that
 * must stay valid until the matching VXLAN_SO_RUNLOCK().
 */
#define VXLAN_SO_RLOCK(_vso, _p)	rm_rlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_RUNLOCK(_vso, _p)	rm_runlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_WLOCK(_vso)		rm_wlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_WUNLOCK(_vso)		rm_wunlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_LOCK_ASSERT(_vso)					\
    rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
#define VXLAN_SO_LOCK_WASSERT(_vso)					\
    rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)

/* Reference counting of a vxlan_socket. */
#define VXLAN_SO_ACQUIRE(_vso)		refcount_acquire(&(_vso)->vxlso_refcnt)
#define VXLAN_SO_RELEASE(_vso)		refcount_release(&(_vso)->vxlso_refcnt)
111 
112 struct vxlan_ftable_entry {
113 	LIST_ENTRY(vxlan_ftable_entry)	 vxlfe_hash;
114 	uint16_t			 vxlfe_flags;
115 	uint8_t				 vxlfe_mac[ETHER_ADDR_LEN];
116 	union vxlan_sockaddr		 vxlfe_raddr;
117 	time_t				 vxlfe_expire;
118 };
119 
/* Values for vxlfe_flags. */
#define VXLAN_FE_FLAG_DYNAMIC		0x01
#define VXLAN_FE_FLAG_STATIC		0x02

/* Non-zero when the entry carries VXLAN_FE_FLAG_DYNAMIC. */
#define VXLAN_FE_IS_DYNAMIC(_fe)					\
    ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)

/*
 * The per-softc forwarding table has 2^VXLAN_SC_FTABLE_SHIFT buckets; the
 * bucket of an Ethernet address is derived from vxlan_mac_hash().
 */
#define VXLAN_SC_FTABLE_SHIFT		9
#define VXLAN_SC_FTABLE_SIZE		(1 << VXLAN_SC_FTABLE_SHIFT)
#define VXLAN_SC_FTABLE_MASK		(VXLAN_SC_FTABLE_SIZE - 1)
#define VXLAN_SC_FTABLE_HASH(_sc, _mac)					\
    (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)

LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
133 
134 struct vxlan_statistics {
135 	uint32_t	ftable_nospace;
136 	uint32_t	ftable_lock_upgrade_failed;
137 };
138 
139 struct vxlan_softc {
140 	struct ifnet			*vxl_ifp;
141 	struct vxlan_socket		*vxl_sock;
142 	uint32_t			 vxl_vni;
143 	union vxlan_sockaddr		 vxl_src_addr;
144 	union vxlan_sockaddr		 vxl_dst_addr;
145 	uint32_t			 vxl_flags;
146 #define VXLAN_FLAG_INIT		0x0001
147 #define VXLAN_FLAG_TEARDOWN	0x0002
148 #define VXLAN_FLAG_LEARN	0x0004
149 
150 	uint32_t			 vxl_port_hash_key;
151 	uint16_t			 vxl_min_port;
152 	uint16_t			 vxl_max_port;
153 	uint8_t				 vxl_ttl;
154 
155 	/* Lookup table from MAC address to forwarding entry. */
156 	uint32_t			 vxl_ftable_cnt;
157 	uint32_t			 vxl_ftable_max;
158 	uint32_t			 vxl_ftable_timeout;
159 	uint32_t			 vxl_ftable_hash_key;
160 	struct vxlan_ftable_head	*vxl_ftable;
161 
162 	/* Derived from vxl_dst_addr. */
163 	struct vxlan_ftable_entry	 vxl_default_fe;
164 
165 	struct ip_moptions		*vxl_im4o;
166 	struct ip6_moptions		*vxl_im6o;
167 
168 	struct rmlock			 vxl_lock;
169 	volatile u_int			 vxl_refcnt;
170 
171 	int				 vxl_unit;
172 	int				 vxl_vso_mc_index;
173 	struct vxlan_statistics		 vxl_stats;
174 	struct sysctl_oid		*vxl_sysctl_node;
175 	struct sysctl_ctx_list		 vxl_sysctl_ctx;
176 	struct callout			 vxl_callout;
177 	uint8_t				 vxl_hwaddr[ETHER_ADDR_LEN];
178 	int				 vxl_mc_ifindex;
179 	struct ifnet			*vxl_mc_ifp;
180 	char				 vxl_mc_ifname[IFNAMSIZ];
181 	LIST_ENTRY(vxlan_softc)		 vxl_entry;
182 	LIST_ENTRY(vxlan_softc)		 vxl_ifdetach_list;
183 };
184 
/*
 * Locking of a vxlan_softc.  Readers pass an rm_priotracker (_p) that
 * must stay valid until the lock is released.
 */
#define VXLAN_RLOCK(_sc, _p)	rm_rlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_WLOCK(_sc)	rm_wlock(&(_sc)->vxl_lock)
#define VXLAN_WUNLOCK(_sc)	rm_wunlock(&(_sc)->vxl_lock)
#define VXLAN_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->vxl_lock)
#define VXLAN_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
#define VXLAN_LOCK_WASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)

/*
 * Release the lock in whichever mode it is currently held.  Used by code
 * that entered with a read lock which may have been exchanged for a write
 * lock along the way.
 */
#define VXLAN_UNLOCK(_sc, _p) do {					\
	if (VXLAN_LOCK_WOWNED(_sc))					\
		VXLAN_WUNLOCK(_sc);					\
	else								\
		VXLAN_RUNLOCK(_sc, _p);					\
} while (0)

/* Reference counting of a vxlan_softc. */
#define VXLAN_ACQUIRE(_sc)	refcount_acquire(&(_sc)->vxl_refcnt)
#define VXLAN_RELEASE(_sc)	refcount_release(&(_sc)->vxl_refcnt)

/* Const-preserving casts from a generic sockaddr. */
#define	satoconstsin(sa)	((const struct sockaddr_in *)(sa))
#define	satoconstsin6(sa)	((const struct sockaddr_in6 *)(sa))
204 
205 struct vxlanudphdr {
206 	struct udphdr		vxlh_udp;
207 	struct vxlan_header	vxlh_hdr;
208 } __packed;
209 
210 static int	vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
211 static void	vxlan_ftable_init(struct vxlan_softc *);
212 static void	vxlan_ftable_fini(struct vxlan_softc *);
213 static void	vxlan_ftable_flush(struct vxlan_softc *, int);
214 static void	vxlan_ftable_expire(struct vxlan_softc *);
215 static int	vxlan_ftable_update_locked(struct vxlan_softc *,
216 		    const struct sockaddr *, const uint8_t *,
217 		    struct rm_priotracker *);
218 static int	vxlan_ftable_update(struct vxlan_softc *,
219 		    const struct sockaddr *, const uint8_t *);
220 static int	vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
221 
222 static struct vxlan_ftable_entry *
223 		vxlan_ftable_entry_alloc(void);
224 static void	vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
225 static void	vxlan_ftable_entry_init(struct vxlan_softc *,
226 		    struct vxlan_ftable_entry *, const uint8_t *,
227 		    const struct sockaddr *, uint32_t);
228 static void	vxlan_ftable_entry_destroy(struct vxlan_softc *,
229 		    struct vxlan_ftable_entry *);
230 static int	vxlan_ftable_entry_insert(struct vxlan_softc *,
231 		    struct vxlan_ftable_entry *);
232 static struct vxlan_ftable_entry *
233 		vxlan_ftable_entry_lookup(struct vxlan_softc *,
234 		    const uint8_t *);
235 static void	vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
236 		    struct sbuf *);
237 
238 static struct vxlan_socket *
239 		vxlan_socket_alloc(const union vxlan_sockaddr *);
240 static void	vxlan_socket_destroy(struct vxlan_socket *);
241 static void	vxlan_socket_release(struct vxlan_socket *);
242 static struct vxlan_socket *
243 		vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
244 static void	vxlan_socket_insert(struct vxlan_socket *);
245 static int	vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
246 static int	vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
247 static int	vxlan_socket_create(struct ifnet *, int,
248 		    const union vxlan_sockaddr *, struct vxlan_socket **);
249 static void	vxlan_socket_ifdetach(struct vxlan_socket *,
250 		    struct ifnet *, struct vxlan_softc_head *);
251 
252 static struct vxlan_socket *
253 		vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
254 static int	vxlan_sockaddr_mc_info_match(
255 		    const struct vxlan_socket_mc_info *,
256 		    const union vxlan_sockaddr *,
257 		    const union vxlan_sockaddr *, int);
258 static int	vxlan_socket_mc_join_group(struct vxlan_socket *,
259 		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
260 		    int *, union vxlan_sockaddr *);
261 static int	vxlan_socket_mc_leave_group(struct vxlan_socket *,
262 		    const union vxlan_sockaddr *,
263 		    const union vxlan_sockaddr *, int);
264 static int	vxlan_socket_mc_add_group(struct vxlan_socket *,
265 		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
266 		    int, int *);
267 static void	vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
268 		    int);
269 
270 static struct vxlan_softc *
271 		vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
272 		    uint32_t);
273 static struct vxlan_softc *
274 		vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
275 static int	vxlan_socket_insert_softc(struct vxlan_socket *,
276 		    struct vxlan_softc *);
277 static void	vxlan_socket_remove_softc(struct vxlan_socket *,
278 		    struct vxlan_softc *);
279 
280 static struct ifnet *
281 		vxlan_multicast_if_ref(struct vxlan_softc *, int);
282 static void	vxlan_free_multicast(struct vxlan_softc *);
283 static int	vxlan_setup_multicast_interface(struct vxlan_softc *);
284 
285 static int	vxlan_setup_multicast(struct vxlan_softc *);
286 static int	vxlan_setup_socket(struct vxlan_softc *);
287 static void	vxlan_setup_interface(struct vxlan_softc *);
288 static int	vxlan_valid_init_config(struct vxlan_softc *);
289 static void	vxlan_init_wait(struct vxlan_softc *);
290 static void	vxlan_init_complete(struct vxlan_softc *);
291 static void	vxlan_init(void *);
292 static void	vxlan_release(struct vxlan_softc *);
293 static void	vxlan_teardown_wait(struct vxlan_softc *);
294 static void	vxlan_teardown_complete(struct vxlan_softc *);
295 static void	vxlan_teardown_locked(struct vxlan_softc *);
296 static void	vxlan_teardown(struct vxlan_softc *);
297 static void	vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
298 		    struct vxlan_softc_head *);
299 static void	vxlan_timer(void *);
300 
301 static int	vxlan_ctrl_get_config(struct vxlan_softc *, void *);
302 static int	vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
303 static int	vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
304 static int	vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
305 static int	vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
306 static int	vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
307 static int	vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
308 static int	vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
309 static int	vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
310 static int	vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
311 static int	vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
312 static int	vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
313 static int	vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
314 static int	vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
315 static int	vxlan_ctrl_flush(struct vxlan_softc *, void *);
316 static int	vxlan_ioctl_drvspec(struct vxlan_softc *,
317 		    struct ifdrv *, int);
318 static int	vxlan_ioctl_ifflags(struct vxlan_softc *);
319 static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);
320 
321 #if defined(INET) || defined(INET6)
322 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
323 static void	vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
324 		    int, uint16_t, uint16_t);
325 #endif
326 static int	vxlan_encap4(struct vxlan_softc *,
327 		    const union vxlan_sockaddr *, struct mbuf *);
328 static int	vxlan_encap6(struct vxlan_softc *,
329 		    const union vxlan_sockaddr *, struct mbuf *);
330 static int	vxlan_transmit(struct ifnet *, struct mbuf *);
331 static void	vxlan_qflush(struct ifnet *);
332 static void	vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
333 		    const struct sockaddr *, void *);
334 static int	vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
335 		    const struct sockaddr *);
336 
337 static void	vxlan_set_default_config(struct vxlan_softc *);
338 static int	vxlan_set_user_config(struct vxlan_softc *,
339 		     struct ifvxlanparam *);
340 static int	vxlan_clone_create(struct if_clone *, int, caddr_t);
341 static void	vxlan_clone_destroy(struct ifnet *);
342 
343 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
344 static void	vxlan_fakeaddr(struct vxlan_softc *);
345 
346 static int	vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
347 		    const struct sockaddr *);
348 static void	vxlan_sockaddr_copy(union vxlan_sockaddr *,
349 		    const struct sockaddr *);
350 static int	vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
351 		    const struct sockaddr *);
352 static void	vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
353 		    const struct sockaddr *);
354 static int	vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
355 static int	vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
356 static int	vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
357 
358 static int	vxlan_can_change_config(struct vxlan_softc *);
359 static int	vxlan_check_vni(uint32_t);
360 static int	vxlan_check_ttl(int);
361 static int	vxlan_check_ftable_timeout(uint32_t);
362 static int	vxlan_check_ftable_max(uint32_t);
363 
364 static void	vxlan_sysctl_setup(struct vxlan_softc *);
365 static void	vxlan_sysctl_destroy(struct vxlan_softc *);
366 static int	vxlan_tunable_int(struct vxlan_softc *, const char *, int);
367 
368 static void	vxlan_ifdetach_event(void *, struct ifnet *);
369 static void	vxlan_load(void);
370 static void	vxlan_unload(void);
371 static int	vxlan_modevent(module_t, int, void *);
372 
373 static const char vxlan_name[] = "vxlan";
374 static MALLOC_DEFINE(M_VXLAN, vxlan_name,
375     "Virtual eXtensible LAN Interface");
376 static struct if_clone *vxlan_cloner;
377 static struct mtx vxlan_list_mtx;
378 static LIST_HEAD(, vxlan_socket) vxlan_socket_list;
379 
380 static eventhandler_tag vxlan_ifdetach_event_tag;
381 
382 SYSCTL_DECL(_net_link);
383 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW, 0,
384     "Virtual eXtensible Local Area Network");
385 
386 static int vxlan_legacy_port = 0;
387 TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
388 static int vxlan_reuse_port = 0;
389 TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);
390 
/*
 * Compile-time defaults for the forwarding table.  Each one may be
 * overridden by defining the macro before this point.
 */

/* Default maximum number of addresses in the forwarding table. */
#ifndef VXLAN_FTABLE_MAX
#define VXLAN_FTABLE_MAX		2000
#endif

/* Timeout (in seconds) of addresses learned in the forwarding table. */
#ifndef VXLAN_FTABLE_TIMEOUT
#define VXLAN_FTABLE_TIMEOUT		(20 * 60)
#endif

/*
 * Maximum timeout (in seconds) of addresses learned in the forwarding
 * table.
 */
#ifndef VXLAN_FTABLE_MAX_TIMEOUT
#define VXLAN_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)
#endif

/* Number of seconds between pruning attempts of the forwarding table. */
#ifndef VXLAN_FTABLE_PRUNE
#define VXLAN_FTABLE_PRUNE		(5 * 60)
#endif

static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
415 
/*
 * Description of one driver-specific control command: the handler, the
 * size of its argument and VXLAN_CTRL_FLAG_* bits.
 */
struct vxlan_control {
	int	(*vxlc_func)(struct vxlan_softc *, void *);
	int	vxlc_argsize;
	int	vxlc_flags;
#define VXLAN_CTRL_FLAG_COPYIN	0x01
#define VXLAN_CTRL_FLAG_COPYOUT	0x02
#define VXLAN_CTRL_FLAG_SUSER	0x04
};
424 
425 static const struct vxlan_control vxlan_control_table[] = {
426 	[VXLAN_CMD_GET_CONFIG] =
427 	    {	vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
428 		VXLAN_CTRL_FLAG_COPYOUT
429 	    },
430 
431 	[VXLAN_CMD_SET_VNI] =
432 	    {   vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
433 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
434 	    },
435 
436 	[VXLAN_CMD_SET_LOCAL_ADDR] =
437 	    {   vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
438 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
439 	    },
440 
441 	[VXLAN_CMD_SET_REMOTE_ADDR] =
442 	    {   vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
443 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
444 	    },
445 
446 	[VXLAN_CMD_SET_LOCAL_PORT] =
447 	    {   vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
448 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
449 	    },
450 
451 	[VXLAN_CMD_SET_REMOTE_PORT] =
452 	    {   vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
453 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
454 	    },
455 
456 	[VXLAN_CMD_SET_PORT_RANGE] =
457 	    {   vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
458 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
459 	    },
460 
461 	[VXLAN_CMD_SET_FTABLE_TIMEOUT] =
462 	    {	vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
463 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
464 	    },
465 
466 	[VXLAN_CMD_SET_FTABLE_MAX] =
467 	    {	vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
468 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
469 	    },
470 
471 	[VXLAN_CMD_SET_MULTICAST_IF] =
472 	    {	vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
473 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
474 	    },
475 
476 	[VXLAN_CMD_SET_TTL] =
477 	    {	vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
478 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
479 	    },
480 
481 	[VXLAN_CMD_SET_LEARN] =
482 	    {	vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
483 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
484 	    },
485 
486 	[VXLAN_CMD_FTABLE_ENTRY_ADD] =
487 	    {	vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
488 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
489 	    },
490 
491 	[VXLAN_CMD_FTABLE_ENTRY_REM] =
492 	    {	vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
493 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
494 	    },
495 
496 	[VXLAN_CMD_FLUSH] =
497 	    {   vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
498 		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
499 	    },
500 };
501 
502 static const int vxlan_control_table_size = nitems(vxlan_control_table);
503 
/*
 * Compare two Ethernet addresses octet by octet.
 *
 * Returns zero when all ETHER_ADDR_LEN octets match; otherwise returns
 * a[i] - b[i] for the first octet that differs, so the sign gives the
 * ordering of a relative to b.
 */
static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx, diff;

	diff = 0;
	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		diff = (int)a[idx] - (int)b[idx];
		if (diff != 0)
			break;
	}

	return (diff);
}
514 
515 static void
516 vxlan_ftable_init(struct vxlan_softc *sc)
517 {
518 	int i;
519 
520 	sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
521 	    VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
522 
523 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
524 		LIST_INIT(&sc->vxl_ftable[i]);
525 	sc->vxl_ftable_hash_key = arc4random();
526 }
527 
528 static void
529 vxlan_ftable_fini(struct vxlan_softc *sc)
530 {
531 	int i;
532 
533 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
534 		KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
535 		    ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
536 	}
537 	MPASS(sc->vxl_ftable_cnt == 0);
538 
539 	free(sc->vxl_ftable, M_VXLAN);
540 	sc->vxl_ftable = NULL;
541 }
542 
543 static void
544 vxlan_ftable_flush(struct vxlan_softc *sc, int all)
545 {
546 	struct vxlan_ftable_entry *fe, *tfe;
547 	int i;
548 
549 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
550 		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
551 			if (all || VXLAN_FE_IS_DYNAMIC(fe))
552 				vxlan_ftable_entry_destroy(sc, fe);
553 		}
554 	}
555 }
556 
557 static void
558 vxlan_ftable_expire(struct vxlan_softc *sc)
559 {
560 	struct vxlan_ftable_entry *fe, *tfe;
561 	int i;
562 
563 	VXLAN_LOCK_WASSERT(sc);
564 
565 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
566 		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
567 			if (VXLAN_FE_IS_DYNAMIC(fe) &&
568 			    time_uptime >= fe->vxlfe_expire)
569 				vxlan_ftable_entry_destroy(sc, fe);
570 		}
571 	}
572 }
573 
574 static int
575 vxlan_ftable_update_locked(struct vxlan_softc *sc, const struct sockaddr *sa,
576     const uint8_t *mac, struct rm_priotracker *tracker)
577 {
578 	union vxlan_sockaddr vxlsa;
579 	struct vxlan_ftable_entry *fe;
580 	int error;
581 
582 	VXLAN_LOCK_ASSERT(sc);
583 
584 again:
585 	/*
586 	 * A forwarding entry for this MAC address might already exist. If
587 	 * so, update it, otherwise create a new one. We may have to upgrade
588 	 * the lock if we have to change or create an entry.
589 	 */
590 	fe = vxlan_ftable_entry_lookup(sc, mac);
591 	if (fe != NULL) {
592 		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
593 
594 		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
595 		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, sa))
596 			return (0);
597 		if (!VXLAN_LOCK_WOWNED(sc)) {
598 			VXLAN_RUNLOCK(sc, tracker);
599 			VXLAN_WLOCK(sc);
600 			sc->vxl_stats.ftable_lock_upgrade_failed++;
601 			goto again;
602 		}
603 		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, sa);
604 		return (0);
605 	}
606 
607 	if (!VXLAN_LOCK_WOWNED(sc)) {
608 		VXLAN_RUNLOCK(sc, tracker);
609 		VXLAN_WLOCK(sc);
610 		sc->vxl_stats.ftable_lock_upgrade_failed++;
611 		goto again;
612 	}
613 
614 	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
615 		sc->vxl_stats.ftable_nospace++;
616 		return (ENOSPC);
617 	}
618 
619 	fe = vxlan_ftable_entry_alloc();
620 	if (fe == NULL)
621 		return (ENOMEM);
622 
623 	/*
624 	 * The source port may be randomly select by the remove host, so
625 	 * use the port of the default destination address.
626 	 */
627 	vxlan_sockaddr_copy(&vxlsa, sa);
628 	vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
629 
630 	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa.sa,
631 	    VXLAN_FE_FLAG_DYNAMIC);
632 
633 	/* The prior lookup failed, so the insert should not. */
634 	error = vxlan_ftable_entry_insert(sc, fe);
635 	MPASS(error == 0);
636 
637 	return (0);
638 }
639 
640 static int
641 vxlan_ftable_update(struct vxlan_softc *sc, const struct sockaddr *sa,
642     const uint8_t *mac)
643 {
644 	struct rm_priotracker tracker;
645 	int error;
646 
647 	VXLAN_RLOCK(sc, &tracker);
648 	error = vxlan_ftable_update_locked(sc, sa, mac, &tracker);
649 	VXLAN_UNLOCK(sc, &tracker);
650 
651 	return (error);
652 }
653 
654 static int
655 vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
656 {
657 	struct rm_priotracker tracker;
658 	struct sbuf sb;
659 	struct vxlan_softc *sc;
660 	struct vxlan_ftable_entry *fe;
661 	size_t size;
662 	int i, error;
663 
664 	/*
665 	 * This is mostly intended for debugging during development. It is
666 	 * not practical to dump an entire large table this way.
667 	 */
668 
669 	sc = arg1;
670 	size = PAGE_SIZE;	/* Calculate later. */
671 
672 	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
673 	sbuf_putc(&sb, '\n');
674 
675 	VXLAN_RLOCK(sc, &tracker);
676 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
677 		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
678 			if (sbuf_error(&sb) != 0)
679 				break;
680 			vxlan_ftable_entry_dump(fe, &sb);
681 		}
682 	}
683 	VXLAN_RUNLOCK(sc, &tracker);
684 
685 	if (sbuf_len(&sb) == 1)
686 		sbuf_setpos(&sb, 0);
687 
688 	sbuf_finish(&sb);
689 	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
690 	sbuf_delete(&sb);
691 
692 	return (error);
693 }
694 
695 static struct vxlan_ftable_entry *
696 vxlan_ftable_entry_alloc(void)
697 {
698 	struct vxlan_ftable_entry *fe;
699 
700 	fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
701 
702 	return (fe);
703 }
704 
705 static void
706 vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
707 {
708 
709 	free(fe, M_VXLAN);
710 }
711 
712 static void
713 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
714     const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
715 {
716 
717 	fe->vxlfe_flags = flags;
718 	fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
719 	memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
720 	vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
721 }
722 
723 static void
724 vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
725     struct vxlan_ftable_entry *fe)
726 {
727 
728 	sc->vxl_ftable_cnt--;
729 	LIST_REMOVE(fe, vxlfe_hash);
730 	vxlan_ftable_entry_free(fe);
731 }
732 
733 static int
734 vxlan_ftable_entry_insert(struct vxlan_softc *sc,
735     struct vxlan_ftable_entry *fe)
736 {
737 	struct vxlan_ftable_entry *lfe;
738 	uint32_t hash;
739 	int dir;
740 
741 	VXLAN_LOCK_WASSERT(sc);
742 	hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
743 
744 	lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
745 	if (lfe == NULL) {
746 		LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
747 		goto out;
748 	}
749 
750 	do {
751 		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
752 		if (dir == 0)
753 			return (EEXIST);
754 		if (dir > 0) {
755 			LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
756 			goto out;
757 		} else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
758 			LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
759 			goto out;
760 		} else
761 			lfe = LIST_NEXT(lfe, vxlfe_hash);
762 	} while (lfe != NULL);
763 
764 out:
765 	sc->vxl_ftable_cnt++;
766 
767 	return (0);
768 }
769 
770 static struct vxlan_ftable_entry *
771 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
772 {
773 	struct vxlan_ftable_entry *fe;
774 	uint32_t hash;
775 	int dir;
776 
777 	VXLAN_LOCK_ASSERT(sc);
778 	hash = VXLAN_SC_FTABLE_HASH(sc, mac);
779 
780 	LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
781 		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, mac);
782 		if (dir == 0)
783 			return (fe);
784 		if (dir > 0)
785 			break;
786 	}
787 
788 	return (NULL);
789 }
790 
791 static void
792 vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
793 {
794 	char buf[64];
795 	const union vxlan_sockaddr *sa;
796 	const void *addr;
797 	int i, len, af, width;
798 
799 	sa = &fe->vxlfe_raddr;
800 	af = sa->sa.sa_family;
801 	len = sbuf_len(sb);
802 
803 	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
804 	    fe->vxlfe_flags);
805 
806 	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
807 		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
808 	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
809 
810 	if (af == AF_INET) {
811 		addr = &sa->in4.sin_addr;
812 		width = INET_ADDRSTRLEN - 1;
813 	} else {
814 		addr = &sa->in6.sin6_addr;
815 		width = INET6_ADDRSTRLEN - 1;
816 	}
817 	inet_ntop(af, addr, buf, sizeof(buf));
818 	sbuf_printf(sb, "%*s ", width, buf);
819 
820 	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
821 
822 	sbuf_putc(sb, '\n');
823 
824 	/* Truncate a partial line. */
825 	if (sbuf_error(sb) != 0)
826 		sbuf_setpos(sb, len);
827 }
828 
829 static struct vxlan_socket *
830 vxlan_socket_alloc(const union vxlan_sockaddr *sa)
831 {
832 	struct vxlan_socket *vso;
833 	int i;
834 
835 	vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
836 	rm_init(&vso->vxlso_lock, "vxlansorm");
837 	refcount_init(&vso->vxlso_refcnt, 0);
838 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
839 		LIST_INIT(&vso->vxlso_vni_hash[i]);
840 	vso->vxlso_laddr = *sa;
841 
842 	return (vso);
843 }
844 
845 static void
846 vxlan_socket_destroy(struct vxlan_socket *vso)
847 {
848 	struct socket *so;
849 	struct vxlan_socket_mc_info *mc;
850 	int i;
851 
852 	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
853 		mc = &vso->vxlso_mc[i];
854 		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
855 		    ("%s: socket %p mc[%d] still has address",
856 		     __func__, vso, i));
857 	}
858 
859 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
860 		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
861 		    ("%s: socket %p vni_hash[%d] not empty",
862 		     __func__, vso, i));
863 	}
864 
865 	so = vso->vxlso_sock;
866 	if (so != NULL) {
867 		vso->vxlso_sock = NULL;
868 		soclose(so);
869 	}
870 
871 	rm_destroy(&vso->vxlso_lock);
872 	free(vso, M_VXLAN);
873 }
874 
875 static void
876 vxlan_socket_release(struct vxlan_socket *vso)
877 {
878 	int destroy;
879 
880 	mtx_lock(&vxlan_list_mtx);
881 	destroy = VXLAN_SO_RELEASE(vso);
882 	if (destroy != 0)
883 		LIST_REMOVE(vso, vxlso_entry);
884 	mtx_unlock(&vxlan_list_mtx);
885 
886 	if (destroy != 0)
887 		vxlan_socket_destroy(vso);
888 }
889 
890 static struct vxlan_socket *
891 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
892 {
893 	struct vxlan_socket *vso;
894 
895 	mtx_lock(&vxlan_list_mtx);
896 	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
897 		if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
898 			VXLAN_SO_ACQUIRE(vso);
899 			break;
900 		}
901 	}
902 	mtx_unlock(&vxlan_list_mtx);
903 
904 	return (vso);
905 }
906 
907 static void
908 vxlan_socket_insert(struct vxlan_socket *vso)
909 {
910 
911 	mtx_lock(&vxlan_list_mtx);
912 	VXLAN_SO_ACQUIRE(vso);
913 	LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
914 	mtx_unlock(&vxlan_list_mtx);
915 }
916 
917 static int
918 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
919 {
920 	struct thread *td;
921 	int error;
922 
923 	td = curthread;
924 
925 	error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
926 	    SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
927 	if (error) {
928 		if_printf(ifp, "cannot create socket: %d\n", error);
929 		return (error);
930 	}
931 
932 	error = udp_set_kernel_tunneling(vso->vxlso_sock,
933 	    vxlan_rcv_udp_packet, NULL, vso);
934 	if (error) {
935 		if_printf(ifp, "cannot set tunneling function: %d\n", error);
936 		return (error);
937 	}
938 
939 	if (vxlan_reuse_port != 0) {
940 		struct sockopt sopt;
941 		int val = 1;
942 
943 		bzero(&sopt, sizeof(sopt));
944 		sopt.sopt_dir = SOPT_SET;
945 		sopt.sopt_level = IPPROTO_IP;
946 		sopt.sopt_name = SO_REUSEPORT;
947 		sopt.sopt_val = &val;
948 		sopt.sopt_valsize = sizeof(val);
949 		error = sosetopt(vso->vxlso_sock, &sopt);
950 		if (error) {
951 			if_printf(ifp,
952 			    "cannot set REUSEADDR socket opt: %d\n", error);
953 			return (error);
954 		}
955 	}
956 
957 	return (0);
958 }
959 
960 static int
961 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
962 {
963 	union vxlan_sockaddr laddr;
964 	struct thread *td;
965 	int error;
966 
967 	td = curthread;
968 	laddr = vso->vxlso_laddr;
969 
970 	error = sobind(vso->vxlso_sock, &laddr.sa, td);
971 	if (error) {
972 		if (error != EADDRINUSE)
973 			if_printf(ifp, "cannot bind socket: %d\n", error);
974 		return (error);
975 	}
976 
977 	return (0);
978 }
979 
980 static int
981 vxlan_socket_create(struct ifnet *ifp, int multicast,
982     const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
983 {
984 	union vxlan_sockaddr laddr;
985 	struct vxlan_socket *vso;
986 	int error;
987 
988 	laddr = *saddr;
989 
990 	/*
991 	 * If this socket will be multicast, then only the local port
992 	 * must be specified when binding.
993 	 */
994 	if (multicast != 0) {
995 		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
996 			laddr.in4.sin_addr.s_addr = INADDR_ANY;
997 #ifdef INET6
998 		else
999 			laddr.in6.sin6_addr = in6addr_any;
1000 #endif
1001 	}
1002 
1003 	vso = vxlan_socket_alloc(&laddr);
1004 	if (vso == NULL)
1005 		return (ENOMEM);
1006 
1007 	error = vxlan_socket_init(vso, ifp);
1008 	if (error)
1009 		goto fail;
1010 
1011 	error = vxlan_socket_bind(vso, ifp);
1012 	if (error)
1013 		goto fail;
1014 
1015 	/*
1016 	 * There is a small window between the bind completing and
1017 	 * inserting the socket, so that a concurrent create may fail.
1018 	 * Let's not worry about that for now.
1019 	 */
1020 	vxlan_socket_insert(vso);
1021 	*vsop = vso;
1022 
1023 	return (0);
1024 
1025 fail:
1026 	vxlan_socket_destroy(vso);
1027 
1028 	return (error);
1029 }
1030 
1031 static void
1032 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
1033     struct vxlan_softc_head *list)
1034 {
1035 	struct rm_priotracker tracker;
1036 	struct vxlan_softc *sc;
1037 	int i;
1038 
1039 	VXLAN_SO_RLOCK(vso, &tracker);
1040 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
1041 		LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
1042 			vxlan_ifdetach(sc, ifp, list);
1043 	}
1044 	VXLAN_SO_RUNLOCK(vso, &tracker);
1045 }
1046 
1047 static struct vxlan_socket *
1048 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
1049 {
1050 	struct vxlan_socket *vso;
1051 	union vxlan_sockaddr laddr;
1052 
1053 	laddr = *vxlsa;
1054 
1055 	if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1056 		laddr.in4.sin_addr.s_addr = INADDR_ANY;
1057 #ifdef INET6
1058 	else
1059 		laddr.in6.sin6_addr = in6addr_any;
1060 #endif
1061 
1062 	vso = vxlan_socket_lookup(&laddr);
1063 
1064 	return (vso);
1065 }
1066 
1067 static int
1068 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
1069     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1070     int ifidx)
1071 {
1072 
1073 	if (!vxlan_sockaddr_in_any(local) &&
1074 	    !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
1075 		return (0);
1076 	if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
1077 		return (0);
1078 	if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
1079 		return (0);
1080 
1081 	return (1);
1082 }
1083 
1084 static int
1085 vxlan_socket_mc_join_group(struct vxlan_socket *vso,
1086     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1087     int *ifidx, union vxlan_sockaddr *source)
1088 {
1089 	struct sockopt sopt;
1090 	int error;
1091 
1092 	*source = *local;
1093 
1094 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1095 		struct ip_mreq mreq;
1096 
1097 		mreq.imr_multiaddr = group->in4.sin_addr;
1098 		mreq.imr_interface = local->in4.sin_addr;
1099 
1100 		bzero(&sopt, sizeof(sopt));
1101 		sopt.sopt_dir = SOPT_SET;
1102 		sopt.sopt_level = IPPROTO_IP;
1103 		sopt.sopt_name = IP_ADD_MEMBERSHIP;
1104 		sopt.sopt_val = &mreq;
1105 		sopt.sopt_valsize = sizeof(mreq);
1106 		error = sosetopt(vso->vxlso_sock, &sopt);
1107 		if (error)
1108 			return (error);
1109 
1110 		/*
1111 		 * BMV: Ideally, there would be a formal way for us to get
1112 		 * the local interface that was selected based on the
1113 		 * imr_interface address. We could then update *ifidx so
1114 		 * vxlan_sockaddr_mc_info_match() would return a match for
1115 		 * later creates that explicitly set the multicast interface.
1116 		 *
1117 		 * If we really need to, we can of course look in the INP's
1118 		 * membership list:
1119 		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
1120 		 *         imo_membership[]->inm_ifp
1121 		 * similarly to imo_match_group().
1122 		 */
1123 		source->in4.sin_addr = local->in4.sin_addr;
1124 
1125 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1126 		struct ipv6_mreq mreq;
1127 
1128 		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1129 		mreq.ipv6mr_interface = *ifidx;
1130 
1131 		bzero(&sopt, sizeof(sopt));
1132 		sopt.sopt_dir = SOPT_SET;
1133 		sopt.sopt_level = IPPROTO_IPV6;
1134 		sopt.sopt_name = IPV6_JOIN_GROUP;
1135 		sopt.sopt_val = &mreq;
1136 		sopt.sopt_valsize = sizeof(mreq);
1137 		error = sosetopt(vso->vxlso_sock, &sopt);
1138 		if (error)
1139 			return (error);
1140 
1141 		/*
1142 		 * BMV: As with IPv4, we would really like to know what
1143 		 * interface in6p_lookup_mcast_ifp() selected.
1144 		 */
1145 	} else
1146 		error = EAFNOSUPPORT;
1147 
1148 	return (error);
1149 }
1150 
1151 static int
1152 vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
1153     const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
1154     int ifidx)
1155 {
1156 	struct sockopt sopt;
1157 	int error;
1158 
1159 	bzero(&sopt, sizeof(sopt));
1160 	sopt.sopt_dir = SOPT_SET;
1161 
1162 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1163 		struct ip_mreq mreq;
1164 
1165 		mreq.imr_multiaddr = group->in4.sin_addr;
1166 		mreq.imr_interface = source->in4.sin_addr;
1167 
1168 		sopt.sopt_level = IPPROTO_IP;
1169 		sopt.sopt_name = IP_DROP_MEMBERSHIP;
1170 		sopt.sopt_val = &mreq;
1171 		sopt.sopt_valsize = sizeof(mreq);
1172 		error = sosetopt(vso->vxlso_sock, &sopt);
1173 
1174 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1175 		struct ipv6_mreq mreq;
1176 
1177 		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1178 		mreq.ipv6mr_interface = ifidx;
1179 
1180 		sopt.sopt_level = IPPROTO_IPV6;
1181 		sopt.sopt_name = IPV6_LEAVE_GROUP;
1182 		sopt.sopt_val = &mreq;
1183 		sopt.sopt_valsize = sizeof(mreq);
1184 		error = sosetopt(vso->vxlso_sock, &sopt);
1185 
1186 	} else
1187 		error = EAFNOSUPPORT;
1188 
1189 	return (error);
1190 }
1191 
1192 static int
1193 vxlan_socket_mc_add_group(struct vxlan_socket *vso,
1194     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1195     int ifidx, int *idx)
1196 {
1197 	union vxlan_sockaddr source;
1198 	struct vxlan_socket_mc_info *mc;
1199 	int i, empty, error;
1200 
1201 	/*
1202 	 * Within a socket, the same multicast group may be used by multiple
1203 	 * interfaces, each with a different network identifier. But a socket
1204 	 * may only join a multicast group once, so keep track of the users
1205 	 * here.
1206 	 */
1207 
1208 	VXLAN_SO_WLOCK(vso);
1209 	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1210 		mc = &vso->vxlso_mc[i];
1211 
1212 		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1213 			empty++;
1214 			continue;
1215 		}
1216 
1217 		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
1218 			goto out;
1219 	}
1220 	VXLAN_SO_WUNLOCK(vso);
1221 
1222 	if (empty == 0)
1223 		return (ENOSPC);
1224 
1225 	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
1226 	if (error)
1227 		return (error);
1228 
1229 	VXLAN_SO_WLOCK(vso);
1230 	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1231 		mc = &vso->vxlso_mc[i];
1232 
1233 		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1234 			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
1235 			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
1236 			mc->vxlsomc_ifidx = ifidx;
1237 			goto out;
1238 		}
1239 	}
1240 	VXLAN_SO_WUNLOCK(vso);
1241 
1242 	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
1243 	MPASS(error == 0);
1244 
1245 	return (ENOSPC);
1246 
1247 out:
1248 	mc->vxlsomc_users++;
1249 	VXLAN_SO_WUNLOCK(vso);
1250 
1251 	*idx = i;
1252 
1253 	return (0);
1254 }
1255 
1256 static void
1257 vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
1258 {
1259 	union vxlan_sockaddr group, source;
1260 	struct vxlan_socket_mc_info *mc;
1261 	int ifidx, leave;
1262 
1263 	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
1264 	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
1265 
1266 	leave = 0;
1267 	mc = &vso->vxlso_mc[idx];
1268 
1269 	VXLAN_SO_WLOCK(vso);
1270 	mc->vxlsomc_users--;
1271 	if (mc->vxlsomc_users == 0) {
1272 		group = mc->vxlsomc_gaddr;
1273 		source = mc->vxlsomc_saddr;
1274 		ifidx = mc->vxlsomc_ifidx;
1275 		bzero(mc, sizeof(*mc));
1276 		leave = 1;
1277 	}
1278 	VXLAN_SO_WUNLOCK(vso);
1279 
1280 	if (leave != 0) {
1281 		/*
1282 		 * Our socket's membership in this group may have already
1283 		 * been removed if we joined through an interface that's
1284 		 * been detached.
1285 		 */
1286 		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
1287 	}
1288 }
1289 
1290 static struct vxlan_softc *
1291 vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
1292 {
1293 	struct vxlan_softc *sc;
1294 	uint32_t hash;
1295 
1296 	VXLAN_SO_LOCK_ASSERT(vso);
1297 	hash = VXLAN_SO_VNI_HASH(vni);
1298 
1299 	LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
1300 		if (sc->vxl_vni == vni) {
1301 			VXLAN_ACQUIRE(sc);
1302 			break;
1303 		}
1304 	}
1305 
1306 	return (sc);
1307 }
1308 
1309 static struct vxlan_softc *
1310 vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
1311 {
1312 	struct rm_priotracker tracker;
1313 	struct vxlan_softc *sc;
1314 
1315 	VXLAN_SO_RLOCK(vso, &tracker);
1316 	sc = vxlan_socket_lookup_softc_locked(vso, vni);
1317 	VXLAN_SO_RUNLOCK(vso, &tracker);
1318 
1319 	return (sc);
1320 }
1321 
1322 static int
1323 vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1324 {
1325 	struct vxlan_softc *tsc;
1326 	uint32_t vni, hash;
1327 
1328 	vni = sc->vxl_vni;
1329 	hash = VXLAN_SO_VNI_HASH(vni);
1330 
1331 	VXLAN_SO_WLOCK(vso);
1332 	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
1333 	if (tsc != NULL) {
1334 		VXLAN_SO_WUNLOCK(vso);
1335 		vxlan_release(tsc);
1336 		return (EEXIST);
1337 	}
1338 
1339 	VXLAN_ACQUIRE(sc);
1340 	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
1341 	VXLAN_SO_WUNLOCK(vso);
1342 
1343 	return (0);
1344 }
1345 
1346 static void
1347 vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1348 {
1349 
1350 	VXLAN_SO_WLOCK(vso);
1351 	LIST_REMOVE(sc, vxl_entry);
1352 	VXLAN_SO_WUNLOCK(vso);
1353 
1354 	vxlan_release(sc);
1355 }
1356 
1357 static struct ifnet *
1358 vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
1359 {
1360 	struct ifnet *ifp;
1361 
1362 	VXLAN_LOCK_ASSERT(sc);
1363 
1364 	if (ipv4 && sc->vxl_im4o != NULL)
1365 		ifp = sc->vxl_im4o->imo_multicast_ifp;
1366 	else if (!ipv4 && sc->vxl_im6o != NULL)
1367 		ifp = sc->vxl_im6o->im6o_multicast_ifp;
1368 	else
1369 		ifp = NULL;
1370 
1371 	if (ifp != NULL)
1372 		if_ref(ifp);
1373 
1374 	return (ifp);
1375 }
1376 
1377 static void
1378 vxlan_free_multicast(struct vxlan_softc *sc)
1379 {
1380 
1381 	if (sc->vxl_mc_ifp != NULL) {
1382 		if_rele(sc->vxl_mc_ifp);
1383 		sc->vxl_mc_ifp = NULL;
1384 		sc->vxl_mc_ifindex = 0;
1385 	}
1386 
1387 	if (sc->vxl_im4o != NULL) {
1388 		free(sc->vxl_im4o, M_VXLAN);
1389 		sc->vxl_im4o = NULL;
1390 	}
1391 
1392 	if (sc->vxl_im6o != NULL) {
1393 		free(sc->vxl_im6o, M_VXLAN);
1394 		sc->vxl_im6o = NULL;
1395 	}
1396 }
1397 
1398 static int
1399 vxlan_setup_multicast_interface(struct vxlan_softc *sc)
1400 {
1401 	struct ifnet *ifp;
1402 
1403 	ifp = ifunit_ref(sc->vxl_mc_ifname);
1404 	if (ifp == NULL) {
1405 		if_printf(sc->vxl_ifp, "multicast interfaces %s does "
1406 		    "not exist\n", sc->vxl_mc_ifname);
1407 		return (ENOENT);
1408 	}
1409 
1410 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1411 		if_printf(sc->vxl_ifp, "interface %s does not support "
1412 		     "multicast\n", sc->vxl_mc_ifname);
1413 		if_rele(ifp);
1414 		return (ENOTSUP);
1415 	}
1416 
1417 	sc->vxl_mc_ifp = ifp;
1418 	sc->vxl_mc_ifindex = ifp->if_index;
1419 
1420 	return (0);
1421 }
1422 
1423 static int
1424 vxlan_setup_multicast(struct vxlan_softc *sc)
1425 {
1426 	const union vxlan_sockaddr *group;
1427 	int error;
1428 
1429 	group = &sc->vxl_dst_addr;
1430 	error = 0;
1431 
1432 	if (sc->vxl_mc_ifname[0] != '\0') {
1433 		error = vxlan_setup_multicast_interface(sc);
1434 		if (error)
1435 			return (error);
1436 	}
1437 
1438 	/*
1439 	 * Initialize an multicast options structure that is sufficiently
1440 	 * populated for use in the respective IP output routine. This
1441 	 * structure is typically stored in the socket, but our sockets
1442 	 * may be shared among multiple interfaces.
1443 	 */
1444 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1445 		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
1446 		    M_ZERO | M_WAITOK);
1447 		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
1448 		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
1449 		sc->vxl_im4o->imo_multicast_vif = -1;
1450 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1451 		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
1452 		    M_ZERO | M_WAITOK);
1453 		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
1454 		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
1455 	}
1456 
1457 	return (error);
1458 }
1459 
1460 static int
1461 vxlan_setup_socket(struct vxlan_softc *sc)
1462 {
1463 	struct vxlan_socket *vso;
1464 	struct ifnet *ifp;
1465 	union vxlan_sockaddr *saddr, *daddr;
1466 	int multicast, error;
1467 
1468 	vso = NULL;
1469 	ifp = sc->vxl_ifp;
1470 	saddr = &sc->vxl_src_addr;
1471 	daddr = &sc->vxl_dst_addr;
1472 
1473 	multicast = vxlan_sockaddr_in_multicast(daddr);
1474 	MPASS(multicast != -1);
1475 	sc->vxl_vso_mc_index = -1;
1476 
1477 	/*
1478 	 * Try to create the socket. If that fails, attempt to use an
1479 	 * existing socket.
1480 	 */
1481 	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
1482 	if (error) {
1483 		if (multicast != 0)
1484 			vso = vxlan_socket_mc_lookup(saddr);
1485 		else
1486 			vso = vxlan_socket_lookup(saddr);
1487 
1488 		if (vso == NULL) {
1489 			if_printf(ifp, "cannot create socket (error: %d), "
1490 			    "and no existing socket found\n", error);
1491 			goto out;
1492 		}
1493 	}
1494 
1495 	if (multicast != 0) {
1496 		error = vxlan_setup_multicast(sc);
1497 		if (error)
1498 			goto out;
1499 
1500 		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
1501 		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
1502 		if (error)
1503 			goto out;
1504 	}
1505 
1506 	sc->vxl_sock = vso;
1507 	error = vxlan_socket_insert_softc(vso, sc);
1508 	if (error) {
1509 		sc->vxl_sock = NULL;
1510 		if_printf(ifp, "network identifier %d already exists in "
1511 		    "this socket\n", sc->vxl_vni);
1512 		goto out;
1513 	}
1514 
1515 	return (0);
1516 
1517 out:
1518 	if (vso != NULL) {
1519 		if (sc->vxl_vso_mc_index != -1) {
1520 			vxlan_socket_mc_release_group_by_idx(vso,
1521 			    sc->vxl_vso_mc_index);
1522 			sc->vxl_vso_mc_index = -1;
1523 		}
1524 		if (multicast != 0)
1525 			vxlan_free_multicast(sc);
1526 		vxlan_socket_release(vso);
1527 	}
1528 
1529 	return (error);
1530 }
1531 
1532 static void
1533 vxlan_setup_interface(struct vxlan_softc *sc)
1534 {
1535 	struct ifnet *ifp;
1536 
1537 	ifp = sc->vxl_ifp;
1538 	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
1539 
1540 	if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
1541 		ifp->if_hdrlen += sizeof(struct ip);
1542 	else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
1543 		ifp->if_hdrlen += sizeof(struct ip6_hdr);
1544 }
1545 
1546 static int
1547 vxlan_valid_init_config(struct vxlan_softc *sc)
1548 {
1549 	const char *reason;
1550 
1551 	if (vxlan_check_vni(sc->vxl_vni) != 0) {
1552 		reason = "invalid virtual network identifier specified";
1553 		goto fail;
1554 	}
1555 
1556 	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
1557 		reason = "source address type is not supported";
1558 		goto fail;
1559 	}
1560 
1561 	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
1562 		reason = "destination address type is not supported";
1563 		goto fail;
1564 	}
1565 
1566 	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
1567 		reason = "no valid destination address specified";
1568 		goto fail;
1569 	}
1570 
1571 	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
1572 	    sc->vxl_mc_ifname[0] != '\0') {
1573 		reason = "can only specify interface with a group address";
1574 		goto fail;
1575 	}
1576 
1577 	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
1578 		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
1579 		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
1580 			reason = "source and destination address must both "
1581 			    "be either IPv4 or IPv6";
1582 			goto fail;
1583 		}
1584 	}
1585 
1586 	if (sc->vxl_src_addr.in4.sin_port == 0) {
1587 		reason = "local port not specified";
1588 		goto fail;
1589 	}
1590 
1591 	if (sc->vxl_dst_addr.in4.sin_port == 0) {
1592 		reason = "remote port not specified";
1593 		goto fail;
1594 	}
1595 
1596 	return (0);
1597 
1598 fail:
1599 	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
1600 	return (EINVAL);
1601 }
1602 
1603 static void
1604 vxlan_init_wait(struct vxlan_softc *sc)
1605 {
1606 
1607 	VXLAN_LOCK_WASSERT(sc);
1608 	while (sc->vxl_flags & VXLAN_FLAG_INIT)
1609 		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
1610 }
1611 
1612 static void
1613 vxlan_init_complete(struct vxlan_softc *sc)
1614 {
1615 
1616 	VXLAN_WLOCK(sc);
1617 	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
1618 	wakeup(sc);
1619 	VXLAN_WUNLOCK(sc);
1620 }
1621 
1622 static void
1623 vxlan_init(void *xsc)
1624 {
1625 	static const uint8_t empty_mac[ETHER_ADDR_LEN];
1626 	struct vxlan_softc *sc;
1627 	struct ifnet *ifp;
1628 
1629 	sc = xsc;
1630 	ifp = sc->vxl_ifp;
1631 
1632 	VXLAN_WLOCK(sc);
1633 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1634 		VXLAN_WUNLOCK(sc);
1635 		return;
1636 	}
1637 	sc->vxl_flags |= VXLAN_FLAG_INIT;
1638 	VXLAN_WUNLOCK(sc);
1639 
1640 	if (vxlan_valid_init_config(sc) != 0)
1641 		goto out;
1642 
1643 	vxlan_setup_interface(sc);
1644 
1645 	if (vxlan_setup_socket(sc) != 0)
1646 		goto out;
1647 
1648 	/* Initialize the default forwarding entry. */
1649 	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
1650 	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
1651 
1652 	VXLAN_WLOCK(sc);
1653 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1654 	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
1655 	    vxlan_timer, sc);
1656 	VXLAN_WUNLOCK(sc);
1657 
1658 out:
1659 	vxlan_init_complete(sc);
1660 }
1661 
/*
 * Drop a reference on sc and wake any sleeper on the softc when
 * VXLAN_RELEASE() reports a non-zero result.
 */
static void
vxlan_release(struct vxlan_softc *sc)
{
	int wake;

	/*
	 * Once our reference is gone the softc may be destroyed at any
	 * moment, so the wakeup cannot be serialized with the softc lock.
	 * Sleepers use a timeout, which makes a missed wakeup unfortunate
	 * but not fatal.
	 */
	wake = (VXLAN_RELEASE(sc) != 0);
	if (wake)
		wakeup(sc);
}
1675 
1676 static void
1677 vxlan_teardown_wait(struct vxlan_softc *sc)
1678 {
1679 
1680 	VXLAN_LOCK_WASSERT(sc);
1681 	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1682 		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
1683 }
1684 
1685 static void
1686 vxlan_teardown_complete(struct vxlan_softc *sc)
1687 {
1688 
1689 	VXLAN_WLOCK(sc);
1690 	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
1691 	wakeup(sc);
1692 	VXLAN_WUNLOCK(sc);
1693 }
1694 
1695 static void
1696 vxlan_teardown_locked(struct vxlan_softc *sc)
1697 {
1698 	struct ifnet *ifp;
1699 	struct vxlan_socket *vso;
1700 
1701 	ifp = sc->vxl_ifp;
1702 
1703 	VXLAN_LOCK_WASSERT(sc);
1704 	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
1705 
1706 	ifp->if_flags &= ~IFF_UP;
1707 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1708 	callout_stop(&sc->vxl_callout);
1709 	vso = sc->vxl_sock;
1710 	sc->vxl_sock = NULL;
1711 
1712 	VXLAN_WUNLOCK(sc);
1713 
1714 	if (vso != NULL) {
1715 		vxlan_socket_remove_softc(vso, sc);
1716 
1717 		if (sc->vxl_vso_mc_index != -1) {
1718 			vxlan_socket_mc_release_group_by_idx(vso,
1719 			    sc->vxl_vso_mc_index);
1720 			sc->vxl_vso_mc_index = -1;
1721 		}
1722 	}
1723 
1724 	VXLAN_WLOCK(sc);
1725 	while (sc->vxl_refcnt != 0)
1726 		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
1727 	VXLAN_WUNLOCK(sc);
1728 
1729 	callout_drain(&sc->vxl_callout);
1730 
1731 	vxlan_free_multicast(sc);
1732 	if (vso != NULL)
1733 		vxlan_socket_release(vso);
1734 
1735 	vxlan_teardown_complete(sc);
1736 }
1737 
1738 static void
1739 vxlan_teardown(struct vxlan_softc *sc)
1740 {
1741 
1742 	VXLAN_WLOCK(sc);
1743 	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
1744 		vxlan_teardown_wait(sc);
1745 		VXLAN_WUNLOCK(sc);
1746 		return;
1747 	}
1748 
1749 	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1750 	vxlan_teardown_locked(sc);
1751 }
1752 
1753 static void
1754 vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
1755     struct vxlan_softc_head *list)
1756 {
1757 
1758 	VXLAN_WLOCK(sc);
1759 
1760 	if (sc->vxl_mc_ifp != ifp)
1761 		goto out;
1762 	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1763 		goto out;
1764 
1765 	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1766 	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
1767 
1768 out:
1769 	VXLAN_WUNLOCK(sc);
1770 }
1771 
1772 static void
1773 vxlan_timer(void *xsc)
1774 {
1775 	struct vxlan_softc *sc;
1776 
1777 	sc = xsc;
1778 	VXLAN_LOCK_WASSERT(sc);
1779 
1780 	vxlan_ftable_expire(sc);
1781 	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
1782 }
1783 
1784 static int
1785 vxlan_ioctl_ifflags(struct vxlan_softc *sc)
1786 {
1787 	struct ifnet *ifp;
1788 
1789 	ifp = sc->vxl_ifp;
1790 
1791 	if (ifp->if_flags & IFF_UP) {
1792 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1793 			vxlan_init(sc);
1794 	} else {
1795 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1796 			vxlan_teardown(sc);
1797 	}
1798 
1799 	return (0);
1800 }
1801 
1802 static int
1803 vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
1804 {
1805 	struct rm_priotracker tracker;
1806 	struct ifvxlancfg *cfg;
1807 
1808 	cfg = arg;
1809 	bzero(cfg, sizeof(*cfg));
1810 
1811 	VXLAN_RLOCK(sc, &tracker);
1812 	cfg->vxlc_vni = sc->vxl_vni;
1813 	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
1814 	    sizeof(union vxlan_sockaddr));
1815 	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
1816 	    sizeof(union vxlan_sockaddr));
1817 	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
1818 	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
1819 	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
1820 	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
1821 	cfg->vxlc_port_min = sc->vxl_min_port;
1822 	cfg->vxlc_port_max = sc->vxl_max_port;
1823 	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
1824 	cfg->vxlc_ttl = sc->vxl_ttl;
1825 	VXLAN_RUNLOCK(sc, &tracker);
1826 
1827 	return (0);
1828 }
1829 
1830 static int
1831 vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
1832 {
1833 	struct ifvxlancmd *cmd;
1834 	int error;
1835 
1836 	cmd = arg;
1837 
1838 	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
1839 		return (EINVAL);
1840 
1841 	VXLAN_WLOCK(sc);
1842 	if (vxlan_can_change_config(sc)) {
1843 		sc->vxl_vni = cmd->vxlcmd_vni;
1844 		error = 0;
1845 	} else
1846 		error = EBUSY;
1847 	VXLAN_WUNLOCK(sc);
1848 
1849 	return (error);
1850 }
1851 
1852 static int
1853 vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
1854 {
1855 	struct ifvxlancmd *cmd;
1856 	union vxlan_sockaddr *vxlsa;
1857 	int error;
1858 
1859 	cmd = arg;
1860 	vxlsa = &cmd->vxlcmd_sa;
1861 
1862 	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1863 		return (EINVAL);
1864 	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
1865 		return (EINVAL);
1866 
1867 	VXLAN_WLOCK(sc);
1868 	if (vxlan_can_change_config(sc)) {
1869 		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
1870 		error = 0;
1871 	} else
1872 		error = EBUSY;
1873 	VXLAN_WUNLOCK(sc);
1874 
1875 	return (error);
1876 }
1877 
1878 static int
1879 vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
1880 {
1881 	struct ifvxlancmd *cmd;
1882 	union vxlan_sockaddr *vxlsa;
1883 	int error;
1884 
1885 	cmd = arg;
1886 	vxlsa = &cmd->vxlcmd_sa;
1887 
1888 	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1889 		return (EINVAL);
1890 
1891 	VXLAN_WLOCK(sc);
1892 	if (vxlan_can_change_config(sc)) {
1893 		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
1894 		error = 0;
1895 	} else
1896 		error = EBUSY;
1897 	VXLAN_WUNLOCK(sc);
1898 
1899 	return (error);
1900 }
1901 
1902 static int
1903 vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
1904 {
1905 	struct ifvxlancmd *cmd;
1906 	int error;
1907 
1908 	cmd = arg;
1909 
1910 	if (cmd->vxlcmd_port == 0)
1911 		return (EINVAL);
1912 
1913 	VXLAN_WLOCK(sc);
1914 	if (vxlan_can_change_config(sc)) {
1915 		sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
1916 		error = 0;
1917 	} else
1918 		error = EBUSY;
1919 	VXLAN_WUNLOCK(sc);
1920 
1921 	return (error);
1922 }
1923 
1924 static int
1925 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
1926 {
1927 	struct ifvxlancmd *cmd;
1928 	int error;
1929 
1930 	cmd = arg;
1931 
1932 	if (cmd->vxlcmd_port == 0)
1933 		return (EINVAL);
1934 
1935 	VXLAN_WLOCK(sc);
1936 	if (vxlan_can_change_config(sc)) {
1937 		sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
1938 		error = 0;
1939 	} else
1940 		error = EBUSY;
1941 	VXLAN_WUNLOCK(sc);
1942 
1943 	return (error);
1944 }
1945 
1946 static int
1947 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
1948 {
1949 	struct ifvxlancmd *cmd;
1950 	uint16_t min, max;
1951 	int error;
1952 
1953 	cmd = arg;
1954 	min = cmd->vxlcmd_port_min;
1955 	max = cmd->vxlcmd_port_max;
1956 
1957 	if (max < min)
1958 		return (EINVAL);
1959 
1960 	VXLAN_WLOCK(sc);
1961 	if (vxlan_can_change_config(sc)) {
1962 		sc->vxl_min_port = min;
1963 		sc->vxl_max_port = max;
1964 		error = 0;
1965 	} else
1966 		error = EBUSY;
1967 	VXLAN_WUNLOCK(sc);
1968 
1969 	return (error);
1970 }
1971 
1972 static int
1973 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
1974 {
1975 	struct ifvxlancmd *cmd;
1976 	int error;
1977 
1978 	cmd = arg;
1979 
1980 	VXLAN_WLOCK(sc);
1981 	if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
1982 		sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
1983 		error = 0;
1984 	} else
1985 		error = EINVAL;
1986 	VXLAN_WUNLOCK(sc);
1987 
1988 	return (error);
1989 }
1990 
1991 static int
1992 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
1993 {
1994 	struct ifvxlancmd *cmd;
1995 	int error;
1996 
1997 	cmd = arg;
1998 
1999 	VXLAN_WLOCK(sc);
2000 	if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
2001 		sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
2002 		error = 0;
2003 	} else
2004 		error = EINVAL;
2005 	VXLAN_WUNLOCK(sc);
2006 
2007 	return (error);
2008 }
2009 
2010 static int
2011 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
2012 {
2013 	struct ifvxlancmd *cmd;
2014 	int error;
2015 
2016 	cmd = arg;
2017 
2018 	VXLAN_WLOCK(sc);
2019 	if (vxlan_can_change_config(sc)) {
2020 		strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
2021 		error = 0;
2022 	} else
2023 		error = EBUSY;
2024 	VXLAN_WUNLOCK(sc);
2025 
2026 	return (error);
2027 }
2028 
2029 static int
2030 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
2031 {
2032 	struct ifvxlancmd *cmd;
2033 	int error;
2034 
2035 	cmd = arg;
2036 
2037 	VXLAN_WLOCK(sc);
2038 	if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
2039 		sc->vxl_ttl = cmd->vxlcmd_ttl;
2040 		if (sc->vxl_im4o != NULL)
2041 			sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
2042 		if (sc->vxl_im6o != NULL)
2043 			sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
2044 		error = 0;
2045 	} else
2046 		error = EINVAL;
2047 	VXLAN_WUNLOCK(sc);
2048 
2049 	return (error);
2050 }
2051 
2052 static int
2053 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
2054 {
2055 	struct ifvxlancmd *cmd;
2056 
2057 	cmd = arg;
2058 
2059 	VXLAN_WLOCK(sc);
2060 	if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
2061 		sc->vxl_flags |= VXLAN_FLAG_LEARN;
2062 	else
2063 		sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2064 	VXLAN_WUNLOCK(sc);
2065 
2066 	return (0);
2067 }
2068 
2069 static int
2070 vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
2071 {
2072 	union vxlan_sockaddr vxlsa;
2073 	struct ifvxlancmd *cmd;
2074 	struct vxlan_ftable_entry *fe;
2075 	int error;
2076 
2077 	cmd = arg;
2078 	vxlsa = cmd->vxlcmd_sa;
2079 
2080 	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
2081 		return (EINVAL);
2082 	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
2083 		return (EINVAL);
2084 	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2085 		return (EINVAL);
2086 	/* BMV: We could support both IPv4 and IPv6 later. */
2087 	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
2088 		return (EAFNOSUPPORT);
2089 
2090 	fe = vxlan_ftable_entry_alloc();
2091 	if (fe == NULL)
2092 		return (ENOMEM);
2093 
2094 	if (vxlsa.in4.sin_port == 0)
2095 		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
2096 
2097 	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
2098 	    VXLAN_FE_FLAG_STATIC);
2099 
2100 	VXLAN_WLOCK(sc);
2101 	error = vxlan_ftable_entry_insert(sc, fe);
2102 	VXLAN_WUNLOCK(sc);
2103 
2104 	if (error)
2105 		vxlan_ftable_entry_free(fe);
2106 
2107 	return (error);
2108 }
2109 
2110 static int
2111 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
2112 {
2113 	struct ifvxlancmd *cmd;
2114 	struct vxlan_ftable_entry *fe;
2115 	int error;
2116 
2117 	cmd = arg;
2118 
2119 	VXLAN_WLOCK(sc);
2120 	fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
2121 	if (fe != NULL) {
2122 		vxlan_ftable_entry_destroy(sc, fe);
2123 		error = 0;
2124 	} else
2125 		error = ENOENT;
2126 	VXLAN_WUNLOCK(sc);
2127 
2128 	return (error);
2129 }
2130 
2131 static int
2132 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
2133 {
2134 	struct ifvxlancmd *cmd;
2135 	int all;
2136 
2137 	cmd = arg;
2138 	all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
2139 
2140 	VXLAN_WLOCK(sc);
2141 	vxlan_ftable_flush(sc, all);
2142 	VXLAN_WUNLOCK(sc);
2143 
2144 	return (0);
2145 }
2146 
2147 static int
2148 vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
2149 {
2150 	const struct vxlan_control *vc;
2151 	union {
2152 		struct ifvxlancfg	cfg;
2153 		struct ifvxlancmd	cmd;
2154 	} args;
2155 	int out, error;
2156 
2157 	if (ifd->ifd_cmd >= vxlan_control_table_size)
2158 		return (EINVAL);
2159 
2160 	bzero(&args, sizeof(args));
2161 	vc = &vxlan_control_table[ifd->ifd_cmd];
2162 	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
2163 
2164 	if ((get != 0 && out == 0) || (get == 0 && out != 0))
2165 		return (EINVAL);
2166 
2167 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
2168 		error = priv_check(curthread, PRIV_NET_VXLAN);
2169 		if (error)
2170 			return (error);
2171 	}
2172 
2173 	if (ifd->ifd_len != vc->vxlc_argsize ||
2174 	    ifd->ifd_len > sizeof(args))
2175 		return (EINVAL);
2176 
2177 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
2178 		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
2179 		if (error)
2180 			return (error);
2181 	}
2182 
2183 	error = vc->vxlc_func(sc, &args);
2184 	if (error)
2185 		return (error);
2186 
2187 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
2188 		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
2189 		if (error)
2190 			return (error);
2191 	}
2192 
2193 	return (0);
2194 }
2195 
2196 static int
2197 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2198 {
2199 	struct vxlan_softc *sc;
2200 	struct ifreq *ifr;
2201 	struct ifdrv *ifd;
2202 	int error;
2203 
2204 	sc = ifp->if_softc;
2205 	ifr = (struct ifreq *) data;
2206 	ifd = (struct ifdrv *) data;
2207 
2208 	switch (cmd) {
2209 	case SIOCADDMULTI:
2210 	case SIOCDELMULTI:
2211 		error = 0;
2212 		break;
2213 
2214 	case SIOCGDRVSPEC:
2215 	case SIOCSDRVSPEC:
2216 		error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
2217 		break;
2218 
2219 	case SIOCSIFFLAGS:
2220 		error = vxlan_ioctl_ifflags(sc);
2221 		break;
2222 	default:
2223 		error = ether_ioctl(ifp, cmd, data);
2224 		break;
2225 	}
2226 
2227 	return (error);
2228 }
2229 
2230 #if defined(INET) || defined(INET6)
2231 static uint16_t
2232 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
2233 {
2234 	int range;
2235 	uint32_t hash;
2236 
2237 	range = sc->vxl_max_port - sc->vxl_min_port + 1;
2238 
2239 	/* check if flowid is set and not opaque */
2240 	if (M_HASHTYPE_ISHASH(m))
2241 		hash = m->m_pkthdr.flowid;
2242 	else
2243 		hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
2244 		    sc->vxl_port_hash_key);
2245 
2246 	return (sc->vxl_min_port + (hash % range));
2247 }
2248 
2249 static void
2250 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
2251     uint16_t srcport, uint16_t dstport)
2252 {
2253 	struct vxlanudphdr *hdr;
2254 	struct udphdr *udph;
2255 	struct vxlan_header *vxh;
2256 	int len;
2257 
2258 	len = m->m_pkthdr.len - ipoff;
2259 	MPASS(len >= sizeof(struct vxlanudphdr));
2260 	hdr = mtodo(m, ipoff);
2261 
2262 	udph = &hdr->vxlh_udp;
2263 	udph->uh_sport = srcport;
2264 	udph->uh_dport = dstport;
2265 	udph->uh_ulen = htons(len);
2266 	udph->uh_sum = 0;
2267 
2268 	vxh = &hdr->vxlh_hdr;
2269 	vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
2270 	vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
2271 }
2272 #endif
2273 
2274 static int
2275 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
2276     struct mbuf *m)
2277 {
2278 #ifdef INET
2279 	struct ifnet *ifp;
2280 	struct ip *ip;
2281 	struct in_addr srcaddr, dstaddr;
2282 	uint16_t srcport, dstport;
2283 	int len, mcast, error;
2284 
2285 	ifp = sc->vxl_ifp;
2286 	srcaddr = sc->vxl_src_addr.in4.sin_addr;
2287 	srcport = vxlan_pick_source_port(sc, m);
2288 	dstaddr = fvxlsa->in4.sin_addr;
2289 	dstport = fvxlsa->in4.sin_port;
2290 
2291 	M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
2292 	    M_NOWAIT);
2293 	if (m == NULL) {
2294 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2295 		return (ENOBUFS);
2296 	}
2297 
2298 	len = m->m_pkthdr.len;
2299 
2300 	ip = mtod(m, struct ip *);
2301 	ip->ip_tos = 0;
2302 	ip->ip_len = htons(len);
2303 	ip->ip_off = 0;
2304 	ip->ip_ttl = sc->vxl_ttl;
2305 	ip->ip_p = IPPROTO_UDP;
2306 	ip->ip_sum = 0;
2307 	ip->ip_src = srcaddr;
2308 	ip->ip_dst = dstaddr;
2309 
2310 	vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);
2311 
2312 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
2313 	m->m_flags &= ~(M_MCAST | M_BCAST);
2314 
2315 	error = ip_output(m, NULL, NULL, 0, sc->vxl_im4o, NULL);
2316 	if (error == 0) {
2317 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2318 		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
2319 		if (mcast != 0)
2320 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2321 	} else
2322 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2323 
2324 	return (error);
2325 #else
2326 	m_freem(m);
2327 	return (ENOTSUP);
2328 #endif
2329 }
2330 
2331 static int
2332 vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
2333     struct mbuf *m)
2334 {
2335 #ifdef INET6
2336 	struct ifnet *ifp;
2337 	struct ip6_hdr *ip6;
2338 	const struct in6_addr *srcaddr, *dstaddr;
2339 	uint16_t srcport, dstport;
2340 	int len, mcast, error;
2341 
2342 	ifp = sc->vxl_ifp;
2343 	srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
2344 	srcport = vxlan_pick_source_port(sc, m);
2345 	dstaddr = &fvxlsa->in6.sin6_addr;
2346 	dstport = fvxlsa->in6.sin6_port;
2347 
2348 	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
2349 	    M_NOWAIT);
2350 	if (m == NULL) {
2351 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2352 		return (ENOBUFS);
2353 	}
2354 
2355 	len = m->m_pkthdr.len;
2356 
2357 	ip6 = mtod(m, struct ip6_hdr *);
2358 	ip6->ip6_flow = 0;		/* BMV: Keep in forwarding entry? */
2359 	ip6->ip6_vfc = IPV6_VERSION;
2360 	ip6->ip6_plen = 0;
2361 	ip6->ip6_nxt = IPPROTO_UDP;
2362 	ip6->ip6_hlim = sc->vxl_ttl;
2363 	ip6->ip6_src = *srcaddr;
2364 	ip6->ip6_dst = *dstaddr;
2365 
2366 	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);
2367 
2368 	/*
2369 	 * XXX BMV We need support for RFC6935 before we can send and
2370 	 * receive IPv6 UDP packets with a zero checksum.
2371 	 */
2372 	{
2373 		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
2374 		hdr->uh_sum = in6_cksum_pseudo(ip6,
2375 		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
2376 		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
2377 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
2378 	}
2379 
2380 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
2381 	m->m_flags &= ~(M_MCAST | M_BCAST);
2382 
2383 	error = ip6_output(m, NULL, NULL, 0, sc->vxl_im6o, NULL, NULL);
2384 	if (error == 0) {
2385 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2386 		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
2387 		if (mcast != 0)
2388 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2389 	} else
2390 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2391 
2392 	return (error);
2393 #else
2394 	m_freem(m);
2395 	return (ENOTSUP);
2396 #endif
2397 }
2398 
2399 static int
2400 vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
2401 {
2402 	struct rm_priotracker tracker;
2403 	union vxlan_sockaddr vxlsa;
2404 	struct vxlan_softc *sc;
2405 	struct vxlan_ftable_entry *fe;
2406 	struct ifnet *mcifp;
2407 	struct ether_header *eh;
2408 	int ipv4, error;
2409 
2410 	sc = ifp->if_softc;
2411 	eh = mtod(m, struct ether_header *);
2412 	fe = NULL;
2413 	mcifp = NULL;
2414 
2415 	ETHER_BPF_MTAP(ifp, m);
2416 
2417 	VXLAN_RLOCK(sc, &tracker);
2418 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2419 		VXLAN_RUNLOCK(sc, &tracker);
2420 		m_freem(m);
2421 		return (ENETDOWN);
2422 	}
2423 
2424 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
2425 		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
2426 	if (fe == NULL)
2427 		fe = &sc->vxl_default_fe;
2428 	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
2429 
2430 	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
2431 	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2432 		mcifp = vxlan_multicast_if_ref(sc, ipv4);
2433 
2434 	VXLAN_ACQUIRE(sc);
2435 	VXLAN_RUNLOCK(sc, &tracker);
2436 
2437 	if (ipv4 != 0)
2438 		error = vxlan_encap4(sc, &vxlsa, m);
2439 	else
2440 		error = vxlan_encap6(sc, &vxlsa, m);
2441 
2442 	vxlan_release(sc);
2443 	if (mcifp != NULL)
2444 		if_rele(mcifp);
2445 
2446 	return (error);
2447 }
2448 
/*
 * if_qflush handler (installed by vxlan_clone_create()). Intentionally
 * empty: this function performs no work.
 */
static void
vxlan_qflush(struct ifnet *ifp __unused)
{
}
2453 
2454 static void
2455 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
2456     const struct sockaddr *srcsa, void *xvso)
2457 {
2458 	struct vxlan_socket *vso;
2459 	struct vxlan_header *vxh, vxlanhdr;
2460 	uint32_t vni;
2461 	int error;
2462 
2463 	M_ASSERTPKTHDR(m);
2464 	vso = xvso;
2465 	offset += sizeof(struct udphdr);
2466 
2467 	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
2468 		goto out;
2469 
2470 	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
2471 		m_copydata(m, offset, sizeof(struct vxlan_header),
2472 		    (caddr_t) &vxlanhdr);
2473 		vxh = &vxlanhdr;
2474 	} else
2475 		vxh = mtodo(m, offset);
2476 
2477 	/*
2478 	 * Drop if there is a reserved bit set in either the flags or VNI
2479 	 * fields of the header. This goes against the specification, but
2480 	 * a bit set may indicate an unsupported new feature. This matches
2481 	 * the behavior of the Linux implementation.
2482 	 */
2483 	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
2484 	    vxh->vxlh_vni & ~htonl(VXLAN_VNI_MASK))
2485 		goto out;
2486 
2487 	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
2488 	/* Adjust to the start of the inner Ethernet frame. */
2489 	m_adj(m, offset + sizeof(struct vxlan_header));
2490 
2491 	error = vxlan_input(vso, vni, &m, srcsa);
2492 	MPASS(error != 0 || m == NULL);
2493 
2494 out:
2495 	if (m != NULL)
2496 		m_freem(m);
2497 }
2498 
2499 static int
2500 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
2501     const struct sockaddr *sa)
2502 {
2503 	struct vxlan_softc *sc;
2504 	struct ifnet *ifp;
2505 	struct mbuf *m;
2506 	struct ether_header *eh;
2507 	int error;
2508 
2509 	sc = vxlan_socket_lookup_softc(vso, vni);
2510 	if (sc == NULL)
2511 		return (ENOENT);
2512 
2513 	ifp = sc->vxl_ifp;
2514 	m = *m0;
2515 	eh = mtod(m, struct ether_header *);
2516 
2517 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2518 		error = ENETDOWN;
2519 		goto out;
2520 	} else if (ifp == m->m_pkthdr.rcvif) {
2521 		/* XXX Does not catch more complex loops. */
2522 		error = EDEADLK;
2523 		goto out;
2524 	}
2525 
2526 	if (sc->vxl_flags & VXLAN_FLAG_LEARN)
2527 		vxlan_ftable_update(sc, sa, eh->ether_shost);
2528 
2529 	m_clrprotoflags(m);
2530 	m->m_pkthdr.rcvif = ifp;
2531 	M_SETFIB(m, ifp->if_fib);
2532 
2533 	error = netisr_queue_src(NETISR_ETHER, 0, m);
2534 	*m0 = NULL;
2535 
2536 out:
2537 	vxlan_release(sc);
2538 	return (error);
2539 }
2540 
2541 static void
2542 vxlan_set_default_config(struct vxlan_softc *sc)
2543 {
2544 
2545 	sc->vxl_flags |= VXLAN_FLAG_LEARN;
2546 
2547 	sc->vxl_vni = VXLAN_VNI_MAX;
2548 	sc->vxl_ttl = IPDEFTTL;
2549 
2550 	if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
2551 		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
2552 		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
2553 	} else {
2554 		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2555 		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2556 	}
2557 
2558 	sc->vxl_min_port = V_ipport_firstauto;
2559 	sc->vxl_max_port = V_ipport_lastauto;
2560 
2561 	sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
2562 	sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
2563 }
2564 
2565 static int
2566 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
2567 {
2568 
2569 #ifndef INET
2570 	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
2571 	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
2572 		return (EAFNOSUPPORT);
2573 #endif
2574 
2575 #ifndef INET6
2576 	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
2577 	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
2578 		return (EAFNOSUPPORT);
2579 #endif
2580 
2581 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
2582 		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
2583 			sc->vxl_vni = vxlp->vxlp_vni;
2584 	}
2585 
2586 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
2587 		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
2588 		sc->vxl_src_addr.in4.sin_family = AF_INET;
2589 		sc->vxl_src_addr.in4.sin_addr = vxlp->vxlp_local_in4;
2590 	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
2591 		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
2592 		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
2593 		sc->vxl_src_addr.in6.sin6_addr = vxlp->vxlp_local_in6;
2594 	}
2595 
2596 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
2597 		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
2598 		sc->vxl_dst_addr.in4.sin_family = AF_INET;
2599 		sc->vxl_dst_addr.in4.sin_addr = vxlp->vxlp_remote_in4;
2600 	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
2601 		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
2602 		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
2603 		sc->vxl_dst_addr.in6.sin6_addr = vxlp->vxlp_remote_in6;
2604 	}
2605 
2606 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
2607 		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
2608 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
2609 		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
2610 
2611 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
2612 		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
2613 			sc->vxl_min_port = vxlp->vxlp_min_port;
2614 			sc->vxl_max_port = vxlp->vxlp_max_port;
2615 		}
2616 	}
2617 
2618 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
2619 		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
2620 
2621 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
2622 		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
2623 			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
2624 	}
2625 
2626 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
2627 		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
2628 			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
2629 	}
2630 
2631 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
2632 		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
2633 			sc->vxl_ttl = vxlp->vxlp_ttl;
2634 	}
2635 
2636 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
2637 		if (vxlp->vxlp_learn == 0)
2638 			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2639 	}
2640 
2641 	return (0);
2642 }
2643 
2644 static int
2645 vxlan_clone_create(struct if_clone *ifc, int unit, caddr_t params)
2646 {
2647 	struct vxlan_softc *sc;
2648 	struct ifnet *ifp;
2649 	struct ifvxlanparam vxlp;
2650 	int error;
2651 
2652 	sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
2653 	sc->vxl_unit = unit;
2654 	vxlan_set_default_config(sc);
2655 
2656 	if (params != 0) {
2657 		error = copyin(params, &vxlp, sizeof(vxlp));
2658 		if (error)
2659 			goto fail;
2660 
2661 		error = vxlan_set_user_config(sc, &vxlp);
2662 		if (error)
2663 			goto fail;
2664 	}
2665 
2666 	ifp = if_alloc(IFT_ETHER);
2667 	if (ifp == NULL) {
2668 		error = ENOSPC;
2669 		goto fail;
2670 	}
2671 
2672 	sc->vxl_ifp = ifp;
2673 	rm_init(&sc->vxl_lock, "vxlanrm");
2674 	callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
2675 	sc->vxl_port_hash_key = arc4random();
2676 	vxlan_ftable_init(sc);
2677 
2678 	vxlan_sysctl_setup(sc);
2679 
2680 	ifp->if_softc = sc;
2681 	if_initname(ifp, vxlan_name, unit);
2682 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2683 	ifp->if_init = vxlan_init;
2684 	ifp->if_ioctl = vxlan_ioctl;
2685 	ifp->if_transmit = vxlan_transmit;
2686 	ifp->if_qflush = vxlan_qflush;
2687 
2688 	vxlan_fakeaddr(sc);
2689 	ether_ifattach(ifp, sc->vxl_hwaddr);
2690 
2691 	ifp->if_baudrate = 0;
2692 	ifp->if_hdrlen = 0;
2693 
2694 	return (0);
2695 
2696 fail:
2697 	free(sc, M_VXLAN);
2698 	return (error);
2699 }
2700 
/*
 * Cloner "destroy" callback: undo everything vxlan_clone_create() set up
 * and release the softc.
 *
 * The interface is torn down and its forwarding table emptied before the
 * ifnet is detached and freed; the remaining per-softc resources
 * (forwarding table, sysctl tree, lock) are destroyed last.
 */
static void
vxlan_clone_destroy(struct ifnet *ifp)
{
	struct vxlan_softc *sc;

	sc = ifp->if_softc;

	vxlan_teardown(sc);

	/* Flush with all != 0, as in the "flush all" control command. */
	vxlan_ftable_flush(sc, 1);

	ether_ifdetach(ifp);
	if_free(ifp);

	vxlan_ftable_fini(sc);

	vxlan_sysctl_destroy(sc);
	rm_destroy(&sc->vxl_lock);
	free(sc, M_VXLAN);
}
2721 
2722 /* BMV: Taken from if_bridge. */
2723 static uint32_t
2724 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
2725 {
2726 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
2727 
2728 	b += addr[5] << 8;
2729 	b += addr[4];
2730 	a += addr[3] << 24;
2731 	a += addr[2] << 16;
2732 	a += addr[1] << 8;
2733 	a += addr[0];
2734 
2735 /*
2736  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2737  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
2738  */
2739 #define	mix(a, b, c)							\
2740 do {									\
2741 	a -= b; a -= c; a ^= (c >> 13);					\
2742 	b -= c; b -= a; b ^= (a << 8);					\
2743 	c -= a; c -= b; c ^= (b >> 13);					\
2744 	a -= b; a -= c; a ^= (c >> 12);					\
2745 	b -= c; b -= a; b ^= (a << 16);					\
2746 	c -= a; c -= b; c ^= (b >> 5);					\
2747 	a -= b; a -= c; a ^= (c >> 3);					\
2748 	b -= c; b -= a; b ^= (a << 10);					\
2749 	c -= a; c -= b; c ^= (b >> 15);					\
2750 } while (0)
2751 
2752 	mix(a, b, c);
2753 
2754 #undef mix
2755 
2756 	return (c);
2757 }
2758 
2759 static void
2760 vxlan_fakeaddr(struct vxlan_softc *sc)
2761 {
2762 
2763 	/*
2764 	 * Generate a non-multicast, locally administered address.
2765 	 *
2766 	 * BMV: Should we use the FreeBSD OUI range instead?
2767 	 */
2768 	arc4rand(sc->vxl_hwaddr, ETHER_ADDR_LEN, 1);
2769 	sc->vxl_hwaddr[0] &= ~1;
2770 	sc->vxl_hwaddr[0] |= 2;
2771 }
2772 
2773 static int
2774 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
2775     const struct sockaddr *sa)
2776 {
2777 
2778 	return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
2779 }
2780 
2781 static void
2782 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
2783     const struct sockaddr *sa)
2784 {
2785 
2786 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
2787 	bzero(vxladdr, sizeof(*vxladdr));
2788 
2789 	if (sa->sa_family == AF_INET) {
2790 		vxladdr->in4 = *satoconstsin(sa);
2791 		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
2792 	} else if (sa->sa_family == AF_INET6) {
2793 		vxladdr->in6 = *satoconstsin6(sa);
2794 		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
2795 	}
2796 }
2797 
2798 static int
2799 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
2800     const struct sockaddr *sa)
2801 {
2802 	int equal;
2803 
2804 	if (sa->sa_family == AF_INET) {
2805 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2806 		equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
2807 	} else if (sa->sa_family == AF_INET6) {
2808 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2809 		equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
2810 	} else
2811 		equal = 0;
2812 
2813 	return (equal);
2814 }
2815 
2816 static void
2817 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
2818     const struct sockaddr *sa)
2819 {
2820 
2821 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
2822 
2823 	if (sa->sa_family == AF_INET) {
2824 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2825 		vxladdr->in4.sin_family = AF_INET;
2826 		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
2827 		vxladdr->in4.sin_addr = *in4;
2828 	} else if (sa->sa_family == AF_INET6) {
2829 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2830 		vxladdr->in6.sin6_family = AF_INET6;
2831 		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
2832 		vxladdr->in6.sin6_addr = *in6;
2833 	}
2834 }
2835 
2836 static int
2837 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
2838 {
2839 	const struct sockaddr *sa;
2840 	int supported;
2841 
2842 	sa = &vxladdr->sa;
2843 	supported = 0;
2844 
2845 	if (sa->sa_family == AF_UNSPEC && unspec != 0) {
2846 		supported = 1;
2847 	} else if (sa->sa_family == AF_INET) {
2848 #ifdef INET
2849 		supported = 1;
2850 #endif
2851 	} else if (sa->sa_family == AF_INET6) {
2852 #ifdef INET6
2853 		supported = 1;
2854 #endif
2855 	}
2856 
2857 	return (supported);
2858 }
2859 
2860 static int
2861 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
2862 {
2863 	const struct sockaddr *sa;
2864 	int any;
2865 
2866 	sa = &vxladdr->sa;
2867 
2868 	if (sa->sa_family == AF_INET) {
2869 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2870 		any = in4->s_addr == INADDR_ANY;
2871 	} else if (sa->sa_family == AF_INET6) {
2872 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2873 		any = IN6_IS_ADDR_UNSPECIFIED(in6);
2874 	} else
2875 		any = -1;
2876 
2877 	return (any);
2878 }
2879 
2880 static int
2881 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
2882 {
2883 	const struct sockaddr *sa;
2884 	int mc;
2885 
2886 	sa = &vxladdr->sa;
2887 
2888 	if (sa->sa_family == AF_INET) {
2889 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
2890 		mc = IN_MULTICAST(ntohl(in4->s_addr));
2891 	} else if (sa->sa_family == AF_INET6) {
2892 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
2893 		mc = IN6_IS_ADDR_MULTICAST(in6);
2894 	} else
2895 		mc = -1;
2896 
2897 	return (mc);
2898 }
2899 
2900 static int
2901 vxlan_can_change_config(struct vxlan_softc *sc)
2902 {
2903 	struct ifnet *ifp;
2904 
2905 	ifp = sc->vxl_ifp;
2906 	VXLAN_LOCK_ASSERT(sc);
2907 
2908 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2909 		return (0);
2910 	if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
2911 		return (0);
2912 
2913 	return (1);
2914 }
2915 
2916 static int
2917 vxlan_check_vni(uint32_t vni)
2918 {
2919 
2920 	return (vni >= VXLAN_VNI_MAX);
2921 }
2922 
/*
 * Validate a TTL / hop limit value. Returns 0 when 'ttl' lies within
 * [0, MAXTTL] and non-zero when it is out of range. The parameter is a
 * signed int, so negative values are rejected explicitly.
 */
static int
vxlan_check_ttl(int ttl)
{

	return (ttl < 0 || ttl > MAXTTL);
}
2929 
2930 static int
2931 vxlan_check_ftable_timeout(uint32_t timeout)
2932 {
2933 
2934 	return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
2935 }
2936 
2937 static int
2938 vxlan_check_ftable_max(uint32_t max)
2939 {
2940 
2941 	return (max > VXLAN_FTABLE_MAX);
2942 }
2943 
2944 static void
2945 vxlan_sysctl_setup(struct vxlan_softc *sc)
2946 {
2947 	struct sysctl_ctx_list *ctx;
2948 	struct sysctl_oid *node;
2949 	struct vxlan_statistics *stats;
2950 	char namebuf[8];
2951 
2952 	ctx = &sc->vxl_sysctl_ctx;
2953 	stats = &sc->vxl_stats;
2954 	snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
2955 
2956 	sysctl_ctx_init(ctx);
2957 	sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
2958 	    SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
2959 	    CTLFLAG_RD, NULL, "");
2960 
2961 	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
2962 	    OID_AUTO, "ftable", CTLFLAG_RD, NULL, "");
2963 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
2964 	    CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
2965 	    "Number of entries in fowarding table");
2966 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
2967 	     CTLFLAG_RD, &sc->vxl_ftable_max, 0,
2968 	    "Maximum number of entries allowed in fowarding table");
2969 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
2970 	    CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
2971 	    "Number of seconds between prunes of the forwarding table");
2972 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
2973 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
2974 	    sc, 0, vxlan_ftable_sysctl_dump, "A",
2975 	    "Dump the forwarding table entries");
2976 
2977 	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
2978 	    OID_AUTO, "stats", CTLFLAG_RD, NULL, "");
2979 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
2980 	    "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
2981 	    "Fowarding table reached maximum entries");
2982 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
2983 	    "ftable_lock_upgrade_failed", CTLFLAG_RD,
2984 	    &stats->ftable_lock_upgrade_failed, 0,
2985 	    "Forwarding table update required lock upgrade");
2986 }
2987 
/*
 * Release the sysctl context that vxlan_sysctl_setup() initialized in
 * sc->vxl_sysctl_ctx and clear the cached root node pointer.
 */
static void
vxlan_sysctl_destroy(struct vxlan_softc *sc)
{

	sysctl_ctx_free(&sc->vxl_sysctl_ctx);
	sc->vxl_sysctl_node = NULL;
}
2995 
2996 static int
2997 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
2998 {
2999 	char path[64];
3000 
3001 	snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
3002 	    sc->vxl_unit, knob);
3003 	TUNABLE_INT_FETCH(path, &def);
3004 
3005 	return (def);
3006 }
3007 
3008 static void
3009 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
3010 {
3011 	struct vxlan_softc_head list;
3012 	struct vxlan_socket *vso;
3013 	struct vxlan_softc *sc, *tsc;
3014 
3015 	LIST_INIT(&list);
3016 
3017 	if (ifp->if_flags & IFF_RENAMING)
3018 		return;
3019 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
3020 		return;
3021 
3022 	mtx_lock(&vxlan_list_mtx);
3023 	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
3024 		vxlan_socket_ifdetach(vso, ifp, &list);
3025 	mtx_unlock(&vxlan_list_mtx);
3026 
3027 	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
3028 		LIST_REMOVE(sc, vxl_ifdetach_list);
3029 
3030 		VXLAN_WLOCK(sc);
3031 		if (sc->vxl_flags & VXLAN_FLAG_INIT)
3032 			vxlan_init_wait(sc);
3033 		vxlan_teardown_locked(sc);
3034 	}
3035 }
3036 
/*
 * Module load: initialize the global socket list and its mutex, register
 * for interface departure events and create the "vxlan" interface cloner.
 */
static void
vxlan_load(void)
{

	mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
	LIST_INIT(&vxlan_socket_list);
	vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
	    vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
	vxlan_cloner = if_clone_simple(vxlan_name, vxlan_clone_create,
	    vxlan_clone_destroy, 0);
}
3048 
/*
 * Module unload: undo vxlan_load(). The departure event handler is
 * deregistered and the cloner detached before the list mutex is
 * destroyed; the socket list is asserted to be empty at the end.
 */
static void
vxlan_unload(void)
{

	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    vxlan_ifdetach_event_tag);
	if_clone_detach(vxlan_cloner);
	mtx_destroy(&vxlan_list_mtx);
	MPASS(LIST_EMPTY(&vxlan_socket_list));
}
3059 
3060 static int
3061 vxlan_modevent(module_t mod, int type, void *unused)
3062 {
3063 	int error;
3064 
3065 	error = 0;
3066 
3067 	switch (type) {
3068 	case MOD_LOAD:
3069 		vxlan_load();
3070 		break;
3071 	case MOD_UNLOAD:
3072 		vxlan_unload();
3073 		break;
3074 	default:
3075 		error = ENOTSUP;
3076 		break;
3077 	}
3078 
3079 	return (error);
3080 }
3081 
/*
 * Module glue: registers "if_vxlan" with vxlan_modevent() as its event
 * handler and declares module version 1.
 */
static moduledata_t vxlan_mod = {
	"if_vxlan",
	vxlan_modevent,
	0
};

DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_vxlan, 1);
3090