xref: /freebsd/sys/net/if_vxlan.c (revision e7ab133648a168c4bf7c11da840663c5581771d8)
1 /*-
2  * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  * Copyright (c) 2020, Chelsio Communications.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice unmodified, this list of conditions, and the following
11  *    disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "opt_inet.h"
29 #include "opt_inet6.h"
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/eventhandler.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/hash.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/refcount.h>
43 #include <sys/rmlock.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/queue.h>
47 #include <sys/sbuf.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_var.h>
58 #include <net/if_private.h>
59 #include <net/if_clone.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_types.h>
63 #include <net/if_vxlan.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
66 #include <net/route/nhop.h>
67 
68 #include <netinet/in.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/in_var.h>
71 #include <netinet/in_pcb.h>
72 #include <netinet/ip.h>
73 #include <netinet/ip6.h>
74 #include <netinet/ip_var.h>
75 #include <netinet/udp.h>
76 #include <netinet/udp_var.h>
77 #include <netinet/in_fib.h>
78 #include <netinet6/in6_fib.h>
79 
80 #include <netinet6/ip6_var.h>
81 #include <netinet6/scope6_var.h>
82 
83 struct vxlan_softc;
84 LIST_HEAD(vxlan_softc_head, vxlan_softc);
85 
86 struct sx vxlan_sx;
87 SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock");
88 
/*
 * One slot of a socket's multicast membership table (vxlso_mc[]).
 *
 * vxlan_socket_destroy() asserts that vxlsomc_gaddr has family AF_UNSPEC
 * in every slot, so AF_UNSPEC marks a slot that is not in use.
 */
struct vxlan_socket_mc_info {
	union vxlan_sockaddr		 vxlsomc_saddr;	/* presumably the local (source) address - confirm */
	union vxlan_sockaddr		 vxlsomc_gaddr;	/* group address; AF_UNSPEC when free */
	int				 vxlsomc_ifidx;	/* presumably the multicast interface index - confirm */
	int				 vxlsomc_users;	/* presumably the number of softcs sharing the slot - confirm */
};
95 
/*
 * The maximum MTU of an encapsulated Ethernet frame carried inside an
 * IPv4/UDP packet: the largest IP packet minus the worst-case IPv4 header,
 * the UDP and VXLAN headers, and the inner Ethernet header, CRC and VLAN
 * tag.
 */
#define VXLAN_MAX_MTU	(IP_MAXPACKET - \
		60 /* Maximum IPv4 header len */ - \
		sizeof(struct udphdr) - \
		sizeof(struct vxlan_header) - \
		ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN)
/* Interface capabilities grouped as the "basic" set. */
#define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU)

/* Number of multicast slots (vxlso_mc[]) in each vxlan_socket. */
#define VXLAN_SO_MC_MAX_GROUPS		32

/*
 * Each vxlan_socket keeps its softcs in a hash table keyed by VNI:
 * 2^VXLAN_SO_VNI_HASH_SHIFT buckets, selected by VNI modulo the size.
 */
#define VXLAN_SO_VNI_HASH_SHIFT		6
#define VXLAN_SO_VNI_HASH_SIZE		(1 << VXLAN_SO_VNI_HASH_SHIFT)
#define VXLAN_SO_VNI_HASH(_vni)		((_vni) % VXLAN_SO_VNI_HASH_SIZE)
111 
/*
 * A kernel UDP socket together with the bookkeeping needed to share it.
 *
 * Instances are created by vxlan_socket_alloc(), linked onto the global
 * vxlan_socket_list by vxlan_socket_insert(), and torn down by
 * vxlan_socket_destroy() once vxlan_socket_release() drops the last
 * reference.
 */
struct vxlan_socket {
	struct socket			*vxlso_sock;	/* created in vxlan_socket_init(), closed on destroy */
	struct rmlock			 vxlso_lock;	/* see the VXLAN_SO_*LOCK macros */
	u_int				 vxlso_refcnt;	/* see VXLAN_SO_ACQUIRE/VXLAN_SO_RELEASE */
	union vxlan_sockaddr		 vxlso_laddr;	/* local address; lookup key and bind address */
	LIST_ENTRY(vxlan_socket)	 vxlso_entry;	/* linkage on vxlan_socket_list */
	struct vxlan_softc_head		 vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];	/* softc lists, bucketed by VNI */
	struct vxlan_socket_mc_info	 vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];	/* multicast slots */
};
121 
/*
 * Wrappers around the per-socket rmlock.  The read-side macros take a
 * struct rm_priotracker pointer (_p), as rm_rlock()/rm_runlock() require.
 */
#define VXLAN_SO_RLOCK(_vso, _p)	rm_rlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_RUNLOCK(_vso, _p)	rm_runlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_WLOCK(_vso)		rm_wlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_WUNLOCK(_vso)		rm_wunlock(&(_vso)->vxlso_lock)
/* Assert the lock is held in any mode. */
#define VXLAN_SO_LOCK_ASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
/* Assert the lock is held for writing. */
#define VXLAN_SO_LOCK_WASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)

/*
 * Reference counting on the socket.  VXLAN_SO_RELEASE evaluates to the
 * result of refcount_release(); vxlan_socket_release() destroys the
 * socket when that result is non-zero.
 */
#define VXLAN_SO_ACQUIRE(_vso)		refcount_acquire(&(_vso)->vxlso_refcnt)
#define VXLAN_SO_RELEASE(_vso)		refcount_release(&(_vso)->vxlso_refcnt)
133 
/*
 * One forwarding table entry: maps an inner Ethernet address to the
 * address of a remote endpoint.
 */
struct vxlan_ftable_entry {
	LIST_ENTRY(vxlan_ftable_entry)	 vxlfe_hash;	/* linkage within a hash bucket */
	uint16_t			 vxlfe_flags;	/* VXLAN_FE_FLAG_* */
	uint8_t				 vxlfe_mac[ETHER_ADDR_LEN];	/* lookup key */
	union vxlan_sockaddr		 vxlfe_raddr;	/* remote address for this MAC */
	time_t				 vxlfe_expire;	/* compared against time_uptime when expiring */
};
141 
/* Values for vxlfe_flags. */
#define VXLAN_FE_FLAG_DYNAMIC		0x01
#define VXLAN_FE_FLAG_STATIC		0x02

/* Non-zero when the entry carries VXLAN_FE_FLAG_DYNAMIC. */
#define VXLAN_FE_IS_DYNAMIC(_fe) \
    ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)

/*
 * Geometry of the per-softc forwarding table: 2^VXLAN_SC_FTABLE_SHIFT
 * buckets, with the bucket chosen from vxlan_mac_hash() of the MAC.
 */
#define VXLAN_SC_FTABLE_SHIFT		9
#define VXLAN_SC_FTABLE_SIZE		(1 << VXLAN_SC_FTABLE_SHIFT)
#define VXLAN_SC_FTABLE_MASK		(VXLAN_SC_FTABLE_SIZE - 1)
#define VXLAN_SC_FTABLE_HASH(_sc, _mac)	\
    (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)

/* Head type for one forwarding table bucket. */
LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
155 
/* Per-interface counters kept in the softc (vxl_stats). */
struct vxlan_statistics {
	/* Bumped when a learned entry is rejected because the table is full. */
	uint32_t	ftable_nospace;
	/*
	 * Bumped each time vxlan_ftable_update_locked() drops the read lock
	 * and takes the write lock before retrying.
	 */
	uint32_t	ftable_lock_upgrade_failed;
	/* Offload counters; presumably updated on the data path - confirm. */
	counter_u64_t	txcsum;
	counter_u64_t	tso;
	counter_u64_t	rxcsum;
};
163 
/*
 * Per-interface state for one vxlan(4) instance.
 *
 * The forwarding table fields are guarded by vxl_lock (an rmlock); see
 * the VXLAN_*LOCK macros.
 */
struct vxlan_softc {
	struct ifnet			*vxl_ifp;
	int				 vxl_reqcap;
	u_int				 vxl_fibnum;
	struct vxlan_socket		*vxl_sock;
	uint32_t			 vxl_vni;
	union vxlan_sockaddr		 vxl_src_addr;
	/* The port of this address is reused for learned entries. */
	union vxlan_sockaddr		 vxl_dst_addr;
	uint32_t			 vxl_flags;
#define VXLAN_FLAG_INIT		0x0001
#define VXLAN_FLAG_TEARDOWN	0x0002
#define VXLAN_FLAG_LEARN	0x0004
#define VXLAN_FLAG_USER_MTU	0x0008

	uint32_t			 vxl_port_hash_key;
	uint16_t			 vxl_min_port;
	uint16_t			 vxl_max_port;
	uint8_t				 vxl_ttl;

	/* Lookup table from MAC address to forwarding entry. */
	uint32_t			 vxl_ftable_cnt;	/* entries currently in the table */
	uint32_t			 vxl_ftable_max;	/* limit enforced when learning */
	uint32_t			 vxl_ftable_timeout;	/* added to time_uptime to set an entry's expiry */
	uint32_t			 vxl_ftable_hash_key;	/* random, chosen in vxlan_ftable_init() */
	struct vxlan_ftable_head	*vxl_ftable;	/* VXLAN_SC_FTABLE_SIZE buckets */

	/* Derived from vxl_dst_addr. */
	struct vxlan_ftable_entry	 vxl_default_fe;

	struct ip_moptions		*vxl_im4o;
	struct ip6_moptions		*vxl_im6o;

	struct rmlock			 vxl_lock;
	volatile u_int			 vxl_refcnt;	/* see VXLAN_ACQUIRE/VXLAN_RELEASE */

	int				 vxl_unit;
	int				 vxl_vso_mc_index;
	struct vxlan_statistics		 vxl_stats;
	struct sysctl_oid		*vxl_sysctl_node;
	struct sysctl_ctx_list		 vxl_sysctl_ctx;
	struct callout			 vxl_callout;
	struct ether_addr		 vxl_hwaddr;
	int				 vxl_mc_ifindex;
	struct ifnet			*vxl_mc_ifp;
	struct ifmedia 			 vxl_media;
	char				 vxl_mc_ifname[IFNAMSIZ];
	LIST_ENTRY(vxlan_softc)		 vxl_entry;
	LIST_ENTRY(vxlan_softc)		 vxl_ifdetach_list;

	/* For rate limiting errors on the tx fast path. */
	struct timeval err_time;
	int err_pps;
};
217 
/*
 * Wrappers around the per-softc rmlock.  The read-side macros take a
 * struct rm_priotracker pointer (_p).
 */
#define VXLAN_RLOCK(_sc, _p)	rm_rlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_WLOCK(_sc)	rm_wlock(&(_sc)->vxl_lock)
#define VXLAN_WUNLOCK(_sc)	rm_wunlock(&(_sc)->vxl_lock)
#define VXLAN_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->vxl_lock)
#define VXLAN_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
#define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
/*
 * Release the lock in whichever mode it is held: the write lock if the
 * caller owns it, the read lock (using tracker _p) otherwise.  Used
 * where the read lock may have been exchanged for the write lock.
 */
#define VXLAN_UNLOCK(_sc, _p) do {		\
    if (VXLAN_LOCK_WOWNED(_sc))			\
	VXLAN_WUNLOCK(_sc);			\
    else					\
	VXLAN_RUNLOCK(_sc, _p);			\
} while (0)

/* Reference counting on the softc. */
#define VXLAN_ACQUIRE(_sc)	refcount_acquire(&(_sc)->vxl_refcnt)
#define VXLAN_RELEASE(_sc)	refcount_release(&(_sc)->vxl_refcnt)

/* Const-preserving casts from a generic sockaddr pointer. */
#define	satoconstsin(sa)	((const struct sockaddr_in *)(sa))
#define	satoconstsin6(sa)	((const struct sockaddr_in6 *)(sa))
237 
/*
 * A UDP header immediately followed by the VXLAN header, packed so the
 * two are contiguous with no padding.
 */
struct vxlanudphdr {
	struct udphdr		vxlh_udp;
	struct vxlan_header	vxlh_hdr;
} __packed;
242 
/* Forwarding table. */
static int	vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
static void	vxlan_ftable_init(struct vxlan_softc *);
static void	vxlan_ftable_fini(struct vxlan_softc *);
static void	vxlan_ftable_flush(struct vxlan_softc *, int);
static void	vxlan_ftable_expire(struct vxlan_softc *);
static int	vxlan_ftable_update_locked(struct vxlan_softc *,
		    const union vxlan_sockaddr *, const uint8_t *,
		    struct rm_priotracker *);
static int	vxlan_ftable_learn(struct vxlan_softc *,
		    const struct sockaddr *, const uint8_t *);
static int	vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);

/* Forwarding table entries. */
static struct vxlan_ftable_entry *
		vxlan_ftable_entry_alloc(void);
static void	vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
static void	vxlan_ftable_entry_init(struct vxlan_softc *,
		    struct vxlan_ftable_entry *, const uint8_t *,
		    const struct sockaddr *, uint32_t);
static void	vxlan_ftable_entry_destroy(struct vxlan_softc *,
		    struct vxlan_ftable_entry *);
static int	vxlan_ftable_entry_insert(struct vxlan_softc *,
		    struct vxlan_ftable_entry *);
static struct vxlan_ftable_entry *
		vxlan_ftable_entry_lookup(struct vxlan_softc *,
		    const uint8_t *);
static void	vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
		    struct sbuf *);

/* Socket objects. */
static struct vxlan_socket *
		vxlan_socket_alloc(const union vxlan_sockaddr *);
static void	vxlan_socket_destroy(struct vxlan_socket *);
static void	vxlan_socket_release(struct vxlan_socket *);
static struct vxlan_socket *
		vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
static void	vxlan_socket_insert(struct vxlan_socket *);
static int	vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
static int	vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
static int	vxlan_socket_create(struct ifnet *, int,
		    const union vxlan_sockaddr *, struct vxlan_socket **);
static void	vxlan_socket_ifdetach(struct vxlan_socket *,
		    struct ifnet *, struct vxlan_softc_head *);

/* Multicast groups on a socket. */
static struct vxlan_socket *
		vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_mc_info_match(
		    const struct vxlan_socket_mc_info *,
		    const union vxlan_sockaddr *,
		    const union vxlan_sockaddr *, int);
static int	vxlan_socket_mc_join_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
		    int *, union vxlan_sockaddr *);
static int	vxlan_socket_mc_leave_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *,
		    const union vxlan_sockaddr *, int);
static int	vxlan_socket_mc_add_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
		    int, int *);
static void	vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
		    int);

/* Softcs attached to a socket. */
static struct vxlan_softc *
		vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
		    uint32_t);
static struct vxlan_softc *
		vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
static int	vxlan_socket_insert_softc(struct vxlan_socket *,
		    struct vxlan_softc *);
static void	vxlan_socket_remove_softc(struct vxlan_socket *,
		    struct vxlan_softc *);

/* Multicast interface handling. */
static struct ifnet *
		vxlan_multicast_if_ref(struct vxlan_softc *, int);
static void	vxlan_free_multicast(struct vxlan_softc *);
static int	vxlan_setup_multicast_interface(struct vxlan_softc *);

/* Interface init, teardown and timer. */
static int	vxlan_setup_multicast(struct vxlan_softc *);
static int	vxlan_setup_socket(struct vxlan_softc *);
#ifdef INET6
static void	vxlan_setup_zero_checksum_port(struct vxlan_softc *);
#endif
static void	vxlan_setup_interface_hdrlen(struct vxlan_softc *);
static int	vxlan_valid_init_config(struct vxlan_softc *);
static void	vxlan_init_wait(struct vxlan_softc *);
static void	vxlan_init_complete(struct vxlan_softc *);
static void	vxlan_init(void *);
static void	vxlan_release(struct vxlan_softc *);
static void	vxlan_teardown_wait(struct vxlan_softc *);
static void	vxlan_teardown_complete(struct vxlan_softc *);
static void	vxlan_teardown_locked(struct vxlan_softc *);
static void	vxlan_teardown(struct vxlan_softc *);
static void	vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
		    struct vxlan_softc_head *);
static void	vxlan_timer(void *);

/* Control commands (see vxlan_control_table) and ioctl entry points. */
static int	vxlan_ctrl_get_config(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
static int	vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
static int	vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
static int	vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
static int	vxlan_ctrl_flush(struct vxlan_softc *, void *);
static int	vxlan_ioctl_drvspec(struct vxlan_softc *,
		    struct ifdrv *, int);
static int	vxlan_ioctl_ifflags(struct vxlan_softc *);
static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);

/* Encapsulation, transmit and receive. */
#if defined(INET) || defined(INET6)
static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
static void	vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
		    int, uint16_t, uint16_t);
#endif
static int	vxlan_encap4(struct vxlan_softc *,
		    const union vxlan_sockaddr *, struct mbuf *);
static int	vxlan_encap6(struct vxlan_softc *,
		    const union vxlan_sockaddr *, struct mbuf *);
static int	vxlan_transmit(struct ifnet *, struct mbuf *);
static void	vxlan_qflush(struct ifnet *);
static bool	vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
		    const struct sockaddr *, void *);
static int	vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
		    const struct sockaddr *);

/* Configuration and interface cloning. */
static int	vxlan_stats_alloc(struct vxlan_softc *);
static void	vxlan_stats_free(struct vxlan_softc *);
static void	vxlan_set_default_config(struct vxlan_softc *);
static int	vxlan_set_user_config(struct vxlan_softc *,
		     struct ifvxlanparam *);
static int	vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int);
static void	vxlan_set_hwcaps(struct vxlan_softc *);
static int	vxlan_clone_create(struct if_clone *, char *, size_t,
		    struct ifc_data *, struct ifnet **);
static int	vxlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);

/* MAC hashing and media. */
static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
static int	vxlan_media_change(struct ifnet *);
static void	vxlan_media_status(struct ifnet *, struct ifmediareq *);

/* Socket address helpers. */
static int	vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
		    const struct sockaddr *);
static void	vxlan_sockaddr_copy(union vxlan_sockaddr *,
		    const struct sockaddr *);
static int	vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
		    const struct sockaddr *);
static void	vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
		    const struct sockaddr *);
static int	vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
static int	vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *);

/* Parameter validation. */
static int	vxlan_can_change_config(struct vxlan_softc *);
static int	vxlan_check_vni(uint32_t);
static int	vxlan_check_ttl(int);
static int	vxlan_check_ftable_timeout(uint32_t);
static int	vxlan_check_ftable_max(uint32_t);

/* Sysctl and tunables. */
static void	vxlan_sysctl_setup(struct vxlan_softc *);
static void	vxlan_sysctl_destroy(struct vxlan_softc *);
static int	vxlan_tunable_int(struct vxlan_softc *, const char *, int);

/* Module events. */
static void	vxlan_ifdetach_event(void *, struct ifnet *);
static void	vxlan_load(void);
static void	vxlan_unload(void);
static int	vxlan_modevent(module_t, int, void *);
415 
/* Driver name; also used as the short name of the M_VXLAN malloc type. */
static const char vxlan_name[] = "vxlan";
static MALLOC_DEFINE(M_VXLAN, vxlan_name,
    "Virtual eXtensible LAN Interface");
static struct if_clone *vxlan_cloner;

/* Mutex taken around accesses to vxlan_socket_list. */
static struct mtx vxlan_list_mtx;
#define VXLAN_LIST_LOCK()	mtx_lock(&vxlan_list_mtx)
#define VXLAN_LIST_UNLOCK()	mtx_unlock(&vxlan_list_mtx)

/* All vxlan_socket objects, linked through vxlso_entry. */
static LIST_HEAD(, vxlan_socket) vxlan_socket_list;

static eventhandler_tag vxlan_ifdetach_event_tag;

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Virtual eXtensible Local Area Network");

/* Loader tunables; both default to off. */
static int vxlan_legacy_port = 0;
TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port);
/* When non-zero, vxlan_socket_init() sets SO_REUSEPORT on new sockets. */
static int vxlan_reuse_port = 0;
TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port);

/* Default maximum number of addresses in the forwarding table. */
#ifndef VXLAN_FTABLE_MAX
#define VXLAN_FTABLE_MAX	2000
#endif

/* Timeout (in seconds) of addresses learned in the forwarding table. */
#ifndef VXLAN_FTABLE_TIMEOUT
#define VXLAN_FTABLE_TIMEOUT	(20 * 60)
#endif

/*
 * Maximum timeout (in seconds) of addresses learned in the forwarding
 * table.
 */
#ifndef VXLAN_FTABLE_MAX_TIMEOUT
#define VXLAN_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)
#endif

/* Number of seconds between pruning attempts of the forwarding table. */
#ifndef VXLAN_FTABLE_PRUNE
#define VXLAN_FTABLE_PRUNE	(5 * 60)
#endif

/* Run-time copy of the prune interval, initialised from the default. */
static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
462 
/*
 * Description of one driver-specific control command: the handler to
 * call, the size of its argument structure, and VXLAN_CTRL_FLAG_* bits.
 */
struct vxlan_control {
	int	(*vxlc_func)(struct vxlan_softc *, void *);	/* command handler */
	int	vxlc_argsize;					/* size of the argument structure */
	int	vxlc_flags;
	/*
	 * NOTE(review): by their names, COPYIN/COPYOUT select the direction
	 * the argument is copied and SUSER marks privileged commands; the
	 * code that interprets them is not visible here - confirm.
	 */
#define VXLAN_CTRL_FLAG_COPYIN	0x01
#define VXLAN_CTRL_FLAG_COPYOUT	0x02
#define VXLAN_CTRL_FLAG_SUSER	0x04
};
471 
/*
 * Control command table, indexed by VXLAN_CMD_* value.  Only
 * VXLAN_CMD_GET_CONFIG copies data out and needs no privilege flag;
 * every other command copies its argument in and carries
 * VXLAN_CTRL_FLAG_SUSER.
 */
static const struct vxlan_control vxlan_control_table[] = {
	[VXLAN_CMD_GET_CONFIG] =
	    {	vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
		VXLAN_CTRL_FLAG_COPYOUT
	    },

	[VXLAN_CMD_SET_VNI] =
	    {   vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LOCAL_ADDR] =
	    {   vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_REMOTE_ADDR] =
	    {   vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LOCAL_PORT] =
	    {   vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_REMOTE_PORT] =
	    {   vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_PORT_RANGE] =
	    {   vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_FTABLE_TIMEOUT] =
	    {	vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_FTABLE_MAX] =
	    {	vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_MULTICAST_IF] =
	    {	vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_TTL] =
	    {	vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LEARN] =
	    {	vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_ADD] =
	    {	vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_REM] =
	    {	vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FLUSH] =
	    {   vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
		VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },
};

/* Number of slots in the table, for bounds-checking a command index. */
static const int vxlan_control_table_size = nitems(vxlan_control_table);
550 
/*
 * Compare two Ethernet addresses byte by byte.
 *
 * Returns 0 when all ETHER_ADDR_LEN bytes match.  Otherwise returns the
 * difference a[i] - b[i] of the first pair of bytes that differ, so the
 * sign of the result orders the two addresses.
 */
static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx, delta;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		delta = (int)a[idx] - (int)b[idx];
		if (delta != 0)
			return (delta);
	}

	return (0);
}
561 
562 static void
563 vxlan_ftable_init(struct vxlan_softc *sc)
564 {
565 	int i;
566 
567 	sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
568 	    VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
569 
570 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
571 		LIST_INIT(&sc->vxl_ftable[i]);
572 	sc->vxl_ftable_hash_key = arc4random();
573 }
574 
575 static void
576 vxlan_ftable_fini(struct vxlan_softc *sc)
577 {
578 	int i;
579 
580 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
581 		KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
582 		    ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
583 	}
584 	MPASS(sc->vxl_ftable_cnt == 0);
585 
586 	free(sc->vxl_ftable, M_VXLAN);
587 	sc->vxl_ftable = NULL;
588 }
589 
590 static void
591 vxlan_ftable_flush(struct vxlan_softc *sc, int all)
592 {
593 	struct vxlan_ftable_entry *fe, *tfe;
594 	int i;
595 
596 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
597 		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
598 			if (all || VXLAN_FE_IS_DYNAMIC(fe))
599 				vxlan_ftable_entry_destroy(sc, fe);
600 		}
601 	}
602 }
603 
604 static void
605 vxlan_ftable_expire(struct vxlan_softc *sc)
606 {
607 	struct vxlan_ftable_entry *fe, *tfe;
608 	int i;
609 
610 	VXLAN_LOCK_WASSERT(sc);
611 
612 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
613 		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
614 			if (VXLAN_FE_IS_DYNAMIC(fe) &&
615 			    time_uptime >= fe->vxlfe_expire)
616 				vxlan_ftable_entry_destroy(sc, fe);
617 		}
618 	}
619 }
620 
621 static int
622 vxlan_ftable_update_locked(struct vxlan_softc *sc,
623     const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
624     struct rm_priotracker *tracker)
625 {
626 	struct vxlan_ftable_entry *fe;
627 	int error __unused;
628 
629 	VXLAN_LOCK_ASSERT(sc);
630 
631 again:
632 	/*
633 	 * A forwarding entry for this MAC address might already exist. If
634 	 * so, update it, otherwise create a new one. We may have to upgrade
635 	 * the lock if we have to change or create an entry.
636 	 */
637 	fe = vxlan_ftable_entry_lookup(sc, mac);
638 	if (fe != NULL) {
639 		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
640 
641 		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
642 		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa))
643 			return (0);
644 		if (!VXLAN_LOCK_WOWNED(sc)) {
645 			VXLAN_RUNLOCK(sc, tracker);
646 			VXLAN_WLOCK(sc);
647 			sc->vxl_stats.ftable_lock_upgrade_failed++;
648 			goto again;
649 		}
650 		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa);
651 		return (0);
652 	}
653 
654 	if (!VXLAN_LOCK_WOWNED(sc)) {
655 		VXLAN_RUNLOCK(sc, tracker);
656 		VXLAN_WLOCK(sc);
657 		sc->vxl_stats.ftable_lock_upgrade_failed++;
658 		goto again;
659 	}
660 
661 	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
662 		sc->vxl_stats.ftable_nospace++;
663 		return (ENOSPC);
664 	}
665 
666 	fe = vxlan_ftable_entry_alloc();
667 	if (fe == NULL)
668 		return (ENOMEM);
669 
670 	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC);
671 
672 	/* The prior lookup failed, so the insert should not. */
673 	error = vxlan_ftable_entry_insert(sc, fe);
674 	MPASS(error == 0);
675 
676 	return (0);
677 }
678 
679 static int
680 vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
681     const uint8_t *mac)
682 {
683 	struct rm_priotracker tracker;
684 	union vxlan_sockaddr vxlsa;
685 	int error;
686 
687 	/*
688 	 * The source port may be randomly selected by the remote host, so
689 	 * use the port of the default destination address.
690 	 */
691 	vxlan_sockaddr_copy(&vxlsa, sa);
692 	vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
693 
694 	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
695 		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
696 		if (error)
697 			return (error);
698 	}
699 
700 	VXLAN_RLOCK(sc, &tracker);
701 	error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
702 	VXLAN_UNLOCK(sc, &tracker);
703 
704 	return (error);
705 }
706 
707 static int
708 vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
709 {
710 	struct rm_priotracker tracker;
711 	struct sbuf sb;
712 	struct vxlan_softc *sc;
713 	struct vxlan_ftable_entry *fe;
714 	size_t size;
715 	int i, error;
716 
717 	/*
718 	 * This is mostly intended for debugging during development. It is
719 	 * not practical to dump an entire large table this way.
720 	 */
721 
722 	sc = arg1;
723 	size = PAGE_SIZE;	/* Calculate later. */
724 
725 	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
726 	sbuf_putc(&sb, '\n');
727 
728 	VXLAN_RLOCK(sc, &tracker);
729 	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
730 		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
731 			if (sbuf_error(&sb) != 0)
732 				break;
733 			vxlan_ftable_entry_dump(fe, &sb);
734 		}
735 	}
736 	VXLAN_RUNLOCK(sc, &tracker);
737 
738 	if (sbuf_len(&sb) == 1)
739 		sbuf_setpos(&sb, 0);
740 
741 	sbuf_finish(&sb);
742 	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
743 	sbuf_delete(&sb);
744 
745 	return (error);
746 }
747 
748 static struct vxlan_ftable_entry *
749 vxlan_ftable_entry_alloc(void)
750 {
751 	struct vxlan_ftable_entry *fe;
752 
753 	fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
754 
755 	return (fe);
756 }
757 
758 static void
759 vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
760 {
761 
762 	free(fe, M_VXLAN);
763 }
764 
765 static void
766 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
767     const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
768 {
769 
770 	fe->vxlfe_flags = flags;
771 	fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
772 	memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
773 	vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
774 }
775 
776 static void
777 vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
778     struct vxlan_ftable_entry *fe)
779 {
780 
781 	sc->vxl_ftable_cnt--;
782 	LIST_REMOVE(fe, vxlfe_hash);
783 	vxlan_ftable_entry_free(fe);
784 }
785 
786 static int
787 vxlan_ftable_entry_insert(struct vxlan_softc *sc,
788     struct vxlan_ftable_entry *fe)
789 {
790 	struct vxlan_ftable_entry *lfe;
791 	uint32_t hash;
792 	int dir;
793 
794 	VXLAN_LOCK_WASSERT(sc);
795 	hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
796 
797 	lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
798 	if (lfe == NULL) {
799 		LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
800 		goto out;
801 	}
802 
803 	do {
804 		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
805 		if (dir == 0)
806 			return (EEXIST);
807 		if (dir > 0) {
808 			LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
809 			goto out;
810 		} else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
811 			LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
812 			goto out;
813 		} else
814 			lfe = LIST_NEXT(lfe, vxlfe_hash);
815 	} while (lfe != NULL);
816 
817 out:
818 	sc->vxl_ftable_cnt++;
819 
820 	return (0);
821 }
822 
823 static struct vxlan_ftable_entry *
824 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
825 {
826 	struct vxlan_ftable_entry *fe;
827 	uint32_t hash;
828 	int dir;
829 
830 	VXLAN_LOCK_ASSERT(sc);
831 	hash = VXLAN_SC_FTABLE_HASH(sc, mac);
832 
833 	LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
834 		dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
835 		if (dir == 0)
836 			return (fe);
837 		if (dir > 0)
838 			break;
839 	}
840 
841 	return (NULL);
842 }
843 
844 static void
845 vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
846 {
847 	char buf[64];
848 	const union vxlan_sockaddr *sa;
849 	const void *addr;
850 	int i, len, af, width;
851 
852 	sa = &fe->vxlfe_raddr;
853 	af = sa->sa.sa_family;
854 	len = sbuf_len(sb);
855 
856 	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
857 	    fe->vxlfe_flags);
858 
859 	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
860 		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
861 	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);
862 
863 	if (af == AF_INET) {
864 		addr = &sa->in4.sin_addr;
865 		width = INET_ADDRSTRLEN - 1;
866 	} else {
867 		addr = &sa->in6.sin6_addr;
868 		width = INET6_ADDRSTRLEN - 1;
869 	}
870 	inet_ntop(af, addr, buf, sizeof(buf));
871 	sbuf_printf(sb, "%*s ", width, buf);
872 
873 	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);
874 
875 	sbuf_putc(sb, '\n');
876 
877 	/* Truncate a partial line. */
878 	if (sbuf_error(sb) != 0)
879 		sbuf_setpos(sb, len);
880 }
881 
882 static struct vxlan_socket *
883 vxlan_socket_alloc(const union vxlan_sockaddr *sa)
884 {
885 	struct vxlan_socket *vso;
886 	int i;
887 
888 	vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
889 	rm_init(&vso->vxlso_lock, "vxlansorm");
890 	refcount_init(&vso->vxlso_refcnt, 0);
891 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
892 		LIST_INIT(&vso->vxlso_vni_hash[i]);
893 	vso->vxlso_laddr = *sa;
894 
895 	return (vso);
896 }
897 
898 static void
899 vxlan_socket_destroy(struct vxlan_socket *vso)
900 {
901 	struct socket *so;
902 #ifdef INVARIANTS
903 	int i;
904 	struct vxlan_socket_mc_info *mc;
905 
906 	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
907 		mc = &vso->vxlso_mc[i];
908 		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
909 		    ("%s: socket %p mc[%d] still has address",
910 		     __func__, vso, i));
911 	}
912 
913 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
914 		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
915 		    ("%s: socket %p vni_hash[%d] not empty",
916 		     __func__, vso, i));
917 	}
918 #endif
919 	so = vso->vxlso_sock;
920 	if (so != NULL) {
921 		vso->vxlso_sock = NULL;
922 		soclose(so);
923 	}
924 
925 	rm_destroy(&vso->vxlso_lock);
926 	free(vso, M_VXLAN);
927 }
928 
929 static void
930 vxlan_socket_release(struct vxlan_socket *vso)
931 {
932 	int destroy;
933 
934 	VXLAN_LIST_LOCK();
935 	destroy = VXLAN_SO_RELEASE(vso);
936 	if (destroy != 0)
937 		LIST_REMOVE(vso, vxlso_entry);
938 	VXLAN_LIST_UNLOCK();
939 
940 	if (destroy != 0)
941 		vxlan_socket_destroy(vso);
942 }
943 
944 static struct vxlan_socket *
945 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
946 {
947 	struct vxlan_socket *vso;
948 
949 	VXLAN_LIST_LOCK();
950 	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
951 		if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
952 			VXLAN_SO_ACQUIRE(vso);
953 			break;
954 		}
955 	}
956 	VXLAN_LIST_UNLOCK();
957 
958 	return (vso);
959 }
960 
961 static void
962 vxlan_socket_insert(struct vxlan_socket *vso)
963 {
964 
965 	VXLAN_LIST_LOCK();
966 	VXLAN_SO_ACQUIRE(vso);
967 	LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
968 	VXLAN_LIST_UNLOCK();
969 }
970 
971 static int
972 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
973 {
974 	struct thread *td;
975 	int error;
976 
977 	td = curthread;
978 
979 	error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
980 	    SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
981 	if (error) {
982 		if_printf(ifp, "cannot create socket: %d\n", error);
983 		return (error);
984 	}
985 
986 	error = udp_set_kernel_tunneling(vso->vxlso_sock,
987 	    vxlan_rcv_udp_packet, NULL, vso);
988 	if (error) {
989 		if_printf(ifp, "cannot set tunneling function: %d\n", error);
990 		return (error);
991 	}
992 
993 	if (vxlan_reuse_port != 0) {
994 		struct sockopt sopt;
995 		int val = 1;
996 
997 		bzero(&sopt, sizeof(sopt));
998 		sopt.sopt_dir = SOPT_SET;
999 		sopt.sopt_level = IPPROTO_IP;
1000 		sopt.sopt_name = SO_REUSEPORT;
1001 		sopt.sopt_val = &val;
1002 		sopt.sopt_valsize = sizeof(val);
1003 		error = sosetopt(vso->vxlso_sock, &sopt);
1004 		if (error) {
1005 			if_printf(ifp,
1006 			    "cannot set REUSEADDR socket opt: %d\n", error);
1007 			return (error);
1008 		}
1009 	}
1010 
1011 	return (0);
1012 }
1013 
1014 static int
1015 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
1016 {
1017 	union vxlan_sockaddr laddr;
1018 	struct thread *td;
1019 	int error;
1020 
1021 	td = curthread;
1022 	laddr = vso->vxlso_laddr;
1023 
1024 	error = sobind(vso->vxlso_sock, &laddr.sa, td);
1025 	if (error) {
1026 		if (error != EADDRINUSE)
1027 			if_printf(ifp, "cannot bind socket: %d\n", error);
1028 		return (error);
1029 	}
1030 
1031 	return (0);
1032 }
1033 
1034 static int
1035 vxlan_socket_create(struct ifnet *ifp, int multicast,
1036     const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
1037 {
1038 	union vxlan_sockaddr laddr;
1039 	struct vxlan_socket *vso;
1040 	int error;
1041 
1042 	laddr = *saddr;
1043 
1044 	/*
1045 	 * If this socket will be multicast, then only the local port
1046 	 * must be specified when binding.
1047 	 */
1048 	if (multicast != 0) {
1049 		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1050 			laddr.in4.sin_addr.s_addr = INADDR_ANY;
1051 #ifdef INET6
1052 		else
1053 			laddr.in6.sin6_addr = in6addr_any;
1054 #endif
1055 	}
1056 
1057 	vso = vxlan_socket_alloc(&laddr);
1058 	if (vso == NULL)
1059 		return (ENOMEM);
1060 
1061 	error = vxlan_socket_init(vso, ifp);
1062 	if (error)
1063 		goto fail;
1064 
1065 	error = vxlan_socket_bind(vso, ifp);
1066 	if (error)
1067 		goto fail;
1068 
1069 	/*
1070 	 * There is a small window between the bind completing and
1071 	 * inserting the socket, so that a concurrent create may fail.
1072 	 * Let's not worry about that for now.
1073 	 */
1074 	vxlan_socket_insert(vso);
1075 	*vsop = vso;
1076 
1077 	return (0);
1078 
1079 fail:
1080 	vxlan_socket_destroy(vso);
1081 
1082 	return (error);
1083 }
1084 
1085 static void
1086 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
1087     struct vxlan_softc_head *list)
1088 {
1089 	struct rm_priotracker tracker;
1090 	struct vxlan_softc *sc;
1091 	int i;
1092 
1093 	VXLAN_SO_RLOCK(vso, &tracker);
1094 	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
1095 		LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
1096 			vxlan_ifdetach(sc, ifp, list);
1097 	}
1098 	VXLAN_SO_RUNLOCK(vso, &tracker);
1099 }
1100 
1101 static struct vxlan_socket *
1102 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
1103 {
1104 	union vxlan_sockaddr laddr;
1105 	struct vxlan_socket *vso;
1106 
1107 	laddr = *vxlsa;
1108 
1109 	if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1110 		laddr.in4.sin_addr.s_addr = INADDR_ANY;
1111 #ifdef INET6
1112 	else
1113 		laddr.in6.sin6_addr = in6addr_any;
1114 #endif
1115 
1116 	vso = vxlan_socket_lookup(&laddr);
1117 
1118 	return (vso);
1119 }
1120 
1121 static int
1122 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
1123     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1124     int ifidx)
1125 {
1126 
1127 	if (!vxlan_sockaddr_in_any(local) &&
1128 	    !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
1129 		return (0);
1130 	if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
1131 		return (0);
1132 	if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
1133 		return (0);
1134 
1135 	return (1);
1136 }
1137 
1138 static int
1139 vxlan_socket_mc_join_group(struct vxlan_socket *vso,
1140     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1141     int *ifidx, union vxlan_sockaddr *source)
1142 {
1143 	struct sockopt sopt;
1144 	int error;
1145 
1146 	*source = *local;
1147 
1148 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1149 		struct ip_mreq mreq;
1150 
1151 		mreq.imr_multiaddr = group->in4.sin_addr;
1152 		mreq.imr_interface = local->in4.sin_addr;
1153 
1154 		bzero(&sopt, sizeof(sopt));
1155 		sopt.sopt_dir = SOPT_SET;
1156 		sopt.sopt_level = IPPROTO_IP;
1157 		sopt.sopt_name = IP_ADD_MEMBERSHIP;
1158 		sopt.sopt_val = &mreq;
1159 		sopt.sopt_valsize = sizeof(mreq);
1160 		error = sosetopt(vso->vxlso_sock, &sopt);
1161 		if (error)
1162 			return (error);
1163 
1164 		/*
1165 		 * BMV: Ideally, there would be a formal way for us to get
1166 		 * the local interface that was selected based on the
1167 		 * imr_interface address. We could then update *ifidx so
1168 		 * vxlan_sockaddr_mc_info_match() would return a match for
1169 		 * later creates that explicitly set the multicast interface.
1170 		 *
1171 		 * If we really need to, we can of course look in the INP's
1172 		 * membership list:
1173 		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
1174 		 *         imo_head[]->imf_inm->inm_ifp
1175 		 * similarly to imo_match_group().
1176 		 */
1177 		source->in4.sin_addr = local->in4.sin_addr;
1178 
1179 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1180 		struct ipv6_mreq mreq;
1181 
1182 		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1183 		mreq.ipv6mr_interface = *ifidx;
1184 
1185 		bzero(&sopt, sizeof(sopt));
1186 		sopt.sopt_dir = SOPT_SET;
1187 		sopt.sopt_level = IPPROTO_IPV6;
1188 		sopt.sopt_name = IPV6_JOIN_GROUP;
1189 		sopt.sopt_val = &mreq;
1190 		sopt.sopt_valsize = sizeof(mreq);
1191 		error = sosetopt(vso->vxlso_sock, &sopt);
1192 		if (error)
1193 			return (error);
1194 
1195 		/*
1196 		 * BMV: As with IPv4, we would really like to know what
1197 		 * interface in6p_lookup_mcast_ifp() selected.
1198 		 */
1199 	} else
1200 		error = EAFNOSUPPORT;
1201 
1202 	return (error);
1203 }
1204 
1205 static int
1206 vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
1207     const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
1208     int ifidx)
1209 {
1210 	struct sockopt sopt;
1211 	int error;
1212 
1213 	bzero(&sopt, sizeof(sopt));
1214 	sopt.sopt_dir = SOPT_SET;
1215 
1216 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1217 		struct ip_mreq mreq;
1218 
1219 		mreq.imr_multiaddr = group->in4.sin_addr;
1220 		mreq.imr_interface = source->in4.sin_addr;
1221 
1222 		sopt.sopt_level = IPPROTO_IP;
1223 		sopt.sopt_name = IP_DROP_MEMBERSHIP;
1224 		sopt.sopt_val = &mreq;
1225 		sopt.sopt_valsize = sizeof(mreq);
1226 		error = sosetopt(vso->vxlso_sock, &sopt);
1227 
1228 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1229 		struct ipv6_mreq mreq;
1230 
1231 		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
1232 		mreq.ipv6mr_interface = ifidx;
1233 
1234 		sopt.sopt_level = IPPROTO_IPV6;
1235 		sopt.sopt_name = IPV6_LEAVE_GROUP;
1236 		sopt.sopt_val = &mreq;
1237 		sopt.sopt_valsize = sizeof(mreq);
1238 		error = sosetopt(vso->vxlso_sock, &sopt);
1239 
1240 	} else
1241 		error = EAFNOSUPPORT;
1242 
1243 	return (error);
1244 }
1245 
1246 static int
1247 vxlan_socket_mc_add_group(struct vxlan_socket *vso,
1248     const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1249     int ifidx, int *idx)
1250 {
1251 	union vxlan_sockaddr source;
1252 	struct vxlan_socket_mc_info *mc;
1253 	int i, empty, error;
1254 
1255 	/*
1256 	 * Within a socket, the same multicast group may be used by multiple
1257 	 * interfaces, each with a different network identifier. But a socket
1258 	 * may only join a multicast group once, so keep track of the users
1259 	 * here.
1260 	 */
1261 
1262 	VXLAN_SO_WLOCK(vso);
1263 	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1264 		mc = &vso->vxlso_mc[i];
1265 
1266 		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1267 			empty++;
1268 			continue;
1269 		}
1270 
1271 		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
1272 			goto out;
1273 	}
1274 	VXLAN_SO_WUNLOCK(vso);
1275 
1276 	if (empty == 0)
1277 		return (ENOSPC);
1278 
1279 	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
1280 	if (error)
1281 		return (error);
1282 
1283 	VXLAN_SO_WLOCK(vso);
1284 	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
1285 		mc = &vso->vxlso_mc[i];
1286 
1287 		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
1288 			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
1289 			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
1290 			mc->vxlsomc_ifidx = ifidx;
1291 			goto out;
1292 		}
1293 	}
1294 	VXLAN_SO_WUNLOCK(vso);
1295 
1296 	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
1297 	MPASS(error == 0);
1298 
1299 	return (ENOSPC);
1300 
1301 out:
1302 	mc->vxlsomc_users++;
1303 	VXLAN_SO_WUNLOCK(vso);
1304 
1305 	*idx = i;
1306 
1307 	return (0);
1308 }
1309 
1310 static void
1311 vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
1312 {
1313 	union vxlan_sockaddr group, source;
1314 	struct vxlan_socket_mc_info *mc;
1315 	int ifidx, leave;
1316 
1317 	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
1318 	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
1319 
1320 	leave = 0;
1321 	mc = &vso->vxlso_mc[idx];
1322 
1323 	VXLAN_SO_WLOCK(vso);
1324 	mc->vxlsomc_users--;
1325 	if (mc->vxlsomc_users == 0) {
1326 		group = mc->vxlsomc_gaddr;
1327 		source = mc->vxlsomc_saddr;
1328 		ifidx = mc->vxlsomc_ifidx;
1329 		bzero(mc, sizeof(*mc));
1330 		leave = 1;
1331 	}
1332 	VXLAN_SO_WUNLOCK(vso);
1333 
1334 	if (leave != 0) {
1335 		/*
1336 		 * Our socket's membership in this group may have already
1337 		 * been removed if we joined through an interface that's
1338 		 * been detached.
1339 		 */
1340 		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
1341 	}
1342 }
1343 
1344 static struct vxlan_softc *
1345 vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
1346 {
1347 	struct vxlan_softc *sc;
1348 	uint32_t hash;
1349 
1350 	VXLAN_SO_LOCK_ASSERT(vso);
1351 	hash = VXLAN_SO_VNI_HASH(vni);
1352 
1353 	LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
1354 		if (sc->vxl_vni == vni) {
1355 			VXLAN_ACQUIRE(sc);
1356 			break;
1357 		}
1358 	}
1359 
1360 	return (sc);
1361 }
1362 
1363 static struct vxlan_softc *
1364 vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
1365 {
1366 	struct rm_priotracker tracker;
1367 	struct vxlan_softc *sc;
1368 
1369 	VXLAN_SO_RLOCK(vso, &tracker);
1370 	sc = vxlan_socket_lookup_softc_locked(vso, vni);
1371 	VXLAN_SO_RUNLOCK(vso, &tracker);
1372 
1373 	return (sc);
1374 }
1375 
1376 static int
1377 vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1378 {
1379 	struct vxlan_softc *tsc;
1380 	uint32_t vni, hash;
1381 
1382 	vni = sc->vxl_vni;
1383 	hash = VXLAN_SO_VNI_HASH(vni);
1384 
1385 	VXLAN_SO_WLOCK(vso);
1386 	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
1387 	if (tsc != NULL) {
1388 		VXLAN_SO_WUNLOCK(vso);
1389 		vxlan_release(tsc);
1390 		return (EEXIST);
1391 	}
1392 
1393 	VXLAN_ACQUIRE(sc);
1394 	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
1395 	VXLAN_SO_WUNLOCK(vso);
1396 
1397 	return (0);
1398 }
1399 
1400 static void
1401 vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
1402 {
1403 
1404 	VXLAN_SO_WLOCK(vso);
1405 	LIST_REMOVE(sc, vxl_entry);
1406 	VXLAN_SO_WUNLOCK(vso);
1407 
1408 	vxlan_release(sc);
1409 }
1410 
1411 static struct ifnet *
1412 vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
1413 {
1414 	struct ifnet *ifp;
1415 
1416 	VXLAN_LOCK_ASSERT(sc);
1417 
1418 	if (ipv4 && sc->vxl_im4o != NULL)
1419 		ifp = sc->vxl_im4o->imo_multicast_ifp;
1420 	else if (!ipv4 && sc->vxl_im6o != NULL)
1421 		ifp = sc->vxl_im6o->im6o_multicast_ifp;
1422 	else
1423 		ifp = NULL;
1424 
1425 	if (ifp != NULL)
1426 		if_ref(ifp);
1427 
1428 	return (ifp);
1429 }
1430 
1431 static void
1432 vxlan_free_multicast(struct vxlan_softc *sc)
1433 {
1434 
1435 	if (sc->vxl_mc_ifp != NULL) {
1436 		if_rele(sc->vxl_mc_ifp);
1437 		sc->vxl_mc_ifp = NULL;
1438 		sc->vxl_mc_ifindex = 0;
1439 	}
1440 
1441 	if (sc->vxl_im4o != NULL) {
1442 		free(sc->vxl_im4o, M_VXLAN);
1443 		sc->vxl_im4o = NULL;
1444 	}
1445 
1446 	if (sc->vxl_im6o != NULL) {
1447 		free(sc->vxl_im6o, M_VXLAN);
1448 		sc->vxl_im6o = NULL;
1449 	}
1450 }
1451 
1452 static int
1453 vxlan_setup_multicast_interface(struct vxlan_softc *sc)
1454 {
1455 	struct ifnet *ifp;
1456 
1457 	ifp = ifunit_ref(sc->vxl_mc_ifname);
1458 	if (ifp == NULL) {
1459 		if_printf(sc->vxl_ifp, "multicast interface %s does "
1460 		    "not exist\n", sc->vxl_mc_ifname);
1461 		return (ENOENT);
1462 	}
1463 
1464 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1465 		if_printf(sc->vxl_ifp, "interface %s does not support "
1466 		     "multicast\n", sc->vxl_mc_ifname);
1467 		if_rele(ifp);
1468 		return (ENOTSUP);
1469 	}
1470 
1471 	sc->vxl_mc_ifp = ifp;
1472 	sc->vxl_mc_ifindex = ifp->if_index;
1473 
1474 	return (0);
1475 }
1476 
1477 static int
1478 vxlan_setup_multicast(struct vxlan_softc *sc)
1479 {
1480 	const union vxlan_sockaddr *group;
1481 	int error;
1482 
1483 	group = &sc->vxl_dst_addr;
1484 	error = 0;
1485 
1486 	if (sc->vxl_mc_ifname[0] != '\0') {
1487 		error = vxlan_setup_multicast_interface(sc);
1488 		if (error)
1489 			return (error);
1490 	}
1491 
1492 	/*
1493 	 * Initialize an multicast options structure that is sufficiently
1494 	 * populated for use in the respective IP output routine. This
1495 	 * structure is typically stored in the socket, but our sockets
1496 	 * may be shared among multiple interfaces.
1497 	 */
1498 	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
1499 		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
1500 		    M_ZERO | M_WAITOK);
1501 		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
1502 		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
1503 		sc->vxl_im4o->imo_multicast_vif = -1;
1504 	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
1505 		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
1506 		    M_ZERO | M_WAITOK);
1507 		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
1508 		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
1509 	}
1510 
1511 	return (error);
1512 }
1513 
1514 static int
1515 vxlan_setup_socket(struct vxlan_softc *sc)
1516 {
1517 	struct vxlan_socket *vso;
1518 	struct ifnet *ifp;
1519 	union vxlan_sockaddr *saddr, *daddr;
1520 	int multicast, error;
1521 
1522 	vso = NULL;
1523 	ifp = sc->vxl_ifp;
1524 	saddr = &sc->vxl_src_addr;
1525 	daddr = &sc->vxl_dst_addr;
1526 
1527 	multicast = vxlan_sockaddr_in_multicast(daddr);
1528 	MPASS(multicast != -1);
1529 	sc->vxl_vso_mc_index = -1;
1530 
1531 	/*
1532 	 * Try to create the socket. If that fails, attempt to use an
1533 	 * existing socket.
1534 	 */
1535 	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
1536 	if (error) {
1537 		if (multicast != 0)
1538 			vso = vxlan_socket_mc_lookup(saddr);
1539 		else
1540 			vso = vxlan_socket_lookup(saddr);
1541 
1542 		if (vso == NULL) {
1543 			if_printf(ifp, "cannot create socket (error: %d), "
1544 			    "and no existing socket found\n", error);
1545 			goto out;
1546 		}
1547 	}
1548 
1549 	if (multicast != 0) {
1550 		error = vxlan_setup_multicast(sc);
1551 		if (error)
1552 			goto out;
1553 
1554 		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
1555 		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
1556 		if (error)
1557 			goto out;
1558 	}
1559 
1560 	sc->vxl_sock = vso;
1561 	error = vxlan_socket_insert_softc(vso, sc);
1562 	if (error) {
1563 		sc->vxl_sock = NULL;
1564 		if_printf(ifp, "network identifier %d already exists in "
1565 		    "this socket\n", sc->vxl_vni);
1566 		goto out;
1567 	}
1568 
1569 	return (0);
1570 
1571 out:
1572 	if (vso != NULL) {
1573 		if (sc->vxl_vso_mc_index != -1) {
1574 			vxlan_socket_mc_release_group_by_idx(vso,
1575 			    sc->vxl_vso_mc_index);
1576 			sc->vxl_vso_mc_index = -1;
1577 		}
1578 		if (multicast != 0)
1579 			vxlan_free_multicast(sc);
1580 		vxlan_socket_release(vso);
1581 	}
1582 
1583 	return (error);
1584 }
1585 
1586 #ifdef INET6
1587 static void
1588 vxlan_setup_zero_checksum_port(struct vxlan_softc *sc)
1589 {
1590 
1591 	if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr))
1592 		return;
1593 
1594 	MPASS(sc->vxl_src_addr.in6.sin6_port != 0);
1595 	MPASS(sc->vxl_dst_addr.in6.sin6_port != 0);
1596 
1597 	if (sc->vxl_src_addr.in6.sin6_port != sc->vxl_dst_addr.in6.sin6_port) {
1598 		if_printf(sc->vxl_ifp, "port %d in src address does not match "
1599 		    "port %d in dst address, rfc6935_port (%d) not updated.\n",
1600 		    ntohs(sc->vxl_src_addr.in6.sin6_port),
1601 		    ntohs(sc->vxl_dst_addr.in6.sin6_port),
1602 		    V_zero_checksum_port);
1603 		return;
1604 	}
1605 
1606 	if (V_zero_checksum_port != 0) {
1607 		if (V_zero_checksum_port !=
1608 		    ntohs(sc->vxl_src_addr.in6.sin6_port)) {
1609 			if_printf(sc->vxl_ifp, "rfc6935_port is already set to "
1610 			    "%d, cannot set it to %d.\n", V_zero_checksum_port,
1611 			    ntohs(sc->vxl_src_addr.in6.sin6_port));
1612 		}
1613 		return;
1614 	}
1615 
1616 	V_zero_checksum_port = ntohs(sc->vxl_src_addr.in6.sin6_port);
1617 	if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n",
1618 	    V_zero_checksum_port);
1619 }
1620 #endif
1621 
1622 static void
1623 vxlan_setup_interface_hdrlen(struct vxlan_softc *sc)
1624 {
1625 	struct ifnet *ifp;
1626 
1627 	VXLAN_LOCK_WASSERT(sc);
1628 
1629 	ifp = sc->vxl_ifp;
1630 	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
1631 
1632 	if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
1633 		ifp->if_hdrlen += sizeof(struct ip);
1634 	else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
1635 		ifp->if_hdrlen += sizeof(struct ip6_hdr);
1636 
1637 	if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0)
1638 		ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
1639 }
1640 
1641 static int
1642 vxlan_valid_init_config(struct vxlan_softc *sc)
1643 {
1644 	const char *reason;
1645 
1646 	if (vxlan_check_vni(sc->vxl_vni) != 0) {
1647 		reason = "invalid virtual network identifier specified";
1648 		goto fail;
1649 	}
1650 
1651 	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
1652 		reason = "source address type is not supported";
1653 		goto fail;
1654 	}
1655 
1656 	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
1657 		reason = "destination address type is not supported";
1658 		goto fail;
1659 	}
1660 
1661 	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
1662 		reason = "no valid destination address specified";
1663 		goto fail;
1664 	}
1665 
1666 	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
1667 	    sc->vxl_mc_ifname[0] != '\0') {
1668 		reason = "can only specify interface with a group address";
1669 		goto fail;
1670 	}
1671 
1672 	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
1673 		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
1674 		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
1675 			reason = "source and destination address must both "
1676 			    "be either IPv4 or IPv6";
1677 			goto fail;
1678 		}
1679 	}
1680 
1681 	if (sc->vxl_src_addr.in4.sin_port == 0) {
1682 		reason = "local port not specified";
1683 		goto fail;
1684 	}
1685 
1686 	if (sc->vxl_dst_addr.in4.sin_port == 0) {
1687 		reason = "remote port not specified";
1688 		goto fail;
1689 	}
1690 
1691 	return (0);
1692 
1693 fail:
1694 	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
1695 	return (EINVAL);
1696 }
1697 
1698 static void
1699 vxlan_init_wait(struct vxlan_softc *sc)
1700 {
1701 
1702 	VXLAN_LOCK_WASSERT(sc);
1703 	while (sc->vxl_flags & VXLAN_FLAG_INIT)
1704 		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
1705 }
1706 
1707 static void
1708 vxlan_init_complete(struct vxlan_softc *sc)
1709 {
1710 
1711 	VXLAN_WLOCK(sc);
1712 	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
1713 	wakeup(sc);
1714 	VXLAN_WUNLOCK(sc);
1715 }
1716 
1717 static void
1718 vxlan_init(void *xsc)
1719 {
1720 	static const uint8_t empty_mac[ETHER_ADDR_LEN];
1721 	struct vxlan_softc *sc;
1722 	struct ifnet *ifp;
1723 
1724 	sc = xsc;
1725 	ifp = sc->vxl_ifp;
1726 
1727 	sx_xlock(&vxlan_sx);
1728 	VXLAN_WLOCK(sc);
1729 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1730 		VXLAN_WUNLOCK(sc);
1731 		sx_xunlock(&vxlan_sx);
1732 		return;
1733 	}
1734 	sc->vxl_flags |= VXLAN_FLAG_INIT;
1735 	VXLAN_WUNLOCK(sc);
1736 
1737 	if (vxlan_valid_init_config(sc) != 0)
1738 		goto out;
1739 
1740 	if (vxlan_setup_socket(sc) != 0)
1741 		goto out;
1742 
1743 #ifdef INET6
1744 	vxlan_setup_zero_checksum_port(sc);
1745 #endif
1746 
1747 	/* Initialize the default forwarding entry. */
1748 	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
1749 	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);
1750 
1751 	VXLAN_WLOCK(sc);
1752 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1753 	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
1754 	    vxlan_timer, sc);
1755 	VXLAN_WUNLOCK(sc);
1756 
1757 	if_link_state_change(ifp, LINK_STATE_UP);
1758 
1759 	EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family,
1760 	    ntohs(sc->vxl_src_addr.in4.sin_port));
1761 out:
1762 	vxlan_init_complete(sc);
1763 	sx_xunlock(&vxlan_sx);
1764 }
1765 
/*
 * Drop a reference on the softc and wake up sleepers on it when
 * VXLAN_RELEASE() returns non-zero.
 */
static void
vxlan_release(struct vxlan_softc *sc)
{

	/*
	 * The softc may be destroyed as soon as our reference is released,
	 * so the wakeup cannot be serialized with the softc lock. The
	 * sleeps use a timeout, so a missed wakeup is unfortunate but not
	 * fatal.
	 */
	if (VXLAN_RELEASE(sc) == 0)
		return;

	wakeup(sc);
}
1779 
1780 static void
1781 vxlan_teardown_wait(struct vxlan_softc *sc)
1782 {
1783 
1784 	VXLAN_LOCK_WASSERT(sc);
1785 	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1786 		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
1787 }
1788 
1789 static void
1790 vxlan_teardown_complete(struct vxlan_softc *sc)
1791 {
1792 
1793 	VXLAN_WLOCK(sc);
1794 	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
1795 	wakeup(sc);
1796 	VXLAN_WUNLOCK(sc);
1797 }
1798 
1799 static void
1800 vxlan_teardown_locked(struct vxlan_softc *sc)
1801 {
1802 	struct ifnet *ifp;
1803 	struct vxlan_socket *vso;
1804 
1805 	sx_assert(&vxlan_sx, SA_XLOCKED);
1806 	VXLAN_LOCK_WASSERT(sc);
1807 	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);
1808 
1809 	ifp = sc->vxl_ifp;
1810 	ifp->if_flags &= ~IFF_UP;
1811 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1812 	callout_stop(&sc->vxl_callout);
1813 	vso = sc->vxl_sock;
1814 	sc->vxl_sock = NULL;
1815 
1816 	VXLAN_WUNLOCK(sc);
1817 	if_link_state_change(ifp, LINK_STATE_DOWN);
1818 	EVENTHANDLER_INVOKE(vxlan_stop, ifp, sc->vxl_src_addr.in4.sin_family,
1819 	    ntohs(sc->vxl_src_addr.in4.sin_port));
1820 
1821 	if (vso != NULL) {
1822 		vxlan_socket_remove_softc(vso, sc);
1823 
1824 		if (sc->vxl_vso_mc_index != -1) {
1825 			vxlan_socket_mc_release_group_by_idx(vso,
1826 			    sc->vxl_vso_mc_index);
1827 			sc->vxl_vso_mc_index = -1;
1828 		}
1829 	}
1830 
1831 	VXLAN_WLOCK(sc);
1832 	while (sc->vxl_refcnt != 0)
1833 		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
1834 	VXLAN_WUNLOCK(sc);
1835 
1836 	callout_drain(&sc->vxl_callout);
1837 
1838 	vxlan_free_multicast(sc);
1839 	if (vso != NULL)
1840 		vxlan_socket_release(vso);
1841 
1842 	vxlan_teardown_complete(sc);
1843 }
1844 
1845 static void
1846 vxlan_teardown(struct vxlan_softc *sc)
1847 {
1848 
1849 	sx_xlock(&vxlan_sx);
1850 	VXLAN_WLOCK(sc);
1851 	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
1852 		vxlan_teardown_wait(sc);
1853 		VXLAN_WUNLOCK(sc);
1854 		sx_xunlock(&vxlan_sx);
1855 		return;
1856 	}
1857 
1858 	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1859 	vxlan_teardown_locked(sc);
1860 	sx_xunlock(&vxlan_sx);
1861 }
1862 
1863 static void
1864 vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
1865     struct vxlan_softc_head *list)
1866 {
1867 
1868 	VXLAN_WLOCK(sc);
1869 
1870 	if (sc->vxl_mc_ifp != ifp)
1871 		goto out;
1872 	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
1873 		goto out;
1874 
1875 	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
1876 	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
1877 
1878 out:
1879 	VXLAN_WUNLOCK(sc);
1880 }
1881 
1882 static void
1883 vxlan_timer(void *xsc)
1884 {
1885 	struct vxlan_softc *sc;
1886 
1887 	sc = xsc;
1888 	VXLAN_LOCK_WASSERT(sc);
1889 
1890 	vxlan_ftable_expire(sc);
1891 	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
1892 }
1893 
1894 static int
1895 vxlan_ioctl_ifflags(struct vxlan_softc *sc)
1896 {
1897 	struct ifnet *ifp;
1898 
1899 	ifp = sc->vxl_ifp;
1900 
1901 	if (ifp->if_flags & IFF_UP) {
1902 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1903 			vxlan_init(sc);
1904 	} else {
1905 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1906 			vxlan_teardown(sc);
1907 	}
1908 
1909 	return (0);
1910 }
1911 
1912 static int
1913 vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
1914 {
1915 	struct rm_priotracker tracker;
1916 	struct ifvxlancfg *cfg;
1917 
1918 	cfg = arg;
1919 	bzero(cfg, sizeof(*cfg));
1920 
1921 	VXLAN_RLOCK(sc, &tracker);
1922 	cfg->vxlc_vni = sc->vxl_vni;
1923 	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
1924 	    sizeof(union vxlan_sockaddr));
1925 	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
1926 	    sizeof(union vxlan_sockaddr));
1927 	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
1928 	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
1929 	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
1930 	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
1931 	cfg->vxlc_port_min = sc->vxl_min_port;
1932 	cfg->vxlc_port_max = sc->vxl_max_port;
1933 	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
1934 	cfg->vxlc_ttl = sc->vxl_ttl;
1935 	VXLAN_RUNLOCK(sc, &tracker);
1936 
1937 #ifdef INET6
1938 	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
1939 		sa6_recoverscope(&cfg->vxlc_local_sa.in6);
1940 	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
1941 		sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
1942 #endif
1943 
1944 	return (0);
1945 }
1946 
1947 static int
1948 vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
1949 {
1950 	struct ifvxlancmd *cmd;
1951 	int error;
1952 
1953 	cmd = arg;
1954 
1955 	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
1956 		return (EINVAL);
1957 
1958 	VXLAN_WLOCK(sc);
1959 	if (vxlan_can_change_config(sc)) {
1960 		sc->vxl_vni = cmd->vxlcmd_vni;
1961 		error = 0;
1962 	} else
1963 		error = EBUSY;
1964 	VXLAN_WUNLOCK(sc);
1965 
1966 	return (error);
1967 }
1968 
1969 static int
1970 vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
1971 {
1972 	struct ifvxlancmd *cmd;
1973 	union vxlan_sockaddr *vxlsa;
1974 	int error;
1975 
1976 	cmd = arg;
1977 	vxlsa = &cmd->vxlcmd_sa;
1978 
1979 	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1980 		return (EINVAL);
1981 	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
1982 		return (EINVAL);
1983 	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
1984 		error = vxlan_sockaddr_in6_embedscope(vxlsa);
1985 		if (error)
1986 			return (error);
1987 	}
1988 
1989 	VXLAN_WLOCK(sc);
1990 	if (vxlan_can_change_config(sc)) {
1991 		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
1992 		vxlan_set_hwcaps(sc);
1993 		error = 0;
1994 	} else
1995 		error = EBUSY;
1996 	VXLAN_WUNLOCK(sc);
1997 
1998 	return (error);
1999 }
2000 
2001 static int
2002 vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
2003 {
2004 	struct ifvxlancmd *cmd;
2005 	union vxlan_sockaddr *vxlsa;
2006 	int error;
2007 
2008 	cmd = arg;
2009 	vxlsa = &cmd->vxlcmd_sa;
2010 
2011 	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
2012 		return (EINVAL);
2013 	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
2014 		error = vxlan_sockaddr_in6_embedscope(vxlsa);
2015 		if (error)
2016 			return (error);
2017 	}
2018 
2019 	VXLAN_WLOCK(sc);
2020 	if (vxlan_can_change_config(sc)) {
2021 		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
2022 		vxlan_setup_interface_hdrlen(sc);
2023 		error = 0;
2024 	} else
2025 		error = EBUSY;
2026 	VXLAN_WUNLOCK(sc);
2027 
2028 	return (error);
2029 }
2030 
2031 static int
2032 vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
2033 {
2034 	struct ifvxlancmd *cmd;
2035 	int error;
2036 
2037 	cmd = arg;
2038 
2039 	if (cmd->vxlcmd_port == 0)
2040 		return (EINVAL);
2041 
2042 	VXLAN_WLOCK(sc);
2043 	if (vxlan_can_change_config(sc)) {
2044 		sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
2045 		error = 0;
2046 	} else
2047 		error = EBUSY;
2048 	VXLAN_WUNLOCK(sc);
2049 
2050 	return (error);
2051 }
2052 
2053 static int
2054 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
2055 {
2056 	struct ifvxlancmd *cmd;
2057 	int error;
2058 
2059 	cmd = arg;
2060 
2061 	if (cmd->vxlcmd_port == 0)
2062 		return (EINVAL);
2063 
2064 	VXLAN_WLOCK(sc);
2065 	if (vxlan_can_change_config(sc)) {
2066 		sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
2067 		error = 0;
2068 	} else
2069 		error = EBUSY;
2070 	VXLAN_WUNLOCK(sc);
2071 
2072 	return (error);
2073 }
2074 
2075 static int
2076 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
2077 {
2078 	struct ifvxlancmd *cmd;
2079 	uint16_t min, max;
2080 	int error;
2081 
2082 	cmd = arg;
2083 	min = cmd->vxlcmd_port_min;
2084 	max = cmd->vxlcmd_port_max;
2085 
2086 	if (max < min)
2087 		return (EINVAL);
2088 
2089 	VXLAN_WLOCK(sc);
2090 	if (vxlan_can_change_config(sc)) {
2091 		sc->vxl_min_port = min;
2092 		sc->vxl_max_port = max;
2093 		error = 0;
2094 	} else
2095 		error = EBUSY;
2096 	VXLAN_WUNLOCK(sc);
2097 
2098 	return (error);
2099 }
2100 
2101 static int
2102 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
2103 {
2104 	struct ifvxlancmd *cmd;
2105 	int error;
2106 
2107 	cmd = arg;
2108 
2109 	VXLAN_WLOCK(sc);
2110 	if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
2111 		sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
2112 		error = 0;
2113 	} else
2114 		error = EINVAL;
2115 	VXLAN_WUNLOCK(sc);
2116 
2117 	return (error);
2118 }
2119 
2120 static int
2121 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
2122 {
2123 	struct ifvxlancmd *cmd;
2124 	int error;
2125 
2126 	cmd = arg;
2127 
2128 	VXLAN_WLOCK(sc);
2129 	if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
2130 		sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
2131 		error = 0;
2132 	} else
2133 		error = EINVAL;
2134 	VXLAN_WUNLOCK(sc);
2135 
2136 	return (error);
2137 }
2138 
2139 static int
2140 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
2141 {
2142 	struct ifvxlancmd *cmd;
2143 	int error;
2144 
2145 	cmd = arg;
2146 
2147 	VXLAN_WLOCK(sc);
2148 	if (vxlan_can_change_config(sc)) {
2149 		strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
2150 		vxlan_set_hwcaps(sc);
2151 		error = 0;
2152 	} else
2153 		error = EBUSY;
2154 	VXLAN_WUNLOCK(sc);
2155 
2156 	return (error);
2157 }
2158 
2159 static int
2160 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
2161 {
2162 	struct ifvxlancmd *cmd;
2163 	int error;
2164 
2165 	cmd = arg;
2166 
2167 	VXLAN_WLOCK(sc);
2168 	if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
2169 		sc->vxl_ttl = cmd->vxlcmd_ttl;
2170 		if (sc->vxl_im4o != NULL)
2171 			sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
2172 		if (sc->vxl_im6o != NULL)
2173 			sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
2174 		error = 0;
2175 	} else
2176 		error = EINVAL;
2177 	VXLAN_WUNLOCK(sc);
2178 
2179 	return (error);
2180 }
2181 
2182 static int
2183 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
2184 {
2185 	struct ifvxlancmd *cmd;
2186 
2187 	cmd = arg;
2188 
2189 	VXLAN_WLOCK(sc);
2190 	if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
2191 		sc->vxl_flags |= VXLAN_FLAG_LEARN;
2192 	else
2193 		sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2194 	VXLAN_WUNLOCK(sc);
2195 
2196 	return (0);
2197 }
2198 
2199 static int
2200 vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
2201 {
2202 	union vxlan_sockaddr vxlsa;
2203 	struct ifvxlancmd *cmd;
2204 	struct vxlan_ftable_entry *fe;
2205 	int error;
2206 
2207 	cmd = arg;
2208 	vxlsa = cmd->vxlcmd_sa;
2209 
2210 	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
2211 		return (EINVAL);
2212 	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
2213 		return (EINVAL);
2214 	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2215 		return (EINVAL);
2216 	/* BMV: We could support both IPv4 and IPv6 later. */
2217 	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
2218 		return (EAFNOSUPPORT);
2219 
2220 	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
2221 		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
2222 		if (error)
2223 			return (error);
2224 	}
2225 
2226 	fe = vxlan_ftable_entry_alloc();
2227 	if (fe == NULL)
2228 		return (ENOMEM);
2229 
2230 	if (vxlsa.in4.sin_port == 0)
2231 		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
2232 
2233 	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
2234 	    VXLAN_FE_FLAG_STATIC);
2235 
2236 	VXLAN_WLOCK(sc);
2237 	error = vxlan_ftable_entry_insert(sc, fe);
2238 	VXLAN_WUNLOCK(sc);
2239 
2240 	if (error)
2241 		vxlan_ftable_entry_free(fe);
2242 
2243 	return (error);
2244 }
2245 
2246 static int
2247 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
2248 {
2249 	struct ifvxlancmd *cmd;
2250 	struct vxlan_ftable_entry *fe;
2251 	int error;
2252 
2253 	cmd = arg;
2254 
2255 	VXLAN_WLOCK(sc);
2256 	fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
2257 	if (fe != NULL) {
2258 		vxlan_ftable_entry_destroy(sc, fe);
2259 		error = 0;
2260 	} else
2261 		error = ENOENT;
2262 	VXLAN_WUNLOCK(sc);
2263 
2264 	return (error);
2265 }
2266 
2267 static int
2268 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
2269 {
2270 	struct ifvxlancmd *cmd;
2271 	int all;
2272 
2273 	cmd = arg;
2274 	all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
2275 
2276 	VXLAN_WLOCK(sc);
2277 	vxlan_ftable_flush(sc, all);
2278 	VXLAN_WUNLOCK(sc);
2279 
2280 	return (0);
2281 }
2282 
2283 static int
2284 vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
2285 {
2286 	const struct vxlan_control *vc;
2287 	union {
2288 		struct ifvxlancfg	cfg;
2289 		struct ifvxlancmd	cmd;
2290 	} args;
2291 	int out, error;
2292 
2293 	if (ifd->ifd_cmd >= vxlan_control_table_size)
2294 		return (EINVAL);
2295 
2296 	bzero(&args, sizeof(args));
2297 	vc = &vxlan_control_table[ifd->ifd_cmd];
2298 	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
2299 
2300 	if ((get != 0 && out == 0) || (get == 0 && out != 0))
2301 		return (EINVAL);
2302 
2303 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
2304 		error = priv_check(curthread, PRIV_NET_VXLAN);
2305 		if (error)
2306 			return (error);
2307 	}
2308 
2309 	if (ifd->ifd_len != vc->vxlc_argsize ||
2310 	    ifd->ifd_len > sizeof(args))
2311 		return (EINVAL);
2312 
2313 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
2314 		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
2315 		if (error)
2316 			return (error);
2317 	}
2318 
2319 	error = vc->vxlc_func(sc, &args);
2320 	if (error)
2321 		return (error);
2322 
2323 	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
2324 		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
2325 		if (error)
2326 			return (error);
2327 	}
2328 
2329 	return (0);
2330 }
2331 
2332 static int
2333 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2334 {
2335 	struct rm_priotracker tracker;
2336 	struct vxlan_softc *sc;
2337 	struct ifreq *ifr;
2338 	struct ifdrv *ifd;
2339 	int error;
2340 
2341 	sc = ifp->if_softc;
2342 	ifr = (struct ifreq *) data;
2343 	ifd = (struct ifdrv *) data;
2344 
2345 	error = 0;
2346 
2347 	switch (cmd) {
2348 	case SIOCADDMULTI:
2349 	case SIOCDELMULTI:
2350 		break;
2351 
2352 	case SIOCGDRVSPEC:
2353 	case SIOCSDRVSPEC:
2354 		error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
2355 		break;
2356 
2357 	case SIOCSIFFLAGS:
2358 		error = vxlan_ioctl_ifflags(sc);
2359 		break;
2360 
2361 	case SIOCSIFMEDIA:
2362 	case SIOCGIFMEDIA:
2363 		error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
2364 		break;
2365 
2366 	case SIOCSIFMTU:
2367 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) {
2368 			error = EINVAL;
2369 		} else {
2370 			VXLAN_WLOCK(sc);
2371 			ifp->if_mtu = ifr->ifr_mtu;
2372 			sc->vxl_flags |= VXLAN_FLAG_USER_MTU;
2373 			VXLAN_WUNLOCK(sc);
2374 		}
2375 		break;
2376 
2377 	case SIOCSIFCAP:
2378 		VXLAN_WLOCK(sc);
2379 		error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap);
2380 		if (error == 0)
2381 			vxlan_set_hwcaps(sc);
2382 		VXLAN_WUNLOCK(sc);
2383 		break;
2384 
2385 	case SIOCGTUNFIB:
2386 		VXLAN_RLOCK(sc, &tracker);
2387 		ifr->ifr_fib = sc->vxl_fibnum;
2388 		VXLAN_RUNLOCK(sc, &tracker);
2389 		break;
2390 
2391 	case SIOCSTUNFIB:
2392 		if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0)
2393 			break;
2394 
2395 		if (ifr->ifr_fib >= rt_numfibs)
2396 			error = EINVAL;
2397 		else {
2398 			VXLAN_WLOCK(sc);
2399 			sc->vxl_fibnum = ifr->ifr_fib;
2400 			VXLAN_WUNLOCK(sc);
2401 		}
2402 		break;
2403 
2404 	default:
2405 		error = ether_ioctl(ifp, cmd, data);
2406 		break;
2407 	}
2408 
2409 	return (error);
2410 }
2411 
2412 #if defined(INET) || defined(INET6)
2413 static uint16_t
2414 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
2415 {
2416 	int range;
2417 	uint32_t hash;
2418 
2419 	range = sc->vxl_max_port - sc->vxl_min_port + 1;
2420 
2421 	if (M_HASHTYPE_ISHASH(m))
2422 		hash = m->m_pkthdr.flowid;
2423 	else
2424 		hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
2425 		    sc->vxl_port_hash_key);
2426 
2427 	return (sc->vxl_min_port + (hash % range));
2428 }
2429 
2430 static void
2431 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
2432     uint16_t srcport, uint16_t dstport)
2433 {
2434 	struct vxlanudphdr *hdr;
2435 	struct udphdr *udph;
2436 	struct vxlan_header *vxh;
2437 	int len;
2438 
2439 	len = m->m_pkthdr.len - ipoff;
2440 	MPASS(len >= sizeof(struct vxlanudphdr));
2441 	hdr = mtodo(m, ipoff);
2442 
2443 	udph = &hdr->vxlh_udp;
2444 	udph->uh_sport = srcport;
2445 	udph->uh_dport = dstport;
2446 	udph->uh_ulen = htons(len);
2447 	udph->uh_sum = 0;
2448 
2449 	vxh = &hdr->vxlh_hdr;
2450 	vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
2451 	vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
2452 }
2453 #endif
2454 
2455 #if defined(INET6) || defined(INET)
2456 /*
2457  * Return the CSUM_INNER_* equivalent of CSUM_* caps.
2458  */
2459 static uint32_t
2460 csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap)
2461 {
2462 	uint32_t csum_flags = encap;
2463 	const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP;
2464 
2465 	/*
2466 	 * csum_flags can request either v4 or v6 offload but not both.
2467 	 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO)
2468 	 * so those bits are no good to detect the IP version.  Other bits are
2469 	 * always set with CSUM_TSO and we use those to figure out the IP
2470 	 * version.
2471 	 */
2472 	if (csum_flags_in & v4) {
2473 		if (csum_flags_in & CSUM_IP)
2474 			csum_flags |= CSUM_INNER_IP;
2475 		if (csum_flags_in & CSUM_IP_UDP)
2476 			csum_flags |= CSUM_INNER_IP_UDP;
2477 		if (csum_flags_in & CSUM_IP_TCP)
2478 			csum_flags |= CSUM_INNER_IP_TCP;
2479 		if (csum_flags_in & CSUM_IP_TSO)
2480 			csum_flags |= CSUM_INNER_IP_TSO;
2481 	} else {
2482 #ifdef INVARIANTS
2483 		const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP;
2484 
2485 		MPASS((csum_flags_in & v6) != 0);
2486 #endif
2487 		if (csum_flags_in & CSUM_IP6_UDP)
2488 			csum_flags |= CSUM_INNER_IP6_UDP;
2489 		if (csum_flags_in & CSUM_IP6_TCP)
2490 			csum_flags |= CSUM_INNER_IP6_TCP;
2491 		if (csum_flags_in & CSUM_IP6_TSO)
2492 			csum_flags |= CSUM_INNER_IP6_TSO;
2493 	}
2494 
2495 	return (csum_flags);
2496 }
2497 #endif
2498 
2499 static int
2500 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
2501     struct mbuf *m)
2502 {
2503 #ifdef INET
2504 	struct ifnet *ifp;
2505 	struct ip *ip;
2506 	struct in_addr srcaddr, dstaddr;
2507 	uint16_t srcport, dstport;
2508 	int plen, mcast, error;
2509 	struct route route, *ro;
2510 	struct sockaddr_in *sin;
2511 	uint32_t csum_flags;
2512 
2513 	NET_EPOCH_ASSERT();
2514 
2515 	ifp = sc->vxl_ifp;
2516 	srcaddr = sc->vxl_src_addr.in4.sin_addr;
2517 	srcport = vxlan_pick_source_port(sc, m);
2518 	dstaddr = fvxlsa->in4.sin_addr;
2519 	dstport = fvxlsa->in4.sin_port;
2520 
2521 	plen = m->m_pkthdr.len;
2522 	M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
2523 	    M_NOWAIT);
2524 	if (m == NULL) {
2525 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2526 		return (ENOBUFS);
2527 	}
2528 
2529 	ip = mtod(m, struct ip *);
2530 	ip->ip_tos = 0;
2531 	ip->ip_len = htons(m->m_pkthdr.len);
2532 	ip->ip_off = 0;
2533 	ip->ip_ttl = sc->vxl_ttl;
2534 	ip->ip_p = IPPROTO_UDP;
2535 	ip->ip_sum = 0;
2536 	ip->ip_src = srcaddr;
2537 	ip->ip_dst = dstaddr;
2538 
2539 	vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);
2540 
2541 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
2542 	m->m_flags &= ~(M_MCAST | M_BCAST);
2543 
2544 	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
2545 	if (m->m_pkthdr.csum_flags != 0) {
2546 		/*
2547 		 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
2548 		 * up the ifnet for the outbound route and verify that the
2549 		 * outbound ifnet can perform the requested operation on the
2550 		 * inner frame.
2551 		 */
2552 		bzero(&route, sizeof(route));
2553 		ro = &route;
2554 		sin = (struct sockaddr_in *)&ro->ro_dst;
2555 		sin->sin_family = AF_INET;
2556 		sin->sin_len = sizeof(*sin);
2557 		sin->sin_addr = ip->ip_dst;
2558 		ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE,
2559 		    0);
2560 		if (ro->ro_nh == NULL) {
2561 			m_freem(m);
2562 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2563 			return (EHOSTUNREACH);
2564 		}
2565 
2566 		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
2567 		    CSUM_ENCAP_VXLAN);
2568 		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
2569 		    csum_flags) {
2570 			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
2571 				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
2572 
2573 				if_printf(ifp, "interface %s is missing hwcaps "
2574 				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
2575 				    "hwassist 0x%08x\n", nh_ifp->if_xname,
2576 				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
2577 				    m->m_pkthdr.csum_flags, csum_flags,
2578 				    (uint32_t)nh_ifp->if_hwassist);
2579 			}
2580 			m_freem(m);
2581 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2582 			return (ENXIO);
2583 		}
2584 		m->m_pkthdr.csum_flags = csum_flags;
2585 		if (csum_flags &
2586 		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
2587 		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
2588 			counter_u64_add(sc->vxl_stats.txcsum, 1);
2589 			if (csum_flags & CSUM_INNER_TSO)
2590 				counter_u64_add(sc->vxl_stats.tso, 1);
2591 		}
2592 	} else
2593 		ro = NULL;
2594 	error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL);
2595 	if (error == 0) {
2596 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2597 		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
2598 		if (mcast != 0)
2599 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2600 	} else
2601 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2602 
2603 	return (error);
2604 #else
2605 	m_freem(m);
2606 	return (ENOTSUP);
2607 #endif
2608 }
2609 
2610 static int
2611 vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
2612     struct mbuf *m)
2613 {
2614 #ifdef INET6
2615 	struct ifnet *ifp;
2616 	struct ip6_hdr *ip6;
2617 	const struct in6_addr *srcaddr, *dstaddr;
2618 	uint16_t srcport, dstport;
2619 	int plen, mcast, error;
2620 	struct route_in6 route, *ro;
2621 	struct sockaddr_in6 *sin6;
2622 	uint32_t csum_flags;
2623 
2624 	NET_EPOCH_ASSERT();
2625 
2626 	ifp = sc->vxl_ifp;
2627 	srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
2628 	srcport = vxlan_pick_source_port(sc, m);
2629 	dstaddr = &fvxlsa->in6.sin6_addr;
2630 	dstport = fvxlsa->in6.sin6_port;
2631 
2632 	plen = m->m_pkthdr.len;
2633 	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
2634 	    M_NOWAIT);
2635 	if (m == NULL) {
2636 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2637 		return (ENOBUFS);
2638 	}
2639 
2640 	ip6 = mtod(m, struct ip6_hdr *);
2641 	ip6->ip6_flow = 0;		/* BMV: Keep in forwarding entry? */
2642 	ip6->ip6_vfc = IPV6_VERSION;
2643 	ip6->ip6_plen = 0;
2644 	ip6->ip6_nxt = IPPROTO_UDP;
2645 	ip6->ip6_hlim = sc->vxl_ttl;
2646 	ip6->ip6_src = *srcaddr;
2647 	ip6->ip6_dst = *dstaddr;
2648 
2649 	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);
2650 
2651 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
2652 	m->m_flags &= ~(M_MCAST | M_BCAST);
2653 
2654 	ro = NULL;
2655 	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
2656 	if (m->m_pkthdr.csum_flags != 0) {
2657 		/*
2658 		 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
2659 		 * up the ifnet for the outbound route and verify that the
2660 		 * outbound ifnet can perform the requested operation on the
2661 		 * inner frame.
2662 		 */
2663 		bzero(&route, sizeof(route));
2664 		ro = &route;
2665 		sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
2666 		sin6->sin6_family = AF_INET6;
2667 		sin6->sin6_len = sizeof(*sin6);
2668 		sin6->sin6_addr = ip6->ip6_dst;
2669 		ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0,
2670 		    NHR_NONE, 0);
2671 		if (ro->ro_nh == NULL) {
2672 			m_freem(m);
2673 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2674 			return (EHOSTUNREACH);
2675 		}
2676 
2677 		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
2678 		    CSUM_ENCAP_VXLAN);
2679 		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
2680 		    csum_flags) {
2681 			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
2682 				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
2683 
2684 				if_printf(ifp, "interface %s is missing hwcaps "
2685 				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
2686 				    "hwassist 0x%08x\n", nh_ifp->if_xname,
2687 				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
2688 				    m->m_pkthdr.csum_flags, csum_flags,
2689 				    (uint32_t)nh_ifp->if_hwassist);
2690 			}
2691 			m_freem(m);
2692 			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2693 			return (ENXIO);
2694 		}
2695 		m->m_pkthdr.csum_flags = csum_flags;
2696 		if (csum_flags &
2697 		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
2698 		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
2699 			counter_u64_add(sc->vxl_stats.txcsum, 1);
2700 			if (csum_flags & CSUM_INNER_TSO)
2701 				counter_u64_add(sc->vxl_stats.tso, 1);
2702 		}
2703 	} else if (ntohs(dstport) != V_zero_checksum_port) {
2704 		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
2705 
2706 		hdr->uh_sum = in6_cksum_pseudo(ip6,
2707 		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
2708 		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
2709 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
2710 	}
2711 	error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL);
2712 	if (error == 0) {
2713 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
2714 		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
2715 		if (mcast != 0)
2716 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
2717 	} else
2718 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
2719 
2720 	return (error);
2721 #else
2722 	m_freem(m);
2723 	return (ENOTSUP);
2724 #endif
2725 }
2726 
2727 static int
2728 vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
2729 {
2730 	struct rm_priotracker tracker;
2731 	union vxlan_sockaddr vxlsa;
2732 	struct vxlan_softc *sc;
2733 	struct vxlan_ftable_entry *fe;
2734 	struct ifnet *mcifp;
2735 	struct ether_header *eh;
2736 	int ipv4, error;
2737 
2738 	sc = ifp->if_softc;
2739 	eh = mtod(m, struct ether_header *);
2740 	fe = NULL;
2741 	mcifp = NULL;
2742 
2743 	ETHER_BPF_MTAP(ifp, m);
2744 
2745 	VXLAN_RLOCK(sc, &tracker);
2746 	M_SETFIB(m, sc->vxl_fibnum);
2747 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2748 		VXLAN_RUNLOCK(sc, &tracker);
2749 		m_freem(m);
2750 		return (ENETDOWN);
2751 	}
2752 
2753 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
2754 		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
2755 	if (fe == NULL)
2756 		fe = &sc->vxl_default_fe;
2757 	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);
2758 
2759 	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
2760 	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
2761 		mcifp = vxlan_multicast_if_ref(sc, ipv4);
2762 
2763 	VXLAN_ACQUIRE(sc);
2764 	VXLAN_RUNLOCK(sc, &tracker);
2765 
2766 	if (ipv4 != 0)
2767 		error = vxlan_encap4(sc, &vxlsa, m);
2768 	else
2769 		error = vxlan_encap6(sc, &vxlsa, m);
2770 
2771 	vxlan_release(sc);
2772 	if (mcifp != NULL)
2773 		if_rele(mcifp);
2774 
2775 	return (error);
2776 }
2777 
2778 static void
2779 vxlan_qflush(struct ifnet *ifp __unused)
2780 {
2781 }
2782 
2783 static bool
2784 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
2785     const struct sockaddr *srcsa, void *xvso)
2786 {
2787 	struct vxlan_socket *vso;
2788 	struct vxlan_header *vxh, vxlanhdr;
2789 	uint32_t vni;
2790 	int error __unused;
2791 
2792 	M_ASSERTPKTHDR(m);
2793 	vso = xvso;
2794 	offset += sizeof(struct udphdr);
2795 
2796 	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
2797 		goto out;
2798 
2799 	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
2800 		m_copydata(m, offset, sizeof(struct vxlan_header),
2801 		    (caddr_t) &vxlanhdr);
2802 		vxh = &vxlanhdr;
2803 	} else
2804 		vxh = mtodo(m, offset);
2805 
2806 	/*
2807 	 * Drop if there is a reserved bit set in either the flags or VNI
2808 	 * fields of the header. This goes against the specification, but
2809 	 * a bit set may indicate an unsupported new feature. This matches
2810 	 * the behavior of the Linux implementation.
2811 	 */
2812 	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
2813 	    vxh->vxlh_vni & ~VXLAN_VNI_MASK)
2814 		goto out;
2815 
2816 	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;
2817 
2818 	/* Adjust to the start of the inner Ethernet frame. */
2819 	m_adj_decap(m, offset + sizeof(struct vxlan_header));
2820 
2821 	error = vxlan_input(vso, vni, &m, srcsa);
2822 	MPASS(error != 0 || m == NULL);
2823 
2824 out:
2825 	if (m != NULL)
2826 		m_freem(m);
2827 
2828 	return (true);
2829 }
2830 
2831 static int
2832 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
2833     const struct sockaddr *sa)
2834 {
2835 	struct vxlan_softc *sc;
2836 	struct ifnet *ifp;
2837 	struct mbuf *m;
2838 	struct ether_header *eh;
2839 	int error;
2840 
2841 	m = *m0;
2842 
2843 	if (m->m_pkthdr.len < ETHER_HDR_LEN)
2844 		return (EINVAL);
2845 
2846 	sc = vxlan_socket_lookup_softc(vso, vni);
2847 	if (sc == NULL)
2848 		return (ENOENT);
2849 
2850 	ifp = sc->vxl_ifp;
2851 	if (m->m_len < ETHER_HDR_LEN &&
2852 	    (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
2853 		*m0 = NULL;
2854 		error = ENOBUFS;
2855 		goto out;
2856 	}
2857 	eh = mtod(m, struct ether_header *);
2858 
2859 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2860 		error = ENETDOWN;
2861 		goto out;
2862 	} else if (ifp == m->m_pkthdr.rcvif) {
2863 		/* XXX Does not catch more complex loops. */
2864 		error = EDEADLK;
2865 		goto out;
2866 	}
2867 
2868 	if (sc->vxl_flags & VXLAN_FLAG_LEARN)
2869 		vxlan_ftable_learn(sc, sa, eh->ether_shost);
2870 
2871 	m_clrprotoflags(m);
2872 	m->m_pkthdr.rcvif = ifp;
2873 	M_SETFIB(m, ifp->if_fib);
2874 	if (((ifp->if_capenable & IFCAP_RXCSUM &&
2875 	    m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) ||
2876 	    (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
2877 	    !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) {
2878 		uint32_t csum_flags = 0;
2879 
2880 		if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)
2881 			csum_flags |= CSUM_L3_CALC;
2882 		if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID)
2883 			csum_flags |= CSUM_L3_VALID;
2884 		if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC)
2885 			csum_flags |= CSUM_L4_CALC;
2886 		if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID)
2887 			csum_flags |= CSUM_L4_VALID;
2888 		m->m_pkthdr.csum_flags = csum_flags;
2889 		counter_u64_add(sc->vxl_stats.rxcsum, 1);
2890 	} else {
2891 		/* clear everything */
2892 		m->m_pkthdr.csum_flags = 0;
2893 		m->m_pkthdr.csum_data = 0;
2894 	}
2895 
2896 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
2897 	(*ifp->if_input)(ifp, m);
2898 	*m0 = NULL;
2899 	error = 0;
2900 
2901 out:
2902 	vxlan_release(sc);
2903 	return (error);
2904 }
2905 
2906 static int
2907 vxlan_stats_alloc(struct vxlan_softc *sc)
2908 {
2909 	struct vxlan_statistics *stats = &sc->vxl_stats;
2910 
2911 	stats->txcsum = counter_u64_alloc(M_WAITOK);
2912 	if (stats->txcsum == NULL)
2913 		goto failed;
2914 
2915 	stats->tso = counter_u64_alloc(M_WAITOK);
2916 	if (stats->tso == NULL)
2917 		goto failed;
2918 
2919 	stats->rxcsum = counter_u64_alloc(M_WAITOK);
2920 	if (stats->rxcsum == NULL)
2921 		goto failed;
2922 
2923 	return (0);
2924 failed:
2925 	vxlan_stats_free(sc);
2926 	return (ENOMEM);
2927 }
2928 
2929 static void
2930 vxlan_stats_free(struct vxlan_softc *sc)
2931 {
2932 	struct vxlan_statistics *stats = &sc->vxl_stats;
2933 
2934 	if (stats->txcsum != NULL) {
2935 		counter_u64_free(stats->txcsum);
2936 		stats->txcsum = NULL;
2937 	}
2938 	if (stats->tso != NULL) {
2939 		counter_u64_free(stats->tso);
2940 		stats->tso = NULL;
2941 	}
2942 	if (stats->rxcsum != NULL) {
2943 		counter_u64_free(stats->rxcsum);
2944 		stats->rxcsum = NULL;
2945 	}
2946 }
2947 
2948 static void
2949 vxlan_set_default_config(struct vxlan_softc *sc)
2950 {
2951 
2952 	sc->vxl_flags |= VXLAN_FLAG_LEARN;
2953 
2954 	sc->vxl_vni = VXLAN_VNI_MAX;
2955 	sc->vxl_ttl = IPDEFTTL;
2956 
2957 	if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
2958 		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
2959 		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
2960 	} else {
2961 		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2962 		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2963 	}
2964 
2965 	sc->vxl_min_port = V_ipport_firstauto;
2966 	sc->vxl_max_port = V_ipport_lastauto;
2967 
2968 	sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
2969 	sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
2970 }
2971 
2972 static int
2973 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
2974 {
2975 
2976 #ifndef INET
2977 	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
2978 	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
2979 		return (EAFNOSUPPORT);
2980 #endif
2981 
2982 #ifndef INET6
2983 	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
2984 	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
2985 		return (EAFNOSUPPORT);
2986 #else
2987 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
2988 		int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa);
2989 		if (error)
2990 			return (error);
2991 	}
2992 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
2993 		int error = vxlan_sockaddr_in6_embedscope(
2994 		   &vxlp->vxlp_remote_sa);
2995 		if (error)
2996 			return (error);
2997 	}
2998 #endif
2999 
3000 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
3001 		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
3002 			sc->vxl_vni = vxlp->vxlp_vni;
3003 	}
3004 
3005 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
3006 		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
3007 		sc->vxl_src_addr.in4.sin_family = AF_INET;
3008 		sc->vxl_src_addr.in4.sin_addr =
3009 		    vxlp->vxlp_local_sa.in4.sin_addr;
3010 	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
3011 		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
3012 		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
3013 		sc->vxl_src_addr.in6.sin6_addr =
3014 		    vxlp->vxlp_local_sa.in6.sin6_addr;
3015 	}
3016 
3017 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
3018 		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
3019 		sc->vxl_dst_addr.in4.sin_family = AF_INET;
3020 		sc->vxl_dst_addr.in4.sin_addr =
3021 		    vxlp->vxlp_remote_sa.in4.sin_addr;
3022 	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
3023 		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
3024 		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
3025 		sc->vxl_dst_addr.in6.sin6_addr =
3026 		    vxlp->vxlp_remote_sa.in6.sin6_addr;
3027 	}
3028 
3029 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
3030 		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
3031 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
3032 		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);
3033 
3034 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
3035 		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
3036 			sc->vxl_min_port = vxlp->vxlp_min_port;
3037 			sc->vxl_max_port = vxlp->vxlp_max_port;
3038 		}
3039 	}
3040 
3041 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
3042 		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);
3043 
3044 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
3045 		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
3046 			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
3047 	}
3048 
3049 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
3050 		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
3051 			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
3052 	}
3053 
3054 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
3055 		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
3056 			sc->vxl_ttl = vxlp->vxlp_ttl;
3057 	}
3058 
3059 	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
3060 		if (vxlp->vxlp_learn == 0)
3061 			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
3062 	}
3063 
3064 	return (0);
3065 }
3066 
3067 static int
3068 vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap)
3069 {
3070 	int mask = reqcap ^ ifp->if_capenable;
3071 
3072 	/* Disable TSO if tx checksums are disabled. */
3073 	if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) &&
3074 	    reqcap & IFCAP_TSO4) {
3075 		reqcap &= ~IFCAP_TSO4;
3076 		if_printf(ifp, "tso4 disabled due to -txcsum.\n");
3077 	}
3078 	if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) &&
3079 	    reqcap & IFCAP_TSO6) {
3080 		reqcap &= ~IFCAP_TSO6;
3081 		if_printf(ifp, "tso6 disabled due to -txcsum6.\n");
3082 	}
3083 
3084 	/* Do not enable TSO if tx checksums are disabled. */
3085 	if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 &&
3086 	    !(reqcap & IFCAP_TXCSUM)) {
3087 		if_printf(ifp, "enable txcsum first.\n");
3088 		return (EAGAIN);
3089 	}
3090 	if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 &&
3091 	    !(reqcap & IFCAP_TXCSUM_IPV6)) {
3092 		if_printf(ifp, "enable txcsum6 first.\n");
3093 		return (EAGAIN);
3094 	}
3095 
3096 	sc->vxl_reqcap = reqcap;
3097 	return (0);
3098 }
3099 
3100 /*
3101  * A VXLAN interface inherits the capabilities of the vxlandev or the interface
3102  * hosting the vxlanlocal address.
3103  */
3104 static void
3105 vxlan_set_hwcaps(struct vxlan_softc *sc)
3106 {
3107 	struct epoch_tracker et;
3108 	struct ifnet *p;
3109 	struct ifaddr *ifa;
3110 	u_long hwa;
3111 	int cap, ena;
3112 	bool rel;
3113 	struct ifnet *ifp = sc->vxl_ifp;
3114 
3115 	/* reset caps */
3116 	ifp->if_capabilities &= VXLAN_BASIC_IFCAPS;
3117 	ifp->if_capenable &= VXLAN_BASIC_IFCAPS;
3118 	ifp->if_hwassist = 0;
3119 
3120 	NET_EPOCH_ENTER(et);
3121 	CURVNET_SET(ifp->if_vnet);
3122 
3123 	rel = false;
3124 	p = NULL;
3125 	if (sc->vxl_mc_ifname[0] != '\0') {
3126 		rel = true;
3127 		p = ifunit_ref(sc->vxl_mc_ifname);
3128 	} else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
3129 		if (sc->vxl_src_addr.sa.sa_family == AF_INET) {
3130 			struct sockaddr_in in4 = sc->vxl_src_addr.in4;
3131 
3132 			in4.sin_port = 0;
3133 			ifa = ifa_ifwithaddr((struct sockaddr *)&in4);
3134 			if (ifa != NULL)
3135 				p = ifa->ifa_ifp;
3136 		} else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) {
3137 			struct sockaddr_in6 in6 = sc->vxl_src_addr.in6;
3138 
3139 			in6.sin6_port = 0;
3140 			ifa = ifa_ifwithaddr((struct sockaddr *)&in6);
3141 			if (ifa != NULL)
3142 				p = ifa->ifa_ifp;
3143 		}
3144 	}
3145 	if (p == NULL)
3146 		goto done;
3147 
3148 	cap = ena = hwa = 0;
3149 
3150 	/* checksum offload */
3151 	if (p->if_capabilities & IFCAP_VXLAN_HWCSUM)
3152 		cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
3153 	if (p->if_capenable & IFCAP_VXLAN_HWCSUM) {
3154 		ena |= sc->vxl_reqcap & p->if_capenable &
3155 		    (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
3156 		if (ena & IFCAP_TXCSUM) {
3157 			if (p->if_hwassist & CSUM_INNER_IP)
3158 				hwa |= CSUM_IP;
3159 			if (p->if_hwassist & CSUM_INNER_IP_UDP)
3160 				hwa |= CSUM_IP_UDP;
3161 			if (p->if_hwassist & CSUM_INNER_IP_TCP)
3162 				hwa |= CSUM_IP_TCP;
3163 		}
3164 		if (ena & IFCAP_TXCSUM_IPV6) {
3165 			if (p->if_hwassist & CSUM_INNER_IP6_UDP)
3166 				hwa |= CSUM_IP6_UDP;
3167 			if (p->if_hwassist & CSUM_INNER_IP6_TCP)
3168 				hwa |= CSUM_IP6_TCP;
3169 		}
3170 	}
3171 
3172 	/* hardware TSO */
3173 	if (p->if_capabilities & IFCAP_VXLAN_HWTSO) {
3174 		cap |= p->if_capabilities & IFCAP_TSO;
3175 		if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen)
3176 			ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen;
3177 		else
3178 			ifp->if_hw_tsomax = p->if_hw_tsomax;
3179 		/* XXX: tsomaxsegcount decrement is cxgbe specific  */
3180 		ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1;
3181 		ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize;
3182 	}
3183 	if (p->if_capenable & IFCAP_VXLAN_HWTSO) {
3184 		ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO;
3185 		if (ena & IFCAP_TSO) {
3186 			if (p->if_hwassist & CSUM_INNER_IP_TSO)
3187 				hwa |= CSUM_IP_TSO;
3188 			if (p->if_hwassist & CSUM_INNER_IP6_TSO)
3189 				hwa |= CSUM_IP6_TSO;
3190 		}
3191 	}
3192 
3193 	ifp->if_capabilities |= cap;
3194 	ifp->if_capenable |= ena;
3195 	ifp->if_hwassist |= hwa;
3196 	if (rel)
3197 		if_rele(p);
3198 done:
3199 	CURVNET_RESTORE();
3200 	NET_EPOCH_EXIT(et);
3201 }
3202 
3203 static int
3204 vxlan_clone_create(struct if_clone *ifc, char *name, size_t len,
3205     struct ifc_data *ifd, struct ifnet **ifpp)
3206 {
3207 	struct vxlan_softc *sc;
3208 	struct ifnet *ifp;
3209 	struct ifvxlanparam vxlp;
3210 	int error;
3211 
3212 	sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
3213 	sc->vxl_unit = ifd->unit;
3214 	sc->vxl_fibnum = curthread->td_proc->p_fibnum;
3215 	vxlan_set_default_config(sc);
3216 	error = vxlan_stats_alloc(sc);
3217 	if (error != 0)
3218 		goto fail;
3219 
3220 	if (ifd->params != NULL) {
3221 		error = ifc_copyin(ifd, &vxlp, sizeof(vxlp));
3222 		if (error)
3223 			goto fail;
3224 
3225 		error = vxlan_set_user_config(sc, &vxlp);
3226 		if (error)
3227 			goto fail;
3228 	}
3229 
3230 	ifp = if_alloc(IFT_ETHER);
3231 	if (ifp == NULL) {
3232 		error = ENOSPC;
3233 		goto fail;
3234 	}
3235 
3236 	sc->vxl_ifp = ifp;
3237 	rm_init(&sc->vxl_lock, "vxlanrm");
3238 	callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
3239 	sc->vxl_port_hash_key = arc4random();
3240 	vxlan_ftable_init(sc);
3241 
3242 	vxlan_sysctl_setup(sc);
3243 
3244 	ifp->if_softc = sc;
3245 	if_initname(ifp, vxlan_name, ifd->unit);
3246 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3247 	ifp->if_init = vxlan_init;
3248 	ifp->if_ioctl = vxlan_ioctl;
3249 	ifp->if_transmit = vxlan_transmit;
3250 	ifp->if_qflush = vxlan_qflush;
3251 	ifp->if_capabilities = VXLAN_BASIC_IFCAPS;
3252 	ifp->if_capenable = VXLAN_BASIC_IFCAPS;
3253 	sc->vxl_reqcap = -1;
3254 	vxlan_set_hwcaps(sc);
3255 
3256 	ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status);
3257 	ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL);
3258 	ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO);
3259 
3260 	ether_gen_addr(ifp, &sc->vxl_hwaddr);
3261 	ether_ifattach(ifp, sc->vxl_hwaddr.octet);
3262 
3263 	ifp->if_baudrate = 0;
3264 
3265 	VXLAN_WLOCK(sc);
3266 	vxlan_setup_interface_hdrlen(sc);
3267 	VXLAN_WUNLOCK(sc);
3268 	*ifpp = ifp;
3269 
3270 	return (0);
3271 
3272 fail:
3273 	free(sc, M_VXLAN);
3274 	return (error);
3275 }
3276 
3277 static int
3278 vxlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
3279 {
3280 	struct vxlan_softc *sc;
3281 
3282 	sc = ifp->if_softc;
3283 
3284 	vxlan_teardown(sc);
3285 
3286 	vxlan_ftable_flush(sc, 1);
3287 
3288 	ether_ifdetach(ifp);
3289 	if_free(ifp);
3290 	ifmedia_removeall(&sc->vxl_media);
3291 
3292 	vxlan_ftable_fini(sc);
3293 
3294 	vxlan_sysctl_destroy(sc);
3295 	rm_destroy(&sc->vxl_lock);
3296 	vxlan_stats_free(sc);
3297 	free(sc, M_VXLAN);
3298 
3299 	return (0);
3300 }
3301 
3302 /* BMV: Taken from if_bridge. */
3303 static uint32_t
3304 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
3305 {
3306 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;
3307 
3308 	b += addr[5] << 8;
3309 	b += addr[4];
3310 	a += addr[3] << 24;
3311 	a += addr[2] << 16;
3312 	a += addr[1] << 8;
3313 	a += addr[0];
3314 
3315 /*
3316  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
3317  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
3318  */
3319 #define	mix(a, b, c)							\
3320 do {									\
3321 	a -= b; a -= c; a ^= (c >> 13);					\
3322 	b -= c; b -= a; b ^= (a << 8);					\
3323 	c -= a; c -= b; c ^= (b >> 13);					\
3324 	a -= b; a -= c; a ^= (c >> 12);					\
3325 	b -= c; b -= a; b ^= (a << 16);					\
3326 	c -= a; c -= b; c ^= (b >> 5);					\
3327 	a -= b; a -= c; a ^= (c >> 3);					\
3328 	b -= c; b -= a; b ^= (a << 10);					\
3329 	c -= a; c -= b; c ^= (b >> 15);					\
3330 } while (0)
3331 
3332 	mix(a, b, c);
3333 
3334 #undef mix
3335 
3336 	return (c);
3337 }
3338 
/*
 * ifmedia "change" callback.  Media change requests are accepted but have no
 * effect; the function always reports success.
 */
static int
vxlan_media_change(struct ifnet *ifp)
{
	/* Nothing to reconfigure: the request is deliberately ignored. */
	(void)ifp;

	return (0);
}
3346 
3347 static void
3348 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3349 {
3350 
3351 	ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
3352 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
3353 }
3354 
3355 static int
3356 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
3357     const struct sockaddr *sa)
3358 {
3359 
3360 	return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
3361 }
3362 
3363 static void
3364 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
3365     const struct sockaddr *sa)
3366 {
3367 
3368 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3369 	bzero(vxladdr, sizeof(*vxladdr));
3370 
3371 	if (sa->sa_family == AF_INET) {
3372 		vxladdr->in4 = *satoconstsin(sa);
3373 		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
3374 	} else if (sa->sa_family == AF_INET6) {
3375 		vxladdr->in6 = *satoconstsin6(sa);
3376 		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
3377 	}
3378 }
3379 
3380 static int
3381 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
3382     const struct sockaddr *sa)
3383 {
3384 	int equal;
3385 
3386 	if (sa->sa_family == AF_INET) {
3387 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3388 		equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
3389 	} else if (sa->sa_family == AF_INET6) {
3390 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3391 		equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
3392 	} else
3393 		equal = 0;
3394 
3395 	return (equal);
3396 }
3397 
3398 static void
3399 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
3400     const struct sockaddr *sa)
3401 {
3402 
3403 	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3404 
3405 	if (sa->sa_family == AF_INET) {
3406 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3407 		vxladdr->in4.sin_family = AF_INET;
3408 		vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
3409 		vxladdr->in4.sin_addr = *in4;
3410 	} else if (sa->sa_family == AF_INET6) {
3411 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3412 		vxladdr->in6.sin6_family = AF_INET6;
3413 		vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
3414 		vxladdr->in6.sin6_addr = *in6;
3415 	}
3416 }
3417 
3418 static int
3419 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
3420 {
3421 	const struct sockaddr *sa;
3422 	int supported;
3423 
3424 	sa = &vxladdr->sa;
3425 	supported = 0;
3426 
3427 	if (sa->sa_family == AF_UNSPEC && unspec != 0) {
3428 		supported = 1;
3429 	} else if (sa->sa_family == AF_INET) {
3430 #ifdef INET
3431 		supported = 1;
3432 #endif
3433 	} else if (sa->sa_family == AF_INET6) {
3434 #ifdef INET6
3435 		supported = 1;
3436 #endif
3437 	}
3438 
3439 	return (supported);
3440 }
3441 
3442 static int
3443 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
3444 {
3445 	const struct sockaddr *sa;
3446 	int any;
3447 
3448 	sa = &vxladdr->sa;
3449 
3450 	if (sa->sa_family == AF_INET) {
3451 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3452 		any = in4->s_addr == INADDR_ANY;
3453 	} else if (sa->sa_family == AF_INET6) {
3454 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3455 		any = IN6_IS_ADDR_UNSPECIFIED(in6);
3456 	} else
3457 		any = -1;
3458 
3459 	return (any);
3460 }
3461 
3462 static int
3463 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
3464 {
3465 	const struct sockaddr *sa;
3466 	int mc;
3467 
3468 	sa = &vxladdr->sa;
3469 
3470 	if (sa->sa_family == AF_INET) {
3471 		const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3472 		mc = IN_MULTICAST(ntohl(in4->s_addr));
3473 	} else if (sa->sa_family == AF_INET6) {
3474 		const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3475 		mc = IN6_IS_ADDR_MULTICAST(in6);
3476 	} else
3477 		mc = -1;
3478 
3479 	return (mc);
3480 }
3481 
/*
 * Run sa6_embedscope() on the IPv6 address held in vxladdr, passing
 * V_ip6_use_defzone as its second argument.
 *
 * The caller must pass an IPv6 address (asserted with MPASS()).  Returns the
 * result of sa6_embedscope(), or EAFNOSUPPORT on kernels built without
 * INET6.
 */
static int
vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr)
{
	MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr));

#ifdef INET6
	return (sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone));
#else
	return (EAFNOSUPPORT);
#endif
}
3496 
3497 static int
3498 vxlan_can_change_config(struct vxlan_softc *sc)
3499 {
3500 	struct ifnet *ifp;
3501 
3502 	ifp = sc->vxl_ifp;
3503 	VXLAN_LOCK_ASSERT(sc);
3504 
3505 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3506 		return (0);
3507 	if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
3508 		return (0);
3509 
3510 	return (1);
3511 }
3512 
3513 static int
3514 vxlan_check_vni(uint32_t vni)
3515 {
3516 
3517 	return (vni >= VXLAN_VNI_MAX);
3518 }
3519 
3520 static int
3521 vxlan_check_ttl(int ttl)
3522 {
3523 
3524 	return (ttl > MAXTTL);
3525 }
3526 
3527 static int
3528 vxlan_check_ftable_timeout(uint32_t timeout)
3529 {
3530 
3531 	return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
3532 }
3533 
3534 static int
3535 vxlan_check_ftable_max(uint32_t max)
3536 {
3537 
3538 	return (max > VXLAN_FTABLE_MAX);
3539 }
3540 
3541 static void
3542 vxlan_sysctl_setup(struct vxlan_softc *sc)
3543 {
3544 	struct sysctl_ctx_list *ctx;
3545 	struct sysctl_oid *node;
3546 	struct vxlan_statistics *stats;
3547 	char namebuf[8];
3548 
3549 	ctx = &sc->vxl_sysctl_ctx;
3550 	stats = &sc->vxl_stats;
3551 	snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
3552 
3553 	sysctl_ctx_init(ctx);
3554 	sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
3555 	    SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
3556 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3557 
3558 	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
3559 	    OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3560 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
3561 	    CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
3562 	    "Number of entries in forwarding table");
3563 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
3564 	     CTLFLAG_RD, &sc->vxl_ftable_max, 0,
3565 	    "Maximum number of entries allowed in forwarding table");
3566 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
3567 	    CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
3568 	    "Number of seconds between prunes of the forwarding table");
3569 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
3570 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
3571 	    sc, 0, vxlan_ftable_sysctl_dump, "A",
3572 	    "Dump the forwarding table entries");
3573 
3574 	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
3575 	    OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3576 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3577 	    "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
3578 	    "Fowarding table reached maximum entries");
3579 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3580 	    "ftable_lock_upgrade_failed", CTLFLAG_RD,
3581 	    &stats->ftable_lock_upgrade_failed, 0,
3582 	    "Forwarding table update required lock upgrade");
3583 
3584 	SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum",
3585 	    CTLFLAG_RD, &stats->txcsum,
3586 	    "# of times hardware assisted with tx checksum");
3587 	SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso",
3588 	    CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO");
3589 	SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum",
3590 	    CTLFLAG_RD, &stats->rxcsum,
3591 	    "# of times hardware assisted with rx checksum");
3592 }
3593 
3594 static void
3595 vxlan_sysctl_destroy(struct vxlan_softc *sc)
3596 {
3597 
3598 	sysctl_ctx_free(&sc->vxl_sysctl_ctx);
3599 	sc->vxl_sysctl_node = NULL;
3600 }
3601 
3602 static int
3603 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
3604 {
3605 	char path[64];
3606 
3607 	snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
3608 	    sc->vxl_unit, knob);
3609 	TUNABLE_INT_FETCH(path, &def);
3610 
3611 	return (def);
3612 }
3613 
3614 static void
3615 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
3616 {
3617 	struct vxlan_softc_head list;
3618 	struct vxlan_socket *vso;
3619 	struct vxlan_softc *sc, *tsc;
3620 
3621 	LIST_INIT(&list);
3622 
3623 	if (ifp->if_flags & IFF_RENAMING)
3624 		return;
3625 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
3626 		return;
3627 
3628 	VXLAN_LIST_LOCK();
3629 	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
3630 		vxlan_socket_ifdetach(vso, ifp, &list);
3631 	VXLAN_LIST_UNLOCK();
3632 
3633 	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
3634 		LIST_REMOVE(sc, vxl_ifdetach_list);
3635 
3636 		sx_xlock(&vxlan_sx);
3637 		VXLAN_WLOCK(sc);
3638 		if (sc->vxl_flags & VXLAN_FLAG_INIT)
3639 			vxlan_init_wait(sc);
3640 		vxlan_teardown_locked(sc);
3641 		sx_xunlock(&vxlan_sx);
3642 	}
3643 }
3644 
3645 static void
3646 vxlan_load(void)
3647 {
3648 
3649 	mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
3650 	LIST_INIT(&vxlan_socket_list);
3651 	vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
3652 	    vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
3653 
3654 	struct if_clone_addreq req = {
3655 		.create_f = vxlan_clone_create,
3656 		.destroy_f = vxlan_clone_destroy,
3657 		.flags = IFC_F_AUTOUNIT,
3658 	};
3659 	vxlan_cloner = ifc_attach_cloner(vxlan_name, &req);
3660 }
3661 
3662 static void
3663 vxlan_unload(void)
3664 {
3665 
3666 	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
3667 	    vxlan_ifdetach_event_tag);
3668 	ifc_detach_cloner(vxlan_cloner);
3669 	mtx_destroy(&vxlan_list_mtx);
3670 	MPASS(LIST_EMPTY(&vxlan_socket_list));
3671 }
3672 
3673 static int
3674 vxlan_modevent(module_t mod, int type, void *unused)
3675 {
3676 	int error;
3677 
3678 	error = 0;
3679 
3680 	switch (type) {
3681 	case MOD_LOAD:
3682 		vxlan_load();
3683 		break;
3684 	case MOD_UNLOAD:
3685 		vxlan_unload();
3686 		break;
3687 	default:
3688 		error = ENOTSUP;
3689 		break;
3690 	}
3691 
3692 	return (error);
3693 }
3694 
3695 static moduledata_t vxlan_mod = {
3696 	"if_vxlan",
3697 	vxlan_modevent,
3698 	0
3699 };
3700 
3701 DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3702 MODULE_VERSION(if_vxlan, 1);
3703