1 /*-
2 * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
3 * All rights reserved.
4 * Copyright (c) 2020, Chelsio Communications.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "opt_inet.h"
29 #include "opt_inet6.h"
30
31 #include <sys/param.h>
32 #include <sys/eventhandler.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/hash.h>
36 #include <sys/malloc.h>
37 #include <sys/mbuf.h>
38 #include <sys/module.h>
39 #include <sys/refcount.h>
40 #include <sys/rmlock.h>
41 #include <sys/priv.h>
42 #include <sys/proc.h>
43 #include <sys/queue.h>
44 #include <sys/sbuf.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50
51 #include <net/bpf.h>
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_private.h>
56 #include <net/if_clone.h>
57 #include <net/if_dl.h>
58 #include <net/if_media.h>
59 #include <net/if_types.h>
60 #include <net/if_vxlan.h>
61 #include <net/netisr.h>
62 #include <net/route.h>
63 #include <net/route/nhop.h>
64
65 #include <netinet/in.h>
66 #include <netinet/in_systm.h>
67 #include <netinet/in_var.h>
68 #include <netinet/in_pcb.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/udp.h>
73 #include <netinet/udp_var.h>
74 #include <netinet/in_fib.h>
75 #include <netinet6/in6_fib.h>
76
77 #include <netinet6/ip6_var.h>
78 #include <netinet6/scope6_var.h>
79
80 struct vxlan_softc;
81 LIST_HEAD(vxlan_softc_head, vxlan_softc);
82
83 struct sx vxlan_sx;
84 SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock");
85
86 struct vxlan_socket_mc_info {
87 union vxlan_sockaddr vxlsomc_saddr;
88 union vxlan_sockaddr vxlsomc_gaddr;
89 int vxlsomc_ifidx;
90 int vxlsomc_users;
91 };
92
93 /*
 * The maximum MTU of an encapsulated Ethernet frame within an IPv4/UDP packet.
95 */
96 #define VXLAN_MAX_MTU (IP_MAXPACKET - \
97 60 /* Maximum IPv4 header len */ - \
98 sizeof(struct udphdr) - \
99 sizeof(struct vxlan_header) - \
100 ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN)
101 #define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU)
102
103 #define VXLAN_SO_MC_MAX_GROUPS 32
104
105 #define VXLAN_SO_VNI_HASH_SHIFT 6
106 #define VXLAN_SO_VNI_HASH_SIZE (1 << VXLAN_SO_VNI_HASH_SHIFT)
107 #define VXLAN_SO_VNI_HASH(_vni) ((_vni) % VXLAN_SO_VNI_HASH_SIZE)
108
109 struct vxlan_socket {
110 struct socket *vxlso_sock;
111 struct rmlock vxlso_lock;
112 u_int vxlso_refcnt;
113 union vxlan_sockaddr vxlso_laddr;
114 LIST_ENTRY(vxlan_socket) vxlso_entry;
115 struct vxlan_softc_head vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
116 struct vxlan_socket_mc_info vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
117 };
118
119 #define VXLAN_SO_RLOCK(_vso, _p) rm_rlock(&(_vso)->vxlso_lock, (_p))
120 #define VXLAN_SO_RUNLOCK(_vso, _p) rm_runlock(&(_vso)->vxlso_lock, (_p))
121 #define VXLAN_SO_WLOCK(_vso) rm_wlock(&(_vso)->vxlso_lock)
122 #define VXLAN_SO_WUNLOCK(_vso) rm_wunlock(&(_vso)->vxlso_lock)
123 #define VXLAN_SO_LOCK_ASSERT(_vso) \
124 rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
125 #define VXLAN_SO_LOCK_WASSERT(_vso) \
126 rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)
127
128 #define VXLAN_SO_ACQUIRE(_vso) refcount_acquire(&(_vso)->vxlso_refcnt)
129 #define VXLAN_SO_RELEASE(_vso) refcount_release(&(_vso)->vxlso_refcnt)
130
131 struct vxlan_ftable_entry {
132 LIST_ENTRY(vxlan_ftable_entry) vxlfe_hash;
133 uint16_t vxlfe_flags;
134 uint8_t vxlfe_mac[ETHER_ADDR_LEN];
135 union vxlan_sockaddr vxlfe_raddr;
136 time_t vxlfe_expire;
137 };
138
139 #define VXLAN_FE_FLAG_DYNAMIC 0x01
140 #define VXLAN_FE_FLAG_STATIC 0x02
141
142 #define VXLAN_FE_IS_DYNAMIC(_fe) \
143 ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)
144
145 #define VXLAN_SC_FTABLE_SHIFT 9
146 #define VXLAN_SC_FTABLE_SIZE (1 << VXLAN_SC_FTABLE_SHIFT)
147 #define VXLAN_SC_FTABLE_MASK (VXLAN_SC_FTABLE_SIZE - 1)
148 #define VXLAN_SC_FTABLE_HASH(_sc, _mac) \
149 (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)
150
151 LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);
152
153 struct vxlan_statistics {
154 uint32_t ftable_nospace;
155 uint32_t ftable_lock_upgrade_failed;
156 counter_u64_t txcsum;
157 counter_u64_t tso;
158 counter_u64_t rxcsum;
159 };
160
161 struct vxlan_softc {
162 struct ifnet *vxl_ifp;
163 int vxl_reqcap;
164 u_int vxl_fibnum;
165 struct vxlan_socket *vxl_sock;
166 uint32_t vxl_vni;
167 union vxlan_sockaddr vxl_src_addr;
168 union vxlan_sockaddr vxl_dst_addr;
169 uint32_t vxl_flags;
170 #define VXLAN_FLAG_INIT 0x0001
171 #define VXLAN_FLAG_TEARDOWN 0x0002
172 #define VXLAN_FLAG_LEARN 0x0004
173 #define VXLAN_FLAG_USER_MTU 0x0008
174
175 uint32_t vxl_port_hash_key;
176 uint16_t vxl_min_port;
177 uint16_t vxl_max_port;
178 uint8_t vxl_ttl;
179
180 /* Lookup table from MAC address to forwarding entry. */
181 uint32_t vxl_ftable_cnt;
182 uint32_t vxl_ftable_max;
183 uint32_t vxl_ftable_timeout;
184 uint32_t vxl_ftable_hash_key;
185 struct vxlan_ftable_head *vxl_ftable;
186
187 /* Derived from vxl_dst_addr. */
188 struct vxlan_ftable_entry vxl_default_fe;
189
190 struct ip_moptions *vxl_im4o;
191 struct ip6_moptions *vxl_im6o;
192
193 struct rmlock vxl_lock;
194 volatile u_int vxl_refcnt;
195
196 int vxl_unit;
197 int vxl_vso_mc_index;
198 struct vxlan_statistics vxl_stats;
199 struct sysctl_oid *vxl_sysctl_node;
200 struct sysctl_ctx_list vxl_sysctl_ctx;
201 struct callout vxl_callout;
202 struct ether_addr vxl_hwaddr;
203 int vxl_mc_ifindex;
204 struct ifnet *vxl_mc_ifp;
205 struct ifmedia vxl_media;
206 char vxl_mc_ifname[IFNAMSIZ];
207 LIST_ENTRY(vxlan_softc) vxl_entry;
208 LIST_ENTRY(vxlan_softc) vxl_ifdetach_list;
209
210 /* For rate limiting errors on the tx fast path. */
211 struct timeval err_time;
212 int err_pps;
213 };
214
215 #define VXLAN_RLOCK(_sc, _p) rm_rlock(&(_sc)->vxl_lock, (_p))
216 #define VXLAN_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->vxl_lock, (_p))
217 #define VXLAN_WLOCK(_sc) rm_wlock(&(_sc)->vxl_lock)
218 #define VXLAN_WUNLOCK(_sc) rm_wunlock(&(_sc)->vxl_lock)
219 #define VXLAN_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->vxl_lock)
220 #define VXLAN_LOCK_ASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
221 #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
222 #define VXLAN_UNLOCK(_sc, _p) do { \
223 if (VXLAN_LOCK_WOWNED(_sc)) \
224 VXLAN_WUNLOCK(_sc); \
225 else \
226 VXLAN_RUNLOCK(_sc, _p); \
227 } while (0)
228
229 #define VXLAN_ACQUIRE(_sc) refcount_acquire(&(_sc)->vxl_refcnt)
230 #define VXLAN_RELEASE(_sc) refcount_release(&(_sc)->vxl_refcnt)
231
232 #define satoconstsin(sa) ((const struct sockaddr_in *)(sa))
233 #define satoconstsin6(sa) ((const struct sockaddr_in6 *)(sa))
234
235 struct vxlanudphdr {
236 struct udphdr vxlh_udp;
237 struct vxlan_header vxlh_hdr;
238 } __packed;
239
240 static int vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
241 static void vxlan_ftable_init(struct vxlan_softc *);
242 static void vxlan_ftable_fini(struct vxlan_softc *);
243 static void vxlan_ftable_flush(struct vxlan_softc *, int);
244 static void vxlan_ftable_expire(struct vxlan_softc *);
245 static int vxlan_ftable_update_locked(struct vxlan_softc *,
246 const union vxlan_sockaddr *, const uint8_t *,
247 struct rm_priotracker *);
248 static int vxlan_ftable_learn(struct vxlan_softc *,
249 const struct sockaddr *, const uint8_t *);
250 static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);
251
252 static struct vxlan_ftable_entry *
253 vxlan_ftable_entry_alloc(void);
254 static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
255 static void vxlan_ftable_entry_init(struct vxlan_softc *,
256 struct vxlan_ftable_entry *, const uint8_t *,
257 const struct sockaddr *, uint32_t);
258 static void vxlan_ftable_entry_destroy(struct vxlan_softc *,
259 struct vxlan_ftable_entry *);
260 static int vxlan_ftable_entry_insert(struct vxlan_softc *,
261 struct vxlan_ftable_entry *);
262 static struct vxlan_ftable_entry *
263 vxlan_ftable_entry_lookup(struct vxlan_softc *,
264 const uint8_t *);
265 static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
266 struct sbuf *);
267
268 static struct vxlan_socket *
269 vxlan_socket_alloc(const union vxlan_sockaddr *);
270 static void vxlan_socket_destroy(struct vxlan_socket *);
271 static void vxlan_socket_release(struct vxlan_socket *);
272 static struct vxlan_socket *
273 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
274 static void vxlan_socket_insert(struct vxlan_socket *);
275 static int vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
276 static int vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
277 static int vxlan_socket_create(struct ifnet *, int,
278 const union vxlan_sockaddr *, struct vxlan_socket **);
279 static void vxlan_socket_ifdetach(struct vxlan_socket *,
280 struct ifnet *, struct vxlan_softc_head *);
281
282 static struct vxlan_socket *
283 vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
284 static int vxlan_sockaddr_mc_info_match(
285 const struct vxlan_socket_mc_info *,
286 const union vxlan_sockaddr *,
287 const union vxlan_sockaddr *, int);
288 static int vxlan_socket_mc_join_group(struct vxlan_socket *,
289 const union vxlan_sockaddr *, const union vxlan_sockaddr *,
290 int *, union vxlan_sockaddr *);
291 static int vxlan_socket_mc_leave_group(struct vxlan_socket *,
292 const union vxlan_sockaddr *,
293 const union vxlan_sockaddr *, int);
294 static int vxlan_socket_mc_add_group(struct vxlan_socket *,
295 const union vxlan_sockaddr *, const union vxlan_sockaddr *,
296 int, int *);
297 static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
298 int);
299
300 static struct vxlan_softc *
301 vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
302 uint32_t);
303 static struct vxlan_softc *
304 vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
305 static int vxlan_socket_insert_softc(struct vxlan_socket *,
306 struct vxlan_softc *);
307 static void vxlan_socket_remove_softc(struct vxlan_socket *,
308 struct vxlan_softc *);
309
310 static struct ifnet *
311 vxlan_multicast_if_ref(struct vxlan_softc *, int);
312 static void vxlan_free_multicast(struct vxlan_softc *);
313 static int vxlan_setup_multicast_interface(struct vxlan_softc *);
314
315 static int vxlan_setup_multicast(struct vxlan_softc *);
316 static int vxlan_setup_socket(struct vxlan_softc *);
317 #ifdef INET6
318 static void vxlan_setup_zero_checksum_port(struct vxlan_softc *);
319 #endif
320 static void vxlan_setup_interface_hdrlen(struct vxlan_softc *);
321 static int vxlan_valid_init_config(struct vxlan_softc *);
322 static void vxlan_init_wait(struct vxlan_softc *);
323 static void vxlan_init_complete(struct vxlan_softc *);
324 static void vxlan_init(void *);
325 static void vxlan_release(struct vxlan_softc *);
326 static void vxlan_teardown_wait(struct vxlan_softc *);
327 static void vxlan_teardown_complete(struct vxlan_softc *);
328 static void vxlan_teardown_locked(struct vxlan_softc *);
329 static void vxlan_teardown(struct vxlan_softc *);
330 static void vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
331 struct vxlan_softc_head *);
332 static void vxlan_timer(void *);
333
334 static int vxlan_ctrl_get_config(struct vxlan_softc *, void *);
335 static int vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
336 static int vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
337 static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
338 static int vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
339 static int vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
340 static int vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
341 static int vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
342 static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
343 static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
344 static int vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
345 static int vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
346 static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
347 static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
348 static int vxlan_ctrl_flush(struct vxlan_softc *, void *);
349 static int vxlan_ioctl_drvspec(struct vxlan_softc *,
350 struct ifdrv *, int);
351 static int vxlan_ioctl_ifflags(struct vxlan_softc *);
352 static int vxlan_ioctl(struct ifnet *, u_long, caddr_t);
353
354 #if defined(INET) || defined(INET6)
355 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
356 static void vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
357 int, uint16_t, uint16_t);
358 #endif
359 static int vxlan_encap4(struct vxlan_softc *,
360 const union vxlan_sockaddr *, struct mbuf *);
361 static int vxlan_encap6(struct vxlan_softc *,
362 const union vxlan_sockaddr *, struct mbuf *);
363 static int vxlan_transmit(struct ifnet *, struct mbuf *);
364 static void vxlan_qflush(struct ifnet *);
365 static bool vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
366 const struct sockaddr *, void *);
367 static int vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
368 const struct sockaddr *);
369
370 static void vxlan_stats_alloc(struct vxlan_softc *);
371 static void vxlan_stats_free(struct vxlan_softc *);
372 static void vxlan_set_default_config(struct vxlan_softc *);
373 static int vxlan_set_user_config(struct vxlan_softc *,
374 struct ifvxlanparam *);
375 static int vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int);
376 static void vxlan_set_hwcaps(struct vxlan_softc *);
377 static int vxlan_clone_create(struct if_clone *, char *, size_t,
378 struct ifc_data *, struct ifnet **);
379 static int vxlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);
380
381 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
382 static int vxlan_media_change(struct ifnet *);
383 static void vxlan_media_status(struct ifnet *, struct ifmediareq *);
384
385 static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
386 const struct sockaddr *);
387 static void vxlan_sockaddr_copy(union vxlan_sockaddr *,
388 const struct sockaddr *);
389 static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
390 const struct sockaddr *);
391 static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
392 const struct sockaddr *);
393 static int vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
394 static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
395 static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
396 static int vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *);
397
398 static int vxlan_can_change_config(struct vxlan_softc *);
399 static int vxlan_check_vni(uint32_t);
400 static int vxlan_check_ttl(int);
401 static int vxlan_check_ftable_timeout(uint32_t);
402 static int vxlan_check_ftable_max(uint32_t);
403
404 static void vxlan_sysctl_setup(struct vxlan_softc *);
405 static void vxlan_sysctl_destroy(struct vxlan_softc *);
406 static int vxlan_tunable_int(struct vxlan_softc *, const char *, int);
407
408 static void vxlan_ifdetach_event(void *, struct ifnet *);
409 static void vxlan_load(void);
410 static void vxlan_unload(void);
411 static int vxlan_modevent(module_t, int, void *);
412
413 static const char vxlan_name[] = "vxlan";
414 static MALLOC_DEFINE(M_VXLAN, vxlan_name,
415 "Virtual eXtensible LAN Interface");
416 static struct if_clone *vxlan_cloner;
417
418 static struct mtx vxlan_list_mtx;
419 #define VXLAN_LIST_LOCK() mtx_lock(&vxlan_list_mtx)
420 #define VXLAN_LIST_UNLOCK() mtx_unlock(&vxlan_list_mtx)
421
422 static LIST_HEAD(, vxlan_socket) vxlan_socket_list =
423 LIST_HEAD_INITIALIZER(vxlan_socket_list);
424
425 static eventhandler_tag vxlan_ifdetach_event_tag;
426
427 SYSCTL_DECL(_net_link);
428 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
429 "Virtual eXtensible Local Area Network");
430
431 static int vxlan_legacy_port = 0;
432 SYSCTL_INT(_net_link_vxlan, OID_AUTO, legacy_port, CTLFLAG_RDTUN,
433 &vxlan_legacy_port, 0, "Use legacy port");
434 static int vxlan_reuse_port = 0;
435 SYSCTL_INT(_net_link_vxlan, OID_AUTO, reuse_port, CTLFLAG_RDTUN,
436 &vxlan_reuse_port, 0, "Re-use port");
437
438 /*
439 * This macro controls the default upper limitation on nesting of vxlan
440 * tunnels. By default it is 3, as the overhead of IPv6 vxlan tunnel is 70
441 * bytes, this will create at most 210 bytes overhead and the most inner
442 * tunnel's MTU will be 1290 which will meet IPv6 minimum MTU size 1280.
 * Be careful when configuring tunnels after raising this limit; a large
 * number of nested tunnels can cause a system crash.
445 */
446 #ifndef MAX_VXLAN_NEST
447 #define MAX_VXLAN_NEST 3
448 #endif
449 static int max_vxlan_nesting = MAX_VXLAN_NEST;
450 SYSCTL_INT(_net_link_vxlan, OID_AUTO, max_nesting, CTLFLAG_RW,
451 &max_vxlan_nesting, 0, "Max nested tunnels");
452
453 /* Default maximum number of addresses in the forwarding table. */
454 #ifndef VXLAN_FTABLE_MAX
455 #define VXLAN_FTABLE_MAX 2000
456 #endif
457
458 /* Timeout (in seconds) of addresses learned in the forwarding table. */
459 #ifndef VXLAN_FTABLE_TIMEOUT
460 #define VXLAN_FTABLE_TIMEOUT (20 * 60)
461 #endif
462
463 /*
464 * Maximum timeout (in seconds) of addresses learned in the forwarding
465 * table.
466 */
467 #ifndef VXLAN_FTABLE_MAX_TIMEOUT
468 #define VXLAN_FTABLE_MAX_TIMEOUT (60 * 60 * 24)
469 #endif
470
471 /* Number of seconds between pruning attempts of the forwarding table. */
472 #ifndef VXLAN_FTABLE_PRUNE
473 #define VXLAN_FTABLE_PRUNE (5 * 60)
474 #endif
475
476 static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;
477
478 struct vxlan_control {
479 int (*vxlc_func)(struct vxlan_softc *, void *);
480 int vxlc_argsize;
481 int vxlc_flags;
482 #define VXLAN_CTRL_FLAG_COPYIN 0x01
483 #define VXLAN_CTRL_FLAG_COPYOUT 0x02
484 #define VXLAN_CTRL_FLAG_SUSER 0x04
485 };
486
487 static const struct vxlan_control vxlan_control_table[] = {
488 [VXLAN_CMD_GET_CONFIG] =
489 { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
490 VXLAN_CTRL_FLAG_COPYOUT
491 },
492
493 [VXLAN_CMD_SET_VNI] =
494 { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
495 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
496 },
497
498 [VXLAN_CMD_SET_LOCAL_ADDR] =
499 { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
500 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
501 },
502
503 [VXLAN_CMD_SET_REMOTE_ADDR] =
504 { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
505 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
506 },
507
508 [VXLAN_CMD_SET_LOCAL_PORT] =
509 { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
510 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
511 },
512
513 [VXLAN_CMD_SET_REMOTE_PORT] =
514 { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
515 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
516 },
517
518 [VXLAN_CMD_SET_PORT_RANGE] =
519 { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
520 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
521 },
522
523 [VXLAN_CMD_SET_FTABLE_TIMEOUT] =
524 { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
525 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
526 },
527
528 [VXLAN_CMD_SET_FTABLE_MAX] =
529 { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
530 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
531 },
532
533 [VXLAN_CMD_SET_MULTICAST_IF] =
534 { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
535 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
536 },
537
538 [VXLAN_CMD_SET_TTL] =
539 { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
540 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
541 },
542
543 [VXLAN_CMD_SET_LEARN] =
544 { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
545 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
546 },
547
548 [VXLAN_CMD_FTABLE_ENTRY_ADD] =
549 { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
550 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
551 },
552
553 [VXLAN_CMD_FTABLE_ENTRY_REM] =
554 { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
555 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
556 },
557
558 [VXLAN_CMD_FLUSH] =
559 { vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
560 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
561 },
562 };
563
564 static const int vxlan_control_table_size = nitems(vxlan_control_table);
565
/*
 * Compare two MAC addresses byte-by-byte, memcmp()-style: returns a
 * negative value, zero, or a positive value as 'a' sorts before, equal
 * to, or after 'b'.  Used to keep ftable hash chains ordered.
 */
static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx, delta;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		delta = (int)a[idx] - (int)b[idx];
		if (delta != 0)
			return (delta);
	}

	return (0);
}
576
577 static void
vxlan_ftable_init(struct vxlan_softc * sc)578 vxlan_ftable_init(struct vxlan_softc *sc)
579 {
580 int i;
581
582 sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
583 VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);
584
585 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
586 LIST_INIT(&sc->vxl_ftable[i]);
587 sc->vxl_ftable_hash_key = arc4random();
588 }
589
590 static void
vxlan_ftable_fini(struct vxlan_softc * sc)591 vxlan_ftable_fini(struct vxlan_softc *sc)
592 {
593 int i;
594
595 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
596 KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
597 ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
598 }
599 MPASS(sc->vxl_ftable_cnt == 0);
600
601 free(sc->vxl_ftable, M_VXLAN);
602 sc->vxl_ftable = NULL;
603 }
604
605 static void
vxlan_ftable_flush(struct vxlan_softc * sc,int all)606 vxlan_ftable_flush(struct vxlan_softc *sc, int all)
607 {
608 struct vxlan_ftable_entry *fe, *tfe;
609 int i;
610
611 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
612 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
613 if (all || VXLAN_FE_IS_DYNAMIC(fe))
614 vxlan_ftable_entry_destroy(sc, fe);
615 }
616 }
617 }
618
619 static void
vxlan_ftable_expire(struct vxlan_softc * sc)620 vxlan_ftable_expire(struct vxlan_softc *sc)
621 {
622 struct vxlan_ftable_entry *fe, *tfe;
623 int i;
624
625 VXLAN_LOCK_WASSERT(sc);
626
627 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
628 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
629 if (VXLAN_FE_IS_DYNAMIC(fe) &&
630 time_uptime >= fe->vxlfe_expire)
631 vxlan_ftable_entry_destroy(sc, fe);
632 }
633 }
634 }
635
/*
 * Refresh or create the forwarding entry mapping 'mac' to the remote
 * endpoint 'vxlsa'.  Entered with the softc lock held for reading or
 * writing.  An rmlock cannot be upgraded in place, so whenever the
 * table must be modified under a read lock, we drop it, take the write
 * lock, and restart the lookup from 'again'.  Returns 0 on success,
 * ENOSPC when the table is full, or ENOMEM on allocation failure.
 * The function may return with the write lock held even if entered
 * with the read lock; VXLAN_UNLOCK() in the caller handles either.
 */
static int
vxlan_ftable_update_locked(struct vxlan_softc *sc,
    const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
    struct rm_priotracker *tracker)
{
	struct vxlan_ftable_entry *fe;
	int error __unused;

	VXLAN_LOCK_ASSERT(sc);

again:
	/*
	 * A forwarding entry for this MAC address might already exist. If
	 * so, update it, otherwise create a new one. We may have to upgrade
	 * the lock if we have to change or create an entry.
	 */
	fe = vxlan_ftable_entry_lookup(sc, mac);
	if (fe != NULL) {
		/* Seeing the MAC again refreshes its expiry time. */
		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;

		/*
		 * Static entries are never rewritten, and an unchanged
		 * remote address needs no update.
		 */
		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa))
			return (0);
		if (!VXLAN_LOCK_WOWNED(sc)) {
			/* Re-lock for writing and redo the lookup. */
			VXLAN_RUNLOCK(sc, tracker);
			VXLAN_WLOCK(sc);
			sc->vxl_stats.ftable_lock_upgrade_failed++;
			goto again;
		}
		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa);
		return (0);
	}

	/* Creating a new entry also requires the write lock. */
	if (!VXLAN_LOCK_WOWNED(sc)) {
		VXLAN_RUNLOCK(sc, tracker);
		VXLAN_WLOCK(sc);
		sc->vxl_stats.ftable_lock_upgrade_failed++;
		goto again;
	}

	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
		sc->vxl_stats.ftable_nospace++;
		return (ENOSPC);
	}

	/* M_NOWAIT allocation; this runs from the data path. */
	fe = vxlan_ftable_entry_alloc();
	if (fe == NULL)
		return (ENOMEM);

	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC);

	/* The prior lookup failed, so the insert should not. */
	error = vxlan_ftable_entry_insert(sc, fe);
	MPASS(error == 0);

	return (0);
}
693
694 static int
vxlan_ftable_learn(struct vxlan_softc * sc,const struct sockaddr * sa,const uint8_t * mac)695 vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
696 const uint8_t *mac)
697 {
698 struct rm_priotracker tracker;
699 union vxlan_sockaddr vxlsa;
700 int error;
701
702 /*
703 * The source port may be randomly selected by the remote host, so
704 * use the port of the default destination address.
705 */
706 vxlan_sockaddr_copy(&vxlsa, sa);
707 vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;
708
709 if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
710 error = vxlan_sockaddr_in6_embedscope(&vxlsa);
711 if (error)
712 return (error);
713 }
714
715 VXLAN_RLOCK(sc, &tracker);
716 error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
717 VXLAN_UNLOCK(sc, &tracker);
718
719 return (error);
720 }
721
/*
 * Sysctl handler that renders the forwarding table as a newline-
 * separated text dump.  Output is capped at one page; entries that do
 * not fit are silently dropped.
 */
static int
vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sbuf sb;
	struct vxlan_softc *sc;
	struct vxlan_ftable_entry *fe;
	size_t size;
	int i, error;

	/*
	 * This is mostly intended for debugging during development. It is
	 * not practical to dump an entire large table this way.
	 */

	sc = arg1;
	size = PAGE_SIZE;	/* Calculate later. */

	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
	/* Leading newline so the dump starts on its own line. */
	sbuf_putc(&sb, '\n');

	VXLAN_RLOCK(sc, &tracker);
	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
			/* Stop appending once the fixed buffer overflows. */
			if (sbuf_error(&sb) != 0)
				break;
			vxlan_ftable_entry_dump(fe, &sb);
		}
	}
	VXLAN_RUNLOCK(sc, &tracker);

	/* Empty table: rewind past the lone leading newline. */
	if (sbuf_len(&sb) == 1)
		sbuf_setpos(&sb, 0);

	sbuf_finish(&sb);
	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);

	return (error);
}
762
763 static struct vxlan_ftable_entry *
vxlan_ftable_entry_alloc(void)764 vxlan_ftable_entry_alloc(void)
765 {
766 struct vxlan_ftable_entry *fe;
767
768 fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);
769
770 return (fe);
771 }
772
/*
 * Release a forwarding-table entry obtained from
 * vxlan_ftable_entry_alloc().
 */
static void
vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
{

	free(fe, M_VXLAN);
}
779
780 static void
vxlan_ftable_entry_init(struct vxlan_softc * sc,struct vxlan_ftable_entry * fe,const uint8_t * mac,const struct sockaddr * sa,uint32_t flags)781 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
782 const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
783 {
784
785 fe->vxlfe_flags = flags;
786 fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
787 memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
788 vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
789 }
790
791 static void
vxlan_ftable_entry_destroy(struct vxlan_softc * sc,struct vxlan_ftable_entry * fe)792 vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
793 struct vxlan_ftable_entry *fe)
794 {
795
796 sc->vxl_ftable_cnt--;
797 LIST_REMOVE(fe, vxlfe_hash);
798 vxlan_ftable_entry_free(fe);
799 }
800
801 static int
vxlan_ftable_entry_insert(struct vxlan_softc * sc,struct vxlan_ftable_entry * fe)802 vxlan_ftable_entry_insert(struct vxlan_softc *sc,
803 struct vxlan_ftable_entry *fe)
804 {
805 struct vxlan_ftable_entry *lfe;
806 uint32_t hash;
807 int dir;
808
809 VXLAN_LOCK_WASSERT(sc);
810 hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);
811
812 lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
813 if (lfe == NULL) {
814 LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
815 goto out;
816 }
817
818 do {
819 dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
820 if (dir == 0)
821 return (EEXIST);
822 if (dir > 0) {
823 LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
824 goto out;
825 } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
826 LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
827 goto out;
828 } else
829 lfe = LIST_NEXT(lfe, vxlfe_hash);
830 } while (lfe != NULL);
831
832 out:
833 sc->vxl_ftable_cnt++;
834
835 return (0);
836 }
837
838 static struct vxlan_ftable_entry *
vxlan_ftable_entry_lookup(struct vxlan_softc * sc,const uint8_t * mac)839 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
840 {
841 struct vxlan_ftable_entry *fe;
842 uint32_t hash;
843 int dir;
844
845 VXLAN_LOCK_ASSERT(sc);
846 hash = VXLAN_SC_FTABLE_HASH(sc, mac);
847
848 LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
849 dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
850 if (dir == 0)
851 return (fe);
852 if (dir > 0)
853 break;
854 }
855
856 return (NULL);
857 }
858
/*
 * Append one human-readable forwarding-table line to 'sb':
 * type flag, flags byte, MAC, remote address, and expiry time.
 * If the sbuf overflows mid-entry, rewind to the saved position so no
 * partial line is emitted.
 */
static void
vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
{
	char buf[64];
	const union vxlan_sockaddr *sa;
	const void *addr;
	int i, len, af, width;

	sa = &fe->vxlfe_raddr;
	af = sa->sa.sa_family;
	/* Remember where this line starts in case we must truncate. */
	len = sbuf_len(sb);

	/* 'D'ynamic (learned) or 'S'tatic (administratively added). */
	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
	    fe->vxlfe_flags);

	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
	/* Last octet without the trailing colon. */
	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);

	if (af == AF_INET) {
		addr = &sa->in4.sin_addr;
		width = INET_ADDRSTRLEN - 1;
	} else {
		addr = &sa->in6.sin6_addr;
		width = INET6_ADDRSTRLEN - 1;
	}
	inet_ntop(af, addr, buf, sizeof(buf));
	/* Right-align the address to keep columns lined up. */
	sbuf_printf(sb, "%*s ", width, buf);

	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);

	sbuf_putc(sb, '\n');

	/* Truncate a partial line. */
	if (sbuf_error(sb) != 0)
		sbuf_setpos(sb, len);
}
896
897 static struct vxlan_socket *
vxlan_socket_alloc(const union vxlan_sockaddr * sa)898 vxlan_socket_alloc(const union vxlan_sockaddr *sa)
899 {
900 struct vxlan_socket *vso;
901 int i;
902
903 vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
904 rm_init(&vso->vxlso_lock, "vxlansorm");
905 refcount_init(&vso->vxlso_refcnt, 0);
906 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
907 LIST_INIT(&vso->vxlso_vni_hash[i]);
908 vso->vxlso_laddr = *sa;
909
910 return (vso);
911 }
912
/*
 * Free a vxlan_socket and close its underlying UDP socket.  Under
 * INVARIANTS, assert that no multicast group entries or VNI hash
 * entries remain attached.
 */
static void
vxlan_socket_destroy(struct vxlan_socket *vso)
{
	struct socket *so;
#ifdef INVARIANTS
	int i;
	struct vxlan_socket_mc_info *mc;

	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];
		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
		    ("%s: socket %p mc[%d] still has address",
		    __func__, vso, i));
	}

	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
		    ("%s: socket %p vni_hash[%d] not empty",
		    __func__, vso, i));
	}
#endif
	so = vso->vxlso_sock;
	if (so != NULL) {
		/* Clear the pointer before closing the socket. */
		vso->vxlso_sock = NULL;
		soclose(so);
	}

	rm_destroy(&vso->vxlso_lock);
	free(vso, M_VXLAN);
}
943
static void
vxlan_socket_release(struct vxlan_socket *vso)
{
	int destroy;

	/*
	 * Drop a reference.  The final release unlinks the socket from the
	 * global list while holding the list lock, but destroys it outside
	 * the lock since destruction may sleep (soclose()).
	 */
	VXLAN_LIST_LOCK();
	destroy = VXLAN_SO_RELEASE(vso);
	if (destroy != 0)
		LIST_REMOVE(vso, vxlso_entry);
	VXLAN_LIST_UNLOCK();

	if (destroy != 0)
		vxlan_socket_destroy(vso);
}
958
959 static struct vxlan_socket *
vxlan_socket_lookup(union vxlan_sockaddr * vxlsa)960 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
961 {
962 struct vxlan_socket *vso;
963
964 VXLAN_LIST_LOCK();
965 LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
966 if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
967 VXLAN_SO_ACQUIRE(vso);
968 break;
969 }
970 }
971 VXLAN_LIST_UNLOCK();
972
973 return (vso);
974 }
975
static void
vxlan_socket_insert(struct vxlan_socket *vso)
{

	VXLAN_LIST_LOCK();
	/* The global socket list holds its own reference on the socket. */
	VXLAN_SO_ACQUIRE(vso);
	LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
	VXLAN_LIST_UNLOCK();
}
985
986 static int
vxlan_socket_init(struct vxlan_socket * vso,struct ifnet * ifp)987 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
988 {
989 struct thread *td;
990 int error;
991
992 td = curthread;
993
994 error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
995 SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
996 if (error) {
997 if_printf(ifp, "cannot create socket: %d\n", error);
998 return (error);
999 }
1000
1001 error = udp_set_kernel_tunneling(vso->vxlso_sock,
1002 vxlan_rcv_udp_packet, NULL, vso);
1003 if (error) {
1004 if_printf(ifp, "cannot set tunneling function: %d\n", error);
1005 return (error);
1006 }
1007
1008 if (vxlan_reuse_port != 0) {
1009 struct sockopt sopt;
1010 int val = 1;
1011
1012 bzero(&sopt, sizeof(sopt));
1013 sopt.sopt_dir = SOPT_SET;
1014 sopt.sopt_level = IPPROTO_IP;
1015 sopt.sopt_name = SO_REUSEPORT;
1016 sopt.sopt_val = &val;
1017 sopt.sopt_valsize = sizeof(val);
1018 error = sosetopt(vso->vxlso_sock, &sopt);
1019 if (error) {
1020 if_printf(ifp,
1021 "cannot set REUSEADDR socket opt: %d\n", error);
1022 return (error);
1023 }
1024 }
1025
1026 return (0);
1027 }
1028
1029 static int
vxlan_socket_bind(struct vxlan_socket * vso,struct ifnet * ifp)1030 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
1031 {
1032 union vxlan_sockaddr laddr;
1033 struct thread *td;
1034 int error;
1035
1036 td = curthread;
1037 laddr = vso->vxlso_laddr;
1038
1039 error = sobind(vso->vxlso_sock, &laddr.sa, td);
1040 if (error) {
1041 if (error != EADDRINUSE)
1042 if_printf(ifp, "cannot bind socket: %d\n", error);
1043 return (error);
1044 }
1045
1046 return (0);
1047 }
1048
static int
vxlan_socket_create(struct ifnet *ifp, int multicast,
    const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
{
	union vxlan_sockaddr laddr;
	struct vxlan_socket *vso;
	int error;

	laddr = *saddr;

	/*
	 * If this socket will be multicast, then only the local port
	 * must be specified when binding.
	 */
	if (multicast != 0) {
		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
			laddr.in4.sin_addr.s_addr = INADDR_ANY;
#ifdef INET6
		else
			laddr.in6.sin6_addr = in6addr_any;
#endif
	}

	vso = vxlan_socket_alloc(&laddr);
	if (vso == NULL)
		return (ENOMEM);

	/* Create and configure the backing UDP socket. */
	error = vxlan_socket_init(vso, ifp);
	if (error)
		goto fail;

	error = vxlan_socket_bind(vso, ifp);
	if (error)
		goto fail;

	/*
	 * There is a small window between the bind completing and
	 * inserting the socket, so that a concurrent create may fail.
	 * Let's not worry about that for now.
	 */
	vxlan_socket_insert(vso);
	/* Ownership of the referenced socket passes to the caller. */
	*vsop = vso;

	return (0);

fail:
	/* Tears down the partially initialized socket, closing it if open. */
	vxlan_socket_destroy(vso);

	return (error);
}
1099
1100 static void
vxlan_socket_ifdetach(struct vxlan_socket * vso,struct ifnet * ifp,struct vxlan_softc_head * list)1101 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
1102 struct vxlan_softc_head *list)
1103 {
1104 struct rm_priotracker tracker;
1105 struct vxlan_softc *sc;
1106 int i;
1107
1108 VXLAN_SO_RLOCK(vso, &tracker);
1109 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
1110 LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
1111 vxlan_ifdetach(sc, ifp, list);
1112 }
1113 VXLAN_SO_RUNLOCK(vso, &tracker);
1114 }
1115
1116 static struct vxlan_socket *
vxlan_socket_mc_lookup(const union vxlan_sockaddr * vxlsa)1117 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
1118 {
1119 union vxlan_sockaddr laddr;
1120 struct vxlan_socket *vso;
1121
1122 laddr = *vxlsa;
1123
1124 if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
1125 laddr.in4.sin_addr.s_addr = INADDR_ANY;
1126 #ifdef INET6
1127 else
1128 laddr.in6.sin6_addr = in6addr_any;
1129 #endif
1130
1131 vso = vxlan_socket_lookup(&laddr);
1132
1133 return (vso);
1134 }
1135
1136 static int
vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info * mc,const union vxlan_sockaddr * group,const union vxlan_sockaddr * local,int ifidx)1137 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
1138 const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
1139 int ifidx)
1140 {
1141
1142 if (!vxlan_sockaddr_in_any(local) &&
1143 !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
1144 return (0);
1145 if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
1146 return (0);
1147 if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
1148 return (0);
1149
1150 return (1);
1151 }
1152
static int
vxlan_socket_mc_join_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int *ifidx, union vxlan_sockaddr *source)
{
	struct sockopt sopt;
	int error;

	/*
	 * Join 'group' on the socket, using 'local' (IPv4) or '*ifidx'
	 * (IPv6) to select the interface.  On success '*source' records
	 * the address used, for a later matching leave.
	 */
	*source = *local;

	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		struct ip_mreq mreq;

		mreq.imr_multiaddr = group->in4.sin_addr;
		mreq.imr_interface = local->in4.sin_addr;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_ADD_MEMBERSHIP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error)
			return (error);

		/*
		 * BMV: Ideally, there would be a formal way for us to get
		 * the local interface that was selected based on the
		 * imr_interface address. We could then update *ifidx so
		 * vxlan_sockaddr_mc_info_match() would return a match for
		 * later creates that explicitly set the multicast interface.
		 *
		 * If we really need to, we can of course look in the INP's
		 * membership list:
		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
		 *         imo_head[]->imf_inm->inm_ifp
		 * similarly to imo_match_group().
		 */
		source->in4.sin_addr = local->in4.sin_addr;

	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		struct ipv6_mreq mreq;

		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
		mreq.ipv6mr_interface = *ifidx;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_JOIN_GROUP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error)
			return (error);

		/*
		 * BMV: As with IPv4, we would really like to know what
		 * interface in6p_lookup_mcast_ifp() selected.
		 */
	} else
		error = EAFNOSUPPORT;

	return (error);
}
1219
static int
vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
    int ifidx)
{
	struct sockopt sopt;
	int error;

	/*
	 * Leave 'group' on the socket; 'source'/'ifidx' must mirror the
	 * values recorded by vxlan_socket_mc_join_group().
	 */
	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_SET;

	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		struct ip_mreq mreq;

		mreq.imr_multiaddr = group->in4.sin_addr;
		mreq.imr_interface = source->in4.sin_addr;

		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_DROP_MEMBERSHIP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);

	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		struct ipv6_mreq mreq;

		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
		mreq.ipv6mr_interface = ifidx;

		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_LEAVE_GROUP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);

	} else
		error = EAFNOSUPPORT;

	return (error);
}
1260
static int
vxlan_socket_mc_add_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int ifidx, int *idx)
{
	union vxlan_sockaddr source;
	struct vxlan_socket_mc_info *mc;
	int i, empty, error;

	/*
	 * Within a socket, the same multicast group may be used by multiple
	 * interfaces, each with a different network identifier. But a socket
	 * may only join a multicast group once, so keep track of the users
	 * here.
	 */

	VXLAN_SO_WLOCK(vso);
	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];

		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			empty++;
			continue;
		}

		/* Existing membership: just bump its user count. */
		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
			goto out;
	}
	VXLAN_SO_WUNLOCK(vso);

	if (empty == 0)
		return (ENOSPC);

	/* The join may sleep, so it is done outside the socket lock. */
	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
	if (error)
		return (error);

	/*
	 * Reacquire the lock and claim a free slot; a concurrent add may
	 * have consumed the slots we saw above.
	 */
	VXLAN_SO_WLOCK(vso);
	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];

		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
			mc->vxlsomc_ifidx = ifidx;
			goto out;
		}
	}
	VXLAN_SO_WUNLOCK(vso);

	/* No slot left after the race: undo the join we just performed. */
	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
	MPASS(error == 0);

	return (ENOSPC);

out:
	mc->vxlsomc_users++;
	VXLAN_SO_WUNLOCK(vso);

	/* Hand back the slot index for a later release. */
	*idx = i;

	return (0);
}
1324
static void
vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
{
	union vxlan_sockaddr group, source;
	struct vxlan_socket_mc_info *mc;
	int ifidx, leave;

	/*
	 * Drop one user from multicast slot 'idx'; the last user clears
	 * the slot and leaves the group on the socket.
	 */
	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));

	leave = 0;
	mc = &vso->vxlso_mc[idx];

	VXLAN_SO_WLOCK(vso);
	mc->vxlsomc_users--;
	if (mc->vxlsomc_users == 0) {
		/* Copy out what the sleeping leave below needs. */
		group = mc->vxlsomc_gaddr;
		source = mc->vxlsomc_saddr;
		ifidx = mc->vxlsomc_ifidx;
		bzero(mc, sizeof(*mc));
		leave = 1;
	}
	VXLAN_SO_WUNLOCK(vso);

	if (leave != 0) {
		/*
		 * Our socket's membership in this group may have already
		 * been removed if we joined through an interface that's
		 * been detached.
		 */
		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
	}
}
1358
1359 static struct vxlan_softc *
vxlan_socket_lookup_softc_locked(struct vxlan_socket * vso,uint32_t vni)1360 vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
1361 {
1362 struct vxlan_softc *sc;
1363 uint32_t hash;
1364
1365 VXLAN_SO_LOCK_ASSERT(vso);
1366 hash = VXLAN_SO_VNI_HASH(vni);
1367
1368 LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
1369 if (sc->vxl_vni == vni) {
1370 VXLAN_ACQUIRE(sc);
1371 break;
1372 }
1373 }
1374
1375 return (sc);
1376 }
1377
1378 static struct vxlan_softc *
vxlan_socket_lookup_softc(struct vxlan_socket * vso,uint32_t vni)1379 vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
1380 {
1381 struct rm_priotracker tracker;
1382 struct vxlan_softc *sc;
1383
1384 VXLAN_SO_RLOCK(vso, &tracker);
1385 sc = vxlan_socket_lookup_softc_locked(vso, vni);
1386 VXLAN_SO_RUNLOCK(vso, &tracker);
1387
1388 return (sc);
1389 }
1390
static int
vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{
	struct vxlan_softc *tsc;
	uint32_t vni, hash;

	vni = sc->vxl_vni;
	hash = VXLAN_SO_VNI_HASH(vni);

	/* Each network identifier may appear at most once per socket. */
	VXLAN_SO_WLOCK(vso);
	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
	if (tsc != NULL) {
		VXLAN_SO_WUNLOCK(vso);
		/* Drop the reference the lookup took on the duplicate. */
		vxlan_release(tsc);
		return (EEXIST);
	}

	/* The hash table holds a reference on the inserted softc. */
	VXLAN_ACQUIRE(sc);
	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
	VXLAN_SO_WUNLOCK(vso);

	return (0);
}
1414
static void
vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{

	VXLAN_SO_WLOCK(vso);
	LIST_REMOVE(sc, vxl_entry);
	VXLAN_SO_WUNLOCK(vso);

	/* Return the reference the hash table held on the softc. */
	vxlan_release(sc);
}
1425
1426 static struct ifnet *
vxlan_multicast_if_ref(struct vxlan_softc * sc,int ipv4)1427 vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
1428 {
1429 struct ifnet *ifp;
1430
1431 VXLAN_LOCK_ASSERT(sc);
1432
1433 if (ipv4 && sc->vxl_im4o != NULL)
1434 ifp = sc->vxl_im4o->imo_multicast_ifp;
1435 else if (!ipv4 && sc->vxl_im6o != NULL)
1436 ifp = sc->vxl_im6o->im6o_multicast_ifp;
1437 else
1438 ifp = NULL;
1439
1440 if (ifp != NULL)
1441 if_ref(ifp);
1442
1443 return (ifp);
1444 }
1445
static void
vxlan_free_multicast(struct vxlan_softc *sc)
{

	/* Drop the multicast interface reference, if one is held. */
	if (sc->vxl_mc_ifp != NULL) {
		if_rele(sc->vxl_mc_ifp);
		sc->vxl_mc_ifp = NULL;
		sc->vxl_mc_ifindex = 0;
	}

	/* Release the per-family multicast option structures. */
	if (sc->vxl_im4o != NULL) {
		free(sc->vxl_im4o, M_VXLAN);
		sc->vxl_im4o = NULL;
	}

	if (sc->vxl_im6o != NULL) {
		free(sc->vxl_im6o, M_VXLAN);
		sc->vxl_im6o = NULL;
	}
}
1466
static int
vxlan_setup_multicast_interface(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	/*
	 * Resolve the user-configured multicast interface name and record
	 * a referenced ifp plus its index in the softc.
	 */
	ifp = ifunit_ref(sc->vxl_mc_ifname);
	if (ifp == NULL) {
		if_printf(sc->vxl_ifp, "multicast interface %s does "
		    "not exist\n", sc->vxl_mc_ifname);
		return (ENOENT);
	}

	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
		if_printf(sc->vxl_ifp, "interface %s does not support "
		    "multicast\n", sc->vxl_mc_ifname);
		if_rele(ifp);
		return (ENOTSUP);
	}

	/* The reference taken by ifunit_ref() is kept in the softc. */
	sc->vxl_mc_ifp = ifp;
	sc->vxl_mc_ifindex = ifp->if_index;

	return (0);
}
1491
static int
vxlan_setup_multicast(struct vxlan_softc *sc)
{
	const union vxlan_sockaddr *group;
	int error;

	group = &sc->vxl_dst_addr;
	error = 0;

	/* An explicit multicast interface name is optional. */
	if (sc->vxl_mc_ifname[0] != '\0') {
		error = vxlan_setup_multicast_interface(sc);
		if (error)
			return (error);
	}

	/*
	 * Initialize an multicast options structure that is sufficiently
	 * populated for use in the respective IP output routine. This
	 * structure is typically stored in the socket, but our sockets
	 * may be shared among multiple interfaces.
	 */
	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
		    M_ZERO | M_WAITOK);
		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
		sc->vxl_im4o->imo_multicast_vif = -1;
	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
		    M_ZERO | M_WAITOK);
		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
	}

	return (error);
}
1528
static int
vxlan_setup_socket(struct vxlan_softc *sc)
{
	struct vxlan_socket *vso;
	struct ifnet *ifp;
	union vxlan_sockaddr *saddr, *daddr;
	int multicast, error;

	vso = NULL;
	ifp = sc->vxl_ifp;
	saddr = &sc->vxl_src_addr;
	daddr = &sc->vxl_dst_addr;

	multicast = vxlan_sockaddr_in_multicast(daddr);
	MPASS(multicast != -1);
	/* -1 marks "no multicast slot held" for the error path below. */
	sc->vxl_vso_mc_index = -1;

	/*
	 * Try to create the socket. If that fails, attempt to use an
	 * existing socket.
	 */
	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
	if (error) {
		if (multicast != 0)
			vso = vxlan_socket_mc_lookup(saddr);
		else
			vso = vxlan_socket_lookup(saddr);

		if (vso == NULL) {
			if_printf(ifp, "cannot create socket (error: %d), "
			    "and no existing socket found\n", error);
			goto out;
		}
	}

	if (multicast != 0) {
		/* Allocate the per-family multicast output options. */
		error = vxlan_setup_multicast(sc);
		if (error)
			goto out;

		/* Join the group, recording the slot for later release. */
		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
		if (error)
			goto out;
	}

	/* Hash the softc on the socket by its network identifier. */
	sc->vxl_sock = vso;
	error = vxlan_socket_insert_softc(vso, sc);
	if (error) {
		sc->vxl_sock = NULL;
		if_printf(ifp, "network identifier %d already exists in "
		    "this socket\n", sc->vxl_vni);
		goto out;
	}

	return (0);

out:
	/* Unwind in reverse order of the setup above. */
	if (vso != NULL) {
		if (sc->vxl_vso_mc_index != -1) {
			vxlan_socket_mc_release_group_by_idx(vso,
			    sc->vxl_vso_mc_index);
			sc->vxl_vso_mc_index = -1;
		}
		if (multicast != 0)
			vxlan_free_multicast(sc);
		vxlan_socket_release(vso);
	}

	return (error);
}
1600
1601 #ifdef INET6
static void
vxlan_setup_zero_checksum_port(struct vxlan_softc *sc)
{

	/*
	 * Record this interface's UDP port as the VNET's RFC 6935
	 * zero-checksum port, when it is unambiguous to do so: IPv6
	 * tunnels only, matching src/dst ports, and no conflicting
	 * port already registered.
	 */
	if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr))
		return;

	MPASS(sc->vxl_src_addr.in6.sin6_port != 0);
	MPASS(sc->vxl_dst_addr.in6.sin6_port != 0);

	if (sc->vxl_src_addr.in6.sin6_port != sc->vxl_dst_addr.in6.sin6_port) {
		if_printf(sc->vxl_ifp, "port %d in src address does not match "
		    "port %d in dst address, rfc6935_port (%d) not updated.\n",
		    ntohs(sc->vxl_src_addr.in6.sin6_port),
		    ntohs(sc->vxl_dst_addr.in6.sin6_port),
		    V_zero_checksum_port);
		return;
	}

	if (V_zero_checksum_port != 0) {
		/* Only warn when the registered port actually differs. */
		if (V_zero_checksum_port !=
		    ntohs(sc->vxl_src_addr.in6.sin6_port)) {
			if_printf(sc->vxl_ifp, "rfc6935_port is already set to "
			    "%d, cannot set it to %d.\n", V_zero_checksum_port,
			    ntohs(sc->vxl_src_addr.in6.sin6_port));
		}
		return;
	}

	V_zero_checksum_port = ntohs(sc->vxl_src_addr.in6.sin6_port);
	if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n",
	    V_zero_checksum_port);
}
1635 #endif
1636
1637 static void
vxlan_setup_interface_hdrlen(struct vxlan_softc * sc)1638 vxlan_setup_interface_hdrlen(struct vxlan_softc *sc)
1639 {
1640 struct ifnet *ifp;
1641
1642 VXLAN_LOCK_WASSERT(sc);
1643
1644 ifp = sc->vxl_ifp;
1645 ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);
1646
1647 if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
1648 ifp->if_hdrlen += sizeof(struct ip);
1649 else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
1650 ifp->if_hdrlen += sizeof(struct ip6_hdr);
1651
1652 if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0)
1653 ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
1654 }
1655
static int
vxlan_valid_init_config(struct vxlan_softc *sc)
{
	const char *reason;

	/*
	 * Validate the softc's configuration before bringing the
	 * interface up; returns 0 or EINVAL after logging the reason.
	 */
	if (vxlan_check_vni(sc->vxl_vni) != 0) {
		reason = "invalid virtual network identifier specified";
		goto fail;
	}

	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
		reason = "source address type is not supported";
		goto fail;
	}

	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
		reason = "destination address type is not supported";
		goto fail;
	}

	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
		reason = "no valid destination address specified";
		goto fail;
	}

	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
	    sc->vxl_mc_ifname[0] != '\0') {
		reason = "can only specify interface with a group address";
		goto fail;
	}

	/* A wildcard source may pair with either family's destination. */
	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
			reason = "source and destination address must both "
			    "be either IPv4 or IPv6";
			goto fail;
		}
	}

	/* Port checks use the in4 view regardless of family (union). */
	if (sc->vxl_src_addr.in4.sin_port == 0) {
		reason = "local port not specified";
		goto fail;
	}

	if (sc->vxl_dst_addr.in4.sin_port == 0) {
		reason = "remote port not specified";
		goto fail;
	}

	return (0);

fail:
	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
	return (EINVAL);
}
1712
static void
vxlan_init_wait(struct vxlan_softc *sc)
{

	VXLAN_LOCK_WASSERT(sc);
	/* Sleep until a concurrent vxlan_init() finishes; see vxlan_release()
	 * for why a timed sleep is used instead of a plain sleep. */
	while (sc->vxl_flags & VXLAN_FLAG_INIT)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
}
1721
static void
vxlan_init_complete(struct vxlan_softc *sc)
{

	/* Clear the INIT flag and wake any vxlan_init_wait() sleepers. */
	VXLAN_WLOCK(sc);
	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
	wakeup(sc);
	VXLAN_WUNLOCK(sc);
}
1731
static void
vxlan_init(void *xsc)
{
	static const uint8_t empty_mac[ETHER_ADDR_LEN];
	struct vxlan_softc *sc;
	struct ifnet *ifp;

	sc = xsc;
	ifp = sc->vxl_ifp;

	/* Serialize against teardown and other inits via the global sx. */
	sx_xlock(&vxlan_sx);
	VXLAN_WLOCK(sc);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		VXLAN_WUNLOCK(sc);
		sx_xunlock(&vxlan_sx);
		return;
	}
	/* Mark init in progress so waiters block in vxlan_init_wait(). */
	sc->vxl_flags |= VXLAN_FLAG_INIT;
	VXLAN_WUNLOCK(sc);

	if (vxlan_valid_init_config(sc) != 0)
		goto out;

	if (vxlan_setup_socket(sc) != 0)
		goto out;

#ifdef INET6
	vxlan_setup_zero_checksum_port(sc);
#endif

	/* Initialize the default forwarding entry. */
	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);

	VXLAN_WLOCK(sc);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	/* Start periodic pruning of expired forwarding entries. */
	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
	    vxlan_timer, sc);
	VXLAN_WUNLOCK(sc);

	if_link_state_change(ifp, LINK_STATE_UP);

	EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family,
	    ntohs(sc->vxl_src_addr.in4.sin_port));
out:
	/* Always clear the INIT flag, even on failure. */
	vxlan_init_complete(sc);
	sx_xunlock(&vxlan_sx);
}
1780
static void
vxlan_release(struct vxlan_softc *sc)
{

	/*
	 * Drop a softc reference, waking the teardown drain on the last one.
	 *
	 * The softc may be destroyed as soon as we release our reference,
	 * so we cannot serialize the wakeup with the softc lock. We use a
	 * timeout in our sleeps so a missed wakeup is unfortunate but not
	 * fatal.
	 */
	if (VXLAN_RELEASE(sc) != 0)
		wakeup(sc);
}
1794
static void
vxlan_teardown_wait(struct vxlan_softc *sc)
{

	VXLAN_LOCK_WASSERT(sc);
	/* Sleep until a concurrent teardown clears the flag; timed sleep
	 * guards against a missed wakeup (see vxlan_release()). */
	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
}
1803
static void
vxlan_teardown_complete(struct vxlan_softc *sc)
{

	/* Clear the TEARDOWN flag and wake vxlan_teardown_wait() sleepers. */
	VXLAN_WLOCK(sc);
	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
	wakeup(sc);
	VXLAN_WUNLOCK(sc);
}
1813
static void
vxlan_teardown_locked(struct vxlan_softc *sc)
{
	struct ifnet *ifp;
	struct vxlan_socket *vso;
	bool running;

	/* Caller holds the sx and the softc write lock, and set TEARDOWN. */
	sx_assert(&vxlan_sx, SA_XLOCKED);
	VXLAN_LOCK_WASSERT(sc);
	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);

	ifp = sc->vxl_ifp;
	ifp->if_flags &= ~IFF_UP;
	running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	callout_stop(&sc->vxl_callout);
	/* Detach the socket from the softc before dropping the lock. */
	vso = sc->vxl_sock;
	sc->vxl_sock = NULL;

	VXLAN_WUNLOCK(sc);
	if_link_state_change(ifp, LINK_STATE_DOWN);
	if (running)
		EVENTHANDLER_INVOKE(vxlan_stop, ifp,
		    sc->vxl_src_addr.in4.sin_family,
		    ntohs(sc->vxl_src_addr.in4.sin_port));

	if (vso != NULL) {
		vxlan_socket_remove_softc(vso, sc);

		if (sc->vxl_vso_mc_index != -1) {
			vxlan_socket_mc_release_group_by_idx(vso,
			    sc->vxl_vso_mc_index);
			sc->vxl_vso_mc_index = -1;
		}
	}

	/* Wait for all outstanding softc references to drain. */
	VXLAN_WLOCK(sc);
	while (sc->vxl_refcnt != 0)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
	VXLAN_WUNLOCK(sc);

	/* The timer may still be running; wait for it to finish. */
	callout_drain(&sc->vxl_callout);

	vxlan_free_multicast(sc);
	if (vso != NULL)
		vxlan_socket_release(vso);

	vxlan_teardown_complete(sc);
}
1863
static void
vxlan_teardown(struct vxlan_softc *sc)
{

	sx_xlock(&vxlan_sx);
	VXLAN_WLOCK(sc);
	/* If another thread is already tearing down, just wait it out. */
	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
		vxlan_teardown_wait(sc);
		VXLAN_WUNLOCK(sc);
		sx_xunlock(&vxlan_sx);
		return;
	}

	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
	/* Consumes the softc write lock. */
	vxlan_teardown_locked(sc);
	sx_xunlock(&vxlan_sx);
}
1881
static void
vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{

	/*
	 * If 'ifp' is this softc's multicast interface, claim the teardown
	 * and queue the softc on 'list' for the caller to tear down later.
	 */
	VXLAN_WLOCK(sc);

	if (sc->vxl_mc_ifp != ifp)
		goto out;
	/* Already being torn down elsewhere; do not queue it twice. */
	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
		goto out;

	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);

out:
	VXLAN_WUNLOCK(sc);
}
1900
static void
vxlan_timer(void *xsc)
{
	struct vxlan_softc *sc;

	sc = xsc;
	VXLAN_LOCK_WASSERT(sc);

	/* Periodically expire stale forwarding entries and rearm. */
	vxlan_ftable_expire(sc);
	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
}
1912
1913 static int
vxlan_ioctl_ifflags(struct vxlan_softc * sc)1914 vxlan_ioctl_ifflags(struct vxlan_softc *sc)
1915 {
1916 struct ifnet *ifp;
1917
1918 ifp = sc->vxl_ifp;
1919
1920 if (ifp->if_flags & IFF_UP) {
1921 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1922 vxlan_init(sc);
1923 } else {
1924 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1925 vxlan_teardown(sc);
1926 }
1927
1928 return (0);
1929 }
1930
static int
vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
{
	struct rm_priotracker tracker;
	struct ifvxlancfg *cfg;

	/* Snapshot the softc configuration into the user's ifvxlancfg. */
	cfg = arg;
	bzero(cfg, sizeof(*cfg));

	VXLAN_RLOCK(sc, &tracker);
	cfg->vxlc_vni = sc->vxl_vni;
	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
	    sizeof(union vxlan_sockaddr));
	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
	    sizeof(union vxlan_sockaddr));
	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
	cfg->vxlc_port_min = sc->vxl_min_port;
	cfg->vxlc_port_max = sc->vxl_max_port;
	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
	cfg->vxlc_ttl = sc->vxl_ttl;
	VXLAN_RUNLOCK(sc, &tracker);

#ifdef INET6
	/* Expose embedded IPv6 scope ids in user-visible form. */
	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
		sa6_recoverscope(&cfg->vxlc_local_sa.in6);
	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
		sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
#endif

	return (0);
}
1965
1966 static int
vxlan_ctrl_set_vni(struct vxlan_softc * sc,void * arg)1967 vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
1968 {
1969 struct ifvxlancmd *cmd;
1970 int error;
1971
1972 cmd = arg;
1973
1974 if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
1975 return (EINVAL);
1976
1977 VXLAN_WLOCK(sc);
1978 if (vxlan_can_change_config(sc)) {
1979 sc->vxl_vni = cmd->vxlcmd_vni;
1980 error = 0;
1981 } else
1982 error = EBUSY;
1983 VXLAN_WUNLOCK(sc);
1984
1985 return (error);
1986 }
1987
1988 static int
vxlan_ctrl_set_local_addr(struct vxlan_softc * sc,void * arg)1989 vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
1990 {
1991 struct ifvxlancmd *cmd;
1992 union vxlan_sockaddr *vxlsa;
1993 int error;
1994
1995 cmd = arg;
1996 vxlsa = &cmd->vxlcmd_sa;
1997
1998 if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
1999 return (EINVAL);
2000 if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
2001 return (EINVAL);
2002 if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
2003 error = vxlan_sockaddr_in6_embedscope(vxlsa);
2004 if (error)
2005 return (error);
2006 }
2007
2008 VXLAN_WLOCK(sc);
2009 if (vxlan_can_change_config(sc)) {
2010 vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
2011 vxlan_set_hwcaps(sc);
2012 error = 0;
2013 } else
2014 error = EBUSY;
2015 VXLAN_WUNLOCK(sc);
2016
2017 return (error);
2018 }
2019
2020 static int
vxlan_ctrl_set_remote_addr(struct vxlan_softc * sc,void * arg)2021 vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
2022 {
2023 struct ifvxlancmd *cmd;
2024 union vxlan_sockaddr *vxlsa;
2025 int error;
2026
2027 cmd = arg;
2028 vxlsa = &cmd->vxlcmd_sa;
2029
2030 if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
2031 return (EINVAL);
2032 if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
2033 error = vxlan_sockaddr_in6_embedscope(vxlsa);
2034 if (error)
2035 return (error);
2036 }
2037
2038 VXLAN_WLOCK(sc);
2039 if (vxlan_can_change_config(sc)) {
2040 vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
2041 vxlan_setup_interface_hdrlen(sc);
2042 error = 0;
2043 } else
2044 error = EBUSY;
2045 VXLAN_WUNLOCK(sc);
2046
2047 return (error);
2048 }
2049
2050 static int
vxlan_ctrl_set_local_port(struct vxlan_softc * sc,void * arg)2051 vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
2052 {
2053 struct ifvxlancmd *cmd;
2054 int error;
2055
2056 cmd = arg;
2057
2058 if (cmd->vxlcmd_port == 0)
2059 return (EINVAL);
2060
2061 VXLAN_WLOCK(sc);
2062 if (vxlan_can_change_config(sc)) {
2063 sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
2064 error = 0;
2065 } else
2066 error = EBUSY;
2067 VXLAN_WUNLOCK(sc);
2068
2069 return (error);
2070 }
2071
2072 static int
vxlan_ctrl_set_remote_port(struct vxlan_softc * sc,void * arg)2073 vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
2074 {
2075 struct ifvxlancmd *cmd;
2076 int error;
2077
2078 cmd = arg;
2079
2080 if (cmd->vxlcmd_port == 0)
2081 return (EINVAL);
2082
2083 VXLAN_WLOCK(sc);
2084 if (vxlan_can_change_config(sc)) {
2085 sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
2086 error = 0;
2087 } else
2088 error = EBUSY;
2089 VXLAN_WUNLOCK(sc);
2090
2091 return (error);
2092 }
2093
2094 static int
vxlan_ctrl_set_port_range(struct vxlan_softc * sc,void * arg)2095 vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
2096 {
2097 struct ifvxlancmd *cmd;
2098 uint16_t min, max;
2099 int error;
2100
2101 cmd = arg;
2102 min = cmd->vxlcmd_port_min;
2103 max = cmd->vxlcmd_port_max;
2104
2105 if (max < min)
2106 return (EINVAL);
2107
2108 VXLAN_WLOCK(sc);
2109 if (vxlan_can_change_config(sc)) {
2110 sc->vxl_min_port = min;
2111 sc->vxl_max_port = max;
2112 error = 0;
2113 } else
2114 error = EBUSY;
2115 VXLAN_WUNLOCK(sc);
2116
2117 return (error);
2118 }
2119
2120 static int
vxlan_ctrl_set_ftable_timeout(struct vxlan_softc * sc,void * arg)2121 vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
2122 {
2123 struct ifvxlancmd *cmd;
2124 int error;
2125
2126 cmd = arg;
2127
2128 VXLAN_WLOCK(sc);
2129 if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
2130 sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
2131 error = 0;
2132 } else
2133 error = EINVAL;
2134 VXLAN_WUNLOCK(sc);
2135
2136 return (error);
2137 }
2138
2139 static int
vxlan_ctrl_set_ftable_max(struct vxlan_softc * sc,void * arg)2140 vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
2141 {
2142 struct ifvxlancmd *cmd;
2143 int error;
2144
2145 cmd = arg;
2146
2147 VXLAN_WLOCK(sc);
2148 if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
2149 sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
2150 error = 0;
2151 } else
2152 error = EINVAL;
2153 VXLAN_WUNLOCK(sc);
2154
2155 return (error);
2156 }
2157
2158 static int
vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc,void * arg)2159 vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
2160 {
2161 struct ifvxlancmd *cmd;
2162 int error;
2163
2164 cmd = arg;
2165
2166 VXLAN_WLOCK(sc);
2167 if (vxlan_can_change_config(sc)) {
2168 strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
2169 vxlan_set_hwcaps(sc);
2170 error = 0;
2171 } else
2172 error = EBUSY;
2173 VXLAN_WUNLOCK(sc);
2174
2175 return (error);
2176 }
2177
2178 static int
vxlan_ctrl_set_ttl(struct vxlan_softc * sc,void * arg)2179 vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
2180 {
2181 struct ifvxlancmd *cmd;
2182 int error;
2183
2184 cmd = arg;
2185
2186 VXLAN_WLOCK(sc);
2187 if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
2188 sc->vxl_ttl = cmd->vxlcmd_ttl;
2189 if (sc->vxl_im4o != NULL)
2190 sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
2191 if (sc->vxl_im6o != NULL)
2192 sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
2193 error = 0;
2194 } else
2195 error = EINVAL;
2196 VXLAN_WUNLOCK(sc);
2197
2198 return (error);
2199 }
2200
2201 static int
vxlan_ctrl_set_learn(struct vxlan_softc * sc,void * arg)2202 vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
2203 {
2204 struct ifvxlancmd *cmd;
2205
2206 cmd = arg;
2207
2208 VXLAN_WLOCK(sc);
2209 if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
2210 sc->vxl_flags |= VXLAN_FLAG_LEARN;
2211 else
2212 sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
2213 VXLAN_WUNLOCK(sc);
2214
2215 return (0);
2216 }
2217
/*
 * SIOCSDRVSPEC handler: add a static forwarding-table entry mapping a
 * MAC address to a remote tunnel endpoint.  The endpoint must be a
 * unicast IPv4/IPv6 address of the same family as the configured
 * destination; its port defaults to the tunnel's destination port.
 * Returns 0 on success, EINVAL/EAFNOSUPPORT on a bad address, ENOMEM
 * if the entry cannot be allocated, or the error from the insert.
 */
static int
vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
{
	union vxlan_sockaddr vxlsa;
	struct ifvxlancmd *cmd;
	struct vxlan_ftable_entry *fe;
	int error;

	cmd = arg;
	/* Work on a local copy so the scope embedding below does not
	 * modify the caller's buffer. */
	vxlsa = cmd->vxlcmd_sa;

	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
		return (EINVAL);
	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
		return (EINVAL);
	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
		return (EINVAL);
	/* BMV: We could support both IPv4 and IPv6 later. */
	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
		return (EAFNOSUPPORT);

	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
		if (error)
			return (error);
	}

	/* Allocate before taking the lock; may sleep. */
	fe = vxlan_ftable_entry_alloc();
	if (fe == NULL)
		return (ENOMEM);

	/* An unspecified port inherits the tunnel's destination port. */
	if (vxlsa.in4.sin_port == 0)
		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;

	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
	    VXLAN_FE_FLAG_STATIC);

	VXLAN_WLOCK(sc);
	error = vxlan_ftable_entry_insert(sc, fe);
	VXLAN_WUNLOCK(sc);

	/* The entry was not inserted; release it ourselves. */
	if (error)
		vxlan_ftable_entry_free(fe);

	return (error);
}
2264
2265 static int
vxlan_ctrl_ftable_entry_rem(struct vxlan_softc * sc,void * arg)2266 vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
2267 {
2268 struct ifvxlancmd *cmd;
2269 struct vxlan_ftable_entry *fe;
2270 int error;
2271
2272 cmd = arg;
2273
2274 VXLAN_WLOCK(sc);
2275 fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
2276 if (fe != NULL) {
2277 vxlan_ftable_entry_destroy(sc, fe);
2278 error = 0;
2279 } else
2280 error = ENOENT;
2281 VXLAN_WUNLOCK(sc);
2282
2283 return (error);
2284 }
2285
2286 static int
vxlan_ctrl_flush(struct vxlan_softc * sc,void * arg)2287 vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
2288 {
2289 struct ifvxlancmd *cmd;
2290 int all;
2291
2292 cmd = arg;
2293 all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;
2294
2295 VXLAN_WLOCK(sc);
2296 vxlan_ftable_flush(sc, all);
2297 VXLAN_WUNLOCK(sc);
2298
2299 return (0);
2300 }
2301
/*
 * Dispatch a SIOCGDRVSPEC/SIOCSDRVSPEC request through the driver's
 * control table.  Validates the command index, direction (get vs.
 * set must match the command's copy-out flag), privilege, and the
 * argument size before copying user data in/out around the handler.
 * Returns 0 on success or an errno value.
 */
static int
vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
{
	const struct vxlan_control *vc;
	union {
		struct ifvxlancfg	cfg;
		struct ifvxlancmd	cmd;
	} args;
	int out, error;

	/* Bounds-check the user-supplied command index. */
	if (ifd->ifd_cmd >= vxlan_control_table_size)
		return (EINVAL);

	bzero(&args, sizeof(args));
	vc = &vxlan_control_table[ifd->ifd_cmd];
	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;

	/* A get must copy out, a set must not. */
	if ((get != 0 && out == 0) || (get == 0 && out != 0))
		return (EINVAL);

	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
		error = priv_check(curthread, PRIV_NET_VXLAN);
		if (error)
			return (error);
	}

	/* The caller's buffer must exactly match the expected size and
	 * never exceed our local scratch union. */
	if (ifd->ifd_len != vc->vxlc_argsize ||
	    ifd->ifd_len > sizeof(args))
		return (EINVAL);

	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
		if (error)
			return (error);
	}

	error = vc->vxlc_func(sc, &args);
	if (error)
		return (error);

	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
		if (error)
			return (error);
	}

	return (0);
}
2350
/*
 * Interface ioctl entry point.  Handles driver-specific requests,
 * flags, media, MTU, capabilities and tunnel FIB; everything else is
 * passed to ether_ioctl().  Write-side state changes take the softc
 * write lock, reads use the read lock.
 */
static int
vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct rm_priotracker tracker;
	struct vxlan_softc *sc;
	struct ifreq *ifr;
	struct ifdrv *ifd;
	int error;

	sc = ifp->if_softc;
	ifr = (struct ifreq *) data;
	ifd = (struct ifdrv *) data;

	error = 0;

	switch (cmd) {
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* Nothing to do for multicast filter updates. */
		break;

	case SIOCGDRVSPEC:
	case SIOCSDRVSPEC:
		error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
		break;

	case SIOCSIFFLAGS:
		error = vxlan_ioctl_ifflags(sc);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
		break;

	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) {
			error = EINVAL;
		} else {
			VXLAN_WLOCK(sc);
			ifp->if_mtu = ifr->ifr_mtu;
			/* Remember that the MTU was set explicitly. */
			sc->vxl_flags |= VXLAN_FLAG_USER_MTU;
			VXLAN_WUNLOCK(sc);
		}
		break;

	case SIOCSIFCAP:
		VXLAN_WLOCK(sc);
		error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap);
		if (error == 0)
			vxlan_set_hwcaps(sc);
		VXLAN_WUNLOCK(sc);
		break;

	case SIOCGTUNFIB:
		VXLAN_RLOCK(sc, &tracker);
		ifr->ifr_fib = sc->vxl_fibnum;
		VXLAN_RUNLOCK(sc, &tracker);
		break;

	case SIOCSTUNFIB:
		/* Changing the tunnel FIB is privileged. */
		if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0)
			break;

		if (ifr->ifr_fib >= rt_numfibs)
			error = EINVAL;
		else {
			VXLAN_WLOCK(sc);
			sc->vxl_fibnum = ifr->ifr_fib;
			VXLAN_WUNLOCK(sc);
		}
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	return (error);
}
2430
2431 #if defined(INET) || defined(INET6)
2432 static uint16_t
vxlan_pick_source_port(struct vxlan_softc * sc,struct mbuf * m)2433 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
2434 {
2435 int range;
2436 uint32_t hash;
2437
2438 range = sc->vxl_max_port - sc->vxl_min_port + 1;
2439
2440 if (M_HASHTYPE_ISHASH(m))
2441 hash = m->m_pkthdr.flowid;
2442 else
2443 hash = jenkins_hash(m->m_data, ETHER_HDR_LEN,
2444 sc->vxl_port_hash_key);
2445
2446 return (sc->vxl_min_port + (hash % range));
2447 }
2448
2449 static void
vxlan_encap_header(struct vxlan_softc * sc,struct mbuf * m,int ipoff,uint16_t srcport,uint16_t dstport)2450 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
2451 uint16_t srcport, uint16_t dstport)
2452 {
2453 struct vxlanudphdr *hdr;
2454 struct udphdr *udph;
2455 struct vxlan_header *vxh;
2456 int len;
2457
2458 len = m->m_pkthdr.len - ipoff;
2459 MPASS(len >= sizeof(struct vxlanudphdr));
2460 hdr = mtodo(m, ipoff);
2461
2462 udph = &hdr->vxlh_udp;
2463 udph->uh_sport = srcport;
2464 udph->uh_dport = dstport;
2465 udph->uh_ulen = htons(len);
2466 udph->uh_sum = 0;
2467
2468 vxh = &hdr->vxlh_hdr;
2469 vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
2470 vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);
2471 }
2472 #endif
2473
2474 #if defined(INET6) || defined(INET)
2475 /*
2476 * Return the CSUM_INNER_* equivalent of CSUM_* caps.
2477 */
/*
 * Translate the mbuf's outer CSUM_* offload requests into their
 * CSUM_INNER_* equivalents, OR'ed with the given encapsulation flag
 * (e.g. CSUM_ENCAP_VXLAN).  'csum_flags_in' must request either IPv4
 * or IPv6 offloads, never both.
 */
static uint32_t
csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap)
{
	uint32_t csum_flags = encap;
	const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP;

	/*
	 * csum_flags can request either v4 or v6 offload but not both.
	 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO)
	 * so those bits are no good to detect the IP version. Other bits are
	 * always set with CSUM_TSO and we use those to figure out the IP
	 * version.
	 */
	if (csum_flags_in & v4) {
		if (csum_flags_in & CSUM_IP)
			csum_flags |= CSUM_INNER_IP;
		if (csum_flags_in & CSUM_IP_UDP)
			csum_flags |= CSUM_INNER_IP_UDP;
		if (csum_flags_in & CSUM_IP_TCP)
			csum_flags |= CSUM_INNER_IP_TCP;
		if (csum_flags_in & CSUM_IP_TSO)
			csum_flags |= CSUM_INNER_IP_TSO;
	} else {
#ifdef INVARIANTS
		const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP;

		MPASS((csum_flags_in & v6) != 0);
#endif
		if (csum_flags_in & CSUM_IP6_UDP)
			csum_flags |= CSUM_INNER_IP6_UDP;
		if (csum_flags_in & CSUM_IP6_TCP)
			csum_flags |= CSUM_INNER_IP6_TCP;
		if (csum_flags_in & CSUM_IP6_TSO)
			csum_flags |= CSUM_INNER_IP6_TSO;
	}

	return (csum_flags);
}
2516 #endif
2517
/*
 * Encapsulate the Ethernet frame in 'm' in IPv4/UDP/VXLAN headers and
 * transmit it to the remote endpoint 'fvxlsa' via ip_output().  If the
 * frame requests checksum/TSO offload, the outbound route is looked up
 * and the nexthop interface must support the inner-frame offloads or
 * the packet is dropped with ENXIO.  Consumes the mbuf in all cases.
 * Returns 0 on success or an errno value; ENOTSUP without INET.
 */
static int
vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
    struct mbuf *m)
{
#ifdef INET
	struct ifnet *ifp;
	struct ip *ip;
	struct in_addr srcaddr, dstaddr;
	uint16_t srcport, dstport;
	int plen, mcast, error;
	struct route route, *ro;
	struct sockaddr_in *sin;
	uint32_t csum_flags;

	NET_EPOCH_ASSERT();

	ifp = sc->vxl_ifp;
	srcaddr = sc->vxl_src_addr.in4.sin_addr;
	srcport = htons(vxlan_pick_source_port(sc, m));
	dstaddr = fvxlsa->in4.sin_addr;
	dstport = fvxlsa->in4.sin_port;

	/* Remember the inner frame length for the byte counter. */
	plen = m->m_pkthdr.len;
	M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
	    M_NOWAIT);
	if (m == NULL) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENOBUFS);
	}

	ip = mtod(m, struct ip *);
	ip->ip_tos = 0;
	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_off = 0;
	ip->ip_ttl = sc->vxl_ttl;
	ip->ip_p = IPPROTO_UDP;
	ip->ip_sum = 0;
	ip->ip_src = srcaddr;
	ip->ip_dst = dstaddr;

	vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);

	/* Count multicast/broadcast before clearing; the outer packet
	 * is unicast (or handled by ip_output's multicast options). */
	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
	m->m_flags &= ~(M_MCAST | M_BCAST);

	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
	if (m->m_pkthdr.csum_flags != 0) {
		/*
		 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
		 * up the ifnet for the outbound route and verify that the
		 * outbound ifnet can perform the requested operation on the
		 * inner frame.
		 */
		bzero(&route, sizeof(route));
		ro = &route;
		sin = (struct sockaddr_in *)&ro->ro_dst;
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = ip->ip_dst;
		ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE,
		    0);
		if (ro->ro_nh == NULL) {
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (EHOSTUNREACH);
		}

		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
		    CSUM_ENCAP_VXLAN);
		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
		    csum_flags) {
			/* Rate-limited diagnostic for the capability gap. */
			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;

				if_printf(ifp, "interface %s is missing hwcaps "
				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
				    "hwassist 0x%08x\n", nh_ifp->if_xname,
				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
				    m->m_pkthdr.csum_flags, csum_flags,
				    (uint32_t)nh_ifp->if_hwassist);
			}
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (ENXIO);
		}
		m->m_pkthdr.csum_flags = csum_flags;
		if (csum_flags &
		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
			counter_u64_add(sc->vxl_stats.txcsum, 1);
			if (csum_flags & CSUM_INNER_TSO)
				counter_u64_add(sc->vxl_stats.tso, 1);
		}
	} else
		ro = NULL;
	error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL);
	if (error == 0) {
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
		if (mcast != 0)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
	} else
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

	return (error);
#else
	m_freem(m);
	return (ENOTSUP);
#endif
}
2628
/*
 * IPv6 counterpart of vxlan_encap4(): encapsulate the frame in
 * IPv6/UDP/VXLAN headers and transmit via ip6_output().  When no
 * offload is requested, the mandatory IPv6 UDP checksum is set up
 * unless the destination port is the configured zero-checksum port.
 * Consumes the mbuf in all cases.  Returns 0 or an errno value;
 * ENOTSUP without INET6.
 */
static int
vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
    struct mbuf *m)
{
#ifdef INET6
	struct ifnet *ifp;
	struct ip6_hdr *ip6;
	const struct in6_addr *srcaddr, *dstaddr;
	uint16_t srcport, dstport;
	int plen, mcast, error;
	struct route_in6 route, *ro;
	struct sockaddr_in6 *sin6;
	uint32_t csum_flags;

	NET_EPOCH_ASSERT();

	ifp = sc->vxl_ifp;
	srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
	srcport = htons(vxlan_pick_source_port(sc, m));
	dstaddr = &fvxlsa->in6.sin6_addr;
	dstport = fvxlsa->in6.sin6_port;

	/* Remember the inner frame length for the byte counter. */
	plen = m->m_pkthdr.len;
	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
	    M_NOWAIT);
	if (m == NULL) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENOBUFS);
	}

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;		/* BMV: Keep in forwarding entry? */
	ip6->ip6_vfc = IPV6_VERSION;
	ip6->ip6_plen = 0;
	ip6->ip6_nxt = IPPROTO_UDP;
	ip6->ip6_hlim = sc->vxl_ttl;
	ip6->ip6_src = *srcaddr;
	ip6->ip6_dst = *dstaddr;

	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);

	/* Count multicast/broadcast before clearing the flags. */
	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
	m->m_flags &= ~(M_MCAST | M_BCAST);

	ro = NULL;
	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
	if (m->m_pkthdr.csum_flags != 0) {
		/*
		 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
		 * up the ifnet for the outbound route and verify that the
		 * outbound ifnet can perform the requested operation on the
		 * inner frame.
		 */
		bzero(&route, sizeof(route));
		ro = &route;
		sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_addr = ip6->ip6_dst;
		ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0,
		    NHR_NONE, 0);
		if (ro->ro_nh == NULL) {
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (EHOSTUNREACH);
		}

		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
		    CSUM_ENCAP_VXLAN);
		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
		    csum_flags) {
			/* Rate-limited diagnostic for the capability gap. */
			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;

				if_printf(ifp, "interface %s is missing hwcaps "
				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
				    "hwassist 0x%08x\n", nh_ifp->if_xname,
				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
				    m->m_pkthdr.csum_flags, csum_flags,
				    (uint32_t)nh_ifp->if_hwassist);
			}
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (ENXIO);
		}
		m->m_pkthdr.csum_flags = csum_flags;
		if (csum_flags &
		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
			counter_u64_add(sc->vxl_stats.txcsum, 1);
			if (csum_flags & CSUM_INNER_TSO)
				counter_u64_add(sc->vxl_stats.tso, 1);
		}
	} else if (ntohs(dstport) != V_zero_checksum_port) {
		/* IPv6 requires a UDP checksum: seed the pseudo-header
		 * sum and let the stack/driver finish it. */
		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));

		hdr->uh_sum = in6_cksum_pseudo(ip6,
		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
	}
	error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL);
	if (error == 0) {
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
		if (mcast != 0)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
	} else
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

	return (error);
#else
	m_freem(m);
	return (ENOTSUP);
#endif
}
2745
2746 #define MTAG_VXLAN_LOOP 0x7876706c /* vxlp */
/* Mbuf tag cookie used to detect vxlan-in-vxlan transmit loops. */
#define MTAG_VXLAN_LOOP	0x7876706c /* vxlp */
static int
vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct rm_priotracker tracker;
	union vxlan_sockaddr vxlsa;
	struct vxlan_softc *sc;
	struct vxlan_ftable_entry *fe;
	struct ifnet *mcifp;
	struct ether_header *eh;
	int ipv4, error;

	sc = ifp->if_softc;
	eh = mtod(m, struct ether_header *);
	fe = NULL;
	mcifp = NULL;

	ETHER_BPF_MTAP(ifp, m);

	VXLAN_RLOCK(sc, &tracker);
	M_SETFIB(m, sc->vxl_fibnum);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VXLAN_RUNLOCK(sc, &tracker);
		m_freem(m);
		return (ENETDOWN);
	}
	/* Guard against stacked vxlan interfaces looping packets. */
	if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_VXLAN_LOOP,
	    max_vxlan_nesting) != 0)) {
		VXLAN_RUNLOCK(sc, &tracker);
		m_freem(m);
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ELOOP);
	}

	/* Unicast frames may have a learned endpoint; everything else
	 * (and unknown destinations) goes to the default endpoint. */
	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
	if (fe == NULL)
		fe = &sc->vxl_default_fe;
	/* Copy the endpoint so it stays valid after dropping the lock. */
	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);

	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
		mcifp = vxlan_multicast_if_ref(sc, ipv4);

	/* Hold a reference across the encapsulation, then unlock. */
	VXLAN_ACQUIRE(sc);
	VXLAN_RUNLOCK(sc, &tracker);

	if (ipv4 != 0)
		error = vxlan_encap4(sc, &vxlsa, m);
	else
		error = vxlan_encap6(sc, &vxlsa, m);

	vxlan_release(sc);
	if (mcifp != NULL)
		if_rele(mcifp);

	return (error);
}
2804
/* if_qflush method: no-op since vxlan queues nothing itself. */
static void
vxlan_qflush(struct ifnet *ifp __unused)
{
}
2809
/*
 * UDP tunneling callback: validate and strip the VXLAN header from an
 * incoming UDP datagram, then hand the inner frame to vxlan_input().
 * Malformed packets are dropped here.  Always returns true (the
 * datagram is consumed either way).
 */
static bool
vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
    const struct sockaddr *srcsa, void *xvso)
{
	struct vxlan_socket *vso;
	struct vxlan_header *vxh, vxlanhdr;
	uint32_t vni;
	int error __unused;

	M_ASSERTPKTHDR(m);
	vso = xvso;
	offset += sizeof(struct udphdr);

	/* Runt: not enough data for a VXLAN header. */
	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
		goto out;

	/* Copy the header out if it is not contiguous in the first mbuf. */
	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
		m_copydata(m, offset, sizeof(struct vxlan_header),
		    (caddr_t) &vxlanhdr);
		vxh = &vxlanhdr;
	} else
		vxh = mtodo(m, offset);

	/*
	 * Drop if there is a reserved bit set in either the flags or VNI
	 * fields of the header. This goes against the specification, but
	 * a bit set may indicate an unsupported new feature. This matches
	 * the behavior of the Linux implementation.
	 */
	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
	    vxh->vxlh_vni & ~VXLAN_VNI_MASK)
		goto out;

	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;

	/* Adjust to the start of the inner Ethernet frame. */
	m_adj_decap(m, offset + sizeof(struct vxlan_header));

	error = vxlan_input(vso, vni, &m, srcsa);
	MPASS(error != 0 || m == NULL);

out:
	if (m != NULL)
		m_freem(m);

	return (true);
}
2857
/*
 * Deliver a decapsulated Ethernet frame to the vxlan interface bound
 * to 'vni' on socket 'vso'.  Learns the sender's endpoint when
 * learning is enabled, translates inner checksum-offload results to
 * their outer equivalents, and injects the frame via if_input.  On
 * success *m0 is consumed and set to NULL; on error the caller still
 * owns *m0 (unless set to NULL here).
 */
static int
vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
    const struct sockaddr *sa)
{
	struct vxlan_softc *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	int error;

	m = *m0;

	/* Inner frame must at least contain an Ethernet header. */
	if (m->m_pkthdr.len < ETHER_HDR_LEN)
		return (EINVAL);

	/* Takes a reference on the softc; released at 'out'. */
	sc = vxlan_socket_lookup_softc(vso, vni);
	if (sc == NULL)
		return (ENOENT);

	ifp = sc->vxl_ifp;
	if (m->m_len < ETHER_HDR_LEN &&
	    (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
		*m0 = NULL;
		error = ENOBUFS;
		goto out;
	}
	eh = mtod(m, struct ether_header *);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		error = ENETDOWN;
		goto out;
	} else if (ifp == m->m_pkthdr.rcvif) {
		/* XXX Does not catch more complex loops. */
		error = EDEADLK;
		goto out;
	}

	/* Remember which endpoint this source MAC came from. */
	if (sc->vxl_flags & VXLAN_FLAG_LEARN)
		vxlan_ftable_learn(sc, sa, eh->ether_shost);

	m_clrprotoflags(m);
	m->m_pkthdr.rcvif = ifp;
	M_SETFIB(m, ifp->if_fib);
	/* Promote inner checksum results to outer flags if the vxlan
	 * interface has RXCSUM (v4) / RXCSUM_IPV6 (v6) enabled. */
	if (((ifp->if_capenable & IFCAP_RXCSUM &&
	    m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) ||
	    (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
	    !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) {
		uint32_t csum_flags = 0;

		if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)
			csum_flags |= CSUM_L3_CALC;
		if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID)
			csum_flags |= CSUM_L3_VALID;
		if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC)
			csum_flags |= CSUM_L4_CALC;
		if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID)
			csum_flags |= CSUM_L4_VALID;
		m->m_pkthdr.csum_flags = csum_flags;
		counter_u64_add(sc->vxl_stats.rxcsum, 1);
	} else {
		/* clear everything */
		m->m_pkthdr.csum_flags = 0;
		m->m_pkthdr.csum_data = 0;
	}

	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	(*ifp->if_input)(ifp, m);
	*m0 = NULL;
	error = 0;

out:
	vxlan_release(sc);
	return (error);
}
2932
2933 static void
vxlan_stats_alloc(struct vxlan_softc * sc)2934 vxlan_stats_alloc(struct vxlan_softc *sc)
2935 {
2936 struct vxlan_statistics *stats = &sc->vxl_stats;
2937
2938 stats->txcsum = counter_u64_alloc(M_WAITOK);
2939 stats->tso = counter_u64_alloc(M_WAITOK);
2940 stats->rxcsum = counter_u64_alloc(M_WAITOK);
2941 }
2942
2943 static void
vxlan_stats_free(struct vxlan_softc * sc)2944 vxlan_stats_free(struct vxlan_softc *sc)
2945 {
2946 struct vxlan_statistics *stats = &sc->vxl_stats;
2947
2948 counter_u64_free(stats->txcsum);
2949 counter_u64_free(stats->tso);
2950 counter_u64_free(stats->rxcsum);
2951 }
2952
2953 static void
vxlan_set_default_config(struct vxlan_softc * sc)2954 vxlan_set_default_config(struct vxlan_softc *sc)
2955 {
2956
2957 sc->vxl_flags |= VXLAN_FLAG_LEARN;
2958
2959 sc->vxl_vni = VXLAN_VNI_MAX;
2960 sc->vxl_ttl = IPDEFTTL;
2961
2962 if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
2963 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
2964 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
2965 } else {
2966 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2967 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
2968 }
2969
2970 sc->vxl_min_port = V_ipport_firstauto;
2971 sc->vxl_max_port = V_ipport_lastauto;
2972
2973 sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
2974 sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
2975 }
2976
/*
 * Apply the creation-time parameters in 'vxlp' on top of the default
 * configuration.  Parameters with invalid values are silently ignored
 * (the defaults remain); only an unsupported address family or a bad
 * IPv6 scope returns an error.
 */
static int
vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
{

#ifndef INET
	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
		return (EAFNOSUPPORT);
#endif

#ifndef INET6
	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
		return (EAFNOSUPPORT);
#else
	/* Embed the scope id in any supplied IPv6 addresses up front. */
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
		int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa);
		if (error)
			return (error);
	}
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
		int error = vxlan_sockaddr_in6_embedscope(
		   &vxlp->vxlp_remote_sa);
		if (error)
			return (error);
	}
#endif

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
			sc->vxl_vni = vxlp->vxlp_vni;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
		sc->vxl_src_addr.in4.sin_family = AF_INET;
		sc->vxl_src_addr.in4.sin_addr =
		    vxlp->vxlp_local_sa.in4.sin_addr;
	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
		sc->vxl_src_addr.in6.sin6_addr =
		    vxlp->vxlp_local_sa.in6.sin6_addr;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
		sc->vxl_dst_addr.in4.sin_family = AF_INET;
		sc->vxl_dst_addr.in4.sin_addr =
		    vxlp->vxlp_remote_sa.in4.sin_addr;
	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
		sc->vxl_dst_addr.in6.sin6_addr =
		    vxlp->vxlp_remote_sa.in6.sin6_addr;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
			sc->vxl_min_port = vxlp->vxlp_min_port;
			sc->vxl_max_port = vxlp->vxlp_max_port;
		}
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
			sc->vxl_ttl = vxlp->vxlp_ttl;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
		if (vxlp->vxlp_learn == 0)
			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
	}

	return (0);
}
3071
/*
 * Validate and record the user's requested interface capabilities
 * (SIOCSIFCAP).  TSO is automatically dropped when the matching tx
 * checksum capability is being disabled, and a request to enable TSO
 * without tx checksums is rejected with EAGAIN.  On success the
 * request is stored in vxl_reqcap; vxlan_set_hwcaps() later applies
 * what the underlying interface actually supports.
 */
static int
vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap)
{
	/* Bits the caller is toggling relative to the current state. */
	int mask = reqcap ^ ifp->if_capenable;

	/* Disable TSO if tx checksums are disabled. */
	if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) &&
	    reqcap & IFCAP_TSO4) {
		reqcap &= ~IFCAP_TSO4;
		if_printf(ifp, "tso4 disabled due to -txcsum.\n");
	}
	if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) &&
	    reqcap & IFCAP_TSO6) {
		reqcap &= ~IFCAP_TSO6;
		if_printf(ifp, "tso6 disabled due to -txcsum6.\n");
	}

	/* Do not enable TSO if tx checksums are disabled. */
	if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 &&
	    !(reqcap & IFCAP_TXCSUM)) {
		if_printf(ifp, "enable txcsum first.\n");
		return (EAGAIN);
	}
	if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 &&
	    !(reqcap & IFCAP_TXCSUM_IPV6)) {
		if_printf(ifp, "enable txcsum6 first.\n");
		return (EAGAIN);
	}

	sc->vxl_reqcap = reqcap;
	return (0);
}
3104
3105 /*
3106 * A VXLAN interface inherits the capabilities of the vxlandev or the interface
3107 * hosting the vxlanlocal address.
3108 */
3109 static void
vxlan_set_hwcaps(struct vxlan_softc * sc)3110 vxlan_set_hwcaps(struct vxlan_softc *sc)
3111 {
3112 struct epoch_tracker et;
3113 struct ifnet *p;
3114 struct ifaddr *ifa;
3115 u_long hwa;
3116 int cap, ena;
3117 bool rel;
3118 struct ifnet *ifp = sc->vxl_ifp;
3119
3120 /* reset caps */
3121 ifp->if_capabilities &= VXLAN_BASIC_IFCAPS;
3122 ifp->if_capenable &= VXLAN_BASIC_IFCAPS;
3123 ifp->if_hwassist = 0;
3124
3125 NET_EPOCH_ENTER(et);
3126 CURVNET_SET(ifp->if_vnet);
3127
3128 rel = false;
3129 p = NULL;
3130 if (sc->vxl_mc_ifname[0] != '\0') {
3131 rel = true;
3132 p = ifunit_ref(sc->vxl_mc_ifname);
3133 } else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
3134 if (sc->vxl_src_addr.sa.sa_family == AF_INET) {
3135 struct sockaddr_in in4 = sc->vxl_src_addr.in4;
3136
3137 in4.sin_port = 0;
3138 ifa = ifa_ifwithaddr((struct sockaddr *)&in4);
3139 if (ifa != NULL)
3140 p = ifa->ifa_ifp;
3141 } else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) {
3142 struct sockaddr_in6 in6 = sc->vxl_src_addr.in6;
3143
3144 in6.sin6_port = 0;
3145 ifa = ifa_ifwithaddr((struct sockaddr *)&in6);
3146 if (ifa != NULL)
3147 p = ifa->ifa_ifp;
3148 }
3149 }
3150 if (p == NULL)
3151 goto done;
3152
3153 cap = ena = hwa = 0;
3154
3155 /* checksum offload */
3156 if (p->if_capabilities & IFCAP_VXLAN_HWCSUM)
3157 cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
3158 if (p->if_capenable & IFCAP_VXLAN_HWCSUM) {
3159 ena |= sc->vxl_reqcap & p->if_capenable &
3160 (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
3161 if (ena & IFCAP_TXCSUM) {
3162 if (p->if_hwassist & CSUM_INNER_IP)
3163 hwa |= CSUM_IP;
3164 if (p->if_hwassist & CSUM_INNER_IP_UDP)
3165 hwa |= CSUM_IP_UDP;
3166 if (p->if_hwassist & CSUM_INNER_IP_TCP)
3167 hwa |= CSUM_IP_TCP;
3168 }
3169 if (ena & IFCAP_TXCSUM_IPV6) {
3170 if (p->if_hwassist & CSUM_INNER_IP6_UDP)
3171 hwa |= CSUM_IP6_UDP;
3172 if (p->if_hwassist & CSUM_INNER_IP6_TCP)
3173 hwa |= CSUM_IP6_TCP;
3174 }
3175 }
3176
3177 /* hardware TSO */
3178 if (p->if_capabilities & IFCAP_VXLAN_HWTSO) {
3179 cap |= p->if_capabilities & IFCAP_TSO;
3180 if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen)
3181 ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen;
3182 else
3183 ifp->if_hw_tsomax = p->if_hw_tsomax;
3184 /* XXX: tsomaxsegcount decrement is cxgbe specific */
3185 ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1;
3186 ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize;
3187 }
3188 if (p->if_capenable & IFCAP_VXLAN_HWTSO) {
3189 ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO;
3190 if (ena & IFCAP_TSO) {
3191 if (p->if_hwassist & CSUM_INNER_IP_TSO)
3192 hwa |= CSUM_IP_TSO;
3193 if (p->if_hwassist & CSUM_INNER_IP6_TSO)
3194 hwa |= CSUM_IP6_TSO;
3195 }
3196 }
3197
3198 ifp->if_capabilities |= cap;
3199 ifp->if_capenable |= ena;
3200 ifp->if_hwassist |= hwa;
3201 if (rel)
3202 if_rele(p);
3203 done:
3204 CURVNET_RESTORE();
3205 NET_EPOCH_EXIT(et);
3206 }
3207
/*
 * if_clone create handler: allocate, configure, and attach a new vxlan(4)
 * interface.  Returns 0 on success or an errno value; on failure before
 * attach, the softc is freed and nothing is left registered.
 */
static int
vxlan_clone_create(struct if_clone *ifc, char *name, size_t len,
    struct ifc_data *ifd, struct ifnet **ifpp)
{
	struct vxlan_softc *sc;
	struct ifnet *ifp;
	struct ifvxlanparam vxlp;
	int error;

	sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
	sc->vxl_unit = ifd->unit;
	/* The new interface inherits the creating process's FIB. */
	sc->vxl_fibnum = curthread->td_proc->p_fibnum;
	vxlan_set_default_config(sc);

	/* Apply any creation-time parameters passed in from userland. */
	if (ifd->params != NULL) {
		error = ifc_copyin(ifd, &vxlp, sizeof(vxlp));
		if (error)
			goto fail;

		error = vxlan_set_user_config(sc, &vxlp);
		if (error)
			goto fail;
	}

	vxlan_stats_alloc(sc);
	ifp = if_alloc(IFT_ETHER);
	sc->vxl_ifp = ifp;
	rm_init(&sc->vxl_lock, "vxlanrm");
	/* The prune callout runs under the softc rmlock. */
	callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
	sc->vxl_port_hash_key = arc4random();
	vxlan_ftable_init(sc);

	vxlan_sysctl_setup(sc);

	ifp->if_softc = sc;
	if_initname(ifp, vxlan_name, ifd->unit);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = vxlan_init;
	ifp->if_ioctl = vxlan_ioctl;
	ifp->if_transmit = vxlan_transmit;
	ifp->if_qflush = vxlan_qflush;
	ifp->if_capabilities = VXLAN_BASIC_IFCAPS;
	ifp->if_capenable = VXLAN_BASIC_IFCAPS;
	/* Request every capability; vxlan_set_hwcaps() pares this down. */
	sc->vxl_reqcap = -1;
	vxlan_set_hwcaps(sc);

	ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status);
	ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO);

	/* Attach with a stable, randomly generated Ethernet address. */
	ether_gen_addr(ifp, &sc->vxl_hwaddr);
	ether_ifattach(ifp, sc->vxl_hwaddr.octet);

	ifp->if_baudrate = 0;

	/* Header length depends on the configured encapsulation family. */
	VXLAN_WLOCK(sc);
	vxlan_setup_interface_hdrlen(sc);
	VXLAN_WUNLOCK(sc);
	*ifpp = ifp;

	return (0);

fail:
	free(sc, M_VXLAN);
	return (error);
}
3274
/*
 * if_clone destroy handler: tear down the tunnel state, detach the
 * interface, and release all resources allocated at create time.
 * The teardown order mirrors the create order in reverse.
 */
static int
vxlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
{
	struct vxlan_softc *sc;

	sc = ifp->if_softc;

	/* Stop the interface and drop its socket/multicast state first. */
	vxlan_teardown(sc);

	/* Flush all (including static) forwarding table entries. */
	vxlan_ftable_flush(sc, 1);

	ether_ifdetach(ifp);
	if_free(ifp);
	ifmedia_removeall(&sc->vxl_media);

	vxlan_ftable_fini(sc);

	vxlan_sysctl_destroy(sc);
	rm_destroy(&sc->vxl_lock);
	vxlan_stats_free(sc);
	free(sc, M_VXLAN);

	return (0);
}
3299
/* BMV: Taken from if_bridge. */
/*
 * Hash a 48-bit Ethernet address into a 32-bit forwarding-table bucket
 * index, keyed with the per-softc random value so bucket distribution
 * cannot be predicted from addresses alone.
 */
static uint32_t
vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
{
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;

	/* Fold the six address bytes into the two 32-bit lanes. */
	b += addr[5] << 8;
	b += addr[4];
	a += addr[3] << 24;
	a += addr[2] << 16;
	a += addr[1] << 8;
	a += addr[0];

/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 */
#define	mix(a, b, c)							\
do {									\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (0)

	mix(a, b, c);

#undef mix

	return (c);
}
3336
/*
 * ifmedia change callback.  vxlan interfaces expose a single fixed
 * autoselect medium, so media changes are accepted and ignored.
 */
static int
vxlan_media_change(struct ifnet *ifp)
{

	/* Ignore. */
	return (0);
}
3344
3345 static void
vxlan_media_status(struct ifnet * ifp,struct ifmediareq * ifmr)3346 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3347 {
3348
3349 ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
3350 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
3351 }
3352
3353 static int
vxlan_sockaddr_cmp(const union vxlan_sockaddr * vxladdr,const struct sockaddr * sa)3354 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
3355 const struct sockaddr *sa)
3356 {
3357
3358 return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
3359 }
3360
3361 static void
vxlan_sockaddr_copy(union vxlan_sockaddr * vxladdr,const struct sockaddr * sa)3362 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
3363 const struct sockaddr *sa)
3364 {
3365
3366 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3367 bzero(vxladdr, sizeof(*vxladdr));
3368
3369 if (sa->sa_family == AF_INET) {
3370 vxladdr->in4 = *satoconstsin(sa);
3371 vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
3372 } else if (sa->sa_family == AF_INET6) {
3373 vxladdr->in6 = *satoconstsin6(sa);
3374 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
3375 }
3376 }
3377
3378 static int
vxlan_sockaddr_in_equal(const union vxlan_sockaddr * vxladdr,const struct sockaddr * sa)3379 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
3380 const struct sockaddr *sa)
3381 {
3382 int equal;
3383
3384 if (sa->sa_family == AF_INET) {
3385 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3386 equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr;
3387 } else if (sa->sa_family == AF_INET6) {
3388 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3389 equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr);
3390 } else
3391 equal = 0;
3392
3393 return (equal);
3394 }
3395
3396 static void
vxlan_sockaddr_in_copy(union vxlan_sockaddr * vxladdr,const struct sockaddr * sa)3397 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
3398 const struct sockaddr *sa)
3399 {
3400
3401 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
3402
3403 if (sa->sa_family == AF_INET) {
3404 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3405 vxladdr->in4.sin_family = AF_INET;
3406 vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
3407 vxladdr->in4.sin_addr = *in4;
3408 } else if (sa->sa_family == AF_INET6) {
3409 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3410 vxladdr->in6.sin6_family = AF_INET6;
3411 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
3412 vxladdr->in6.sin6_addr = *in6;
3413 }
3414 }
3415
3416 static int
vxlan_sockaddr_supported(const union vxlan_sockaddr * vxladdr,int unspec)3417 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
3418 {
3419 const struct sockaddr *sa;
3420 int supported;
3421
3422 sa = &vxladdr->sa;
3423 supported = 0;
3424
3425 if (sa->sa_family == AF_UNSPEC && unspec != 0) {
3426 supported = 1;
3427 } else if (sa->sa_family == AF_INET) {
3428 #ifdef INET
3429 supported = 1;
3430 #endif
3431 } else if (sa->sa_family == AF_INET6) {
3432 #ifdef INET6
3433 supported = 1;
3434 #endif
3435 }
3436
3437 return (supported);
3438 }
3439
3440 static int
vxlan_sockaddr_in_any(const union vxlan_sockaddr * vxladdr)3441 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
3442 {
3443 const struct sockaddr *sa;
3444 int any;
3445
3446 sa = &vxladdr->sa;
3447
3448 if (sa->sa_family == AF_INET) {
3449 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3450 any = in4->s_addr == INADDR_ANY;
3451 } else if (sa->sa_family == AF_INET6) {
3452 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3453 any = IN6_IS_ADDR_UNSPECIFIED(in6);
3454 } else
3455 any = -1;
3456
3457 return (any);
3458 }
3459
3460 static int
vxlan_sockaddr_in_multicast(const union vxlan_sockaddr * vxladdr)3461 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
3462 {
3463 const struct sockaddr *sa;
3464 int mc;
3465
3466 sa = &vxladdr->sa;
3467
3468 if (sa->sa_family == AF_INET) {
3469 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr;
3470 mc = IN_MULTICAST(ntohl(in4->s_addr));
3471 } else if (sa->sa_family == AF_INET6) {
3472 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr;
3473 mc = IN6_IS_ADDR_MULTICAST(in6);
3474 } else
3475 mc = -1;
3476
3477 return (mc);
3478 }
3479
/*
 * Embed the IPv6 scope zone id into a scoped (e.g. link-local) address
 * so it can be used for routing/socket operations.  Returns 0 on
 * success, an errno on failure, or EAFNOSUPPORT on kernels built
 * without INET6.
 */
static int
vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr)
{
	int error;

	MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr));
#ifdef INET6
	error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone);
#else
	error = EAFNOSUPPORT;
#endif

	return (error);
}
3494
3495 static int
vxlan_can_change_config(struct vxlan_softc * sc)3496 vxlan_can_change_config(struct vxlan_softc *sc)
3497 {
3498 struct ifnet *ifp;
3499
3500 ifp = sc->vxl_ifp;
3501 VXLAN_LOCK_ASSERT(sc);
3502
3503 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3504 return (0);
3505 if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
3506 return (0);
3507
3508 return (1);
3509 }
3510
3511 static int
vxlan_check_vni(uint32_t vni)3512 vxlan_check_vni(uint32_t vni)
3513 {
3514
3515 return (vni >= VXLAN_VNI_MAX);
3516 }
3517
3518 static int
vxlan_check_ttl(int ttl)3519 vxlan_check_ttl(int ttl)
3520 {
3521
3522 return (ttl > MAXTTL);
3523 }
3524
3525 static int
vxlan_check_ftable_timeout(uint32_t timeout)3526 vxlan_check_ftable_timeout(uint32_t timeout)
3527 {
3528
3529 return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
3530 }
3531
3532 static int
vxlan_check_ftable_max(uint32_t max)3533 vxlan_check_ftable_max(uint32_t max)
3534 {
3535
3536 return (max > VXLAN_FTABLE_MAX);
3537 }
3538
3539 static void
vxlan_sysctl_setup(struct vxlan_softc * sc)3540 vxlan_sysctl_setup(struct vxlan_softc *sc)
3541 {
3542 struct sysctl_ctx_list *ctx;
3543 struct sysctl_oid *node;
3544 struct vxlan_statistics *stats;
3545 char namebuf[8];
3546
3547 ctx = &sc->vxl_sysctl_ctx;
3548 stats = &sc->vxl_stats;
3549 snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);
3550
3551 sysctl_ctx_init(ctx);
3552 sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
3553 SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
3554 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3555
3556 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
3557 OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3558 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
3559 CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
3560 "Number of entries in forwarding table");
3561 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
3562 CTLFLAG_RD, &sc->vxl_ftable_max, 0,
3563 "Maximum number of entries allowed in forwarding table");
3564 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
3565 CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
3566 "Number of seconds between prunes of the forwarding table");
3567 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
3568 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
3569 sc, 0, vxlan_ftable_sysctl_dump, "A",
3570 "Dump the forwarding table entries");
3571
3572 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
3573 OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
3574 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3575 "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
3576 "Fowarding table reached maximum entries");
3577 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
3578 "ftable_lock_upgrade_failed", CTLFLAG_RD,
3579 &stats->ftable_lock_upgrade_failed, 0,
3580 "Forwarding table update required lock upgrade");
3581
3582 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum",
3583 CTLFLAG_RD, &stats->txcsum,
3584 "# of times hardware assisted with tx checksum");
3585 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso",
3586 CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO");
3587 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum",
3588 CTLFLAG_RD, &stats->rxcsum,
3589 "# of times hardware assisted with rx checksum");
3590 }
3591
3592 static void
vxlan_sysctl_destroy(struct vxlan_softc * sc)3593 vxlan_sysctl_destroy(struct vxlan_softc *sc)
3594 {
3595
3596 sysctl_ctx_free(&sc->vxl_sysctl_ctx);
3597 sc->vxl_sysctl_node = NULL;
3598 }
3599
3600 static int
vxlan_tunable_int(struct vxlan_softc * sc,const char * knob,int def)3601 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
3602 {
3603 char path[64];
3604
3605 snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
3606 sc->vxl_unit, knob);
3607 TUNABLE_INT_FETCH(path, &def);
3608
3609 return (def);
3610 }
3611
/*
 * ifnet departure event handler: when an interface that vxlan sockets
 * are using for multicast goes away, collect the affected softcs and
 * tear each one down.  The collection happens under the short-lived
 * list mutex; the teardown itself is deferred until after that lock is
 * dropped, since it needs the sleepable vxlan_sx and softc locks.
 */
static void
vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
{
	struct vxlan_softc_head list = LIST_HEAD_INITIALIZER(list);
	struct vxlan_socket *vso;
	struct vxlan_softc *sc, *tsc;

	/* A rename is not a departure; nothing to do. */
	if (ifp->if_flags & IFF_RENAMING)
		return;
	/* Only multicast-capable interfaces can be in use here. */
	if ((ifp->if_flags & IFF_MULTICAST) == 0)
		return;

	VXLAN_LIST_LOCK();
	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
		vxlan_socket_ifdetach(vso, ifp, &list);
	VXLAN_LIST_UNLOCK();

	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
		LIST_REMOVE(sc, vxl_ifdetach_list);

		sx_xlock(&vxlan_sx);
		VXLAN_WLOCK(sc);
		/* Let an in-flight init finish before tearing down. */
		if (sc->vxl_flags & VXLAN_FLAG_INIT)
			vxlan_init_wait(sc);
		vxlan_teardown_locked(sc);
		sx_xunlock(&vxlan_sx);
	}
}
3640
3641 static void
vxlan_load(void)3642 vxlan_load(void)
3643 {
3644
3645 mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
3646 vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
3647 vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
3648
3649 struct if_clone_addreq req = {
3650 .create_f = vxlan_clone_create,
3651 .destroy_f = vxlan_clone_destroy,
3652 .flags = IFC_F_AUTOUNIT,
3653 };
3654 vxlan_cloner = ifc_attach_cloner(vxlan_name, &req);
3655 }
3656
/*
 * Module unload: undo vxlan_load() in reverse order.  Detaching the
 * cloner destroys all remaining interfaces, so the socket list must be
 * empty by the time the list mutex is torn down.
 */
static void
vxlan_unload(void)
{

	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    vxlan_ifdetach_event_tag);
	ifc_detach_cloner(vxlan_cloner);
	mtx_destroy(&vxlan_list_mtx);
	MPASS(LIST_EMPTY(&vxlan_socket_list));
}
3667
3668 static int
vxlan_modevent(module_t mod,int type,void * unused)3669 vxlan_modevent(module_t mod, int type, void *unused)
3670 {
3671 int error;
3672
3673 error = 0;
3674
3675 switch (type) {
3676 case MOD_LOAD:
3677 vxlan_load();
3678 break;
3679 case MOD_UNLOAD:
3680 vxlan_unload();
3681 break;
3682 default:
3683 error = ENOTSUP;
3684 break;
3685 }
3686
3687 return (error);
3688 }
3689
/* Module glue: register the if_vxlan pseudo-interface driver. */
static moduledata_t vxlan_mod = {
	"if_vxlan",	/* module name */
	vxlan_modevent,	/* event handler */
	0		/* extra data (unused) */
};

DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_vxlan, 1);
3698