1 /*- 2 * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org> 3 * All rights reserved. 4 * Copyright (c) 2020, Chelsio Communications. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 */ 27 28 #include "opt_inet.h" 29 #include "opt_inet6.h" 30 31 #include <sys/param.h> 32 #include <sys/eventhandler.h> 33 #include <sys/kernel.h> 34 #include <sys/lock.h> 35 #include <sys/hash.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/module.h> 39 #include <sys/refcount.h> 40 #include <sys/rmlock.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/queue.h> 44 #include <sys/sbuf.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/sockio.h> 48 #include <sys/sysctl.h> 49 #include <sys/systm.h> 50 51 #include <net/bpf.h> 52 #include <net/ethernet.h> 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_private.h> 56 #include <net/if_clone.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 #include <net/if_types.h> 60 #include <net/if_vxlan.h> 61 #include <net/netisr.h> 62 #include <net/route.h> 63 #include <net/route/nhop.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_var.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/ip.h> 70 #include <netinet/ip6.h> 71 #include <netinet/ip_var.h> 72 #include <netinet/udp.h> 73 #include <netinet/udp_var.h> 74 #include <netinet/in_fib.h> 75 #include <netinet6/in6_fib.h> 76 77 #include <netinet6/ip6_var.h> 78 #include <netinet6/scope6_var.h> 79 80 struct vxlan_softc; 81 LIST_HEAD(vxlan_softc_head, vxlan_softc); 82 83 struct sx vxlan_sx; 84 SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock"); 85 86 struct vxlan_socket_mc_info { 87 union vxlan_sockaddr vxlsomc_saddr; 88 union vxlan_sockaddr vxlsomc_gaddr; 89 int vxlsomc_ifidx; 90 int vxlsomc_users; 91 }; 92 93 /* 94 * The maximum MTU of encapsulated ethernet frame within IPv4/UDP packet. 
95 */ 96 #define VXLAN_MAX_MTU (IP_MAXPACKET - \ 97 60 /* Maximum IPv4 header len */ - \ 98 sizeof(struct udphdr) - \ 99 sizeof(struct vxlan_header) - \ 100 ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN) 101 #define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU) 102 103 #define VXLAN_SO_MC_MAX_GROUPS 32 104 105 #define VXLAN_SO_VNI_HASH_SHIFT 6 106 #define VXLAN_SO_VNI_HASH_SIZE (1 << VXLAN_SO_VNI_HASH_SHIFT) 107 #define VXLAN_SO_VNI_HASH(_vni) ((_vni) % VXLAN_SO_VNI_HASH_SIZE) 108 109 struct vxlan_socket { 110 struct socket *vxlso_sock; 111 struct rmlock vxlso_lock; 112 u_int vxlso_refcnt; 113 union vxlan_sockaddr vxlso_laddr; 114 LIST_ENTRY(vxlan_socket) vxlso_entry; 115 struct vxlan_softc_head vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE]; 116 struct vxlan_socket_mc_info vxlso_mc[VXLAN_SO_MC_MAX_GROUPS]; 117 }; 118 119 #define VXLAN_SO_RLOCK(_vso, _p) rm_rlock(&(_vso)->vxlso_lock, (_p)) 120 #define VXLAN_SO_RUNLOCK(_vso, _p) rm_runlock(&(_vso)->vxlso_lock, (_p)) 121 #define VXLAN_SO_WLOCK(_vso) rm_wlock(&(_vso)->vxlso_lock) 122 #define VXLAN_SO_WUNLOCK(_vso) rm_wunlock(&(_vso)->vxlso_lock) 123 #define VXLAN_SO_LOCK_ASSERT(_vso) \ 124 rm_assert(&(_vso)->vxlso_lock, RA_LOCKED) 125 #define VXLAN_SO_LOCK_WASSERT(_vso) \ 126 rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED) 127 128 #define VXLAN_SO_ACQUIRE(_vso) refcount_acquire(&(_vso)->vxlso_refcnt) 129 #define VXLAN_SO_RELEASE(_vso) refcount_release(&(_vso)->vxlso_refcnt) 130 131 struct vxlan_ftable_entry { 132 LIST_ENTRY(vxlan_ftable_entry) vxlfe_hash; 133 uint16_t vxlfe_flags; 134 uint8_t vxlfe_mac[ETHER_ADDR_LEN]; 135 union vxlan_sockaddr vxlfe_raddr; 136 time_t vxlfe_expire; 137 }; 138 139 #define VXLAN_FE_FLAG_DYNAMIC 0x01 140 #define VXLAN_FE_FLAG_STATIC 0x02 141 142 #define VXLAN_FE_IS_DYNAMIC(_fe) \ 143 ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC) 144 145 #define VXLAN_SC_FTABLE_SHIFT 9 146 #define VXLAN_SC_FTABLE_SIZE (1 << VXLAN_SC_FTABLE_SHIFT) 147 #define VXLAN_SC_FTABLE_MASK (VXLAN_SC_FTABLE_SIZE - 1) 148 #define 
VXLAN_SC_FTABLE_HASH(_sc, _mac) \ 149 (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE) 150 151 LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry); 152 153 struct vxlan_statistics { 154 uint32_t ftable_nospace; 155 uint32_t ftable_lock_upgrade_failed; 156 counter_u64_t txcsum; 157 counter_u64_t tso; 158 counter_u64_t rxcsum; 159 }; 160 161 struct vxlan_softc { 162 struct ifnet *vxl_ifp; 163 int vxl_reqcap; 164 u_int vxl_fibnum; 165 struct vxlan_socket *vxl_sock; 166 uint32_t vxl_vni; 167 union vxlan_sockaddr vxl_src_addr; 168 union vxlan_sockaddr vxl_dst_addr; 169 uint32_t vxl_flags; 170 #define VXLAN_FLAG_INIT 0x0001 171 #define VXLAN_FLAG_TEARDOWN 0x0002 172 #define VXLAN_FLAG_LEARN 0x0004 173 #define VXLAN_FLAG_USER_MTU 0x0008 174 175 uint32_t vxl_port_hash_key; 176 uint16_t vxl_min_port; 177 uint16_t vxl_max_port; 178 uint8_t vxl_ttl; 179 180 /* Lookup table from MAC address to forwarding entry. */ 181 uint32_t vxl_ftable_cnt; 182 uint32_t vxl_ftable_max; 183 uint32_t vxl_ftable_timeout; 184 uint32_t vxl_ftable_hash_key; 185 struct vxlan_ftable_head *vxl_ftable; 186 187 /* Derived from vxl_dst_addr. */ 188 struct vxlan_ftable_entry vxl_default_fe; 189 190 struct ip_moptions *vxl_im4o; 191 struct ip6_moptions *vxl_im6o; 192 193 struct rmlock vxl_lock; 194 volatile u_int vxl_refcnt; 195 196 int vxl_unit; 197 int vxl_vso_mc_index; 198 struct vxlan_statistics vxl_stats; 199 struct sysctl_oid *vxl_sysctl_node; 200 struct sysctl_ctx_list vxl_sysctl_ctx; 201 struct callout vxl_callout; 202 struct ether_addr vxl_hwaddr; 203 int vxl_mc_ifindex; 204 struct ifnet *vxl_mc_ifp; 205 struct ifmedia vxl_media; 206 char vxl_mc_ifname[IFNAMSIZ]; 207 LIST_ENTRY(vxlan_softc) vxl_entry; 208 LIST_ENTRY(vxlan_softc) vxl_ifdetach_list; 209 210 /* For rate limiting errors on the tx fast path. 
*/ 211 struct timeval err_time; 212 int err_pps; 213 }; 214 215 #define VXLAN_RLOCK(_sc, _p) rm_rlock(&(_sc)->vxl_lock, (_p)) 216 #define VXLAN_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->vxl_lock, (_p)) 217 #define VXLAN_WLOCK(_sc) rm_wlock(&(_sc)->vxl_lock) 218 #define VXLAN_WUNLOCK(_sc) rm_wunlock(&(_sc)->vxl_lock) 219 #define VXLAN_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->vxl_lock) 220 #define VXLAN_LOCK_ASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_LOCKED) 221 #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED) 222 #define VXLAN_UNLOCK(_sc, _p) do { \ 223 if (VXLAN_LOCK_WOWNED(_sc)) \ 224 VXLAN_WUNLOCK(_sc); \ 225 else \ 226 VXLAN_RUNLOCK(_sc, _p); \ 227 } while (0) 228 229 #define VXLAN_ACQUIRE(_sc) refcount_acquire(&(_sc)->vxl_refcnt) 230 #define VXLAN_RELEASE(_sc) refcount_release(&(_sc)->vxl_refcnt) 231 232 #define satoconstsin(sa) ((const struct sockaddr_in *)(sa)) 233 #define satoconstsin6(sa) ((const struct sockaddr_in6 *)(sa)) 234 235 struct vxlanudphdr { 236 struct udphdr vxlh_udp; 237 struct vxlan_header vxlh_hdr; 238 } __packed; 239 240 static int vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *); 241 static void vxlan_ftable_init(struct vxlan_softc *); 242 static void vxlan_ftable_fini(struct vxlan_softc *); 243 static void vxlan_ftable_flush(struct vxlan_softc *, int); 244 static void vxlan_ftable_expire(struct vxlan_softc *); 245 static int vxlan_ftable_update_locked(struct vxlan_softc *, 246 const union vxlan_sockaddr *, const uint8_t *, 247 struct rm_priotracker *); 248 static int vxlan_ftable_learn(struct vxlan_softc *, 249 const struct sockaddr *, const uint8_t *); 250 static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS); 251 252 static struct vxlan_ftable_entry * 253 vxlan_ftable_entry_alloc(void); 254 static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *); 255 static void vxlan_ftable_entry_init(struct vxlan_softc *, 256 struct vxlan_ftable_entry *, const uint8_t *, 257 const struct sockaddr *, uint32_t); 258 static 
void vxlan_ftable_entry_destroy(struct vxlan_softc *, 259 struct vxlan_ftable_entry *); 260 static int vxlan_ftable_entry_insert(struct vxlan_softc *, 261 struct vxlan_ftable_entry *); 262 static struct vxlan_ftable_entry * 263 vxlan_ftable_entry_lookup(struct vxlan_softc *, 264 const uint8_t *); 265 static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *, 266 struct sbuf *); 267 268 static struct vxlan_socket * 269 vxlan_socket_alloc(const union vxlan_sockaddr *); 270 static void vxlan_socket_destroy(struct vxlan_socket *); 271 static void vxlan_socket_release(struct vxlan_socket *); 272 static struct vxlan_socket * 273 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa); 274 static void vxlan_socket_insert(struct vxlan_socket *); 275 static int vxlan_socket_init(struct vxlan_socket *, struct ifnet *); 276 static int vxlan_socket_bind(struct vxlan_socket *, struct ifnet *); 277 static int vxlan_socket_create(struct ifnet *, int, 278 const union vxlan_sockaddr *, struct vxlan_socket **); 279 static void vxlan_socket_ifdetach(struct vxlan_socket *, 280 struct ifnet *, struct vxlan_softc_head *); 281 282 static struct vxlan_socket * 283 vxlan_socket_mc_lookup(const union vxlan_sockaddr *); 284 static int vxlan_sockaddr_mc_info_match( 285 const struct vxlan_socket_mc_info *, 286 const union vxlan_sockaddr *, 287 const union vxlan_sockaddr *, int); 288 static int vxlan_socket_mc_join_group(struct vxlan_socket *, 289 const union vxlan_sockaddr *, const union vxlan_sockaddr *, 290 int *, union vxlan_sockaddr *); 291 static int vxlan_socket_mc_leave_group(struct vxlan_socket *, 292 const union vxlan_sockaddr *, 293 const union vxlan_sockaddr *, int); 294 static int vxlan_socket_mc_add_group(struct vxlan_socket *, 295 const union vxlan_sockaddr *, const union vxlan_sockaddr *, 296 int, int *); 297 static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *, 298 int); 299 300 static struct vxlan_softc * 301 vxlan_socket_lookup_softc_locked(struct 
vxlan_socket *, 302 uint32_t); 303 static struct vxlan_softc * 304 vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t); 305 static int vxlan_socket_insert_softc(struct vxlan_socket *, 306 struct vxlan_softc *); 307 static void vxlan_socket_remove_softc(struct vxlan_socket *, 308 struct vxlan_softc *); 309 310 static struct ifnet * 311 vxlan_multicast_if_ref(struct vxlan_softc *, int); 312 static void vxlan_free_multicast(struct vxlan_softc *); 313 static int vxlan_setup_multicast_interface(struct vxlan_softc *); 314 315 static int vxlan_setup_multicast(struct vxlan_softc *); 316 static int vxlan_setup_socket(struct vxlan_softc *); 317 #ifdef INET6 318 static void vxlan_setup_zero_checksum_port(struct vxlan_softc *); 319 #endif 320 static void vxlan_setup_interface_hdrlen(struct vxlan_softc *); 321 static int vxlan_valid_init_config(struct vxlan_softc *); 322 static void vxlan_init_wait(struct vxlan_softc *); 323 static void vxlan_init_complete(struct vxlan_softc *); 324 static void vxlan_init(void *); 325 static void vxlan_release(struct vxlan_softc *); 326 static void vxlan_teardown_wait(struct vxlan_softc *); 327 static void vxlan_teardown_complete(struct vxlan_softc *); 328 static void vxlan_teardown_locked(struct vxlan_softc *); 329 static void vxlan_teardown(struct vxlan_softc *); 330 static void vxlan_ifdetach(struct vxlan_softc *, struct ifnet *, 331 struct vxlan_softc_head *); 332 static void vxlan_timer(void *); 333 334 static int vxlan_ctrl_get_config(struct vxlan_softc *, void *); 335 static int vxlan_ctrl_set_vni(struct vxlan_softc *, void *); 336 static int vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *); 337 static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *); 338 static int vxlan_ctrl_set_local_port(struct vxlan_softc *, void *); 339 static int vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *); 340 static int vxlan_ctrl_set_port_range(struct vxlan_softc *, void *); 341 static int 
vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *); 342 static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *); 343 static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *); 344 static int vxlan_ctrl_set_ttl(struct vxlan_softc *, void *); 345 static int vxlan_ctrl_set_learn(struct vxlan_softc *, void *); 346 static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *); 347 static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *); 348 static int vxlan_ctrl_flush(struct vxlan_softc *, void *); 349 static int vxlan_ioctl_drvspec(struct vxlan_softc *, 350 struct ifdrv *, int); 351 static int vxlan_ioctl_ifflags(struct vxlan_softc *); 352 static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); 353 354 #if defined(INET) || defined(INET6) 355 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *); 356 static void vxlan_encap_header(struct vxlan_softc *, struct mbuf *, 357 int, uint16_t, uint16_t); 358 #endif 359 static int vxlan_encap4(struct vxlan_softc *, 360 const union vxlan_sockaddr *, struct mbuf *); 361 static int vxlan_encap6(struct vxlan_softc *, 362 const union vxlan_sockaddr *, struct mbuf *); 363 static int vxlan_transmit(struct ifnet *, struct mbuf *); 364 static void vxlan_qflush(struct ifnet *); 365 static bool vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *, 366 const struct sockaddr *, void *); 367 static int vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **, 368 const struct sockaddr *); 369 370 static void vxlan_stats_alloc(struct vxlan_softc *); 371 static void vxlan_stats_free(struct vxlan_softc *); 372 static void vxlan_set_default_config(struct vxlan_softc *); 373 static int vxlan_set_user_config(struct vxlan_softc *, 374 struct ifvxlanparam *); 375 static int vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int); 376 static void vxlan_set_hwcaps(struct vxlan_softc *); 377 static int vxlan_clone_create(struct if_clone *, char *, size_t, 378 struct 
ifc_data *, struct ifnet **); 379 static int vxlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); 380 381 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *); 382 static int vxlan_media_change(struct ifnet *); 383 static void vxlan_media_status(struct ifnet *, struct ifmediareq *); 384 385 static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *, 386 const struct sockaddr *); 387 static void vxlan_sockaddr_copy(union vxlan_sockaddr *, 388 const struct sockaddr *); 389 static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *, 390 const struct sockaddr *); 391 static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *, 392 const struct sockaddr *); 393 static int vxlan_sockaddr_supported(const union vxlan_sockaddr *, int); 394 static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *); 395 static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *); 396 static int vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *); 397 398 static int vxlan_can_change_config(struct vxlan_softc *); 399 static int vxlan_check_vni(uint32_t); 400 static int vxlan_check_ttl(int); 401 static int vxlan_check_ftable_timeout(uint32_t); 402 static int vxlan_check_ftable_max(uint32_t); 403 404 static void vxlan_sysctl_setup(struct vxlan_softc *); 405 static void vxlan_sysctl_destroy(struct vxlan_softc *); 406 static int vxlan_tunable_int(struct vxlan_softc *, const char *, int); 407 408 static void vxlan_ifdetach_event(void *, struct ifnet *); 409 static void vxlan_load(void); 410 static void vxlan_unload(void); 411 static int vxlan_modevent(module_t, int, void *); 412 413 static const char vxlan_name[] = "vxlan"; 414 static MALLOC_DEFINE(M_VXLAN, vxlan_name, 415 "Virtual eXtensible LAN Interface"); 416 static struct if_clone *vxlan_cloner; 417 418 static struct mtx vxlan_list_mtx; 419 #define VXLAN_LIST_LOCK() mtx_lock(&vxlan_list_mtx) 420 #define VXLAN_LIST_UNLOCK() mtx_unlock(&vxlan_list_mtx) 421 422 static LIST_HEAD(, vxlan_socket) 
vxlan_socket_list; 423 424 static eventhandler_tag vxlan_ifdetach_event_tag; 425 426 SYSCTL_DECL(_net_link); 427 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 428 "Virtual eXtensible Local Area Network"); 429 430 static int vxlan_legacy_port = 0; 431 TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port); 432 static int vxlan_reuse_port = 0; 433 TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port); 434 435 /* 436 * This macro controls the default upper limitation on nesting of vxlan 437 * tunnels. By default it is 3, as the overhead of IPv6 vxlan tunnel is 70 438 * bytes, this will create at most 210 bytes overhead and the most inner 439 * tunnel's MTU will be 1290 which will meet IPv6 minimum MTU size 1280. 440 * Be careful to configure the tunnels when raising the limit. A large 441 * number of nested tunnels can introduce system crash. 442 */ 443 #ifndef MAX_VXLAN_NEST 444 #define MAX_VXLAN_NEST 3 445 #endif 446 static int max_vxlan_nesting = MAX_VXLAN_NEST; 447 SYSCTL_INT(_net_link_vxlan, OID_AUTO, max_nesting, CTLFLAG_RW, 448 &max_vxlan_nesting, 0, "Max nested tunnels"); 449 450 /* Default maximum number of addresses in the forwarding table. */ 451 #ifndef VXLAN_FTABLE_MAX 452 #define VXLAN_FTABLE_MAX 2000 453 #endif 454 455 /* Timeout (in seconds) of addresses learned in the forwarding table. */ 456 #ifndef VXLAN_FTABLE_TIMEOUT 457 #define VXLAN_FTABLE_TIMEOUT (20 * 60) 458 #endif 459 460 /* 461 * Maximum timeout (in seconds) of addresses learned in the forwarding 462 * table. 463 */ 464 #ifndef VXLAN_FTABLE_MAX_TIMEOUT 465 #define VXLAN_FTABLE_MAX_TIMEOUT (60 * 60 * 24) 466 #endif 467 468 /* Number of seconds between pruning attempts of the forwarding table. 
*/ 469 #ifndef VXLAN_FTABLE_PRUNE 470 #define VXLAN_FTABLE_PRUNE (5 * 60) 471 #endif 472 473 static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE; 474 475 struct vxlan_control { 476 int (*vxlc_func)(struct vxlan_softc *, void *); 477 int vxlc_argsize; 478 int vxlc_flags; 479 #define VXLAN_CTRL_FLAG_COPYIN 0x01 480 #define VXLAN_CTRL_FLAG_COPYOUT 0x02 481 #define VXLAN_CTRL_FLAG_SUSER 0x04 482 }; 483 484 static const struct vxlan_control vxlan_control_table[] = { 485 [VXLAN_CMD_GET_CONFIG] = 486 { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg), 487 VXLAN_CTRL_FLAG_COPYOUT 488 }, 489 490 [VXLAN_CMD_SET_VNI] = 491 { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd), 492 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 493 }, 494 495 [VXLAN_CMD_SET_LOCAL_ADDR] = 496 { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd), 497 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 498 }, 499 500 [VXLAN_CMD_SET_REMOTE_ADDR] = 501 { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd), 502 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 503 }, 504 505 [VXLAN_CMD_SET_LOCAL_PORT] = 506 { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd), 507 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 508 }, 509 510 [VXLAN_CMD_SET_REMOTE_PORT] = 511 { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd), 512 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 513 }, 514 515 [VXLAN_CMD_SET_PORT_RANGE] = 516 { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd), 517 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 518 }, 519 520 [VXLAN_CMD_SET_FTABLE_TIMEOUT] = 521 { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd), 522 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 523 }, 524 525 [VXLAN_CMD_SET_FTABLE_MAX] = 526 { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd), 527 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 528 }, 529 530 [VXLAN_CMD_SET_MULTICAST_IF] = 531 { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd), 532 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 533 }, 
534 535 [VXLAN_CMD_SET_TTL] = 536 { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd), 537 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 538 }, 539 540 [VXLAN_CMD_SET_LEARN] = 541 { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd), 542 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 543 }, 544 545 [VXLAN_CMD_FTABLE_ENTRY_ADD] = 546 { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd), 547 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 548 }, 549 550 [VXLAN_CMD_FTABLE_ENTRY_REM] = 551 { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd), 552 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 553 }, 554 555 [VXLAN_CMD_FLUSH] = 556 { vxlan_ctrl_flush, sizeof(struct ifvxlancmd), 557 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 558 }, 559 }; 560 561 static const int vxlan_control_table_size = nitems(vxlan_control_table); 562 563 static int 564 vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b) 565 { 566 int i, d; 567 568 for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) 569 d = ((int)a[i]) - ((int)b[i]); 570 571 return (d); 572 } 573 574 static void 575 vxlan_ftable_init(struct vxlan_softc *sc) 576 { 577 int i; 578 579 sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) * 580 VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK); 581 582 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) 583 LIST_INIT(&sc->vxl_ftable[i]); 584 sc->vxl_ftable_hash_key = arc4random(); 585 } 586 587 static void 588 vxlan_ftable_fini(struct vxlan_softc *sc) 589 { 590 int i; 591 592 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { 593 KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]), 594 ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i)); 595 } 596 MPASS(sc->vxl_ftable_cnt == 0); 597 598 free(sc->vxl_ftable, M_VXLAN); 599 sc->vxl_ftable = NULL; 600 } 601 602 static void 603 vxlan_ftable_flush(struct vxlan_softc *sc, int all) 604 { 605 struct vxlan_ftable_entry *fe, *tfe; 606 int i; 607 608 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { 609 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, 
tfe) { 610 if (all || VXLAN_FE_IS_DYNAMIC(fe)) 611 vxlan_ftable_entry_destroy(sc, fe); 612 } 613 } 614 } 615 616 static void 617 vxlan_ftable_expire(struct vxlan_softc *sc) 618 { 619 struct vxlan_ftable_entry *fe, *tfe; 620 int i; 621 622 VXLAN_LOCK_WASSERT(sc); 623 624 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { 625 LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) { 626 if (VXLAN_FE_IS_DYNAMIC(fe) && 627 time_uptime >= fe->vxlfe_expire) 628 vxlan_ftable_entry_destroy(sc, fe); 629 } 630 } 631 } 632 633 static int 634 vxlan_ftable_update_locked(struct vxlan_softc *sc, 635 const union vxlan_sockaddr *vxlsa, const uint8_t *mac, 636 struct rm_priotracker *tracker) 637 { 638 struct vxlan_ftable_entry *fe; 639 int error __unused; 640 641 VXLAN_LOCK_ASSERT(sc); 642 643 again: 644 /* 645 * A forwarding entry for this MAC address might already exist. If 646 * so, update it, otherwise create a new one. We may have to upgrade 647 * the lock if we have to change or create an entry. 648 */ 649 fe = vxlan_ftable_entry_lookup(sc, mac); 650 if (fe != NULL) { 651 fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout; 652 653 if (!VXLAN_FE_IS_DYNAMIC(fe) || 654 vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa)) 655 return (0); 656 if (!VXLAN_LOCK_WOWNED(sc)) { 657 VXLAN_RUNLOCK(sc, tracker); 658 VXLAN_WLOCK(sc); 659 sc->vxl_stats.ftable_lock_upgrade_failed++; 660 goto again; 661 } 662 vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa); 663 return (0); 664 } 665 666 if (!VXLAN_LOCK_WOWNED(sc)) { 667 VXLAN_RUNLOCK(sc, tracker); 668 VXLAN_WLOCK(sc); 669 sc->vxl_stats.ftable_lock_upgrade_failed++; 670 goto again; 671 } 672 673 if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) { 674 sc->vxl_stats.ftable_nospace++; 675 return (ENOSPC); 676 } 677 678 fe = vxlan_ftable_entry_alloc(); 679 if (fe == NULL) 680 return (ENOMEM); 681 682 vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC); 683 684 /* The prior lookup failed, so the insert should not. 
*/ 685 error = vxlan_ftable_entry_insert(sc, fe); 686 MPASS(error == 0); 687 688 return (0); 689 } 690 691 static int 692 vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa, 693 const uint8_t *mac) 694 { 695 struct rm_priotracker tracker; 696 union vxlan_sockaddr vxlsa; 697 int error; 698 699 /* 700 * The source port may be randomly selected by the remote host, so 701 * use the port of the default destination address. 702 */ 703 vxlan_sockaddr_copy(&vxlsa, sa); 704 vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port; 705 706 if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) { 707 error = vxlan_sockaddr_in6_embedscope(&vxlsa); 708 if (error) 709 return (error); 710 } 711 712 VXLAN_RLOCK(sc, &tracker); 713 error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker); 714 VXLAN_UNLOCK(sc, &tracker); 715 716 return (error); 717 } 718 719 static int 720 vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS) 721 { 722 struct rm_priotracker tracker; 723 struct sbuf sb; 724 struct vxlan_softc *sc; 725 struct vxlan_ftable_entry *fe; 726 size_t size; 727 int i, error; 728 729 /* 730 * This is mostly intended for debugging during development. It is 731 * not practical to dump an entire large table this way. 732 */ 733 734 sc = arg1; 735 size = PAGE_SIZE; /* Calculate later. 
*/ 736 737 sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN); 738 sbuf_putc(&sb, '\n'); 739 740 VXLAN_RLOCK(sc, &tracker); 741 for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) { 742 LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) { 743 if (sbuf_error(&sb) != 0) 744 break; 745 vxlan_ftable_entry_dump(fe, &sb); 746 } 747 } 748 VXLAN_RUNLOCK(sc, &tracker); 749 750 if (sbuf_len(&sb) == 1) 751 sbuf_setpos(&sb, 0); 752 753 sbuf_finish(&sb); 754 error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 755 sbuf_delete(&sb); 756 757 return (error); 758 } 759 760 static struct vxlan_ftable_entry * 761 vxlan_ftable_entry_alloc(void) 762 { 763 struct vxlan_ftable_entry *fe; 764 765 fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT); 766 767 return (fe); 768 } 769 770 static void 771 vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe) 772 { 773 774 free(fe, M_VXLAN); 775 } 776 777 static void 778 vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe, 779 const uint8_t *mac, const struct sockaddr *sa, uint32_t flags) 780 { 781 782 fe->vxlfe_flags = flags; 783 fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout; 784 memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN); 785 vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa); 786 } 787 788 static void 789 vxlan_ftable_entry_destroy(struct vxlan_softc *sc, 790 struct vxlan_ftable_entry *fe) 791 { 792 793 sc->vxl_ftable_cnt--; 794 LIST_REMOVE(fe, vxlfe_hash); 795 vxlan_ftable_entry_free(fe); 796 } 797 798 static int 799 vxlan_ftable_entry_insert(struct vxlan_softc *sc, 800 struct vxlan_ftable_entry *fe) 801 { 802 struct vxlan_ftable_entry *lfe; 803 uint32_t hash; 804 int dir; 805 806 VXLAN_LOCK_WASSERT(sc); 807 hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac); 808 809 lfe = LIST_FIRST(&sc->vxl_ftable[hash]); 810 if (lfe == NULL) { 811 LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash); 812 goto out; 813 } 814 815 do { 816 dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac); 817 if (dir == 0) 818 return 
(EEXIST); 819 if (dir > 0) { 820 LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash); 821 goto out; 822 } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) { 823 LIST_INSERT_AFTER(lfe, fe, vxlfe_hash); 824 goto out; 825 } else 826 lfe = LIST_NEXT(lfe, vxlfe_hash); 827 } while (lfe != NULL); 828 829 out: 830 sc->vxl_ftable_cnt++; 831 832 return (0); 833 } 834 835 static struct vxlan_ftable_entry * 836 vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac) 837 { 838 struct vxlan_ftable_entry *fe; 839 uint32_t hash; 840 int dir; 841 842 VXLAN_LOCK_ASSERT(sc); 843 hash = VXLAN_SC_FTABLE_HASH(sc, mac); 844 845 LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) { 846 dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac); 847 if (dir == 0) 848 return (fe); 849 if (dir > 0) 850 break; 851 } 852 853 return (NULL); 854 } 855 856 static void 857 vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb) 858 { 859 char buf[64]; 860 const union vxlan_sockaddr *sa; 861 const void *addr; 862 int i, len, af, width; 863 864 sa = &fe->vxlfe_raddr; 865 af = sa->sa.sa_family; 866 len = sbuf_len(sb); 867 868 sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S', 869 fe->vxlfe_flags); 870 871 for (i = 0; i < ETHER_ADDR_LEN - 1; i++) 872 sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]); 873 sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]); 874 875 if (af == AF_INET) { 876 addr = &sa->in4.sin_addr; 877 width = INET_ADDRSTRLEN - 1; 878 } else { 879 addr = &sa->in6.sin6_addr; 880 width = INET6_ADDRSTRLEN - 1; 881 } 882 inet_ntop(af, addr, buf, sizeof(buf)); 883 sbuf_printf(sb, "%*s ", width, buf); 884 885 sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire); 886 887 sbuf_putc(sb, '\n'); 888 889 /* Truncate a partial line. 
*/ 890 if (sbuf_error(sb) != 0) 891 sbuf_setpos(sb, len); 892 } 893 894 static struct vxlan_socket * 895 vxlan_socket_alloc(const union vxlan_sockaddr *sa) 896 { 897 struct vxlan_socket *vso; 898 int i; 899 900 vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO); 901 rm_init(&vso->vxlso_lock, "vxlansorm"); 902 refcount_init(&vso->vxlso_refcnt, 0); 903 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) 904 LIST_INIT(&vso->vxlso_vni_hash[i]); 905 vso->vxlso_laddr = *sa; 906 907 return (vso); 908 } 909 910 static void 911 vxlan_socket_destroy(struct vxlan_socket *vso) 912 { 913 struct socket *so; 914 #ifdef INVARIANTS 915 int i; 916 struct vxlan_socket_mc_info *mc; 917 918 for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) { 919 mc = &vso->vxlso_mc[i]; 920 KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC, 921 ("%s: socket %p mc[%d] still has address", 922 __func__, vso, i)); 923 } 924 925 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) { 926 KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]), 927 ("%s: socket %p vni_hash[%d] not empty", 928 __func__, vso, i)); 929 } 930 #endif 931 so = vso->vxlso_sock; 932 if (so != NULL) { 933 vso->vxlso_sock = NULL; 934 soclose(so); 935 } 936 937 rm_destroy(&vso->vxlso_lock); 938 free(vso, M_VXLAN); 939 } 940 941 static void 942 vxlan_socket_release(struct vxlan_socket *vso) 943 { 944 int destroy; 945 946 VXLAN_LIST_LOCK(); 947 destroy = VXLAN_SO_RELEASE(vso); 948 if (destroy != 0) 949 LIST_REMOVE(vso, vxlso_entry); 950 VXLAN_LIST_UNLOCK(); 951 952 if (destroy != 0) 953 vxlan_socket_destroy(vso); 954 } 955 956 static struct vxlan_socket * 957 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa) 958 { 959 struct vxlan_socket *vso; 960 961 VXLAN_LIST_LOCK(); 962 LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) { 963 if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) { 964 VXLAN_SO_ACQUIRE(vso); 965 break; 966 } 967 } 968 VXLAN_LIST_UNLOCK(); 969 970 return (vso); 971 } 972 973 static void 974 vxlan_socket_insert(struct vxlan_socket *vso) 975 { 
976 977 VXLAN_LIST_LOCK(); 978 VXLAN_SO_ACQUIRE(vso); 979 LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry); 980 VXLAN_LIST_UNLOCK(); 981 } 982 983 static int 984 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp) 985 { 986 struct thread *td; 987 int error; 988 989 td = curthread; 990 991 error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock, 992 SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td); 993 if (error) { 994 if_printf(ifp, "cannot create socket: %d\n", error); 995 return (error); 996 } 997 998 error = udp_set_kernel_tunneling(vso->vxlso_sock, 999 vxlan_rcv_udp_packet, NULL, vso); 1000 if (error) { 1001 if_printf(ifp, "cannot set tunneling function: %d\n", error); 1002 return (error); 1003 } 1004 1005 if (vxlan_reuse_port != 0) { 1006 struct sockopt sopt; 1007 int val = 1; 1008 1009 bzero(&sopt, sizeof(sopt)); 1010 sopt.sopt_dir = SOPT_SET; 1011 sopt.sopt_level = IPPROTO_IP; 1012 sopt.sopt_name = SO_REUSEPORT; 1013 sopt.sopt_val = &val; 1014 sopt.sopt_valsize = sizeof(val); 1015 error = sosetopt(vso->vxlso_sock, &sopt); 1016 if (error) { 1017 if_printf(ifp, 1018 "cannot set REUSEADDR socket opt: %d\n", error); 1019 return (error); 1020 } 1021 } 1022 1023 return (0); 1024 } 1025 1026 static int 1027 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp) 1028 { 1029 union vxlan_sockaddr laddr; 1030 struct thread *td; 1031 int error; 1032 1033 td = curthread; 1034 laddr = vso->vxlso_laddr; 1035 1036 error = sobind(vso->vxlso_sock, &laddr.sa, td); 1037 if (error) { 1038 if (error != EADDRINUSE) 1039 if_printf(ifp, "cannot bind socket: %d\n", error); 1040 return (error); 1041 } 1042 1043 return (0); 1044 } 1045 1046 static int 1047 vxlan_socket_create(struct ifnet *ifp, int multicast, 1048 const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop) 1049 { 1050 union vxlan_sockaddr laddr; 1051 struct vxlan_socket *vso; 1052 int error; 1053 1054 laddr = *saddr; 1055 1056 /* 1057 * If this socket will be multicast, then only the 
local port
	 * must be specified when binding.
	 */
	if (multicast != 0) {
		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
			laddr.in4.sin_addr.s_addr = INADDR_ANY;
#ifdef INET6
		else
			laddr.in6.sin6_addr = in6addr_any;
#endif
	}

	vso = vxlan_socket_alloc(&laddr);
	if (vso == NULL)
		return (ENOMEM);

	error = vxlan_socket_init(vso, ifp);
	if (error)
		goto fail;

	error = vxlan_socket_bind(vso, ifp);
	if (error)
		goto fail;

	/*
	 * There is a small window between the bind completing and
	 * inserting the socket, so that a concurrent create may fail.
	 * Let's not worry about that for now.
	 */
	vxlan_socket_insert(vso);
	*vsop = vso;

	return (0);

fail:
	vxlan_socket_destroy(vso);

	return (error);
}

/*
 * An interface is going away: walk every softc hashed on this socket and
 * queue those using 'ifp' as their multicast interface onto 'list' for
 * teardown by the caller.
 */
static void
vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{
	struct rm_priotracker tracker;
	struct vxlan_softc *sc;
	int i;

	VXLAN_SO_RLOCK(vso, &tracker);
	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
		LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
			vxlan_ifdetach(sc, ifp, list);
	}
	VXLAN_SO_RUNLOCK(vso, &tracker);
}

/*
 * Multicast sockets bind to the wildcard address, so look up an existing
 * socket by the wildcard form of the requested local address.
 */
static struct vxlan_socket *
vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
{
	union vxlan_sockaddr laddr;
	struct vxlan_socket *vso;

	laddr = *vxlsa;

	if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
		laddr.in4.sin_addr.s_addr = INADDR_ANY;
#ifdef INET6
	else
		laddr.in6.sin6_addr = in6addr_any;
#endif

	vso = vxlan_socket_lookup(&laddr);

	return (vso);
}

/*
 * Return nonzero when an existing multicast membership slot matches the
 * requested group/source/interface tuple.  A wildcard local address or
 * a zero ifidx matches any slot value.
 */
static int
vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int ifidx)
{

	if (!vxlan_sockaddr_in_any(local) &&
	    !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
		return (0);
	if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
		return (0);
	if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
		return (0);

	return (1);
}

/*
 * Join the socket to the given multicast group via setsockopt-style
 * options on the backing socket.  On success *source records the local
 * address actually used for the membership so later leaves match.
 * Returns 0 or a sockopt errno; EAFNOSUPPORT for non-IP families.
 */
static int
vxlan_socket_mc_join_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int *ifidx, union vxlan_sockaddr *source)
{
	struct sockopt sopt;
	int error;

	*source = *local;

	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		struct ip_mreq mreq;

		mreq.imr_multiaddr = group->in4.sin_addr;
		mreq.imr_interface = local->in4.sin_addr;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_ADD_MEMBERSHIP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error)
			return (error);

		/*
		 * BMV: Ideally, there would be a formal way for us to get
		 * the local interface that was selected based on the
		 * imr_interface address.  We could then update *ifidx so
		 * vxlan_sockaddr_mc_info_match() would return a match for
		 * later creates that explicitly set the multicast interface.
		 *
		 * If we really need to, we can of course look in the INP's
		 * membership list:
		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
		 *         imo_head[]->imf_inm->inm_ifp
		 * similarly to imo_match_group().
		 */
		source->in4.sin_addr = local->in4.sin_addr;

	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		struct ipv6_mreq mreq;

		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
		mreq.ipv6mr_interface = *ifidx;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_JOIN_GROUP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error)
			return (error);

		/*
		 * BMV: As with IPv4, we would really like to know what
		 * interface in6p_lookup_mcast_ifp() selected.
		 */
	} else
		error = EAFNOSUPPORT;

	return (error);
}

/*
 * Drop the socket's membership in the given multicast group, the inverse
 * of vxlan_socket_mc_join_group().  'source'/'ifidx' must be the values
 * recorded at join time.
 */
static int
vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
    int ifidx)
{
	struct sockopt sopt;
	int error;

	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_SET;

	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		struct ip_mreq mreq;

		mreq.imr_multiaddr = group->in4.sin_addr;
		mreq.imr_interface = source->in4.sin_addr;

		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_DROP_MEMBERSHIP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);

	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		struct ipv6_mreq mreq;

		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
		mreq.ipv6mr_interface = ifidx;

		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_LEAVE_GROUP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);

	} else
		error = EAFNOSUPPORT;

	return (error);
}

/*
 * Reference-counted join: either bump the user count of an existing
 * matching membership slot or join the group and claim a free slot.
 * On success *idx is the slot index for the later release.
 */
static int
vxlan_socket_mc_add_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int
    ifidx, int *idx)
{
	union vxlan_sockaddr source;
	struct vxlan_socket_mc_info *mc;
	int i, empty, error;

	/*
	 * Within a socket, the same multicast group may be used by multiple
	 * interfaces, each with a different network identifier. But a socket
	 * may only join a multicast group once, so keep track of the users
	 * here.
	 */

	VXLAN_SO_WLOCK(vso);
	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];

		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			empty++;
			continue;
		}

		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
			goto out;
	}
	VXLAN_SO_WUNLOCK(vso);

	if (empty == 0)
		return (ENOSPC);

	/* Joining requires sosetopt(), so it must happen unlocked. */
	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
	if (error)
		return (error);

	/*
	 * Re-scan for a free slot: another thread may have claimed slots
	 * while the lock was dropped for the join above.
	 */
	VXLAN_SO_WLOCK(vso);
	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];

		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
			mc->vxlsomc_ifidx = ifidx;
			goto out;
		}
	}
	VXLAN_SO_WUNLOCK(vso);

	/* No slot left after the race: undo the join. */
	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
	MPASS(error == 0);

	return (ENOSPC);

out:
	mc->vxlsomc_users++;
	VXLAN_SO_WUNLOCK(vso);

	*idx = i;

	return (0);
}

/*
 * Drop one user of multicast slot 'idx'; the last user leaves the group
 * (outside the lock) and clears the slot.
 */
static void
vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
{
	union vxlan_sockaddr group, source;
	struct vxlan_socket_mc_info *mc;
	int ifidx, leave;

	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));

	leave = 0;
	mc = &vso->vxlso_mc[idx];

	VXLAN_SO_WLOCK(vso);
	mc->vxlsomc_users--;
	if (mc->vxlsomc_users == 0) {
		group =
		    mc->vxlsomc_gaddr;
		source = mc->vxlsomc_saddr;
		ifidx = mc->vxlsomc_ifidx;
		bzero(mc, sizeof(*mc));
		leave = 1;
	}
	VXLAN_SO_WUNLOCK(vso);

	if (leave != 0) {
		/*
		 * Our socket's membership in this group may have already
		 * been removed if we joined through an interface that's
		 * been detached.
		 */
		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
	}
}

/*
 * Find the softc registered for 'vni' on this socket and return it with
 * a new reference, or NULL.  Caller holds the socket lock.
 */
static struct vxlan_softc *
vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
{
	struct vxlan_softc *sc;
	uint32_t hash;

	VXLAN_SO_LOCK_ASSERT(vso);
	hash = VXLAN_SO_VNI_HASH(vni);

	LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
		if (sc->vxl_vni == vni) {
			VXLAN_ACQUIRE(sc);
			break;
		}
	}

	return (sc);
}

/*
 * Locking wrapper around vxlan_socket_lookup_softc_locked().
 */
static struct vxlan_softc *
vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
{
	struct rm_priotracker tracker;
	struct vxlan_softc *sc;

	VXLAN_SO_RLOCK(vso, &tracker);
	sc = vxlan_socket_lookup_softc_locked(vso, vni);
	VXLAN_SO_RUNLOCK(vso, &tracker);

	return (sc);
}

/*
 * Register a softc on the socket's VNI hash, taking a reference on the
 * softc.  Fails with EEXIST if the VNI is already claimed on this socket.
 */
static int
vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{
	struct vxlan_softc *tsc;
	uint32_t vni, hash;

	vni = sc->vxl_vni;
	hash = VXLAN_SO_VNI_HASH(vni);

	VXLAN_SO_WLOCK(vso);
	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
	if (tsc != NULL) {
		VXLAN_SO_WUNLOCK(vso);
		vxlan_release(tsc);
		return (EEXIST);
	}

	VXLAN_ACQUIRE(sc);
	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
	VXLAN_SO_WUNLOCK(vso);

	return (0);
}

/*
 * Unhash the softc from the socket and drop the hash's reference.
 */
static void
vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{

	VXLAN_SO_WLOCK(vso);
	LIST_REMOVE(sc, vxl_entry);
	VXLAN_SO_WUNLOCK(vso);

	vxlan_release(sc);
}

/*
 * Return a referenced pointer to the interface used for multicast
 * transmission (IPv4 or IPv6 options as selected), or NULL if none is
 * configured.  Caller must if_rele() the result.
 */
static struct ifnet *
vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
{
	struct ifnet *ifp;

	VXLAN_LOCK_ASSERT(sc);

	if (ipv4 && sc->vxl_im4o != NULL)
		ifp = sc->vxl_im4o->imo_multicast_ifp;
	else if (!ipv4 && sc->vxl_im6o != NULL)
		ifp = sc->vxl_im6o->im6o_multicast_ifp;
	else
		ifp = NULL;

	if (ifp != NULL)
		if_ref(ifp);

	return (ifp);
}

/*
 * Release all multicast state: the held interface reference and the
 * IPv4/IPv6 multicast option structures.
 */
static void
vxlan_free_multicast(struct vxlan_softc *sc)
{

	if (sc->vxl_mc_ifp != NULL) {
		if_rele(sc->vxl_mc_ifp);
		sc->vxl_mc_ifp = NULL;
		sc->vxl_mc_ifindex = 0;
	}

	if (sc->vxl_im4o != NULL) {
		free(sc->vxl_im4o, M_VXLAN);
		sc->vxl_im4o = NULL;
	}

	if (sc->vxl_im6o != NULL) {
		free(sc->vxl_im6o, M_VXLAN);
		sc->vxl_im6o = NULL;
	}
}

/*
 * Resolve the configured multicast interface name to a referenced ifnet
 * and record its index.  The reference is dropped in
 * vxlan_free_multicast().
 */
static int
vxlan_setup_multicast_interface(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	ifp = ifunit_ref(sc->vxl_mc_ifname);
	if (ifp == NULL) {
		if_printf(sc->vxl_ifp, "multicast interface %s does "
		    "not exist\n", sc->vxl_mc_ifname);
		return (ENOENT);
	}

	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
		if_printf(sc->vxl_ifp, "interface %s does not support "
		    "multicast\n", sc->vxl_mc_ifname);
		if_rele(ifp);
		return (ENOTSUP);
	}

	sc->vxl_mc_ifp = ifp;
	sc->vxl_mc_ifindex = ifp->if_index;

	return (0);
}

/*
 * Build the per-softc multicast transmit state (interface plus ip/ip6
 * moptions) for a multicast destination address.
 */
static int
vxlan_setup_multicast(struct vxlan_softc *sc)
{
	const union vxlan_sockaddr *group;
	int error;

	group = &sc->vxl_dst_addr;
	error = 0;

	if (sc->vxl_mc_ifname[0] != '\0') {
		error = vxlan_setup_multicast_interface(sc);
		if (error)
			return (error);
	}

	/*
	 * Initialize an multicast options structure that is sufficiently
	 * populated for use in the respective IP output routine.  This
	 * structure is typically stored in the socket, but our sockets
	 * may be shared among multiple interfaces.
	 */
	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
		    M_ZERO | M_WAITOK);
		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
		sc->vxl_im4o->imo_multicast_vif = -1;
	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
		    M_ZERO | M_WAITOK);
		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
	}

	return (error);
}

/*
 * Acquire (create or share) the UDP socket for this interface, join the
 * multicast group if the destination is multicast, and register our VNI
 * on the socket.  Called from vxlan_init(); on failure all partially
 * acquired state is unwound.
 */
static int
vxlan_setup_socket(struct vxlan_softc *sc)
{
	struct vxlan_socket *vso;
	struct ifnet *ifp;
	union vxlan_sockaddr *saddr, *daddr;
	int multicast, error;

	vso = NULL;
	ifp = sc->vxl_ifp;
	saddr = &sc->vxl_src_addr;
	daddr = &sc->vxl_dst_addr;

	multicast = vxlan_sockaddr_in_multicast(daddr);
	MPASS(multicast != -1);
	sc->vxl_vso_mc_index = -1;

	/*
	 * Try to create the socket. If that fails, attempt to use an
	 * existing socket.
	 */
	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
	if (error) {
		if (multicast != 0)
			vso = vxlan_socket_mc_lookup(saddr);
		else
			vso = vxlan_socket_lookup(saddr);

		if (vso == NULL) {
			if_printf(ifp, "cannot create socket (error: %d), "
			    "and no existing socket found\n", error);
			goto out;
		}
	}

	if (multicast != 0) {
		error = vxlan_setup_multicast(sc);
		if (error)
			goto out;

		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
		if (error)
			goto out;
	}

	sc->vxl_sock = vso;
	error = vxlan_socket_insert_softc(vso, sc);
	if (error) {
		sc->vxl_sock = NULL;
		if_printf(ifp, "network identifier %d already exists in "
		    "this socket\n", sc->vxl_vni);
		goto out;
	}

	return (0);

out:
	/* Unwind in reverse order of acquisition. */
	if (vso != NULL) {
		if (sc->vxl_vso_mc_index != -1) {
			vxlan_socket_mc_release_group_by_idx(vso,
			    sc->vxl_vso_mc_index);
			sc->vxl_vso_mc_index = -1;
		}
		if (multicast != 0)
			vxlan_free_multicast(sc);
		vxlan_socket_release(vso);
	}

	return (error);
}

#ifdef INET6
/*
 * RFC 6935 allows a zero UDP checksum over IPv6 for tunnel protocols on
 * a single configured port.  If our IPv6 src/dst ports agree and no port
 * is set yet, claim V_zero_checksum_port for this VXLAN port; otherwise
 * just report why it was left alone.
 */
static void
vxlan_setup_zero_checksum_port(struct vxlan_softc *sc)
{

	if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr))
		return;

	MPASS(sc->vxl_src_addr.in6.sin6_port != 0);
	MPASS(sc->vxl_dst_addr.in6.sin6_port != 0);

	if (sc->vxl_src_addr.in6.sin6_port != sc->vxl_dst_addr.in6.sin6_port) {
		if_printf(sc->vxl_ifp, "port %d in src address does not match "
		    "port %d in dst address, rfc6935_port (%d) not updated.\n",
		    ntohs(sc->vxl_src_addr.in6.sin6_port),
		    ntohs(sc->vxl_dst_addr.in6.sin6_port),
		    V_zero_checksum_port);
		return;
	}

	if (V_zero_checksum_port != 0) {
		if (V_zero_checksum_port !=
		    ntohs(sc->vxl_src_addr.in6.sin6_port)) {
			if_printf(sc->vxl_ifp,
			    "rfc6935_port is already set to "
			    "%d, cannot set it to %d.\n", V_zero_checksum_port,
			    ntohs(sc->vxl_src_addr.in6.sin6_port));
		}
		return;
	}

	V_zero_checksum_port = ntohs(sc->vxl_src_addr.in6.sin6_port);
	if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n",
	    V_zero_checksum_port);
}
#endif

/*
 * Recompute if_hdrlen for the encapsulation overhead (ether + UDP/VXLAN
 * plus the outer IPv4/IPv6 header) and derive the default MTU unless the
 * user pinned one.
 */
static void
vxlan_setup_interface_hdrlen(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	VXLAN_LOCK_WASSERT(sc);

	ifp = sc->vxl_ifp;
	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);

	if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
		ifp->if_hdrlen += sizeof(struct ip);
	else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
		ifp->if_hdrlen += sizeof(struct ip6_hdr);

	if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0)
		ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
}

/*
 * Validate the interface configuration before bringing it up.  Returns
 * 0 when usable, otherwise logs the reason and returns EINVAL.
 */
static int
vxlan_valid_init_config(struct vxlan_softc *sc)
{
	const char *reason;

	if (vxlan_check_vni(sc->vxl_vni) != 0) {
		reason = "invalid virtual network identifier specified";
		goto fail;
	}

	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
		reason = "source address type is not supported";
		goto fail;
	}

	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
		reason = "destination address type is not supported";
		goto fail;
	}

	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
		reason = "no valid destination address specified";
		goto fail;
	}

	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
	    sc->vxl_mc_ifname[0] != '\0') {
		reason = "can only specify interface with a group address";
		goto fail;
	}

	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
		/* XOR: exactly one of the two addresses is IPv4. */
		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
			reason = "source and destination address must both "
			    "be either IPv4 or IPv6";
			goto fail;
		}
	}

	if (sc->vxl_src_addr.in4.sin_port == 0) {
		reason = "local port not specified";
		goto fail;
	}

	if (sc->vxl_dst_addr.in4.sin_port == 0) {
		reason = "remote port not specified";
		goto fail;
	}

	return (0);

fail:
	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
	return (EINVAL);
}

/*
 * Wait for a concurrent vxlan_init() to finish.  Softc lock held;
 * rm_sleep() drops and reacquires it.
 */
static void
vxlan_init_wait(struct vxlan_softc *sc)
{

	VXLAN_LOCK_WASSERT(sc);
	while (sc->vxl_flags & VXLAN_FLAG_INIT)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
}

/*
 * Clear the in-progress flag and wake any vxlan_init_wait() sleepers.
 */
static void
vxlan_init_complete(struct vxlan_softc *sc)
{

	VXLAN_WLOCK(sc);
	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
	wakeup(sc);
	VXLAN_WUNLOCK(sc);
}

/*
 * if_init handler: validate the configuration, set up the socket and
 * multicast state, and mark the interface running.  Serialized by the
 * global vxlan_sx and the VXLAN_FLAG_INIT flag.
 */
static void
vxlan_init(void *xsc)
{
	static const uint8_t empty_mac[ETHER_ADDR_LEN];
	struct vxlan_softc *sc;
	struct ifnet *ifp;

	sc = xsc;
	ifp = sc->vxl_ifp;

	sx_xlock(&vxlan_sx);
	VXLAN_WLOCK(sc);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		VXLAN_WUNLOCK(sc);
		sx_xunlock(&vxlan_sx);
		return;
	}
	sc->vxl_flags |= VXLAN_FLAG_INIT;
	VXLAN_WUNLOCK(sc);

	if (vxlan_valid_init_config(sc) != 0)
		goto out;

	if (vxlan_setup_socket(sc) != 0)
		goto out;

#ifdef INET6
	vxlan_setup_zero_checksum_port(sc);
#endif

	/* Initialize the default forwarding entry.
	 */
	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);

	VXLAN_WLOCK(sc);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
	    vxlan_timer, sc);
	VXLAN_WUNLOCK(sc);

	if_link_state_change(ifp, LINK_STATE_UP);

	EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family,
	    ntohs(sc->vxl_src_addr.in4.sin_port));
out:
	vxlan_init_complete(sc);
	sx_xunlock(&vxlan_sx);
}

/*
 * Drop a softc reference; the final release wakes the teardown thread
 * sleeping in vxlan_teardown_locked().
 */
static void
vxlan_release(struct vxlan_softc *sc)
{

	/*
	 * The softc may be destroyed as soon as we release our reference,
	 * so we cannot serialize the wakeup with the softc lock. We use a
	 * timeout in our sleeps so a missed wakeup is unfortunate but not
	 * fatal.
	 */
	if (VXLAN_RELEASE(sc) != 0)
		wakeup(sc);
}

/*
 * Wait for a concurrent teardown to finish.  Softc lock held.
 */
static void
vxlan_teardown_wait(struct vxlan_softc *sc)
{

	VXLAN_LOCK_WASSERT(sc);
	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
}

/*
 * Clear the teardown flag and wake any vxlan_teardown_wait() sleepers.
 */
static void
vxlan_teardown_complete(struct vxlan_softc *sc)
{

	VXLAN_WLOCK(sc);
	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
	wakeup(sc);
	VXLAN_WUNLOCK(sc);
}

/*
 * Stop the interface: mark it down, detach from the socket, release
 * multicast state, and wait for in-flight references to drain.  Entered
 * with the softc write lock held (it is dropped and retaken) and the
 * teardown flag set.
 */
static void
vxlan_teardown_locked(struct vxlan_softc *sc)
{
	struct ifnet *ifp;
	struct vxlan_socket *vso;

	sx_assert(&vxlan_sx, SA_XLOCKED);
	VXLAN_LOCK_WASSERT(sc);
	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);

	ifp = sc->vxl_ifp;
	ifp->if_flags &= ~IFF_UP;
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	callout_stop(&sc->vxl_callout);
	vso = sc->vxl_sock;
	sc->vxl_sock = NULL;

	VXLAN_WUNLOCK(sc);
	if_link_state_change(ifp, LINK_STATE_DOWN);
	EVENTHANDLER_INVOKE(vxlan_stop, ifp, sc->vxl_src_addr.in4.sin_family,
	    ntohs(sc->vxl_src_addr.in4.sin_port));

	if (vso != NULL) {
		vxlan_socket_remove_softc(vso, sc);

		if (sc->vxl_vso_mc_index != -1) {
			vxlan_socket_mc_release_group_by_idx(vso,
			    sc->vxl_vso_mc_index);
			sc->vxl_vso_mc_index = -1;
		}
	}

	/* Wait for any outstanding softc references to drain. */
	VXLAN_WLOCK(sc);
	while (sc->vxl_refcnt != 0)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
	VXLAN_WUNLOCK(sc);

	callout_drain(&sc->vxl_callout);

	vxlan_free_multicast(sc);
	if (vso != NULL)
		vxlan_socket_release(vso);

	vxlan_teardown_complete(sc);
}

/*
 * Public teardown entry point: serialize against a concurrent teardown,
 * then run vxlan_teardown_locked().
 */
static void
vxlan_teardown(struct vxlan_softc *sc)
{

	sx_xlock(&vxlan_sx);
	VXLAN_WLOCK(sc);
	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
		vxlan_teardown_wait(sc);
		VXLAN_WUNLOCK(sc);
		sx_xunlock(&vxlan_sx);
		return;
	}

	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
	vxlan_teardown_locked(sc);
	sx_xunlock(&vxlan_sx);
}

/*
 * Interface-departed handler: if this softc uses 'ifp' as its multicast
 * interface, claim the teardown flag and queue it on 'list' for the
 * caller to tear down (the socket lock is held here, so we cannot do it
 * ourselves).
 */
static void
vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{

	VXLAN_WLOCK(sc);

	if (sc->vxl_mc_ifp != ifp)
		goto out;
	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
		goto out;

	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);

out:
	VXLAN_WUNLOCK(sc);
}

/*
 * Periodic callout: expire stale forwarding-table entries and reschedule.
 * The callout is initialized with the softc lock, hence the assertion.
 */
static void
vxlan_timer(void *xsc)
{
	struct vxlan_softc *sc;

	sc = xsc;
	VXLAN_LOCK_WASSERT(sc);

	vxlan_ftable_expire(sc);
	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
}

/*
 * SIOCSIFFLAGS handler: bring the interface up or down to match IFF_UP.
 */
static int
vxlan_ioctl_ifflags(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	ifp = sc->vxl_ifp;

	if (ifp->if_flags & IFF_UP) {
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			vxlan_init(sc);
	} else {
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			vxlan_teardown(sc);
	}

	return (0);
}

static int
/*
 * VXLAN_CMD_GET_CONFIG: snapshot the configuration into the caller's
 * ifvxlancfg.  IPv6 scope zone IDs are recovered after the lock is
 * dropped so userland sees addresses in their external form.
 */
vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
{
	struct rm_priotracker tracker;
	struct ifvxlancfg *cfg;

	cfg = arg;
	bzero(cfg, sizeof(*cfg));

	VXLAN_RLOCK(sc, &tracker);
	cfg->vxlc_vni = sc->vxl_vni;
	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
	    sizeof(union vxlan_sockaddr));
	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
	    sizeof(union vxlan_sockaddr));
	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
	cfg->vxlc_port_min = sc->vxl_min_port;
	cfg->vxlc_port_max = sc->vxl_max_port;
	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
	cfg->vxlc_ttl = sc->vxl_ttl;
	VXLAN_RUNLOCK(sc, &tracker);

#ifdef INET6
	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
		sa6_recoverscope(&cfg->vxlc_local_sa.in6);
	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
		sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
#endif

	return (0);
}

/*
 * VXLAN_CMD_SET_VNI: change the network identifier.  Only allowed while
 * the interface is not running (vxlan_can_change_config()).
 */
static int
vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
		return (EINVAL);

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		sc->vxl_vni = cmd->vxlcmd_vni;
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_SET_LOCAL_ADDR: set the tunnel source address.  Multicast
 * sources are rejected; IPv6 scoped addresses get their zone embedded.
 */
static int
vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	union vxlan_sockaddr *vxlsa;
	int error;

	cmd = arg;
	vxlsa = &cmd->vxlcmd_sa;

	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
		return (EINVAL);
	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
		return (EINVAL);
	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
		error =
		    vxlan_sockaddr_in6_embedscope(vxlsa);
		if (error)
			return (error);
	}

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
		vxlan_set_hwcaps(sc);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_SET_REMOTE_ADDR: set the tunnel destination address and
 * recompute the header length (the outer IP version may change).
 */
static int
vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	union vxlan_sockaddr *vxlsa;
	int error;

	cmd = arg;
	vxlsa = &cmd->vxlcmd_sa;

	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
		return (EINVAL);
	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(vxlsa);
		if (error)
			return (error);
	}

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
		vxlan_setup_interface_hdrlen(sc);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_SET_LOCAL_PORT: set the local UDP port (stored in network
 * byte order; the sin_port field aliases for both address families).
 */
static int
vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	if (cmd->vxlcmd_port == 0)
		return (EINVAL);

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_SET_REMOTE_PORT: set the destination UDP port.
 */
static int
vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	if (cmd->vxlcmd_port == 0)
		return (EINVAL);

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
/*
 * VXLAN_CMD_SET_PORT_RANGE: set the UDP source-port hash range used by
 * vxlan_pick_source_port().
 */
vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	uint16_t min, max;
	int error;

	cmd = arg;
	min = cmd->vxlcmd_port_min;
	max = cmd->vxlcmd_port_max;

	if (max < min)
		return (EINVAL);

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		sc->vxl_min_port = min;
		sc->vxl_max_port = max;
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_SET_FTABLE_TIMEOUT: set the forwarding-entry expiry time.
 * May be changed while the interface is running.
 */
static int
vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
		sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
		error = 0;
	} else
		error = EINVAL;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_SET_FTABLE_MAX: cap the number of forwarding-table entries.
 */
static int
vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
		sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
		error = 0;
	} else
		error = EINVAL;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_SET_MULTICAST_IF: record the multicast interface name; it is
 * resolved at init time by vxlan_setup_multicast_interface().
 */
static int
vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
		vxlan_set_hwcaps(sc);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_SET_TTL: set the outer TTL/hop limit, propagating it into
 * any existing multicast option structures.
 */
static int
vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
		sc->vxl_ttl = cmd->vxlcmd_ttl;
		if (sc->vxl_im4o != NULL)
			sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
		if (sc->vxl_im6o != NULL)
			sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
		error = 0;
	} else
		error = EINVAL;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_SET_LEARN: enable or disable source-address learning into
 * the forwarding table.
 */
static int
vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
		sc->vxl_flags |= VXLAN_FLAG_LEARN;
	else
		sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
	VXLAN_WUNLOCK(sc);

	return (0);
}

/*
 * VXLAN_CMD_FTABLE_ENTRY_ADD: install a static forwarding-table entry
 * mapping a MAC address to a unicast tunnel endpoint.
 */
static int
vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
{
	union vxlan_sockaddr vxlsa;
	struct ifvxlancmd *cmd;
	struct vxlan_ftable_entry *fe;
	int error;

	cmd = arg;
	vxlsa = cmd->vxlcmd_sa;

	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
		return (EINVAL);
	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
		return (EINVAL);
	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
		return (EINVAL);
	/* BMV: We could support both IPv4 and IPv6 later.
	 */
	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
		return (EAFNOSUPPORT);

	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
		if (error)
			return (error);
	}

	fe = vxlan_ftable_entry_alloc();
	if (fe == NULL)
		return (ENOMEM);

	/* Default to the configured destination port when none given. */
	if (vxlsa.in4.sin_port == 0)
		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;

	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
	    VXLAN_FE_FLAG_STATIC);

	VXLAN_WLOCK(sc);
	error = vxlan_ftable_entry_insert(sc, fe);
	VXLAN_WUNLOCK(sc);

	if (error)
		vxlan_ftable_entry_free(fe);

	return (error);
}

/*
 * VXLAN_CMD_FTABLE_ENTRY_REM: remove the forwarding entry for a MAC.
 */
static int
vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	struct vxlan_ftable_entry *fe;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
	if (fe != NULL) {
		vxlan_ftable_entry_destroy(sc, fe);
		error = 0;
	} else
		error = ENOENT;
	VXLAN_WUNLOCK(sc);

	return (error);
}

/*
 * VXLAN_CMD_FLUSH: flush learned (or, with FLUSH_ALL, every) forwarding
 * table entries.
 */
static int
vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int all;

	cmd = arg;
	all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;

	VXLAN_WLOCK(sc);
	vxlan_ftable_flush(sc, all);
	VXLAN_WUNLOCK(sc);

	return (0);
}

/*
 * Dispatch SIOC[GS]DRVSPEC requests through vxlan_control_table: check
 * direction, privilege and argument size, copy the argument in/out, and
 * invoke the command handler.
 */
static int
vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
{
	const struct vxlan_control *vc;
	union {
		struct ifvxlancfg	cfg;
		struct ifvxlancmd	cmd;
	} args;
	int out, error;

	if (ifd->ifd_cmd >= vxlan_control_table_size)
		return (EINVAL);

	bzero(&args, sizeof(args));
	vc = &vxlan_control_table[ifd->ifd_cmd];
	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;

	/* GET requests must copy out; SET requests must not. */
	if ((get != 0 && out == 0) || (get == 0 &&
	    out != 0))
		return (EINVAL);

	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
		error = priv_check(curthread, PRIV_NET_VXLAN);
		if (error)
			return (error);
	}

	if (ifd->ifd_len != vc->vxlc_argsize ||
	    ifd->ifd_len > sizeof(args))
		return (EINVAL);

	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
		if (error)
			return (error);
	}

	error = vc->vxlc_func(sc, &args);
	if (error)
		return (error);

	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
		if (error)
			return (error);
	}

	return (0);
}

/*
 * Interface ioctl handler.  VXLAN-specific configuration goes through
 * SIOC[GS]DRVSPEC; everything else is standard ifnet/ether handling.
 */
static int
vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct rm_priotracker tracker;
	struct vxlan_softc *sc;
	struct ifreq *ifr;
	struct ifdrv *ifd;
	int error;

	sc = ifp->if_softc;
	ifr = (struct ifreq *) data;
	ifd = (struct ifdrv *) data;

	error = 0;

	switch (cmd) {
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	case SIOCGDRVSPEC:
	case SIOCSDRVSPEC:
		error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
		break;

	case SIOCSIFFLAGS:
		error = vxlan_ioctl_ifflags(sc);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
		break;

	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) {
			error = EINVAL;
		} else {
			VXLAN_WLOCK(sc);
			ifp->if_mtu = ifr->ifr_mtu;
			/* Remember the user pinned the MTU. */
			sc->vxl_flags |= VXLAN_FLAG_USER_MTU;
			VXLAN_WUNLOCK(sc);
		}
		break;

	case SIOCSIFCAP:
		VXLAN_WLOCK(sc);
		error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap);
		if (error == 0)
			vxlan_set_hwcaps(sc);
		VXLAN_WUNLOCK(sc);
		break;

	case SIOCGTUNFIB:
		VXLAN_RLOCK(sc, &tracker);
ifr->ifr_fib = sc->vxl_fibnum; 2400 VXLAN_RUNLOCK(sc, &tracker); 2401 break; 2402 2403 case SIOCSTUNFIB: 2404 if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0) 2405 break; 2406 2407 if (ifr->ifr_fib >= rt_numfibs) 2408 error = EINVAL; 2409 else { 2410 VXLAN_WLOCK(sc); 2411 sc->vxl_fibnum = ifr->ifr_fib; 2412 VXLAN_WUNLOCK(sc); 2413 } 2414 break; 2415 2416 default: 2417 error = ether_ioctl(ifp, cmd, data); 2418 break; 2419 } 2420 2421 return (error); 2422 } 2423 2424 #if defined(INET) || defined(INET6) 2425 static uint16_t 2426 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m) 2427 { 2428 int range; 2429 uint32_t hash; 2430 2431 range = sc->vxl_max_port - sc->vxl_min_port + 1; 2432 2433 if (M_HASHTYPE_ISHASH(m)) 2434 hash = m->m_pkthdr.flowid; 2435 else 2436 hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, 2437 sc->vxl_port_hash_key); 2438 2439 return (sc->vxl_min_port + (hash % range)); 2440 } 2441 2442 static void 2443 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff, 2444 uint16_t srcport, uint16_t dstport) 2445 { 2446 struct vxlanudphdr *hdr; 2447 struct udphdr *udph; 2448 struct vxlan_header *vxh; 2449 int len; 2450 2451 len = m->m_pkthdr.len - ipoff; 2452 MPASS(len >= sizeof(struct vxlanudphdr)); 2453 hdr = mtodo(m, ipoff); 2454 2455 udph = &hdr->vxlh_udp; 2456 udph->uh_sport = srcport; 2457 udph->uh_dport = dstport; 2458 udph->uh_ulen = htons(len); 2459 udph->uh_sum = 0; 2460 2461 vxh = &hdr->vxlh_hdr; 2462 vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI); 2463 vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT); 2464 } 2465 #endif 2466 2467 #if defined(INET6) || defined(INET) 2468 /* 2469 * Return the CSUM_INNER_* equivalent of CSUM_* caps. 
2470 */ 2471 static uint32_t 2472 csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap) 2473 { 2474 uint32_t csum_flags = encap; 2475 const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP; 2476 2477 /* 2478 * csum_flags can request either v4 or v6 offload but not both. 2479 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO) 2480 * so those bits are no good to detect the IP version. Other bits are 2481 * always set with CSUM_TSO and we use those to figure out the IP 2482 * version. 2483 */ 2484 if (csum_flags_in & v4) { 2485 if (csum_flags_in & CSUM_IP) 2486 csum_flags |= CSUM_INNER_IP; 2487 if (csum_flags_in & CSUM_IP_UDP) 2488 csum_flags |= CSUM_INNER_IP_UDP; 2489 if (csum_flags_in & CSUM_IP_TCP) 2490 csum_flags |= CSUM_INNER_IP_TCP; 2491 if (csum_flags_in & CSUM_IP_TSO) 2492 csum_flags |= CSUM_INNER_IP_TSO; 2493 } else { 2494 #ifdef INVARIANTS 2495 const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP; 2496 2497 MPASS((csum_flags_in & v6) != 0); 2498 #endif 2499 if (csum_flags_in & CSUM_IP6_UDP) 2500 csum_flags |= CSUM_INNER_IP6_UDP; 2501 if (csum_flags_in & CSUM_IP6_TCP) 2502 csum_flags |= CSUM_INNER_IP6_TCP; 2503 if (csum_flags_in & CSUM_IP6_TSO) 2504 csum_flags |= CSUM_INNER_IP6_TSO; 2505 } 2506 2507 return (csum_flags); 2508 } 2509 #endif 2510 2511 static int 2512 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, 2513 struct mbuf *m) 2514 { 2515 #ifdef INET 2516 struct ifnet *ifp; 2517 struct ip *ip; 2518 struct in_addr srcaddr, dstaddr; 2519 uint16_t srcport, dstport; 2520 int plen, mcast, error; 2521 struct route route, *ro; 2522 struct sockaddr_in *sin; 2523 uint32_t csum_flags; 2524 2525 NET_EPOCH_ASSERT(); 2526 2527 ifp = sc->vxl_ifp; 2528 srcaddr = sc->vxl_src_addr.in4.sin_addr; 2529 srcport = vxlan_pick_source_port(sc, m); 2530 dstaddr = fvxlsa->in4.sin_addr; 2531 dstport = fvxlsa->in4.sin_port; 2532 2533 plen = m->m_pkthdr.len; 2534 M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr), 
2535 M_NOWAIT); 2536 if (m == NULL) { 2537 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2538 return (ENOBUFS); 2539 } 2540 2541 ip = mtod(m, struct ip *); 2542 ip->ip_tos = 0; 2543 ip->ip_len = htons(m->m_pkthdr.len); 2544 ip->ip_off = 0; 2545 ip->ip_ttl = sc->vxl_ttl; 2546 ip->ip_p = IPPROTO_UDP; 2547 ip->ip_sum = 0; 2548 ip->ip_src = srcaddr; 2549 ip->ip_dst = dstaddr; 2550 2551 vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport); 2552 2553 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 2554 m->m_flags &= ~(M_MCAST | M_BCAST); 2555 2556 m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; 2557 if (m->m_pkthdr.csum_flags != 0) { 2558 /* 2559 * HW checksum (L3 and/or L4) or TSO has been requested. Look 2560 * up the ifnet for the outbound route and verify that the 2561 * outbound ifnet can perform the requested operation on the 2562 * inner frame. 2563 */ 2564 bzero(&route, sizeof(route)); 2565 ro = &route; 2566 sin = (struct sockaddr_in *)&ro->ro_dst; 2567 sin->sin_family = AF_INET; 2568 sin->sin_len = sizeof(*sin); 2569 sin->sin_addr = ip->ip_dst; 2570 ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, 2571 0); 2572 if (ro->ro_nh == NULL) { 2573 m_freem(m); 2574 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2575 return (EHOSTUNREACH); 2576 } 2577 2578 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, 2579 CSUM_ENCAP_VXLAN); 2580 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != 2581 csum_flags) { 2582 if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { 2583 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; 2584 2585 if_printf(ifp, "interface %s is missing hwcaps " 2586 "0x%08x, csum_flags 0x%08x -> 0x%08x, " 2587 "hwassist 0x%08x\n", nh_ifp->if_xname, 2588 csum_flags & ~(uint32_t)nh_ifp->if_hwassist, 2589 m->m_pkthdr.csum_flags, csum_flags, 2590 (uint32_t)nh_ifp->if_hwassist); 2591 } 2592 m_freem(m); 2593 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2594 return (ENXIO); 2595 } 2596 m->m_pkthdr.csum_flags = csum_flags; 2597 if (csum_flags & 
		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
			counter_u64_add(sc->vxl_stats.txcsum, 1);
			if (csum_flags & CSUM_INNER_TSO)
				counter_u64_add(sc->vxl_stats.tso, 1);
		}
	} else
		ro = NULL;
	error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL);
	if (error == 0) {
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		/* Count only the inner frame length as output bytes. */
		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
		if (mcast != 0)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
	} else
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

	return (error);
#else
	m_freem(m);
	return (ENOTSUP);
#endif
}

/*
 * IPv6 counterpart of vxlan_encap4(): prepend an IPv6 + UDP + VXLAN
 * header and hand the packet to ip6_output().  Verifies the outbound
 * interface's inner-frame offload capabilities when checksum/TSO is
 * requested; otherwise computes the UDP pseudo-header checksum in
 * software unless the destination port is exempted via
 * V_zero_checksum_port.  Consumes the mbuf on every path.
 */
static int
vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
    struct mbuf *m)
{
#ifdef INET6
	struct ifnet *ifp;
	struct ip6_hdr *ip6;
	const struct in6_addr *srcaddr, *dstaddr;
	uint16_t srcport, dstport;
	int plen, mcast, error;
	struct route_in6 route, *ro;
	struct sockaddr_in6 *sin6;
	uint32_t csum_flags;

	NET_EPOCH_ASSERT();

	ifp = sc->vxl_ifp;
	srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
	srcport = vxlan_pick_source_port(sc, m);
	dstaddr = &fvxlsa->in6.sin6_addr;
	dstport = fvxlsa->in6.sin6_port;

	/* Record the inner frame length before prepending headers. */
	plen = m->m_pkthdr.len;
	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
	    M_NOWAIT);
	if (m == NULL) {
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ENOBUFS);
	}

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;	/* BMV: Keep in forwarding entry? */
	ip6->ip6_vfc = IPV6_VERSION;
	ip6->ip6_plen = 0;
	ip6->ip6_nxt = IPPROTO_UDP;
	ip6->ip6_hlim = sc->vxl_ttl;
	ip6->ip6_src = *srcaddr;
	ip6->ip6_dst = *dstaddr;

	vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);

	/* Multicast/broadcast applies to the inner frame, not the outer. */
	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
	m->m_flags &= ~(M_MCAST | M_BCAST);

	ro = NULL;
	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
	if (m->m_pkthdr.csum_flags != 0) {
		/*
		 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
		 * up the ifnet for the outbound route and verify that the
		 * outbound ifnet can perform the requested operation on the
		 * inner frame.
		 */
		bzero(&route, sizeof(route));
		ro = &route;
		sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_addr = ip6->ip6_dst;
		ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0,
		    NHR_NONE, 0);
		if (ro->ro_nh == NULL) {
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (EHOSTUNREACH);
		}

		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
		    CSUM_ENCAP_VXLAN);
		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
		    csum_flags) {
			/* Rate-limit the diagnostic to avoid log flooding. */
			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;

				if_printf(ifp, "interface %s is missing hwcaps "
				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
				    "hwassist 0x%08x\n", nh_ifp->if_xname,
				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
				    m->m_pkthdr.csum_flags, csum_flags,
				    (uint32_t)nh_ifp->if_hwassist);
			}
			m_freem(m);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (ENXIO);
		}
		m->m_pkthdr.csum_flags = csum_flags;
		if (csum_flags &
		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
			counter_u64_add(sc->vxl_stats.txcsum, 1);
			if (csum_flags & CSUM_INNER_TSO)
				counter_u64_add(sc->vxl_stats.tso, 1);
		}
	} else if (ntohs(dstport) != V_zero_checksum_port) {
		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));

		/*
		 * No offload: IPv6/UDP normally requires a non-zero
		 * checksum, so compute the pseudo-header sum here and let
		 * the stack finish it (CSUM_UDP_IPV6).
		 */
		hdr->uh_sum = in6_cksum_pseudo(ip6,
		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
	}
	error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL);
	if (error == 0) {
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		/* Count only the inner frame length as output bytes. */
		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
		if (mcast != 0)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
	} else
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

	return (error);
#else
	m_freem(m);
	return (ENOTSUP);
#endif
}

#define MTAG_VXLAN_LOOP	0x7876706c /* vxlp */
/*
 * if_transmit method: look up the destination VTEP for the inner
 * Ethernet destination address in the forwarding table (falling back
 * to the default entry for unknown/broadcast/multicast destinations)
 * and encapsulate over IPv4 or IPv6 accordingly.  An mbuf tag
 * (MTAG_VXLAN_LOOP) bounds nesting depth to catch encapsulation loops.
 */
static int
vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct rm_priotracker tracker;
	union vxlan_sockaddr vxlsa;
	struct vxlan_softc *sc;
	struct vxlan_ftable_entry *fe;
	struct ifnet *mcifp;
	struct ether_header *eh;
	int ipv4, error;

	sc = ifp->if_softc;
	eh = mtod(m, struct ether_header *);
	fe = NULL;
	mcifp = NULL;

	ETHER_BPF_MTAP(ifp, m);

	VXLAN_RLOCK(sc, &tracker);
	M_SETFIB(m, sc->vxl_fibnum);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VXLAN_RUNLOCK(sc, &tracker);
		m_freem(m);
		return (ENETDOWN);
	}
	if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_VXLAN_LOOP,
	    max_vxlan_nesting) != 0)) {
		VXLAN_RUNLOCK(sc, &tracker);
		m_freem(m);
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return (ELOOP);
	}

	/* Unicast destinations may have a learned forwarding entry. */
	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
		fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
	if (fe == NULL)
		fe = &sc->vxl_default_fe;
	/* Copy the remote address out so the lock can be dropped. */
	vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);

	ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
		mcifp = vxlan_multicast_if_ref(sc, ipv4);

	/* Hold a reference on the softc across the (unlocked) encap. */
	VXLAN_ACQUIRE(sc);
	VXLAN_RUNLOCK(sc, &tracker);

	if (ipv4 != 0)
		error = vxlan_encap4(sc, &vxlsa, m);
	else
		error = vxlan_encap6(sc, &vxlsa, m);

	vxlan_release(sc);
	if (mcifp != NULL)
		if_rele(mcifp);

	return (error);
}

/* if_qflush method: nothing queued in the interface itself. */
static void
vxlan_qflush(struct ifnet *ifp __unused)
{
}

/*
 * UDP tunneling callback invoked for each packet arriving on a VXLAN
 * socket.  Validates the VXLAN header, strips the outer headers, and
 * feeds the inner frame to vxlan_input().  Always returns true (the
 * packet is consumed here, one way or the other).
 */
static bool
vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
    const struct sockaddr *srcsa, void *xvso)
{
	struct vxlan_socket *vso;
	struct vxlan_header *vxh, vxlanhdr;
	uint32_t vni;
	int error __unused;

	M_ASSERTPKTHDR(m);
	vso = xvso;
	offset += sizeof(struct udphdr);

	if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
		goto out;

	/* Copy the header out if it is not contiguous in the first mbuf. */
	if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
		m_copydata(m, offset, sizeof(struct vxlan_header),
		    (caddr_t) &vxlanhdr);
		vxh = &vxlanhdr;
	} else
		vxh = mtodo(m, offset);

	/*
	 * Drop if there is a reserved bit set in either the flags or VNI
	 * fields of the header. This goes against the specification, but
	 * a bit set may indicate an unsupported new feature. This matches
	 * the behavior of the Linux implementation.
	 */
	if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
	    vxh->vxlh_vni & ~VXLAN_VNI_MASK)
		goto out;

	vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;

	/* Adjust to the start of the inner Ethernet frame.
	 */
	m_adj_decap(m, offset + sizeof(struct vxlan_header));

	error = vxlan_input(vso, vni, &m, srcsa);
	MPASS(error != 0 || m == NULL);

out:
	if (m != NULL)
		m_freem(m);

	return (true);
}

/*
 * Deliver a decapsulated inner Ethernet frame to the vxlan interface
 * registered for 'vni' on this socket.  Learns the (source VTEP, inner
 * source MAC) pair when learning is enabled, translates inner
 * hardware-checksum results to their outer equivalents, and injects
 * the frame through if_input.  On success *m0 is consumed and set to
 * NULL; on failure the mbuf is left for the caller to free.
 */
static int
vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
    const struct sockaddr *sa)
{
	struct vxlan_softc *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	int error;

	m = *m0;

	if (m->m_pkthdr.len < ETHER_HDR_LEN)
		return (EINVAL);

	/* Takes a reference on the softc; released at 'out'. */
	sc = vxlan_socket_lookup_softc(vso, vni);
	if (sc == NULL)
		return (ENOENT);

	ifp = sc->vxl_ifp;
	if (m->m_len < ETHER_HDR_LEN &&
	    (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
		*m0 = NULL;
		error = ENOBUFS;
		goto out;
	}
	eh = mtod(m, struct ether_header *);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		error = ENETDOWN;
		goto out;
	} else if (ifp == m->m_pkthdr.rcvif) {
		/* XXX Does not catch more complex loops. */
		error = EDEADLK;
		goto out;
	}

	if (sc->vxl_flags & VXLAN_FLAG_LEARN)
		vxlan_ftable_learn(sc, sa, eh->ether_shost);

	m_clrprotoflags(m);
	m->m_pkthdr.rcvif = ifp;
	M_SETFIB(m, ifp->if_fib);
	if (((ifp->if_capenable & IFCAP_RXCSUM &&
	    m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) ||
	    (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
	    !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) {
		/* Promote inner-frame checksum results to outer flags. */
		uint32_t csum_flags = 0;

		if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)
			csum_flags |= CSUM_L3_CALC;
		if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID)
			csum_flags |= CSUM_L3_VALID;
		if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC)
			csum_flags |= CSUM_L4_CALC;
		if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID)
			csum_flags |= CSUM_L4_VALID;
		m->m_pkthdr.csum_flags = csum_flags;
		counter_u64_add(sc->vxl_stats.rxcsum, 1);
	} else {
		/* clear everything */
		m->m_pkthdr.csum_flags = 0;
		m->m_pkthdr.csum_data = 0;
	}

	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	(*ifp->if_input)(ifp, m);
	*m0 = NULL;
	error = 0;

out:
	vxlan_release(sc);
	return (error);
}

/* Allocate the per-interface statistics counters. */
static void
vxlan_stats_alloc(struct vxlan_softc *sc)
{
	struct vxlan_statistics *stats = &sc->vxl_stats;

	stats->txcsum = counter_u64_alloc(M_WAITOK);
	stats->tso = counter_u64_alloc(M_WAITOK);
	stats->rxcsum = counter_u64_alloc(M_WAITOK);
}

/* Release the counters allocated by vxlan_stats_alloc(). */
static void
vxlan_stats_free(struct vxlan_softc *sc)
{
	struct vxlan_statistics *stats = &sc->vxl_stats;

	counter_u64_free(stats->txcsum);
	counter_u64_free(stats->tso);
	counter_u64_free(stats->rxcsum);
}

/*
 * Initialize a fresh softc with the compiled-in defaults (learning on,
 * IANA or legacy UDP port per the per-unit tunable, ephemeral source
 * port range, default forwarding-table limits).
 */
static void
vxlan_set_default_config(struct vxlan_softc *sc)
{

	sc->vxl_flags |= VXLAN_FLAG_LEARN;

	sc->vxl_vni = VXLAN_VNI_MAX;
	sc->vxl_ttl = IPDEFTTL;

	if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
	} else {
		sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
		sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
	}

	sc->vxl_min_port = V_ipport_firstauto;
	sc->vxl_max_port = V_ipport_lastauto;

	sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
	sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
}

/*
 * Apply the user-supplied creation parameters (from the if_clone
 * params blob) on top of the defaults.  Individual values are only
 * taken when their VXLAN_PARAM_WITH_* bit is set and they pass the
 * corresponding vxlan_check_*() validation; invalid values are
 * silently ignored rather than failing creation.
 */
static int
vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
{

#ifndef INET
	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
	    VXLAN_PARAM_WITH_REMOTE_ADDR4))
		return (EAFNOSUPPORT);
#endif

#ifndef INET6
	if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
	    VXLAN_PARAM_WITH_REMOTE_ADDR6))
		return (EAFNOSUPPORT);
#else
	/* Embed the scope id in any scoped (e.g. link-local) address. */
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
		int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa);
		if (error)
			return (error);
	}
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
		int error = vxlan_sockaddr_in6_embedscope(
		    &vxlp->vxlp_remote_sa);
		if (error)
			return (error);
	}
#endif

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
		if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
			sc->vxl_vni = vxlp->vxlp_vni;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
		sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
		sc->vxl_src_addr.in4.sin_family = AF_INET;
		sc->vxl_src_addr.in4.sin_addr =
		    vxlp->vxlp_local_sa.in4.sin_addr;
	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
		sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
		sc->vxl_src_addr.in6.sin6_family = AF_INET6;
		sc->vxl_src_addr.in6.sin6_addr =
		    vxlp->vxlp_local_sa.in6.sin6_addr;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
		sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
		sc->vxl_dst_addr.in4.sin_family = AF_INET;
		sc->vxl_dst_addr.in4.sin_addr =
		    vxlp->vxlp_remote_sa.in4.sin_addr;
	} else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
		sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
		sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
		sc->vxl_dst_addr.in6.sin6_addr =
		    vxlp->vxlp_remote_sa.in6.sin6_addr;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
		sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
		sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
		if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
			sc->vxl_min_port = vxlp->vxlp_min_port;
			sc->vxl_max_port = vxlp->vxlp_max_port;
		}
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
		strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
		if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
			sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
		if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
			sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
		if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
			sc->vxl_ttl = vxlp->vxlp_ttl;
	}

	if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
		if (vxlp->vxlp_learn == 0)
			sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
	}

	return (0);
}

/*
 * Validate a SIOCSIFCAP request: TSO is only meaningful with the
 * matching transmit-checksum capability, so disabling the checksum
 * implicitly drops TSO, while enabling TSO without it is refused.
 */
static int
vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap)
{
	int mask = reqcap ^ ifp->if_capenable;

	/* Disable TSO if tx checksums are disabled.
	 */
	if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) &&
	    reqcap & IFCAP_TSO4) {
		reqcap &= ~IFCAP_TSO4;
		if_printf(ifp, "tso4 disabled due to -txcsum.\n");
	}
	if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) &&
	    reqcap & IFCAP_TSO6) {
		reqcap &= ~IFCAP_TSO6;
		if_printf(ifp, "tso6 disabled due to -txcsum6.\n");
	}

	/* Do not enable TSO if tx checksums are disabled. */
	if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 &&
	    !(reqcap & IFCAP_TXCSUM)) {
		if_printf(ifp, "enable txcsum first.\n");
		return (EAGAIN);
	}
	if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 &&
	    !(reqcap & IFCAP_TXCSUM_IPV6)) {
		if_printf(ifp, "enable txcsum6 first.\n");
		return (EAGAIN);
	}

	sc->vxl_reqcap = reqcap;
	return (0);
}

/*
 * A VXLAN interface inherits the capabilities of the vxlandev or the interface
 * hosting the vxlanlocal address.
 */
static void
vxlan_set_hwcaps(struct vxlan_softc *sc)
{
	struct epoch_tracker et;
	struct ifnet *p;
	struct ifaddr *ifa;
	u_long hwa;
	int cap, ena;
	bool rel;		/* true when 'p' holds an ifunit_ref reference */
	struct ifnet *ifp = sc->vxl_ifp;

	/* reset caps */
	ifp->if_capabilities &= VXLAN_BASIC_IFCAPS;
	ifp->if_capenable &= VXLAN_BASIC_IFCAPS;
	ifp->if_hwassist = 0;

	NET_EPOCH_ENTER(et);
	CURVNET_SET(ifp->if_vnet);

	/*
	 * Find the underlying interface: the configured multicast
	 * interface if one is named, otherwise the interface owning the
	 * (non-wildcard) local tunnel address.
	 */
	rel = false;
	p = NULL;
	if (sc->vxl_mc_ifname[0] != '\0') {
		rel = true;
		p = ifunit_ref(sc->vxl_mc_ifname);
	} else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
		if (sc->vxl_src_addr.sa.sa_family == AF_INET) {
			struct sockaddr_in in4 = sc->vxl_src_addr.in4;

			in4.sin_port = 0;
			ifa = ifa_ifwithaddr((struct sockaddr *)&in4);
			if (ifa != NULL)
				p = ifa->ifa_ifp;
		} else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) {
			struct sockaddr_in6 in6 = sc->vxl_src_addr.in6;

			in6.sin6_port = 0;
			ifa = ifa_ifwithaddr((struct sockaddr *)&in6);
			if (ifa != NULL)
				p = ifa->ifa_ifp;
		}
	}
	if (p == NULL)
		goto done;

	cap = ena = hwa = 0;

	/* checksum offload */
	if (p->if_capabilities & IFCAP_VXLAN_HWCSUM)
		cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
	if (p->if_capenable & IFCAP_VXLAN_HWCSUM) {
		ena |= sc->vxl_reqcap & p->if_capenable &
		    (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
		if (ena & IFCAP_TXCSUM) {
			if (p->if_hwassist & CSUM_INNER_IP)
				hwa |= CSUM_IP;
			if (p->if_hwassist & CSUM_INNER_IP_UDP)
				hwa |= CSUM_IP_UDP;
			if (p->if_hwassist & CSUM_INNER_IP_TCP)
				hwa |= CSUM_IP_TCP;
		}
		if (ena & IFCAP_TXCSUM_IPV6) {
			if (p->if_hwassist & CSUM_INNER_IP6_UDP)
				hwa |= CSUM_IP6_UDP;
			if (p->if_hwassist & CSUM_INNER_IP6_TCP)
				hwa |= CSUM_IP6_TCP;
		}
	}

	/* hardware TSO */
	if (p->if_capabilities & IFCAP_VXLAN_HWTSO) {
		cap |= p->if_capabilities & IFCAP_TSO;
		if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen)
			ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen;
		else
			ifp->if_hw_tsomax = p->if_hw_tsomax;
		/* XXX: tsomaxsegcount decrement is cxgbe specific */
		ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1;
		ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize;
	}
	if (p->if_capenable & IFCAP_VXLAN_HWTSO) {
		ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO;
		if (ena & IFCAP_TSO) {
			if (p->if_hwassist & CSUM_INNER_IP_TSO)
				hwa |= CSUM_IP_TSO;
			if (p->if_hwassist & CSUM_INNER_IP6_TSO)
				hwa |= CSUM_IP6_TSO;
		}
	}

	ifp->if_capabilities |= cap;
	ifp->if_capenable |= ena;
	ifp->if_hwassist |= hwa;
	if (rel)
		if_rele(p);
done:
	CURVNET_RESTORE();
	NET_EPOCH_EXIT(et);
}

/*
 * if_clone create method: allocate and initialize the softc, apply any
 * user parameters, and attach the Ethernet interface.  On a parameter
 * error only the softc has been allocated, so the fail path just frees
 * it.
 */
static int
vxlan_clone_create(struct if_clone *ifc, char *name, size_t len,
    struct ifc_data *ifd, struct ifnet **ifpp)
{
	struct vxlan_softc *sc;
	struct ifnet *ifp;
	struct ifvxlanparam vxlp;
	int error;

	sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
	sc->vxl_unit = ifd->unit;
	sc->vxl_fibnum = curthread->td_proc->p_fibnum;
	vxlan_set_default_config(sc);

	if (ifd->params != NULL) {
		error = ifc_copyin(ifd, &vxlp, sizeof(vxlp));
		if (error)
			goto fail;

		error = vxlan_set_user_config(sc, &vxlp);
		if (error)
			goto fail;
	}

	vxlan_stats_alloc(sc);
	ifp = if_alloc(IFT_ETHER);
	sc->vxl_ifp = ifp;
	rm_init(&sc->vxl_lock, "vxlanrm");
	callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
	sc->vxl_port_hash_key = arc4random();
	vxlan_ftable_init(sc);

	vxlan_sysctl_setup(sc);

	ifp->if_softc = sc;
	if_initname(ifp, vxlan_name, ifd->unit);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = vxlan_init;
	ifp->if_ioctl = vxlan_ioctl;
	ifp->if_transmit = vxlan_transmit;
	ifp->if_qflush = vxlan_qflush;
	ifp->if_capabilities = VXLAN_BASIC_IFCAPS;
	ifp->if_capenable = VXLAN_BASIC_IFCAPS;
	/* Request everything; vxlan_set_hwcaps() trims to what exists. */
	sc->vxl_reqcap = -1;
	vxlan_set_hwcaps(sc);

	ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status);
	ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO);

	ether_gen_addr(ifp, &sc->vxl_hwaddr);
	ether_ifattach(ifp, sc->vxl_hwaddr.octet);

	ifp->if_baudrate = 0;

	VXLAN_WLOCK(sc);
	vxlan_setup_interface_hdrlen(sc);
	VXLAN_WUNLOCK(sc);
	*ifpp = ifp;

	return (0);

fail:
	free(sc, M_VXLAN);
	return (error);
}

/*
 * if_clone destroy method: tear down the tunnel state, detach the
 * interface, and release all resources allocated in clone_create.
 */
static int
vxlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
{
	struct vxlan_softc *sc;

	sc = ifp->if_softc;

	vxlan_teardown(sc);

	vxlan_ftable_flush(sc, 1);

	ether_ifdetach(ifp);
	if_free(ifp);
	ifmedia_removeall(&sc->vxl_media);

	vxlan_ftable_fini(sc);

	vxlan_sysctl_destroy(sc);
	rm_destroy(&sc->vxl_lock);
	vxlan_stats_free(sc);
	free(sc, M_VXLAN);

	return (0);
}

/* BMV: Taken from if_bridge. */
static uint32_t
vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
{
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;

	b += addr[5] << 8;
	b += addr[4];
	a += addr[3] << 24;
	a += addr[2] << 16;
	a += addr[1] << 8;
	a += addr[0];

/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 */
#define	mix(a, b, c)							\
do {									\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (0)

	mix(a, b, c);

#undef mix

	return (c);
}

/* ifmedia change method: media is fixed, nothing to do. */
static int
vxlan_media_change(struct ifnet *ifp)
{

	/* Ignore.
*/ 3335 return (0); 3336 } 3337 3338 static void 3339 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3340 { 3341 3342 ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID; 3343 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3344 } 3345 3346 static int 3347 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr, 3348 const struct sockaddr *sa) 3349 { 3350 3351 return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len)); 3352 } 3353 3354 static void 3355 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr, 3356 const struct sockaddr *sa) 3357 { 3358 3359 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3360 bzero(vxladdr, sizeof(*vxladdr)); 3361 3362 if (sa->sa_family == AF_INET) { 3363 vxladdr->in4 = *satoconstsin(sa); 3364 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3365 } else if (sa->sa_family == AF_INET6) { 3366 vxladdr->in6 = *satoconstsin6(sa); 3367 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3368 } 3369 } 3370 3371 static int 3372 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr, 3373 const struct sockaddr *sa) 3374 { 3375 int equal; 3376 3377 if (sa->sa_family == AF_INET) { 3378 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3379 equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr; 3380 } else if (sa->sa_family == AF_INET6) { 3381 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3382 equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr); 3383 } else 3384 equal = 0; 3385 3386 return (equal); 3387 } 3388 3389 static void 3390 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr, 3391 const struct sockaddr *sa) 3392 { 3393 3394 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3395 3396 if (sa->sa_family == AF_INET) { 3397 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3398 vxladdr->in4.sin_family = AF_INET; 3399 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3400 vxladdr->in4.sin_addr = *in4; 3401 } else if (sa->sa_family == AF_INET6) { 3402 const struct in6_addr *in6 = 
&satoconstsin6(sa)->sin6_addr; 3403 vxladdr->in6.sin6_family = AF_INET6; 3404 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3405 vxladdr->in6.sin6_addr = *in6; 3406 } 3407 } 3408 3409 static int 3410 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec) 3411 { 3412 const struct sockaddr *sa; 3413 int supported; 3414 3415 sa = &vxladdr->sa; 3416 supported = 0; 3417 3418 if (sa->sa_family == AF_UNSPEC && unspec != 0) { 3419 supported = 1; 3420 } else if (sa->sa_family == AF_INET) { 3421 #ifdef INET 3422 supported = 1; 3423 #endif 3424 } else if (sa->sa_family == AF_INET6) { 3425 #ifdef INET6 3426 supported = 1; 3427 #endif 3428 } 3429 3430 return (supported); 3431 } 3432 3433 static int 3434 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr) 3435 { 3436 const struct sockaddr *sa; 3437 int any; 3438 3439 sa = &vxladdr->sa; 3440 3441 if (sa->sa_family == AF_INET) { 3442 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3443 any = in4->s_addr == INADDR_ANY; 3444 } else if (sa->sa_family == AF_INET6) { 3445 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3446 any = IN6_IS_ADDR_UNSPECIFIED(in6); 3447 } else 3448 any = -1; 3449 3450 return (any); 3451 } 3452 3453 static int 3454 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr) 3455 { 3456 const struct sockaddr *sa; 3457 int mc; 3458 3459 sa = &vxladdr->sa; 3460 3461 if (sa->sa_family == AF_INET) { 3462 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3463 mc = IN_MULTICAST(ntohl(in4->s_addr)); 3464 } else if (sa->sa_family == AF_INET6) { 3465 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3466 mc = IN6_IS_ADDR_MULTICAST(in6); 3467 } else 3468 mc = -1; 3469 3470 return (mc); 3471 } 3472 3473 static int 3474 vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr) 3475 { 3476 int error; 3477 3478 MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr)); 3479 #ifdef INET6 3480 error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone); 3481 #else 3482 
error = EAFNOSUPPORT;
#endif

	return (error);
}

/*
 * Return non-zero if the interface configuration may be changed.
 * Changes are refused while the interface is running or while an
 * init/teardown is in flight; callers must hold the softc lock.
 */
static int
vxlan_can_change_config(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	ifp = sc->vxl_ifp;
	VXLAN_LOCK_ASSERT(sc);

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return (0);
	if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN))
		return (0);

	return (1);
}

/*
 * Parameter validators: each returns non-zero when the value is OUT of
 * range (i.e. truthy means "reject").  Note the VNI check uses >= since
 * VXLAN_VNI_MAX itself is the first invalid value.
 */
static int
vxlan_check_vni(uint32_t vni)
{

	return (vni >= VXLAN_VNI_MAX);
}

/* Reject TTL values above the maximum IP TTL. */
static int
vxlan_check_ttl(int ttl)
{

	return (ttl > MAXTTL);
}

/* Reject forwarding-table prune timeouts above the supported maximum. */
static int
vxlan_check_ftable_timeout(uint32_t timeout)
{

	return (timeout > VXLAN_FTABLE_MAX_TIMEOUT);
}

/* Reject forwarding-table entry limits above the supported maximum. */
static int
vxlan_check_ftable_max(uint32_t max)
{

	return (max > VXLAN_FTABLE_MAX);
}

/*
 * Create the per-interface sysctl tree net.link.vxlan.<unit> with two
 * subtrees: "ftable" (forwarding table count/limits and a hidden "dump"
 * proc) and "stats" (soft counters plus hardware-assist counters).
 */
static void
vxlan_sysctl_setup(struct vxlan_softc *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *node;
	struct vxlan_statistics *stats;
	char namebuf[8];

	ctx = &sc->vxl_sysctl_ctx;
	stats = &sc->vxl_stats;
	snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);

	sysctl_ctx_init(ctx);
	sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
	    SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");

	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
	    OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
	    CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
	    "Number of entries in forwarding table");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
	    CTLFLAG_RD, &sc->vxl_ftable_max, 0,
	    "Maximum number of entries allowed in forwarding table");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
	    CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
	    "Number of seconds between prunes of the forwarding table");
	/* CTLFLAG_SKIP hides "dump" from sysctl tree walks. */
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
	    sc, 0, vxlan_ftable_sysctl_dump, "A",
	    "Dump the forwarding table entries");

	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
	    OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
	/* NOTE(review): "Fowarding" typo is in the existing runtime string. */
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
	    "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
	    "Fowarding table reached maximum entries");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
	    "ftable_lock_upgrade_failed", CTLFLAG_RD,
	    &stats->ftable_lock_upgrade_failed, 0,
	    "Forwarding table update required lock upgrade");

	SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum",
	    CTLFLAG_RD, &stats->txcsum,
	    "# of times hardware assisted with tx checksum");
	SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso",
	    CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO");
	SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum",
	    CTLFLAG_RD, &stats->rxcsum,
	    "# of times hardware assisted with rx checksum");
}

/*
 * Tear down the per-interface sysctl tree; freeing the context removes
 * every OID added through it.
 */
static void
vxlan_sysctl_destroy(struct vxlan_softc *sc)
{

	sysctl_ctx_free(&sc->vxl_sysctl_ctx);
	sc->vxl_sysctl_node = NULL;
}

/*
 * Fetch the per-unit tunable net.link.vxlan.<unit>.<knob> from the
 * kernel environment; 'def' is returned unchanged when the tunable is
 * not set.
 */
static int
vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
{
	char path[64];

	snprintf(path, sizeof(path), "net.link.vxlan.%d.%s",
	    sc->vxl_unit, knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}

/*
 * ifnet departure eventhandler: collect (via vxlan_socket_ifdetach())
 * and tear down the vxlan interfaces affected by the departing ifnet.
 * Renaming is not a real departure, so it is ignored; the IFF_MULTICAST
 * early return suggests only interfaces usable for multicast joins are
 * of interest -- confirm against vxlan_socket_ifdetach().
 */
static void
vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
{
	struct vxlan_softc_head list;
	struct vxlan_socket *vso;
	struct vxlan_softc *sc, *tsc;

	LIST_INIT(&list);

	if (ifp->if_flags & IFF_RENAMING)
		return;
if ((ifp->if_flags & IFF_MULTICAST) == 0)
		return;

	/* Gather affected softcs under the list lock ... */
	VXLAN_LIST_LOCK();
	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
		vxlan_socket_ifdetach(vso, ifp, &list);
	VXLAN_LIST_UNLOCK();

	/*
	 * ... then tear each one down without the list lock held.  Wait
	 * for any in-progress init first, so teardown sees a stable state.
	 */
	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
		LIST_REMOVE(sc, vxl_ifdetach_list);

		sx_xlock(&vxlan_sx);
		VXLAN_WLOCK(sc);
		if (sc->vxl_flags & VXLAN_FLAG_INIT)
			vxlan_init_wait(sc);
		vxlan_teardown_locked(sc);
		sx_xunlock(&vxlan_sx);
	}
}

/*
 * Module load: initialize the global socket list and its mutex,
 * register the interface-departure eventhandler, and attach the
 * "vxlan" interface cloner.
 */
static void
vxlan_load(void)
{

	mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
	LIST_INIT(&vxlan_socket_list);
	vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
	    vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);

	struct if_clone_addreq req = {
		.create_f = vxlan_clone_create,
		.destroy_f = vxlan_clone_destroy,
		.flags = IFC_F_AUTOUNIT,
	};
	vxlan_cloner = ifc_attach_cloner(vxlan_name, &req);
}

/*
 * Module unload: undo vxlan_load().  The assertion expects the cloner
 * detach to have destroyed all interfaces, leaving the socket list
 * empty.
 */
static void
vxlan_unload(void)
{

	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    vxlan_ifdetach_event_tag);
	ifc_detach_cloner(vxlan_cloner);
	mtx_destroy(&vxlan_list_mtx);
	MPASS(LIST_EMPTY(&vxlan_socket_list));
}

/*
 * Module event handler: dispatch load/unload; other events (e.g.
 * MOD_QUIESCE) fall through to ENOTSUP.
 */
static int
vxlan_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
		vxlan_load();
		break;
	case MOD_UNLOAD:
		vxlan_unload();
		break;
	default:
		error = ENOTSUP;
		break;
	}

	return (error);
}

static moduledata_t vxlan_mod = {
	"if_vxlan",
	vxlan_modevent,
	0
};

DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_vxlan, 1);