1 /*- 2 * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org> 3 * All rights reserved. 4 * Copyright (c) 2020, Chelsio Communications. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 */ 27 28 #include "opt_inet.h" 29 #include "opt_inet6.h" 30 31 #include <sys/param.h> 32 #include <sys/eventhandler.h> 33 #include <sys/kernel.h> 34 #include <sys/lock.h> 35 #include <sys/hash.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/module.h> 39 #include <sys/refcount.h> 40 #include <sys/rmlock.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/queue.h> 44 #include <sys/sbuf.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/sockio.h> 48 #include <sys/sysctl.h> 49 #include <sys/systm.h> 50 51 #include <net/bpf.h> 52 #include <net/ethernet.h> 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_private.h> 56 #include <net/if_clone.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 #include <net/if_types.h> 60 #include <net/if_vxlan.h> 61 #include <net/netisr.h> 62 #include <net/route.h> 63 #include <net/route/nhop.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_var.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/ip.h> 70 #include <netinet/ip6.h> 71 #include <netinet/ip_var.h> 72 #include <netinet/udp.h> 73 #include <netinet/udp_var.h> 74 #include <netinet/in_fib.h> 75 #include <netinet6/in6_fib.h> 76 77 #include <netinet6/ip6_var.h> 78 #include <netinet6/scope6_var.h> 79 80 struct vxlan_softc; 81 LIST_HEAD(vxlan_softc_head, vxlan_softc); 82 83 struct sx vxlan_sx; 84 SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock"); 85 86 struct vxlan_socket_mc_info { 87 union vxlan_sockaddr vxlsomc_saddr; 88 union vxlan_sockaddr vxlsomc_gaddr; 89 int vxlsomc_ifidx; 90 int vxlsomc_users; 91 }; 92 93 /* 94 * The maximum MTU of encapsulated ethernet frame within IPv4/UDP packet. 
95 */ 96 #define VXLAN_MAX_MTU (IP_MAXPACKET - \ 97 60 /* Maximum IPv4 header len */ - \ 98 sizeof(struct udphdr) - \ 99 sizeof(struct vxlan_header) - \ 100 ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN) 101 #define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU) 102 103 #define VXLAN_SO_MC_MAX_GROUPS 32 104 105 #define VXLAN_SO_VNI_HASH_SHIFT 6 106 #define VXLAN_SO_VNI_HASH_SIZE (1 << VXLAN_SO_VNI_HASH_SHIFT) 107 #define VXLAN_SO_VNI_HASH(_vni) ((_vni) % VXLAN_SO_VNI_HASH_SIZE) 108 109 struct vxlan_socket { 110 struct socket *vxlso_sock; 111 struct rmlock vxlso_lock; 112 u_int vxlso_refcnt; 113 union vxlan_sockaddr vxlso_laddr; 114 LIST_ENTRY(vxlan_socket) vxlso_entry; 115 struct vxlan_softc_head vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE]; 116 struct vxlan_socket_mc_info vxlso_mc[VXLAN_SO_MC_MAX_GROUPS]; 117 }; 118 119 #define VXLAN_SO_RLOCK(_vso, _p) rm_rlock(&(_vso)->vxlso_lock, (_p)) 120 #define VXLAN_SO_RUNLOCK(_vso, _p) rm_runlock(&(_vso)->vxlso_lock, (_p)) 121 #define VXLAN_SO_WLOCK(_vso) rm_wlock(&(_vso)->vxlso_lock) 122 #define VXLAN_SO_WUNLOCK(_vso) rm_wunlock(&(_vso)->vxlso_lock) 123 #define VXLAN_SO_LOCK_ASSERT(_vso) \ 124 rm_assert(&(_vso)->vxlso_lock, RA_LOCKED) 125 #define VXLAN_SO_LOCK_WASSERT(_vso) \ 126 rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED) 127 128 #define VXLAN_SO_ACQUIRE(_vso) refcount_acquire(&(_vso)->vxlso_refcnt) 129 #define VXLAN_SO_RELEASE(_vso) refcount_release(&(_vso)->vxlso_refcnt) 130 131 struct vxlan_ftable_entry { 132 LIST_ENTRY(vxlan_ftable_entry) vxlfe_hash; 133 uint16_t vxlfe_flags; 134 uint8_t vxlfe_mac[ETHER_ADDR_LEN]; 135 union vxlan_sockaddr vxlfe_raddr; 136 time_t vxlfe_expire; 137 }; 138 139 #define VXLAN_FE_FLAG_DYNAMIC 0x01 140 #define VXLAN_FE_FLAG_STATIC 0x02 141 142 #define VXLAN_FE_IS_DYNAMIC(_fe) \ 143 ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC) 144 145 #define VXLAN_SC_FTABLE_SHIFT 9 146 #define VXLAN_SC_FTABLE_SIZE (1 << VXLAN_SC_FTABLE_SHIFT) 147 #define VXLAN_SC_FTABLE_MASK (VXLAN_SC_FTABLE_SIZE - 1) 148 #define 
VXLAN_SC_FTABLE_HASH(_sc, _mac) \ 149 (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE) 150 151 LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry); 152 153 struct vxlan_statistics { 154 uint32_t ftable_nospace; 155 uint32_t ftable_lock_upgrade_failed; 156 counter_u64_t txcsum; 157 counter_u64_t tso; 158 counter_u64_t rxcsum; 159 }; 160 161 struct vxlan_softc { 162 struct ifnet *vxl_ifp; 163 int vxl_reqcap; 164 u_int vxl_fibnum; 165 struct vxlan_socket *vxl_sock; 166 uint32_t vxl_vni; 167 union vxlan_sockaddr vxl_src_addr; 168 union vxlan_sockaddr vxl_dst_addr; 169 uint32_t vxl_flags; 170 #define VXLAN_FLAG_INIT 0x0001 171 #define VXLAN_FLAG_TEARDOWN 0x0002 172 #define VXLAN_FLAG_LEARN 0x0004 173 #define VXLAN_FLAG_USER_MTU 0x0008 174 175 uint32_t vxl_port_hash_key; 176 uint16_t vxl_min_port; 177 uint16_t vxl_max_port; 178 uint8_t vxl_ttl; 179 180 /* Lookup table from MAC address to forwarding entry. */ 181 uint32_t vxl_ftable_cnt; 182 uint32_t vxl_ftable_max; 183 uint32_t vxl_ftable_timeout; 184 uint32_t vxl_ftable_hash_key; 185 struct vxlan_ftable_head *vxl_ftable; 186 187 /* Derived from vxl_dst_addr. */ 188 struct vxlan_ftable_entry vxl_default_fe; 189 190 struct ip_moptions *vxl_im4o; 191 struct ip6_moptions *vxl_im6o; 192 193 struct rmlock vxl_lock; 194 volatile u_int vxl_refcnt; 195 196 int vxl_unit; 197 int vxl_vso_mc_index; 198 struct vxlan_statistics vxl_stats; 199 struct sysctl_oid *vxl_sysctl_node; 200 struct sysctl_ctx_list vxl_sysctl_ctx; 201 struct callout vxl_callout; 202 struct ether_addr vxl_hwaddr; 203 int vxl_mc_ifindex; 204 struct ifnet *vxl_mc_ifp; 205 struct ifmedia vxl_media; 206 char vxl_mc_ifname[IFNAMSIZ]; 207 LIST_ENTRY(vxlan_softc) vxl_entry; 208 LIST_ENTRY(vxlan_softc) vxl_ifdetach_list; 209 210 /* For rate limiting errors on the tx fast path. 
*/ 211 struct timeval err_time; 212 int err_pps; 213 }; 214 215 #define VXLAN_RLOCK(_sc, _p) rm_rlock(&(_sc)->vxl_lock, (_p)) 216 #define VXLAN_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->vxl_lock, (_p)) 217 #define VXLAN_WLOCK(_sc) rm_wlock(&(_sc)->vxl_lock) 218 #define VXLAN_WUNLOCK(_sc) rm_wunlock(&(_sc)->vxl_lock) 219 #define VXLAN_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->vxl_lock) 220 #define VXLAN_LOCK_ASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_LOCKED) 221 #define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED) 222 #define VXLAN_UNLOCK(_sc, _p) do { \ 223 if (VXLAN_LOCK_WOWNED(_sc)) \ 224 VXLAN_WUNLOCK(_sc); \ 225 else \ 226 VXLAN_RUNLOCK(_sc, _p); \ 227 } while (0) 228 229 #define VXLAN_ACQUIRE(_sc) refcount_acquire(&(_sc)->vxl_refcnt) 230 #define VXLAN_RELEASE(_sc) refcount_release(&(_sc)->vxl_refcnt) 231 232 #define satoconstsin(sa) ((const struct sockaddr_in *)(sa)) 233 #define satoconstsin6(sa) ((const struct sockaddr_in6 *)(sa)) 234 235 struct vxlanudphdr { 236 struct udphdr vxlh_udp; 237 struct vxlan_header vxlh_hdr; 238 } __packed; 239 240 static int vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *); 241 static void vxlan_ftable_init(struct vxlan_softc *); 242 static void vxlan_ftable_fini(struct vxlan_softc *); 243 static void vxlan_ftable_flush(struct vxlan_softc *, int); 244 static void vxlan_ftable_expire(struct vxlan_softc *); 245 static int vxlan_ftable_update_locked(struct vxlan_softc *, 246 const union vxlan_sockaddr *, const uint8_t *, 247 struct rm_priotracker *); 248 static int vxlan_ftable_learn(struct vxlan_softc *, 249 const struct sockaddr *, const uint8_t *); 250 static int vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS); 251 252 static struct vxlan_ftable_entry * 253 vxlan_ftable_entry_alloc(void); 254 static void vxlan_ftable_entry_free(struct vxlan_ftable_entry *); 255 static void vxlan_ftable_entry_init(struct vxlan_softc *, 256 struct vxlan_ftable_entry *, const uint8_t *, 257 const struct sockaddr *, uint32_t); 258 static 
void vxlan_ftable_entry_destroy(struct vxlan_softc *, 259 struct vxlan_ftable_entry *); 260 static int vxlan_ftable_entry_insert(struct vxlan_softc *, 261 struct vxlan_ftable_entry *); 262 static struct vxlan_ftable_entry * 263 vxlan_ftable_entry_lookup(struct vxlan_softc *, 264 const uint8_t *); 265 static void vxlan_ftable_entry_dump(struct vxlan_ftable_entry *, 266 struct sbuf *); 267 268 static struct vxlan_socket * 269 vxlan_socket_alloc(const union vxlan_sockaddr *); 270 static void vxlan_socket_destroy(struct vxlan_socket *); 271 static void vxlan_socket_release(struct vxlan_socket *); 272 static struct vxlan_socket * 273 vxlan_socket_lookup(union vxlan_sockaddr *vxlsa); 274 static void vxlan_socket_insert(struct vxlan_socket *); 275 static int vxlan_socket_init(struct vxlan_socket *, struct ifnet *); 276 static int vxlan_socket_bind(struct vxlan_socket *, struct ifnet *); 277 static int vxlan_socket_create(struct ifnet *, int, 278 const union vxlan_sockaddr *, struct vxlan_socket **); 279 static void vxlan_socket_ifdetach(struct vxlan_socket *, 280 struct ifnet *, struct vxlan_softc_head *); 281 282 static struct vxlan_socket * 283 vxlan_socket_mc_lookup(const union vxlan_sockaddr *); 284 static int vxlan_sockaddr_mc_info_match( 285 const struct vxlan_socket_mc_info *, 286 const union vxlan_sockaddr *, 287 const union vxlan_sockaddr *, int); 288 static int vxlan_socket_mc_join_group(struct vxlan_socket *, 289 const union vxlan_sockaddr *, const union vxlan_sockaddr *, 290 int *, union vxlan_sockaddr *); 291 static int vxlan_socket_mc_leave_group(struct vxlan_socket *, 292 const union vxlan_sockaddr *, 293 const union vxlan_sockaddr *, int); 294 static int vxlan_socket_mc_add_group(struct vxlan_socket *, 295 const union vxlan_sockaddr *, const union vxlan_sockaddr *, 296 int, int *); 297 static void vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *, 298 int); 299 300 static struct vxlan_softc * 301 vxlan_socket_lookup_softc_locked(struct 
vxlan_socket *, 302 uint32_t); 303 static struct vxlan_softc * 304 vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t); 305 static int vxlan_socket_insert_softc(struct vxlan_socket *, 306 struct vxlan_softc *); 307 static void vxlan_socket_remove_softc(struct vxlan_socket *, 308 struct vxlan_softc *); 309 310 static struct ifnet * 311 vxlan_multicast_if_ref(struct vxlan_softc *, int); 312 static void vxlan_free_multicast(struct vxlan_softc *); 313 static int vxlan_setup_multicast_interface(struct vxlan_softc *); 314 315 static int vxlan_setup_multicast(struct vxlan_softc *); 316 static int vxlan_setup_socket(struct vxlan_softc *); 317 #ifdef INET6 318 static void vxlan_setup_zero_checksum_port(struct vxlan_softc *); 319 #endif 320 static void vxlan_setup_interface_hdrlen(struct vxlan_softc *); 321 static int vxlan_valid_init_config(struct vxlan_softc *); 322 static void vxlan_init_wait(struct vxlan_softc *); 323 static void vxlan_init_complete(struct vxlan_softc *); 324 static void vxlan_init(void *); 325 static void vxlan_release(struct vxlan_softc *); 326 static void vxlan_teardown_wait(struct vxlan_softc *); 327 static void vxlan_teardown_complete(struct vxlan_softc *); 328 static void vxlan_teardown_locked(struct vxlan_softc *); 329 static void vxlan_teardown(struct vxlan_softc *); 330 static void vxlan_ifdetach(struct vxlan_softc *, struct ifnet *, 331 struct vxlan_softc_head *); 332 static void vxlan_timer(void *); 333 334 static int vxlan_ctrl_get_config(struct vxlan_softc *, void *); 335 static int vxlan_ctrl_set_vni(struct vxlan_softc *, void *); 336 static int vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *); 337 static int vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *); 338 static int vxlan_ctrl_set_local_port(struct vxlan_softc *, void *); 339 static int vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *); 340 static int vxlan_ctrl_set_port_range(struct vxlan_softc *, void *); 341 static int 
vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *); 342 static int vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *); 343 static int vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *); 344 static int vxlan_ctrl_set_ttl(struct vxlan_softc *, void *); 345 static int vxlan_ctrl_set_learn(struct vxlan_softc *, void *); 346 static int vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *); 347 static int vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *); 348 static int vxlan_ctrl_flush(struct vxlan_softc *, void *); 349 static int vxlan_ioctl_drvspec(struct vxlan_softc *, 350 struct ifdrv *, int); 351 static int vxlan_ioctl_ifflags(struct vxlan_softc *); 352 static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); 353 354 #if defined(INET) || defined(INET6) 355 static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *); 356 static void vxlan_encap_header(struct vxlan_softc *, struct mbuf *, 357 int, uint16_t, uint16_t); 358 #endif 359 static int vxlan_encap4(struct vxlan_softc *, 360 const union vxlan_sockaddr *, struct mbuf *); 361 static int vxlan_encap6(struct vxlan_softc *, 362 const union vxlan_sockaddr *, struct mbuf *); 363 static int vxlan_transmit(struct ifnet *, struct mbuf *); 364 static void vxlan_qflush(struct ifnet *); 365 static bool vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *, 366 const struct sockaddr *, void *); 367 static int vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **, 368 const struct sockaddr *); 369 370 static void vxlan_stats_alloc(struct vxlan_softc *); 371 static void vxlan_stats_free(struct vxlan_softc *); 372 static void vxlan_set_default_config(struct vxlan_softc *); 373 static int vxlan_set_user_config(struct vxlan_softc *, 374 struct ifvxlanparam *); 375 static int vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int); 376 static void vxlan_set_hwcaps(struct vxlan_softc *); 377 static int vxlan_clone_create(struct if_clone *, char *, size_t, 378 struct 
ifc_data *, struct ifnet **); 379 static int vxlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); 380 381 static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *); 382 static int vxlan_media_change(struct ifnet *); 383 static void vxlan_media_status(struct ifnet *, struct ifmediareq *); 384 385 static int vxlan_sockaddr_cmp(const union vxlan_sockaddr *, 386 const struct sockaddr *); 387 static void vxlan_sockaddr_copy(union vxlan_sockaddr *, 388 const struct sockaddr *); 389 static int vxlan_sockaddr_in_equal(const union vxlan_sockaddr *, 390 const struct sockaddr *); 391 static void vxlan_sockaddr_in_copy(union vxlan_sockaddr *, 392 const struct sockaddr *); 393 static int vxlan_sockaddr_supported(const union vxlan_sockaddr *, int); 394 static int vxlan_sockaddr_in_any(const union vxlan_sockaddr *); 395 static int vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *); 396 static int vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *); 397 398 static int vxlan_can_change_config(struct vxlan_softc *); 399 static int vxlan_check_vni(uint32_t); 400 static int vxlan_check_ttl(int); 401 static int vxlan_check_ftable_timeout(uint32_t); 402 static int vxlan_check_ftable_max(uint32_t); 403 404 static void vxlan_sysctl_setup(struct vxlan_softc *); 405 static void vxlan_sysctl_destroy(struct vxlan_softc *); 406 static int vxlan_tunable_int(struct vxlan_softc *, const char *, int); 407 408 static void vxlan_ifdetach_event(void *, struct ifnet *); 409 static void vxlan_load(void); 410 static void vxlan_unload(void); 411 static int vxlan_modevent(module_t, int, void *); 412 413 static const char vxlan_name[] = "vxlan"; 414 static MALLOC_DEFINE(M_VXLAN, vxlan_name, 415 "Virtual eXtensible LAN Interface"); 416 static struct if_clone *vxlan_cloner; 417 418 static struct mtx vxlan_list_mtx; 419 #define VXLAN_LIST_LOCK() mtx_lock(&vxlan_list_mtx) 420 #define VXLAN_LIST_UNLOCK() mtx_unlock(&vxlan_list_mtx) 421 422 static LIST_HEAD(, vxlan_socket) 
vxlan_socket_list; 423 424 static eventhandler_tag vxlan_ifdetach_event_tag; 425 426 SYSCTL_DECL(_net_link); 427 SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 428 "Virtual eXtensible Local Area Network"); 429 430 static int vxlan_legacy_port = 0; 431 TUNABLE_INT("net.link.vxlan.legacy_port", &vxlan_legacy_port); 432 static int vxlan_reuse_port = 0; 433 TUNABLE_INT("net.link.vxlan.reuse_port", &vxlan_reuse_port); 434 435 /* 436 * This macro controls the default upper limitation on nesting of vxlan 437 * tunnels. By default it is 3, as the overhead of IPv6 vxlan tunnel is 70 438 * bytes, this will create at most 210 bytes overhead and the most inner 439 * tunnel's MTU will be 1290 which will meet IPv6 minimum MTU size 1280. 440 * Be careful to configure the tunnels when raising the limit. A large 441 * number of nested tunnels can introduce system crash. 442 */ 443 #ifndef MAX_VXLAN_NEST 444 #define MAX_VXLAN_NEST 3 445 #endif 446 static int max_vxlan_nesting = MAX_VXLAN_NEST; 447 SYSCTL_INT(_net_link_vxlan, OID_AUTO, max_nesting, CTLFLAG_RW, 448 &max_vxlan_nesting, 0, "Max nested tunnels"); 449 450 /* Default maximum number of addresses in the forwarding table. */ 451 #ifndef VXLAN_FTABLE_MAX 452 #define VXLAN_FTABLE_MAX 2000 453 #endif 454 455 /* Timeout (in seconds) of addresses learned in the forwarding table. */ 456 #ifndef VXLAN_FTABLE_TIMEOUT 457 #define VXLAN_FTABLE_TIMEOUT (20 * 60) 458 #endif 459 460 /* 461 * Maximum timeout (in seconds) of addresses learned in the forwarding 462 * table. 463 */ 464 #ifndef VXLAN_FTABLE_MAX_TIMEOUT 465 #define VXLAN_FTABLE_MAX_TIMEOUT (60 * 60 * 24) 466 #endif 467 468 /* Number of seconds between pruning attempts of the forwarding table. 
*/ 469 #ifndef VXLAN_FTABLE_PRUNE 470 #define VXLAN_FTABLE_PRUNE (5 * 60) 471 #endif 472 473 static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE; 474 475 struct vxlan_control { 476 int (*vxlc_func)(struct vxlan_softc *, void *); 477 int vxlc_argsize; 478 int vxlc_flags; 479 #define VXLAN_CTRL_FLAG_COPYIN 0x01 480 #define VXLAN_CTRL_FLAG_COPYOUT 0x02 481 #define VXLAN_CTRL_FLAG_SUSER 0x04 482 }; 483 484 static const struct vxlan_control vxlan_control_table[] = { 485 [VXLAN_CMD_GET_CONFIG] = 486 { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg), 487 VXLAN_CTRL_FLAG_COPYOUT 488 }, 489 490 [VXLAN_CMD_SET_VNI] = 491 { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd), 492 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 493 }, 494 495 [VXLAN_CMD_SET_LOCAL_ADDR] = 496 { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd), 497 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 498 }, 499 500 [VXLAN_CMD_SET_REMOTE_ADDR] = 501 { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd), 502 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 503 }, 504 505 [VXLAN_CMD_SET_LOCAL_PORT] = 506 { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd), 507 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 508 }, 509 510 [VXLAN_CMD_SET_REMOTE_PORT] = 511 { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd), 512 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 513 }, 514 515 [VXLAN_CMD_SET_PORT_RANGE] = 516 { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd), 517 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 518 }, 519 520 [VXLAN_CMD_SET_FTABLE_TIMEOUT] = 521 { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd), 522 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 523 }, 524 525 [VXLAN_CMD_SET_FTABLE_MAX] = 526 { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd), 527 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 528 }, 529 530 [VXLAN_CMD_SET_MULTICAST_IF] = 531 { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd), 532 VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER, 533 }, 
	[VXLAN_CMD_SET_TTL] =
	    { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_SET_LEARN] =
	    { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_ADD] =
	    { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FTABLE_ENTRY_REM] =
	    { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },

	[VXLAN_CMD_FLUSH] =
	    { vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
	    },
};

static const int vxlan_control_table_size = nitems(vxlan_control_table);

/*
 * Compare two MAC addresses lexicographically; returns <0, 0, or >0
 * like memcmp().  Used to keep each forwarding-table hash chain sorted.
 */
static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int i, d;

	for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++)
		d = ((int)a[i]) - ((int)b[i]);

	return (d);
}

/*
 * Allocate and initialize the MAC forwarding table hash buckets for a
 * softc.  May sleep (M_WAITOK).
 */
static void
vxlan_ftable_init(struct vxlan_softc *sc)
{
	int i;

	sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
	    VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
		LIST_INIT(&sc->vxl_ftable[i]);
	/* Random key makes the bucket distribution unpredictable. */
	sc->vxl_ftable_hash_key = arc4random();
}

/*
 * Release the forwarding table.  Every entry must already have been
 * destroyed (see vxlan_ftable_flush()).
 */
static void
vxlan_ftable_fini(struct vxlan_softc *sc)
{
	int i;

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
		    ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
	}
	MPASS(sc->vxl_ftable_cnt == 0);

	free(sc->vxl_ftable, M_VXLAN);
	sc->vxl_ftable = NULL;
}

/*
 * Remove forwarding entries: every entry when 'all' is nonzero,
 * otherwise only the dynamically learned ones (static entries stay).
 */
static void
vxlan_ftable_flush(struct vxlan_softc *sc, int all)
{
	struct vxlan_ftable_entry *fe, *tfe;
	int i;

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash,
		    tfe) {
			if (all || VXLAN_FE_IS_DYNAMIC(fe))
				vxlan_ftable_entry_destroy(sc, fe);
		}
	}
}

/*
 * Destroy dynamically learned entries whose timeout has passed.
 * Requires the softc write lock (asserted below).
 */
static void
vxlan_ftable_expire(struct vxlan_softc *sc)
{
	struct vxlan_ftable_entry *fe, *tfe;
	int i;

	VXLAN_LOCK_WASSERT(sc);

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
			if (VXLAN_FE_IS_DYNAMIC(fe) &&
			    time_uptime >= fe->vxlfe_expire)
				vxlan_ftable_entry_destroy(sc, fe);
		}
	}
}

/*
 * Learn or refresh the forwarding entry for 'mac', pointing it at the
 * remote address 'vxlsa'.  Entered with either the read or write lock
 * held; because the rm lock cannot be upgraded in place, when a
 * modification is needed under the read lock we drop it, take the
 * write lock, and retry from scratch ('goto again').
 * Returns 0 on success, ENOSPC when the table is full, or ENOMEM.
 */
static int
vxlan_ftable_update_locked(struct vxlan_softc *sc,
    const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
    struct rm_priotracker *tracker)
{
	struct vxlan_ftable_entry *fe;
	int error __unused;

	VXLAN_LOCK_ASSERT(sc);

again:
	/*
	 * A forwarding entry for this MAC address might already exist. If
	 * so, update it, otherwise create a new one. We may have to upgrade
	 * the lock if we have to change or create an entry.
	 */
	fe = vxlan_ftable_entry_lookup(sc, mac);
	if (fe != NULL) {
		/* Refreshing the expiry is safe under the read lock. */
		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;

		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa))
			return (0);
		if (!VXLAN_LOCK_WOWNED(sc)) {
			VXLAN_RUNLOCK(sc, tracker);
			VXLAN_WLOCK(sc);
			sc->vxl_stats.ftable_lock_upgrade_failed++;
			goto again;
		}
		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa);
		return (0);
	}

	/* Creating a new entry also requires the write lock. */
	if (!VXLAN_LOCK_WOWNED(sc)) {
		VXLAN_RUNLOCK(sc, tracker);
		VXLAN_WLOCK(sc);
		sc->vxl_stats.ftable_lock_upgrade_failed++;
		goto again;
	}

	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
		sc->vxl_stats.ftable_nospace++;
		return (ENOSPC);
	}

	fe = vxlan_ftable_entry_alloc();
	if (fe == NULL)
		return (ENOMEM);

	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC);

	/* The prior lookup failed, so the insert should not. */
	error = vxlan_ftable_entry_insert(sc, fe);
	MPASS(error == 0);

	return (0);
}

/*
 * Learn the (inner MAC -> outer address) mapping from a received
 * packet.  Normalizes the address before handing off to
 * vxlan_ftable_update_locked().
 */
static int
vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
    const uint8_t *mac)
{
	struct rm_priotracker tracker;
	union vxlan_sockaddr vxlsa;
	int error;

	/*
	 * The source port may be randomly selected by the remote host, so
	 * use the port of the default destination address.
	 */
	vxlan_sockaddr_copy(&vxlsa, sa);
	vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;

	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
		if (error)
			return (error);
	}

	VXLAN_RLOCK(sc, &tracker);
	error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
	VXLAN_UNLOCK(sc, &tracker);

	return (error);
}

/*
 * Sysctl handler that dumps the forwarding table as text, one entry
 * per line (see vxlan_ftable_entry_dump()).
 */
static int
vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sbuf sb;
	struct vxlan_softc *sc;
	struct vxlan_ftable_entry *fe;
	size_t size;
	int i, error;

	/*
	 * This is mostly intended for debugging during development. It is
	 * not practical to dump an entire large table this way.
	 */

	sc = arg1;
	size = PAGE_SIZE;	/* Calculate later.
 */

	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
	sbuf_putc(&sb, '\n');

	VXLAN_RLOCK(sc, &tracker);
	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
			/* Stop once the fixed-size sbuf has overflowed. */
			if (sbuf_error(&sb) != 0)
				break;
			vxlan_ftable_entry_dump(fe, &sb);
		}
	}
	VXLAN_RUNLOCK(sc, &tracker);

	/* Only the initial newline was written: report an empty string. */
	if (sbuf_len(&sb) == 1)
		sbuf_setpos(&sb, 0);

	sbuf_finish(&sb);
	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);

	return (error);
}

/*
 * Allocate a zeroed forwarding entry.  M_NOWAIT so this is safe in
 * contexts where sleeping is not allowed; returns NULL on failure.
 */
static struct vxlan_ftable_entry *
vxlan_ftable_entry_alloc(void)
{
	struct vxlan_ftable_entry *fe;

	fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);

	return (fe);
}

static void
vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
{

	free(fe, M_VXLAN);
}

/*
 * Fill in a newly allocated entry: flags, expiry time, inner MAC, and
 * the remote (outer) address frames for this MAC are tunneled to.
 */
static void
vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
    const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
{

	fe->vxlfe_flags = flags;
	fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
	memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
	vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
}

/* Unlink an entry from its hash chain and free it. */
static void
vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
    struct vxlan_ftable_entry *fe)
{

	sc->vxl_ftable_cnt--;
	LIST_REMOVE(fe, vxlfe_hash);
	vxlan_ftable_entry_free(fe);
}

/*
 * Insert 'fe' into its hash chain, keeping the chain sorted by MAC
 * (largest first, per vxlan_ftable_addr_cmp()).  Returns EEXIST if an
 * entry with the same MAC is already present.  Requires the write lock.
 */
static int
vxlan_ftable_entry_insert(struct vxlan_softc *sc,
    struct vxlan_ftable_entry *fe)
{
	struct vxlan_ftable_entry *lfe;
	uint32_t hash;
	int dir;

	VXLAN_LOCK_WASSERT(sc);
	hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);

	lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
	if (lfe == NULL) {
		LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
		goto out;
	}

	do {
		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
		if (dir == 0)
			return (EEXIST);
		if (dir > 0) {
			LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
			goto out;
		} else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
			LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
			goto out;
		} else
			lfe = LIST_NEXT(lfe, vxlfe_hash);
	} while (lfe != NULL);

out:
	sc->vxl_ftable_cnt++;

	return (0);
}

/*
 * Look up 'mac' in the forwarding table.  Because each chain is kept
 * sorted, the scan can stop as soon as a smaller MAC is seen.
 * Returns NULL when not found.
 */
static struct vxlan_ftable_entry *
vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
{
	struct vxlan_ftable_entry *fe;
	uint32_t hash;
	int dir;

	VXLAN_LOCK_ASSERT(sc);
	hash = VXLAN_SC_FTABLE_HASH(sc, mac);

	LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
		dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
		if (dir == 0)
			return (fe);
		if (dir > 0)
			break;
	}

	return (NULL);
}

/*
 * Append one human-readable line describing 'fe' to the sbuf:
 * dynamic/static marker, flag bits, MAC, remote address, and expiry.
 */
static void
vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
{
	char buf[64];
	const union vxlan_sockaddr *sa;
	const void *addr;
	int i, len, af, width;

	sa = &fe->vxlfe_raddr;
	af = sa->sa.sa_family;
	len = sbuf_len(sb);

	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
	    fe->vxlfe_flags);

	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);

	if (af == AF_INET) {
		addr = &sa->in4.sin_addr;
		width = INET_ADDRSTRLEN - 1;
	} else {
		addr = &sa->in6.sin6_addr;
		width = INET6_ADDRSTRLEN - 1;
	}
	inet_ntop(af, addr, buf, sizeof(buf));
	sbuf_printf(sb, "%*s ", width, buf);

	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);

	sbuf_putc(sb, '\n');

	/* Truncate a partial line. */
	if (sbuf_error(sb) != 0)
		sbuf_setpos(sb, len);
}

/*
 * Allocate a vxlan_socket container for the local address 'sa'.  The
 * reference count starts at zero; the first reference is taken when
 * the socket is published via vxlan_socket_insert().
 */
static struct vxlan_socket *
vxlan_socket_alloc(const union vxlan_sockaddr *sa)
{
	struct vxlan_socket *vso;
	int i;

	vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
	rm_init(&vso->vxlso_lock, "vxlansorm");
	refcount_init(&vso->vxlso_refcnt, 0);
	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
		LIST_INIT(&vso->vxlso_vni_hash[i]);
	vso->vxlso_laddr = *sa;

	return (vso);
}

/*
 * Tear down a vxlan_socket: close the underlying UDP socket (if any)
 * and free the structure.  With INVARIANTS, assert that no multicast
 * groups remain and that no softc is still hashed on it.
 */
static void
vxlan_socket_destroy(struct vxlan_socket *vso)
{
	struct socket *so;
#ifdef INVARIANTS
	int i;
	struct vxlan_socket_mc_info *mc;

	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];
		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
		    ("%s: socket %p mc[%d] still has address",
		    __func__, vso, i));
	}

	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
		    ("%s: socket %p vni_hash[%d] not empty",
		    __func__, vso, i));
	}
#endif
	so = vso->vxlso_sock;
	if (so != NULL) {
		vso->vxlso_sock = NULL;
		soclose(so);
	}

	rm_destroy(&vso->vxlso_lock);
	free(vso, M_VXLAN);
}

/*
 * Drop a reference; on the final release, unlink the socket from the
 * global list (under the list lock) and destroy it.
 */
static void
vxlan_socket_release(struct vxlan_socket *vso)
{
	int destroy;

	VXLAN_LIST_LOCK();
	destroy = VXLAN_SO_RELEASE(vso);
	if (destroy != 0)
		LIST_REMOVE(vso, vxlso_entry);
	VXLAN_LIST_UNLOCK();

	if (destroy != 0)
		vxlan_socket_destroy(vso);
}

/*
 * Find an existing socket bound to 'vxlsa' on the global list.
 * Returns the socket with a new reference held, or NULL.
 */
static struct vxlan_socket *
vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
{
	struct vxlan_socket *vso;

	VXLAN_LIST_LOCK();
	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
		if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
			VXLAN_SO_ACQUIRE(vso);
			break;
		}
	}
	VXLAN_LIST_UNLOCK();

	return (vso);
}

/* Publish a socket on the global list, taking the list's reference. */
static void
vxlan_socket_insert(struct vxlan_socket *vso)
{
976 977 VXLAN_LIST_LOCK(); 978 VXLAN_SO_ACQUIRE(vso); 979 LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry); 980 VXLAN_LIST_UNLOCK(); 981 } 982 983 static int 984 vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp) 985 { 986 struct thread *td; 987 int error; 988 989 td = curthread; 990 991 error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock, 992 SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td); 993 if (error) { 994 if_printf(ifp, "cannot create socket: %d\n", error); 995 return (error); 996 } 997 998 error = udp_set_kernel_tunneling(vso->vxlso_sock, 999 vxlan_rcv_udp_packet, NULL, vso); 1000 if (error) { 1001 if_printf(ifp, "cannot set tunneling function: %d\n", error); 1002 return (error); 1003 } 1004 1005 if (vxlan_reuse_port != 0) { 1006 struct sockopt sopt; 1007 int val = 1; 1008 1009 bzero(&sopt, sizeof(sopt)); 1010 sopt.sopt_dir = SOPT_SET; 1011 sopt.sopt_level = IPPROTO_IP; 1012 sopt.sopt_name = SO_REUSEPORT; 1013 sopt.sopt_val = &val; 1014 sopt.sopt_valsize = sizeof(val); 1015 error = sosetopt(vso->vxlso_sock, &sopt); 1016 if (error) { 1017 if_printf(ifp, 1018 "cannot set REUSEADDR socket opt: %d\n", error); 1019 return (error); 1020 } 1021 } 1022 1023 return (0); 1024 } 1025 1026 static int 1027 vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp) 1028 { 1029 union vxlan_sockaddr laddr; 1030 struct thread *td; 1031 int error; 1032 1033 td = curthread; 1034 laddr = vso->vxlso_laddr; 1035 1036 error = sobind(vso->vxlso_sock, &laddr.sa, td); 1037 if (error) { 1038 if (error != EADDRINUSE) 1039 if_printf(ifp, "cannot bind socket: %d\n", error); 1040 return (error); 1041 } 1042 1043 return (0); 1044 } 1045 1046 static int 1047 vxlan_socket_create(struct ifnet *ifp, int multicast, 1048 const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop) 1049 { 1050 union vxlan_sockaddr laddr; 1051 struct vxlan_socket *vso; 1052 int error; 1053 1054 laddr = *saddr; 1055 1056 /* 1057 * If this socket will be multicast, then only the 
local port 1058 * must be specified when binding. 1059 */ 1060 if (multicast != 0) { 1061 if (VXLAN_SOCKADDR_IS_IPV4(&laddr)) 1062 laddr.in4.sin_addr.s_addr = INADDR_ANY; 1063 #ifdef INET6 1064 else 1065 laddr.in6.sin6_addr = in6addr_any; 1066 #endif 1067 } 1068 1069 vso = vxlan_socket_alloc(&laddr); 1070 if (vso == NULL) 1071 return (ENOMEM); 1072 1073 error = vxlan_socket_init(vso, ifp); 1074 if (error) 1075 goto fail; 1076 1077 error = vxlan_socket_bind(vso, ifp); 1078 if (error) 1079 goto fail; 1080 1081 /* 1082 * There is a small window between the bind completing and 1083 * inserting the socket, so that a concurrent create may fail. 1084 * Let's not worry about that for now. 1085 */ 1086 vxlan_socket_insert(vso); 1087 *vsop = vso; 1088 1089 return (0); 1090 1091 fail: 1092 vxlan_socket_destroy(vso); 1093 1094 return (error); 1095 } 1096 1097 static void 1098 vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp, 1099 struct vxlan_softc_head *list) 1100 { 1101 struct rm_priotracker tracker; 1102 struct vxlan_softc *sc; 1103 int i; 1104 1105 VXLAN_SO_RLOCK(vso, &tracker); 1106 for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) { 1107 LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry) 1108 vxlan_ifdetach(sc, ifp, list); 1109 } 1110 VXLAN_SO_RUNLOCK(vso, &tracker); 1111 } 1112 1113 static struct vxlan_socket * 1114 vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa) 1115 { 1116 union vxlan_sockaddr laddr; 1117 struct vxlan_socket *vso; 1118 1119 laddr = *vxlsa; 1120 1121 if (VXLAN_SOCKADDR_IS_IPV4(&laddr)) 1122 laddr.in4.sin_addr.s_addr = INADDR_ANY; 1123 #ifdef INET6 1124 else 1125 laddr.in6.sin6_addr = in6addr_any; 1126 #endif 1127 1128 vso = vxlan_socket_lookup(&laddr); 1129 1130 return (vso); 1131 } 1132 1133 static int 1134 vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc, 1135 const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, 1136 int ifidx) 1137 { 1138 1139 if (!vxlan_sockaddr_in_any(local) && 1140 
!vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa)) 1141 return (0); 1142 if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa)) 1143 return (0); 1144 if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx) 1145 return (0); 1146 1147 return (1); 1148 } 1149 1150 static int 1151 vxlan_socket_mc_join_group(struct vxlan_socket *vso, 1152 const union vxlan_sockaddr *group, const union vxlan_sockaddr *local, 1153 int *ifidx, union vxlan_sockaddr *source) 1154 { 1155 struct sockopt sopt; 1156 int error; 1157 1158 *source = *local; 1159 1160 if (VXLAN_SOCKADDR_IS_IPV4(group)) { 1161 struct ip_mreq mreq; 1162 1163 mreq.imr_multiaddr = group->in4.sin_addr; 1164 mreq.imr_interface = local->in4.sin_addr; 1165 1166 bzero(&sopt, sizeof(sopt)); 1167 sopt.sopt_dir = SOPT_SET; 1168 sopt.sopt_level = IPPROTO_IP; 1169 sopt.sopt_name = IP_ADD_MEMBERSHIP; 1170 sopt.sopt_val = &mreq; 1171 sopt.sopt_valsize = sizeof(mreq); 1172 error = sosetopt(vso->vxlso_sock, &sopt); 1173 if (error) 1174 return (error); 1175 1176 /* 1177 * BMV: Ideally, there would be a formal way for us to get 1178 * the local interface that was selected based on the 1179 * imr_interface address. We could then update *ifidx so 1180 * vxlan_sockaddr_mc_info_match() would return a match for 1181 * later creates that explicitly set the multicast interface. 1182 * 1183 * If we really need to, we can of course look in the INP's 1184 * membership list: 1185 * sotoinpcb(vso->vxlso_sock)->inp_moptions-> 1186 * imo_head[]->imf_inm->inm_ifp 1187 * similarly to imo_match_group(). 
/*
 * Drop the socket's membership in a multicast group, for either
 * address family.  Returns the sosetopt() error, or EAFNOSUPPORT for
 * an unknown family.
 */
static int
vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
    int ifidx)
{
    struct sockopt sopt;
    int error;

    bzero(&sopt, sizeof(sopt));
    sopt.sopt_dir = SOPT_SET;

    if (VXLAN_SOCKADDR_IS_IPV4(group)) {
        struct ip_mreq mreq;

        mreq.imr_multiaddr = group->in4.sin_addr;
        mreq.imr_interface = source->in4.sin_addr;

        sopt.sopt_level = IPPROTO_IP;
        sopt.sopt_name = IP_DROP_MEMBERSHIP;
        sopt.sopt_val = &mreq;
        sopt.sopt_valsize = sizeof(mreq);
        error = sosetopt(vso->vxlso_sock, &sopt);

    } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
        struct ipv6_mreq mreq;

        mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
        mreq.ipv6mr_interface = ifidx;

        sopt.sopt_level = IPPROTO_IPV6;
        sopt.sopt_name = IPV6_LEAVE_GROUP;
        sopt.sopt_val = &mreq;
        sopt.sopt_valsize = sizeof(mreq);
        error = sosetopt(vso->vxlso_sock, &sopt);

    } else
        error = EAFNOSUPPORT;

    return (error);
}

/*
 * Reference (and if necessary establish) the socket's membership in a
 * multicast group.  On success *idx is the slot in vxlso_mc[] whose
 * user count was bumped.  The lock is dropped around the sosetopt()
 * call, so after rejoining we must rescan for a free slot; if another
 * thread consumed them all we undo the join and fail with ENOSPC.
 */
static int
vxlan_socket_mc_add_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int ifidx, int *idx)
{
    union vxlan_sockaddr source;
    struct vxlan_socket_mc_info *mc;
    int i, empty, error;

    /*
     * Within a socket, the same multicast group may be used by multiple
     * interfaces, each with a different network identifier.  But a socket
     * may only join a multicast group once, so keep track of the users
     * here.
     */

    VXLAN_SO_WLOCK(vso);
    for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
        mc = &vso->vxlso_mc[i];

        if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
            empty++;
            continue;
        }

        if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
            goto out;
    }
    VXLAN_SO_WUNLOCK(vso);

    if (empty == 0)
        return (ENOSPC);

    error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
    if (error)
        return (error);

    VXLAN_SO_WLOCK(vso);
    for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
        mc = &vso->vxlso_mc[i];

        if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
            vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
            vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
            mc->vxlsomc_ifidx = ifidx;
            goto out;
        }
    }
    VXLAN_SO_WUNLOCK(vso);

    /* All slots were taken while the lock was dropped; undo the join. */
    error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
    MPASS(error == 0);

    return (ENOSPC);

out:
    mc->vxlsomc_users++;
    VXLAN_SO_WUNLOCK(vso);

    *idx = i;

    return (0);
}

/*
 * Drop one reference on multicast slot idx; when the last user goes
 * away, clear the slot and leave the group (outside the lock, since
 * sosetopt() may sleep).
 */
static void
vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
{
    union vxlan_sockaddr group, source;
    struct vxlan_socket_mc_info *mc;
    int ifidx, leave;

    KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
        ("%s: vso %p idx %d out of bounds", __func__, vso, idx));

    leave = 0;
    mc = &vso->vxlso_mc[idx];

    VXLAN_SO_WLOCK(vso);
    mc->vxlsomc_users--;
    if (mc->vxlsomc_users == 0) {
        group = mc->vxlsomc_gaddr;
        source = mc->vxlsomc_saddr;
        ifidx = mc->vxlsomc_ifidx;
        bzero(mc, sizeof(*mc));
        leave = 1;
    }
    VXLAN_SO_WUNLOCK(vso);

    if (leave != 0) {
        /*
         * Our socket's membership in this group may have already
         * been removed if we joined through an interface that's
         * been detached.
         */
        vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
    }
}
/*
 * Find the softc using the given VNI on this socket and return it with
 * a reference, or NULL.  Caller holds the socket lock.
 */
static struct vxlan_softc *
vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
{
    struct vxlan_softc *sc;
    uint32_t hash;

    VXLAN_SO_LOCK_ASSERT(vso);
    hash = VXLAN_SO_VNI_HASH(vni);

    LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
        if (sc->vxl_vni == vni) {
            VXLAN_ACQUIRE(sc);
            break;
        }
    }

    return (sc);
}

/*
 * Locking wrapper around vxlan_socket_lookup_softc_locked().
 */
static struct vxlan_softc *
vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
{
    struct rm_priotracker tracker;
    struct vxlan_softc *sc;

    VXLAN_SO_RLOCK(vso, &tracker);
    sc = vxlan_socket_lookup_softc_locked(vso, vni);
    VXLAN_SO_RUNLOCK(vso, &tracker);

    return (sc);
}

/*
 * Attach a softc to the socket's VNI hash.  Each VNI may be bound at
 * most once per socket; returns EEXIST if already claimed.
 */
static int
vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{
    struct vxlan_softc *tsc;
    uint32_t vni, hash;

    vni = sc->vxl_vni;
    hash = VXLAN_SO_VNI_HASH(vni);

    VXLAN_SO_WLOCK(vso);
    tsc = vxlan_socket_lookup_softc_locked(vso, vni);
    if (tsc != NULL) {
        VXLAN_SO_WUNLOCK(vso);
        vxlan_release(tsc);
        return (EEXIST);
    }

    VXLAN_ACQUIRE(sc);
    LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
    VXLAN_SO_WUNLOCK(vso);

    return (0);
}

/*
 * Detach a softc from the socket's VNI hash and drop the reference
 * taken at insert time.
 */
static void
vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{

    VXLAN_SO_WLOCK(vso);
    LIST_REMOVE(sc, vxl_entry);
    VXLAN_SO_WUNLOCK(vso);

    vxlan_release(sc);
}

/*
 * Return a referenced pointer to the interface currently configured
 * for outbound multicast in the requested address family, or NULL.
 */
static struct ifnet *
vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
{
    struct ifnet *ifp;

    VXLAN_LOCK_ASSERT(sc);

    if (ipv4 && sc->vxl_im4o != NULL)
        ifp = sc->vxl_im4o->imo_multicast_ifp;
    else if (!ipv4 && sc->vxl_im6o != NULL)
        ifp = sc->vxl_im6o->im6o_multicast_ifp;
    else
        ifp = NULL;

    if (ifp != NULL)
        if_ref(ifp);

    return (ifp);
}

/*
 * Release the softc's multicast state: interface reference and the
 * per-family multicast options structures.
 */
static void
vxlan_free_multicast(struct vxlan_softc *sc)
{

    if (sc->vxl_mc_ifp != NULL) {
        if_rele(sc->vxl_mc_ifp);
        sc->vxl_mc_ifp = NULL;
        sc->vxl_mc_ifindex = 0;
    }

    if (sc->vxl_im4o != NULL) {
        free(sc->vxl_im4o, M_VXLAN);
        sc->vxl_im4o = NULL;
    }

    if (sc->vxl_im6o != NULL) {
        free(sc->vxl_im6o, M_VXLAN);
        sc->vxl_im6o = NULL;
    }
}

/*
 * Resolve the user-configured multicast interface name and cache a
 * referenced ifnet pointer plus its index.  The interface must exist
 * and be multicast-capable.
 */
static int
vxlan_setup_multicast_interface(struct vxlan_softc *sc)
{
    struct ifnet *ifp;

    ifp = ifunit_ref(sc->vxl_mc_ifname);
    if (ifp == NULL) {
        if_printf(sc->vxl_ifp, "multicast interface %s does "
            "not exist\n", sc->vxl_mc_ifname);
        return (ENOENT);
    }

    if ((ifp->if_flags & IFF_MULTICAST) == 0) {
        if_printf(sc->vxl_ifp, "interface %s does not support "
            "multicast\n", sc->vxl_mc_ifname);
        if_rele(ifp);
        return (ENOTSUP);
    }

    sc->vxl_mc_ifp = ifp;
    sc->vxl_mc_ifindex = ifp->if_index;

    return (0);
}
/*
 * Prepare the softc's per-family multicast options for transmit.
 * Called from vxlan_init() when the destination is a group address.
 */
static int
vxlan_setup_multicast(struct vxlan_softc *sc)
{
    const union vxlan_sockaddr *group;
    int error;

    group = &sc->vxl_dst_addr;
    error = 0;

    if (sc->vxl_mc_ifname[0] != '\0') {
        error = vxlan_setup_multicast_interface(sc);
        if (error)
            return (error);
    }

    /*
     * Initialize a multicast options structure that is sufficiently
     * populated for use in the respective IP output routine.  This
     * structure is typically stored in the socket, but our sockets
     * may be shared among multiple interfaces.
     */
    if (VXLAN_SOCKADDR_IS_IPV4(group)) {
        sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
            M_ZERO | M_WAITOK);
        sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
        sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
        sc->vxl_im4o->imo_multicast_vif = -1;
    } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
        sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
            M_ZERO | M_WAITOK);
        sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
        sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
    }

    return (error);
}

/*
 * Bind this softc to a vxlan_socket: create a new one or share an
 * existing socket bound to the same local address, join the multicast
 * group when the destination is one, and claim the VNI on the socket.
 * On any failure the 'out' path unwinds everything acquired so far.
 */
static int
vxlan_setup_socket(struct vxlan_softc *sc)
{
    struct vxlan_socket *vso;
    struct ifnet *ifp;
    union vxlan_sockaddr *saddr, *daddr;
    int multicast, error;

    vso = NULL;
    ifp = sc->vxl_ifp;
    saddr = &sc->vxl_src_addr;
    daddr = &sc->vxl_dst_addr;

    multicast = vxlan_sockaddr_in_multicast(daddr);
    MPASS(multicast != -1);
    sc->vxl_vso_mc_index = -1;

    /*
     * Try to create the socket.  If that fails, attempt to use an
     * existing socket.
     */
    error = vxlan_socket_create(ifp, multicast, saddr, &vso);
    if (error) {
        if (multicast != 0)
            vso = vxlan_socket_mc_lookup(saddr);
        else
            vso = vxlan_socket_lookup(saddr);

        if (vso == NULL) {
            if_printf(ifp, "cannot create socket (error: %d), "
                "and no existing socket found\n", error);
            goto out;
        }
    }

    if (multicast != 0) {
        error = vxlan_setup_multicast(sc);
        if (error)
            goto out;

        error = vxlan_socket_mc_add_group(vso, daddr, saddr,
            sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
        if (error)
            goto out;
    }

    sc->vxl_sock = vso;
    error = vxlan_socket_insert_softc(vso, sc);
    if (error) {
        sc->vxl_sock = NULL;
        if_printf(ifp, "network identifier %d already exists in "
            "this socket\n", sc->vxl_vni);
        goto out;
    }

    return (0);

out:
    if (vso != NULL) {
        /* Unwind in reverse order of acquisition. */
        if (sc->vxl_vso_mc_index != -1) {
            vxlan_socket_mc_release_group_by_idx(vso,
                sc->vxl_vso_mc_index);
            sc->vxl_vso_mc_index = -1;
        }
        if (multicast != 0)
            vxlan_free_multicast(sc);
        vxlan_socket_release(vso);
    }

    return (error);
}
"rfc6935_port is already set to " 1622 "%d, cannot set it to %d.\n", V_zero_checksum_port, 1623 ntohs(sc->vxl_src_addr.in6.sin6_port)); 1624 } 1625 return; 1626 } 1627 1628 V_zero_checksum_port = ntohs(sc->vxl_src_addr.in6.sin6_port); 1629 if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n", 1630 V_zero_checksum_port); 1631 } 1632 #endif 1633 1634 static void 1635 vxlan_setup_interface_hdrlen(struct vxlan_softc *sc) 1636 { 1637 struct ifnet *ifp; 1638 1639 VXLAN_LOCK_WASSERT(sc); 1640 1641 ifp = sc->vxl_ifp; 1642 ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr); 1643 1644 if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0) 1645 ifp->if_hdrlen += sizeof(struct ip); 1646 else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0) 1647 ifp->if_hdrlen += sizeof(struct ip6_hdr); 1648 1649 if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0) 1650 ifp->if_mtu = ETHERMTU - ifp->if_hdrlen; 1651 } 1652 1653 static int 1654 vxlan_valid_init_config(struct vxlan_softc *sc) 1655 { 1656 const char *reason; 1657 1658 if (vxlan_check_vni(sc->vxl_vni) != 0) { 1659 reason = "invalid virtual network identifier specified"; 1660 goto fail; 1661 } 1662 1663 if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) { 1664 reason = "source address type is not supported"; 1665 goto fail; 1666 } 1667 1668 if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) { 1669 reason = "destination address type is not supported"; 1670 goto fail; 1671 } 1672 1673 if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) { 1674 reason = "no valid destination address specified"; 1675 goto fail; 1676 } 1677 1678 if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 && 1679 sc->vxl_mc_ifname[0] != '\0') { 1680 reason = "can only specify interface with a group address"; 1681 goto fail; 1682 } 1683 1684 if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) { 1685 if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^ 1686 VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) { 1687 reason = "source and destination address must both 
" 1688 "be either IPv4 or IPv6"; 1689 goto fail; 1690 } 1691 } 1692 1693 if (sc->vxl_src_addr.in4.sin_port == 0) { 1694 reason = "local port not specified"; 1695 goto fail; 1696 } 1697 1698 if (sc->vxl_dst_addr.in4.sin_port == 0) { 1699 reason = "remote port not specified"; 1700 goto fail; 1701 } 1702 1703 return (0); 1704 1705 fail: 1706 if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason); 1707 return (EINVAL); 1708 } 1709 1710 static void 1711 vxlan_init_wait(struct vxlan_softc *sc) 1712 { 1713 1714 VXLAN_LOCK_WASSERT(sc); 1715 while (sc->vxl_flags & VXLAN_FLAG_INIT) 1716 rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz); 1717 } 1718 1719 static void 1720 vxlan_init_complete(struct vxlan_softc *sc) 1721 { 1722 1723 VXLAN_WLOCK(sc); 1724 sc->vxl_flags &= ~VXLAN_FLAG_INIT; 1725 wakeup(sc); 1726 VXLAN_WUNLOCK(sc); 1727 } 1728 1729 static void 1730 vxlan_init(void *xsc) 1731 { 1732 static const uint8_t empty_mac[ETHER_ADDR_LEN]; 1733 struct vxlan_softc *sc; 1734 struct ifnet *ifp; 1735 1736 sc = xsc; 1737 ifp = sc->vxl_ifp; 1738 1739 sx_xlock(&vxlan_sx); 1740 VXLAN_WLOCK(sc); 1741 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1742 VXLAN_WUNLOCK(sc); 1743 sx_xunlock(&vxlan_sx); 1744 return; 1745 } 1746 sc->vxl_flags |= VXLAN_FLAG_INIT; 1747 VXLAN_WUNLOCK(sc); 1748 1749 if (vxlan_valid_init_config(sc) != 0) 1750 goto out; 1751 1752 if (vxlan_setup_socket(sc) != 0) 1753 goto out; 1754 1755 #ifdef INET6 1756 vxlan_setup_zero_checksum_port(sc); 1757 #endif 1758 1759 /* Initialize the default forwarding entry. 
/*
 * Drop a softc reference; on the final release wake any thread waiting
 * in teardown for the refcount to drain.
 */
static void
vxlan_release(struct vxlan_softc *sc)
{

    /*
     * The softc may be destroyed as soon as we release our reference,
     * so we cannot serialize the wakeup with the softc lock.  We use a
     * timeout in our sleeps so a missed wakeup is unfortunate but not
     * fatal.
     */
    if (VXLAN_RELEASE(sc) != 0)
        wakeup(sc);
}

/*
 * Sleep until a concurrent teardown of this softc has finished.
 */
static void
vxlan_teardown_wait(struct vxlan_softc *sc)
{

    VXLAN_LOCK_WASSERT(sc);
    while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
        rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
}

/*
 * Clear the TEARDOWN flag and wake threads blocked in
 * vxlan_teardown_wait().
 */
static void
vxlan_teardown_complete(struct vxlan_softc *sc)
{

    VXLAN_WLOCK(sc);
    sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
    wakeup(sc);
    VXLAN_WUNLOCK(sc);
}

/*
 * Bring the interface down and detach it from its socket.  Entered
 * with the softc write lock held and VXLAN_FLAG_TEARDOWN set; the lock
 * is dropped (and retaken) around operations that may sleep.  Returns
 * with the lock released.
 */
static void
vxlan_teardown_locked(struct vxlan_softc *sc)
{
    struct ifnet *ifp;
    struct vxlan_socket *vso;
    bool running;

    sx_assert(&vxlan_sx, SA_XLOCKED);
    VXLAN_LOCK_WASSERT(sc);
    MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);

    ifp = sc->vxl_ifp;
    ifp->if_flags &= ~IFF_UP;
    running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;
    ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
    callout_stop(&sc->vxl_callout);
    vso = sc->vxl_sock;
    sc->vxl_sock = NULL;

    VXLAN_WUNLOCK(sc);
    if_link_state_change(ifp, LINK_STATE_DOWN);
    if (running)
        EVENTHANDLER_INVOKE(vxlan_stop, ifp,
            sc->vxl_src_addr.in4.sin_family,
            ntohs(sc->vxl_src_addr.in4.sin_port));

    if (vso != NULL) {
        vxlan_socket_remove_softc(vso, sc);

        if (sc->vxl_vso_mc_index != -1) {
            vxlan_socket_mc_release_group_by_idx(vso,
                sc->vxl_vso_mc_index);
            sc->vxl_vso_mc_index = -1;
        }
    }

    /* Wait for in-flight users (receive path, etc.) to drain. */
    VXLAN_WLOCK(sc);
    while (sc->vxl_refcnt != 0)
        rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
    VXLAN_WUNLOCK(sc);

    callout_drain(&sc->vxl_callout);

    vxlan_free_multicast(sc);
    if (vso != NULL)
        vxlan_socket_release(vso);

    vxlan_teardown_complete(sc);
}

/*
 * Public teardown entry point: take the global lock, mark the softc as
 * tearing down (or wait for the teardown already in progress) and run
 * vxlan_teardown_locked().
 */
static void
vxlan_teardown(struct vxlan_softc *sc)
{

    sx_xlock(&vxlan_sx);
    VXLAN_WLOCK(sc);
    if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
        vxlan_teardown_wait(sc);
        VXLAN_WUNLOCK(sc);
        sx_xunlock(&vxlan_sx);
        return;
    }

    sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
    vxlan_teardown_locked(sc);
    sx_xunlock(&vxlan_sx);
}

/*
 * ifnet departure handler helper: if this softc joined its multicast
 * group via the detaching interface, mark it for teardown and queue it
 * on the caller's list (the teardown itself runs later, unlocked).
 */
static void
vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{

    VXLAN_WLOCK(sc);

    if (sc->vxl_mc_ifp != ifp)
        goto out;
    if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
        goto out;

    sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
    LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);

out:
    VXLAN_WUNLOCK(sc);
}

/*
 * Periodic callout: expire stale forwarding-table entries and
 * reschedule.  Runs with the softc lock held (callout_init_rm).
 */
static void
vxlan_timer(void *xsc)
{
    struct vxlan_softc *sc;

    sc = xsc;
    VXLAN_LOCK_WASSERT(sc);

    vxlan_ftable_expire(sc);
    callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
}

/*
 * SIOCSIFFLAGS handler: bring the interface up or down to match
 * IFF_UP.
 */
static int
vxlan_ioctl_ifflags(struct vxlan_softc *sc)
{
    struct ifnet *ifp;

    ifp = sc->vxl_ifp;

    if (ifp->if_flags & IFF_UP) {
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
            vxlan_init(sc);
    } else {
        if (ifp->if_drv_flags & IFF_DRV_RUNNING)
            vxlan_teardown(sc);
    }

    return (0);
}
/*
 * VXLAN_CMD_GET_CONFIG: snapshot the interface configuration into the
 * user-visible ifvxlancfg.  IPv6 scope zone IDs are restored to their
 * user representation after the lock is dropped.
 */
static int
vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
{
    struct rm_priotracker tracker;
    struct ifvxlancfg *cfg;

    cfg = arg;
    bzero(cfg, sizeof(*cfg));

    VXLAN_RLOCK(sc, &tracker);
    cfg->vxlc_vni = sc->vxl_vni;
    memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
        sizeof(union vxlan_sockaddr));
    memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
        sizeof(union vxlan_sockaddr));
    cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
    cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
    cfg->vxlc_ftable_max = sc->vxl_ftable_max;
    cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
    cfg->vxlc_port_min = sc->vxl_min_port;
    cfg->vxlc_port_max = sc->vxl_max_port;
    cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
    cfg->vxlc_ttl = sc->vxl_ttl;
    VXLAN_RUNLOCK(sc, &tracker);

#ifdef INET6
    if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
        sa6_recoverscope(&cfg->vxlc_local_sa.in6);
    if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
        sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
#endif

    return (0);
}

/*
 * VXLAN_CMD_SET_VNI: change the network identifier.  Only allowed
 * while the interface configuration may change (not running).
 */
static int
vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    int error;

    cmd = arg;

    if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
        return (EINVAL);

    VXLAN_WLOCK(sc);
    if (vxlan_can_change_config(sc)) {
        sc->vxl_vni = cmd->vxlcmd_vni;
        error = 0;
    } else
        error = EBUSY;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_SET_LOCAL_ADDR: set the tunnel source address.  A
 * multicast source is rejected; IPv6 addresses get their scope zone
 * embedded first.
 */
static int
vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    union vxlan_sockaddr *vxlsa;
    int error;

    cmd = arg;
    vxlsa = &cmd->vxlcmd_sa;

    if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
        return (EINVAL);
    if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
        return (EINVAL);
    if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
        error = vxlan_sockaddr_in6_embedscope(vxlsa);
        if (error)
            return (error);
    }

    VXLAN_WLOCK(sc);
    if (vxlan_can_change_config(sc)) {
        vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
        vxlan_set_hwcaps(sc);
        error = 0;
    } else
        error = EBUSY;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_SET_REMOTE_ADDR: set the tunnel destination address
 * (unicast or multicast) and recompute the encapsulation header
 * length.
 */
static int
vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    union vxlan_sockaddr *vxlsa;
    int error;

    cmd = arg;
    vxlsa = &cmd->vxlcmd_sa;

    if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
        return (EINVAL);
    if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
        error = vxlan_sockaddr_in6_embedscope(vxlsa);
        if (error)
            return (error);
    }

    VXLAN_WLOCK(sc);
    if (vxlan_can_change_config(sc)) {
        vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
        vxlan_setup_interface_hdrlen(sc);
        error = 0;
    } else
        error = EBUSY;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_SET_LOCAL_PORT: set the local UDP port (sin_port overlays
 * sin6_port, so the IPv4 field covers both families).
 */
static int
vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    int error;

    cmd = arg;

    if (cmd->vxlcmd_port == 0)
        return (EINVAL);

    VXLAN_WLOCK(sc);
    if (vxlan_can_change_config(sc)) {
        sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
        error = 0;
    } else
        error = EBUSY;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_SET_REMOTE_PORT: set the remote UDP port.
 */
static int
vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    int error;

    cmd = arg;

    if (cmd->vxlcmd_port == 0)
        return (EINVAL);

    VXLAN_WLOCK(sc);
    if (vxlan_can_change_config(sc)) {
        sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
        error = 0;
    } else
        error = EBUSY;
    VXLAN_WUNLOCK(sc);

    return (error);
}
/*
 * VXLAN_CMD_SET_PORT_RANGE: set the [min, max] range the transmit path
 * hashes source ports into.
 */
static int
vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    uint16_t min, max;
    int error;

    cmd = arg;
    min = cmd->vxlcmd_port_min;
    max = cmd->vxlcmd_port_max;

    if (max < min)
        return (EINVAL);

    VXLAN_WLOCK(sc);
    if (vxlan_can_change_config(sc)) {
        sc->vxl_min_port = min;
        sc->vxl_max_port = max;
        error = 0;
    } else
        error = EBUSY;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_SET_FTABLE_TIMEOUT: set the idle timeout for learned
 * forwarding entries.  May be changed while running.
 */
static int
vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    int error;

    cmd = arg;

    VXLAN_WLOCK(sc);
    if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
        sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
        error = 0;
    } else
        error = EINVAL;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_SET_FTABLE_MAX: cap the number of forwarding-table
 * entries.
 */
static int
vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    int error;

    cmd = arg;

    VXLAN_WLOCK(sc);
    if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
        sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
        error = 0;
    } else
        error = EINVAL;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_SET_MULTICAST_IF: record the name of the interface to join
 * the multicast group on; resolved at init time.
 */
static int
vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
{
    struct ifvxlancmd *cmd;
    int error;

    cmd = arg;

    VXLAN_WLOCK(sc);
    if (vxlan_can_change_config(sc)) {
        strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
        vxlan_set_hwcaps(sc);
        error = 0;
    } else
        error = EBUSY;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_SET_TTL: set the encapsulating packet TTL/hop limit and
 * propagate it to any existing multicast options.
 */
static int
vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    int error;

    cmd = arg;

    VXLAN_WLOCK(sc);
    if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
        sc->vxl_ttl = cmd->vxlcmd_ttl;
        if (sc->vxl_im4o != NULL)
            sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
        if (sc->vxl_im6o != NULL)
            sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
        error = 0;
    } else
        error = EINVAL;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_SET_LEARN: enable/disable source-address learning into the
 * forwarding table.
 */
static int
vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;

    cmd = arg;

    VXLAN_WLOCK(sc);
    if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
        sc->vxl_flags |= VXLAN_FLAG_LEARN;
    else
        sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
    VXLAN_WUNLOCK(sc);

    return (0);
}
/*
 * VXLAN_CMD_FTABLE_ENTRY_ADD: install a static forwarding entry
 * mapping a MAC address to a unicast tunnel endpoint.  The endpoint
 * must match the configured destination's address family; a zero port
 * inherits the configured remote port.
 */
static int
vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
{
    union vxlan_sockaddr vxlsa;
    struct ifvxlancmd *cmd;
    struct vxlan_ftable_entry *fe;
    int error;

    cmd = arg;
    vxlsa = cmd->vxlcmd_sa;

    if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
        return (EINVAL);
    if (vxlan_sockaddr_in_any(&vxlsa) != 0)
        return (EINVAL);
    if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
        return (EINVAL);
    /* BMV: We could support both IPv4 and IPv6 later. */
    if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
        return (EAFNOSUPPORT);

    if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
        error = vxlan_sockaddr_in6_embedscope(&vxlsa);
        if (error)
            return (error);
    }

    fe = vxlan_ftable_entry_alloc();
    if (fe == NULL)
        return (ENOMEM);

    if (vxlsa.in4.sin_port == 0)
        vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;

    vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
        VXLAN_FE_FLAG_STATIC);

    VXLAN_WLOCK(sc);
    error = vxlan_ftable_entry_insert(sc, fe);
    VXLAN_WUNLOCK(sc);

    if (error)
        vxlan_ftable_entry_free(fe);

    return (error);
}

/*
 * VXLAN_CMD_FTABLE_ENTRY_REM: remove the forwarding entry for a MAC
 * address.
 */
static int
vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    struct vxlan_ftable_entry *fe;
    int error;

    cmd = arg;

    VXLAN_WLOCK(sc);
    fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
    if (fe != NULL) {
        vxlan_ftable_entry_destroy(sc, fe);
        error = 0;
    } else
        error = ENOENT;
    VXLAN_WUNLOCK(sc);

    return (error);
}

/*
 * VXLAN_CMD_FLUSH: remove learned (or, with FLUSH_ALL, every)
 * forwarding-table entry.
 */
static int
vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
{
    struct ifvxlancmd *cmd;
    int all;

    cmd = arg;
    all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;

    VXLAN_WLOCK(sc);
    vxlan_ftable_flush(sc, all);
    VXLAN_WUNLOCK(sc);

    return (0);
}

/*
 * SIOC[GS]DRVSPEC dispatcher: validate the control command index,
 * direction, privilege, and argument size against the control table,
 * then copy in, run the handler, and copy out as the command's flags
 * require.  All user data is validated before any copy.
 */
static int
vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
{
    const struct vxlan_control *vc;
    union {
        struct ifvxlancfg cfg;
        struct ifvxlancmd cmd;
    } args;
    int out, error;

    if (ifd->ifd_cmd >= vxlan_control_table_size)
        return (EINVAL);

    bzero(&args, sizeof(args));
    vc = &vxlan_control_table[ifd->ifd_cmd];
    out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;

    /* The ioctl direction must agree with the command's direction. */
    if ((get != 0 && out == 0) || (get == 0 && out != 0))
        return (EINVAL);

    if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
        error = priv_check(curthread, PRIV_NET_VXLAN);
        if (error)
            return (error);
    }

    if (ifd->ifd_len != vc->vxlc_argsize ||
        ifd->ifd_len > sizeof(args))
        return (EINVAL);

    if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
        if (error)
            return (error);
    }

    error = vc->vxlc_func(sc, &args);
    if (error)
        return (error);

    if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
        if (error)
            return (error);
    }

    return (0);
}

/*
 * if_ioctl handler for vxlan interfaces.  Driver-specific requests go
 * through vxlan_ioctl_drvspec(); everything unrecognized falls back to
 * ether_ioctl().
 */
static int
vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
    struct rm_priotracker tracker;
    struct vxlan_softc *sc;
    struct ifreq *ifr;
    struct ifdrv *ifd;
    int error;

    sc = ifp->if_softc;
    ifr = (struct ifreq *) data;
    ifd = (struct ifdrv *) data;

    error = 0;

    switch (cmd) {
    case SIOCADDMULTI:
    case SIOCDELMULTI:
        break;

    case SIOCGDRVSPEC:
    case SIOCSDRVSPEC:
        error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC);
        break;

    case SIOCSIFFLAGS:
        error = vxlan_ioctl_ifflags(sc);
        break;

    case SIOCSIFMEDIA:
    case SIOCGIFMEDIA:
        error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
        break;

    case SIOCSIFMTU:
        if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) {
            error = EINVAL;
        } else {
            VXLAN_WLOCK(sc);
            ifp->if_mtu = ifr->ifr_mtu;
            /* Remember the user pinned the MTU so init won't
             * recompute it from the header length. */
            sc->vxl_flags |= VXLAN_FLAG_USER_MTU;
            VXLAN_WUNLOCK(sc);
        }
        break;

    case SIOCSIFCAP:
        VXLAN_WLOCK(sc);
        error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap);
        if (error == 0)
            vxlan_set_hwcaps(sc);
        VXLAN_WUNLOCK(sc);
        break;

    case SIOCGTUNFIB:
        VXLAN_RLOCK(sc, &tracker);
        ifr->ifr_fib = sc->vxl_fibnum;
        VXLAN_RUNLOCK(sc, &tracker);
        break;

    case SIOCSTUNFIB:
        if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0)
            break;

        if (ifr->ifr_fib >= rt_numfibs)
            error = EINVAL;
        else {
            VXLAN_WLOCK(sc);
            sc->vxl_fibnum = ifr->ifr_fib;
            VXLAN_WUNLOCK(sc);
        }
        break;

    default:
        error = ether_ioctl(ifp, cmd, data);
        break;
    }

    return (error);
}
ifr->ifr_fib = sc->vxl_fibnum; 2404 VXLAN_RUNLOCK(sc, &tracker); 2405 break; 2406 2407 case SIOCSTUNFIB: 2408 if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0) 2409 break; 2410 2411 if (ifr->ifr_fib >= rt_numfibs) 2412 error = EINVAL; 2413 else { 2414 VXLAN_WLOCK(sc); 2415 sc->vxl_fibnum = ifr->ifr_fib; 2416 VXLAN_WUNLOCK(sc); 2417 } 2418 break; 2419 2420 default: 2421 error = ether_ioctl(ifp, cmd, data); 2422 break; 2423 } 2424 2425 return (error); 2426 } 2427 2428 #if defined(INET) || defined(INET6) 2429 static uint16_t 2430 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m) 2431 { 2432 int range; 2433 uint32_t hash; 2434 2435 range = sc->vxl_max_port - sc->vxl_min_port + 1; 2436 2437 if (M_HASHTYPE_ISHASH(m)) 2438 hash = m->m_pkthdr.flowid; 2439 else 2440 hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, 2441 sc->vxl_port_hash_key); 2442 2443 return (sc->vxl_min_port + (hash % range)); 2444 } 2445 2446 static void 2447 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff, 2448 uint16_t srcport, uint16_t dstport) 2449 { 2450 struct vxlanudphdr *hdr; 2451 struct udphdr *udph; 2452 struct vxlan_header *vxh; 2453 int len; 2454 2455 len = m->m_pkthdr.len - ipoff; 2456 MPASS(len >= sizeof(struct vxlanudphdr)); 2457 hdr = mtodo(m, ipoff); 2458 2459 udph = &hdr->vxlh_udp; 2460 udph->uh_sport = srcport; 2461 udph->uh_dport = dstport; 2462 udph->uh_ulen = htons(len); 2463 udph->uh_sum = 0; 2464 2465 vxh = &hdr->vxlh_hdr; 2466 vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI); 2467 vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT); 2468 } 2469 #endif 2470 2471 #if defined(INET6) || defined(INET) 2472 /* 2473 * Return the CSUM_INNER_* equivalent of CSUM_* caps. 
2474 */ 2475 static uint32_t 2476 csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap) 2477 { 2478 uint32_t csum_flags = encap; 2479 const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP; 2480 2481 /* 2482 * csum_flags can request either v4 or v6 offload but not both. 2483 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO) 2484 * so those bits are no good to detect the IP version. Other bits are 2485 * always set with CSUM_TSO and we use those to figure out the IP 2486 * version. 2487 */ 2488 if (csum_flags_in & v4) { 2489 if (csum_flags_in & CSUM_IP) 2490 csum_flags |= CSUM_INNER_IP; 2491 if (csum_flags_in & CSUM_IP_UDP) 2492 csum_flags |= CSUM_INNER_IP_UDP; 2493 if (csum_flags_in & CSUM_IP_TCP) 2494 csum_flags |= CSUM_INNER_IP_TCP; 2495 if (csum_flags_in & CSUM_IP_TSO) 2496 csum_flags |= CSUM_INNER_IP_TSO; 2497 } else { 2498 #ifdef INVARIANTS 2499 const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP; 2500 2501 MPASS((csum_flags_in & v6) != 0); 2502 #endif 2503 if (csum_flags_in & CSUM_IP6_UDP) 2504 csum_flags |= CSUM_INNER_IP6_UDP; 2505 if (csum_flags_in & CSUM_IP6_TCP) 2506 csum_flags |= CSUM_INNER_IP6_TCP; 2507 if (csum_flags_in & CSUM_IP6_TSO) 2508 csum_flags |= CSUM_INNER_IP6_TSO; 2509 } 2510 2511 return (csum_flags); 2512 } 2513 #endif 2514 2515 static int 2516 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, 2517 struct mbuf *m) 2518 { 2519 #ifdef INET 2520 struct ifnet *ifp; 2521 struct ip *ip; 2522 struct in_addr srcaddr, dstaddr; 2523 uint16_t srcport, dstport; 2524 int plen, mcast, error; 2525 struct route route, *ro; 2526 struct sockaddr_in *sin; 2527 uint32_t csum_flags; 2528 2529 NET_EPOCH_ASSERT(); 2530 2531 ifp = sc->vxl_ifp; 2532 srcaddr = sc->vxl_src_addr.in4.sin_addr; 2533 srcport = vxlan_pick_source_port(sc, m); 2534 dstaddr = fvxlsa->in4.sin_addr; 2535 dstport = fvxlsa->in4.sin_port; 2536 2537 plen = m->m_pkthdr.len; 2538 M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr), 
2539 M_NOWAIT); 2540 if (m == NULL) { 2541 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2542 return (ENOBUFS); 2543 } 2544 2545 ip = mtod(m, struct ip *); 2546 ip->ip_tos = 0; 2547 ip->ip_len = htons(m->m_pkthdr.len); 2548 ip->ip_off = 0; 2549 ip->ip_ttl = sc->vxl_ttl; 2550 ip->ip_p = IPPROTO_UDP; 2551 ip->ip_sum = 0; 2552 ip->ip_src = srcaddr; 2553 ip->ip_dst = dstaddr; 2554 2555 vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport); 2556 2557 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 2558 m->m_flags &= ~(M_MCAST | M_BCAST); 2559 2560 m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; 2561 if (m->m_pkthdr.csum_flags != 0) { 2562 /* 2563 * HW checksum (L3 and/or L4) or TSO has been requested. Look 2564 * up the ifnet for the outbound route and verify that the 2565 * outbound ifnet can perform the requested operation on the 2566 * inner frame. 2567 */ 2568 bzero(&route, sizeof(route)); 2569 ro = &route; 2570 sin = (struct sockaddr_in *)&ro->ro_dst; 2571 sin->sin_family = AF_INET; 2572 sin->sin_len = sizeof(*sin); 2573 sin->sin_addr = ip->ip_dst; 2574 ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, 2575 0); 2576 if (ro->ro_nh == NULL) { 2577 m_freem(m); 2578 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2579 return (EHOSTUNREACH); 2580 } 2581 2582 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, 2583 CSUM_ENCAP_VXLAN); 2584 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != 2585 csum_flags) { 2586 if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { 2587 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; 2588 2589 if_printf(ifp, "interface %s is missing hwcaps " 2590 "0x%08x, csum_flags 0x%08x -> 0x%08x, " 2591 "hwassist 0x%08x\n", nh_ifp->if_xname, 2592 csum_flags & ~(uint32_t)nh_ifp->if_hwassist, 2593 m->m_pkthdr.csum_flags, csum_flags, 2594 (uint32_t)nh_ifp->if_hwassist); 2595 } 2596 m_freem(m); 2597 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2598 return (ENXIO); 2599 } 2600 m->m_pkthdr.csum_flags = csum_flags; 2601 if (csum_flags & 
2602 (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | 2603 CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { 2604 counter_u64_add(sc->vxl_stats.txcsum, 1); 2605 if (csum_flags & CSUM_INNER_TSO) 2606 counter_u64_add(sc->vxl_stats.tso, 1); 2607 } 2608 } else 2609 ro = NULL; 2610 error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL); 2611 if (error == 0) { 2612 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 2613 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); 2614 if (mcast != 0) 2615 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 2616 } else 2617 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2618 2619 return (error); 2620 #else 2621 m_freem(m); 2622 return (ENOTSUP); 2623 #endif 2624 } 2625 2626 static int 2627 vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, 2628 struct mbuf *m) 2629 { 2630 #ifdef INET6 2631 struct ifnet *ifp; 2632 struct ip6_hdr *ip6; 2633 const struct in6_addr *srcaddr, *dstaddr; 2634 uint16_t srcport, dstport; 2635 int plen, mcast, error; 2636 struct route_in6 route, *ro; 2637 struct sockaddr_in6 *sin6; 2638 uint32_t csum_flags; 2639 2640 NET_EPOCH_ASSERT(); 2641 2642 ifp = sc->vxl_ifp; 2643 srcaddr = &sc->vxl_src_addr.in6.sin6_addr; 2644 srcport = vxlan_pick_source_port(sc, m); 2645 dstaddr = &fvxlsa->in6.sin6_addr; 2646 dstport = fvxlsa->in6.sin6_port; 2647 2648 plen = m->m_pkthdr.len; 2649 M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr), 2650 M_NOWAIT); 2651 if (m == NULL) { 2652 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2653 return (ENOBUFS); 2654 } 2655 2656 ip6 = mtod(m, struct ip6_hdr *); 2657 ip6->ip6_flow = 0; /* BMV: Keep in forwarding entry? */ 2658 ip6->ip6_vfc = IPV6_VERSION; 2659 ip6->ip6_plen = 0; 2660 ip6->ip6_nxt = IPPROTO_UDP; 2661 ip6->ip6_hlim = sc->vxl_ttl; 2662 ip6->ip6_src = *srcaddr; 2663 ip6->ip6_dst = *dstaddr; 2664 2665 vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport); 2666 2667 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 
1 : 0; 2668 m->m_flags &= ~(M_MCAST | M_BCAST); 2669 2670 ro = NULL; 2671 m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; 2672 if (m->m_pkthdr.csum_flags != 0) { 2673 /* 2674 * HW checksum (L3 and/or L4) or TSO has been requested. Look 2675 * up the ifnet for the outbound route and verify that the 2676 * outbound ifnet can perform the requested operation on the 2677 * inner frame. 2678 */ 2679 bzero(&route, sizeof(route)); 2680 ro = &route; 2681 sin6 = (struct sockaddr_in6 *)&ro->ro_dst; 2682 sin6->sin6_family = AF_INET6; 2683 sin6->sin6_len = sizeof(*sin6); 2684 sin6->sin6_addr = ip6->ip6_dst; 2685 ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0, 2686 NHR_NONE, 0); 2687 if (ro->ro_nh == NULL) { 2688 m_freem(m); 2689 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2690 return (EHOSTUNREACH); 2691 } 2692 2693 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, 2694 CSUM_ENCAP_VXLAN); 2695 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != 2696 csum_flags) { 2697 if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { 2698 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; 2699 2700 if_printf(ifp, "interface %s is missing hwcaps " 2701 "0x%08x, csum_flags 0x%08x -> 0x%08x, " 2702 "hwassist 0x%08x\n", nh_ifp->if_xname, 2703 csum_flags & ~(uint32_t)nh_ifp->if_hwassist, 2704 m->m_pkthdr.csum_flags, csum_flags, 2705 (uint32_t)nh_ifp->if_hwassist); 2706 } 2707 m_freem(m); 2708 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2709 return (ENXIO); 2710 } 2711 m->m_pkthdr.csum_flags = csum_flags; 2712 if (csum_flags & 2713 (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | 2714 CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { 2715 counter_u64_add(sc->vxl_stats.txcsum, 1); 2716 if (csum_flags & CSUM_INNER_TSO) 2717 counter_u64_add(sc->vxl_stats.tso, 1); 2718 } 2719 } else if (ntohs(dstport) != V_zero_checksum_port) { 2720 struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr)); 2721 2722 hdr->uh_sum = in6_cksum_pseudo(ip6, 2723 m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 
0); 2724 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; 2725 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 2726 } 2727 error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL); 2728 if (error == 0) { 2729 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 2730 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); 2731 if (mcast != 0) 2732 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 2733 } else 2734 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2735 2736 return (error); 2737 #else 2738 m_freem(m); 2739 return (ENOTSUP); 2740 #endif 2741 } 2742 2743 #define MTAG_VXLAN_LOOP 0x7876706c /* vxlp */ 2744 static int 2745 vxlan_transmit(struct ifnet *ifp, struct mbuf *m) 2746 { 2747 struct rm_priotracker tracker; 2748 union vxlan_sockaddr vxlsa; 2749 struct vxlan_softc *sc; 2750 struct vxlan_ftable_entry *fe; 2751 struct ifnet *mcifp; 2752 struct ether_header *eh; 2753 int ipv4, error; 2754 2755 sc = ifp->if_softc; 2756 eh = mtod(m, struct ether_header *); 2757 fe = NULL; 2758 mcifp = NULL; 2759 2760 ETHER_BPF_MTAP(ifp, m); 2761 2762 VXLAN_RLOCK(sc, &tracker); 2763 M_SETFIB(m, sc->vxl_fibnum); 2764 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2765 VXLAN_RUNLOCK(sc, &tracker); 2766 m_freem(m); 2767 return (ENETDOWN); 2768 } 2769 if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_VXLAN_LOOP, 2770 max_vxlan_nesting) != 0)) { 2771 VXLAN_RUNLOCK(sc, &tracker); 2772 m_freem(m); 2773 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2774 return (ELOOP); 2775 } 2776 2777 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) 2778 fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost); 2779 if (fe == NULL) 2780 fe = &sc->vxl_default_fe; 2781 vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa); 2782 2783 ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0; 2784 if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) 2785 mcifp = vxlan_multicast_if_ref(sc, ipv4); 2786 2787 VXLAN_ACQUIRE(sc); 2788 VXLAN_RUNLOCK(sc, &tracker); 2789 2790 if (ipv4 != 0) 2791 error = vxlan_encap4(sc, &vxlsa, m); 2792 else 2793 error = 
vxlan_encap6(sc, &vxlsa, m); 2794 2795 vxlan_release(sc); 2796 if (mcifp != NULL) 2797 if_rele(mcifp); 2798 2799 return (error); 2800 } 2801 2802 static void 2803 vxlan_qflush(struct ifnet *ifp __unused) 2804 { 2805 } 2806 2807 static bool 2808 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb, 2809 const struct sockaddr *srcsa, void *xvso) 2810 { 2811 struct vxlan_socket *vso; 2812 struct vxlan_header *vxh, vxlanhdr; 2813 uint32_t vni; 2814 int error __unused; 2815 2816 M_ASSERTPKTHDR(m); 2817 vso = xvso; 2818 offset += sizeof(struct udphdr); 2819 2820 if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header)) 2821 goto out; 2822 2823 if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) { 2824 m_copydata(m, offset, sizeof(struct vxlan_header), 2825 (caddr_t) &vxlanhdr); 2826 vxh = &vxlanhdr; 2827 } else 2828 vxh = mtodo(m, offset); 2829 2830 /* 2831 * Drop if there is a reserved bit set in either the flags or VNI 2832 * fields of the header. This goes against the specification, but 2833 * a bit set may indicate an unsupported new feature. This matches 2834 * the behavior of the Linux implementation. 2835 */ 2836 if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) || 2837 vxh->vxlh_vni & ~VXLAN_VNI_MASK) 2838 goto out; 2839 2840 vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT; 2841 2842 /* Adjust to the start of the inner Ethernet frame. 
*/ 2843 m_adj_decap(m, offset + sizeof(struct vxlan_header)); 2844 2845 error = vxlan_input(vso, vni, &m, srcsa); 2846 MPASS(error != 0 || m == NULL); 2847 2848 out: 2849 if (m != NULL) 2850 m_freem(m); 2851 2852 return (true); 2853 } 2854 2855 static int 2856 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0, 2857 const struct sockaddr *sa) 2858 { 2859 struct vxlan_softc *sc; 2860 struct ifnet *ifp; 2861 struct mbuf *m; 2862 struct ether_header *eh; 2863 int error; 2864 2865 m = *m0; 2866 2867 if (m->m_pkthdr.len < ETHER_HDR_LEN) 2868 return (EINVAL); 2869 2870 sc = vxlan_socket_lookup_softc(vso, vni); 2871 if (sc == NULL) 2872 return (ENOENT); 2873 2874 ifp = sc->vxl_ifp; 2875 if (m->m_len < ETHER_HDR_LEN && 2876 (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { 2877 *m0 = NULL; 2878 error = ENOBUFS; 2879 goto out; 2880 } 2881 eh = mtod(m, struct ether_header *); 2882 2883 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2884 error = ENETDOWN; 2885 goto out; 2886 } else if (ifp == m->m_pkthdr.rcvif) { 2887 /* XXX Does not catch more complex loops. 
*/ 2888 error = EDEADLK; 2889 goto out; 2890 } 2891 2892 if (sc->vxl_flags & VXLAN_FLAG_LEARN) 2893 vxlan_ftable_learn(sc, sa, eh->ether_shost); 2894 2895 m_clrprotoflags(m); 2896 m->m_pkthdr.rcvif = ifp; 2897 M_SETFIB(m, ifp->if_fib); 2898 if (((ifp->if_capenable & IFCAP_RXCSUM && 2899 m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) || 2900 (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 2901 !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) { 2902 uint32_t csum_flags = 0; 2903 2904 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) 2905 csum_flags |= CSUM_L3_CALC; 2906 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID) 2907 csum_flags |= CSUM_L3_VALID; 2908 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC) 2909 csum_flags |= CSUM_L4_CALC; 2910 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID) 2911 csum_flags |= CSUM_L4_VALID; 2912 m->m_pkthdr.csum_flags = csum_flags; 2913 counter_u64_add(sc->vxl_stats.rxcsum, 1); 2914 } else { 2915 /* clear everything */ 2916 m->m_pkthdr.csum_flags = 0; 2917 m->m_pkthdr.csum_data = 0; 2918 } 2919 2920 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 2921 (*ifp->if_input)(ifp, m); 2922 *m0 = NULL; 2923 error = 0; 2924 2925 out: 2926 vxlan_release(sc); 2927 return (error); 2928 } 2929 2930 static void 2931 vxlan_stats_alloc(struct vxlan_softc *sc) 2932 { 2933 struct vxlan_statistics *stats = &sc->vxl_stats; 2934 2935 stats->txcsum = counter_u64_alloc(M_WAITOK); 2936 stats->tso = counter_u64_alloc(M_WAITOK); 2937 stats->rxcsum = counter_u64_alloc(M_WAITOK); 2938 } 2939 2940 static void 2941 vxlan_stats_free(struct vxlan_softc *sc) 2942 { 2943 struct vxlan_statistics *stats = &sc->vxl_stats; 2944 2945 counter_u64_free(stats->txcsum); 2946 counter_u64_free(stats->tso); 2947 counter_u64_free(stats->rxcsum); 2948 } 2949 2950 static void 2951 vxlan_set_default_config(struct vxlan_softc *sc) 2952 { 2953 2954 sc->vxl_flags |= VXLAN_FLAG_LEARN; 2955 2956 sc->vxl_vni = VXLAN_VNI_MAX; 2957 sc->vxl_ttl = IPDEFTTL; 2958 2959 if (!vxlan_tunable_int(sc, 
"legacy_port", vxlan_legacy_port)) { 2960 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT); 2961 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT); 2962 } else { 2963 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); 2964 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); 2965 } 2966 2967 sc->vxl_min_port = V_ipport_firstauto; 2968 sc->vxl_max_port = V_ipport_lastauto; 2969 2970 sc->vxl_ftable_max = VXLAN_FTABLE_MAX; 2971 sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT; 2972 } 2973 2974 static int 2975 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp) 2976 { 2977 2978 #ifndef INET 2979 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 | 2980 VXLAN_PARAM_WITH_REMOTE_ADDR4)) 2981 return (EAFNOSUPPORT); 2982 #endif 2983 2984 #ifndef INET6 2985 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 | 2986 VXLAN_PARAM_WITH_REMOTE_ADDR6)) 2987 return (EAFNOSUPPORT); 2988 #else 2989 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { 2990 int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa); 2991 if (error) 2992 return (error); 2993 } 2994 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { 2995 int error = vxlan_sockaddr_in6_embedscope( 2996 &vxlp->vxlp_remote_sa); 2997 if (error) 2998 return (error); 2999 } 3000 #endif 3001 3002 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) { 3003 if (vxlan_check_vni(vxlp->vxlp_vni) == 0) 3004 sc->vxl_vni = vxlp->vxlp_vni; 3005 } 3006 3007 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) { 3008 sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in); 3009 sc->vxl_src_addr.in4.sin_family = AF_INET; 3010 sc->vxl_src_addr.in4.sin_addr = 3011 vxlp->vxlp_local_sa.in4.sin_addr; 3012 } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { 3013 sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6); 3014 sc->vxl_src_addr.in6.sin6_family = AF_INET6; 3015 sc->vxl_src_addr.in6.sin6_addr = 3016 vxlp->vxlp_local_sa.in6.sin6_addr; 3017 } 3018 3019 if (vxlp->vxlp_with & 
VXLAN_PARAM_WITH_REMOTE_ADDR4) { 3020 sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in); 3021 sc->vxl_dst_addr.in4.sin_family = AF_INET; 3022 sc->vxl_dst_addr.in4.sin_addr = 3023 vxlp->vxlp_remote_sa.in4.sin_addr; 3024 } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { 3025 sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6); 3026 sc->vxl_dst_addr.in6.sin6_family = AF_INET6; 3027 sc->vxl_dst_addr.in6.sin6_addr = 3028 vxlp->vxlp_remote_sa.in6.sin6_addr; 3029 } 3030 3031 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT) 3032 sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port); 3033 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT) 3034 sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port); 3035 3036 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) { 3037 if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) { 3038 sc->vxl_min_port = vxlp->vxlp_min_port; 3039 sc->vxl_max_port = vxlp->vxlp_max_port; 3040 } 3041 } 3042 3043 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF) 3044 strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ); 3045 3046 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) { 3047 if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0) 3048 sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout; 3049 } 3050 3051 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) { 3052 if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0) 3053 sc->vxl_ftable_max = vxlp->vxlp_ftable_max; 3054 } 3055 3056 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) { 3057 if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0) 3058 sc->vxl_ttl = vxlp->vxlp_ttl; 3059 } 3060 3061 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) { 3062 if (vxlp->vxlp_learn == 0) 3063 sc->vxl_flags &= ~VXLAN_FLAG_LEARN; 3064 } 3065 3066 return (0); 3067 } 3068 3069 static int 3070 vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap) 3071 { 3072 int mask = reqcap ^ ifp->if_capenable; 3073 3074 /* Disable TSO if tx checksums are disabled. 
*/ 3075 if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) && 3076 reqcap & IFCAP_TSO4) { 3077 reqcap &= ~IFCAP_TSO4; 3078 if_printf(ifp, "tso4 disabled due to -txcsum.\n"); 3079 } 3080 if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) && 3081 reqcap & IFCAP_TSO6) { 3082 reqcap &= ~IFCAP_TSO6; 3083 if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); 3084 } 3085 3086 /* Do not enable TSO if tx checksums are disabled. */ 3087 if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 && 3088 !(reqcap & IFCAP_TXCSUM)) { 3089 if_printf(ifp, "enable txcsum first.\n"); 3090 return (EAGAIN); 3091 } 3092 if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 && 3093 !(reqcap & IFCAP_TXCSUM_IPV6)) { 3094 if_printf(ifp, "enable txcsum6 first.\n"); 3095 return (EAGAIN); 3096 } 3097 3098 sc->vxl_reqcap = reqcap; 3099 return (0); 3100 } 3101 3102 /* 3103 * A VXLAN interface inherits the capabilities of the vxlandev or the interface 3104 * hosting the vxlanlocal address. 3105 */ 3106 static void 3107 vxlan_set_hwcaps(struct vxlan_softc *sc) 3108 { 3109 struct epoch_tracker et; 3110 struct ifnet *p; 3111 struct ifaddr *ifa; 3112 u_long hwa; 3113 int cap, ena; 3114 bool rel; 3115 struct ifnet *ifp = sc->vxl_ifp; 3116 3117 /* reset caps */ 3118 ifp->if_capabilities &= VXLAN_BASIC_IFCAPS; 3119 ifp->if_capenable &= VXLAN_BASIC_IFCAPS; 3120 ifp->if_hwassist = 0; 3121 3122 NET_EPOCH_ENTER(et); 3123 CURVNET_SET(ifp->if_vnet); 3124 3125 rel = false; 3126 p = NULL; 3127 if (sc->vxl_mc_ifname[0] != '\0') { 3128 rel = true; 3129 p = ifunit_ref(sc->vxl_mc_ifname); 3130 } else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) { 3131 if (sc->vxl_src_addr.sa.sa_family == AF_INET) { 3132 struct sockaddr_in in4 = sc->vxl_src_addr.in4; 3133 3134 in4.sin_port = 0; 3135 ifa = ifa_ifwithaddr((struct sockaddr *)&in4); 3136 if (ifa != NULL) 3137 p = ifa->ifa_ifp; 3138 } else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) { 3139 struct sockaddr_in6 in6 = sc->vxl_src_addr.in6; 3140 3141 in6.sin6_port = 0; 3142 ifa 
= ifa_ifwithaddr((struct sockaddr *)&in6); 3143 if (ifa != NULL) 3144 p = ifa->ifa_ifp; 3145 } 3146 } 3147 if (p == NULL) 3148 goto done; 3149 3150 cap = ena = hwa = 0; 3151 3152 /* checksum offload */ 3153 if (p->if_capabilities & IFCAP_VXLAN_HWCSUM) 3154 cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 3155 if (p->if_capenable & IFCAP_VXLAN_HWCSUM) { 3156 ena |= sc->vxl_reqcap & p->if_capenable & 3157 (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 3158 if (ena & IFCAP_TXCSUM) { 3159 if (p->if_hwassist & CSUM_INNER_IP) 3160 hwa |= CSUM_IP; 3161 if (p->if_hwassist & CSUM_INNER_IP_UDP) 3162 hwa |= CSUM_IP_UDP; 3163 if (p->if_hwassist & CSUM_INNER_IP_TCP) 3164 hwa |= CSUM_IP_TCP; 3165 } 3166 if (ena & IFCAP_TXCSUM_IPV6) { 3167 if (p->if_hwassist & CSUM_INNER_IP6_UDP) 3168 hwa |= CSUM_IP6_UDP; 3169 if (p->if_hwassist & CSUM_INNER_IP6_TCP) 3170 hwa |= CSUM_IP6_TCP; 3171 } 3172 } 3173 3174 /* hardware TSO */ 3175 if (p->if_capabilities & IFCAP_VXLAN_HWTSO) { 3176 cap |= p->if_capabilities & IFCAP_TSO; 3177 if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen) 3178 ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen; 3179 else 3180 ifp->if_hw_tsomax = p->if_hw_tsomax; 3181 /* XXX: tsomaxsegcount decrement is cxgbe specific */ 3182 ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1; 3183 ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize; 3184 } 3185 if (p->if_capenable & IFCAP_VXLAN_HWTSO) { 3186 ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO; 3187 if (ena & IFCAP_TSO) { 3188 if (p->if_hwassist & CSUM_INNER_IP_TSO) 3189 hwa |= CSUM_IP_TSO; 3190 if (p->if_hwassist & CSUM_INNER_IP6_TSO) 3191 hwa |= CSUM_IP6_TSO; 3192 } 3193 } 3194 3195 ifp->if_capabilities |= cap; 3196 ifp->if_capenable |= ena; 3197 ifp->if_hwassist |= hwa; 3198 if (rel) 3199 if_rele(p); 3200 done: 3201 CURVNET_RESTORE(); 3202 NET_EPOCH_EXIT(et); 3203 } 3204 3205 static int 3206 vxlan_clone_create(struct if_clone *ifc, char *name, size_t len, 3207 struct ifc_data *ifd, struct ifnet **ifpp) 3208 { 
3209 struct vxlan_softc *sc; 3210 struct ifnet *ifp; 3211 struct ifvxlanparam vxlp; 3212 int error; 3213 3214 sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO); 3215 sc->vxl_unit = ifd->unit; 3216 sc->vxl_fibnum = curthread->td_proc->p_fibnum; 3217 vxlan_set_default_config(sc); 3218 3219 if (ifd->params != NULL) { 3220 error = ifc_copyin(ifd, &vxlp, sizeof(vxlp)); 3221 if (error) 3222 goto fail; 3223 3224 error = vxlan_set_user_config(sc, &vxlp); 3225 if (error) 3226 goto fail; 3227 } 3228 3229 vxlan_stats_alloc(sc); 3230 ifp = if_alloc(IFT_ETHER); 3231 sc->vxl_ifp = ifp; 3232 rm_init(&sc->vxl_lock, "vxlanrm"); 3233 callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0); 3234 sc->vxl_port_hash_key = arc4random(); 3235 vxlan_ftable_init(sc); 3236 3237 vxlan_sysctl_setup(sc); 3238 3239 ifp->if_softc = sc; 3240 if_initname(ifp, vxlan_name, ifd->unit); 3241 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3242 ifp->if_init = vxlan_init; 3243 ifp->if_ioctl = vxlan_ioctl; 3244 ifp->if_transmit = vxlan_transmit; 3245 ifp->if_qflush = vxlan_qflush; 3246 ifp->if_capabilities = VXLAN_BASIC_IFCAPS; 3247 ifp->if_capenable = VXLAN_BASIC_IFCAPS; 3248 sc->vxl_reqcap = -1; 3249 vxlan_set_hwcaps(sc); 3250 3251 ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status); 3252 ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL); 3253 ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO); 3254 3255 ether_gen_addr(ifp, &sc->vxl_hwaddr); 3256 ether_ifattach(ifp, sc->vxl_hwaddr.octet); 3257 3258 ifp->if_baudrate = 0; 3259 3260 VXLAN_WLOCK(sc); 3261 vxlan_setup_interface_hdrlen(sc); 3262 VXLAN_WUNLOCK(sc); 3263 *ifpp = ifp; 3264 3265 return (0); 3266 3267 fail: 3268 free(sc, M_VXLAN); 3269 return (error); 3270 } 3271 3272 static int 3273 vxlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 3274 { 3275 struct vxlan_softc *sc; 3276 3277 sc = ifp->if_softc; 3278 3279 vxlan_teardown(sc); 3280 3281 vxlan_ftable_flush(sc, 1); 
3282 3283 ether_ifdetach(ifp); 3284 if_free(ifp); 3285 ifmedia_removeall(&sc->vxl_media); 3286 3287 vxlan_ftable_fini(sc); 3288 3289 vxlan_sysctl_destroy(sc); 3290 rm_destroy(&sc->vxl_lock); 3291 vxlan_stats_free(sc); 3292 free(sc, M_VXLAN); 3293 3294 return (0); 3295 } 3296 3297 /* BMV: Taken from if_bridge. */ 3298 static uint32_t 3299 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr) 3300 { 3301 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key; 3302 3303 b += addr[5] << 8; 3304 b += addr[4]; 3305 a += addr[3] << 24; 3306 a += addr[2] << 16; 3307 a += addr[1] << 8; 3308 a += addr[0]; 3309 3310 /* 3311 * The following hash function is adapted from "Hash Functions" by Bob Jenkins 3312 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 3313 */ 3314 #define mix(a, b, c) \ 3315 do { \ 3316 a -= b; a -= c; a ^= (c >> 13); \ 3317 b -= c; b -= a; b ^= (a << 8); \ 3318 c -= a; c -= b; c ^= (b >> 13); \ 3319 a -= b; a -= c; a ^= (c >> 12); \ 3320 b -= c; b -= a; b ^= (a << 16); \ 3321 c -= a; c -= b; c ^= (b >> 5); \ 3322 a -= b; a -= c; a ^= (c >> 3); \ 3323 b -= c; b -= a; b ^= (a << 10); \ 3324 c -= a; c -= b; c ^= (b >> 15); \ 3325 } while (0) 3326 3327 mix(a, b, c); 3328 3329 #undef mix 3330 3331 return (c); 3332 } 3333 3334 static int 3335 vxlan_media_change(struct ifnet *ifp) 3336 { 3337 3338 /* Ignore. 
*/ 3339 return (0); 3340 } 3341 3342 static void 3343 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3344 { 3345 3346 ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID; 3347 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3348 } 3349 3350 static int 3351 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr, 3352 const struct sockaddr *sa) 3353 { 3354 3355 return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len)); 3356 } 3357 3358 static void 3359 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr, 3360 const struct sockaddr *sa) 3361 { 3362 3363 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3364 bzero(vxladdr, sizeof(*vxladdr)); 3365 3366 if (sa->sa_family == AF_INET) { 3367 vxladdr->in4 = *satoconstsin(sa); 3368 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3369 } else if (sa->sa_family == AF_INET6) { 3370 vxladdr->in6 = *satoconstsin6(sa); 3371 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3372 } 3373 } 3374 3375 static int 3376 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr, 3377 const struct sockaddr *sa) 3378 { 3379 int equal; 3380 3381 if (sa->sa_family == AF_INET) { 3382 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3383 equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr; 3384 } else if (sa->sa_family == AF_INET6) { 3385 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3386 equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr); 3387 } else 3388 equal = 0; 3389 3390 return (equal); 3391 } 3392 3393 static void 3394 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr, 3395 const struct sockaddr *sa) 3396 { 3397 3398 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3399 3400 if (sa->sa_family == AF_INET) { 3401 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3402 vxladdr->in4.sin_family = AF_INET; 3403 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3404 vxladdr->in4.sin_addr = *in4; 3405 } else if (sa->sa_family == AF_INET6) { 3406 const struct in6_addr *in6 = 
&satoconstsin6(sa)->sin6_addr; 3407 vxladdr->in6.sin6_family = AF_INET6; 3408 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3409 vxladdr->in6.sin6_addr = *in6; 3410 } 3411 } 3412 3413 static int 3414 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec) 3415 { 3416 const struct sockaddr *sa; 3417 int supported; 3418 3419 sa = &vxladdr->sa; 3420 supported = 0; 3421 3422 if (sa->sa_family == AF_UNSPEC && unspec != 0) { 3423 supported = 1; 3424 } else if (sa->sa_family == AF_INET) { 3425 #ifdef INET 3426 supported = 1; 3427 #endif 3428 } else if (sa->sa_family == AF_INET6) { 3429 #ifdef INET6 3430 supported = 1; 3431 #endif 3432 } 3433 3434 return (supported); 3435 } 3436 3437 static int 3438 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr) 3439 { 3440 const struct sockaddr *sa; 3441 int any; 3442 3443 sa = &vxladdr->sa; 3444 3445 if (sa->sa_family == AF_INET) { 3446 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3447 any = in4->s_addr == INADDR_ANY; 3448 } else if (sa->sa_family == AF_INET6) { 3449 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3450 any = IN6_IS_ADDR_UNSPECIFIED(in6); 3451 } else 3452 any = -1; 3453 3454 return (any); 3455 } 3456 3457 static int 3458 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr) 3459 { 3460 const struct sockaddr *sa; 3461 int mc; 3462 3463 sa = &vxladdr->sa; 3464 3465 if (sa->sa_family == AF_INET) { 3466 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3467 mc = IN_MULTICAST(ntohl(in4->s_addr)); 3468 } else if (sa->sa_family == AF_INET6) { 3469 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3470 mc = IN6_IS_ADDR_MULTICAST(in6); 3471 } else 3472 mc = -1; 3473 3474 return (mc); 3475 } 3476 3477 static int 3478 vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr) 3479 { 3480 int error; 3481 3482 MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr)); 3483 #ifdef INET6 3484 error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone); 3485 #else 3486 
error = EAFNOSUPPORT; 3487 #endif 3488 3489 return (error); 3490 } 3491 3492 static int 3493 vxlan_can_change_config(struct vxlan_softc *sc) 3494 { 3495 struct ifnet *ifp; 3496 3497 ifp = sc->vxl_ifp; 3498 VXLAN_LOCK_ASSERT(sc); 3499 3500 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3501 return (0); 3502 if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN)) 3503 return (0); 3504 3505 return (1); 3506 } 3507 3508 static int 3509 vxlan_check_vni(uint32_t vni) 3510 { 3511 3512 return (vni >= VXLAN_VNI_MAX); 3513 } 3514 3515 static int 3516 vxlan_check_ttl(int ttl) 3517 { 3518 3519 return (ttl > MAXTTL); 3520 } 3521 3522 static int 3523 vxlan_check_ftable_timeout(uint32_t timeout) 3524 { 3525 3526 return (timeout > VXLAN_FTABLE_MAX_TIMEOUT); 3527 } 3528 3529 static int 3530 vxlan_check_ftable_max(uint32_t max) 3531 { 3532 3533 return (max > VXLAN_FTABLE_MAX); 3534 } 3535 3536 static void 3537 vxlan_sysctl_setup(struct vxlan_softc *sc) 3538 { 3539 struct sysctl_ctx_list *ctx; 3540 struct sysctl_oid *node; 3541 struct vxlan_statistics *stats; 3542 char namebuf[8]; 3543 3544 ctx = &sc->vxl_sysctl_ctx; 3545 stats = &sc->vxl_stats; 3546 snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit); 3547 3548 sysctl_ctx_init(ctx); 3549 sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx, 3550 SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf, 3551 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3552 3553 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), 3554 OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3555 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count", 3556 CTLFLAG_RD, &sc->vxl_ftable_cnt, 0, 3557 "Number of entries in forwarding table"); 3558 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max", 3559 CTLFLAG_RD, &sc->vxl_ftable_max, 0, 3560 "Maximum number of entries allowed in forwarding table"); 3561 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout", 3562 CTLFLAG_RD, &sc->vxl_ftable_timeout, 0, 3563 "Number of 
seconds between prunes of the forwarding table"); 3564 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump", 3565 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP, 3566 sc, 0, vxlan_ftable_sysctl_dump, "A", 3567 "Dump the forwarding table entries"); 3568 3569 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), 3570 OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3571 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, 3572 "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0, 3573 "Fowarding table reached maximum entries"); 3574 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, 3575 "ftable_lock_upgrade_failed", CTLFLAG_RD, 3576 &stats->ftable_lock_upgrade_failed, 0, 3577 "Forwarding table update required lock upgrade"); 3578 3579 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum", 3580 CTLFLAG_RD, &stats->txcsum, 3581 "# of times hardware assisted with tx checksum"); 3582 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso", 3583 CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO"); 3584 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum", 3585 CTLFLAG_RD, &stats->rxcsum, 3586 "# of times hardware assisted with rx checksum"); 3587 } 3588 3589 static void 3590 vxlan_sysctl_destroy(struct vxlan_softc *sc) 3591 { 3592 3593 sysctl_ctx_free(&sc->vxl_sysctl_ctx); 3594 sc->vxl_sysctl_node = NULL; 3595 } 3596 3597 static int 3598 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def) 3599 { 3600 char path[64]; 3601 3602 snprintf(path, sizeof(path), "net.link.vxlan.%d.%s", 3603 sc->vxl_unit, knob); 3604 TUNABLE_INT_FETCH(path, &def); 3605 3606 return (def); 3607 } 3608 3609 static void 3610 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp) 3611 { 3612 struct vxlan_softc_head list; 3613 struct vxlan_socket *vso; 3614 struct vxlan_softc *sc, *tsc; 3615 3616 LIST_INIT(&list); 3617 3618 if (ifp->if_flags & IFF_RENAMING) 3619 return; 3620 
	/* Only multicast-capable interfaces are tracked here. */
	if ((ifp->if_flags & IFF_MULTICAST) == 0)
		return;

	/* Gather affected softcs onto a private list under the list lock. */
	VXLAN_LIST_LOCK();
	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
		vxlan_socket_ifdetach(vso, ifp, &list);
	VXLAN_LIST_UNLOCK();

	/* Tear each one down outside the list lock. */
	LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
		LIST_REMOVE(sc, vxl_ifdetach_list);

		sx_xlock(&vxlan_sx);
		VXLAN_WLOCK(sc);
		/* Wait for any in-flight init before tearing down. */
		if (sc->vxl_flags & VXLAN_FLAG_INIT)
			vxlan_init_wait(sc);
		vxlan_teardown_locked(sc);
		sx_xunlock(&vxlan_sx);
	}
}

/*
 * Module load: initialize global state, register the ifnet departure
 * event handler, and attach the interface cloner.
 */
static void
vxlan_load(void)
{

	mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
	LIST_INIT(&vxlan_socket_list);
	vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
	    vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);

	struct if_clone_addreq req = {
		.create_f = vxlan_clone_create,
		.destroy_f = vxlan_clone_destroy,
		.flags = IFC_F_AUTOUNIT,
	};
	vxlan_cloner = ifc_attach_cloner(vxlan_name, &req);
}

/*
 * Module unload: undo vxlan_load().  The cloner detach destroys all
 * remaining interfaces, so the socket list must be empty afterwards.
 */
static void
vxlan_unload(void)
{

	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
	    vxlan_ifdetach_event_tag);
	ifc_detach_cloner(vxlan_cloner);
	mtx_destroy(&vxlan_list_mtx);
	MPASS(LIST_EMPTY(&vxlan_socket_list));
}

/*
 * Module event handler: dispatch load/unload; other events are rejected
 * with ENOTSUP.
 */
static int
vxlan_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
		vxlan_load();
		break;
	case MOD_UNLOAD:
		vxlan_unload();
		break;
	default:
		error = ENOTSUP;
		break;
	}

	return (error);
}

static moduledata_t vxlan_mod = {
	"if_vxlan",
	vxlan_modevent,
	0
};

DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_vxlan, 1);