/*-
 * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 * Copyright (c) 2020, Chelsio Communications.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/hash.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/refcount.h>
#include <sys/rmlock.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vxlan.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/route/nhop.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/ip_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/in_fib.h>
#include <netinet6/in6_fib.h>

#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>

struct vxlan_softc;
LIST_HEAD(vxlan_softc_head, vxlan_softc);

struct sx vxlan_sx;
SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock");

struct vxlan_socket_mc_info {
	union vxlan_sockaddr		 vxlsomc_saddr;
	union vxlan_sockaddr		 vxlsomc_gaddr;
	int				 vxlsomc_ifidx;
	int				 vxlsomc_users;
};

/*
 * The maximum MTU of an encapsulated ethernet frame within an IPv4/UDP
 * packet.
 */
#define VXLAN_MAX_MTU	(IP_MAXPACKET - \
    60 /* Maximum IPv4 header len */ - \
    sizeof(struct udphdr) - \
    sizeof(struct vxlan_header) - \
    ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN)
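
/*
 * For reference, with IP_MAXPACKET = 65535, a maximal 60-byte IPv4 header,
 * an 8-byte UDP header, the 8-byte VXLAN header, a 14-byte ethernet header
 * and a 4-byte VLAN tag, VXLAN_MAX_MTU works out to
 * 65535 - 60 - 8 - 8 - 14 - 4 = 65441 bytes for the inner frame.
 */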

#define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU)

#define VXLAN_SO_MC_MAX_GROUPS		32

#define VXLAN_SO_VNI_HASH_SHIFT		6
#define VXLAN_SO_VNI_HASH_SIZE		(1 << VXLAN_SO_VNI_HASH_SHIFT)
#define VXLAN_SO_VNI_HASH(_vni)		((_vni) % VXLAN_SO_VNI_HASH_SIZE)

struct vxlan_socket {
	struct socket			*vxlso_sock;
	struct rmlock			 vxlso_lock;
	u_int				 vxlso_refcnt;
	union vxlan_sockaddr		 vxlso_laddr;
	LIST_ENTRY(vxlan_socket)	 vxlso_entry;
	struct vxlan_softc_head		 vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
	struct vxlan_socket_mc_info	 vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
};

#define VXLAN_SO_RLOCK(_vso, _p)	rm_rlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_RUNLOCK(_vso, _p)	rm_runlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_WLOCK(_vso)		rm_wlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_WUNLOCK(_vso)		rm_wunlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_LOCK_ASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
#define VXLAN_SO_LOCK_WASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)

#define VXLAN_SO_ACQUIRE(_vso)		refcount_acquire(&(_vso)->vxlso_refcnt)
#define VXLAN_SO_RELEASE(_vso)		refcount_release(&(_vso)->vxlso_refcnt)

struct vxlan_ftable_entry {
	LIST_ENTRY(vxlan_ftable_entry)	 vxlfe_hash;
	uint16_t			 vxlfe_flags;
	uint8_t				 vxlfe_mac[ETHER_ADDR_LEN];
	union vxlan_sockaddr		 vxlfe_raddr;
	time_t				 vxlfe_expire;
};

#define VXLAN_FE_FLAG_DYNAMIC		0x01
#define VXLAN_FE_FLAG_STATIC		0x02

#define VXLAN_FE_IS_DYNAMIC(_fe) \
    ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)

#define VXLAN_SC_FTABLE_SHIFT		9
#define VXLAN_SC_FTABLE_SIZE		(1 << VXLAN_SC_FTABLE_SHIFT)
#define VXLAN_SC_FTABLE_MASK		(VXLAN_SC_FTABLE_SIZE - 1)
#define VXLAN_SC_FTABLE_HASH(_sc, _mac) \
    (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)

LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);

struct vxlan_statistics {
	uint32_t	ftable_nospace;
	uint32_t	ftable_lock_upgrade_failed;
	counter_u64_t	txcsum;
	counter_u64_t	tso;
	counter_u64_t	rxcsum;
};

struct vxlan_softc {
	struct ifnet			*vxl_ifp;
	int				 vxl_reqcap;
	u_int				 vxl_fibnum;
	struct vxlan_socket		*vxl_sock;
	uint32_t			 vxl_vni;
	union vxlan_sockaddr		 vxl_src_addr;
	union vxlan_sockaddr		 vxl_dst_addr;
	uint32_t			 vxl_flags;
#define VXLAN_FLAG_INIT		0x0001
#define VXLAN_FLAG_TEARDOWN	0x0002
#define VXLAN_FLAG_LEARN	0x0004
#define VXLAN_FLAG_USER_MTU	0x0008

	uint32_t			 vxl_port_hash_key;
	uint16_t			 vxl_min_port;
	uint16_t			 vxl_max_port;
	uint8_t				 vxl_ttl;

	/* Lookup table from MAC address to forwarding entry. */
	uint32_t			 vxl_ftable_cnt;
	uint32_t			 vxl_ftable_max;
	uint32_t			 vxl_ftable_timeout;
	uint32_t			 vxl_ftable_hash_key;
	struct vxlan_ftable_head	*vxl_ftable;

	/* Derived from vxl_dst_addr. */
	struct vxlan_ftable_entry	 vxl_default_fe;

	struct ip_moptions		*vxl_im4o;
	struct ip6_moptions		*vxl_im6o;

	struct rmlock			 vxl_lock;
	volatile u_int			 vxl_refcnt;

	int				 vxl_unit;
	int				 vxl_vso_mc_index;
	struct vxlan_statistics		 vxl_stats;
	struct sysctl_oid		*vxl_sysctl_node;
	struct sysctl_ctx_list		 vxl_sysctl_ctx;
	struct callout			 vxl_callout;
	struct ether_addr		 vxl_hwaddr;
	int				 vxl_mc_ifindex;
	struct ifnet			*vxl_mc_ifp;
	struct ifmedia			 vxl_media;
	char				 vxl_mc_ifname[IFNAMSIZ];
	LIST_ENTRY(vxlan_softc)		 vxl_entry;
	LIST_ENTRY(vxlan_softc)		 vxl_ifdetach_list;

	/* For rate limiting errors on the tx fast path. */
	struct timeval			 err_time;
	int				 err_pps;
};

#define VXLAN_RLOCK(_sc, _p)	rm_rlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_WLOCK(_sc)	rm_wlock(&(_sc)->vxl_lock)
#define VXLAN_WUNLOCK(_sc)	rm_wunlock(&(_sc)->vxl_lock)
#define VXLAN_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->vxl_lock)
#define VXLAN_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
#define VXLAN_LOCK_WASSERT(_sc)	rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
#define VXLAN_UNLOCK(_sc, _p) do {		\
	if (VXLAN_LOCK_WOWNED(_sc))		\
		VXLAN_WUNLOCK(_sc);		\
	else					\
		VXLAN_RUNLOCK(_sc, _p);		\
} while (0)

#define VXLAN_ACQUIRE(_sc)	refcount_acquire(&(_sc)->vxl_refcnt)
#define VXLAN_RELEASE(_sc)	refcount_release(&(_sc)->vxl_refcnt)

#define	satoconstsin(sa)	((const struct sockaddr_in *)(sa))
#define	satoconstsin6(sa)	((const struct sockaddr_in6 *)(sa))

struct vxlanudphdr {
	struct udphdr		vxlh_udp;
	struct vxlan_header	vxlh_hdr;
} __packed;

static int	vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
static void	vxlan_ftable_init(struct vxlan_softc *);
static void	vxlan_ftable_fini(struct vxlan_softc *);
static void	vxlan_ftable_flush(struct vxlan_softc *, int);
static void	vxlan_ftable_expire(struct vxlan_softc *);
static int	vxlan_ftable_update_locked(struct vxlan_softc *,
		    const union vxlan_sockaddr *, const uint8_t *,
		    struct rm_priotracker *);
static int	vxlan_ftable_learn(struct vxlan_softc *,
		    const struct sockaddr *, const uint8_t *);
static int	vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);

static struct vxlan_ftable_entry *
		vxlan_ftable_entry_alloc(void);
static void	vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
static void	vxlan_ftable_entry_init(struct vxlan_softc *,
		    struct vxlan_ftable_entry *, const uint8_t *,
		    const struct sockaddr *, uint32_t);
static void	vxlan_ftable_entry_destroy(struct vxlan_softc *,
		    struct vxlan_ftable_entry *);
static int	vxlan_ftable_entry_insert(struct vxlan_softc *,
		    struct vxlan_ftable_entry *);
static struct vxlan_ftable_entry *
		vxlan_ftable_entry_lookup(struct vxlan_softc *,
		    const uint8_t *);
static void	vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
		    struct sbuf *);

static struct vxlan_socket *
		vxlan_socket_alloc(const union vxlan_sockaddr *);
static void	vxlan_socket_destroy(struct vxlan_socket *);
static void	vxlan_socket_release(struct vxlan_socket *);
static struct vxlan_socket *
		vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
static void	vxlan_socket_insert(struct vxlan_socket *);
static int	vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
static int	vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
static int	vxlan_socket_create(struct ifnet *, int,
		    const union vxlan_sockaddr *, struct vxlan_socket **);
static void	vxlan_socket_ifdetach(struct vxlan_socket *,
		    struct ifnet *, struct vxlan_softc_head *);

static struct vxlan_socket *
		vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_mc_info_match(
		    const struct vxlan_socket_mc_info *,
		    const union vxlan_sockaddr *,
		    const union vxlan_sockaddr *, int);
static int	vxlan_socket_mc_join_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
		    int *, union vxlan_sockaddr *);
static int	vxlan_socket_mc_leave_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *,
		    const union vxlan_sockaddr *, int);
static int	vxlan_socket_mc_add_group(struct vxlan_socket *,
		    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
		    int, int *);
static void	vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
		    int);

static struct vxlan_softc *
		vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
		    uint32_t);
static struct vxlan_softc *
		vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
static int	vxlan_socket_insert_softc(struct vxlan_socket *,
		    struct vxlan_softc *);
static void	vxlan_socket_remove_softc(struct vxlan_socket *,
		    struct vxlan_softc *);

static struct ifnet *
		vxlan_multicast_if_ref(struct vxlan_softc *, int);
static void	vxlan_free_multicast(struct vxlan_softc *);
static int	vxlan_setup_multicast_interface(struct vxlan_softc *);

static int	vxlan_setup_multicast(struct vxlan_softc *);
static int	vxlan_setup_socket(struct vxlan_softc *);
#ifdef INET6
static void	vxlan_setup_zero_checksum_port(struct vxlan_softc *);
#endif
static void	vxlan_setup_interface_hdrlen(struct vxlan_softc *);
static int	vxlan_valid_init_config(struct vxlan_softc *);
static void	vxlan_init_wait(struct vxlan_softc *);
static void	vxlan_init_complete(struct vxlan_softc *);
static void	vxlan_init(void *);
static void	vxlan_release(struct vxlan_softc *);
static void	vxlan_teardown_wait(struct vxlan_softc *);
static void	vxlan_teardown_complete(struct vxlan_softc *);
static void	vxlan_teardown_locked(struct vxlan_softc *);
static void	vxlan_teardown(struct vxlan_softc *);
static void	vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
		    struct vxlan_softc_head *);
static void	vxlan_timer(void *);

static int	vxlan_ctrl_get_config(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_multicast_if(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
static int	vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
static int	vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
static int	vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
static int	vxlan_ctrl_flush(struct vxlan_softc *, void *);
static int	vxlan_ioctl_drvspec(struct vxlan_softc *,
		    struct ifdrv *, int);
static int	vxlan_ioctl_ifflags(struct vxlan_softc *);
static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);

#if defined(INET) || defined(INET6)
static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
static void	vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
		    int, uint16_t, uint16_t);
#endif
static int	vxlan_encap4(struct vxlan_softc *,
		    const union vxlan_sockaddr *, struct mbuf *);
static int	vxlan_encap6(struct vxlan_softc *,
		    const union vxlan_sockaddr *, struct mbuf *);
static int	vxlan_transmit(struct ifnet *, struct mbuf *);
static void	vxlan_qflush(struct ifnet *);
static bool	vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
		    const struct sockaddr *, void *);
static int	vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
		    const struct sockaddr *);

static void	vxlan_stats_alloc(struct vxlan_softc *);
static void	vxlan_stats_free(struct vxlan_softc *);
static void	vxlan_set_default_config(struct vxlan_softc *);
static int	vxlan_set_user_config(struct vxlan_softc *,
		    struct ifvxlanparam *);
static int	vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int);
static void	vxlan_set_hwcaps(struct vxlan_softc *);
static int	vxlan_clone_create(struct if_clone *, char *, size_t,
		    struct ifc_data *, struct ifnet **);
static int	vxlan_clone_destroy(struct if_clone *, struct ifnet *,
		    uint32_t);

static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
static int	vxlan_media_change(struct ifnet *);
static void	vxlan_media_status(struct ifnet *, struct ifmediareq *);

static int	vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
		    const struct sockaddr *);
static void	vxlan_sockaddr_copy(union vxlan_sockaddr *,
		    const struct sockaddr *);
static int	vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
		    const struct sockaddr *);
static void	vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
		    const struct sockaddr *);
static int	vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
static int	vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
static int	vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *);

static int	vxlan_can_change_config(struct vxlan_softc *);
static int	vxlan_check_vni(uint32_t);
static int	vxlan_check_ttl(int);
static int	vxlan_check_ftable_timeout(uint32_t);
static int	vxlan_check_ftable_max(uint32_t);

static void	vxlan_sysctl_setup(struct vxlan_softc *);
static void	vxlan_sysctl_destroy(struct vxlan_softc *);
static int	vxlan_tunable_int(struct vxlan_softc *, const char *, int);

static void	vxlan_ifdetach_event(void *, struct ifnet *);
static void	vxlan_load(void);
static void	vxlan_unload(void);
static int	vxlan_modevent(module_t, int, void *);

static const char vxlan_name[] = "vxlan";
static MALLOC_DEFINE(M_VXLAN, vxlan_name,
    "Virtual eXtensible LAN Interface");
static struct if_clone *vxlan_cloner;

static struct mtx vxlan_list_mtx;
#define VXLAN_LIST_LOCK()	mtx_lock(&vxlan_list_mtx)
#define VXLAN_LIST_UNLOCK()	mtx_unlock(&vxlan_list_mtx)

static LIST_HEAD(, vxlan_socket) vxlan_socket_list =
    LIST_HEAD_INITIALIZER(vxlan_socket_list);

static eventhandler_tag vxlan_ifdetach_event_tag;

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Virtual eXtensible Local Area Network");

static int vxlan_legacy_port = 0;
SYSCTL_INT(_net_link_vxlan, OID_AUTO, legacy_port, CTLFLAG_RDTUN,
    &vxlan_legacy_port, 0, "Use legacy port");
static int vxlan_reuse_port = 0;
SYSCTL_INT(_net_link_vxlan, OID_AUTO, reuse_port, CTLFLAG_RDTUN,
    &vxlan_reuse_port, 0, "Re-use port");

/*
 * This macro controls the default upper limit on the nesting of vxlan
 * tunnels.  By default it is 3: the overhead of an IPv6 vxlan tunnel is
 * 70 bytes, so three levels add at most 210 bytes of overhead, leaving the
 * innermost tunnel an MTU of 1290, which still meets the IPv6 minimum MTU
 * of 1280.  Be careful when configuring tunnels with a raised limit: a
 * large number of nested tunnels can crash the system.
 */
#ifndef MAX_VXLAN_NEST
#define MAX_VXLAN_NEST	3
#endif
static int max_vxlan_nesting = MAX_VXLAN_NEST;
SYSCTL_INT(_net_link_vxlan, OID_AUTO, max_nesting, CTLFLAG_RW,
    &max_vxlan_nesting, 0, "Max nested tunnels");

/* Default maximum number of addresses in the forwarding table. */
#ifndef VXLAN_FTABLE_MAX
#define VXLAN_FTABLE_MAX	2000
#endif

/* Timeout (in seconds) of addresses learned in the forwarding table. */
#ifndef VXLAN_FTABLE_TIMEOUT
#define VXLAN_FTABLE_TIMEOUT	(20 * 60)
#endif

/*
 * Maximum timeout (in seconds) of addresses learned in the forwarding
 * table.
 */
#ifndef VXLAN_FTABLE_MAX_TIMEOUT
#define VXLAN_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)
#endif

/* Number of seconds between pruning attempts of the forwarding table. */
#ifndef VXLAN_FTABLE_PRUNE
#define VXLAN_FTABLE_PRUNE	(5 * 60)
#endif

static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;

struct vxlan_control {
	int	(*vxlc_func)(struct vxlan_softc *, void *);
	int	vxlc_argsize;
	int	vxlc_flags;
#define VXLAN_CTRL_FLAG_COPYIN	0x01
#define VXLAN_CTRL_FLAG_COPYOUT	0x02
#define VXLAN_CTRL_FLAG_SUSER	0x04
};

static const struct vxlan_control vxlan_control_table[] = {
	[VXLAN_CMD_GET_CONFIG] =
	    { vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
	      VXLAN_CTRL_FLAG_COPYOUT },
	[VXLAN_CMD_SET_VNI] =
	    { vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_LOCAL_ADDR] =
	    { vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_REMOTE_ADDR] =
	    { vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_LOCAL_PORT] =
	    { vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_REMOTE_PORT] =
	    { vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_PORT_RANGE] =
	    { vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_FTABLE_TIMEOUT] =
	    { vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_FTABLE_MAX] =
	    { vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_MULTICAST_IF] =
	    { vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_TTL] =
	    { vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_SET_LEARN] =
	    { vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_FTABLE_ENTRY_ADD] =
	    { vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_FTABLE_ENTRY_REM] =
	    { vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
	[VXLAN_CMD_FLUSH] =
	    { vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
	      VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER },
};

static const int vxlan_control_table_size = nitems(vxlan_control_table);
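
/*
 * The table above is indexed by the VXLAN_CMD_* values from net/if_vxlan.h.
 * Userland (typically ifconfig(8)) reaches these handlers through the
 * SIOCGDRVSPEC/SIOCSDRVSPEC interface ioctls: vxlan_ioctl_drvspec() below
 * bounds-checks ifd_cmd against this table, enforces the privilege and
 * argument-size requirements, and copies the argument structure in and/or
 * out as indicated by the entry's flags.
 */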

static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int i, d;

	for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++)
		d = ((int)a[i]) - ((int)b[i]);

	return (d);
}

static void
vxlan_ftable_init(struct vxlan_softc *sc)
{
	int i;

	sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
	    VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
		LIST_INIT(&sc->vxl_ftable[i]);
	sc->vxl_ftable_hash_key = arc4random();
}

static void
vxlan_ftable_fini(struct vxlan_softc *sc)
{
	int i;

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		KASSERT(LIST_EMPTY(&sc->vxl_ftable[i]),
		    ("%s: vxlan %p ftable[%d] not empty", __func__, sc, i));
	}
	MPASS(sc->vxl_ftable_cnt == 0);

	free(sc->vxl_ftable, M_VXLAN);
	sc->vxl_ftable = NULL;
}

static void
vxlan_ftable_flush(struct vxlan_softc *sc, int all)
{
	struct vxlan_ftable_entry *fe, *tfe;
	int i;

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
			if (all || VXLAN_FE_IS_DYNAMIC(fe))
				vxlan_ftable_entry_destroy(sc, fe);
		}
	}
}

static void
vxlan_ftable_expire(struct vxlan_softc *sc)
{
	struct vxlan_ftable_entry *fe, *tfe;
	int i;

	VXLAN_LOCK_WASSERT(sc);

	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH_SAFE(fe, &sc->vxl_ftable[i], vxlfe_hash, tfe) {
			if (VXLAN_FE_IS_DYNAMIC(fe) &&
			    time_uptime >= fe->vxlfe_expire)
				vxlan_ftable_entry_destroy(sc, fe);
		}
	}
}

static int
vxlan_ftable_update_locked(struct vxlan_softc *sc,
    const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
    struct rm_priotracker *tracker)
{
	struct vxlan_ftable_entry *fe;
	int error __unused;

	VXLAN_LOCK_ASSERT(sc);

again:
	/*
	 * A forwarding entry for this MAC address might already exist. If
	 * so, update it, otherwise create a new one. We may have to upgrade
	 * the lock if we have to change or create an entry.
	 */
	fe = vxlan_ftable_entry_lookup(sc, mac);
	if (fe != NULL) {
		fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;

		if (!VXLAN_FE_IS_DYNAMIC(fe) ||
		    vxlan_sockaddr_in_equal(&fe->vxlfe_raddr, &vxlsa->sa))
			return (0);
		if (!VXLAN_LOCK_WOWNED(sc)) {
			VXLAN_RUNLOCK(sc, tracker);
			VXLAN_WLOCK(sc);
			sc->vxl_stats.ftable_lock_upgrade_failed++;
			goto again;
		}
		vxlan_sockaddr_in_copy(&fe->vxlfe_raddr, &vxlsa->sa);
		return (0);
	}

	if (!VXLAN_LOCK_WOWNED(sc)) {
		VXLAN_RUNLOCK(sc, tracker);
		VXLAN_WLOCK(sc);
		sc->vxl_stats.ftable_lock_upgrade_failed++;
		goto again;
	}

	if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
		sc->vxl_stats.ftable_nospace++;
		return (ENOSPC);
	}

	fe = vxlan_ftable_entry_alloc();
	if (fe == NULL)
		return (ENOMEM);

	vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa, VXLAN_FE_FLAG_DYNAMIC);

	/* The prior lookup failed, so the insert should not. */
	error = vxlan_ftable_entry_insert(sc, fe);
	MPASS(error == 0);

	return (0);
}

static int
vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
    const uint8_t *mac)
{
	struct rm_priotracker tracker;
	union vxlan_sockaddr vxlsa;
	int error;

	/*
	 * The source port may be randomly selected by the remote host, so
	 * use the port of the default destination address.
	 */
	vxlan_sockaddr_copy(&vxlsa, sa);
	vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;

	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
		if (error)
			return (error);
	}

	VXLAN_RLOCK(sc, &tracker);
	error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
	VXLAN_UNLOCK(sc, &tracker);

	return (error);
}

static int
vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sbuf sb;
	struct vxlan_softc *sc;
	struct vxlan_ftable_entry *fe;
	size_t size;
	int i, error;

	/*
	 * This is mostly intended for debugging during development. It is
	 * not practical to dump an entire large table this way.
	 */

	sc = arg1;
	size = PAGE_SIZE;	/* Calculate later. */

	sbuf_new(&sb, NULL, size, SBUF_FIXEDLEN);
	sbuf_putc(&sb, '\n');

	VXLAN_RLOCK(sc, &tracker);
	for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++) {
		LIST_FOREACH(fe, &sc->vxl_ftable[i], vxlfe_hash) {
			if (sbuf_error(&sb) != 0)
				break;
			vxlan_ftable_entry_dump(fe, &sb);
		}
	}
	VXLAN_RUNLOCK(sc, &tracker);

	if (sbuf_len(&sb) == 1)
		sbuf_setpos(&sb, 0);

	sbuf_finish(&sb);
	error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);

	return (error);
}

static struct vxlan_ftable_entry *
vxlan_ftable_entry_alloc(void)
{
	struct vxlan_ftable_entry *fe;

	fe = malloc(sizeof(*fe), M_VXLAN, M_ZERO | M_NOWAIT);

	return (fe);
}

static void
vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
{

	free(fe, M_VXLAN);
}

static void
vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
    const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
{

	fe->vxlfe_flags = flags;
	fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
	memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
	vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
}

static void
vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
    struct vxlan_ftable_entry *fe)
{

	sc->vxl_ftable_cnt--;
	LIST_REMOVE(fe, vxlfe_hash);
	vxlan_ftable_entry_free(fe);
}

static int
vxlan_ftable_entry_insert(struct vxlan_softc *sc,
    struct vxlan_ftable_entry *fe)
{
	struct vxlan_ftable_entry *lfe;
	uint32_t hash;
	int dir;

	VXLAN_LOCK_WASSERT(sc);
	hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);

	lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
	if (lfe == NULL) {
		LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
		goto out;
	}

	do {
		dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
		if (dir == 0)
			return (EEXIST);
		if (dir > 0) {
			LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
			goto out;
		} else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
			LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
			goto out;
		} else
			lfe = LIST_NEXT(lfe, vxlfe_hash);
	} while (lfe != NULL);

out:
	sc->vxl_ftable_cnt++;

	return (0);
}
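
/*
 * Note that vxlan_ftable_entry_insert() keeps each hash bucket sorted in
 * descending MAC address order (per vxlan_ftable_addr_cmp()), which is
 * what allows vxlan_ftable_entry_lookup() below to stop scanning a bucket
 * as soon as the comparison goes positive.
 */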

static struct vxlan_ftable_entry *
vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
{
	struct vxlan_ftable_entry *fe;
	uint32_t hash;
	int dir;

	VXLAN_LOCK_ASSERT(sc);
	hash = VXLAN_SC_FTABLE_HASH(sc, mac);

	LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
		dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
		if (dir == 0)
			return (fe);
		if (dir > 0)
			break;
	}

	return (NULL);
}

static void
vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
{
	char buf[64];
	const union vxlan_sockaddr *sa;
	const void *addr;
	int i, len, af, width;

	sa = &fe->vxlfe_raddr;
	af = sa->sa.sa_family;
	len = sbuf_len(sb);

	sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
	    fe->vxlfe_flags);

	for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
		sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
	sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);

	if (af == AF_INET) {
		addr = &sa->in4.sin_addr;
		width = INET_ADDRSTRLEN - 1;
	} else {
		addr = &sa->in6.sin6_addr;
		width = INET6_ADDRSTRLEN - 1;
	}
	inet_ntop(af, addr, buf, sizeof(buf));
	sbuf_printf(sb, "%*s ", width, buf);

	sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);

	sbuf_putc(sb, '\n');

	/* Truncate a partial line. */
	if (sbuf_error(sb) != 0)
		sbuf_setpos(sb, len);
}

static struct vxlan_socket *
vxlan_socket_alloc(const union vxlan_sockaddr *sa)
{
	struct vxlan_socket *vso;
	int i;

	vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
	rm_init(&vso->vxlso_lock, "vxlansorm");
	refcount_init(&vso->vxlso_refcnt, 0);
	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++)
		LIST_INIT(&vso->vxlso_vni_hash[i]);
	vso->vxlso_laddr = *sa;

	return (vso);
}

static void
vxlan_socket_destroy(struct vxlan_socket *vso)
{
	struct socket *so;
#ifdef INVARIANTS
	int i;
	struct vxlan_socket_mc_info *mc;

	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];
		KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
		    ("%s: socket %p mc[%d] still has address",
		    __func__, vso, i));
	}

	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
		KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
		    ("%s: socket %p vni_hash[%d] not empty",
		    __func__, vso, i));
	}
#endif
	so = vso->vxlso_sock;
	if (so != NULL) {
		vso->vxlso_sock = NULL;
		soclose(so);
	}

	rm_destroy(&vso->vxlso_lock);
	free(vso, M_VXLAN);
}

static void
vxlan_socket_release(struct vxlan_socket *vso)
{
	int destroy;

	VXLAN_LIST_LOCK();
	destroy = VXLAN_SO_RELEASE(vso);
	if (destroy != 0)
		LIST_REMOVE(vso, vxlso_entry);
	VXLAN_LIST_UNLOCK();

	if (destroy != 0)
		vxlan_socket_destroy(vso);
}

static struct vxlan_socket *
vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
{
	struct vxlan_socket *vso;

	VXLAN_LIST_LOCK();
	LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
		if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
			VXLAN_SO_ACQUIRE(vso);
			break;
		}
	}
	VXLAN_LIST_UNLOCK();

	return (vso);
}

static void
vxlan_socket_insert(struct vxlan_socket *vso)
{

	VXLAN_LIST_LOCK();
	VXLAN_SO_ACQUIRE(vso);
	LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
	VXLAN_LIST_UNLOCK();
}

static int
vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
{
	struct thread *td;
	int error;

	td = curthread;

	error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
	    SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
	if (error) {
		if_printf(ifp, "cannot create socket: %d\n", error);
		return (error);
	}

	error = udp_set_kernel_tunneling(vso->vxlso_sock,
	    vxlan_rcv_udp_packet, NULL, vso);
	if (error) {
		if_printf(ifp, "cannot set tunneling function: %d\n", error);
		return (error);
	}

	if (vxlan_reuse_port != 0) {
		struct sockopt sopt;
		int val = 1;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = SO_REUSEPORT;
		sopt.sopt_val = &val;
		sopt.sopt_valsize = sizeof(val);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error) {
			if_printf(ifp,
			    "cannot set SO_REUSEPORT socket opt: %d\n", error);
			return (error);
		}
	}

	return (0);
}

static int
vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
{
	union vxlan_sockaddr laddr;
	struct thread *td;
	int error;

	td = curthread;
	laddr = vso->vxlso_laddr;

	error = sobind(vso->vxlso_sock, &laddr.sa, td);
	if (error) {
		if (error != EADDRINUSE)
			if_printf(ifp, "cannot bind socket: %d\n", error);
		return (error);
	}

	return (0);
}

static int
vxlan_socket_create(struct ifnet *ifp, int multicast,
    const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
{
	union vxlan_sockaddr laddr;
	struct vxlan_socket *vso;
	int error;

	laddr = *saddr;

	/*
	 * If this socket will be multicast, then only the local port
	 * must be specified when binding.
	 */
	if (multicast != 0) {
		if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
			laddr.in4.sin_addr.s_addr = INADDR_ANY;
#ifdef INET6
		else
			laddr.in6.sin6_addr = in6addr_any;
#endif
	}

	vso = vxlan_socket_alloc(&laddr);
	if (vso == NULL)
		return (ENOMEM);

	error = vxlan_socket_init(vso, ifp);
	if (error)
		goto fail;

	error = vxlan_socket_bind(vso, ifp);
	if (error)
		goto fail;

	/*
	 * There is a small window between the bind completing and
	 * inserting the socket, so that a concurrent create may fail.
	 * Let's not worry about that for now.
	 */
	vxlan_socket_insert(vso);
	*vsop = vso;

	return (0);

fail:
	vxlan_socket_destroy(vso);

	return (error);
}

static void
vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{
	struct rm_priotracker tracker;
	struct vxlan_softc *sc;
	int i;

	VXLAN_SO_RLOCK(vso, &tracker);
	for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
		LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
			vxlan_ifdetach(sc, ifp, list);
	}
	VXLAN_SO_RUNLOCK(vso, &tracker);
}

static struct vxlan_socket *
vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
{
	union vxlan_sockaddr laddr;
	struct vxlan_socket *vso;

	laddr = *vxlsa;

	if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
		laddr.in4.sin_addr.s_addr = INADDR_ANY;
#ifdef INET6
	else
		laddr.in6.sin6_addr = in6addr_any;
#endif

	vso = vxlan_socket_lookup(&laddr);

	return (vso);
}

static int
vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int ifidx)
{

	if (!vxlan_sockaddr_in_any(local) &&
	    !vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
		return (0);
	if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
		return (0);
	if (ifidx != 0 && ifidx != mc->vxlsomc_ifidx)
		return (0);

	return (1);
}

static int
vxlan_socket_mc_join_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int *ifidx, union vxlan_sockaddr *source)
{
	struct sockopt sopt;
	int error;

	*source = *local;

	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		struct ip_mreq mreq;

		mreq.imr_multiaddr = group->in4.sin_addr;
		mreq.imr_interface = local->in4.sin_addr;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_ADD_MEMBERSHIP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error)
			return (error);

		/*
		 * BMV: Ideally, there would be a formal way for us to get
		 * the local interface that was selected based on the
		 * imr_interface address. We could then update *ifidx so
		 * vxlan_sockaddr_mc_info_match() would return a match for
		 * later creates that explicitly set the multicast interface.
		 *
		 * If we really need to, we can of course look in the INP's
		 * membership list:
		 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
		 *         imo_head[]->imf_inm->inm_ifp
		 * similarly to imo_match_group().
		 */
		source->in4.sin_addr = local->in4.sin_addr;

	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		struct ipv6_mreq mreq;

		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
		mreq.ipv6mr_interface = *ifidx;

		bzero(&sopt, sizeof(sopt));
		sopt.sopt_dir = SOPT_SET;
		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_JOIN_GROUP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);
		if (error)
			return (error);

		/*
		 * BMV: As with IPv4, we would really like to know what
		 * interface in6p_lookup_mcast_ifp() selected.
		 */
	} else
		error = EAFNOSUPPORT;

	return (error);
}

static int
vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
    int ifidx)
{
	struct sockopt sopt;
	int error;

	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_SET;

	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		struct ip_mreq mreq;

		mreq.imr_multiaddr = group->in4.sin_addr;
		mreq.imr_interface = source->in4.sin_addr;

		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_DROP_MEMBERSHIP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);

	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		struct ipv6_mreq mreq;

		mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
		mreq.ipv6mr_interface = ifidx;

		sopt.sopt_level = IPPROTO_IPV6;
		sopt.sopt_name = IPV6_LEAVE_GROUP;
		sopt.sopt_val = &mreq;
		sopt.sopt_valsize = sizeof(mreq);
		error = sosetopt(vso->vxlso_sock, &sopt);

	} else
		error = EAFNOSUPPORT;

	return (error);
}

static int
vxlan_socket_mc_add_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int ifidx, int *idx)
{
	union vxlan_sockaddr source;
	struct vxlan_socket_mc_info *mc;
	int i, empty, error;

	/*
	 * Within a socket, the same multicast group may be used by multiple
	 * interfaces, each with a different network identifier. But a socket
	 * may only join a multicast group once, so keep track of the users
	 * here.
	 */

	VXLAN_SO_WLOCK(vso);
	for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];

		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			empty++;
			continue;
		}

		if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
			goto out;
	}
	VXLAN_SO_WUNLOCK(vso);

	if (empty == 0)
		return (ENOSPC);

	error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
	if (error)
		return (error);

	VXLAN_SO_WLOCK(vso);
	for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
		mc = &vso->vxlso_mc[i];

		if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
			vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
			vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
			mc->vxlsomc_ifidx = ifidx;
			goto out;
		}
	}
	VXLAN_SO_WUNLOCK(vso);

	error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
	MPASS(error == 0);

	return (ENOSPC);

out:
	mc->vxlsomc_users++;
	VXLAN_SO_WUNLOCK(vso);

	*idx = i;

	return (0);
}
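
/*
 * vxlan_socket_mc_add_group() intentionally drops the socket lock around
 * vxlan_socket_mc_join_group(), since sosetopt() may sleep.  The slot
 * array is rescanned afterwards; if another thread consumed the last free
 * slot in the meantime, the group is left again and ENOSPC is returned.
 */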

static void
vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
{
	union vxlan_sockaddr group, source;
	struct vxlan_socket_mc_info *mc;
	int ifidx, leave;

	KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));

	leave = 0;
	mc = &vso->vxlso_mc[idx];

	VXLAN_SO_WLOCK(vso);
	mc->vxlsomc_users--;
	if (mc->vxlsomc_users == 0) {
		group = mc->vxlsomc_gaddr;
		source = mc->vxlsomc_saddr;
		ifidx = mc->vxlsomc_ifidx;
		bzero(mc, sizeof(*mc));
		leave = 1;
	}
	VXLAN_SO_WUNLOCK(vso);

	if (leave != 0) {
		/*
		 * Our socket's membership in this group may have already
		 * been removed if we joined through an interface that's
		 * been detached.
		 */
		vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
	}
}

static struct vxlan_softc *
vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
{
	struct vxlan_softc *sc;
	uint32_t hash;

	VXLAN_SO_LOCK_ASSERT(vso);
	hash = VXLAN_SO_VNI_HASH(vni);

	LIST_FOREACH(sc, &vso->vxlso_vni_hash[hash], vxl_entry) {
		if (sc->vxl_vni == vni) {
			VXLAN_ACQUIRE(sc);
			break;
		}
	}

	return (sc);
}

static struct vxlan_softc *
vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
{
	struct rm_priotracker tracker;
	struct vxlan_softc *sc;

	VXLAN_SO_RLOCK(vso, &tracker);
	sc = vxlan_socket_lookup_softc_locked(vso, vni);
	VXLAN_SO_RUNLOCK(vso, &tracker);

	return (sc);
}

static int
vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{
	struct vxlan_softc *tsc;
	uint32_t vni, hash;

	vni = sc->vxl_vni;
	hash = VXLAN_SO_VNI_HASH(vni);

	VXLAN_SO_WLOCK(vso);
	tsc = vxlan_socket_lookup_softc_locked(vso, vni);
	if (tsc != NULL) {
		VXLAN_SO_WUNLOCK(vso);
		vxlan_release(tsc);
		return (EEXIST);
	}

	VXLAN_ACQUIRE(sc);
	LIST_INSERT_HEAD(&vso->vxlso_vni_hash[hash], sc, vxl_entry);
	VXLAN_SO_WUNLOCK(vso);

	return (0);
}

static void
vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{

	VXLAN_SO_WLOCK(vso);
	LIST_REMOVE(sc, vxl_entry);
	VXLAN_SO_WUNLOCK(vso);

	vxlan_release(sc);
}

static struct ifnet *
vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
{
	struct ifnet *ifp;

	VXLAN_LOCK_ASSERT(sc);

	if (ipv4 && sc->vxl_im4o != NULL)
		ifp = sc->vxl_im4o->imo_multicast_ifp;
	else if (!ipv4 && sc->vxl_im6o != NULL)
		ifp = sc->vxl_im6o->im6o_multicast_ifp;
	else
		ifp = NULL;

	if (ifp != NULL)
		if_ref(ifp);

	return (ifp);
}

static void
vxlan_free_multicast(struct vxlan_softc *sc)
{

	if (sc->vxl_mc_ifp != NULL) {
		if_rele(sc->vxl_mc_ifp);
		sc->vxl_mc_ifp = NULL;
		sc->vxl_mc_ifindex = 0;
	}

	if (sc->vxl_im4o != NULL) {
		free(sc->vxl_im4o, M_VXLAN);
		sc->vxl_im4o = NULL;
	}

	if (sc->vxl_im6o != NULL) {
		free(sc->vxl_im6o, M_VXLAN);
		sc->vxl_im6o = NULL;
	}
}

static int
vxlan_setup_multicast_interface(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	ifp = ifunit_ref(sc->vxl_mc_ifname);
	if (ifp == NULL) {
		if_printf(sc->vxl_ifp, "multicast interface %s does "
		    "not exist\n", sc->vxl_mc_ifname);
		return (ENOENT);
	}

	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
		if_printf(sc->vxl_ifp, "interface %s does not support "
		    "multicast\n", sc->vxl_mc_ifname);
		if_rele(ifp);
		return (ENOTSUP);
	}

	sc->vxl_mc_ifp = ifp;
	sc->vxl_mc_ifindex = ifp->if_index;

	return (0);
}

static int
vxlan_setup_multicast(struct vxlan_softc *sc)
{
	const union vxlan_sockaddr *group;
	int error;

	group = &sc->vxl_dst_addr;
	error = 0;

	if (sc->vxl_mc_ifname[0] != '\0') {
		error = vxlan_setup_multicast_interface(sc);
		if (error)
			return (error);
	}

	/*
	 * Initialize a multicast options structure that is sufficiently
	 * populated for use in the respective IP output routine. This
	 * structure is typically stored in the socket, but our sockets
	 * may be shared among multiple interfaces.
	 */
	if (VXLAN_SOCKADDR_IS_IPV4(group)) {
		sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
		    M_ZERO | M_WAITOK);
		sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
		sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
		sc->vxl_im4o->imo_multicast_vif = -1;
	} else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
		sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
		    M_ZERO | M_WAITOK);
		sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
		sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
	}

	return (error);
}

static int
vxlan_setup_socket(struct vxlan_softc *sc)
{
	struct vxlan_socket *vso;
	struct ifnet *ifp;
	union vxlan_sockaddr *saddr, *daddr;
	int multicast, error;

	vso = NULL;
	ifp = sc->vxl_ifp;
	saddr = &sc->vxl_src_addr;
	daddr = &sc->vxl_dst_addr;

	multicast = vxlan_sockaddr_in_multicast(daddr);
	MPASS(multicast != -1);
	sc->vxl_vso_mc_index = -1;

	/*
	 * Try to create the socket. If that fails, attempt to use an
	 * existing socket.
	 */
	error = vxlan_socket_create(ifp, multicast, saddr, &vso);
	if (error) {
		if (multicast != 0)
			vso = vxlan_socket_mc_lookup(saddr);
		else
			vso = vxlan_socket_lookup(saddr);

		if (vso == NULL) {
			if_printf(ifp, "cannot create socket (error: %d), "
			    "and no existing socket found\n", error);
			goto out;
		}
	}

	if (multicast != 0) {
		error = vxlan_setup_multicast(sc);
		if (error)
			goto out;

		error = vxlan_socket_mc_add_group(vso, daddr, saddr,
		    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
		if (error)
			goto out;
	}

	sc->vxl_sock = vso;
	error = vxlan_socket_insert_softc(vso, sc);
	if (error) {
		sc->vxl_sock = NULL;
		if_printf(ifp, "network identifier %d already exists in "
		    "this socket\n", sc->vxl_vni);
		goto out;
	}

	return (0);

out:
	if (vso != NULL) {
		if (sc->vxl_vso_mc_index != -1) {
			vxlan_socket_mc_release_group_by_idx(vso,
			    sc->vxl_vso_mc_index);
			sc->vxl_vso_mc_index = -1;
		}
		if (multicast != 0)
			vxlan_free_multicast(sc);
		vxlan_socket_release(vso);
	}

	return (error);
}
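
/*
 * A single UDP socket may be shared by every vxlan interface using the
 * same local address and port: vxlan_setup_socket() first tries to create
 * and bind a new socket, and falls back to an existing one on failure.
 * vxlan_socket_insert_softc() then rejects a duplicate VNI within that
 * socket, so each (socket, VNI) pair maps to exactly one interface.
 */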

#ifdef INET6
static void
vxlan_setup_zero_checksum_port(struct vxlan_softc *sc)
{

	if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr))
		return;

	MPASS(sc->vxl_src_addr.in6.sin6_port != 0);
	MPASS(sc->vxl_dst_addr.in6.sin6_port != 0);

	if (sc->vxl_src_addr.in6.sin6_port != sc->vxl_dst_addr.in6.sin6_port) {
		if_printf(sc->vxl_ifp, "port %d in src address does not match "
		    "port %d in dst address, rfc6935_port (%d) not updated.\n",
		    ntohs(sc->vxl_src_addr.in6.sin6_port),
		    ntohs(sc->vxl_dst_addr.in6.sin6_port),
		    V_zero_checksum_port);
		return;
	}

	if (V_zero_checksum_port != 0) {
		if (V_zero_checksum_port !=
		    ntohs(sc->vxl_src_addr.in6.sin6_port)) {
			if_printf(sc->vxl_ifp, "rfc6935_port is already set to "
			    "%d, cannot set it to %d.\n", V_zero_checksum_port,
			    ntohs(sc->vxl_src_addr.in6.sin6_port));
		}
		return;
	}

	V_zero_checksum_port = ntohs(sc->vxl_src_addr.in6.sin6_port);
	if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n",
	    V_zero_checksum_port);
}
#endif

static void
vxlan_setup_interface_hdrlen(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	VXLAN_LOCK_WASSERT(sc);

	ifp = sc->vxl_ifp;
	ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);

	if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
		ifp->if_hdrlen += sizeof(struct ip);
	else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
		ifp->if_hdrlen += sizeof(struct ip6_hdr);

	if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0)
		ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
}

static int
vxlan_valid_init_config(struct vxlan_softc *sc)
{
	const char *reason;

	if (vxlan_check_vni(sc->vxl_vni) != 0) {
		reason = "invalid virtual network identifier specified";
		goto fail;
	}

	if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0) {
		reason = "source address type is not supported";
		goto fail;
	}

	if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0) {
		reason = "destination address type is not supported";
		goto fail;
	}

	if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0) {
		reason = "no valid destination address specified";
		goto fail;
	}

	if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
	    sc->vxl_mc_ifname[0] != '\0') {
		reason = "can only specify interface with a group address";
		goto fail;
	}

	if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
		if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
		    VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)) {
			reason = "source and destination address must both "
			    "be either IPv4 or IPv6";
			goto fail;
		}
	}

	if (sc->vxl_src_addr.in4.sin_port == 0) {
		reason = "local port not specified";
		goto fail;
	}

	if (sc->vxl_dst_addr.in4.sin_port == 0) {
		reason = "remote port not specified";
		goto fail;
	}

	return (0);

fail:
	if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
	return (EINVAL);
}

static void
vxlan_init_wait(struct vxlan_softc *sc)
{

	VXLAN_LOCK_WASSERT(sc);
	while (sc->vxl_flags & VXLAN_FLAG_INIT)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
}

static void
vxlan_init_complete(struct vxlan_softc *sc)
{

	VXLAN_WLOCK(sc);
	sc->vxl_flags &= ~VXLAN_FLAG_INIT;
	wakeup(sc);
	VXLAN_WUNLOCK(sc);
}

static void
vxlan_init(void *xsc)
{
	static const uint8_t empty_mac[ETHER_ADDR_LEN];
	struct vxlan_softc *sc;
	struct ifnet *ifp;

	sc = xsc;
	ifp = sc->vxl_ifp;

	sx_xlock(&vxlan_sx);
	VXLAN_WLOCK(sc);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		VXLAN_WUNLOCK(sc);
		sx_xunlock(&vxlan_sx);
		return;
	}
	sc->vxl_flags |= VXLAN_FLAG_INIT;
	VXLAN_WUNLOCK(sc);

	if (vxlan_valid_init_config(sc) != 0)
		goto out;

	if (vxlan_setup_socket(sc) != 0)
		goto out;

#ifdef INET6
	vxlan_setup_zero_checksum_port(sc);
#endif

	/* Initialize the default forwarding entry. */
	vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
	    &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);

	VXLAN_WLOCK(sc);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
	    vxlan_timer, sc);
	VXLAN_WUNLOCK(sc);

	if_link_state_change(ifp, LINK_STATE_UP);

	EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family,
	    ntohs(sc->vxl_src_addr.in4.sin_port));
out:
	vxlan_init_complete(sc);
	sx_xunlock(&vxlan_sx);
}

static void
vxlan_release(struct vxlan_softc *sc)
{

	/*
	 * The softc may be destroyed as soon as we release our reference,
	 * so we cannot serialize the wakeup with the softc lock. We use a
	 * timeout in our sleeps so a missed wakeup is unfortunate but not
	 * fatal.
	 */
	if (VXLAN_RELEASE(sc) != 0)
		wakeup(sc);
}

static void
vxlan_teardown_wait(struct vxlan_softc *sc)
{

	VXLAN_LOCK_WASSERT(sc);
	while (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
}

static void
vxlan_teardown_complete(struct vxlan_softc *sc)
{

	VXLAN_WLOCK(sc);
	sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
	wakeup(sc);
	VXLAN_WUNLOCK(sc);
}

static void
vxlan_teardown_locked(struct vxlan_softc *sc)
{
	struct ifnet *ifp;
	struct vxlan_socket *vso;
	bool running;

	sx_assert(&vxlan_sx, SA_XLOCKED);
	VXLAN_LOCK_WASSERT(sc);
	MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);

	ifp = sc->vxl_ifp;
	ifp->if_flags &= ~IFF_UP;
	running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	callout_stop(&sc->vxl_callout);
	vso = sc->vxl_sock;
	sc->vxl_sock = NULL;

	VXLAN_WUNLOCK(sc);
	if_link_state_change(ifp, LINK_STATE_DOWN);
	if (running)
		EVENTHANDLER_INVOKE(vxlan_stop, ifp,
		    sc->vxl_src_addr.in4.sin_family,
		    ntohs(sc->vxl_src_addr.in4.sin_port));

	if (vso != NULL) {
		vxlan_socket_remove_softc(vso, sc);

		if (sc->vxl_vso_mc_index != -1) {
			vxlan_socket_mc_release_group_by_idx(vso,
			    sc->vxl_vso_mc_index);
			sc->vxl_vso_mc_index = -1;
		}
	}

	VXLAN_WLOCK(sc);
	while (sc->vxl_refcnt != 0)
		rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
	VXLAN_WUNLOCK(sc);

	callout_drain(&sc->vxl_callout);

	vxlan_free_multicast(sc);
	if (vso != NULL)
		vxlan_socket_release(vso);

	vxlan_teardown_complete(sc);
}
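
/*
 * Note that vxlan_teardown_locked() waits for vxl_refcnt to drain before
 * draining the callout and releasing the socket, so any code path still
 * holding a softc reference obtained through vxlan_socket_lookup_softc()
 * is guaranteed to have finished before the interface state is torn down.
 */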

static void
vxlan_teardown(struct vxlan_softc *sc)
{

	sx_xlock(&vxlan_sx);
	VXLAN_WLOCK(sc);
	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) {
		vxlan_teardown_wait(sc);
		VXLAN_WUNLOCK(sc);
		sx_xunlock(&vxlan_sx);
		return;
	}

	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
	vxlan_teardown_locked(sc);
	sx_xunlock(&vxlan_sx);
}

static void
vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{

	VXLAN_WLOCK(sc);

	if (sc->vxl_mc_ifp != ifp)
		goto out;
	if (sc->vxl_flags & VXLAN_FLAG_TEARDOWN)
		goto out;

	sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
	LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);

out:
	VXLAN_WUNLOCK(sc);
}

static void
vxlan_timer(void *xsc)
{
	struct vxlan_softc *sc;

	sc = xsc;
	VXLAN_LOCK_WASSERT(sc);

	vxlan_ftable_expire(sc);
	callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
}

static int
vxlan_ioctl_ifflags(struct vxlan_softc *sc)
{
	struct ifnet *ifp;

	ifp = sc->vxl_ifp;

	if (ifp->if_flags & IFF_UP) {
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			vxlan_init(sc);
	} else {
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			vxlan_teardown(sc);
	}

	return (0);
}

static int
vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
{
	struct rm_priotracker tracker;
	struct ifvxlancfg *cfg;

	cfg = arg;
	bzero(cfg, sizeof(*cfg));

	VXLAN_RLOCK(sc, &tracker);
	cfg->vxlc_vni = sc->vxl_vni;
	memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
	    sizeof(union vxlan_sockaddr));
	memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
	    sizeof(union vxlan_sockaddr));
	cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;
	cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
	cfg->vxlc_ftable_max = sc->vxl_ftable_max;
	cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;
	cfg->vxlc_port_min = sc->vxl_min_port;
	cfg->vxlc_port_max = sc->vxl_max_port;
	cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) != 0;
	cfg->vxlc_ttl = sc->vxl_ttl;
	VXLAN_RUNLOCK(sc, &tracker);

#ifdef INET6
	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
		sa6_recoverscope(&cfg->vxlc_local_sa.in6);
	if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
		sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
#endif

	return (0);
}

static int
vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
		return (EINVAL);

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		sc->vxl_vni = cmd->vxlcmd_vni;
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	union vxlan_sockaddr *vxlsa;
	int error;

	cmd = arg;
	vxlsa = &cmd->vxlcmd_sa;

	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
		return (EINVAL);
	if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
		return (EINVAL);
	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(vxlsa);
		if (error)
			return (error);
	}

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
		vxlan_set_hwcaps(sc);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	union vxlan_sockaddr *vxlsa;
	int error;

	cmd = arg;
	vxlsa = &cmd->vxlcmd_sa;

	if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
		return (EINVAL);
	if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(vxlsa);
		if (error)
			return (error);
	}

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
		vxlan_setup_interface_hdrlen(sc);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	if (cmd->vxlcmd_port == 0)
		return (EINVAL);

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	if (cmd->vxlcmd_port == 0)
		return (EINVAL);

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	uint16_t min, max;
	int error;

	cmd = arg;
	min = cmd->vxlcmd_port_min;
	max = cmd->vxlcmd_port_max;

	if (max < min)
		return (EINVAL);

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		sc->vxl_min_port = min;
		sc->vxl_max_port = max;
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) == 0) {
		sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
		error = 0;
	} else
		error = EINVAL;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) == 0) {
		sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
		error = 0;
	} else
		error = EINVAL;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_multicast_if(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (vxlan_can_change_config(sc)) {
		strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
		vxlan_set_hwcaps(sc);
		error = 0;
	} else
		error = EBUSY;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (vxlan_check_ttl(cmd->vxlcmd_ttl) == 0) {
		sc->vxl_ttl = cmd->vxlcmd_ttl;
		if (sc->vxl_im4o != NULL)
			sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
		if (sc->vxl_im6o != NULL)
			sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
		error = 0;
	} else
		error = EINVAL;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;

	cmd = arg;

	VXLAN_WLOCK(sc);
	if (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN)
		sc->vxl_flags |= VXLAN_FLAG_LEARN;
	else
		sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
	VXLAN_WUNLOCK(sc);

	return (0);
}

static int
vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
{
	union vxlan_sockaddr vxlsa;
	struct ifvxlancmd *cmd;
	struct vxlan_ftable_entry *fe;
	int error;

	cmd = arg;
	vxlsa = cmd->vxlcmd_sa;

	if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
		return (EINVAL);
	if (vxlan_sockaddr_in_any(&vxlsa) != 0)
		return (EINVAL);
	if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
		return (EINVAL);
	/* BMV: We could support both IPv4 and IPv6 later. */
	if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
		return (EAFNOSUPPORT);

	if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
		error = vxlan_sockaddr_in6_embedscope(&vxlsa);
		if (error)
			return (error);
	}

	fe = vxlan_ftable_entry_alloc();
	if (fe == NULL)
		return (ENOMEM);

	if (vxlsa.in4.sin_port == 0)
		vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;

	vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
	    VXLAN_FE_FLAG_STATIC);

	VXLAN_WLOCK(sc);
	error = vxlan_ftable_entry_insert(sc, fe);
	VXLAN_WUNLOCK(sc);

	if (error)
		vxlan_ftable_entry_free(fe);

	return (error);
}

static int
vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	struct vxlan_ftable_entry *fe;
	int error;

	cmd = arg;

	VXLAN_WLOCK(sc);
	fe = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
	if (fe != NULL) {
		vxlan_ftable_entry_destroy(sc, fe);
		error = 0;
	} else
		error = ENOENT;
	VXLAN_WUNLOCK(sc);

	return (error);
}

static int
vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
{
	struct ifvxlancmd *cmd;
	int all;

	cmd = arg;
	all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;

	VXLAN_WLOCK(sc);
	vxlan_ftable_flush(sc, all);
	VXLAN_WUNLOCK(sc);

	return (0);
}

static int
vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
{
	const struct vxlan_control *vc;
	union {
		struct ifvxlancfg	cfg;
		struct ifvxlancmd	cmd;
	} args;
	int out, error;

	if (ifd->ifd_cmd >= vxlan_control_table_size)
		return (EINVAL);

	bzero(&args, sizeof(args));
	vc = &vxlan_control_table[ifd->ifd_cmd];
	out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;

	if ((get != 0 && out == 0) || (get == 0 && out != 0))
		return (EINVAL);

	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) {
		error = priv_check(curthread, PRIV_NET_VXLAN);
		if (error)
			return (error);
	}

	if (ifd->ifd_len != vc->vxlc_argsize ||
	    ifd->ifd_len > sizeof(args))
		return (EINVAL);

	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) {
		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
		if (error)
			return (error);
	}

	error = vc->vxlc_func(sc, &args);
	if (error)
		return (error);

	if (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) {
		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
		if (error)
			return (error);
	}

	return (0);
}

static int
vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct rm_priotracker tracker;
	struct vxlan_softc *sc;
	struct ifreq *ifr;
	struct ifdrv *ifd;
	int error;

	sc = ifp->if_softc;
	ifr =
(struct ifreq *) data; 2362 ifd = (struct ifdrv *) data; 2363 2364 error = 0; 2365 2366 switch (cmd) { 2367 case SIOCADDMULTI: 2368 case SIOCDELMULTI: 2369 break; 2370 2371 case SIOCGDRVSPEC: 2372 case SIOCSDRVSPEC: 2373 error = vxlan_ioctl_drvspec(sc, ifd, cmd == SIOCGDRVSPEC); 2374 break; 2375 2376 case SIOCSIFFLAGS: 2377 error = vxlan_ioctl_ifflags(sc); 2378 break; 2379 2380 case SIOCSIFMEDIA: 2381 case SIOCGIFMEDIA: 2382 error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd); 2383 break; 2384 2385 case SIOCSIFMTU: 2386 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VXLAN_MAX_MTU) { 2387 error = EINVAL; 2388 } else { 2389 VXLAN_WLOCK(sc); 2390 ifp->if_mtu = ifr->ifr_mtu; 2391 sc->vxl_flags |= VXLAN_FLAG_USER_MTU; 2392 VXLAN_WUNLOCK(sc); 2393 } 2394 break; 2395 2396 case SIOCSIFCAP: 2397 VXLAN_WLOCK(sc); 2398 error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap); 2399 if (error == 0) 2400 vxlan_set_hwcaps(sc); 2401 VXLAN_WUNLOCK(sc); 2402 break; 2403 2404 case SIOCGTUNFIB: 2405 VXLAN_RLOCK(sc, &tracker); 2406 ifr->ifr_fib = sc->vxl_fibnum; 2407 VXLAN_RUNLOCK(sc, &tracker); 2408 break; 2409 2410 case SIOCSTUNFIB: 2411 if ((error = priv_check(curthread, PRIV_NET_VXLAN)) != 0) 2412 break; 2413 2414 if (ifr->ifr_fib >= rt_numfibs) 2415 error = EINVAL; 2416 else { 2417 VXLAN_WLOCK(sc); 2418 sc->vxl_fibnum = ifr->ifr_fib; 2419 VXLAN_WUNLOCK(sc); 2420 } 2421 break; 2422 2423 default: 2424 error = ether_ioctl(ifp, cmd, data); 2425 break; 2426 } 2427 2428 return (error); 2429 } 2430 2431 #if defined(INET) || defined(INET6) 2432 static uint16_t 2433 vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m) 2434 { 2435 int range; 2436 uint32_t hash; 2437 2438 range = sc->vxl_max_port - sc->vxl_min_port + 1; 2439 2440 if (M_HASHTYPE_ISHASH(m)) 2441 hash = m->m_pkthdr.flowid; 2442 else 2443 hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, 2444 sc->vxl_port_hash_key); 2445 2446 return (sc->vxl_min_port + (hash % range)); 2447 } 2448 2449 static void 2450 vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff, 2451 uint16_t srcport, uint16_t dstport) 2452 { 2453 struct vxlanudphdr *hdr; 2454 struct udphdr *udph; 2455 struct vxlan_header *vxh; 2456 int len; 2457 2458 len = m->m_pkthdr.len - ipoff; 2459 MPASS(len >= sizeof(struct vxlanudphdr)); 2460 hdr = mtodo(m, ipoff); 2461 2462 udph = &hdr->vxlh_udp; 2463 udph->uh_sport = srcport; 2464 udph->uh_dport = dstport; 2465 udph->uh_ulen = htons(len); 2466 udph->uh_sum = 0; 2467 2468 vxh = &hdr->vxlh_hdr; 2469 vxh->vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI); 2470 vxh->vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT); 2471 } 2472 #endif 2473 2474 #if defined(INET6) || defined(INET) 2475 /* 2476 * Return the CSUM_INNER_* equivalent of CSUM_* caps. 2477 */ 2478 static uint32_t 2479 csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap) 2480 { 2481 uint32_t csum_flags = encap; 2482 const uint32_t v4 = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP; 2483 2484 /* 2485 * csum_flags can request either v4 or v6 offload but not both. 2486 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO) 2487 * so those bits are no good to detect the IP version. Other bits are 2488 * always set with CSUM_TSO and we use those to figure out the IP 2489 * version. 
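 * For example, with encap == CSUM_ENCAP_VXLAN, an inner IPv4 TCP segment
 * requesting TSO (CSUM_IP | CSUM_IP_TCP | CSUM_IP_TSO) maps to
 * CSUM_ENCAP_VXLAN | CSUM_INNER_IP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO.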
2490 */ 2491 if (csum_flags_in & v4) { 2492 if (csum_flags_in & CSUM_IP) 2493 csum_flags |= CSUM_INNER_IP; 2494 if (csum_flags_in & CSUM_IP_UDP) 2495 csum_flags |= CSUM_INNER_IP_UDP; 2496 if (csum_flags_in & CSUM_IP_TCP) 2497 csum_flags |= CSUM_INNER_IP_TCP; 2498 if (csum_flags_in & CSUM_IP_TSO) 2499 csum_flags |= CSUM_INNER_IP_TSO; 2500 } else { 2501 #ifdef INVARIANTS 2502 const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP; 2503 2504 MPASS((csum_flags_in & v6) != 0); 2505 #endif 2506 if (csum_flags_in & CSUM_IP6_UDP) 2507 csum_flags |= CSUM_INNER_IP6_UDP; 2508 if (csum_flags_in & CSUM_IP6_TCP) 2509 csum_flags |= CSUM_INNER_IP6_TCP; 2510 if (csum_flags_in & CSUM_IP6_TSO) 2511 csum_flags |= CSUM_INNER_IP6_TSO; 2512 } 2513 2514 return (csum_flags); 2515 } 2516 #endif 2517 2518 static int 2519 vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, 2520 struct mbuf *m) 2521 { 2522 #ifdef INET 2523 struct ifnet *ifp; 2524 struct ip *ip; 2525 struct in_addr srcaddr, dstaddr; 2526 uint16_t srcport, dstport; 2527 int plen, mcast, error; 2528 struct route route, *ro; 2529 struct sockaddr_in *sin; 2530 uint32_t csum_flags; 2531 2532 NET_EPOCH_ASSERT(); 2533 2534 ifp = sc->vxl_ifp; 2535 srcaddr = sc->vxl_src_addr.in4.sin_addr; 2536 srcport = vxlan_pick_source_port(sc, m); 2537 dstaddr = fvxlsa->in4.sin_addr; 2538 dstport = fvxlsa->in4.sin_port; 2539 2540 plen = m->m_pkthdr.len; 2541 M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr), 2542 M_NOWAIT); 2543 if (m == NULL) { 2544 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2545 return (ENOBUFS); 2546 } 2547 2548 ip = mtod(m, struct ip *); 2549 ip->ip_tos = 0; 2550 ip->ip_len = htons(m->m_pkthdr.len); 2551 ip->ip_off = 0; 2552 ip->ip_ttl = sc->vxl_ttl; 2553 ip->ip_p = IPPROTO_UDP; 2554 ip->ip_sum = 0; 2555 ip->ip_src = srcaddr; 2556 ip->ip_dst = dstaddr; 2557 2558 vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport); 2559 2560 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 2561 m->m_flags &= ~(M_MCAST | M_BCAST); 2562 2563 m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; 2564 if (m->m_pkthdr.csum_flags != 0) { 2565 /* 2566 * HW checksum (L3 and/or L4) or TSO has been requested. Look 2567 * up the ifnet for the outbound route and verify that the 2568 * outbound ifnet can perform the requested operation on the 2569 * inner frame. 
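 * If that interface cannot offload the inner frame, the packet is dropped
 * and ENXIO is returned; there is no software fallback for the inner
 * checksum or TSO here.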
2570 */ 2571 bzero(&route, sizeof(route)); 2572 ro = &route; 2573 sin = (struct sockaddr_in *)&ro->ro_dst; 2574 sin->sin_family = AF_INET; 2575 sin->sin_len = sizeof(*sin); 2576 sin->sin_addr = ip->ip_dst; 2577 ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, 2578 0); 2579 if (ro->ro_nh == NULL) { 2580 m_freem(m); 2581 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2582 return (EHOSTUNREACH); 2583 } 2584 2585 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, 2586 CSUM_ENCAP_VXLAN); 2587 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != 2588 csum_flags) { 2589 if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { 2590 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; 2591 2592 if_printf(ifp, "interface %s is missing hwcaps " 2593 "0x%08x, csum_flags 0x%08x -> 0x%08x, " 2594 "hwassist 0x%08x\n", nh_ifp->if_xname, 2595 csum_flags & ~(uint32_t)nh_ifp->if_hwassist, 2596 m->m_pkthdr.csum_flags, csum_flags, 2597 (uint32_t)nh_ifp->if_hwassist); 2598 } 2599 m_freem(m); 2600 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2601 return (ENXIO); 2602 } 2603 m->m_pkthdr.csum_flags = csum_flags; 2604 if (csum_flags & 2605 (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | 2606 CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { 2607 counter_u64_add(sc->vxl_stats.txcsum, 1); 2608 if (csum_flags & CSUM_INNER_TSO) 2609 counter_u64_add(sc->vxl_stats.tso, 1); 2610 } 2611 } else 2612 ro = NULL; 2613 error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL); 2614 if (error == 0) { 2615 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 2616 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); 2617 if (mcast != 0) 2618 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 2619 } else 2620 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2621 2622 return (error); 2623 #else 2624 m_freem(m); 2625 return (ENOTSUP); 2626 #endif 2627 } 2628 2629 static int 2630 vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa, 2631 struct mbuf *m) 2632 { 2633 #ifdef INET6 2634 struct ifnet *ifp; 2635 struct ip6_hdr *ip6; 2636 const struct in6_addr *srcaddr, *dstaddr; 2637 uint16_t srcport, dstport; 2638 int plen, mcast, error; 2639 struct route_in6 route, *ro; 2640 struct sockaddr_in6 *sin6; 2641 uint32_t csum_flags; 2642 2643 NET_EPOCH_ASSERT(); 2644 2645 ifp = sc->vxl_ifp; 2646 srcaddr = &sc->vxl_src_addr.in6.sin6_addr; 2647 srcport = vxlan_pick_source_port(sc, m); 2648 dstaddr = &fvxlsa->in6.sin6_addr; 2649 dstport = fvxlsa->in6.sin6_port; 2650 2651 plen = m->m_pkthdr.len; 2652 M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr), 2653 M_NOWAIT); 2654 if (m == NULL) { 2655 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2656 return (ENOBUFS); 2657 } 2658 2659 ip6 = mtod(m, struct ip6_hdr *); 2660 ip6->ip6_flow = 0; /* BMV: Keep in forwarding entry? */ 2661 ip6->ip6_vfc = IPV6_VERSION; 2662 ip6->ip6_plen = 0; 2663 ip6->ip6_nxt = IPPROTO_UDP; 2664 ip6->ip6_hlim = sc->vxl_ttl; 2665 ip6->ip6_src = *srcaddr; 2666 ip6->ip6_dst = *dstaddr; 2667 2668 vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport); 2669 2670 mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; 2671 m->m_flags &= ~(M_MCAST | M_BCAST); 2672 2673 ro = NULL; 2674 m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX; 2675 if (m->m_pkthdr.csum_flags != 0) { 2676 /* 2677 * HW checksum (L3 and/or L4) or TSO has been requested. Look 2678 * up the ifnet for the outbound route and verify that the 2679 * outbound ifnet can perform the requested operation on the 2680 * inner frame. 
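 * Even when no offload is requested, an outer UDP checksum is still
 * computed for IPv6 below unless the destination port matches
 * V_zero_checksum_port, in which case the checksum is left as zero.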
2681 */ 2682 bzero(&route, sizeof(route)); 2683 ro = &route; 2684 sin6 = (struct sockaddr_in6 *)&ro->ro_dst; 2685 sin6->sin6_family = AF_INET6; 2686 sin6->sin6_len = sizeof(*sin6); 2687 sin6->sin6_addr = ip6->ip6_dst; 2688 ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0, 2689 NHR_NONE, 0); 2690 if (ro->ro_nh == NULL) { 2691 m_freem(m); 2692 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2693 return (EHOSTUNREACH); 2694 } 2695 2696 csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags, 2697 CSUM_ENCAP_VXLAN); 2698 if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) != 2699 csum_flags) { 2700 if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) { 2701 const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp; 2702 2703 if_printf(ifp, "interface %s is missing hwcaps " 2704 "0x%08x, csum_flags 0x%08x -> 0x%08x, " 2705 "hwassist 0x%08x\n", nh_ifp->if_xname, 2706 csum_flags & ~(uint32_t)nh_ifp->if_hwassist, 2707 m->m_pkthdr.csum_flags, csum_flags, 2708 (uint32_t)nh_ifp->if_hwassist); 2709 } 2710 m_freem(m); 2711 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2712 return (ENXIO); 2713 } 2714 m->m_pkthdr.csum_flags = csum_flags; 2715 if (csum_flags & 2716 (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP | 2717 CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) { 2718 counter_u64_add(sc->vxl_stats.txcsum, 1); 2719 if (csum_flags & CSUM_INNER_TSO) 2720 counter_u64_add(sc->vxl_stats.tso, 1); 2721 } 2722 } else if (ntohs(dstport) != V_zero_checksum_port) { 2723 struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr)); 2724 2725 hdr->uh_sum = in6_cksum_pseudo(ip6, 2726 m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0); 2727 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; 2728 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 2729 } 2730 error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL); 2731 if (error == 0) { 2732 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 2733 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); 2734 if (mcast != 0) 2735 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 2736 } else 2737 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2738 2739 return (error); 2740 #else 2741 m_freem(m); 2742 return (ENOTSUP); 2743 #endif 2744 } 2745 2746 #define MTAG_VXLAN_LOOP 0x7876706c /* vxlp */ 2747 static int 2748 vxlan_transmit(struct ifnet *ifp, struct mbuf *m) 2749 { 2750 struct rm_priotracker tracker; 2751 union vxlan_sockaddr vxlsa; 2752 struct vxlan_softc *sc; 2753 struct vxlan_ftable_entry *fe; 2754 struct ifnet *mcifp; 2755 struct ether_header *eh; 2756 int ipv4, error; 2757 2758 sc = ifp->if_softc; 2759 eh = mtod(m, struct ether_header *); 2760 fe = NULL; 2761 mcifp = NULL; 2762 2763 ETHER_BPF_MTAP(ifp, m); 2764 2765 VXLAN_RLOCK(sc, &tracker); 2766 M_SETFIB(m, sc->vxl_fibnum); 2767 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2768 VXLAN_RUNLOCK(sc, &tracker); 2769 m_freem(m); 2770 return (ENETDOWN); 2771 } 2772 if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_VXLAN_LOOP, 2773 max_vxlan_nesting) != 0)) { 2774 VXLAN_RUNLOCK(sc, &tracker); 2775 m_freem(m); 2776 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 2777 return (ELOOP); 2778 } 2779 2780 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) 2781 fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost); 2782 if (fe == NULL) 2783 fe = &sc->vxl_default_fe; 2784 vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa); 2785 2786 ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0; 2787 if (vxlan_sockaddr_in_multicast(&vxlsa) != 0) 2788 mcifp = vxlan_multicast_if_ref(sc, ipv4); 2789 2790 VXLAN_ACQUIRE(sc); 2791 VXLAN_RUNLOCK(sc, &tracker); 2792 2793 if (ipv4 != 0) 2794 error 
= vxlan_encap4(sc, &vxlsa, m); 2795 else 2796 error = vxlan_encap6(sc, &vxlsa, m); 2797 2798 vxlan_release(sc); 2799 if (mcifp != NULL) 2800 if_rele(mcifp); 2801 2802 return (error); 2803 } 2804 2805 static void 2806 vxlan_qflush(struct ifnet *ifp __unused) 2807 { 2808 } 2809 2810 static bool 2811 vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb, 2812 const struct sockaddr *srcsa, void *xvso) 2813 { 2814 struct vxlan_socket *vso; 2815 struct vxlan_header *vxh, vxlanhdr; 2816 uint32_t vni; 2817 int error __unused; 2818 2819 M_ASSERTPKTHDR(m); 2820 vso = xvso; 2821 offset += sizeof(struct udphdr); 2822 2823 if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header)) 2824 goto out; 2825 2826 if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) { 2827 m_copydata(m, offset, sizeof(struct vxlan_header), 2828 (caddr_t) &vxlanhdr); 2829 vxh = &vxlanhdr; 2830 } else 2831 vxh = mtodo(m, offset); 2832 2833 /* 2834 * Drop if there is a reserved bit set in either the flags or VNI 2835 * fields of the header. This goes against the specification, but 2836 * a bit set may indicate an unsupported new feature. This matches 2837 * the behavior of the Linux implementation. 2838 */ 2839 if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) || 2840 vxh->vxlh_vni & ~VXLAN_VNI_MASK) 2841 goto out; 2842 2843 vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT; 2844 2845 /* Adjust to the start of the inner Ethernet frame. */ 2846 m_adj_decap(m, offset + sizeof(struct vxlan_header)); 2847 2848 error = vxlan_input(vso, vni, &m, srcsa); 2849 MPASS(error != 0 || m == NULL); 2850 2851 out: 2852 if (m != NULL) 2853 m_freem(m); 2854 2855 return (true); 2856 } 2857 2858 static int 2859 vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0, 2860 const struct sockaddr *sa) 2861 { 2862 struct vxlan_softc *sc; 2863 struct ifnet *ifp; 2864 struct mbuf *m; 2865 struct ether_header *eh; 2866 int error; 2867 2868 m = *m0; 2869 2870 if (m->m_pkthdr.len < ETHER_HDR_LEN) 2871 return (EINVAL); 2872 2873 sc = vxlan_socket_lookup_softc(vso, vni); 2874 if (sc == NULL) 2875 return (ENOENT); 2876 2877 ifp = sc->vxl_ifp; 2878 if (m->m_len < ETHER_HDR_LEN && 2879 (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { 2880 *m0 = NULL; 2881 error = ENOBUFS; 2882 goto out; 2883 } 2884 eh = mtod(m, struct ether_header *); 2885 2886 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2887 error = ENETDOWN; 2888 goto out; 2889 } else if (ifp == m->m_pkthdr.rcvif) { 2890 /* XXX Does not catch more complex loops. 
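   Only direct re-entry, where the outer packet arrived on this
   same vxlan interface, is detected here.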
*/ 2891 error = EDEADLK; 2892 goto out; 2893 } 2894 2895 if (sc->vxl_flags & VXLAN_FLAG_LEARN) 2896 vxlan_ftable_learn(sc, sa, eh->ether_shost); 2897 2898 m_clrprotoflags(m); 2899 m->m_pkthdr.rcvif = ifp; 2900 M_SETFIB(m, ifp->if_fib); 2901 if (((ifp->if_capenable & IFCAP_RXCSUM && 2902 m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) || 2903 (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 2904 !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) { 2905 uint32_t csum_flags = 0; 2906 2907 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) 2908 csum_flags |= CSUM_L3_CALC; 2909 if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID) 2910 csum_flags |= CSUM_L3_VALID; 2911 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC) 2912 csum_flags |= CSUM_L4_CALC; 2913 if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID) 2914 csum_flags |= CSUM_L4_VALID; 2915 m->m_pkthdr.csum_flags = csum_flags; 2916 counter_u64_add(sc->vxl_stats.rxcsum, 1); 2917 } else { 2918 /* clear everything */ 2919 m->m_pkthdr.csum_flags = 0; 2920 m->m_pkthdr.csum_data = 0; 2921 } 2922 2923 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 2924 (*ifp->if_input)(ifp, m); 2925 *m0 = NULL; 2926 error = 0; 2927 2928 out: 2929 vxlan_release(sc); 2930 return (error); 2931 } 2932 2933 static void 2934 vxlan_stats_alloc(struct vxlan_softc *sc) 2935 { 2936 struct vxlan_statistics *stats = &sc->vxl_stats; 2937 2938 stats->txcsum = counter_u64_alloc(M_WAITOK); 2939 stats->tso = counter_u64_alloc(M_WAITOK); 2940 stats->rxcsum = counter_u64_alloc(M_WAITOK); 2941 } 2942 2943 static void 2944 vxlan_stats_free(struct vxlan_softc *sc) 2945 { 2946 struct vxlan_statistics *stats = &sc->vxl_stats; 2947 2948 counter_u64_free(stats->txcsum); 2949 counter_u64_free(stats->tso); 2950 counter_u64_free(stats->rxcsum); 2951 } 2952 2953 static void 2954 vxlan_set_default_config(struct vxlan_softc *sc) 2955 { 2956 2957 sc->vxl_flags |= VXLAN_FLAG_LEARN; 2958 2959 sc->vxl_vni = VXLAN_VNI_MAX; 2960 sc->vxl_ttl = IPDEFTTL; 2961 2962 if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) { 2963 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT); 2964 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT); 2965 } else { 2966 sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); 2967 sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT); 2968 } 2969 2970 sc->vxl_min_port = V_ipport_firstauto; 2971 sc->vxl_max_port = V_ipport_lastauto; 2972 2973 sc->vxl_ftable_max = VXLAN_FTABLE_MAX; 2974 sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT; 2975 } 2976 2977 static int 2978 vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp) 2979 { 2980 2981 #ifndef INET 2982 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 | 2983 VXLAN_PARAM_WITH_REMOTE_ADDR4)) 2984 return (EAFNOSUPPORT); 2985 #endif 2986 2987 #ifndef INET6 2988 if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 | 2989 VXLAN_PARAM_WITH_REMOTE_ADDR6)) 2990 return (EAFNOSUPPORT); 2991 #else 2992 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { 2993 int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa); 2994 if (error) 2995 return (error); 2996 } 2997 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { 2998 int error = vxlan_sockaddr_in6_embedscope( 2999 &vxlp->vxlp_remote_sa); 3000 if (error) 3001 return (error); 3002 } 3003 #endif 3004 3005 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) { 3006 if (vxlan_check_vni(vxlp->vxlp_vni) == 0) 3007 sc->vxl_vni = vxlp->vxlp_vni; 3008 } 3009 3010 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) { 3011 sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in); 3012 
sc->vxl_src_addr.in4.sin_family = AF_INET; 3013 sc->vxl_src_addr.in4.sin_addr = 3014 vxlp->vxlp_local_sa.in4.sin_addr; 3015 } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) { 3016 sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6); 3017 sc->vxl_src_addr.in6.sin6_family = AF_INET6; 3018 sc->vxl_src_addr.in6.sin6_addr = 3019 vxlp->vxlp_local_sa.in6.sin6_addr; 3020 } 3021 3022 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) { 3023 sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in); 3024 sc->vxl_dst_addr.in4.sin_family = AF_INET; 3025 sc->vxl_dst_addr.in4.sin_addr = 3026 vxlp->vxlp_remote_sa.in4.sin_addr; 3027 } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) { 3028 sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6); 3029 sc->vxl_dst_addr.in6.sin6_family = AF_INET6; 3030 sc->vxl_dst_addr.in6.sin6_addr = 3031 vxlp->vxlp_remote_sa.in6.sin6_addr; 3032 } 3033 3034 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT) 3035 sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port); 3036 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT) 3037 sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port); 3038 3039 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) { 3040 if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) { 3041 sc->vxl_min_port = vxlp->vxlp_min_port; 3042 sc->vxl_max_port = vxlp->vxlp_max_port; 3043 } 3044 } 3045 3046 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF) 3047 strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ); 3048 3049 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) { 3050 if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0) 3051 sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout; 3052 } 3053 3054 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) { 3055 if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0) 3056 sc->vxl_ftable_max = vxlp->vxlp_ftable_max; 3057 } 3058 3059 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) { 3060 if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0) 3061 sc->vxl_ttl = vxlp->vxlp_ttl; 3062 } 3063 3064 if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) { 3065 if (vxlp->vxlp_learn == 0) 3066 sc->vxl_flags &= ~VXLAN_FLAG_LEARN; 3067 } 3068 3069 return (0); 3070 } 3071 3072 static int 3073 vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap) 3074 { 3075 int mask = reqcap ^ ifp->if_capenable; 3076 3077 /* Disable TSO if tx checksums are disabled. */ 3078 if (mask & IFCAP_TXCSUM && !(reqcap & IFCAP_TXCSUM) && 3079 reqcap & IFCAP_TSO4) { 3080 reqcap &= ~IFCAP_TSO4; 3081 if_printf(ifp, "tso4 disabled due to -txcsum.\n"); 3082 } 3083 if (mask & IFCAP_TXCSUM_IPV6 && !(reqcap & IFCAP_TXCSUM_IPV6) && 3084 reqcap & IFCAP_TSO6) { 3085 reqcap &= ~IFCAP_TSO6; 3086 if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); 3087 } 3088 3089 /* Do not enable TSO if tx checksums are disabled. */ 3090 if (mask & IFCAP_TSO4 && reqcap & IFCAP_TSO4 && 3091 !(reqcap & IFCAP_TXCSUM)) { 3092 if_printf(ifp, "enable txcsum first.\n"); 3093 return (EAGAIN); 3094 } 3095 if (mask & IFCAP_TSO6 && reqcap & IFCAP_TSO6 && 3096 !(reqcap & IFCAP_TXCSUM_IPV6)) { 3097 if_printf(ifp, "enable txcsum6 first.\n"); 3098 return (EAGAIN); 3099 } 3100 3101 sc->vxl_reqcap = reqcap; 3102 return (0); 3103 } 3104 3105 /* 3106 * A VXLAN interface inherits the capabilities of the vxlandev or the interface 3107 * hosting the vxlanlocal address. 
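 * Inner-frame checksum and TSO offloads are inherited only when that
 * interface advertises IFCAP_VXLAN_HWCSUM or IFCAP_VXLAN_HWTSO.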
3108 */ 3109 static void 3110 vxlan_set_hwcaps(struct vxlan_softc *sc) 3111 { 3112 struct epoch_tracker et; 3113 struct ifnet *p; 3114 struct ifaddr *ifa; 3115 u_long hwa; 3116 int cap, ena; 3117 bool rel; 3118 struct ifnet *ifp = sc->vxl_ifp; 3119 3120 /* reset caps */ 3121 ifp->if_capabilities &= VXLAN_BASIC_IFCAPS; 3122 ifp->if_capenable &= VXLAN_BASIC_IFCAPS; 3123 ifp->if_hwassist = 0; 3124 3125 NET_EPOCH_ENTER(et); 3126 CURVNET_SET(ifp->if_vnet); 3127 3128 rel = false; 3129 p = NULL; 3130 if (sc->vxl_mc_ifname[0] != '\0') { 3131 rel = true; 3132 p = ifunit_ref(sc->vxl_mc_ifname); 3133 } else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) { 3134 if (sc->vxl_src_addr.sa.sa_family == AF_INET) { 3135 struct sockaddr_in in4 = sc->vxl_src_addr.in4; 3136 3137 in4.sin_port = 0; 3138 ifa = ifa_ifwithaddr((struct sockaddr *)&in4); 3139 if (ifa != NULL) 3140 p = ifa->ifa_ifp; 3141 } else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) { 3142 struct sockaddr_in6 in6 = sc->vxl_src_addr.in6; 3143 3144 in6.sin6_port = 0; 3145 ifa = ifa_ifwithaddr((struct sockaddr *)&in6); 3146 if (ifa != NULL) 3147 p = ifa->ifa_ifp; 3148 } 3149 } 3150 if (p == NULL) 3151 goto done; 3152 3153 cap = ena = hwa = 0; 3154 3155 /* checksum offload */ 3156 if (p->if_capabilities & IFCAP_VXLAN_HWCSUM) 3157 cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 3158 if (p->if_capenable & IFCAP_VXLAN_HWCSUM) { 3159 ena |= sc->vxl_reqcap & p->if_capenable & 3160 (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); 3161 if (ena & IFCAP_TXCSUM) { 3162 if (p->if_hwassist & CSUM_INNER_IP) 3163 hwa |= CSUM_IP; 3164 if (p->if_hwassist & CSUM_INNER_IP_UDP) 3165 hwa |= CSUM_IP_UDP; 3166 if (p->if_hwassist & CSUM_INNER_IP_TCP) 3167 hwa |= CSUM_IP_TCP; 3168 } 3169 if (ena & IFCAP_TXCSUM_IPV6) { 3170 if (p->if_hwassist & CSUM_INNER_IP6_UDP) 3171 hwa |= CSUM_IP6_UDP; 3172 if (p->if_hwassist & CSUM_INNER_IP6_TCP) 3173 hwa |= CSUM_IP6_TCP; 3174 } 3175 } 3176 3177 /* hardware TSO */ 3178 if (p->if_capabilities & IFCAP_VXLAN_HWTSO) { 3179 cap |= p->if_capabilities & IFCAP_TSO; 3180 if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen) 3181 ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen; 3182 else 3183 ifp->if_hw_tsomax = p->if_hw_tsomax; 3184 /* XXX: tsomaxsegcount decrement is cxgbe specific */ 3185 ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1; 3186 ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize; 3187 } 3188 if (p->if_capenable & IFCAP_VXLAN_HWTSO) { 3189 ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO; 3190 if (ena & IFCAP_TSO) { 3191 if (p->if_hwassist & CSUM_INNER_IP_TSO) 3192 hwa |= CSUM_IP_TSO; 3193 if (p->if_hwassist & CSUM_INNER_IP6_TSO) 3194 hwa |= CSUM_IP6_TSO; 3195 } 3196 } 3197 3198 ifp->if_capabilities |= cap; 3199 ifp->if_capenable |= ena; 3200 ifp->if_hwassist |= hwa; 3201 if (rel) 3202 if_rele(p); 3203 done: 3204 CURVNET_RESTORE(); 3205 NET_EPOCH_EXIT(et); 3206 } 3207 3208 static int 3209 vxlan_clone_create(struct if_clone *ifc, char *name, size_t len, 3210 struct ifc_data *ifd, struct ifnet **ifpp) 3211 { 3212 struct vxlan_softc *sc; 3213 struct ifnet *ifp; 3214 struct ifvxlanparam vxlp; 3215 int error; 3216 3217 sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO); 3218 sc->vxl_unit = ifd->unit; 3219 sc->vxl_fibnum = curthread->td_proc->p_fibnum; 3220 vxlan_set_default_config(sc); 3221 3222 if (ifd->params != NULL) { 3223 error = ifc_copyin(ifd, &vxlp, sizeof(vxlp)); 3224 if (error) 3225 goto fail; 3226 3227 error = vxlan_set_user_config(sc, &vxlp); 3228 if (error) 3229 goto fail; 3230 } 3231 
3232 vxlan_stats_alloc(sc); 3233 ifp = if_alloc(IFT_ETHER); 3234 sc->vxl_ifp = ifp; 3235 rm_init(&sc->vxl_lock, "vxlanrm"); 3236 callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0); 3237 sc->vxl_port_hash_key = arc4random(); 3238 vxlan_ftable_init(sc); 3239 3240 vxlan_sysctl_setup(sc); 3241 3242 ifp->if_softc = sc; 3243 if_initname(ifp, vxlan_name, ifd->unit); 3244 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3245 ifp->if_init = vxlan_init; 3246 ifp->if_ioctl = vxlan_ioctl; 3247 ifp->if_transmit = vxlan_transmit; 3248 ifp->if_qflush = vxlan_qflush; 3249 ifp->if_capabilities = VXLAN_BASIC_IFCAPS; 3250 ifp->if_capenable = VXLAN_BASIC_IFCAPS; 3251 sc->vxl_reqcap = -1; 3252 vxlan_set_hwcaps(sc); 3253 3254 ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status); 3255 ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL); 3256 ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO); 3257 3258 ether_gen_addr(ifp, &sc->vxl_hwaddr); 3259 ether_ifattach(ifp, sc->vxl_hwaddr.octet); 3260 3261 ifp->if_baudrate = 0; 3262 3263 VXLAN_WLOCK(sc); 3264 vxlan_setup_interface_hdrlen(sc); 3265 VXLAN_WUNLOCK(sc); 3266 *ifpp = ifp; 3267 3268 return (0); 3269 3270 fail: 3271 free(sc, M_VXLAN); 3272 return (error); 3273 } 3274 3275 static int 3276 vxlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 3277 { 3278 struct vxlan_softc *sc; 3279 3280 sc = ifp->if_softc; 3281 3282 vxlan_teardown(sc); 3283 3284 vxlan_ftable_flush(sc, 1); 3285 3286 ether_ifdetach(ifp); 3287 if_free(ifp); 3288 ifmedia_removeall(&sc->vxl_media); 3289 3290 vxlan_ftable_fini(sc); 3291 3292 vxlan_sysctl_destroy(sc); 3293 rm_destroy(&sc->vxl_lock); 3294 vxlan_stats_free(sc); 3295 free(sc, M_VXLAN); 3296 3297 return (0); 3298 } 3299 3300 /* BMV: Taken from if_bridge. */ 3301 static uint32_t 3302 vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr) 3303 { 3304 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key; 3305 3306 b += addr[5] << 8; 3307 b += addr[4]; 3308 a += addr[3] << 24; 3309 a += addr[2] << 16; 3310 a += addr[1] << 8; 3311 a += addr[0]; 3312 3313 /* 3314 * The following hash function is adapted from "Hash Functions" by Bob Jenkins 3315 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). 3316 */ 3317 #define mix(a, b, c) \ 3318 do { \ 3319 a -= b; a -= c; a ^= (c >> 13); \ 3320 b -= c; b -= a; b ^= (a << 8); \ 3321 c -= a; c -= b; c ^= (b >> 13); \ 3322 a -= b; a -= c; a ^= (c >> 12); \ 3323 b -= c; b -= a; b ^= (a << 16); \ 3324 c -= a; c -= b; c ^= (b >> 5); \ 3325 a -= b; a -= c; a ^= (c >> 3); \ 3326 b -= c; b -= a; b ^= (a << 10); \ 3327 c -= a; c -= b; c ^= (b >> 15); \ 3328 } while (0) 3329 3330 mix(a, b, c); 3331 3332 #undef mix 3333 3334 return (c); 3335 } 3336 3337 static int 3338 vxlan_media_change(struct ifnet *ifp) 3339 { 3340 3341 /* Ignore. 
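   The media of a vxlan interface is fixed, so media change requests
   are accepted as no-ops.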
*/ 3342 return (0); 3343 } 3344 3345 static void 3346 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3347 { 3348 3349 ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID; 3350 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3351 } 3352 3353 static int 3354 vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr, 3355 const struct sockaddr *sa) 3356 { 3357 3358 return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len)); 3359 } 3360 3361 static void 3362 vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr, 3363 const struct sockaddr *sa) 3364 { 3365 3366 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3367 bzero(vxladdr, sizeof(*vxladdr)); 3368 3369 if (sa->sa_family == AF_INET) { 3370 vxladdr->in4 = *satoconstsin(sa); 3371 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3372 } else if (sa->sa_family == AF_INET6) { 3373 vxladdr->in6 = *satoconstsin6(sa); 3374 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3375 } 3376 } 3377 3378 static int 3379 vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr, 3380 const struct sockaddr *sa) 3381 { 3382 int equal; 3383 3384 if (sa->sa_family == AF_INET) { 3385 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3386 equal = in4->s_addr == vxladdr->in4.sin_addr.s_addr; 3387 } else if (sa->sa_family == AF_INET6) { 3388 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3389 equal = IN6_ARE_ADDR_EQUAL(in6, &vxladdr->in6.sin6_addr); 3390 } else 3391 equal = 0; 3392 3393 return (equal); 3394 } 3395 3396 static void 3397 vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr, 3398 const struct sockaddr *sa) 3399 { 3400 3401 MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); 3402 3403 if (sa->sa_family == AF_INET) { 3404 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3405 vxladdr->in4.sin_family = AF_INET; 3406 vxladdr->in4.sin_len = sizeof(struct sockaddr_in); 3407 vxladdr->in4.sin_addr = *in4; 3408 } else if (sa->sa_family == AF_INET6) { 3409 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3410 vxladdr->in6.sin6_family = AF_INET6; 3411 vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6); 3412 vxladdr->in6.sin6_addr = *in6; 3413 } 3414 } 3415 3416 static int 3417 vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec) 3418 { 3419 const struct sockaddr *sa; 3420 int supported; 3421 3422 sa = &vxladdr->sa; 3423 supported = 0; 3424 3425 if (sa->sa_family == AF_UNSPEC && unspec != 0) { 3426 supported = 1; 3427 } else if (sa->sa_family == AF_INET) { 3428 #ifdef INET 3429 supported = 1; 3430 #endif 3431 } else if (sa->sa_family == AF_INET6) { 3432 #ifdef INET6 3433 supported = 1; 3434 #endif 3435 } 3436 3437 return (supported); 3438 } 3439 3440 static int 3441 vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr) 3442 { 3443 const struct sockaddr *sa; 3444 int any; 3445 3446 sa = &vxladdr->sa; 3447 3448 if (sa->sa_family == AF_INET) { 3449 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3450 any = in4->s_addr == INADDR_ANY; 3451 } else if (sa->sa_family == AF_INET6) { 3452 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3453 any = IN6_IS_ADDR_UNSPECIFIED(in6); 3454 } else 3455 any = -1; 3456 3457 return (any); 3458 } 3459 3460 static int 3461 vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr) 3462 { 3463 const struct sockaddr *sa; 3464 int mc; 3465 3466 sa = &vxladdr->sa; 3467 3468 if (sa->sa_family == AF_INET) { 3469 const struct in_addr *in4 = &satoconstsin(sa)->sin_addr; 3470 mc = IN_MULTICAST(ntohl(in4->s_addr)); 3471 } else if (sa->sa_family 
== AF_INET6) { 3472 const struct in6_addr *in6 = &satoconstsin6(sa)->sin6_addr; 3473 mc = IN6_IS_ADDR_MULTICAST(in6); 3474 } else 3475 mc = -1; 3476 3477 return (mc); 3478 } 3479 3480 static int 3481 vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr) 3482 { 3483 int error; 3484 3485 MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr)); 3486 #ifdef INET6 3487 error = sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone); 3488 #else 3489 error = EAFNOSUPPORT; 3490 #endif 3491 3492 return (error); 3493 } 3494 3495 static int 3496 vxlan_can_change_config(struct vxlan_softc *sc) 3497 { 3498 struct ifnet *ifp; 3499 3500 ifp = sc->vxl_ifp; 3501 VXLAN_LOCK_ASSERT(sc); 3502 3503 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3504 return (0); 3505 if (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN)) 3506 return (0); 3507 3508 return (1); 3509 } 3510 3511 static int 3512 vxlan_check_vni(uint32_t vni) 3513 { 3514 3515 return (vni >= VXLAN_VNI_MAX); 3516 } 3517 3518 static int 3519 vxlan_check_ttl(int ttl) 3520 { 3521 3522 return (ttl > MAXTTL); 3523 } 3524 3525 static int 3526 vxlan_check_ftable_timeout(uint32_t timeout) 3527 { 3528 3529 return (timeout > VXLAN_FTABLE_MAX_TIMEOUT); 3530 } 3531 3532 static int 3533 vxlan_check_ftable_max(uint32_t max) 3534 { 3535 3536 return (max > VXLAN_FTABLE_MAX); 3537 } 3538 3539 static void 3540 vxlan_sysctl_setup(struct vxlan_softc *sc) 3541 { 3542 struct sysctl_ctx_list *ctx; 3543 struct sysctl_oid *node; 3544 struct vxlan_statistics *stats; 3545 char namebuf[8]; 3546 3547 ctx = &sc->vxl_sysctl_ctx; 3548 stats = &sc->vxl_stats; 3549 snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit); 3550 3551 sysctl_ctx_init(ctx); 3552 sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx, 3553 SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf, 3554 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3555 3556 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), 3557 OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3558 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count", 3559 CTLFLAG_RD, &sc->vxl_ftable_cnt, 0, 3560 "Number of entries in forwarding table"); 3561 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max", 3562 CTLFLAG_RD, &sc->vxl_ftable_max, 0, 3563 "Maximum number of entries allowed in forwarding table"); 3564 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout", 3565 CTLFLAG_RD, &sc->vxl_ftable_timeout, 0, 3566 "Number of seconds between prunes of the forwarding table"); 3567 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump", 3568 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP, 3569 sc, 0, vxlan_ftable_sysctl_dump, "A", 3570 "Dump the forwarding table entries"); 3571 3572 node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node), 3573 OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 3574 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, 3575 "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0, 3576 "Forwarding table reached maximum entries"); 3577 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, 3578 "ftable_lock_upgrade_failed", CTLFLAG_RD, 3579 &stats->ftable_lock_upgrade_failed, 0, 3580 "Forwarding table update required lock upgrade"); 3581 3582 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum", 3583 CTLFLAG_RD, &stats->txcsum, 3584 "# of times hardware assisted with tx checksum"); 3585 SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso", 3586 CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO"); 3587 SYSCTL_ADD_COUNTER_U64(ctx,
SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum", 3588 CTLFLAG_RD, &stats->rxcsum, 3589 "# of times hardware assisted with rx checksum"); 3590 } 3591 3592 static void 3593 vxlan_sysctl_destroy(struct vxlan_softc *sc) 3594 { 3595 3596 sysctl_ctx_free(&sc->vxl_sysctl_ctx); 3597 sc->vxl_sysctl_node = NULL; 3598 } 3599 3600 static int 3601 vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def) 3602 { 3603 char path[64]; 3604 3605 snprintf(path, sizeof(path), "net.link.vxlan.%d.%s", 3606 sc->vxl_unit, knob); 3607 TUNABLE_INT_FETCH(path, &def); 3608 3609 return (def); 3610 } 3611 3612 static void 3613 vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp) 3614 { 3615 struct vxlan_softc_head list = LIST_HEAD_INITIALIZER(list); 3616 struct vxlan_socket *vso; 3617 struct vxlan_softc *sc, *tsc; 3618 3619 if (ifp->if_flags & IFF_RENAMING) 3620 return; 3621 if ((ifp->if_flags & IFF_MULTICAST) == 0) 3622 return; 3623 3624 VXLAN_LIST_LOCK(); 3625 LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) 3626 vxlan_socket_ifdetach(vso, ifp, &list); 3627 VXLAN_LIST_UNLOCK(); 3628 3629 LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) { 3630 LIST_REMOVE(sc, vxl_ifdetach_list); 3631 3632 sx_xlock(&vxlan_sx); 3633 VXLAN_WLOCK(sc); 3634 if (sc->vxl_flags & VXLAN_FLAG_INIT) 3635 vxlan_init_wait(sc); 3636 vxlan_teardown_locked(sc); 3637 sx_xunlock(&vxlan_sx); 3638 } 3639 } 3640 3641 static void 3642 vxlan_load(void) 3643 { 3644 3645 mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF); 3646 vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 3647 vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY); 3648 3649 struct if_clone_addreq req = { 3650 .create_f = vxlan_clone_create, 3651 .destroy_f = vxlan_clone_destroy, 3652 .flags = IFC_F_AUTOUNIT, 3653 }; 3654 vxlan_cloner = ifc_attach_cloner(vxlan_name, &req); 3655 } 3656 3657 static void 3658 vxlan_unload(void) 3659 { 3660 3661 EVENTHANDLER_DEREGISTER(ifnet_departure_event, 3662 vxlan_ifdetach_event_tag); 3663 ifc_detach_cloner(vxlan_cloner); 3664 mtx_destroy(&vxlan_list_mtx); 3665 MPASS(LIST_EMPTY(&vxlan_socket_list)); 3666 } 3667 3668 static int 3669 vxlan_modevent(module_t mod, int type, void *unused) 3670 { 3671 int error; 3672 3673 error = 0; 3674 3675 switch (type) { 3676 case MOD_LOAD: 3677 vxlan_load(); 3678 break; 3679 case MOD_UNLOAD: 3680 vxlan_unload(); 3681 break; 3682 default: 3683 error = ENOTSUP; 3684 break; 3685 } 3686 3687 return (error); 3688 } 3689 3690 static moduledata_t vxlan_mod = { 3691 "if_vxlan", 3692 vxlan_modevent, 3693 0 3694 }; 3695 3696 DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 3697 MODULE_VERSION(if_vxlan, 1); 3698